赞
踩
- import torch
- from torch import nn
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.preprocessing import MinMaxScaler
- from torch.nn import MaxPool2d, Conv2d, Dropout, ReLU
- from torch.utils.data import DataLoader, Dataset
-
# Prepare the dataset: load the price history, normalise it, and slice it
# into fixed-length windows used to predict the next step.
df = pd.read_csv("train.csv", parse_dates=["Date"], index_col=[0])
print(df.shape)

# Fraction of the rows that goes to the training split; the rest is used for
# validation. The sizes printed here are row counts on either side of the
# split point, so they agree with the slicing done at the end of this section.
TRAIN_RATIO = 0.7
train_data_size = int(TRAIN_RATIO * len(df))
test_data_size = len(df) - train_data_size
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

df[['Open']].plot()
plt.ylabel("stock price")
plt.xlabel("times")
plt.show()

sel_col = ['Open', 'High', 'Low', 'Close']
df = df[sel_col]

# Keep the raw Close range: it is what maps normalised values back to prices.
df_close_max = df['Close'].max()
df_close_min = df['Close'].min()
print("最高价=", df_close_max)
print("最低价=", df_close_min)
print("波动值=", df_close_max - df_close_min)
print("上涨率=", (df_close_max - df_close_min) / df_close_min)
print("下跌率=", (df_close_max - df_close_min) / df_close_max)

# Column-wise min-max scaling to [0, 1], done with vectorised pandas
# reductions instead of the Python builtins min()/max().
# NOTE: a constant column would divide by zero and become NaN.
df = (df - df.min()) / (df.max() - df.min())
print(df)

total_len = df.shape[0]
print("df.shape=", df.shape)
print("df_len=", total_len)

# Number of consecutive rows that make up one input window.
sequence = 10
if total_len <= sequence:
    raise ValueError(
        "need more than {} rows to build a window, got {}".format(sequence, total_len)
    )

# Convert once to a float32 array rather than calling df.iloc per window.
values = df.to_numpy(dtype=np.float32)

# The label is the normalised Close of the row right after the window. It has
# to be Close (not column 1, which is High) because df_close_min/df_close_max
# are the values used to undo the normalisation of labels and predictions.
target_col = df.columns.get_loc('Close')

x = [values[i:i + sequence] for i in range(total_len - sequence)]
y = [values[i + sequence, target_col] for i in range(total_len - sequence)]
print("train data of item 0: \n", x[0])
print("train label of item 0: \n", y[0])

print("\n序列化后的数据形状:")
X = np.array(x)
Y = np.array(y)
Y = np.expand_dims(Y, 1)  # (N,) -> (N, 1) so it lines up with the model output
print("X.shape =", X.shape)
print("Y.shape =", Y.shape)
x_tensor = torch.from_numpy(X)
y_tensor = torch.from_numpy(Y)

# Chronological split: the first TRAIN_RATIO of the rows is the training set.
train_x = x_tensor[:train_data_size]
train_y = y_tensor[:train_data_size]

# Everything after the training portion is the validation set.
valid_x = x_tensor[train_data_size:]
valid_y = y_tensor[train_data_size:]

print("训练集x的形状是:", train_x.shape)
print("训练集y的形状是:", train_y.shape)
print("测试集x的形状是:", valid_x.shape)
print("测试集y的形状是:", valid_y.shape)
-
-
class Mydataset(Dataset):
    """Index-aligned pairing of feature windows with their labels.

    Args:
        x: Indexable, sized collection of input samples.
        y: Indexable, sized collection of labels; must be as long as ``x``.
        transform: Optional callable applied to each input sample when it is
            fetched. Labels are returned untouched.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.
    """

    def __init__(self, x, y, transform=None):
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same length, got {} and {}".format(len(x), len(y))
            )
        self.x = x
        self.y = y
        # Previously accepted but discarded; keep it so it can be applied.
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        x1 = self.x[index]
        y1 = self.y[index]
        if self.transform is not None:
            x1 = self.transform(x1)
        return x1, y1

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)
-
-
# Wrap each (features, labels) tensor pair in a dataset.
dataset_train, dataset_valid = (
    Mydataset(features, targets)
    for features, targets in ((train_x, train_y), (valid_x, valid_y))
)

# Batched loaders over both splits; iteration order follows the dataset order.
train_dataloader, valid_dataloader = (
    DataLoader(split, batch_size=64)
    for split in (dataset_train, dataset_valid)
)
-
class cnn_lstm(nn.Module):
    """Conv2d feature extractor followed by two stacked LSTMs and an MLP head.

    Args:
        window_size: Number of time steps in every input window.
        feature_number: Number of features per time step.

    Forward input: tensor that can be reshaped to
    ``(batch, window_size, feature_number)``.
    Forward output: tensor of shape ``(batch, 1)``.
    """

    def __init__(self, window_size, feature_number):
        # Zero-argument super() does not look the class up by name, so it keeps
        # working even if the module-level name ``cnn_lstm`` is rebound later.
        super().__init__()
        self.window_size = window_size
        self.feature_number = feature_number
        # kernel 3 / stride 1 / padding 1 keeps the (window, feature) grid size.
        self.conv1 = Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.relu1 = ReLU()
        self.maxpooling1 = MaxPool2d(3, stride=1, padding=1)
        self.dropout1 = Dropout(0.3)
        self.lstm1 = nn.LSTM(input_size=64 * feature_number, hidden_size=128, num_layers=1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=64, num_layers=1, batch_first=True)
        self.fc = nn.Linear(in_features=64, out_features=32)
        self.relu2 = nn.ReLU()
        self.head = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Predict one value per window from ``x``."""
        batch_size = x.shape[0]

        # Add a single channel axis: (B, W, F) -> (B, 1, W, F).
        x = x.reshape([batch_size, 1, self.window_size, self.feature_number])
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpooling1(x)
        x = self.dropout1(x)

        # The conv stack outputs (B, 64, W, F). Move the time axis next to the
        # batch axis BEFORE flattening, so every LSTM step receives the 64 * F
        # activations of its own time step. Reshaping directly to (B, W, -1)
        # would slice the channel-major memory into W chunks, each mixing
        # several channels over all time steps.
        x = x.permute(0, 2, 1, 3).reshape([batch_size, self.window_size, -1])

        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x = x[:, -1, :]  # keep only the output of the last time step
        x = self.fc(x)
        x = self.relu2(x)
        x = self.head(x)

        return x
-
# Create the network.
# NOTE: this rebinds the name ``cnn_lstm`` from the class to the instance; the
# name is kept because later code calls ``cnn_lstm(...)`` to run the model.
# The sizes come from the data-preparation settings instead of literals.
cnn_lstm = cnn_lstm(window_size=sequence, feature_number=len(sel_col))
print(cnn_lstm)

# Loss function. ``size_average`` is deprecated; ``reduction="mean"`` is the
# equivalent supported spelling.
loss_fn = nn.MSELoss(reduction="mean")

# Optimizer.
learning_rate = 0.01
optimizer = torch.optim.Adam(cnn_lstm.parameters(), learning_rate)
opitmizer = optimizer  # original (misspelled) name kept as an alias

# Step counters (not updated by the loop below).
total_train_step = 0
total_valid_step = 0

# Number of training epochs and the per-epoch loss history.
epoch = 100
hist = np.zeros(epoch)

# Full-batch training: every epoch is one forward/backward pass over train_x.
cnn_lstm.train()  # make sure dropout is active while fitting
for i in range(epoch):
    y_train_pred = cnn_lstm(train_x)
    loss = loss_fn(y_train_pred, train_y)

    if i % 10 == 0 and i != 0:  # report the MSE every ten epochs
        print("Epoch ", i, "MSE: ", loss.item())
    hist[i] = loss.item()

    # Optimisation step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Final losses on both splits, with dropout disabled and without building an
# autograd graph. The values are printed rather than computed and discarded.
cnn_lstm.eval()
with torch.no_grad():
    y_train_pred = cnn_lstm(train_x)
    train_mse = loss_fn(y_train_pred, train_y).item()

    y_test_pred = cnn_lstm(valid_x)
    valid_mse = loss_fn(y_test_pred, valid_y).item()
print("Final train MSE: ", train_mse)
print("Final valid MSE: ", valid_mse)

plt.grid()
plt.xlabel("iters")
plt.ylabel("")
plt.title("loss", fontsize = 12)
plt.plot(hist, "r")
plt.show()
-
data_loader = valid_dataloader

# Predictions for the validation windows.
predicts = []

# Ground-truth labels for the same windows.
labels = []

# Inference only: disable dropout and skip autograd bookkeeping.
cnn_lstm.eval()
with torch.no_grad():
    # Every batch is used, including a final one smaller than the batch size;
    # the model reads the batch dimension from its input, so skipping that
    # batch would only drop the tail of the validation set.
    for batch_x, batch_label in data_loader:
        # Open-loop prediction: each window is predicted independently of the
        # windows before and after it.
        predict = cnn_lstm(batch_x)

        # (B, 1) tensors -> flat Python lists.
        predicts.extend(predict.detach().squeeze(1).tolist())
        labels.extend(batch_label.detach().squeeze(1).tolist())

predicts = np.array(predicts)
labels = np.array(labels)
print(predicts.shape)
print(labels.shape)

# Undo the min-max scaling using the Close range.
# NOTE(review): this is only meaningful if the labels are normalised Close
# prices - confirm against the code that builds the labels.
predicts_unnormalized = df_close_min + (df_close_max - df_close_min) * predicts
labels_unnormalized = df_close_min + (df_close_max - df_close_min) * labels

print("shape:", predicts_unnormalized.shape)
print("正则化后的预测数据:\n", predicts)
print("")
print("正则化前的预测数据:\n", predicts_unnormalized)


plt.plot(predicts_unnormalized, "r", label="pred")
plt.plot(labels_unnormalized, "b", label="real")
plt.legend()  # without this the label= arguments above are never displayed
plt.show()
-
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。