赞
踩
需要使用的库:
import tushare as ts
ts.set_token('98edbe9c3e444002decb09646838a02f0307e41bd5ce51bb5b2a99c8')
ts_pro = ts.pro_api()
help(ts)
Help on package tushare: NAME tushare - # -*- coding:utf-8 -*- PACKAGE CONTENTS bond (package) coins (package) data (package) fund (package) futures (package) internet (package) pro (package) stock (package) subs (package) trader (package) util (package) VERSION 1.2.62 AUTHOR Jimmy Liu FILE e:\anaconda3\lib\site-packages\tushare\__init__.py
# 股票代码 ts_code = '002069.SZ' # 开始时间 start_date = '2006-01-01' # 结束时间 end_date = '2020-01-01' df = ts_pro.daily( ts_code=ts_code, start_date=start_date, end_date=end_date) # 将获取的文件存储在本地 df.to_csv('002069.csv', index=0)
import pandas as pd
df = pd.read_csv('002069.csv')
# 按照时间进行排序,方便预测处理
df = df.sort_values(['trade_date'], ascending=True)
# 简要查看文件信息
df.head()
ts_code | trade_date | open | high | low | close | pre_close | change | pct_chg | vol | amount | |
---|---|---|---|---|---|---|---|---|---|---|---|
3034 | 002069.SZ | 20060928 | 60.89 | 64.48 | 58.00 | 62.11 | 25.00 | 37.11 | 148.44 | 169301.73 | 1.028809e+06 |
3033 | 002069.SZ | 20060929 | 64.10 | 67.00 | 62.48 | 62.99 | 62.11 | 0.88 | 1.42 | 49290.44 | 3.182028e+05 |
3032 | 002069.SZ | 20061009 | 63.00 | 66.15 | 62.16 | 65.00 | 62.99 | 2.01 | 3.19 | 28449.04 | 1.827514e+05 |
3031 | 002069.SZ | 20061010 | 64.89 | 71.50 | 64.00 | 71.49 | 65.00 | 6.49 | 9.98 | 34935.76 | 2.381439e+05 |
3030 | 002069.SZ | 20061011 | 70.20 | 71.80 | 69.22 | 69.99 | 71.49 | -1.50 | -2.10 | 15807.24 | 1.109196e+05 |
# 选取这几个属性作为输入特征
use_cols = ['open', 'high', 'low', 'close', 'pre_close', 'vol', 'amount']
df = df[use_cols]
df.head()
open | high | low | close | pre_close | vol | amount | |
---|---|---|---|---|---|---|---|
3034 | 60.89 | 64.48 | 58.00 | 62.11 | 25.00 | 169301.73 | 1.028809e+06 |
3033 | 64.10 | 67.00 | 62.48 | 62.99 | 62.11 | 49290.44 | 3.182028e+05 |
3032 | 63.00 | 66.15 | 62.16 | 65.00 | 62.99 | 28449.04 | 1.827514e+05 |
3031 | 64.89 | 71.50 | 64.00 | 71.49 | 65.00 | 34935.76 | 2.381439e+05 |
3030 | 70.20 | 71.80 | 69.22 | 69.99 | 71.49 | 15807.24 | 1.109196e+05 |
# 为了归一化后复现原来数据
close_min = df['close'].min()
close_max = df['close'].max()
# 归一化处理(0,1)
df=df.apply(lambda x:(x-min(x))/(max(x)-min(x)))
df.head()
open | high | low | close | pre_close | vol | amount | |
---|---|---|---|---|---|---|---|
3034 | 0.401512 | 0.416768 | 0.404243 | 0.401345 | 0.151699 | 0.166571 | 0.800115 |
3033 | 0.423566 | 0.433710 | 0.436792 | 0.407265 | 0.401345 | 0.047720 | 0.247007 |
3032 | 0.416008 | 0.427995 | 0.434467 | 0.420787 | 0.407265 | 0.027080 | 0.141577 |
3031 | 0.428993 | 0.463964 | 0.447835 | 0.464447 | 0.420787 | 0.033504 | 0.184693 |
3030 | 0.465476 | 0.465981 | 0.485760 | 0.454356 | 0.464447 | 0.014561 | 0.085666 |
import numpy as np
# 序列长度为30,即用前一个月的数据预测之后一天的数据
sequence = 30
X = []
Y = []
for i in range(df.shape[0]-sequence):
# 选择use_cols作为特征
X.append(np.array(df.iloc[i:(i+sequence),].values, dtype=np.float))
# 选择close作为标签输出
Y.append(np.array(df.iloc[(i+sequence),3],dtype=np.float))
# 划分训练集和测试集
trainx, trainy = X[:int(0.8*df.shape[0])], Y[:int(0.8*df.shape[0])]
testx, testy = X[int(0.8*df.shape[0]):], Y[int(0.8*df.shape[0]):]
print(len(trainx))
print(len(testx))
2428
577
import torch
import torch.utils.data as Data
torch.manual_seed(1)
<torch._C.Generator at 0x271cfb1a110>
# list -> numpy trainx = np.array(trainx) trainy = np.array(trainy) testx = np.array(testx) testy = np.array(testy) # numpy -> torch trainx = torch.from_numpy(trainx) trainy = torch.from_numpy(trainy) testx = torch.from_numpy(testx) testy = torch.from_numpy(testy) print('trainx size: ', trainx.size()) print('trainy size: ', trainy.size()) print('testx size: ', testx.size()) print('testy size: ', testy.size())
trainx size: torch.Size([2428, 30, 7])
trainy size: torch.Size([2428])
testx size: torch.Size([577, 30, 7])
testy size: torch.Size([577])
# 批处理 batch的大小为32 train_dataset = Data.TensorDataset(trainx, trainy) test_dataset = Data.TensorDataset(testx, testy) train_loader = Data.DataLoader( dataset=train_dataset, batch_size=32, shuffle=True, num_workers=2 ) test_loader = Data.DataLoader( dataset=test_dataset, batch_size=32, shuffle=True, num_workers=2 )
input_size = 7 seq_len = 30 hidden_size = 32 output_size = 1 import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable class MyNet(nn.Module): def __init__(self, input_size=input_size, hidden_size=hidden_size, output_size=output_size): super(MyNet, self).__init__() self.input_size = input_size self.hidden_size = hidden_size self.output_size = output_size self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True) self.fc = nn.Linear(self.hidden_size*seq_len, self.output_size) def forward(self, input): out,_ = self.lstm(input) b, s, h = out.size() out = self.fc(out.reshape(b, s*h)) return out net = MyNet() print(net)
MyNet(
(lstm): LSTM(7, 32, batch_first=True)
(fc): Linear(in_features=960, out_features=1, bias=True)
)
import torch.optim as optim from tqdm import tqdm loss_function = nn.MSELoss() optimizer = optim.Adam(net.parameters(), lr=0.001) for epoch in tqdm(range(100)): total_loss = 0 for _,(data, label) in enumerate(train_loader): data = Variable(data).float() pred = net(data) label = Variable(label).float() label = label.unsqueeze(1) loss = loss_function(pred, label) loss.backward() optimizer.step() optimizer.zero_grad() total_loss += loss.item() if (epoch+1) % 10 == 0: print('Epoch: ', epoch+1, ' loss: ', total_loss)
10%|█ | 10/100 [00:17<02:32, 1.70s/it]Epoch: 10 loss: 0.03518655754669453
20%|██ | 20/100 [00:34<02:14, 1.68s/it]Epoch: 20 loss: 0.023873500191257335
30%|███ | 30/100 [00:53<02:11, 1.88s/it]Epoch: 30 loss: 0.02149457185441861
40%|████ | 40/100 [01:14<02:07, 2.13s/it]Epoch: 40 loss: 0.017912164659719565
50%|█████ | 50/100 [01:31<01:25, 1.72s/it]Epoch: 50 loss: 0.013031252356086043
60%|██████ | 60/100 [01:51<01:14, 1.87s/it]Epoch: 60 loss: 0.014165752041662927
70%|███████ | 70/100 [02:10<01:02, 2.09s/it]Epoch: 70 loss: 0.011516284157551127
80%|████████ | 80/100 [02:27<00:35, 1.79s/it]Epoch: 80 loss: 0.01033103184090578
90%|█████████ | 90/100 [02:47<00:17, 1.74s/it]Epoch: 90 loss: 0.010540407126427453
100%|██████████| 100/100 [03:07<00:00, 1.88s/it]Epoch: 100 loss: 0.01101792572899285
pred_list = []
label_list = []
for _, (data, label) in enumerate(test_loader):
data = Variable(data).float()
pred = net(data)
pred_list.extend(pred.data.squeeze(1).tolist())
label_list.extend(label.tolist())
pred_list[:5]
[0.003661651164293289,
0.004493666812777519,
0.017206232994794846,
0.004013188183307648,
-0.003268543630838394]
label_list[:5]
[0.005449041372351158,
0.007938109653548601,
0.015136226034308779,
0.00538176925664312,
0.0014127144298688192]
简单查看预测结果与真实值,发现相差不是特别明显
import matplotlib.pyplot as plt
plt.plot([i*(close_max-close_min)+close_min for i in pred_list[:100]] , label='pred')
plt.plot([i*(close_max-close_min)+close_min for i in label_list[:100]], label='real')
plt.title('Stock Forecast')
plt.legend()
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。