赞
踩
相关链接:
【ai竞赛系列】新冠疫情预测–BiLSTM未来数值预测
【ai竞赛系列】新冠疫情预测–堆叠LSTM未来数值预测
【ai竞赛系列】新冠疫情预测–卷积LSTM未来数值预测
第一次参加竞赛,评测新冠疫情未来预测。
给该地区历史几个月的新冠新增数据,然后预测该地区未来一周的新冠新增数据。
官方给的历史数据如下:
后面采用的是LSTM进行预测,PyTorch实现:
class LSTMpred(nn.Module): def __init__(self, input_size, hidden_dim): super(LSTMpred, self).__init__() self.input_dim = input_size self.hidden_dim = hidden_dim self.lstm = nn.LSTM(input_size, hidden_dim) self.hidden2out = nn.Linear(hidden_dim, 1) self.hidden = self.init_hidden() def init_hidden(self): return (Variable(torch.zeros(1, 1, self.hidden_dim)), Variable(torch.zeros(1, 1, self.hidden_dim))) def forward(self, seq): lstm_out, self.hidden = self.lstm( seq.view(len(seq), 1, -1), self.hidden) outdat = self.hidden2out(lstm_out.view(len(seq), -1)) return outdat
然后我们输入一个时间序列,然后输出一个值或者多个值;
为此训练集需要分别做训练数据和标签两个时序数据;
# seq:原始数据, k:周期序列长度, gap:周期序列间隔
def trainDataGen_gap0(seq, k, gap):
    """Slide a length-k window over seq and emit (inputs, targets) pairs.

    Within each window, every (gap+1)-th point is kept as an input, and the
    point gap+1 steps after it becomes the matching target.  Returns a list
    of (input_list, target_list) tuples.
    """
    step = gap + 1
    samples = list()
    for start in range(len(seq) - k - gap):
        # Positions 0, step, 2*step, ... inside the window.
        picks = range(0, k, step)
        inputs = [seq[start + j] for j in picks]
        targets = [seq[start + j + step] for j in picks]
        samples.append((inputs, targets))
    return samples
最简单的时序预测方式就是连续数据预测,比如用前面四个值预测后面一个值,但这种方式在预测未来连续数据时未免过于简单,结果趋于线性。
举例说明:
1~10为已知序列,预测11–15的未来连续数据,采用前四个数据推导后一个数据的方式:
7,8,9,10 --> (11)
8,9,10,(11) --> (12)
9, 10, (11), (12) --> (13)
10, (11), (12), (13) --> (14)
(11), (12), (13), (14) --> (15)
很容易看出,这样后面数值的预测会有越来越多的预测数据作为推导数据参与,其结果会逐渐呈线性。
比如采用间隔为1的序列推导:
3, 5, 7, 9 --> (11)
4, 6, 8, 10 --> (12)
5, 7, 9, (11) --> (13)
6, 8, 10, (12) --> (14)
7, 9, (11), (13) --> (15)
这样,每个未来的数据的推导,都会有更多真实数据的参与;
而且可能挖掘出一些特定时间间隔所蕴含的特征;
最后,我们用多种时间序列训练多个模型得到多个结果,最后再把多个结果进行综合。
完整代码:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import get_data

### Time-series forecasting of daily new COVID-19 cases with an ensemble of LSTMs.
# Preset variables.
xl_index = 3                    # which series (column) to read from the xl file
continuous_seq_10_length = 10   # contiguous window, period 10
continuous_seq_5_length = 5     # contiguous window, period 5
continuous_seq_3_length = 3     # contiguous window, period 3
gap_1_seq_5_length = 3          # gap-1 window of period 5
gap_1_seq_9_length = 5          # gap-1 window of period 9
gap_2_seq_7_length = 3          # gap-2 window of period 7
gap_3_seq_9_length = 3          # gap-3 window of period 9
gap_6_seq_21_length = 3         # weekly-gap window length
# NOTE(review): the *_length constants above are never referenced again; the
# window sizes are hard-coded in the trainDataGen_gap0 calls below.


def SeriesGen(N):
    # Synthetic sine wave for smoke-testing the pipeline (unused in this run).
    x = torch.arange(1,N,0.01)
    print(len(x))
    return torch.sin(x)


# seq: raw data, k: window length, gap: interval between sampled points
def trainDataGen_gap0(seq,k,gap):
    """Build (input, target) pairs: within each length-k window, every
    (gap+1)-th point is an input and the point gap+1 steps later its target."""
    dat = list()
    L = len(seq)
    for i in range(L-k-gap):
        indat = list()
        outdat = list()
        count = gap+1
        for j in range(k):
            # count starts at gap+1, so j = 0, gap+1, 2*(gap+1), ... are kept.
            if count % (gap+1) == 0:
                indat.append(seq[i+j])
                outdat.append(seq[i+j+1+gap])
            count+= 1
        dat.append((indat,outdat))
    return dat


def ToVariable(x):
    # Wrap a Python list / numpy array as a float tensor for the model.
    tmp = torch.FloatTensor(x)
    return Variable(tmp)


class LSTMpred(nn.Module):
    """Single-layer LSTM regressor; batch size fixed at 1 (see forward)."""

    def __init__(self, input_size, hidden_dim):
        super(LSTMpred, self).__init__()
        self.input_dim = input_size
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_size, hidden_dim)
        self.hidden2out = nn.Linear(hidden_dim, 1)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # Fresh (h0, c0), each shaped (num_layers=1, batch=1, hidden_dim).
        return (Variable(torch.zeros(1, 1, self.hidden_dim)),
                Variable(torch.zeros(1, 1, self.hidden_dim)))

    def forward(self, seq):
        lstm_out, self.hidden = self.lstm(
            seq.view(len(seq), 1, -1), self.hidden)
        outdat = self.hidden2out(lstm_out.view(len(seq), -1))
        return outdat


# Load the raw case counts and normalise to [0, 1] by the series maximum.
case_count = get_data.get_xldata(xl_index)
max_v = max(case_count)
new_case_count = [x/(max_v) for x in case_count]

# Build one training set per (window length, gap) combination.
y = torch.tensor(new_case_count)
dat_10_0 = trainDataGen_gap0(y.numpy(),10,0)
dat_5_0 = trainDataGen_gap0(y.numpy(),5,0)
dat_3_0 = trainDataGen_gap0(y.numpy(),3,0)
dat_5_1 = trainDataGen_gap0(y.numpy(),5,1)
dat_9_1 = trainDataGen_gap0(y.numpy(),9,1)
dat_7_2 = trainDataGen_gap0(y.numpy(),7,2)
dat_9_3 = trainDataGen_gap0(y.numpy(),9,3)

# One independent model (input size 1, hidden size 6) per training set.
model_10_0 = LSTMpred(1,6)
model_5_0 = LSTMpred(1,6)
model_3_0 = LSTMpred(1,6)
model_5_1 = LSTMpred(1,6)
model_9_1 = LSTMpred(1,6)
model_7_2 = LSTMpred(1,6)
model_9_3 = LSTMpred(1,6)
loss_function = nn.MSELoss()  # mean-squared-error loss
# One SGD optimizer per model, learning rate 0.01.
optimizer_10_0 = optim.SGD(model_10_0.parameters(), lr=0.01)
optimizer_5_0 = optim.SGD(model_5_0.parameters(), lr=0.01)
optimizer_3_0 = optim.SGD(model_3_0.parameters(), lr=0.01)
optimizer_5_1 = optim.SGD(model_5_1.parameters(), lr=0.01)
optimizer_9_1 = optim.SGD(model_9_1.parameters(), lr=0.01)
optimizer_7_2 = optim.SGD(model_7_2.parameters(), lr=0.01)
optimizer_9_3 = optim.SGD(model_9_3.parameters(), lr=0.01)

num_epochs = 400  # training iterations
for epoch in range(num_epochs):
    print("迭代训练:",epoch)
    loss_sum = 0.0
    # Train each model on its own data set; the LSTM hidden state is reset
    # before every sample so windows are treated as independent sequences.
    for seq, outs in dat_10_0[:]:
        seq = ToVariable(seq)    # to tensor
        outs = ToVariable(outs)  # to tensor
        optimizer_10_0.zero_grad()  # clear gradients
        model_10_0.hidden = model_10_0.init_hidden()
        modout = model_10_0(seq)  # forward pass
        loss = loss_function(modout, outs)  # compute loss
        loss_sum += loss
        loss.backward()
        optimizer_10_0.step()
    # if epoch % 20 == 0 and epoch > 0:
    #     loss = loss_sum / len(dat)
    #     print("迭代训练",epoch,":",loss)
    for seq, outs in dat_5_0[:]:
        seq = ToVariable(seq)
        outs = ToVariable(outs)
        optimizer_5_0.zero_grad()
        model_5_0.hidden = model_5_0.init_hidden()
        modout = model_5_0(seq)
        loss = loss_function(modout, outs)
        loss_sum += loss
        loss.backward()
        optimizer_5_0.step()
    for seq, outs in dat_3_0[:]:
        seq = ToVariable(seq)
        outs = ToVariable(outs)
        optimizer_3_0.zero_grad()
        model_3_0.hidden = model_3_0.init_hidden()
        modout = model_3_0(seq)
        loss = loss_function(modout, outs)
        loss_sum += loss
        loss.backward()
        optimizer_3_0.step()
    for seq, outs in dat_5_1[:]:
        seq = ToVariable(seq)
        outs = ToVariable(outs)
        optimizer_5_1.zero_grad()
        model_5_1.hidden = model_5_1.init_hidden()
        modout = model_5_1(seq)
        loss = loss_function(modout, outs)
        loss_sum += loss
        loss.backward()
        optimizer_5_1.step()
    for seq, outs in dat_9_1[:]:
        seq = ToVariable(seq)
        outs = ToVariable(outs)
        optimizer_9_1.zero_grad()
        model_9_1.hidden = model_9_1.init_hidden()
        modout = model_9_1(seq)
        loss = loss_function(modout, outs)
        loss_sum += loss
        loss.backward()
        optimizer_9_1.step()
    for seq, outs in dat_7_2[:]:
        seq = ToVariable(seq)
        outs = ToVariable(outs)
        optimizer_7_2.zero_grad()
        model_7_2.hidden = model_7_2.init_hidden()
        modout = model_7_2(seq)
        loss = loss_function(modout, outs)
        loss_sum += loss
        loss.backward()
        optimizer_7_2.step()
    for seq, outs in dat_9_3[:]:
        seq = ToVariable(seq)
        outs = ToVariable(outs)
        optimizer_9_3.zero_grad()
        model_9_3.hidden = model_9_3.init_hidden()
        modout = model_9_3(seq)
        loss = loss_function(modout, outs)
        loss_sum += loss
        loss.backward()
        optimizer_9_3.step()

# In-sample predictions: feed every training window through its model and
# keep the last step's output as the prediction for that window.
# NOTE(review): in each of the seven loops below, `outs` is a stale leftover
# from the training loops above, so `loss` is computed against the wrong
# target; it only feeds loss_sum, which is never used afterwards.
predDat_10_0 = list()
for seq, trueVal in dat_10_0[:]:
    #print("seq=",seq)
    #print("trueval=",trueVal)
    seq = ToVariable(seq)
    trueVal = ToVariable(trueVal)
    modout = model_10_0(seq)
    pre = modout[-1].data.numpy()[0]
    #print(pre)
    loss = loss_function(modout, outs)
    loss_sum += loss
    predDat_10_0.append(model_10_0(seq)[-1].data.numpy()[0])
predDat_5_0 = list()
for seq, trueVal in dat_5_0[:]:
    seq = ToVariable(seq)
    trueVal = ToVariable(trueVal)
    modout = model_5_0(seq)
    pre = modout[-1].data.numpy()[0]
    loss = loss_function(modout, outs)
    loss_sum += loss
    predDat_5_0.append(model_5_0(seq)[-1].data.numpy()[0])
predDat_3_0 = list()
for seq, trueVal in dat_3_0[:]:
    seq = ToVariable(seq)
    trueVal = ToVariable(trueVal)
    modout = model_3_0(seq)
    pre = modout[-1].data.numpy()[0]
    loss = loss_function(modout, outs)
    loss_sum += loss
    predDat_3_0.append(model_3_0(seq)[-1].data.numpy()[0])
predDat_5_1 = list()
for seq, trueVal in dat_5_1[:]:
    seq = ToVariable(seq)
    trueVal = ToVariable(trueVal)
    modout = model_5_1(seq)
    pre = modout[-1].data.numpy()[0]
    loss = loss_function(modout, outs)
    loss_sum += loss
    predDat_5_1.append(model_5_1(seq)[-1].data.numpy()[0])
predDat_9_1 = list()
for seq, trueVal in dat_9_1[:]:
    seq = ToVariable(seq)
    trueVal = ToVariable(trueVal)
    modout = model_9_1(seq)
    pre = modout[-1].data.numpy()[0]
    loss = loss_function(modout, outs)
    loss_sum += loss
    predDat_9_1.append(model_9_1(seq)[-1].data.numpy()[0])
predDat_7_2 = list()
for seq, trueVal in dat_7_2[:]:
    seq = ToVariable(seq)
    trueVal = ToVariable(trueVal)
    modout = model_7_2(seq)
    pre = modout[-1].data.numpy()[0]
    loss = loss_function(modout, outs)
    loss_sum += loss
    predDat_7_2.append(model_7_2(seq)[-1].data.numpy()[0])
predDat_9_3 = list()
for seq, trueVal in dat_9_3[:]:
    seq = ToVariable(seq)
    trueVal = ToVariable(trueVal)
    modout = model_9_3(seq)
    pre = modout[-1].data.numpy()[0]
    loss = loss_function(modout, outs)
    loss_sum += loss
    predDat_9_3.append(model_9_3(seq)[-1].data.numpy()[0])

print("10_0:",len(dat_10_0),"; 5_0:",len(dat_5_0),"; 3_0:",len(dat_3_0),
      "; 5_1:",len(dat_5_1),"; 9_1:",len(dat_9_1),"; 7_2:",len(dat_7_2),"; 9_3:",len(dat_9_3))

# Evaluation labels: all points from global index 12 on, so the seven data
# sets (whose first predictable index differs per window/gap) can be aligned
# by slicing each from its own offset below.
t_labels = y.numpy()[12:]
t_dat_10_0, t_dat_5_0 , t_dat_3_0, t_dat_5_1, t_dat_9_1, t_dat_7_2, t_dat_9_3 \
    = list(),list(),list(),list(),list(),list(),list()
# Collect each model's prediction for every aligned time step.
for val,_ in dat_10_0[12-10:]:
    val = ToVariable(val)
    modout = model_10_0(val)
    pre = modout[-1].data.numpy()[0]
    t_dat_10_0.append(pre)
for val,_ in dat_5_0[12-5:]:
    val = ToVariable(val)
    modout = model_5_0(val)
    pre = modout[-1].data.numpy()[0]
    t_dat_5_0.append(pre)
for val,_ in dat_3_0[12-3:]:
    val = ToVariable(val)
    modout = model_3_0(val)
    pre = modout[-1].data.numpy()[0]
    t_dat_3_0.append(pre)
for val,_ in dat_5_1[12-6:]:
    val = ToVariable(val)
    modout = model_5_1(val)
    pre = modout[-1].data.numpy()[0]
    t_dat_5_1.append(pre)
for val, _ in dat_9_1[12 - 10:]:
    val = ToVariable(val)
    modout = model_9_1(val)
    pre = modout[-1].data.numpy()[0]
    t_dat_9_1.append(pre)
for val, _ in dat_7_2[12 - 9:]:
    val = ToVariable(val)
    modout = model_7_2(val)
    pre = modout[-1].data.numpy()[0]
    t_dat_7_2.append(pre)
for val, _ in dat_9_3[12 - 12:]:
    val = ToVariable(val)
    modout = model_9_3(val)
    pre = modout[-1].data.numpy()[0]
    t_dat_9_3.append(pre)

wl_10_0,wl_5_0,wl_3_0,wl_5_1,wl_9_1,wl_7_2,wl_9_3 = list(),list(),list(),list(),list(),list(),list()
X_list=list()
for i in range(len(t_dat_9_3)):
    # Simple-average alternative (kept for reference, disabled):
    # tmp = 0.0
    # tmp += t_dat_10_0[i]
    # tmp += t_dat_5_0[i]
    # tmp += t_dat_3_0[i]
    # tmp += t_dat_5_1[i]
    # tmp += t_dat_9_1[i]
    # tmp += t_dat_7_2[i]
    # tmp += t_dat_9_3[i]
    # X_list.append(tmp/7)
    # Per-step weight of each model = inverse absolute error, normalised so
    # the seven weights sum to 1 at this step.
    # NOTE(review): divides by abs(prediction - label); a perfect prediction
    # would raise ZeroDivisionError here.
    loss_10_0 = 1/abs(t_dat_10_0[i] - t_labels[i])
    loss_5_0 = 1 / abs(t_dat_5_0[i] - t_labels[i])
    loss_3_0 = 1 / abs(t_dat_3_0[i] - t_labels[i])
    loss_5_1 = 1 / abs(t_dat_5_1[i] - t_labels[i])
    loss_9_1 = 1 / abs(t_dat_9_1[i] - t_labels[i])
    loss_7_2 = 1 / abs(t_dat_7_2[i] - t_labels[i])
    loss_9_3 = 1 / abs(t_dat_9_3[i] - t_labels[i])
    loss_sum = loss_10_0+loss_5_0+loss_3_0+loss_5_1+loss_9_1+loss_7_2+loss_9_3
    wl_10_0.append(loss_10_0/loss_sum)
    wl_5_0.append(loss_5_0/loss_sum)
    wl_3_0.append(loss_3_0/loss_sum)
    wl_5_1.append(loss_5_1/loss_sum)
    wl_9_1.append(loss_9_1/loss_sum)
    wl_7_2.append(loss_7_2/loss_sum)
    wl_9_3.append(loss_9_3/loss_sum)

# Final ensemble weights: the mean of the per-step weights, re-normalised.
w_10_0 = np.mean(wl_10_0)
w_5_0 = np.mean(wl_5_0)
w_3_0 = np.mean(wl_3_0)
w_5_1 = np.mean(wl_5_1)
w_9_1 = np.mean(wl_9_1)
w_7_2 = np.mean(wl_7_2)
w_9_3 = np.mean(wl_9_3)
w_sum = w_10_0+w_5_0+w_3_0+w_5_1+w_9_1+w_7_2+w_9_3
w_10_0 = w_10_0 / w_sum
w_5_0 = w_5_0 / w_sum
w_3_0 = w_3_0 / w_sum
w_5_1 = w_5_1 / w_sum
w_9_1 = w_9_1 / w_sum
w_7_2 = w_7_2 / w_sum
w_9_3 = w_9_3 / w_sum

# Weighted ensemble prediction for the aligned historical steps.
for i in range(len(t_dat_9_3)):
    res = w_10_0 * t_dat_10_0[i] + w_5_0 * t_dat_5_0[i] + w_3_0 * t_dat_3_0[i] + w_5_1 * t_dat_5_1[i] + w_9_1 * t_dat_9_1[i] + w_7_2 * t_dat_7_2[i] + w_9_3 * t_dat_9_3[i]
    X_list.append(res)

# Forecast future data autoregressively with the contiguous (gap-0) models:
# slide the last known window forward, feeding each prediction back in.
pre_day_count = 10
seq,trueVal = dat_10_0[-1]
pred_10_0 = []
for i in range(pre_day_count):
    seq_val = ToVariable(seq)
    pre = model_10_0(seq_val)[-1].data.numpy()[0]
    pred_10_0.append(pre)
    seq=np.delete(seq, 0)  # drop oldest point, append the new prediction
    seq=np.append(seq,pre)
seq,trueVal = dat_5_0[-1]
pred_5_0 = []
for i in range(pre_day_count):
    seq_val = ToVariable(seq)
    pre = model_5_0(seq_val)[-1].data.numpy()[0]
    pred_5_0.append(pre)
    seq=np.delete(seq, 0)
    seq=np.append(seq,pre)
seq,trueVal = dat_3_0[-1]
pred_3_0 = []
for i in range(pre_day_count):
    seq_val = ToVariable(seq)
    pre = model_3_0(seq_val)[-1].data.numpy()[0]
    pred_3_0.append(pre)
    seq=np.delete(seq, 0)
    seq=np.append(seq,pre)

# Gap-1 forecasting: alternate between the two interleaved phase windows so
# consecutive forecast days come from the correct phase.
seq_1,trueVal_1 = dat_5_1[-1]
seq,trueVal = dat_5_1[-2]
pred_5_1 = []
for i in range(pre_day_count):
    seq_val = ToVariable(seq)
    pre = model_5_1(seq_val)[-1].data.numpy()[0]
    pred_5_1.append(pre)
    seq = np.delete(seq, 0)
    seq = np.append(seq, pre)
    # Rotate the two phase windows.
    tmp1,tmp2 = seq_1 ,trueVal_1
    seq_1,trueVal_1 = seq,trueVal
    seq, trueVal = tmp1,tmp2
seq_1,trueVal_1 = dat_9_1[-1]
seq,trueVal = dat_9_1[-2]
pred_9_1 = []
for i in range(pre_day_count):
    seq_val = ToVariable(seq)
    pre = model_9_1(seq_val)[-1].data.numpy()[0]
    pred_9_1.append(pre)
    seq = np.delete(seq, 0)
    seq = np.append(seq, pre)
    tmp1,tmp2 = seq_1 ,trueVal_1
    seq_1,trueVal_1 = seq,trueVal
    seq, trueVal = tmp1,tmp2

# Gap-2 forecasting: rotate through three interleaved phase windows.
seq_2,trueVal_2 = dat_7_2[-1]
seq_1,trueVal_1 = dat_7_2[-2]
seq,trueVal = dat_7_2[-3]
pred_7_2 = []
for i in range(pre_day_count):
    seq_val = ToVariable(seq)
    pre = model_7_2(seq_val)[-1].data.numpy()[0]
    pred_7_2.append(pre)
    seq = np.delete(seq, 0)
    seq = np.append(seq, pre)
    tmp1,tmp2 = seq_2,trueVal_2
    seq_2,trueVal_2 = seq_1,trueVal_1
    seq_1, trueVal_1 = seq, trueVal
    seq, trueVal = tmp1,tmp2

# Gap-3 forecasting: rotate through four interleaved phase windows.
seq_3,trueVal_3 = dat_9_3[-1]
seq_2,trueVal_2 = dat_9_3[-2]
seq_1,trueVal_1 = dat_9_3[-3]
seq,trueVal = dat_9_3[-4]
pred_9_3 = []
for i in range(pre_day_count):
    seq_val = ToVariable(seq)
    pre = model_9_3(seq_val)[-1].data.numpy()[0]
    pred_9_3.append(pre)
    seq = np.delete(seq, 0)
    seq = np.append(seq, pre)
    tmp1,tmp2 = seq_3,trueVal_3
    seq_3, trueVal_3 = seq_2,trueVal_2
    seq_2,trueVal_2 = seq_1,trueVal_1
    seq_1, trueVal_1 = seq, trueVal
    seq, trueVal = tmp1,tmp2

# Weighted ensemble of the seven models' forecasts for each future day.
final_pre = []
for i in range(pre_day_count):
    res = w_10_0*pred_10_0[i] + w_5_0*pred_5_0[i] + w_3_0*pred_3_0[i] + w_5_1*pred_5_1[i]+ w_9_1*pred_9_1[i] + w_7_2*pred_7_2[i] + w_9_3*pred_9_3[i]
    final_pre.append(res)
    X_list.append(res)
    print("pre:",res * max_v)
# fig = plt.figure()

# De-normalise back to raw case counts for display.
true_y = []
true_pre= []
for val in y.tolist():
    val *= max_v
    true_y.append(val)
for val in X_list:
    val *= max_v
    true_pre.append(val)
# print(true_pre)

# Plot ground truth against the ensemble's historical fit plus its forecast,
# shifting the prediction curve so it lines up with the true series.
plt.plot(true_y)
plt.plot(range(len(true_y)-len(true_pre)+pre_day_count-1, len(true_y)+pre_day_count-1), true_pre)
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。