当前位置:   article > 正文

Python时间序列LSTM预测系列学习笔记(11)——多步预测

(本文示例代码中用到的主要配置参数:n_lag、n_seq、n_test、n_epochs、n_batch、n_neurons)

本文是对:

https://machinelearningmastery.com/multi-step-time-series-forecasting-long-short-term-memory-networks-python/

https://blog.csdn.net/iyangdi/article/details/77895186

博文的学习笔记,博主笔风都很浪,有些细节一笔带过,本人以谦逊的态度进行了学习和整理,笔记内容都在代码的注释中。有不清楚的可以去原博主文中查看。

数据集下载:https://datamarket.com/data/set/22r0/sales-of-shampoo-over-a-three-year-period

后期我会补上我的github

源码地址:https://github.com/yangwohenmai/LSTM/tree/master/LSTM%E7%B3%BB%E5%88%97/Multi-Step%20LSTM%E9%A2%84%E6%B5%8B2

本文其实是iyangdi博主最后一篇LSTM的文章,后续没有继续进行连载,不过后面的课程我会继续通过对Jason Brownlee博士文章的学习上传上来

本文在上文的基础上,对真实数据进行了处理,进行了一次实战的多步预测

上一章节可能有人疑惑为什么数据预测出来都是横线,是因为那些数据都是没有意义的实验数据

本节中的数据是真实数据

代码分析写在了注释里

 

  1. from pandas import DataFrame
  2. from pandas import Series
  3. from pandas import concat
  4. from pandas import read_csv
  5. from pandas import datetime
  6. from sklearn.metrics import mean_squared_error
  7. from sklearn.preprocessing import MinMaxScaler
  8. from keras.models import Sequential
  9. from keras.layers import Dense
  10. from keras.layers import LSTM
  11. from math import sqrt
  12. from matplotlib import pyplot
  13. from numpy import array
  14. # 加载数据集
  15. def parser(x):
  16. return datetime.strptime(x, '%Y/%m/%d')
  17. # 将时间序列转换为监督类型的数据序列
  18. def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
  19. n_vars = 1 if type(data) is list else data.shape[1]
  20. df = DataFrame(data)
  21. cols, names = list(), list()
  22. # 这个for循环是用来输入列标题的 var1(t-1),var1(t),var1(t+1),var1(t+2)
  23. for i in range(n_in, 0, -1):
  24. cols.append(df.shift(i))
  25. names += [('var%d(t-%d)' % (j + 1, i)) for j in range(n_vars)]
  26. # 转换为监督型数据的预测序列 每四个一组,对应 var1(t-1),var1(t),var1(t+1),var1(t+2)
  27. for i in range(0, n_out):
  28. cols.append(df.shift(-i))
  29. if i == 0:
  30. names += [('var%d(t)' % (j + 1)) for j in range(n_vars)]
  31. else:
  32. names += [('var%d(t+%d)' % (j + 1, i)) for j in range(n_vars)]
  33. # 拼接数据
  34. agg = concat(cols, axis=1)
  35. agg.columns = names
  36. # 把null值转换为0
  37. if dropnan:
  38. agg.dropna(inplace=True)
  39. print(agg)
  40. return agg
  41. # 对传入的数列做差分操作,相邻两值相减
  42. def difference(dataset, interval=1):
  43. diff = list()
  44. for i in range(interval, len(dataset)):
  45. value = dataset[i] - dataset[i - interval]
  46. diff.append(value)
  47. return Series(diff)
  48. # 将序列转换为用于监督学习的训练和测试集
  49. def prepare_data(series, n_test, n_lag, n_seq):
  50. # 提取原始值
  51. raw_values = series.values
  52. # 将数据转换为静态的
  53. diff_series = difference(raw_values, 1)
  54. diff_values = diff_series.values
  55. diff_values = diff_values.reshape(len(diff_values), 1)
  56. # 重新调整数据为(-1,1)之间
  57. scaler = MinMaxScaler(feature_range=(-1, 1))
  58. scaled_values = scaler.fit_transform(diff_values)
  59. scaled_values = scaled_values.reshape(len(scaled_values), 1)
  60. # 转化为有监督的数据X,y
  61. supervised = series_to_supervised(scaled_values, n_lag, n_seq)
  62. supervised_values = supervised.values
  63. # 分割为测试数据和训练数据
  64. train, test = supervised_values[0:-n_test], supervised_values[-n_test:]
  65. return scaler, train, test
  66. # 匹配LSTM网络训练数据
  67. def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
  68. # 重塑训练数据格式 [samples, timesteps, features]
  69. X, y = train[:, 0:n_lag], train[:, n_lag:]
  70. X = X.reshape(X.shape[0], 1, X.shape[1])
  71. # 配置一个LSTM神经网络,添加网络参数
  72. model = Sequential()
  73. model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
  74. model.add(Dense(y.shape[1]))
  75. model.compile(loss='mean_squared_error', optimizer='adam')
  76. # 调用网络,迭代数据对神经网络进行训练,最后输出训练好的网络模型
  77. for i in range(nb_epoch):
  78. model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
  79. model.reset_states()
  80. return model
  81. # 用LSTM做预测
  82. def forecast_lstm(model, X, n_batch):
  83. # 重构输入参数 [samples, timesteps, features]
  84. X = X.reshape(1, 1, len(X))
  85. # 开始预测
  86. forecast = model.predict(X, batch_size=n_batch)
  87. # 结果转换成数组
  88. return [x for x in forecast[0, :]]
  89. # 利用训练好的网络模型,对测试数据进行预测
  90. def make_forecasts(model, n_batch, train, test, n_lag, n_seq):
  91. forecasts = list()
  92. # 预测方式是用一个X值预测出后三步的Y值
  93. for i in range(len(test)):
  94. X, y = test[i, 0:n_lag], test[i, n_lag:]
  95. # 调用训练好的模型预测未来数据
  96. forecast = forecast_lstm(model, X, n_batch)
  97. # 将预测的数据保存
  98. forecasts.append(forecast)
  99. return forecasts
  100. # 对预测后的缩放值(-1,1)进行逆变换
  101. def inverse_difference(last_ob, forecast):
  102. # invert first forecast
  103. inverted = list()
  104. inverted.append(forecast[0] + last_ob)
  105. # propagate difference forecast using inverted first value
  106. for i in range(1, len(forecast)):
  107. inverted.append(forecast[i] + inverted[i - 1])
  108. return inverted
  109. # 对预测完成的数据进行逆变换
  110. def inverse_transform(series, forecasts, scaler, n_test):
  111. inverted = list()
  112. for i in range(len(forecasts)):
  113. # create array from forecast
  114. forecast = array(forecasts[i])
  115. forecast = forecast.reshape(1, len(forecast))
  116. # 将预测后的数据缩放逆转换
  117. inv_scale = scaler.inverse_transform(forecast)
  118. inv_scale = inv_scale[0, :]
  119. # invert differencing
  120. index = len(series) - n_test + i - 1
  121. last_ob = series.values[index]
  122. # 将预测后的数据差值逆转换
  123. inv_diff = inverse_difference(last_ob, inv_scale)
  124. # 保存数据
  125. inverted.append(inv_diff)
  126. return inverted
  127. # 评估每个预测时间步的RMSE
  128. def evaluate_forecasts(test, forecasts, n_lag, n_seq):
  129. for i in range(n_seq):
  130. actual = [row[i] for row in test]
  131. predicted = [forecast[i] for forecast in forecasts]
  132. rmse = sqrt(mean_squared_error(actual, predicted))
  133. print('t+%d RMSE: %f' % ((i + 1), rmse))
  134. # 在原始数据集的上下文中绘制预测图
  135. def plot_forecasts(series, forecasts, n_test):
  136. # plot the entire dataset in blue
  137. pyplot.plot(series.values)
  138. # plot the forecasts in red
  139. for i in range(len(forecasts)):
  140. off_s = len(series) - n_test + i - 1
  141. off_e = off_s + len(forecasts[i]) + 1
  142. xaxis = [x for x in range(off_s, off_e)]
  143. yaxis = [series.values[off_s]] + forecasts[i]
  144. pyplot.plot(xaxis, yaxis, color='red')
  145. # show the plot
  146. pyplot.show()
# Load the shampoo-sales dataset, parsing the date column with parser().
# NOTE(review): squeeze=True and date_parser= were deprecated and removed in
# pandas 2.x (use .squeeze("columns") / date_format= there) — confirm which
# pandas version this script targets.
series = read_csv('data_set/shampoo-sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# Network / experiment configuration
n_lag = 1  # number of lag observations used as model input
n_seq = 3  # number of future steps forecast per sample
n_test = 10  # rows held out as the test set
n_epochs = 1500  # training epochs (state reset between epochs)
n_batch = 1  # batch size; fixed because the LSTM is stateful
n_neurons = 1  # LSTM units
# Difference, scale to (-1, 1), and frame as supervised samples
scaler, train, test = prepare_data(series, n_test, n_lag, n_seq)
# Train the LSTM forecasting model
model = fit_lstm(train, n_lag, n_seq, n_batch, n_epochs, n_neurons)
# Forecast n_seq steps for every test sample
forecasts = make_forecasts(model, n_batch, train, test, n_lag, n_seq)
# Invert scaling and differencing on the forecasts.
# NOTE(review): the n_test + 2 offset presumably compensates for the row lost
# to differencing plus the supervised-framing shift — confirm against the
# original tutorial before changing.
forecasts = inverse_transform(series, forecasts, scaler, n_test + 2)
# Invert the same transforms on the held-out actual target columns
actual = [row[n_lag:] for row in test]
actual = inverse_transform(series, actual, scaler, n_test + 2)
# Compare forecasts to actuals: RMSE per horizon
evaluate_forecasts(actual, forecasts, n_lag, n_seq)
# Plot forecasts over the original series
plot_forecasts(series, forecasts, n_test + 2)

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/134897
推荐阅读
相关标签
  

闽ICP备14008679号