Recently I had the idea of using machine learning to improve time-series forecasting results.
Below is the code for my improved version, built around an LSTM neural network.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import regularizers
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import EarlyStopping

# Only needed if your plot labels or titles contain Chinese characters
plt.rcParams['font.sans-serif'] = ['SimHei']

# Load the data
file_path = '<path to your Excel file>'
data = pd.read_excel(file_path)

# Scale the target column to the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[['target_column']])  # replace 'target_column' with the column you want to forecast

# Split into training and test sets (80/20)
train_size = int(len(scaled_data) * 0.8)
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]

# Turn the series into supervised (X, y) samples
def create_dataset(data, look_back=1):
    X, Y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:(i + look_back), 0])
        Y.append(data[i + look_back, 0])
    return np.array(X), np.array(Y)


look_back = 1  # univariate series, so a simple one-step lag
X_train, y_train = create_dataset(train_data, look_back)
X_test, y_test = create_dataset(test_data, look_back)

# Reshape the inputs to (samples, time steps, features) for the LSTM layers
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Build the LSTM model (a further improvement over a basic LSTM)
model = Sequential()
model.add(LSTM(units=100, return_sequences=True,
               input_shape=(1, look_back),
               kernel_regularizer=regularizers.l2(0.001),
               recurrent_regularizer=regularizers.l2(0.001)))  # L2 regularization
model.add(Dropout(0.2))  # Dropout layer to reduce overfitting
model.add(LSTM(units=50,
               kernel_regularizer=regularizers.l2(0.001),
               recurrent_regularizer=regularizers.l2(0.001)))  # regularize the second LSTM layer as well
model.add(Dropout(0.2))  # another Dropout layer
model.add(Dense(1))

# Adam optimizer with an exponentially decaying learning rate
initial_learning_rate = 0.001
lr_schedule = ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(loss='mean_squared_error', optimizer=optimizer)
model.summary()

# Train with an EarlyStopping callback to guard against overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(X_train, y_train, epochs=300, batch_size=32, verbose=2,
                    validation_split=0.1, callbacks=[early_stopping])  # 10% validation split plus early stopping

# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Reshape the targets to 2-D so they can be inverse-transformed
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Undo the Min-Max scaling
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train)
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test)

# Prediction error (mean absolute error)
train_mae = np.mean(np.abs(train_predict - y_train))
test_mae = np.mean(np.abs(test_predict - y_test))
print(f'Train Mean Absolute Error: {train_mae:.2f}')
print(f'Test Mean Absolute Error: {test_mae:.2f}')

# Build year indices that line up with the training/test targets.
# create_dataset drops the first look_back points of each split, so the
# matching years start look_back steps into each split.
train_years = data['year'][:train_size]
test_years = data['year'][train_size:]

train_years_for_plot = train_years[look_back:]
test_years_for_plot = test_years[look_back:]

# Per-point absolute errors on the test set
test_errors = np.abs(test_predict - y_test)
test_errors = test_errors.flatten()

# Plot predicted values against actual values
plt.figure(figsize=(12, 6))
plt.plot(train_years_for_plot, y_train, label='Training actual', linestyle='--')
plt.plot(train_years_for_plot, train_predict.flatten(), label='Training predicted', linestyle=':')
plt.plot(test_years_for_plot, y_test, label='Test actual', marker='o')
plt.plot(test_years_for_plot, test_predict.flatten(), label='Test predicted', marker='x')

# Add error bars on the test predictions
plt.errorbar(test_years_for_plot, test_predict.flatten(), yerr=test_errors, fmt='none', ecolor='red', capsize=5,
             label='Error')

plt.title('Actual vs. LSTM-predicted values (fill in your variable name)')
plt.xlabel('Year')
plt.ylabel('Your variable name')
plt.legend()
plt.show()

# Print the error at every test point
for actual, predicted, error in zip(y_test, test_predict.flatten(), test_errors):
    print(f"Actual: {actual[0]:.2f}, Predicted: {predicted:.2f}, Error: {error:.2f}")

# Forecast the next 5 years
last_years_data = data['target_column'].values[-look_back:]  # same target column as above
last_years_scaled = scaler.transform(last_years_data.reshape(-1, 1))
future_input = np.array([last_years_scaled[-1]]).reshape(1, 1, 1)
future_prediction = []

for _ in range(5):
    prediction = model.predict(future_input)[0]
    future_prediction.append(prediction)
    # Feed the prediction straight back in as the next input; it is already in scaled space
    future_input = np.array([prediction]).reshape(1, 1, 1)

future_prediction = scaler.inverse_transform(np.array(future_prediction).reshape(-1, 1))

print("\nPredicted values for the next 5 years:")
for i, year in enumerate(range(2024, 2029)):
    print(f"Year {year}: Predicted value {future_prediction[i][0]:.2f}")
In the code above, 'year' is the time variable; if yours is called something else, don't forget to change it.
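For example, if the time column in your Excel file has a different name, one option is to rename it right after loading instead of editing every data['year'] reference. A minimal sketch, where the column name 'date' and the file path are hypothetical:

import pandas as pd

# Hypothetical case: the sheet's time column is named 'date' rather than 'year'
data = pd.read_excel('<path to your Excel file>')
data = data.rename(columns={'date': 'year'})  # after this, data['year'] works as in the script above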
To get good results from this code you usually need to run it several times, because building this LSTM is an iterative process of trial and tuning.
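Because random weight initialization and Dropout make every run come out slightly differently, one simple way to automate that "run it several times" step is to train with a few different random seeds and keep the run with the lowest validation loss. A minimal sketch, assuming a hypothetical build_model() helper that wraps the model definition above and the X_train / y_train arrays from the script:

import numpy as np
import tensorflow as tf

def train_once(build_model, X_train, y_train, seed):
    """Train one model with a fixed seed; return its best validation loss and the model."""
    np.random.seed(seed)
    tf.random.set_seed(seed)
    model = build_model()  # hypothetical helper: rebuilds and compiles the LSTM defined above
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(X_train, y_train, epochs=300, batch_size=32, verbose=0,
                        validation_split=0.1, callbacks=[early_stopping])
    return min(history.history['val_loss']), model

def best_of_n_runs(build_model, X_train, y_train, seeds=(0, 1, 2)):
    """Repeat training over several seeds and keep the model with the lowest validation loss."""
    runs = [train_once(build_model, X_train, y_train, s) for s in seeds]
    return min(runs, key=lambda r: r[0])[1]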
I hope this article helps you.