当前位置:   article > 正文

基于机器学习和深度学习的时间序列分析和预测(Python)

基于机器学习和深度学习的时间序列分析和预测(Python)

时间序列数据与其它数据的不同,主要在于它在时间维度上存在依赖关系,这说明过去的历史数据当中隐藏着一些时间序列数据固有的特性,例如周期性、趋势性、不规则性等。时间序列预测便是通过不同的方法捕捉这种规律,从而对未来值进行预测。近年来,随着各种技术的进步,时间序列数据的采集规模逐渐增加,采样频率不断升高,采集特征也呈现出多维度等特点。如果依旧把时间序列当成一维向量数据来处理,不仅会带来数据维度灾难,还有可能造成数据信息丢失等问题,因此给多元时间序列预测问题带来了更大的挑战。

传统的时间序列预测方法大多是通过分析时间序列数据符合什么样的分布,或者说什么样的模型对此类数据拟合效果好等先验知识来建立数学模型,然后进行参数求解来解决问题。然而,随着近些年时间序列数据越来越呈现出大规模、复杂度高等特点,利用传统方法对时序数据进行分析建模越来越难以达到令人满意的效果。深度学习擅长分析大量数据的内在联系,通过大量神经元的不同连接方式与计算方式,能够很容易地捕捉到时间序列数据之间的周期性、趋势性和不规则性等联系,具有很强的适应性。因此,采用深度学习技术对时间序列数据进行预测的研究,已经成为时间序列处理领域的必然之路。

  1. # ============================================= Import Libraries ========================================
  2. import os
  3. import numpy as np
  4. import pandas as pd
  5. import seaborn as sns
  6. from ARX_Model import arx
  7. import statsmodels.api as sm
  8. from AR_Model import ar_model
  9. import matplotlib.pyplot as plt
  10. from ARIMA_Model import arima_model
  11. from Plot_Models import plot_models
  12. from Least_Squares import lest_squares
  13. from Normalize_Regression import normalize_regression
  14. from Sequences_Data import sequences_data
  15. from Test_Stationary import test_stationary
  16. from Auto_Correlation import auto_correlation
  17. from Linear_Regression import linear_regression
  18. from Xgboost_Regression import xgboost_regression
  19. from keras import models, layers
  20. from Random_Forest_Regression import random_forest_regression
  21. from Tree_Decision_Regression import tree_decision_regression
  22. # ======================================== Step 1: Load Data ==================================================
  23. os.system('cls')
  24. data = sm.datasets.sunspots.load_pandas() # df = pd.read_csv('monthly_milk_production.csv'), df.info(), X = df["Value"].values
  25. data = data.data["SUNACTIVITY"]
  26. # print('Shape of data \t', data.shape)
  27. # print('Original Dataset:\n', data.head())
  28. # print('Values:\n', data)
  29. # ================================ Step 2.1: Normalize Data (0-1) ================================================
  30. #data, normalize_modele = normalize_regression(data, type_normalize='MinMaxScaler', display_figure='on') # Type_Normalize: 'MinMaxScaler', 'normalize'
  31. # ================================ Step 2.2: Check Stationary Time Series ========================================
  32. #data = test_stationary(data, window=20)
  33. # ==================================== Step 3: Find the lags of AR and etc models ==============================
  34. #auto_correlation(data, nLags=10)
  35. # =========================== Step 4: Split Dataset intro Train and Test =======================================
  36. nLags = 3
  37. num_sample = 300
  38. mu = 0.000001
  39. Data_Lags = pd.DataFrame(np.zeros((len(data), nLags)))
  40. for i in range(0, nLags):
  41. Data_Lags[i] = data.shift(i + 1)
  42. Data_Lags = Data_Lags[nLags:]
  43. data = data[nLags:]
  44. Data_Lags.index = np.arange(0, len(Data_Lags), 1, dtype=int)
  45. data.index = np.arange(0, len(data), 1, dtype=int)
  46. train_size = int(len(data) * 0.8)
  47. # ================================= Step 5: Autoregressive and Automated Methods ===============================
  48. sns.set(style='white')
  49. fig, axs = plt.subplots(nrows=4, ncols=1, sharey='row', figsize=(16, 10))
  50. plot_models(data, [], [], axs, nLags, train_size, num_sample=num_sample, type_model='Actual_Data')
  51. # ------------------------------------------- Least Squares ---------------------------------------------------
  52. lest_squares(data, Data_Lags, train_size, axs, num_sample=num_sample)
  53. # -------------------------------------------- Auto-Regressive (AR) model --------------------------------------
  54. ar_model(data, train_size, axs, n_lags=nLags, num_sample=num_sample)
  55. # ------------------------------------------------ ARX --------------------------------------------------------
  56. arx(data, Data_Lags, train_size, axs, mu=mu, num_sample=num_sample)
  57. # ----------------------------- Auto-Regressive Integrated Moving Averages (ARIMA) -----------------------------
  58. arima_model(data, train_size, axs, order=(5, 1, (1, 1, 1, 1)), seasonal_order=(0, 0, 2, 12), num_sample=num_sample)
  59. # ======================================= Step 5: Machine Learning Models ======================================
  60. # ------------------------------------------- Linear Regression Model -----------------------------------------
  61. linear_regression(data, Data_Lags, train_size, axs, num_sample=num_sample)
  62. # ------------------------------------------ RandomForestRegressor Model ---------------------------------------
  63. random_forest_regression(data, Data_Lags, train_size, axs, n_estimators=100, max_features=nLags, num_sample=num_sample)
  64. # -------------------------------------------- Decision Tree Model ---------------------------------------------
  65. tree_decision_regression(data, Data_Lags, train_size, axs, max_depth=2, num_sample=num_sample)
  66. # ---------------------------------------------- xgboost -------------------------------------------------------
  67. xgboost_regression(data, Data_Lags, train_size, axs, n_estimators=1000, num_sample=num_sample)
  68. # ----------------------------------------------- LSTM model --------------------------------------------------
  69. train_x, train_y = sequences_data(np.array(data[:train_size]), nLags) # Convert to a time series dimension:[samples, nLags, n_features]
  70. test_x, test_y = sequences_data(np.array(data[train_size:]), nLags)
  71. mod = models.Sequential() # Build the model
  72. # mod.add(layers.ConvLSTM2D(filters=64, kernel_size=(1, 1), activation='relu', input_shape=(None, nLags))) # ConvLSTM2D
  73. # mod.add(layers.Flatten())
  74. mod.add(layers.LSTM(units=100, activation='tanh', input_shape=(None, nLags)))
  75. mod.add(layers.Dropout(rate=0.2))
  76. # mod.add(layers.LSTM(units=100, activation='tanh')) # Stacked LSTM
  77. # mod.add(layers.Bidirectional(layers.LSTM(units=100, activation='tanh'), input_shape=(None, 1))) # Bidirectional LSTM: forward and backward
  78. mod.add(layers.Dense(32))
  79. mod.add(layers.Dense(1)) # A Dense layer of 1 node is added in order to predict the label(Prediction of the next value)
  80. mod.compile(optimizer='adam', loss='mse')
  81. mod.fit(train_x, train_y, validation_data=(test_x, test_y), verbose=2, epochs=100)
  82. y_train_pred = pd.Series(mod.predict(train_x).ravel())
  83. y_test_pred = pd.Series(mod.predict(test_x).ravel())
  84. y_train_pred.index = np.arange(nLags, len(y_train_pred)+nLags, 1, dtype=int)
  85. y_test_pred.index = np.arange(train_size + nLags, len(data), 1, dtype=int)
  86. plot_models(data, y_train_pred, y_test_pred, axs, nLags, train_size, num_sample=num_sample, type_model='LSTM')
  87. # data_train = normalize.inverse_transform((np.array(data_train)).reshape(-1, 1))
  88. mod.summary(), plt.tight_layout(), plt.subplots_adjust(wspace=0, hspace=0.2), plt.show()

  1. 完整代码:mbd.pub/o/bread/ZpmWl5hx
  2. 担任《Mechanical System and Signal Processing》等审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家自动化/article/detail/938795
推荐阅读
相关标签
  

闽ICP备14008679号