当前位置:   article > 正文

机器学习lgbm时间序列预测实战

机器学习lgbm时间序列预测实战

 

 

完整代码

  1. from sklearn import preprocessing
  2. import random
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.preprocessing import MinMaxScaler
  5. from sklearn import preprocessing
  6. from datetime import datetime
  7. import time
  8. import math
  9. from matplotlib import pyplot
  10. from sklearn.model_selection import train_test_split
  11. from scipy import stats, integrate
  12. import matplotlib.pyplot as plt
  13. from matplotlib.font_manager import FontProperties
  14. import numpy as np
  15. import pandas as pd
  16. import matplotlib.pyplot as plt
  17. from sklearn.model_selection import KFold
  18. from sklearn.metrics import classification_report
  19. from sklearn.model_selection import learning_curve
  20. from sklearn.model_selection import validation_curve
  21. from sklearn.model_selection import cross_val_score
  22. from sklearn.preprocessing import StandardScaler
  23. from sklearn.decomposition import PCA
  24. from sklearn.metrics import confusion_matrix
  25. from sklearn.model_selection import train_test_split
  26. from sklearn import metrics
  27. from sklearn.svm import SVR
  28. from sklearn.neighbors import KNeighborsRegressor
  29. from sklearn.metrics import mean_squared_error, mean_absolute_error # 评价指标
  30. from sklearn.linear_model import LogisticRegression
  31. from sklearn.ensemble import RandomForestRegressor
  32. from sklearn.ensemble import AdaBoostRegressor
  33. from xgboost import XGBRegressor
  34. from sklearn.ensemble import GradientBoostingRegressor
  35. from lightgbm import LGBMRegressor
  36. from collections import Counter
  # --------------------------------------------
# LightGBM sales forecasting script.
#
# Pipeline: load the sales CSV, keep one shop, cut a training period and a
# test period by order date, turn each period into sliding windows of
# SEQUENCE_LENGTH consecutive rows (target = the row right after the window),
# fit an LGBMRegressor on the training windows, then score and plot the
# predictions for the test windows.
#
# NOTE(review): windows are built over consecutive *rows* of the filtered
# frame. Whether one row corresponds to one day (or one order / one SKU) is
# not visible in this script -- confirm against the CSV before trusting the
# "time series" interpretation.

CSV_PATH = '销售数据0321-8家店铺.csv'
# Row counts per shop as recorded by the original author:
# {'AMZUSLA': 53329, 'AMZUS3CZ': 30697, 'AMZUS5HZ': 30277, 'AMZDELAHK': 14695,
#  'AMZUS6PTXZ': 10742, 'AMZUS2XZ': 8676, 'AMZITLAHK': 4738, 'WayfairUSYZ': 3616}
SHOP = 'AMZDELAHK'
DATE_COLUMN = '订单日期'
TARGET_COLUMN = '店铺SKU_销量'
TRAIN_START, TRAIN_END = '2021-01-01', '2023-08-29'
TEST_START, TEST_END = '2023-09-01', '2023-10-01'
SEQUENCE_LENGTH = 28  # input window length; the author also tried 7 and 14


def build_windows(series, window):
    """Cut a 1-D series into sliding windows and their next-step targets.

    Args:
        series: 1-D sequence of values in chronological order.
        window: number of consecutive values in each input window (> 0).

    Returns:
        A tuple ``(inputs, targets)`` of NumPy arrays. ``inputs`` has shape
        ``(n, window)`` and ``targets`` has shape ``(n,)`` where
        ``n = max(len(series) - window, 0)``; ``targets[i]`` is the value
        immediately following ``inputs[i]``.

    Raises:
        ValueError: if ``window`` is not positive.
    """
    if window <= 0:
        raise ValueError(f"window must be positive, got {window}")
    values = np.asarray(series)
    # Every start index i with i + window <= len - 1 has a target, which gives
    # len - window samples. The original loop stopped one short of that and
    # silently dropped the final sample.
    n_samples = len(values) - window
    if n_samples <= 0:
        return (np.empty((0, window), dtype=values.dtype),
                np.empty((0,), dtype=values.dtype))
    inputs = np.array([values[i:i + window] for i in range(n_samples)])
    targets = values[window:].copy()
    return inputs, targets


def _select_period(frame, start, end):
    """Return the rows of *frame* whose order date lies in [start, end]."""
    dates = frame[DATE_COLUMN]
    return frame[(dates >= start) & (dates <= end)]


def main():
    """Run the full load / train / evaluate / plot pipeline."""
    # These imports were performed mid-script in the original listing; they
    # are kept local so that importing this module has no heavy side effects.
    import matplotlib.pyplot as plt
    import seaborn as sns
    from metra import metric
    # Unused here. The original labelled this "logistic regression", but
    # LassoLarsIC is a Lasso model fitted with LARS; kept only so that no
    # import from the original script is dropped.
    from sklearn.linear_model import LassoLarsIC as LR  # noqa: F401

    data = pd.read_csv(CSV_PATH, encoding='gbk')
    print(data.columns)
    # Shops ordered by row count, most frequent first.
    print(dict(Counter(data['店铺'].values).most_common()))

    # Keep a single shop. Copy so the column assignment below writes to an
    # independent frame instead of a slice of the full data set.
    data = data[data['店铺'] == SHOP].copy()
    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
    # Windows are only meaningful in chronological order; a stable sort keeps
    # the file order of rows that share the same date.
    data = data.sort_values(DATE_COLUMN, kind='mergesort')
    print(data)

    train_frame = _select_period(data, TRAIN_START, TRAIN_END)
    print(train_frame)
    test_frame = _select_period(data, TEST_START, TEST_END)
    print(test_frame)

    train_x, train_y = build_windows(
        train_frame[TARGET_COLUMN].values, SEQUENCE_LENGTH)
    test_x, test_y = build_windows(
        test_frame[TARGET_COLUMN].values, SEQUENCE_LENGTH)
    if len(train_x) == 0 or len(test_x) == 0:
        raise ValueError(
            "each period needs more than "
            f"{SEQUENCE_LENGTH} rows to form at least one window "
            f"(train rows: {len(train_frame)}, test rows: {len(test_frame)})"
        )
    print(test_x.shape)
    # -----------------------------------------------------------
    # The original passed the training windows through
    # train_test_split(test_size=0.99, shuffle=False), i.e. it fitted on the
    # earliest 1% of windows and never used the 99% hold-out. Evaluation uses
    # the separate test period, so all training windows are used for fitting.
    print('x_train.shape', train_x.shape)

    # Gradient-boosted tree regressor (LightGBM).
    model = LGBMRegressor()
    model.fit(train_x, train_y)
    predictions = model.predict(test_x)

    # metric is a project-local helper; it is unpacked into six values as in
    # the original script, of which r2 is not printed.
    mae, mse, rmse, mape, mspe, _r2 = metric(
        np.array(predictions), np.array(test_y))
    print('mae, mse, rmse, mape, mspe')
    print(mae, mse, rmse, mape, mspe)

    # Plot predictions against the true values of the test period.
    sns.set(style="darkgrid")
    plot_frame = pd.DataFrame({
        'x': range(len(test_y)),
        'y_pred': predictions,
        'y_true': test_y,
    })
    sns.lineplot(x='x', y='y_pred', data=plot_frame, linewidth=1,
                 label='y_pred')
    sns.lineplot(x='x', y='y_true', data=plot_frame, linewidth=1,
                 label='y_true')
    plt.title('Prediction vs True')
    plt.xlabel('Date')
    plt.ylabel('Values')
    # Save before show(): showing first can leave an empty figure on disk.
    plt.savefig('预测1.png')
    plt.show()


if __name__ == "__main__":
    main()

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/309065?site
推荐阅读
相关标签
  

闽ICP备14008679号