提出一种基于最大重叠离散小波变换(MODWT)和支持向量回归(SVR)的金融时间序列预测方法。程序运行环境为 Python 或 Jupyter Notebook,所用模块如下:

  1. import numpy as np
  2. import pandas as pd
  3. import copy
  4. import matplotlib.pyplot as plt
  5. from sklearn.model_selection import train_test_split
  6. from sklearn import svm
  7. from sklearn.metrics import mean_squared_error
  8. from numpy.lib.stride_tricks import sliding_window_view
  9. from modwt import modwt, modwtmra,imodwt


  # Part 1: SVM + sliding window on the original (untransformed) time series.
  # Read the data: the CSV is semicolon-delimited, the first row is the header
  # and the 'Date' column is parsed into datetimes.
  prices = pd.read_csv(
      'Data/AUD-JPY-2003-2014-day.csv',
      delimiter=";",
      header=0,
      encoding='utf-8',
      parse_dates=['Date'],
  )
  # Bare expression kept from the original listing; in a notebook it displays
  # the frame only when it is the last statement of the cell.
  prices
  # Drop the columns that are not used.
  prices.drop(["Open", "High", "Low"], axis=1, inplace=True)
  # Define the working variables as independent copies of the two columns.
  dates = prices['Date'].copy()
  closing_prices = prices['Close'].copy()
  # Plot the original time series with matplotlib.
  plt.subplots(figsize=(16, 4))
  plt.plot(dates, closing_prices, label='Original series AUD-JPY 2003-2014')
  plt.legend(loc='best')
  plt.show()
  # SVM + sliding-window implementation.
  # Sliding-window helper.
  def slideWindow(series, window_lenght = 2):
      """Split a 1-D series into sliding-window inputs and next-value targets.

      Every run of ``window_lenght + 1`` consecutive values yields one sample:
      the first ``window_lenght`` values form the input and the final value is
      the target.

      Args:
          series: One-dimensional array-like of values.
          window_lenght: Number of consecutive values in each input window.
              The misspelled name is kept so keyword callers keep working.

      Returns:
          A tuple ``(_X, _Y)`` of equal-length lists. ``_X`` holds one array
          of ``window_lenght`` values per sample and ``_Y`` holds the value
          that immediately follows each of those windows.
      """
      # Windows are one element longer than the input width because the last
      # element of each window is taken as the output value.
      aux_Window = sliding_window_view(series, window_lenght + 1)
      # First `window_lenght` values -> input (X); last value -> output (Y).
      _X = [window[:-1] for window in aux_Window]
      _Y = [window[-1] for window in aux_Window]
      return _X, _Y
  window_lenght = 2
  # Call the sliding-window function.
  X, Y = slideWindow(closing_prices, window_lenght)
  # 25% of the data is used to test the SVM. Sample i has the price at
  # position i + window_lenght as its target, hence the offset when locating
  # the first test date.
  idx_test_date = int(0.75 * len(Y)) + window_lenght
  df = pd.DataFrame(columns=['test_date'])
  df['test_date'] = prices['Date'].iloc[idx_test_date:]
  # Splitting and plotting test data.
  # Split the data into training data (75%) and test data (25%).
  # shuffle=False keeps the samples in order instead of shuffling them.
  x_train, x_test, y_train, y_test = train_test_split(
      X, Y, test_size=0.25, random_state=None, shuffle=False
  )
  # Top panel: full series with the test span overlaid; bottom panel: the
  # test span on its own.
  fig, ax = plt.subplots(2, 1, figsize=(16, 8))
  ax[0].plot(dates, closing_prices, label='Original')
  ax[0].plot(df['test_date'], y_test, label='Values to test the model out', color='orange')
  ax[1].plot(df['test_date'], y_test, label='Values to test the model out', color='orange')
  ax[0].legend(loc='best')
  ax[1].legend(loc='best')
  plt.show()
  # Build the SVR.
  def evaluateSVR(_x_train,_y_train,_x_test,_y_test, kernel = 'rbf'):
      """Fit a support vector regressor on the training data and predict.

      Args:
          _x_train: Training inputs passed to ``fit``.
          _y_train: Training targets passed to ``fit``.
          _x_test: Inputs passed to ``predict``.
          _y_test: Not used by this function; kept so the signature callers
              rely on does not change.
          kernel: ``'rbf'`` or ``'poly'``; any other value selects the linear
              kernel.

      Returns:
          The predictions produced for ``_x_test``.
      """
      if kernel == 'rbf':
          clf = svm.SVR(kernel='rbf', C=1e3, gamma=0.1)
      elif kernel == 'poly':
          clf = svm.SVR(kernel='poly', C=1e3, degree=2)
      else:
          # Fallback for every other kernel name.
          clf = svm.SVR(kernel='linear', C=1e3)
      _y_predict = clf.fit(_x_train, _y_train).predict(_x_test)
      return _y_predict
  # Predict the test targets with the default kernel ('rbf').
  y_predict = evaluateSVR(x_train, y_train, x_test, y_test)
  # Independent copy of the test targets; presumably used for plotting later
  # in the file (name suggests so) — TODO confirm.
  plotValuesWt = y_test.copy()


