赞
踩
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import plotly.express as px
- from sklearn.preprocessing import MinMaxScaler
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import mean_absolute_percentage_error
- import tensorflow as tf
- from keras import Model
- from keras.layers import Input, Dense, Dropout, LSTM
# Load the gold price history from the CSV export.
csv_path = 'Gold Price (2013-2023).csv'
df = pd.read_csv(csv_path)
df  # notebook cell output: preview the frame
# Print column dtypes and non-null counts.
df.info()
- <class 'pandas.core.frame.DataFrame'>
- RangeIndex: 2583 entries, 0 to 2582
- Data columns (total 7 columns):
- # Column Non-Null Count Dtype
- --- ------ -------------- -----
- 0 Date 2583 non-null object
- 1 Price 2583 non-null object
- 2 Open 2583 non-null object
- 3 High 2583 non-null object
- 4 Low 2583 non-null object
- 5 Vol. 2578 non-null object
- 6 Change % 2583 non-null object
- dtypes: object(7)
- memory usage: 141.4+ KB
# Discard the two columns that are not used further on.
df = df.drop(columns=['Vol.', 'Change %'])
df

# Parse the dates, order the rows chronologically and renumber them 0..n-1.
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values('Date').reset_index(drop=True)

# Remove commas from the remaining text columns so they can be cast to
# float64 (df.info() reports them as object dtype).
numCols = df.columns.drop('Date')
df[numCols] = df[numCols].replace({',': ''}, regex=True).astype('float64')

# Quick sanity checks: first rows, duplicate rows, missing values.
df.head()
df.duplicated().sum()
df.isnull().sum()
- Date 0
- Price 0
- Open 0
- High 0
- Low 0
- dtype: int64
import plotly.express as px

# Interactive line chart of price against date.
title_cfg = dict(
    text='Gold Price Data',
    y=0.95,
    x=0.5,
    xanchor='center',
    yanchor='top',
)
fig = px.line(x=df['Date'], y=df['Price'])
fig.update_traces(line_color='black')
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Price',
    title=title_cfg,
    plot_bgcolor='rgba(255,223,0,0.9)',
)
fig.show()

# Number of rows dated 2022; later code uses it as the size of the test
# split taken from the end of the frame.
is_2022 = df['Date'].dt.year == 2022
test_size = len(df[is_2022])
print(test_size)
260
import matplotlib.pyplot as plt

# Positional split: the last `test_size` rows are the test set, the rest
# is training data.
train_part = df.iloc[:-test_size]
test_part = df.iloc[-test_size:]

plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')

# Training range in black, held-out range in red.
plt.plot(train_part['Date'], train_part['Price'], color='black', lw=2)
plt.plot(test_part['Date'], test_part['Price'], color='red', lw=2)

plt.title('Gold Price Train and Test', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Train Set', 'Test Set'], loc='upper left', prop={'size': 15})
plt.grid(color='white')

plt.show()
# Fit the scaler on the training portion only (every row except the last
# `test_size`), so that prices from the held-out period never influence
# the min/max used for scaling.
scaler = MinMaxScaler()
scaler.fit(df.Price[:-test_size].values.reshape(-1, 1))
- MinMaxScaler()
# Number of consecutive past prices fed to the model for each prediction.
window_size = 60


def _make_windows(scaled, window):
    """Turn a scaled (n, 1) series into supervised samples.

    Each sample holds `window` consecutive values and its target is the
    value that immediately follows them.

    Args:
        scaled: 2-D array of shape (n, 1).
        window: number of time steps per sample.

    Returns:
        Tuple `(X, y)` with shapes `(m, window, 1)` and `(m, 1)`, where
        `m = max(n - window, 0)`.
    """
    column = np.asarray(scaled)[:, 0]
    ends = range(window, len(column))
    X = np.array([column[i - window:i] for i in ends], dtype=column.dtype)
    y = np.array([column[i] for i in ends], dtype=column.dtype)
    # reshape(-1, ...) also yields well-formed empty arrays when the
    # series is not longer than the window.
    return X.reshape(-1, window, 1), y.reshape(-1, 1)


# Training series: everything except the last `test_size` rows.  The
# scaler is (re)fitted here on training prices only.
train_data = df.Price[:-test_size]
train_data = scaler.fit_transform(train_data.values.reshape(-1, 1))

# Test series: the last `test_size` rows plus the `window_size` rows that
# precede them, so the first test target has a full input window.
test_data = df.Price[-test_size - window_size:]
test_data = scaler.transform(test_data.values.reshape(-1, 1))

X_train, y_train = _make_windows(train_data, window_size)
X_test, y_test = _make_windows(test_data, window_size)

print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
- X_train shape: (2263, 60, 1)
- y_train shape: (2263, 1)
- X_test shape: (260, 60, 1)
- y_test shape: (260, 1)
- import tensorflow as tf
-
-
def define_model():
    """Build and compile the stacked-LSTM regressor.

    The network takes a `(window_size, 1)` sequence and passes it through
    three 64-unit LSTM layers, each followed by dropout of 0.2, then a
    32-unit dense layer and a single-unit output layer.  It is compiled
    with mean squared error loss and the Nadam optimizer, and its summary
    is printed before it is returned.

    Returns:
        The compiled `tf.keras` model.
    """
    layers = tf.keras.layers

    # Take Input from tf.keras as well, so every layer and the Model come
    # from the same Keras implementation.
    input1 = tf.keras.Input(shape=(window_size, 1))

    # The first two LSTM layers return full sequences so the next LSTM
    # layer receives one vector per time step.
    x = layers.LSTM(units=64, return_sequences=True)(input1)
    x = layers.Dropout(0.2)(x)
    x = layers.LSTM(units=64, return_sequences=True)(x)
    x = layers.Dropout(0.2)(x)
    x = layers.LSTM(units=64)(x)
    x = layers.Dropout(0.2)(x)

    # ReLU instead of softmax: softmax would force the 32 hidden
    # activations to sum to 1, which needlessly constrains a regression
    # head.
    x = layers.Dense(32, activation='relu')(x)
    dnn_output = layers.Dense(1)(x)

    model = tf.keras.models.Model(inputs=input1, outputs=dnn_output)
    model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Nadam())
    model.summary()
    return model
# Build the network and train it; 10% of the training samples are set
# aside for validation via `validation_split`.
model = define_model()

fit_options = dict(epochs=150, batch_size=32, validation_split=0.1, verbose=1)
history = model.fit(X_train, y_train, **fit_options)
- Model: "model_3"
- _________________________________________________________________
- Layer (type) Output Shape Param #
- =================================================================
- input_4 (InputLayer) [(None, 60, 1)] 0
-
- lstm_9 (LSTM) (None, 60, 64) 16896
-
- dropout_9 (Dropout) (None, 60, 64) 0
-
- lstm_10 (LSTM) (None, 60, 64) 33024
-
- dropout_10 (Dropout) (None, 60, 64) 0
-
- lstm_11 (LSTM) (None, 64) 33024
-
- dropout_11 (Dropout) (None, 64) 0
-
- dense_6 (Dense) (None, 32) 2080
-
- dense_7 (Dense) (None, 1) 33
-
- =================================================================
- Total params: 85057 (332.25 KB)
- Trainable params: 85057 (332.25 KB)
- Non-trainable params: 0 (0.00 Byte)
- _________________________________________________________________
# Loss of the compiled model (mean squared error) on the scaled test set.
result = model.evaluate(X_test, y_test)

y_pred = model.predict(X_test)

# Score MAPE in original price units.  Min-max scaling subtracts an
# offset, so a percentage error computed on the scaled series is not the
# percentage error of the actual prices.
MAPE = mean_absolute_percentage_error(
    scaler.inverse_transform(y_test.reshape(-1, 1)),
    scaler.inverse_transform(y_pred.reshape(-1, 1)),
)
Accuracy = 1 - MAPE

print('Test Loss:', result)
print('Test MAPE:', MAPE)
print('Test Accuracy:', Accuracy)
- Test Loss: 0.0008509838371537626
- Test MAPE: 0.0319030650799213
- Test Accuracy: 0.9680969349200788
# Map the scaled targets and predictions back to price units.
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_test_pred = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')

# Training history in black, then actual vs predicted prices over the
# test period, so the chart actually shows the model's forecasts instead
# of repeating the train/test split figure.
test_dates = df.Date[-test_size:]
plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
plt.plot(test_dates, y_test_true, color='blue', lw=2)
plt.plot(test_dates, y_test_pred, color='red', lw=2)

plt.title('Model Performance on Gold Price Prediction', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(
    ['Training Data', 'Actual Test Data', 'Predicted Test Data'],
    loc='upper left',
    prop={'size': 15},
)
plt.grid(color='white')

plt.show()
知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1
担任《Mechanical Systems and Signal Processing》审稿专家,担任《中国电机工程学报》,《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。