菜鸟,刚入机器学习不久,数据是导师给的
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression

try:
    from sklearn.cross_validation import KFold
except ImportError:
    # sklearn.cross_validation was removed in scikit-learn 0.20; KFold now
    # lives in sklearn.model_selection.
    from sklearn.model_selection import KFold
-
-
def mape_cal(originalValue, predictValue, length):
    """Return the mean absolute percentage error as a fraction (not x100).

    Both inputs are flattened to 1-D float arrays first, so lists, pandas
    Series and ``(n, 1)`` column vectors are all accepted and are compared
    element by element instead of being broadcast against each other.

    Args:
        originalValue: Observed values; each one is used as a divisor, so a
            zero entry yields ``inf``/``nan`` in the result.
        predictValue: Predicted values, same number of elements.
        length: Divisor applied to the summed relative errors.

    Returns:
        ``sum(|(original - predicted) / original|) / length``.
    """
    actual = np.asarray(originalValue, dtype=float).ravel()
    predicted = np.asarray(predictValue, dtype=float).ravel()
    relative_error = (actual - predicted) / actual
    return np.abs(relative_error).sum() / length
-
-
# Fit a linear regression on three contiguous folds of the training file,
# average the per-fold predictions for the test file, then report error
# metrics, save the predictions and plot them.
data = pd.read_csv("east训练集.csv")
data2 = pd.read_csv("测试集(1).csv")

cols = data.shape[1]  # number of columns in the training file
row = data.shape[0]  # number of rows in the training file

predictors = ['x1', 'x2', 'x3', 'x4', 'x5']

# Positional split: every column before the last one vs. the last column.
# NOTE(review): the test file is sliced with the training file's column
# count, so both files are assumed to share one column layout -- confirm.
X_train = data.values[:, 0:cols-1]
y_train = data.values[:, cols-1:cols]
X_test = data2.values[:, 0:cols-1]
y_test = data2.values[:, cols-1:cols]

lr = LinearRegression()

# Imported here so the splitter below is always the model_selection one,
# whichever KFold the top-of-file import managed to bind.
from sklearn.model_selection import KFold

# Unshuffled folds are deterministic, so random_state is not passed (current
# scikit-learn rejects random_state when shuffle is False).
kf = KFold(n_splits=3)
predictions = []
for train, _ in kf.split(data):
    # `train` holds row positions, hence .iloc rather than label-based .loc.
    fold_features = data[predictors].iloc[train]
    fold_target = data['y'].iloc[train]
    lr.fit(fold_features, fold_target)
    # Each fold's model predicts the whole test file.
    predictions.append(lr.predict(data2[predictors]))

# Element-wise mean of the per-fold predictions.
y_predict = np.mean(predictions, axis=0)
print(y_predict)

# Score the averaged predictions against the test targets.
print("-----预测结果-----")
mse = metrics.mean_squared_error(y_test, y_predict)
print("均方差MSE:", mse)
print("均根方差RMSE:", np.sqrt(mse))
print("MAE:", metrics.mean_absolute_error(y_test, y_predict))

mapeValue = mape_cal(y_test.ravel(), y_predict.ravel(), X_test.shape[0])
print("mape值:", mapeValue)

temp = pd.Series(y_predict.ravel())
temp.to_csv('test2.csv')

fig = plt.figure(figsize=(15, 7))

# Top panel: predicted vs. measured, with a y = x reference line.
ax1 = fig.add_subplot(2, 1, 1)
ax1.scatter(y_test, y_predict, linewidths=4, c='blue')
y_low, y_high = y_test.min(), y_test.max()
# Only the dash style goes in the format string; the colour comes from `c`,
# so the two no longer disagree.
ax1.plot([y_low, y_high], [y_low, y_high], '--', lw=4, c='red')
ax1.set_xlabel('Measured')
ax1.set_ylabel('Predicted')

# Bottom panel: predicted and original series over the sample index.
ax3 = fig.add_subplot(2, 1, 2)
sample_index = range(X_test.shape[0])
ax3.plot(sample_index, y_predict, c='red', label='predictValue', lw=2)
ax3.plot(sample_index, y_test, c='blue', label='originalValue', lw=2)
ax3.legend(loc='upper left')

plt.show()
预测结果:
- -----预测结果-----
- 均方差MSE: 1413.927330961531
- 均根方差RMSE: 37.60222507992753
- MAE: 30.085778687356473
- mape值: 0.18161724802211374