赞
踩
实现线性回归的梯度下降算法,解决糖尿病预测问题,输出 MSE 和 R2 的值
熟练掌握线性回归梯度下降算法的应用
from sklearn.datasets import load_diabetes

# Load the diabetes dataset as a (features, targets) pair.
X, y = load_diabetes(return_X_y=True)
2、对数据进行标准化处理,并训练模型
# Reshape the targets into a single column; -1 lets NumPy infer the sample
# count instead of hard-coding the dataset size.
y = y.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=5
)

# Standardize with statistics learned from the training split only, then
# prepend the bias column of ones. Fitting a second scaler on the test split
# would scale the two splits with different means and variances.
scaler = StandardScaler().fit(X_train)
X_train = np.c_[np.ones((X_train.shape[0], 1)), scaler.transform(X_train)]
X_test = np.c_[np.ones((X_test.shape[0], 1)), scaler.transform(X_test)]

# Train the gradient-descent model with step size eta and gradient-norm
# tolerance epsilon.
model = LinearRegression()
model.fit(X_train, y_train, eta=0.001, epsilon=0.0001)
3、预测数据
# Predict targets for the test-split features with the fitted model.
y_pred = model.predict(X_test)
4、求均方误差和R2
# Score the predictions: mean squared error and coefficient of determination.
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mse={} and r2={}".format(mse, score))
5、图形化显示
def printLine(x_name, y_name, title, X, Y):
    """Plot two series as connected points on one figure and show it.

    Args:
        x_name: Label for the x axis.
        y_name: Label for the y axis.
        title: Figure title.
        X: 2-D array; column 0 holds the x coordinates and column 1 the
            values of the first series (drawn in blue).
        Y: 2-D array with the same layout for the second series (drawn in
            red).

    Returns:
        Always 0.
    """
    plt.figure(1)
    # First series: blue point markers, then a blue connecting line.
    plt.plot(X[:, 0], X[:, 1], 'bo', ms=3)
    plt.plot(X[:, 0], X[:, 1], 'b', ms=3, label='line1')
    # Second series: red point markers, then a red connecting line.
    plt.plot(Y[:, 0], Y[:, 1], 'ro', ms=3)
    plt.plot(Y[:, 0], Y[:, 1], 'r', ms=3, label='line2')
    # Step 3 of the font setup: every text element is drawn with the
    # module-level `font` object.
    plt.xlabel(x_name, fontproperties=font)
    plt.ylabel(y_name, fontproperties=font)
    plt.title(title, fontproperties=font)
    plt.show()
    return 0
1. 糖尿病数据
2. 运行结果
机器学习GitHub:https://github.com/wanglei18/machine_learning
import numpy as np


class LinearRegression:
    """Least-squares linear regression trained by batch gradient descent.

    After ``fit`` the learned weights are stored in ``self.w``. No intercept
    is added here: ``predict`` returns ``X.dot(self.w)``, so callers that want
    a bias term must include a column of ones in ``X`` themselves.
    """

    def fit(self, X, y, eta, epsilon, max_iter=None):
        """Learn the weights by minimizing the mean squared error.

        Args:
            X: Feature matrix of shape (m, n).
            y: Targets. A 1-D array of length m is treated as a single
                column; 2-D input is used as given.
            eta: Step size (learning rate) of each gradient step.
            epsilon: Stop once the 2-norm of the gradient is below this value.
            max_iter: Optional cap on the number of gradient steps. ``None``
                (the default) runs until the gradient norm drops below
                ``epsilon``, however long that takes.
        """
        m, n = X.shape
        y = np.asarray(y)
        if y.ndim == 1:
            # Without this, (m, 1) - (m,) would broadcast the residual to an
            # (m, m) matrix instead of a column of per-sample errors.
            y = y.reshape(-1, 1)

        w = np.zeros((n, 1))
        steps = 0
        while max_iter is None or steps < max_iter:
            e = X.dot(w) - y          # residuals of the current weights
            g = 2 * X.T.dot(e) / m    # gradient of the mean squared error
            # The step is taken before the convergence check, so the stored
            # weights include the update made with the final gradient.
            w = w - eta * g
            steps += 1
            if np.linalg.norm(g, 2) < epsilon:
                break
        self.w = w

    def predict(self, X):
        """Return the predictions ``X.dot(self.w)`` for feature matrix X."""
        return X.dot(self.w)
-
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties  # font setup, step 1
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from machine_learning.homework.libs.grade import LinearRegression

# Font setup, step 2: build the font object passed to the plot's text
# elements. The path points at the SimSun font file of a Windows install,
# presumably so that the Chinese axis labels and title render correctly.
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=15)
-
def addLine(X):
    """Prepend a 1-based running index column to X.

    The index (1.0, 2.0, ..., one value per row of X) becomes column 0 and
    the original data follows it.

    Args:
        X: Array whose first dimension is the number of samples. A 1-D array
            is treated as a single column.

    Returns:
        A 2-D array with the index column followed by the columns of X.
    """
    length = X.shape[0]
    # Build the whole index in one vectorized call instead of filling it
    # element by element; float dtype matches an index built from np.ones.
    index = np.arange(1, length + 1, dtype=float).reshape(-1, 1)
    return np.c_[index, X]
-
def printLine(x_name, y_name, title, X, Y):
    """Plot two series as connected points on one figure and show it.

    Args:
        x_name: Label for the x axis.
        y_name: Label for the y axis.
        title: Figure title.
        X: 2-D array; column 0 holds the x coordinates and column 1 the
            values of the first series (drawn in blue).
        Y: 2-D array with the same layout for the second series (drawn in
            red).

    Returns:
        Always 0.
    """
    plt.figure(1)
    # First series: blue point markers, then a blue connecting line.
    plt.plot(X[:, 0], X[:, 1], 'bo', ms=3)
    plt.plot(X[:, 0], X[:, 1], 'b', ms=3, label='line1')
    # Second series: red point markers, then a red connecting line.
    plt.plot(Y[:, 0], Y[:, 1], 'ro', ms=3)
    plt.plot(Y[:, 0], Y[:, 1], 'r', ms=3, label='line2')
    # Step 3 of the font setup: every text element is drawn with the
    # module-level `font` object.
    plt.xlabel(x_name, fontproperties=font)
    plt.ylabel(y_name, fontproperties=font)
    plt.title(title, fontproperties=font)
    plt.show()
    return 0
-
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    The squared differences are averaged along axis 0, so for 2-D column
    input the result is an array with one value per column, and for 1-D
    input it is a scalar.

    Args:
        y_true: Observed target values (array-like).
        y_pred: Predicted values, broadcastable against ``y_true``.
    """
    # Accept plain sequences as well as arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.average((y_true - y_pred) ** 2, axis=0)
-
def r2_score(y_true, y_pred):
    """Return the coefficient of determination R^2.

    Computed along axis 0 as ``1 - SS_res / SS_tot``, where ``SS_res`` is the
    sum of squared prediction errors and ``SS_tot`` the sum of squared
    deviations of ``y_true`` from its mean. The division is not guarded, so
    targets with zero variance make the denominator zero.

    Args:
        y_true: Observed target values (array-like).
        y_pred: Predicted values, broadcastable against ``y_true``.
    """
    # Accept plain sequences as well as arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    residual_ss = ((y_true - y_pred) ** 2).sum(axis=0)
    total_ss = ((y_true - np.average(y_true, axis=0)) ** 2).sum(axis=0)
    return 1 - residual_ss / total_ss
-
def process_features(X):
    """Standardize the columns of X and prepend a bias column of ones.

    A new ``StandardScaler`` is fitted on every call, so the scaling uses the
    statistics of whatever array is passed in. Calling this separately on a
    training set and a test set therefore scales them with different
    parameters.

    Args:
        X: 2-D feature matrix of shape (m, n).

    Returns:
        Array of shape (m, n + 1): a column of ones followed by the
        standardized features.
    """
    X = StandardScaler().fit_transform(X)
    bias = np.ones((X.shape[0], 1))
    return np.c_[bias, X]
-
# Load the diabetes dataset as a (features, targets) pair and show it.
X, y = load_diabetes(return_X_y=True)
print(X.shape, X)
print(y.shape, y)

# Reshape the targets into a single column; -1 lets NumPy infer the sample
# count instead of hard-coding the dataset size.
y = y.reshape(-1, 1)

# Hold out 40% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=5
)

# Standardize with statistics learned from the training split only, then
# prepend the bias column of ones. Fitting a second scaler on the test split
# would scale the two splits with different means and variances.
scaler = StandardScaler().fit(X_train)
X_train = np.c_[np.ones((X_train.shape[0], 1)), scaler.transform(X_train)]
X_test = np.c_[np.ones((X_test.shape[0], 1)), scaler.transform(X_test)]

print(X_train.shape, y_train.shape)

# Train with gradient descent, then predict the test split.
model = LinearRegression()
model.fit(X_train, y_train, eta=0.001, epsilon=0.0001)
y_pred = model.predict(X_test)
print(y_pred.shape, y_test.shape, model.w.shape)

# Report the mean squared error and the R^2 score on the test split.
mse = mean_squared_error(y_test, y_pred)
score = r2_score(y_test, y_pred)
print("mse={} and r2={}".format(mse, score))

# Add a running sample index as the x coordinate so both series can be
# plotted against it.
y_test = addLine(y_test)
y_pred = addLine(y_pred)

printLine('范围', '期望值', '梯度下降算法解决糖尿病问题', y_pred, y_test)
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。