赞
踩
手动实现
- '''
- 使用Python语言实现线性回归(梯度下降)
- '''
- import numpy as np
- import matplotlib.pyplot as plt
-
# Training samples: a single input feature x and its observed target y.
x = np.array([0.5, 0.6, 0.8, 1.1, 1.4])
y = np.array([5.0, 5.5, 6.0, 6.8, 7.1])

# Initial parameters and hyper-parameters of the model y = w1 * x + w0.
w1 = 1  # weight (slope)
w0 = 1  # bias (intercept)
learning_rate = 0.01  # step size applied to each gradient
epoch = 500  # number of update rounds

# History of the parameters and the loss, recorded BEFORE each update so the
# curves can be plotted afterwards.
w0s, w1s, losses, epoches = [], [], [], []
for i in range(epoch):
    # Residual of the current model on every sample. The loss and both
    # gradients are derived from it, so it is computed once per round.
    residual = w1 * x + w0 - y

    # Report the loss and the parameters as they stand before this update.
    # Loss is half the sum of squared residuals.
    loss = (residual ** 2).sum() / 2
    print('轮数:{:3},w1:{:.8f},w0:{:.8f},loss:{:.8f}'.format(i + 1, w1, w0, loss))

    # Collect the evolution of w0, w1 and the loss.
    w0s.append(w0)
    w1s.append(w1)
    losses.append(loss)
    epoches.append(i + 1)

    # Partial derivatives of the loss with respect to w0 and w1.
    d0 = residual.sum()
    d1 = (x * residual).sum()
    # Gradient-descent step: move both parameters against their gradient.
    w0 = w0 - learning_rate * d0
    w1 = w1 - learning_rate * d1

# Final parameters after the last update.
print('w1:{},w0:{}'.format(w1, w0))

pred_y = w1 * x + w0  # predictions of the trained model on the training inputs
-
# Optional: draw the samples together with the fitted line.
# plt.scatter(x, y)
# plt.plot(x, pred_y, color='orangered')
# plt.show()

# Visualise how the model parameters and the loss changed during training:
# one stacked subplot each for w0, w1 and the loss, all against the round
# number.
plt.figure('params', figsize=(8, 6), facecolor='lightgray')

for row, (series, colour, name) in enumerate(
        [(w0s, 'dodgerblue', 'w0'),
         (w1s, 'dodgerblue', 'w1'),
         (losses, 'orangered', 'loss')],
        start=1):
    plt.subplot(3, 1, row)
    plt.plot(epoches, series, color=colour, label=name)
    plt.legend()

plt.show()
调用 scikit-learn
- '''
- 基于sklearn提供的API实现线性回归
- '''
- import pandas as pd
- import sklearn.linear_model as lm # 线性模型
- import matplotlib.pyplot as plt
- import sklearn.metrics as sm # 模型评估模块
- import pickle
-
# Load the data set.
data = pd.read_csv('../data_test/Salary_Data.csv')

# Split into the model input (2-D: every column except the last) and the
# output (1-D: the last column only).
x = data.iloc[:, :-1]
y = data.iloc[:, -1]
print('x:', x)
print('y:', y)

# Build the model: y = w1*x1 + w2*x2 + ... + wn*xn + w0
model = lm.LinearRegression()
# Train the model.
model.fit(x, y)
# Predict on the inputs the model was trained on.
pred_y = model.predict(x)

# Learned weight of the first input column and the intercept.
print('w:{}'.format(model.coef_[0]))
print('b:{}'.format(model.intercept_))

# Optional: visualise the samples and the fitted line.
# plt.scatter(x, y)
# plt.plot(x, pred_y, color='orangered')
# plt.show()
-
# Model evaluation.
# Every 4th row is taken as a stand-in test set. NOTE: these rows are a
# subset of the x / y the model was fitted on, so the scores below are
# optimistic; the tutorial merely pretends they were never used in training.
test_x = x.iloc[::4]  # test-set inputs
test_y = y.iloc[::4]  # test-set outputs (ground truth), same positions as test_x
pred_test_y = model.predict(test_x)  # predicted values

# Mean absolute error.
print(sm.mean_absolute_error(test_y, pred_test_y))
# Median absolute error.
print(sm.median_absolute_error(test_y, pred_test_y))
# R2 score.
print(sm.r2_score(test_y, pred_test_y))

# Saving and loading the model (example, disabled).
# NOTE: only unpickle files from a trusted source; pickle.load can execute
# arbitrary code.
# with open('./model.pickle', 'wb') as f:
#     pickle.dump(model, f)
#     print('模型保存成功')
#
#
# with open('./model.pickle', 'rb') as f:
#     model = pickle.load(f)
#     model.predict(test_x)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。