First, import the required libraries and suppress the FutureWarning output
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn import preprocessing
import warnings
warnings.filterwarnings("ignore")  # suppress the FutureWarning
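The FutureWarning being silenced here is raised by load_boston, which is deprecated and was removed in scikit-learn 1.2. On newer versions, the same data can be loaded with the replacement snippet suggested in scikit-learn's deprecation notice (a sketch; it assumes network access to the CMU archive):

data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])  # the 13 feature columns
target = raw_df.values[1::2, 2]                                     # house prices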
Preprocess the data and select the features most strongly correlated with price.
The first commented-out block plots each of the thirteen influencing factors against price.
The second commented-out block fits a one-variable linear regression for each of the three most strongly correlated features and plots it against price.
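For a single feature x, the least-squares line has slope k = cov(x, y) / var(x) and intercept b = mean(y) - k * mean(x); that is exactly what the second commented-out block computes.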
boston = load_boston()
x = boston['data']                        # all feature values
y = boston['target']                      # house prices
feature_names = boston['feature_names']   # the thirteen influencing factors
boston_data = pd.DataFrame(x, columns=feature_names)  # features labelled by factor name
boston_data['price'] = y                  # append the price column
'''
for i in range(13):
    plt.figure(figsize=(10, 7))
    plt.scatter(x[:, i], y, s=2)          # one factor's values against house price
    plt.title(feature_names[i])
    plt.show()
'''
cor = boston_data.corr()['price']         # correlation of each column with price
print(cor)
# keep the features with |correlation| > 0.5: RM, LSTAT, PTRATIO (plus price)
boston_data = boston_data[['LSTAT', 'PTRATIO', 'RM', 'price']]  # keep these four columns
y = np.array(boston_data['price'])
feature_names = ['LSTAT', 'PTRATIO', 'RM', 'price']
'''
for i in range(3):
    x = np.array(boston_data[feature_names[i]])
    var = np.var(x, ddof=1)               # sample variance of the feature
    cov = np.cov(x, y)[0][1]              # covariance of the feature with price
    k = cov / var                         # least-squares slope
    b = np.mean(y) - k * np.mean(x)       # least-squares intercept
    y_price = k * x + b
    plt.plot(x, y, 'b.')
    plt.plot(x, y_price, 'k-')
    plt.show()
'''
—————————————————————————————————————————————
Direct implementation with sklearn's LinearRegression
boston_data = boston_data.drop(['price'], axis=1)
x = np.array(boston_data)
print(x)
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=0)  # split train/test
# solve directly with the library model
li = LinearRegression()         # load the model
li.fit(train_x, train_y)        # fit the training data
y_predict = li.predict(test_x)  # predict on the test set
plt.figure(figsize=(10, 7))
plt.plot(y_predict, 'b-')
plt.plot(test_y, 'r--')
plt.legend(['predict', 'true'])
plt.title('Model')
plt.show()
mse = metrics.mean_squared_error(test_y, y_predict)
print(mse)
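For comparison with the closed-form solution in the next section, the fitted parameters can also be read directly off the model; a minimal sketch using the li object fitted above:

print(li.intercept_)             # bias term; should match the first entry of B below
print(li.coef_)                  # weights for LSTAT, PTRATIO, RM
print(li.score(test_x, test_y))  # R^2 on the test set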
—————————————————————————————————————————————
Take the partial derivative with respect to the coefficient matrix B; where the derivative is zero is the target solution (the least-squares normal equation)
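With a column of ones prepended to X, the squared-error loss is L(B) = (y - XB)^T (y - XB). Its gradient is dL/dB = -2 X^T (y - XB); setting this to zero gives X^T X B = X^T y, so B = (X^T X)^(-1) X^T y, which is exactly what the code below computes. The same -2 X^T (y - XB) term reappears as the gradient in the gradient-descent section afterwards.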
boston_data.insert(loc=0, column='one', value=1)  # prepend a column of ones for the bias term
print(boston_data)
x = np.array(boston_data)
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=0)  # split train/test
x_mat = np.mat(train_x)
y_mat = np.mat(train_y).T
xT = x_mat.T
B = (xT * x_mat).I * xT * y_mat  # normal equation: B = (X^T X)^(-1) X^T y
print(B)
y_predict = np.mat(test_x) * B
plt.figure(figsize=(10, 7))
plt.plot(y_predict, 'b-')
plt.plot(test_y, 'r--')
plt.legend(['predict', 'true'])
plt.title('Least Square Method')
plt.show()
# [[22.17686885]
#  [-0.55760828]
#  [-1.11165635]
#  [ 4.44512502]]
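To check this closed-form fit against the sklearn model above, the same test-set MSE can be computed; a minimal sketch, where y_predict is the matrix product from the block above:

mse_ne = metrics.mean_squared_error(test_y, np.asarray(y_predict).ravel())  # flatten the (n, 1) matrix first
print(mse_ne)  # should agree with the sklearn MSE printed earlier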
—————————————————————————————————————————————
Simple multiple linear regression (gradient descent versus the matrix method)
First prepare the data in x: a 1 stays at the front of each row (the 'one' column inserted above), so the intercept is learned as part of B.
print(boston_data)
x = np.array(boston_data)
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=0)  # split train/test
x_mat = np.mat(train_x)
y_mat = np.mat(train_y).T
xT = x_mat.T
B = np.mat([[22.1768], [-0.5576], [-1.11165], [4.44512]])  # initial guess near the solution
Bp = -2 * xT * (y_mat - x_mat * B)  # gradient of the loss with respect to B
while True:
    B = B - 0.000005 * Bp  # gradient-descent step with learning rate 0.000005
    Bp = -2 * xT * (y_mat - x_mat * B)
    s = 0
    for i in range(4):
        s += abs(Bp[i, 0])  # total absolute gradient over all four components
    print(s)
    if s <= 0.00001:  # stop once the gradient is effectively zero
        break
print("Final B:", B)
# Final B: [[22.17682455]
#           [-0.55760809]
#           [-1.11165553]
#           [ 4.44512921]]
y_predict = np.mat(test_x) * B
plt.figure(figsize=(10, 7))
plt.plot(y_predict, 'b-')
plt.plot(test_y, 'r--')
plt.legend(['predict', 'true'])
plt.title('Multiple Linear Regression')
plt.show()
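At this learning rate the loop can take many iterations to reach the 1e-5 threshold, so in practice an iteration cap is common; a hedged variant (max_iter and lr are illustrative names, not from the original):

lr = 0.000005
max_iter = 200000  # hypothetical cap; tune as needed
for it in range(max_iter):
    Bp = -2 * xT * (y_mat - x_mat * B)
    if np.sum(np.abs(Bp)) <= 0.00001:  # same convergence test, vectorized
        break
    B = B - lr * Bp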