当前位置:   article > 正文

线性回归 —— sklearn + Python 实现

使用 Python 与 sklearn 库对数据进行均值方差规范化，并使用线性回归类创建线性回归对象。

简单线性回归

问题

思想



回到正题,对于简单线性回归有如下问题:






下面通过程序来实现简单的线性回归:

  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. x=np.array([1,2,3,4,5])
  4. y=np.array([1,3,2,3,5])
  5. plt.scatter(x,y)
  6. x_mean=np.mean(x)
  7. y_mean=np.mean(y)
  8. up=0.0
  9. down=0.0
  10. for x_i,y_i in zip(x,y):
  11. up+=(x_i-x_mean)*(y_i-y_mean)
  12. down+=(x_i-x_mean)**2
  13. a=up/down
  14. b=y_mean-a*x_mean
  15. print(a,b)
  16. y_hat=a*x+b
  17. plt.plot(x,y_hat,c='red')
  18. plt.axis([0,6,0,6])
  19. plt.show()

创建自己的线性回归类:

  1. import numpy as np
  2. class SimpleLinearRegression1:
  3. def __init__(self):
  4. self.a_=None
  5. self.b_=None
  6. def fit(self,x_train,y_train):
  7. x_mean = np.mean(x_train)
  8. y_mean = np.mean(y_train)
  9. up = 0.0
  10. down = 0.0
  11. for x_i, y_i in zip(x_train, y_train):
  12. up += (x_i - x_mean) * (y_i - y_mean)
  13. down += (x_i - x_mean) ** 2
  14. self.a_ = up / down
  15. self.b_ = y_mean - self.a_ * x_mean
  16. return self
  17. def _predict(self,x_single):
  18. return x_single*self.a_+self.b_
  19. def predict(self,x_predict):
  20. return np.array([self._predict(x) for x in x_predict])

  1. #改进,将for循环用向量化实现,增加效率
  2. class SimpleLinearRegression2:
  3. def __init__(self):
  4. self.a_=None
  5. self.b_=None
  6. def fit(self,x_train,y_train):
  7. x_mean = np.mean(x_train)
  8. y_mean = np.mean(y_train)
  9. # up = np.dot((x_train-x_mean),(y_train-y_mean))
  10. # down =np.dot((x_train-x_mean),(x_train-x_mean))
  11. up = np.sum((x_train - x_mean)*(y_train - y_mean))
  12. down = np.sum((x_train - x_mean)*(x_train - x_mean))
  13. self.a_ = up / down
  14. self.b_ = y_mean - self.a_ * x_mean
  15. return self
  16. def _predict(self,x_single):
  17. return x_single*self.a_+self.b_
  18. def predict(self,x_predict):
  19. return np.array([self._predict(x) for x in x_predict])
  1. from ML import SimpleLinearRegression
  2. s=SimpleLinearRegression.SimpleLinearRegression2()
  3. s.fit(x,y)
  4. y_hat=s.predict(np.array([6]))
  5. y_hat=s.a_*x+s.b_
  6. plt.plot(x,y_hat,c='red')
  7. plt.axis([0,6,0,6])
  8. plt.title('y=%s*x+%s'%(s.a_,s.b_))
  9. plt.show()

线性回归算法的评测

均方误差 MSE、均方根误差 RMSE（与样本同量纲）、平均绝对误差 MAE

编程实现:

  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. from sklearn import datasets
  4. #生成数据
  5. boston=datasets.load_boston()
  6. print(boston.DESCR)
  7. print(boston.feature_names)
  8. x=boston.data[:,5]
  9. y=boston.target
  10. x=x[y<50]
  11. y=y[y<50]
  12. #进行训练集和测试集划分
  13. from ML.model_selection import train_test_split
  14. x_train,x_test,y_train,y_test=train_test_split(x,y,seed=666)
  15. #进行简单的线性回归
  16. from ML.SimpleLinearRegression import SimpleLinearRegression2
  17. s=SimpleLinearRegression2()
  18. s.fit(x_train,y_train)
  19. print(s.a_,s.b_)
  20. plt.scatter(x,y)
  21. plt.plot(x,s.a_*x+s.b_,c='red')
  22. plt.show()
  23. #线性回归模型的评估指标
  24. y_predict=s.predict(x_test)
  25. #MSE
  26. mse_test=np.sum((y_predict-y_test)**2)/len(x_test)
  27. print(mse_test)
  28. #RMSE
  29. rmse_test=np.sqrt(mse_test)
  30. print(rmse_test)
  31. #MAE
  32. mae_test=np.sum(np.absolute(y_predict-y_test))/len(x_test)
  33. print(mae_test)
  1. mse_test 24.1566021344
  2. rmse_test 4.91493663585
  3. mae_test 3.54309744095

sklearn中的MSE和MAE

  1. from sklearn.metrics import mean_squared_error,mean_absolute_error
  2. mean_squared_error(y_test,y_predict)
  3. print('sk_mse_test',mse_test)
  4. mean_absolute_error(y_test,y_predict)
  5. print('sk_mae_test',mae_test)
  1. sk_mse_test 24.1566021344
  2. sk_mae_test 3.54309744095

更好的衡量线性回归的指标 R Squared




  1. #R Square
  2. R=1-mean_squared_error(y_test,y_predict)/np.var(y_test)
  3. print(R)
R Square: 0.612931680394

使用sklearn计算R Square

  1. from sklearn.metrics import r2_score
  2. r2=r2_score(y_test,y_predict)
  3. print('r2_score',r2)
r2_score 0.612931680394






  1. import numpy as np
  2. from .metrics import r2_score
  3. class LinearRegression:
  4. def __init__(self):
  5. self.coef_=None #系数
  6. self.interception_=None #截距
  7. self._theta=None
  8. def fit_normal(self,X_train,y_train):
  9. x_b=np.hstack([np.ones((len(X_train),1)),X_train])
  10. self._theta=np.linalg.inv(x_b.T.dot(x_b)).dot(x_b.T).dot(y_train)
  11. self.coef_=self._theta[1:]
  12. self.interception_=self._theta[0]
  13. return self
  14. def predict(self,x_predict):
  15. x_b = np.hstack([np.ones((len(x_predict), 1)), x_predict])
  16. return x_b.dot(self._theta)
  17. def score(self,x_test,y):
  18. y_predict=self.predict(x_test)
  19. return r2_score(y_predict,y)

使用自己的类实现线性回归:

  1. from sklearn import datasets
  2. from sklearn.model_selection import train_test_split
  3. from ML.LinearRegression import LinearRegression
  4. boston=datasets.load_boston()
  5. #使用全部的列
  6. X=boston.data
  7. y=boston.target
  8. x=X[y<50]
  9. y=y[y<50]
  10. x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=666)
  11. L=LinearRegression()
  12. L.fit_normal(x_train,y_train)
  13. print(L)
  14. print(L.coef_)
  15. print(L.interception_)
  16. score=L.score(x_test,y_test)
  17. print(score)

使用sklearn实现线性回归

  1. from sklearn.linear_model import LinearRegression
  2. lin_reg=LinearRegression()
  3. lin_reg.fit(x_train,y_train)
  4. score=lin_reg.score(x_test,y_test)
  5. print(score)












声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/从前慢现在也慢/article/detail/412040
推荐阅读
相关标签
  

闽ICP备14008679号