Main topics:
- Implementing the linear regression equation
- How gradient descent behaves
- Comparing different gradient descent strategies
- Analyzing model fitting curves
- Overfitting and underfitting
- The role of regularization
- The early stopping strategy
1. Experiment Goals
import numpy as np
import os
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
import warnings
warnings.filterwarnings('ignore')  # suppress warnings
np.random.seed(42)

The regression equation:
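The model fitted throughout this section is the single-feature linear model (reconstructed here since the original figure is not preserved; θ₀ is the bias and θ₁ the weight on x₁):

$$\hat{y} = \theta_0 + \theta_1 x_1$$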
The fact that a closed-form solution happens to exist here can be treated as a coincidence; the core idea in machine learning is iterative updating of the parameters.
import numpy as np
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)  # np.random.randn(100,1) adds random Gaussian noise
plt.plot(X, y, 'r.')
plt.xlabel('X_1')
plt.ylabel('y')
plt.axis([0, 2, 0, 15])
plt.show()
2. Solving for the Parameters
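The cell below solves for θ directly with the normal equation (the closed-form least-squares solution; X_b is X with a bias column of ones prepended):

$$\hat{\theta} = (X_b^\top X_b)^{-1} X_b^\top y$$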
X_b = np.c_[np.ones((100, 1)), X]  # prepend a column of ones for the bias term
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)  # (X^T X)^{-1} X^T y; np.linalg.inv computes the matrix inverse
theta_best
array([[4.21509616], [2.77011339]])
X_new = np.array([[0], [2]])             # [0] -> start of the plotted range, [2] -> end of the range
X_new_b = np.c_[np.ones((2, 1)), X_new]  # test data with the bias column added
y_predict = X_new_b.dot(theta_best)      # theta_best holds the learned weights theta0 and theta1
y_predict
array([[4.21509616], [9.75532293]])
plt.plot(X_new, y_predict, 'r--')
plt.plot(X, y, 'b.')
plt.axis([0, 2, 0, 15])
plt.show()
sklearn API documentation:
API Reference — scikit-learn 1.0.2 documentation
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
print(lin_reg.coef_)       # lin_reg.coef_ holds the weight parameters
print(lin_reg.intercept_)  # lin_reg.intercept_ holds the bias parameter
[[2.77011339]] [4.21509616]
3. Effect of Preprocessing on the Results
Gradient descent
Gradient descent is the core optimization method here; it is useful not only for linear regression but also in other algorithms, for example neural networks.
Problem: the step size is too small
Problem: the step size is too large
The learning rate should be kept fairly small, and it should shrink as the iterations proceed.
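Written as an update rule, this is what every gradient descent variant below implements, with η the learning rate and J the cost being minimized:

$$\theta^{(t+1)} = \theta^{(t)} - \eta \,\nabla_\theta J\big(\theta^{(t)}\big)$$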
The role of standardization:
- After getting the data, a standardization step is almost always needed before training; a minimal sketch follows below.
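A minimal sketch of that step, assuming sklearn's StandardScaler (the names X_demo / X_scaled are only for illustration and are not from the original cells):

```python
from sklearn.preprocessing import StandardScaler
import numpy as np

X_demo = 2 * np.random.rand(100, 1)            # data of the same shape as above
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_demo)        # subtract the mean, divide by the standard deviation
print(X_scaled.mean(axis=0), X_scaled.std(axis=0))  # approximately 0 and 1
```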
The batch gradient descent update formula
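Reconstructed from the code below (m is the number of samples, X_b includes the bias column), the gradient of the MSE cost and the resulting update are:

$$\nabla_\theta \mathrm{MSE}(\theta) = \frac{2}{m} X_b^\top (X_b\theta - y), \qquad \theta \leftarrow \theta - \eta\,\nabla_\theta \mathrm{MSE}(\theta)$$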
4. Gradient Descent
Batch gradient descent
eta = 0.1            # learning rate
n_iterations = 1000  # number of iterations
m = 100              # number of samples
theta = np.random.randn(2, 1)  # random initialization
for iteration in range(n_iterations):
    gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)  # (2/m) X^T (X theta - y)
    theta = theta - eta * gradients                    # theta = theta - learning_rate * gradient
theta
array([[4.21509616], [2.77011339]])
X_new_b.dot(theta)
array([[4.21509616], [9.75532293]])
5. Effect of the Learning Rate on the Results
theta_path_bgd = []

def plot_gradient_descent(theta, eta, theta_path=None):
    m = len(X_b)
    plt.plot(X, y, 'b.')
    n_iterations = 1000
    for iteration in range(n_iterations):
        y_predict = X_new_b.dot(theta)   # current prediction
        plt.plot(X_new, y_predict, 'b-')
        gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
        theta = theta - eta * gradients  # update theta
        if theta_path is not None:
            theta_path.append(theta)
    plt.xlabel('X_1')
    plt.axis([0, 2, 0, 15])
    plt.title('eta = {}'.format(eta))
theta = np.random.randn(2, 1)
plt.figure(figsize=(10, 4))
plt.subplot(131)
plot_gradient_descent(theta, eta=0.02)
plt.subplot(132)
plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd)
plt.subplot(133)
plot_gradient_descent(theta, eta=0.5)
plt.show()
6. Results from Stochastic Gradient Descent
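In the cell below each update uses a single randomly drawn sample (x_i, y_i), and the learning rate decays according to the schedule defined in learning_schedule; written out:

$$\nabla_\theta J_i(\theta) = 2\, x_i^\top (x_i\theta - y_i), \qquad \eta(t) = \frac{t_0}{t_1 + t}, \qquad \theta \leftarrow \theta - \eta(t)\,\nabla_\theta J_i(\theta)$$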
theta_path_sgd = []
m = len(X_b)
np.random.seed(42)
n_epochs = 50
t0 = 5
t1 = 50

def learning_schedule(t):
    return t0 / (t1 + t)

theta = np.random.randn(2, 1)  # initialize theta
for epoch in range(n_epochs):
    for i in range(m):
        if epoch < 10 and i < 10:
            y_predict = X_new_b.dot(theta)
            plt.plot(X_new, y_predict, 'r-')
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index + 1]  # pick the current sample
        yi = y[random_index:random_index + 1]
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)  # gradient for a single sample
        eta = learning_schedule(epoch * m + i)        # decay the learning rate
        theta = theta - eta * gradients               # update theta
        theta_path_sgd.append(theta)
plt.plot(X, y, 'b.')
plt.axis([0, 2, 0, 15])
plt.show()
7. The Mini-batch Method
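The only change relative to SGD is that each update averages the gradient over a small batch B of size b (b = minibatch in the code below, X_B and y_B the rows in that batch):

$$\nabla_\theta \mathrm{MSE}_B(\theta) = \frac{2}{b}\, X_B^\top (X_B\theta - y_B)$$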
theta_path_mgd = []
n_epochs = 50   # number of epochs
minibatch = 16  # mini-batch size
theta = np.random.randn(2, 1)  # initialize theta
t0, t1 = 200, 1000

def learning_schedule(t):
    return t0 / (t + t1)

np.random.seed(42)
t = 0
for epoch in range(n_epochs):
    shuffled_indices = np.random.permutation(m)  # shuffle the data
    X_b_shuffled = X_b[shuffled_indices]         # reindex X_b with the shuffled order
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch):
        t += 1
        xi = X_b_shuffled[i:i + minibatch]
        yi = y_shuffled[i:i + minibatch]
        gradients = 2 / minibatch * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(t)       # decay the learning rate; t is the update counter
        theta = theta - eta * gradients  # update theta
        theta_path_mgd.append(theta)
theta
array([[4.25490685], [2.80388784]])
8. Comparing the Different Strategies
theta_path_bgd = np.array(theta_path_bgd)
theta_path_sgd = np.array(theta_path_sgd)
theta_path_mgd = np.array(theta_path_mgd)
plt.figure(figsize=(12, 6))
plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], 'r-s', linewidth=1, label='SGD')  # [:,0] -> theta0, [:,1] -> theta1
plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], 'g-+', linewidth=2, label='MINIGD')
plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], 'b-o', linewidth=3, label='BGD')
plt.legend(loc='upper left')
plt.axis([3.5, 4.5, 2.0, 4.0])
plt.show()

In practice mini-batch gradient descent is used most often; as a rule, the batch size should be chosen as large as resources allow.
9. Polynomial Regression
m = 100
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + np.random.randn(m, 1)  # .randn adds Gaussian noise
plt.plot(X, y, 'b.')
plt.xlabel('X_1')
plt.ylabel('y')
plt.axis([-3, 3, -5, 10])
plt.show()
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)  # fit learns the polynomial expansion; transform applies it and returns the expanded features
X[0]

array([2.38942838])
X_poly[0]  # x = 2.38942838, x^2 = 5.709368
array([2.38942838, 5.709368 ])
2.82919615 ** 2
8.004350855174822
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)
print(lin_reg.coef_)
print(lin_reg.intercept_)
# fitted equation: y = 0.95038538*x + 0.52577032*(x**2) - 0.0264767

[[0.95038538 0.52577032]]
[-0.0264767]
X_new = np.linspace(-3, 3, 100).reshape(100, 1)
X_new_poly = poly_features.transform(X_new)
y_new = lin_reg.predict(X_new_poly)
plt.plot(X, y, 'b.')
plt.plot(X_new, y_new, 'r--', label='prediction')
plt.axis([-3, 3, -5, 10])
plt.legend()
plt.show()
10. Model Complexity
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler  # import the standardization module

plt.figure(figsize=(12, 6))
for style, width, degree in (('g-', 1, 100), ('m--', 1, 2), ('r-+', 1, 1)):
    poly_features = PolynomialFeatures(degree=degree, include_bias=False)
    std = StandardScaler()
    lin_reg = LinearRegression()
    polynomial_reg = Pipeline([('poly_features', poly_features),
                               ('StandardScaler', std),  # standardization step
                               ('lin_reg', lin_reg)])    # regression step
    polynomial_reg.fit(X, y)
    y_new_2 = polynomial_reg.predict(X_new)
    plt.plot(X_new, y_new_2, style, label='degree ' + str(degree), linewidth=width)
plt.plot(X, y, 'b.')
plt.axis([-3, 3, -5, 10])
plt.legend()
plt.show()

The more complex the feature transformation, the higher the risk of overfitting; making it especially complex is not recommended.
11. Effect of Sample Size on the Results
How the number of training samples affects the results
sklearn.metrics.mean_squared_error — scikit-learn 1.0.2 documentation
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def plot_learning_curves(model, X, y):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=100)
    train_errors, val_errors = [], []
    for m in range(1, len(X_train)):
        model.fit(X_train[:m], y_train[:m])
        y_train_predict = model.predict(X_train[:m])  # predictions on the training subset
        y_val_predict = model.predict(X_val)          # predictions on the validation set
        train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    plt.plot(np.sqrt(train_errors), 'r-+', linewidth=2, label='train_error')  # root of the mean squared error
    plt.plot(np.sqrt(val_errors), 'b-', linewidth=3, label='val_error')
    plt.xlabel('Training set size')
    plt.ylabel('RMSE')
    plt.legend()
lin_reg = LinearRegression()
plot_learning_curves(lin_reg, X, y)
plt.axis([0, 80, 0, 5])
plt.show()
With little data, the training-set error looks very good, but performance on held-out data is mediocre. When building models in practice, judge by the results on the validation and test sets.
The overfitting risk of polynomial regression
polynomial_reg = Pipeline([('poly_features', PolynomialFeatures(degree=25, include_bias=False)),
                           ('lin_reg', LinearRegression())])  # pipeline: polynomial features, then regression
plot_learning_curves(polynomial_reg, X, y)
plt.axis([0, 80, 0, 5])
plt.show()
The more complex the model, the more it overfits.
12. The Role of Regularization
Regularization penalizes the weight parameters so that they stay small and the fitted curve stays smooth. There are two common ways to apply the penalty, both demonstrated below: Ridge (an L2 penalty) and Lasso (an L1 penalty).
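Written out in their standard forms (up to the scaling conventions that differ slightly between implementations), with MSE(θ) the unregularized cost and α the regularization strength used in the cells below:

$$J_{\text{ridge}}(\theta) = \mathrm{MSE}(\theta) + \alpha \sum_{j=1}^{n} \theta_j^2, \qquad J_{\text{lasso}}(\theta) = \mathrm{MSE}(\theta) + \alpha \sum_{j=1}^{n} |\theta_j|$$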
from sklearn.linear_model import Ridge
np.random.seed(42)
m = 20
X = 3 * np.random.rand(m, 1)
y = 0.5 * X + np.random.randn(m, 1) / 1.5 + 1
X_new = np.linspace(0, 3, 100).reshape(100, 1)

def plot_model(model_class, polynomial, alphas, **model_kargs):
    for alpha, style in zip(alphas, ('b-', 'g--', 'r:')):
        model = model_class(alpha, **model_kargs)
        if polynomial:
            model = Pipeline([('poly_features', PolynomialFeatures(degree=10, include_bias=False)),
                              ('StandardScaler', StandardScaler()),
                              ('lin_reg', model)])
        model.fit(X, y)
        y_new_regul = model.predict(X_new)
        lw = 2 if alpha > 0 else 1
        plt.plot(X_new, y_new_regul, style, linewidth=lw, label='alpha = {}'.format(alpha))
    plt.plot(X, y, 'b.', linewidth=3)
    plt.legend()

plt.figure(figsize=(14, 6))
plt.subplot(121)
plot_model(Ridge, polynomial=False, alphas=(0, 10, 100))
plt.subplot(122)
plot_model(Ridge, polynomial=True, alphas=(0, 10**-5, 1))
plt.show()

The stronger the penalty (the larger the alpha value), the smoother and more stable the resulting decision function.
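As a quick numeric check of that claim (not from the original notebook; the pipeline mirrors the one above, and the names X_demo / y_demo are only for illustration), comparing the coefficient norm of Ridge fits at two alpha values should show the shrinking effect:

```python
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Ridge

np.random.seed(42)
X_demo = 3 * np.random.rand(20, 1)
y_demo = 0.5 * X_demo + np.random.randn(20, 1) / 1.5 + 1

for alpha in (1e-5, 1.0):
    model = Pipeline([('poly', PolynomialFeatures(degree=10, include_bias=False)),
                      ('scale', StandardScaler()),
                      ('ridge', Ridge(alpha=alpha))])
    model.fit(X_demo, y_demo.ravel())
    coefs = model.named_steps['ridge'].coef_
    # larger alpha -> smaller coefficient norm -> smoother fitted curve
    print(alpha, np.linalg.norm(coefs))
```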
13. Ridge Regression and Lasso
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html#sklearn.linear_model.Ridge
from sklearn.linear_model import Lasso
plt.figure(figsize=(14, 6))
plt.subplot(121)
plot_model(Lasso, polynomial=False, alphas=(0, 0.1, 1))
plt.subplot(122)
plot_model(Lasso, polynomial=True, alphas=(0, 10**-1, 1))
plt.show()
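A closely related practical difference between the two penalties, sketched here only as a hedged illustration (the data and variable names are made up for the demo): Lasso's L1 penalty tends to drive some coefficients exactly to zero, while Ridge merely shrinks them.

```python
import numpy as np
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Ridge, Lasso

np.random.seed(42)
X_demo = 3 * np.random.rand(20, 1)
y_demo = (0.5 * X_demo + np.random.randn(20, 1) / 1.5 + 1).ravel()

# degree-10 polynomial features, standardized so both penalties act on comparable scales
X_feat = StandardScaler().fit_transform(
    PolynomialFeatures(degree=10, include_bias=False).fit_transform(X_demo))

ridge = Ridge(alpha=1.0).fit(X_feat, y_demo)
lasso = Lasso(alpha=0.1).fit(X_feat, y_demo)
print('zero coefficients (Ridge):', int(np.sum(ridge.coef_ == 0)))  # typically 0
print('zero coefficients (Lasso):', int(np.sum(lasso.coef_ == 0)))  # typically several
```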