赞
踩
"""Insurance-charges regression with linear, ridge and gradient-boosting models.

The script loads ``insurance.csv``, one-hot encodes it with
``pandas.get_dummies``, standardises the features, expands them with degree-2
polynomial terms, fits three regressors on ``log1p(charges)`` and prints the
train/test RMSE of each model on both the log scale and the original scale.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Location of the data set on the author's machine; adjust when running elsewhere.
DATA_PATH = r'D:\人工智能\python视频\机器学习\5--机器学习-线性回归\5--Lasso回归_Ridge回归_多项式回归\insurance.csv'

# One seed shared by the train/test split and the boosted model so that the
# reported numbers are reproducible from run to run.
RANDOM_STATE = 42


def rmse_report(model, x_tr, y_tr, x_te, y_te):
    """Return ``(log_rmse_train, log_rmse_test, rmse_train, rmse_test)``.

    Args:
        model: A fitted regressor whose ``predict`` output is on the
            ``log1p(target)`` scale.
        x_tr: Training feature matrix passed to ``model.predict``.
        y_tr: Training targets on the original (untransformed) scale.
        x_te: Test feature matrix passed to ``model.predict``.
        y_te: Test targets on the original (untransformed) scale.

    Returns:
        A 4-tuple of RMSE values: train and test error on the log1p scale,
        followed by train and test error on the original scale.
    """
    pred_tr_log = model.predict(x_tr)
    pred_te_log = model.predict(x_te)

    def _rmse(y_true, y_pred):
        return np.sqrt(mean_squared_error(y_true=y_true, y_pred=y_pred))

    return (
        _rmse(np.log1p(y_tr), pred_tr_log),
        _rmse(np.log1p(y_te), pred_te_log),
        # expm1 is the exact inverse of log1p; plain exp would leave every
        # back-transformed prediction off by one.
        _rmse(y_tr, np.expm1(pred_tr_log)),
        _rmse(y_te, np.expm1(pred_te_log)),
    )


# Equivalent of the ``%matplotlib inline`` notebook magic. ``get_ipython`` only
# exists inside IPython/Jupyter, so a plain interpreter simply skips it.
try:
    get_ipython().run_line_magic("matplotlib", "inline")  # noqa: F821
except NameError:
    pass

# %% Load and inspect the data
data = pd.read_csv(DATA_PATH, sep=',')
print(data.head(n=6))

plt.figure()
plt.hist(data['charges'])
# The raw charges are right-skewed; a log transform brings the distribution
# closer to normal. log1p is used to match the transform applied when fitting.
plt.figure()
plt.hist(np.log1p(data['charges']), bins=20)
plt.show()

# %% Encode and clean
# dtype=float keeps the indicator columns numeric instead of bool.
data = pd.get_dummies(data, dtype=float)
print(data.head())

# Non-inplace fillna avoids mutating a selection taken from ``data``.
x = data.drop('charges', axis=1).fillna(0)
print(x)
y = data['charges'].fillna(0)

# %% Split, scale and expand the features
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=RANDOM_STATE
)

scaler = StandardScaler(with_mean=True, with_std=True).fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)
print(x_train_scaled)

poly_features = PolynomialFeatures(degree=2, include_bias=False)
x_train_scaled = poly_features.fit_transform(x_train_scaled)
# Only transform the test set: the transformer is fitted on training data alone.
x_test_scaled = poly_features.transform(x_test_scaled)

# All models are trained on the log1p-transformed target.
y_train_log = np.log1p(y_train)

# %% Linear regression
reg = LinearRegression()
reg.fit(x_train_scaled, y_train_log)
y_predict = reg.predict(x_test_scaled)

# %% Ridge regression
ridge = Ridge(alpha=0.4)
ridge.fit(x_train_scaled, y_train_log)
y_predict_ridge = ridge.predict(x_test_scaled)

# %% Gradient boosting
booster = GradientBoostingRegressor(random_state=RANDOM_STATE)
booster.fit(x_train_scaled, y_train_log)
# Predict with the boosted model itself (not the ridge model).
y_predict_booster = booster.predict(x_test_scaled)

# %% Evaluation: RMSE on the log scale and on the original scale
for label, fitted_model in (
    ("linear", reg),
    ("ridge", ridge),
    ("booster", booster),
):
    log_rmse_train, log_rmse_test, rmse_train, rmse_test = rmse_report(
        fitted_model, x_train_scaled, y_train, x_test_scaled, y_test
    )
    print(label, (log_rmse_train, log_rmse_test, rmse_train, rmse_test))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。