当前位置:   article > 正文

机器学习-保险花销预测笔记+代码

机器学习-保险花销预测笔记+代码

读取数据

  1. import numpy as np  # numerical helpers (log transforms, sqrt) used in later cells
  2. import pandas as pd  # tabular data loading and manipulation
  3. data=pd.read_csv(r'D:\人工智能\python视频\机器学习\5--机器学习-线性回归\5--Lasso回归_Ridge回归_多项式回归\insurance.csv',sep=',')  # load the insurance CSV from the author's local path; adjust to your own location
  4. data.head(n=6)  # preview the first six rows

EDA 数据探索

  1. import matplotlib.pyplot as plt  # plotting library for the exploratory histograms
  2. %matplotlib inline
  3. plt.hist(data['charges'])  # histogram of the raw 'charges' column

 

  1. # The histogram above is right-skewed; transform it toward a normal-looking shape
  2. plt.hist(np.log(data['charges']),bins=20)  # histogram of the natural log of 'charges' with 20 bins

特征工程

  1. data=pd.get_dummies(data)  # one-hot encode the non-numeric columns into indicator columns
  2. data.head()  # inspect the encoded frame

  1. x=data.drop('charges',axis=1)  # feature matrix: every column except the target 'charges'
  2. x  # display the feature matrix

  1. y=data['charges']
  2. x.fillna(0,inplace=True)
  3. y.fillna(0,inplace=True)
  4. from sklearn.model_selection import train_test_split
  5. x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)
  6. from sklearn.preprocessing import StandardScaler
  7. scaler=StandardScaler(with_mean=True,with_std=True).fit(x_train)
  8. x_train_scaled=scaler.transform(x_train)
  9. x_test_scaled=scaler.transform(x_test)
  10. x_train_scaled

  1. from sklearn.preprocessing import PolynomialFeatures
  2. poly_features=PolynomialFeatures(degree=2,include_bias=False)
  3. x_train_scaled=poly_features.fit_transform(x_train_scaled)
  4. x_test_scaled=poly_features.fit_transform(x_test_scaled)

模型训练

  1. from sklearn.linear_model import LinearRegression
  2. reg=LinearRegression()
  3. reg.fit(x_train_scaled,np.log1p(y_train))
  4. y_predict=reg.predict(x_test_scaled)
  5. #%%
  6. from sklearn.linear_model import Ridge
  7. ridge=Ridge(alpha=0.4)
  8. ridge.fit(x_train_scaled,np.log1p(y_train))
  9. y_predict_ridge=ridge.predict(x_test_scaled)
  10. #%%
  11. from sklearn.ensemble import GradientBoostingRegressor
  12. booster=GradientBoostingRegressor()
  13. booster.fit(x_train_scaled,np.log1p(y_train))
  14. y_predict_booster=ridge.predict(x_test_scaled)

模型评估

  1. from sklearn.metrics import mean_squared_error
  2. #log变换之后的
  3. log_rmse_train=np.sqrt(mean_squared_error(y_true=np.log1p(y_train),y_pred=reg.predict(x_train_scaled)))
  4. log_rmse_test=np.sqrt(mean_squared_error(y_true=np.log1p(y_test),y_pred=y_predict))
  5. #没有做log变换的
  6. rmse_train=np.sqrt(mean_squared_error(y_true=y_train,y_pred=np.exp(reg.predict(x_train_scaled))))
  7. rmse_test=np.sqrt(mean_squared_error(y_true=y_test,y_pred=np.exp(reg.predict(x_test_scaled))))
  8. log_rmse_train,log_rmse_test,rmse_train,rmse_test

  1. #log变换之后的
  2. log_rmse_train=np.sqrt(mean_squared_error(y_true=np.log1p(y_train),y_pred=ridge.predict(x_train_scaled)))
  3. log_rmse_test=np.sqrt(mean_squared_error(y_true=np.log1p(y_test),y_pred=y_predict_ridge))
  4. #没有做log变换的
  5. rmse_train=np.sqrt(mean_squared_error(y_true=y_train,y_pred=np.exp(ridge.predict(x_train_scaled))))
  6. rmse_test=np.sqrt(mean_squared_error(y_true=y_test,y_pred=np.exp(ridge.predict(x_test_scaled))))
  7. log_rmse_train,log_rmse_test,rmse_train,rmse_test

  1. #log变换之后的
  2. log_rmse_train=np.sqrt(mean_squared_error(y_true=np.log1p(y_train),y_pred=booster.predict(x_train_scaled)))
  3. log_rmse_test=np.sqrt(mean_squared_error(y_true=np.log1p(y_test),y_pred=y_predict_booster))
  4. #没有做log变换的
  5. rmse_train=np.sqrt(mean_squared_error(y_true=y_train,y_pred=np.exp(booster.predict(x_train_scaled))))
  6. rmse_test=np.sqrt(mean_squared_error(y_true=y_test,y_pred=np.exp(booster.predict(x_test_scaled))))
  7. log_rmse_train,log_rmse_test,rmse_train,rmse_test

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Monodyee/article/detail/540628
推荐阅读
相关标签
  

闽ICP备14008679号