赞
踩
投资商经常会通过多个不同渠道投放广告,以此来获得经济利益。在本案例中,我们选取公司在电视、广播和报纸上的广告投入来预测广告收益,这对公司策略的制定具有较重要的意义。
- # Load the advertising data from the Excel workbook into a DataFrame
- import pandas as pd
- df = pd.read_excel('广告收益数据.xlsx')
- df.head()  # preview the first few rows
- # 1. Separate features and target: '收益' is the target column, every other column is a feature
- X = df.drop(columns='收益')
- y = df['收益']
-
- # 2. Split into training and test sets (20% held out for testing; random_state is fixed so the split is repeatable)
- from sklearn.model_selection import train_test_split
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
-
- # 3. Build and train a LightGBM regressor with its default hyperparameters
- from lightgbm import LGBMRegressor
- model = LGBMRegressor()
- model.fit(X_train, y_train)
- # Predict on the test set
- y_pred = model.predict(X_test)
- y_pred[0:5]  # first five predictions
- # 预测值和实际值对比
- a = pd.DataFrame() # 创建一个空DataFrame
- a['预测值'] = list(y_pred)
- a['实际值'] = list(y_test)
- a.head()
- # Score the baseline model on the held-out test set
- model.score(X_test, y_test)
- # Feature importances of the fitted model
- model.feature_importances_
- # Hyperparameter tuning
- from sklearn.model_selection import GridSearchCV # grid search over candidate hyperparameter values
- parameters = {'num_leaves': [15, 31, 62], 'n_estimators': [20, 30, 50, 70], 'learning_rate': [0.1, 0.2, 0.3, 0.4]} # candidate values for each hyperparameter of the regressor
- model = LGBMRegressor() # base estimator to be tuned
- grid_search = GridSearchCV(model, parameters,scoring='r2',cv=5) # cv=5 means 5-fold cross-validation; scoring='r2' ranks candidates by R-squared
- # Run the search on the training data, then read off the best parameter combination
- grid_search.fit(X_train, y_train) # fit the search on the training data
- grid_search.best_params_ # best parameter combination found by the search
- # 重新搭建LightGBM回归模型
- model = LGBMRegressor(num_leaves=31, n_estimators=50,learning_rate=0.3)
- model.fit(X_train, y_train)
-
- # 查看得分
- model.score(X_test, y_test)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。