赞
踩
手机问题,其他行业的人,以为程序员们,什么都会,
程序员中,女程序员以为男程序员,什么都会,
男程序员中,一般程序员以为技术好的程序员,什么都会,
技术好的程序员,每次都在网上苦苦找答案。。。。。。
完整工程代码已上传:使用 SVM 预测波士顿房价
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import warnings
import sklearn
from sklearn.svm import SVR#对比SVC,是svm的回归形式
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
## Configure matplotlib so Chinese text is not rendered as garbled characters
mpl.rcParams.update({
    'font.sans-serif': [u'SimHei'],
    'axes.unicode_minus': False,
})
def notEmpty(s):
    """Return True when ``s`` is anything other than the empty string.

    Intended as a ``filter`` predicate for discarding the empty tokens
    that ``str.split(' ')`` yields for runs of consecutive spaces.
    """
    return s != ''
## Load the data
# Names of the 13 feature columns; the 14th column in the file is the target.
# NOTE(review): `names` is not referenced by the code visible here.
names = ['CRIM','ZN', 'INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']
path = "datas/boston_housing.data"
## The file's layout is not uniform, so each line is first read as one single
## field and every row is then parsed by hand.
fd = pd.read_csv(path, header=None)
data = np.empty((len(fd), 14))
for i, row in enumerate(fd.values):
    # Splitting on a single space leaves empty tokens wherever several spaces
    # occur in a row; drop them before converting to float.
    data[i] = [float(token) for token in row[0].split(' ') if notEmpty(token)]
## Split into features (first 13 columns) and target (last column)
x, y = np.split(data, (13,), axis=1)
y = y.ravel()  # flatten the (n, 1) target column into a 1-D vector
print ("样本数据量:%d, 特征个数:%d" % x.shape)
print ("target样本数据量:%d" % y.shape[0])
样本数据量:506, 特征个数:13
target样本数据量:506
# Split the data: 20% is held out for testing, with a fixed random_state
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=28
)

## Build the model (the parameter types are basically the same as for SVC)
# Every combination below is evaluated with 3-fold cross-validation.
parameters = dict(
    kernel=['linear', 'rbf'],
    C=[0.1, 0.5, 0.9, 1, 5],
    gamma=[0.001, 0.01, 0.1, 1],
)
model = GridSearchCV(SVR(), param_grid=parameters, cv=3)
model.fit(x_train, y_train)
GridSearchCV(cv=3, error_score='raise',
estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
fit_params={}, iid=True, n_jobs=1,
param_grid={'kernel': ['linear', 'rbf'], 'C': [0.1, 0.5, 0.9, 1, 5], 'gamma': [0.001, 0.01, 0.1, 1]},
pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
scoring=None, verbose=0)
## Report the best configuration found by the grid search
# No `scoring` was passed to GridSearchCV, so best_score_ comes from the
# estimator's own score method (R^2 for SVR), even though the printed label
# calls it "accuracy".
best_summary = (
    ("最优参数列表:", model.best_params_),
    ("最优模型:", model.best_estimator_),
    ("最优准确率:", model.best_score_),
)
for caption, value in best_summary:
    print(caption, value)
最优参数列表: {'C': 5, 'gamma': 0.001, 'kernel': 'linear'}
最优模型: SVR(C=5, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.001,
kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
最优准确率: 0.7372572871508232
## Print model performance on the training set and on the held-out test set
# Each score is evaluated and printed in turn, as a percentage.
for template, features, targets in (
    ("训练集准确率:%.2f%%", x_train, y_train),
    ("测试集准确率:%.2f%%", x_test, y_test),
):
    print(template % (model.score(features, targets) * 100))
训练集准确率:74.37%
测试集准确率:48.77%
## Plot the true test-set targets against the SVR predictions
# NOTE(review): `colors` is not referenced by the code visible here.
colors = ['g-', 'b-']
ln_x_test = range(len(x_test))  # x-axis: index of each test sample
y_predict = model.predict(x_test)
plt.figure(figsize=(16,8), facecolor='w')
plt.plot(ln_x_test, y_test, 'r-', lw=2, label=u'真实值')
# NOTE(review): the legend shows model.best_score_ (the grid-search
# cross-validation score), not the score on the test samples drawn here --
# confirm this is the intended number.
plt.plot(ln_x_test, y_predict, 'g-', lw = 3, label=u'SVR算法估计值,$R^2$=%.3f' % (model.best_score_))
# Show the figure
plt.legend(loc = 'upper left')
plt.grid(True)
plt.title(u"波士顿房屋价格预测(SVM)")
plt.xlim(0, 101)
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。