赞
踩
一、简答题（共2题，100分）
"""Compare a decision tree, a linear SVM and an MLP on the wine data set.

Reads the wine CSV, holds out 25% of the rows as a test set, fits the three
classifiers, prints each test-set accuracy and draws a bar chart of them.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import model_selection
from sklearn import tree
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from pandas import DataFrame

# Load the data.
# header=None keeps the first row as a sample. The previous version let pandas
# treat that row as column names and then overwrote the names, silently
# dropping one sample (the UCI wine.data file ships without a header row).
column_names = [str(i) for i in range(14)]
data = pd.read_csv('C:\\python\\wine.data', header=None, names=column_names)

# Column 0 is the class label, columns 1..13 are the features.
X = data.iloc[:, 1:14].values.astype(float)
y = data.iloc[:, 0].values.astype(float)

# Split into training and test sets (fixed seed so runs are comparable).
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=1)

# Decision tree
learning = tree.DecisionTreeClassifier()
learning.fit(X_train, y_train)
tree_score = learning.score(X_test, y_test)
print('决策树性能:{:.2f}'.format(tree_score))

# SVM. gamma is only a coefficient of the rbf/poly/sigmoid kernels, so it is
# not passed for the linear kernel used here.
learning = svm.SVC(kernel='linear', C=100)
learning.fit(X_train, y_train)
svm_score = learning.score(X_test, y_test)
print('支持向量机性能:{:.2f}'.format(svm_score))

# Neural network
learning = MLPClassifier(solver='lbfgs', batch_size='auto', random_state=1)
learning.fit(X_train, y_train)
mlp_score = learning.score(X_test, y_test)
print('神经网络性能:{:.2f}'.format(mlp_score))

# Compare the three test-set accuracies in a bar chart.
scores = DataFrame([tree_score, svm_score, mlp_score],
                   columns=['score'], index=['tree', 'svm', 'MLP'])
scores.plot(kind='bar', rot=0, use_index=True)
plt.show()
基于Keras建立深度神经网络模型，在bankpep数据集上训练神经网络分类模型，将训练模型的耗时以及模型性能与XGBoost、SVM、朴素贝叶斯等方法进行比较。
"""Compare fit time and test accuracy of four classifiers on bankpep.

Fits an MLP and Gaussian naive Bayes on an integer-encoded feature matrix,
and an RBF SVM and XGBoost on a one-hot-encoded one. Prints each model's
test-set accuracy with its fit duration and draws a bar chart of the
accuracies.
"""
import datetime
import time
import pandas as pd
from sklearn import model_selection
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from pandas import DataFrame
import matplotlib.pyplot as plt
import matplotlib
from xgboost.sklearn import XGBClassifier
from sklearn.neural_network import MLPClassifier


def _fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit *model* and return (test-set accuracy, wall-clock fit duration)."""
    started = datetime.datetime.now()
    model.fit(X_train, y_train)
    elapsed = datetime.datetime.now() - started
    return model.score(X_test, y_test), elapsed


data = pd.read_csv('C:\\python\\bankpep.csv', index_col=0, header=0)

# Encode the categorical columns as integers.
yes_no_columns = ['married', 'car', 'save_act', 'current_act', 'mortgage', 'pep']
for feature in yes_no_columns:
    data[feature] = data[feature].map({'YES': 1, 'NO': 0})
data['sex'] = data['sex'].map({'FEMALE': 0, 'MALE': 1})
data['region'] = data['region'].map(
    {'INNER_CITY': 1, 'RURAL': 2, 'TOWN': 3, 'SUBURBAN': 4})

# Select features by name. The previous positional slice 0:9 combined with
# the label at position 10 left column 9 out of both features and label.
X1 = data.drop(columns=['pep']).values.astype(float)
y1 = data['pep'].values.astype(int)

# Evaluate the seed once so both splits below pick the same test rows; calling
# time.time() per split could straddle a second boundary and give the two
# model pairs different test sets.
split_seed = int(time.time())

X_train1, X_test1, y_train1, y_test1 = model_selection.train_test_split(
    X1, y1, test_size=0.25, random_state=split_seed)

# Neural network
mlp = MLPClassifier(solver='lbfgs', activation='identity', random_state=1)
mlp_score, s_1 = _fit_and_score(mlp, X_train1, y_train1, X_test1, y_test1)
print('神经网络性能:{:.2f},运行时间为{}'.format(mlp_score, s_1))

# Naive Bayes
nb_score, s_2 = _fit_and_score(
    GaussianNB(), X_train1, y_train1, X_test1, y_test1)
print("朴素贝叶斯性能:{:.2f},运行时间为{}".format(nb_score, s_2))

# SVM and XGBoost use one-hot encoded region/children columns.
# (The earlier second re-encoding of 'sex' ran after the column was already
# numeric, matched no rows and was dropped as dead code.)
dumm_reg = pd.get_dummies(data['region'], prefix='region')
dumm_child = pd.get_dummies(data['children'], prefix='children')
df1 = data.drop(['region', 'children'], axis=1)
df2 = df1.join([dumm_reg, dumm_child], how='outer')
X3 = df2.drop(['pep'], axis=1).values.astype(float)
y3 = df2['pep'].values.astype(int)
X_train3, X_test3, y_train3, y_test3 = model_selection.train_test_split(
    X3, y3, test_size=0.25, random_state=split_seed)

# SVM
svm_score, s_3 = _fit_and_score(
    svm.SVC(kernel='rbf', gamma=0.7, C=0.001),
    X_train3, y_train3, X_test3, y_test3)
print("SVM性能:{:.2f},运行时间为{}".format(svm_score, s_3))

# XGBoost
xgb_score, s_4 = _fit_and_score(
    XGBClassifier(max_depth=6, gamma=0, subsample=1, colsample_bytree=1),
    X_train3, y_train3, X_test3, y_test3)
print("XGBoost性能:{:.2f},运行时间为{}".format(xgb_score, s_4))

# Compare the four test-set accuracies in a bar chart.
scores = DataFrame([mlp_score, nb_score, svm_score, xgb_score],
                   columns=['score'],
                   index=['MLP', 'Naive Bayes', 'SVM', 'XGBoost'])
scores.plot(kind='bar', title='decision score on test set', rot=0)
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。