赞
踩
KNN(K近邻)
高斯朴素贝叶斯(GNB)
决策树:
支持向量机
逻辑回归:
随机森林:
其它集成学习:
import time

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Read the data: every column except the last is a feature, the last is the label.
X = []
y = []
file_name = './心血管疾病数据集.csv'
with open(file=file_name, mode='r', encoding='utf8') as f:
    # The first line is consumed separately so it is not parsed as a sample.
    line_first = np.array(f.readline().strip().split(','))
    for line in f:
        # Use the line handed out by the iterator itself. Calling
        # f.readline() here as well would consume a second line per
        # iteration and silently drop every other row.
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split(',')
        X.append(fields[:-1])
        y.append(fields[-1])

# Convert to numpy arrays and cast everything to float.
X = np.array(X).astype(float)
y = np.array(y).astype(float)

# Drop the first column (the id), which is not a feature.
X = X[:, 1:]

# Split into training and test sets (20% held out for testing).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Standardize using statistics computed on the training set only, so no
# information from the test set leaks into preprocessing. The small epsilon
# guards against division by zero for constant columns.
_mean = X_train.mean(axis=0)
_std = X_train.std(axis=0)
X_train = (X_train - _mean) / (_std + 1e-9)
X_test = (X_test - _mean) / (_std + 1e-9)
# Test 1: k-nearest neighbours (KNN).
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)

# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and high resolution, whereas time.time() follows the wall
# clock and can jump or be too coarse for sub-millisecond intervals.
start_fit = time.perf_counter()
knn.fit(X=X_train, y=y_train)
start_predict = time.perf_counter()
y_pred = knn.predict(X=X_test)
stop_predict = time.perf_counter()

# Accuracy: fraction of test samples whose predicted label equals the true label.
acc = (y_pred == y_test).mean()

# Report training time, inference time (both in seconds) and accuracy.
print(f"""KNN:
--> 训练耗时:{start_predict-start_fit} 秒;
--> 推理耗时:{stop_predict-start_predict} 秒;
--> 准确率:{acc} ;""")
KNN:
--> 训练耗时:0.04751253128051758 秒;
--> 推理耗时:1.3453574180603027 秒;
--> 准确率:0.6402857142857142 ;
# Test 2: Gaussian naive Bayes (GNB).
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()

# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and high resolution, whereas time.time() follows the wall
# clock and can jump or be too coarse for sub-millisecond intervals.
start_fit = time.perf_counter()
gnb.fit(X=X_train, y=y_train)
start_predict = time.perf_counter()
y_pred = gnb.predict(X=X_test)
stop_predict = time.perf_counter()

# Accuracy: fraction of test samples whose predicted label equals the true label.
acc = (y_pred == y_test).mean()

# Report training time, inference time (both in seconds) and accuracy.
print(f"""GNB:
--> 训练耗时:{start_predict-start_fit} 秒;
--> 推理耗时:{stop_predict-start_predict} 秒;
--> 准确率:{acc} ;""")
GNB:
--> 训练耗时:0.009502649307250977 秒;
--> 推理耗时:0.0010018348693847656 秒;
--> 准确率:0.5931428571428572 ;
# Test 3: decision tree (DTC).
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier()

# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and high resolution, whereas time.time() follows the wall
# clock and can jump or be too coarse for sub-millisecond intervals.
start_fit = time.perf_counter()
dtc.fit(X=X_train, y=y_train)
start_predict = time.perf_counter()
y_pred = dtc.predict(X=X_test)
stop_predict = time.perf_counter()

# Accuracy: fraction of test samples whose predicted label equals the true label.
acc = (y_pred == y_test).mean()

# Report training time, inference time (both in seconds) and accuracy.
print(f"""DTC:
--> 训练耗时:{start_predict-start_fit} 秒;
--> 推理耗时:{stop_predict-start_predict} 秒;
--> 准确率:{acc} ;""")
DTC:
--> 训练耗时:0.11947154998779297 秒;
--> 推理耗时:0.002510547637939453 秒;
--> 准确率:0.6367142857142857 ;
# Test 4: random forest (RFC).
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier()

# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and high resolution, whereas time.time() follows the wall
# clock and can jump or be too coarse for sub-millisecond intervals.
start_fit = time.perf_counter()
rfc.fit(X=X_train, y=y_train)
start_predict = time.perf_counter()
y_pred = rfc.predict(X=X_test)
stop_predict = time.perf_counter()

# Accuracy: fraction of test samples whose predicted label equals the true label.
acc = (y_pred == y_test).mean()

# Report training time, inference time (both in seconds) and accuracy.
print(f"""RFC:
--> 训练耗时:{start_predict-start_fit} 秒;
--> 推理耗时:{stop_predict-start_predict} 秒;
--> 准确率:{acc} ;""")
RFC:
--> 训练耗时:3.7813894748687744 秒;
--> 推理耗时:0.18322372436523438 秒;
--> 准确率:0.715 ;
# Test 5: support vector machine (SVC).
from sklearn.svm import SVC

svc = SVC()

# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and high resolution, whereas time.time() follows the wall
# clock and can jump or be too coarse for sub-millisecond intervals.
start_fit = time.perf_counter()
svc.fit(X=X_train, y=y_train)
start_predict = time.perf_counter()
y_pred = svc.predict(X=X_test)
stop_predict = time.perf_counter()

# Accuracy: fraction of test samples whose predicted label equals the true label.
acc = (y_pred == y_test).mean()

# Report training time, inference time (both in seconds) and accuracy.
print(f"""SVC:
--> 训练耗时:{start_predict-start_fit} 秒;
--> 推理耗时:{stop_predict-start_predict} 秒;
--> 准确率:{acc} ;""")
SVC:
--> 训练耗时:22.884344339370728 秒;
--> 推理耗时:10.314218997955322 秒;
--> 准确率:0.7188571428571429 ;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。