赞
踩
1、创建模型
# Build and fit a logistic-regression classifier on the data in /2.csv.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Read the file.
df = pd.read_csv('/2.csv')

# Pre-processing: min-max scale every column (the 'target' column included)
# so that each column's values span [0, 1].
# NOTE(review): a column whose values are all equal gives 0/0 here and turns
# into NaN, which the model fit below will not accept.
# NOTE(review): min/max are taken over the whole file, before the split
# below, so the held-out rows influence the scaling - confirm this is intended.
df_norm = (df - df.min()) / (df.max() - df.min())

# Feature matrix: every scaled column except 'target'.
X = df_norm.drop('target', axis=1)

# Value to predict.
# NOTE(review): presumably 'target' is binary, so the scaling above leaves it
# as 0/1 - confirm against the data.
y = df_norm['target']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Create the model with the library's default settings and fit it on the
# training split.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

2、模型评估
# Evaluate the predictions: confusion matrix, accuracy, precision, recall, F1.
import pandas as pd
from sklearn.metrics import confusion_matrix

# Confusion matrix with rows = true label and columns = predicted label,
# both ordered [0, 1]; cm[1, 1] is therefore the true-positive count when
# class 1 is the positive class.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Wrap it in a DataFrame so that row/column totals can be appended.
df_cm = pd.DataFrame(cm)

# Extra column holding each row's total (number of samples per true class).
df_cm['Row_sum'] = df_cm.sum(axis=1)

# Extra row holding each column's total (number of samples per predicted
# class); its last cell is the grand total.
df_cm.loc['Col_sum'] = df_cm.sum()

# Accuracy: correctly classified samples (the diagonal) over all samples.
Acc = (df_cm.iloc[0, 0] + df_cm.iloc[1, 1]) / df_cm.iloc[2, 2]

# Precision for the positive class (1): true positives over everything
# predicted as 1. Using cell [0, 0] here would score class 0 instead.
# NOTE(review): this is 0/0 (NaN) when the model never predicts class 1.
Precision = df_cm.iloc[1, 1] / df_cm.iloc[2, 1]

# Recall for the positive class (1): true positives over everything whose
# true label is 1.
Recall = df_cm.iloc[1, 1] / df_cm.iloc[1, 2]

# F1: harmonic mean of precision and recall.
F1 = 2 * Precision * Recall / (Precision + Recall)

3、混淆矩阵可视化
# Draw the confusion-matrix table as an annotated heatmap.
import matplotlib.pyplot as plt
import seaborn as sn

# annot=True writes each cell's value into the cell; fmt='.20g' allows up to
# 20 significant digits so counts are printed in full rather than in
# scientific notation.
# NOTE(review): if df_cm still carries the Row_sum column and Col_sum row,
# those totals are drawn too and share the colour scale with the counts.
ax = sn.heatmap(df_cm, annot=True, fmt='.20g')

# Title and axis labels: columns are predictions, rows are true labels.
ax.set_title('confusion matrix')
ax.set_xlabel('predict')
ax.set_ylabel('true')

# Display the figure.
plt.show()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。