赞
踩
# Fit a logistic-regression classifier on the iris training sheet and print a
# classification report for the same rows it was trained on.
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

train = pd.read_excel("鸢尾花训练数据.xlsx")
# Loaded here but not used anywhere in this snippet.
test = pd.read_excel("鸢尾花测试数据.xlsx")

x_train = train[["萼片长(cm)", "萼片宽(cm)", "花瓣长(cm)", "花瓣宽(cm)"]]
# The label is selected as a one-column frame and flattened to a 1-D array.
y_train = train[["类型_num"]].values.ravel()

lr = LogisticRegression()
lr.fit(x_train, y_train)

train_predicted = lr.predict(x_train)
print(metrics.classification_report(y_train, train_predicted))
# Iris workflow: fit logistic regression on the training sheet, report on the
# training and test sheets, then predict labels for the unlabeled sheet.
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# Input columns shared by all three sheets, and the label column.
FEATURE_COLUMNS = ["萼片长(cm)", "萼片宽(cm)", "花瓣长(cm)", "花瓣宽(cm)"]
LABEL_COLUMN = "类型_num"

train = pd.read_excel("鸢尾花训练数据.xlsx")
test = pd.read_excel("鸢尾花测试数据.xlsx")

x_train = train[FEATURE_COLUMNS]
y_train = train[LABEL_COLUMN].to_numpy()

lr = LogisticRegression()
lr.fit(x_train, y_train)

# Report on the rows the model was fitted on.
train_predicted = lr.predict(x_train)
print(metrics.classification_report(y_train, train_predicted))

# Report on the separate test sheet.
x_test = test[FEATURE_COLUMNS]
y_test = test[LABEL_COLUMN].to_numpy()
test_predicted = lr.predict(x_test)
print(metrics.classification_report(y_test, test_predicted))

# Predict labels and per-class probabilities for the prediction sheet.
predict = pd.read_excel("鸢尾花预测数据.xlsx")
x_predict = predict[FEATURE_COLUMNS]
predict_predicted = lr.predict(x_predict)
print(predict_predicted)
print(lr.predict_proba(x_predict))
numpy 和 pandas 用于数据处理。
LogisticRegression 用于创建逻辑回归模型。
StandardScaler 用于特征缩放。
train_test_split 用于分割数据集。
cross_val_score 用于交叉验证。
metrics 用于评估模型性能。
使用 pandas 的 read_csv 函数读取 "creditcard.csv" 文件。
使用 StandardScaler 对 "Amount" 特征进行标准化,并删除 "Time" 列。
- data["Amount"]=scaler.fit_transform(data[["Amount"]])
- data=data.drop(["Time"],axis=1)
使用 train_test_split 将数据集分割为训练集和测试集。
- from sklearn.model_selection import train_test_split
# Separate the "Class" column (label) from the remaining columns (features),
# then hold out 30% of the rows as a test set with a fixed random seed.
y = data["Class"]
x = data.drop(columns=["Class"])
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)
针对一组候选的 C 值,使用交叉验证计算召回率,并找到最佳 C 值。
- from sklearn.model_selection import cross_val_score
# Candidate values for the regularisation parameter C. For each candidate,
# score a fresh model with 8-fold cross-validated recall on the training
# set and store the mean fold score in `d` (same order as `c`).
c = [0.01, 0.1, 1, 10, 100]
d = []
for i in c:
    lr = LogisticRegression(penalty="l2", C=i, solver="lbfgs", max_iter=1000)
    score = cross_val_score(lr, x_train, y_train, cv=8, scoring="recall")
    d.append(score.mean())
使用最佳 C 值创建逻辑回归模型,并用训练集数据训练模型。
- best_c=c[np.argmax(d)]
from sklearn import metrics

# Build the final model with the selected C and fit it on the training set.
lr = LogisticRegression(
    penalty="l2", C=best_c, solver="lbfgs", max_iter=1000
).fit(x_train, y_train)

# Predict both splits, then print the training report followed by the test
# report.
predicted_train = lr.predict(x_train)
predicted_test = lr.predict(x_test)
print(metrics.classification_report(y_train, predicted_train))
print(metrics.classification_report(y_test, predicted_test))
定义一组阈值 thresh,根据阈值调整预测结果,并计算每个阈值下的召回率。
- thresh=[0.1,0.2,0.3,0.4,0.5,0.6]
# The predicted probabilities do not depend on the threshold, so compute them
# once. Column 1 of predict_proba corresponds to the second entry of
# lr.classes_.
positive_proba = lr.predict_proba(x_test)[:, 1]

recalls = []
for j in thresh:
    # A sample is labelled 1 when its probability is strictly above the
    # threshold, otherwise 0.
    y_predic = (positive_proba > j).astype(int)
    recall = metrics.recall_score(y_test, y_predic)
    recalls.append(recall)
    print(j, recall)
# Logistic regression on creditcard.csv: choose C by cross-validated recall,
# fit and report on the train/test splits, then print recall for a range of
# decision thresholds.
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
data = pd.read_csv("creditcard.csv")
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split below, so test rows contribute to the scaling statistics.
data["Amount"] = scaler.fit_transform(data[["Amount"]])
data = data.drop(["Time"], axis=1)

# "Class" is the label; every remaining column is a feature.
x = data.drop(["Class"], axis=1)
y = data["Class"]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Mean 8-fold cross-validated recall for each candidate C, stored in `d` in
# the same order as `c`.
c = [0.01, 0.1, 1, 10, 100]
d = []
for i in c:
    lr = LogisticRegression(penalty="l2", C=i, solver="lbfgs", max_iter=1000)
    score = cross_val_score(lr, x_train, y_train, cv=8, scoring="recall")
    d.append(score.mean())

# Refit on the whole training set with the C that gave the highest mean
# recall.
best_c = c[np.argmax(d)]
lr = LogisticRegression(penalty="l2", C=best_c, solver="lbfgs", max_iter=1000)
lr.fit(x_train, y_train)

predicted_train = lr.predict(x_train)
print(metrics.classification_report(y_train, predicted_train))

predicted_test = lr.predict(x_test)
print(metrics.classification_report(y_test, predicted_test))

# The predicted probabilities do not depend on the threshold, so compute them
# once. Column 1 of predict_proba corresponds to the second entry of
# lr.classes_.
positive_proba = lr.predict_proba(x_test)[:, 1]

thresh = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
recalls = []
for j in thresh:
    # A sample is labelled 1 when its probability is strictly above the
    # threshold, otherwise 0.
    y_predic = (positive_proba > j).astype(int)
    recall = metrics.recall_score(y_test, y_predic)
    recalls.append(recall)
    print(j, recall)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。