赞
踩
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
iris = load_iris()

# Wrap the feature matrix in a DataFrame, one column per iris feature name.
df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Mark each row as a training row when its uniform draw from [0, 1) is
# <= 0.75, i.e. roughly three quarters of the rows.
df['is_train'] = np.random.uniform(0, 1, len(df)) <= 0.75
df.head()  # show the first rows of the DataFrame (displayed in a notebook)

# Exploration of how the mask is built. Scratch variables are used so the
# 75 % split stored in df['is_train'] above is not overwritten.
single_draw = np.random.uniform(0, 1)            # one float
row_draws = np.random.uniform(0, 1, len(df))     # float64 array, one per row
# Comparing the float64 draws with a threshold yields a bool array.
bool_mask = np.random.uniform(0, 1, len(df)) >= 0.9

# print() with a single argument behaves the same on Python 2 and Python 3.
print(iris.target)
print(iris.target_names)
# Output of the two print statements above:
# [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
# 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
# 2 2]
# ['setosa' 'versicolor' 'virginica']
# Attach each sample's species name as a categorical column. The category
# codes are the integers in iris.target, so code i means iris.target_names[i].
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
df.head()

# Split the rows with the boolean 'is_train' mask.
is_train = df['is_train']
train, test = df[is_train], df[~is_train]

# The first four columns are the feature columns taken from iris.data.
features = df.columns[:4]

clf = RandomForestClassifier(n_jobs=2)

# Train on the categorical codes rather than on pd.factorize() output:
# factorize numbers labels by order of first appearance in `train`, so if a
# species is absent from the random training subset the codes shift and no
# longer line up with iris.target_names when predictions are decoded below.
y = train['species'].cat.codes
clf.fit(train[features], y)

# Decode the predicted integer codes back into species names.
predict_result = iris.target_names[clf.predict(test[features])]

# Confusion table: actual species (rows) against predicted species (columns).
pd.crosstab(test['species'], predict_result, rownames=['actual'], colnames=['preds'])
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。