赞
踩
头歌学习平台
PS:如果评测提示超时,多提交几次即可(年龄填补使用了 2000 棵树的随机森林,训练较慢)。
- import pandas as pd
- import numpy as np
- import sklearn
- #********* Begin *********#
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.ensemble import RandomForestRegressor
- titanic = pd.read_csv('./train.csv')  # load the training data from train.csv in the working directory
def set_missing_ages(df, n_estimators=2000):
    """Fill missing ``Age`` values in place with random-forest predictions.

    A ``RandomForestRegressor`` is fitted on the rows whose ``Age`` is known,
    using ``Fare``, ``Parch``, ``SibSp`` and ``Pclass`` as features, and its
    predictions overwrite the null ``Age`` cells.

    Args:
        df: DataFrame containing at least the columns ``Age``, ``Fare``,
            ``Parch``, ``SibSp`` and ``Pclass``. It is modified in place.
        n_estimators: Number of trees in the regressor. Defaults to 2000,
            the value the original code hard-coded; pass a smaller number
            to shorten the fit.

    Returns:
        The same ``df`` object, with the ``Age`` gaps filled.
    """
    missing_age = df['Age'].isnull()
    # Nothing to impute: return early instead of asking the regressor to
    # predict on a zero-row array, which raises.
    if not missing_age.any():
        return df

    feature_cols = ['Fare', 'Parch', 'SibSp', 'Pclass']
    known_rows = df.loc[~missing_age]

    rfr = RandomForestRegressor(
        random_state=0, n_estimators=n_estimators, n_jobs=-1
    )
    rfr.fit(known_rows[feature_cols].values, known_rows['Age'].values)

    predicted_ages = rfr.predict(df.loc[missing_age, feature_cols].values)
    df.loc[missing_age, 'Age'] = predicted_ages
    return df
def _encode_features(frame):
    """One-hot encode Embarked/Sex/Pclass and drop the raw non-numeric columns."""
    dummies = [
        pd.get_dummies(frame[col], prefix=col)
        for col in ('Embarked', 'Sex', 'Pclass')
    ]
    encoded = pd.concat([frame, *dummies], axis=1)
    return encoded.drop(
        ['Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1
    )


# Training set: impute ages, encode categoricals, split the label off.
titanic = set_missing_ages(titanic)
df = _encode_features(titanic)
train_label = df['Survived']
# ``axis`` must be given by keyword; pandas 2.0 no longer accepts it
# positionally in DataFrame.drop.
train_titanic = df.drop('Survived', axis=1)

# Test set: same preprocessing as the training set.
titanic_test = pd.read_csv('./test.csv')
titanic_test = set_missing_ages(titanic_test)
df_test = _encode_features(titanic_test)
# get_dummies only creates columns for categories present in each file, so
# align the test frame to the training columns (same set, same order);
# a dummy column absent from the test data is filled with 0.
df_test = df_test.reindex(columns=train_titanic.columns, fill_value=0)

# Fit the classifier and write the predictions as a single-column CSV.
model = RandomForestClassifier(n_estimators=10)
model.fit(train_titanic, train_label)
predictions = model.predict(df_test)
result = pd.DataFrame({'Survived': predictions.astype(np.int32)})
result.to_csv("./predict.csv", index=False)
- #********* End *********#
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。