import pandas as pd
'''
Compare the predictive power of a random decision forest and an XGBoost model
on whether passengers of the Titanic survived
'''
'''
***************************************************************
***************************************************************
'''
'''
Random forest: predicting whether Titanic passengers survived
'''
# Download the Titanic data from a URL
titanic = pd.read_csv('http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt')
# Select pclass, age and sex as the training features
X = titanic[['pclass','age','sex']].copy()
y = titanic['survived']
# Fill missing age values with the mean of the known ages
# (working on a copy and assigning back avoids the SettingWithCopyWarning)
X['age'] = X['age'].fillna(X['age'].mean())
# Split the data, randomly sampling 25% as the test set
# (sklearn.cross_validation was removed; train_test_split now lives in sklearn.model_selection)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=33)
print(X_train)
print(y_train)
# Import DictVectorizer from sklearn.feature_extraction
from sklearn.feature_extraction import DictVectorizer
vec = DictVectorizer(sparse=False)
# Vectorize the features: categorical columns (pclass, sex) are one-hot encoded,
# numeric ones (age) are passed through (note the spelling orient='records')
X_train = vec.fit_transform(X_train.to_dict(orient='records'))
X_test = vec.transform(X_test.to_dict(orient='records'))
print(X_train)
print(y_train)
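To see what the vectorized columns printed above correspond to, the fitted vectorizer's feature names can be inspected. This is a small optional sketch, not part of the original listing:

# Optional: inspect the column order produced by DictVectorizer
print(vec.feature_names_)
# Should show something like:
# ['age', 'pclass=1st', 'pclass=2nd', 'pclass=3rd', 'sex=female', 'sex=male']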
# Train a random forest classifier with the default configuration
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()
rfc.fit(X_train,y_train)
# score() is called on the held-out data, so this is test-set accuracy
print('the accuracy of RandomForestClassifier on the test set:',rfc.score(X_test,y_test))
'''
XGBoost: predicting whether Titanic passengers survived
'''
from xgboost import XGBClassifier
xgbc = XGBClassifier()
xgbc.fit(X_train,y_train)
print('the accuracy of XGBoost on the test set:',xgbc.score(X_test,y_test))
Output (abridged; the printed DataFrame previews are omitted):
the accuracy of RandomForestClassifier on the test set: 0.775075987842
the accuracy of XGBoost on the test set: 0.787234042553
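A single 75/25 split gives a fairly noisy comparison. As an optional sketch (not in the original post), 5-fold cross-validation on the vectorized training data makes the contrast between the two models a little more robust; exact numbers will differ from the single-split scores above:

from sklearn.model_selection import cross_val_score
import numpy as np

# 5-fold cross-validated accuracy for both classifiers on the same features
for name, model in [('RandomForest', RandomForestClassifier()),
                    ('XGBoost', XGBClassifier())]:
    scores = cross_val_score(model, X_train, y_train, cv=5)
    print(name, 'mean CV accuracy:', np.mean(scores))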
import numpy as np
'''
Print a string with TensorFlow
'''
import tensorflow as tf
# Define a TensorFlow constant holding the string 'Hello Google Tensorflow! ',
# named greeting, as a node in the computation graph
greeting = tf.constant('Hello Google Tensorflow! ')
# Start a session (TensorFlow 1.x API)
sess = tf.Session()
# Execute the greeting node inside the session
result = sess.run(greeting)
# Print the result of the session run
print(result)
# Close the session; this is the explicit way to release it
sess.close()
b'Hello Google Tensorflow! '
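For readers on TensorFlow 2.x, note that sessions were removed in favor of eager execution. Assuming a 2.x install, the same greeting reduces to the sketch below:

import tensorflow as tf

# In TensorFlow 2.x tensors are evaluated eagerly, so no Session is needed
greeting = tf.constant('Hello Google Tensorflow! ')
print(greeting.numpy())   # b'Hello Google Tensorflow! '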
import tensorflow as tf
'''
Compute a linear function with TensorFlow
'''
# Declare matrix1 as a 1x2 row vector
matrix1 = tf.constant([[3,3]])
# Declare matrix2 as a 2x1 column vector
matrix2 = tf.constant([[2],[2]])
# Multiply the two operands; product becomes a new node in the graph
product = tf.matmul(matrix1,matrix2)
# Add the integer scalar constant 2 to product to form the final linear node
# (the dtypes must match: product is int32 here, hence 2 rather than 2.0)
linear = tf.add(product,tf.constant(2))
# Running linear in a session executes the whole graph assembled above
with tf.Session() as sess:
    result = sess.run(linear)
    print(result)
[[14]]
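As a quick sanity check (not in the original), the same computation can be reproduced with plain NumPy, which makes the expected [[14]] easy to verify by hand: 3*2 + 3*2 + 2 = 14.

import numpy as np

# (1x2) @ (2x1) -> 1x1 matrix, then add the scalar 2
print(np.matmul([[3, 3]], [[2], [2]]) + 2)   # [[14]]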
import numpy as np
import pandas as pd
import tensorflow as tf
'''
Build a custom linear classifier in TensorFlow to predict benign/malignant breast tumors
'''
# Read the breast cancer training and test data from local files with pandas
train = pd.read_csv('breast-cancer-train.csv')
test = pd.read_csv('breast-cancer-test.csv')
print(train)
print(test)
# Separate the features from the classification target
X_train = np.float32(train[['Clump Thickness','Cell Size']].T)
y_train = np.float32(train['Type'].T)
X_test = np.float32(test[['Clump Thickness','Cell Size']].T)
y_test = np.float32(test['Type'].T)
print(X_train)
print(X_train.shape)
print(X_test)
print(X_test.shape)
# Define a TensorFlow variable b as the intercept of the linear model, initialized to 0.0
b = tf.Variable(tf.zeros([1]))
# Define a TensorFlow variable W as the coefficients, initialized uniformly at random in [-1.0, 1.0)
W = tf.Variable(tf.random_uniform([1,2],-1.0,1.0))
# Explicitly define the linear function
y = tf.matmul(W,X_train) + b
# Use reduce_mean to compute the mean squared error over the training set
loss = tf.reduce_mean(tf.square(y-y_train))
# Estimate W and b by gradient descent with a step size of 0.01,
# analogous to the learning rate of scikit-learn's SGDRegressor
optimizer = tf.train.GradientDescentOptimizer(0.01)
# Take the squared-error loss as the optimization objective
train_optimizer = optimizer.minimize(loss)
# Initialize all variables (tf.initialize_all_variables is deprecated in favor of this)
init = tf.global_variables_initializer()
# Open a TensorFlow session
sess = tf.Session()
# Run the variable initialization op
sess.run(init)
# Train the parameters for 1000 iterations, logging every 10 steps
for step in range(0,1000):
    sess.run(train_optimizer)
    if step % 10 == 0:
        print(step,sess.run(W),sess.run(b))
# Prepare the test samples
test_negative = test.loc[test['Type'] == 0][['Clump Thickness','Cell Size']]
test_positive = test.loc[test['Type'] == 1][['Clump Thickness','Cell Size']]
# Plot the test data together with the finally learned parameters
import matplotlib.pyplot as plt
plt.scatter(test_negative['Clump Thickness'],test_negative['Cell Size'],marker='o',s=200,c='red')
plt.scatter(test_positive['Clump Thickness'],test_positive['Cell Size'],marker='x',s=150,c='black')
plt.xlabel('Clump Thickness')
plt.ylabel('Cell Size')
lx = np.arange(0,12)
# Note: 0.5 is taken as the decision boundary, so the line is computed as follows:
ly = (0.5 - sess.run(b) - lx * sess.run(W)[0][0])/sess.run(W)[0][1]
plt.plot(lx,ly,color='green')
plt.show()
Output (abridged): the printed train DataFrame has 524 rows x 4 columns and the
test DataFrame 175 rows x 4 columns; the transposed feature matrices have
shapes (2, 524) and (2, 175).
Abridged training log (step, W, b, printed every 10 steps):
0   [[ 0.36668363 -0.50090975]] [ 0.07310659]
100 [[ 0.15551494 -0.03431684]] [ 0.00586066]
...
500 [[ 0.05820943  0.07653957]] [-0.08367487]
...
990 [[ 0.05786117  0.0774165 ]] [-0.08684334]
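The original post stops at the plot. As an optional, hedged sketch, the learned linear rule can also be scored on the held-out test split, thresholding the model output at the same 0.5 boundary used for the plotted line (variable names follow the code above):

# Evaluate the learned linear rule on the test split
W_final = sess.run(W)                            # shape (1, 2)
b_final = sess.run(b)                            # shape (1,)
scores = np.matmul(W_final, X_test) + b_final    # shape (1, 175)
y_pred = (scores > 0.5).astype(np.float32)       # 1 = malignant, 0 = benign
print('test accuracy:', np.mean(y_pred == y_test))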