赞
踩
1.导入所需的包
import functools
import numpy as np
# Show numpy floats with 3 decimal places and suppress scientific notation.
np.set_printoptions(precision=3,suppress=True)
import tensorflow as tf
# Paths to the local Titanic train/test CSV files.
train_file_path="./titanic/train.csv"
test_file_path="./titanic/test.csv"
2.加载数据
# Name of the label column and its possible values.
LABEL_COLUMN = "survived"
LABELS = [0, 1]


def get_dataset(file_path):
    """Build a TensorFlow dataset from a CSV file.

    Args:
        file_path: path to the CSV file to load.

    Returns:
        A batched tf.data.Dataset yielding (features, label) pairs,
        batch size 12, one pass over the file per epoch.
    """
    return tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=12,
        label_name=LABEL_COLUMN,
        na_value="?",
        num_epochs=1,
        ignore_errors=True)


# Load the train and test CSVs as TensorFlow dataset objects.
raw_train_data = get_dataset(train_file_path)
raw_test_data = get_dataset(test_file_path)

# Peek at a single batch to verify the pipeline.
examples, labels = next(iter(raw_train_data))
print("EXAMPLES:\n", examples, "\n")
print("LABELS:\n", labels)
# Vocabulary ("code table") for each categorical feature.
# FIX(review): the upstream tutorial misspells 'Southampton' as
# 'Southhampton'; the actual CSV value is 'Southampton', so with the typo
# every Southampton passenger one-hot encoded to all zeros. Corrected here.
CATEGORIES = {
    'sex': ['male', 'female'],
    'class': ['First', 'Second', 'Third'],
    'deck': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
    'embark_town': ['Cherbourg', 'Southampton', 'Queenstown'],
    'alone': ['y', 'n']
}

categorical_columns = []
for feature, vocab in CATEGORIES.items():
    # Categorical column backed by an explicit vocabulary list.
    cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
    # Wrap as an indicator (one-hot) column so the DNN can consume it.
    categorical_columns.append(tf.feature_column.indicator_column(cat_col))

# The list of categorical feature columns.
categorical_columns
3.连续数据
def process_continuous_data(mean, data):
    """Scale a numeric column and reshape it to a column vector.

    Args:
        mean: precomputed mean of the column.
        data: raw tensor of values for the column.

    Returns:
        A float32 tensor of shape [-1, 1], scaled by 1 / (2 * mean).
    """
    scaled = tf.cast(data, tf.float32) * 1 / (2 * mean)
    return tf.reshape(scaled, [-1, 1])


# Precomputed means for each continuous column.
MEANS = {
    'age': 29.631308,
    'n_siblings_spouses': 0.545455,
    'parch': 0.379585,
    'fare': 34.385399
}

# One numeric feature column per continuous feature, each normalized
# with its own precomputed mean bound via functools.partial.
numerical_columns = [
    tf.feature_column.numeric_column(
        feature,
        normalizer_fn=functools.partial(process_continuous_data, mean))
    for feature, mean in MEANS.items()
]

# The list of continuous feature columns.
numerical_columns
4.构建DNN深度学习模型
创建输入层layer
将这两个特征列的集合相加,并且传给tf.keras.layers.DenseFeatures从而创建一个进行预处理的输入层。
# Combine both feature-column lists and hand them to DenseFeatures,
# producing a single preprocessing input layer for the model.
feature_columns = categorical_columns + numerical_columns
preprocessing_layer = tf.keras.layers.DenseFeatures(feature_columns)
从preprocessing_layer开始构建tf.keras.Sequential.
# Build a DNN model g(f(x)): CSV preprocessing followed by dense layers.
model = tf.keras.Sequential()
model.add(preprocessing_layer)
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(16, activation='relu'))
# Single sigmoid unit: probability of survival.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Binary classification setup: cross-entropy loss, Adam optimizer.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])
5.训练、评估和预测
现在可以实例化和训练模型
# The test set is used as-is; the training set gets a 500-element
# shuffle buffer so batches vary between epochs.
test_data = raw_test_data
train_data = raw_train_data.shuffle(500)

# Train for 20 epochs, then print the layer summary.
model.fit(train_data, epochs=20)
model.summary()
当模型训练完成的时候,可以在测试集test_data上检查准确性
# Evaluate on the held-out test set; evaluate() returns [loss, accuracy]
# because the model was compiled with metrics=['accuracy'].
results = model.evaluate(test_data)
test_loss, test_accuracy = results
print()
print(f'Test Loss {test_loss},Test Accuracy {test_accuracy}')
使用tf.keras.Model.predict推断一个批次或多个批次的标签
# Run inference over the whole test set.
predictions = model.predict(test_data)
predictions[:10]

# FIX(review): the original compared predictions[:10] against
# list(test_data)[0][1][:10]. That re-iterates the dataset, and
# make_csv_dataset shuffles by default, so the labels came from a
# different batch order than the one model.predict() consumed — the
# printed predicted/actual pairs were misaligned. Instead, capture ONE
# batch and predict on exactly that batch so each probability lines up
# with its true label.
for batch_features, batch_labels in test_data.take(1):
    batch_predictions = model.predict(batch_features)
    # Show the first 10 results of this batch.
    for prediction, survived in zip(batch_predictions[:10], batch_labels[:10]):
        is_survived = "SURVIVED" if bool(survived) else 'DIED'
        print(f"预测活着的概率:{prediction[0]}|实际值:{is_survived}")
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。