import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, Attention, GRU, Dropout
from tensorflow.keras.models import Model
import numpy as np
class Encoder(keras.Model):
    def __init__(self, hidden_units):
        super(Encoder, self).__init__()
        # Encoder recurrent layer (a GRU, despite the "lstm" naming in the original)
        self.encoder_gru = GRU(hidden_units, return_sequences=True, return_state=True, name="encoder_gru")
        self.dropout = Dropout(rate=0.5)

    def call(self, inputs):
        # Full output sequence plus the final hidden state
        encoder_outputs, state_h = self.encoder_gru(inputs)
        # Apply dropout (the layer was defined but never used in the original)
        encoder_outputs = self.dropout(encoder_outputs)
        return encoder_outputs, state_h
class Decoder(keras.Model):
    def __init__(self, hidden_units):
        super(Decoder, self).__init__()
        # Decoder recurrent layer (a GRU, despite the "lstm" naming in the original)
        self.decoder_gru = GRU(hidden_units, return_sequences=True, return_state=True, name="decoder_gru")
        # Dot-product attention over the encoder outputs
        self.attention = Attention()
        self.dropout = Dropout(rate=0.5)

    def call(self, enc_outputs, dec_inputs, states_inputs):
        # Run the decoder GRU, seeded with the encoder's final state
        dec_outputs, dec_state_h = self.decoder_gru(dec_inputs, initial_state=states_inputs)
        # Apply dropout (the layer was defined but never used in the original)
        dec_outputs = self.dropout(dec_outputs)
        # Attention: query = decoder outputs, value/key = encoder outputs
        attention_output = self.attention([dec_outputs, enc_outputs])
        return attention_output, dec_state_h
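As a quick shape sanity check (illustrative only; the batch size of 4 is arbitrary, the other dimensions match the example call at the end of this post), the two sub-models can be exercised with random tensors:

enc_in = tf.random.normal((4, 72, 21))   # (batch, encoder steps, features)
dec_in = tf.random.normal((4, 24, 20))   # (batch, decoder steps, features)
enc_out, enc_state = Encoder(50)(enc_in)                      # (4, 72, 50), (4, 50)
att_out, dec_state = Decoder(50)(enc_out, dec_in, enc_state)  # (4, 24, 50), (4, 50)
print(enc_out.shape, att_out.shape)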
def mae(y_true, y_pred):
    # Element-wise mean absolute error over the whole batch
    return K.mean(K.abs(y_pred - y_true))

# Either the built-in loss or the custom one works here:
# loss_fn = tf.keras.losses.MeanAbsoluteError()
loss_fn = mae
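For dense targets the hand-rolled mae matches Keras' built-in MeanAbsoluteError numerically; a minimal check:

y_true = tf.constant([[1.0, 2.0], [3.0, 4.0]])
y_pred = tf.constant([[1.5, 1.5], [2.0, 5.0]])
print(mae(y_true, y_pred).numpy())                                  # 0.75
print(tf.keras.losses.MeanAbsoluteError()(y_true, y_pred).numpy())  # 0.75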
The complete model-building code is as follows:
def seq2seq_attention(encode_shape, decode_shape, hidden_units, output_dim):
    """seq2seq model with an attention mechanism"""
    # Input layers
    encoder_inputs = Input(shape=encode_shape, name="encode_input")
    decoder_inputs = Input(shape=decode_shape, name="decode_input")
    # Encoder layer
    encoder = Encoder(hidden_units)
    enc_outputs, enc_state_h = encoder(encoder_inputs)
    dec_states_inputs = enc_state_h
    # Decoder layer
    decoder = Decoder(hidden_units)
    attention_output, dec_state_h = decoder(enc_outputs, decoder_inputs, dec_states_inputs)
    # Output projection
    dense_outputs = Dense(output_dim, activation='sigmoid', name="dense")(Dropout(rate=0.5)(attention_output))
    # Assemble and compile the seq2seq model
    model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=dense_outputs)
    model.summary()
    opt = keras.optimizers.Adam(learning_rate=0.0005)  # the old "lr" argument is deprecated
    # loss_fn = tf.keras.losses.MeanAbsolutePercentageError()
    # loss_fn = tf.keras.losses.MeanAbsoluteError()
    loss_fn = mae
    model.compile(loss=loss_fn, optimizer=opt)
    return model
if __name__ == '__main__':
    seq2seq_attention((72, 21), (24, 20), 50, 1)
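In this example call, (72, 21) tells the encoder to expect 72 time steps of 21 features each, (24, 20) gives the decoder 24 steps of 20 features, both GRUs use 50 hidden units, and output_dim=1 makes the Dense layer emit one sigmoid value per decoder step, so the model predicts a sequence of 24 values in (0, 1).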
Finally
Based on your business requirements, generate the data X1, X2, and Y, which serve as the encoder input, the decoder input, and the target output, respectively; a minimal placeholder sketch follows below.
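create_dataset is business-specific and was not included in the original post; the placeholder below only shows the shapes the model expects (the random data and the helper's signature are assumptions for illustration):

def create_dataset(data_path, n_samples=1000):
    # Placeholder: real code would read data_path and build sliding windows.
    X1 = np.random.rand(n_samples, 72, 21).astype("float32")  # encoder inputs
    X2 = np.random.rand(n_samples, 24, 20).astype("float32")  # decoder inputs
    Y = np.random.rand(n_samples, 24, 1).astype("float32")    # targets in [0, 1], matching the sigmoid output
    return X1, X2, Y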
The training function:
def train(train_data_path, test_data_path, model_path="seq2seq_weights.h5"):
    batch_size = 512
    epochs = 1000
    X1, X2, Y = create_dataset(train_data_path)
    train_data, eval_data, y_train, y_eval = split_data(X1, X2, Y, test_size=0.2, shuffle=True)
    # Build the model; shapes are inferred from the data rather than hard-coded
    encode_shape, decode_shape = X1.shape[1:], X2.shape[1:]
    model = seq2seq_attention(encode_shape, decode_shape, hidden_units=50, output_dim=Y.shape[-1])
    # Train the model with early stopping and best-weights checkpointing
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=model_path, save_weights_only=True,
        monitor='val_loss', mode='min', save_best_only=True)
    model.fit(x=train_data, y=y_train, batch_size=batch_size, epochs=epochs,
              callbacks=[callback, checkpoint_callback], verbose=2, shuffle=True,
              validation_data=(eval_data, y_eval))
    # Evaluate on the test set
    X1, X2, Y = create_dataset(test_data_path)
    test_data = [X1, X2]
    y_test = Y
    scores = model.evaluate(test_data, y_test, verbose=0)
    print(model.metrics_names, scores)
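Two practical notes: split_data is another business-specific helper (anything that splits [X1, X2] and Y into train/validation parts will do), and because save_best_only=True only writes the best weights to disk while fit leaves the last-epoch weights in memory, it is worth calling model.load_weights(model_path) before model.evaluate (or passing restore_best_weights=True to EarlyStopping) so the test score reflects the checkpointed model rather than the final epoch.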