
[NLP] Several Network Architectures for Text Classification

Text Classification Networks

Taking a Tang poetry generation model as an example: the model takes an input sequence of length input_length=20, with an embedding dimension of emb_dim=128 and a vocabulary of at most max_word=10000 tokens. Predicting the next token is framed as a 10000-way classification over the vocabulary, which is why every architecture below ends in a Dense(10000, softmax) layer. We build the following models on this setup.
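To keep the tensor shapes concrete, here is a minimal sketch of dummy data with those dimensions (the array names X and y are illustrative, not part of the original pipeline):

import numpy as np
from keras.utils import to_categorical

# Each sample is a sequence of 20 token ids from a 10000-word vocabulary;
# each target is the next token, one-hot encoded over the vocabulary.
X = np.random.randint(0, 10000, size=(64, 20))   # (batch, input_length)
y = to_categorical(np.random.randint(0, 10000, size=(64,)), num_classes=10000)   # (batch, max_word)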

1. LSTM

# Build the network
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout
from keras.optimizers import Adam

model = Sequential()
model.add(Embedding(10000, 128, input_length=20))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(Dense(10000, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

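All four models in this post share the same input and output shapes, so a quick smoke test with the dummy arrays X and y sketched in the introduction works for any of them. This is only a shape check under those assumptions, not a real training run:

# One pass over random data to confirm the model wires up end to end
model.fit(X, y, epochs=1, batch_size=32)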

2. TextCNN



from keras.layers import Conv1D, Flatten

# Build a TextCNN-style model: stacked 1D convolutions over the embedded sequence
model = Sequential()
model.add(Embedding(10000, 128, input_length=20))
model.add(Conv1D(64, 3, padding='same', activation='relu'))
model.add(Conv1D(32, 3, padding='same', activation='relu'))
model.add(Conv1D(16, 3, padding='same', activation='relu'))
model.add(Flatten())
model.add(Dense(10000, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

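Note that stacking Conv1D layers sequentially, as above, is a simplification. The classic TextCNN (Kim, 2014) runs several convolution branches with different kernel sizes in parallel and max-pools each one. A minimal sketch of that variant, with illustrative kernel sizes and filter counts, might look like this:

from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Concatenate, Dense

inp = Input(shape=(20,))
emb = Embedding(10000, 128, input_length=20)(inp)
# One branch per kernel size; each branch keeps its strongest feature per filter
branches = [GlobalMaxPooling1D()(Conv1D(64, k, activation='relu')(emb))
            for k in (2, 3, 4)]
merged = Concatenate()(branches)
out = Dense(10000, activation='softmax')(merged)
textcnn = Model(inp, out)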

3. Stacked Multi-Head Attention Layers

# Build the network
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, LSTM, Dropout, MultiHeadAttention, Input, Flatten
from keras.optimizers import Adam

# Import the position embedding layer from keras_nlp
from keras_nlp.layers import PositionEmbedding

inputs = Input(shape=(20,))
x = Embedding(10000, 128, input_length=20)(inputs)
pos = PositionEmbedding(sequence_length=20)(x)
x = x + pos
x = MultiHeadAttention(num_heads=3, key_dim=128)(x, x)
# x = Dropout(0.1)(x)
x = MultiHeadAttention(num_heads=3, key_dim=128)(x, x)
# x = Dropout(0.1)(x)
x = MultiHeadAttention(num_heads=3, key_dim=128)(x, x)
x = Flatten()(x)
x = Dense(10000, activation='softmax')(x)
# Build the model
model = Model(inputs=inputs, outputs=x)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

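A caveat for a generation task like this: plain self-attention lets every position attend to the whole sequence, including future tokens. Autoregressive generation usually adds a causal mask. In recent Keras/TensorFlow versions (2.10 and later, an assumption about your environment), MultiHeadAttention accepts a use_causal_mask call argument, so each stacked layer would become, roughly:

# Sketch only: the causal mask stops position t from attending to positions > t
x = MultiHeadAttention(num_heads=3, key_dim=128)(x, x, use_causal_mask=True)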

4. Transformer (Encoder)

# Build the network
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, LSTM, Dropout, MultiHeadAttention, Input, Flatten
from keras.optimizers import Adam
from keras import layers

# Import the position embedding layer from keras_nlp
from keras_nlp.layers import PositionEmbedding
import keras
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        # Self-attention sub-layer with residual connection and layer norm
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Position-wise feed-forward sub-layer with residual connection and layer norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        # Override get_config so the model can be saved and reloaded;
        # store the constructor arguments, not the layer objects themselves
        config = super().get_config().copy()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate,
        })
        return config

inputs = Input(shape=(20,))
x = Embedding(10000, 128, input_length=20)(inputs)
pos = PositionEmbedding(sequence_length=20)(x)
x = x + pos

x = TransformerBlock(embed_dim=128, num_heads=8, ff_dim=256)(x)     # (None, 20, 128)

x = layers.GlobalAveragePooling1D()(x)                              # (None, 128)

x = Dense(10000, activation='softmax')(x)
# Build the model
model = Model(inputs=inputs, outputs=x)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

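Because TransformerBlock overrides get_config, the trained model can be saved and reloaded. As a custom layer it still has to be supplied via custom_objects at load time; the file name below is illustrative:

from keras.models import load_model

model.save('poem_transformer.h5')
reloaded = load_model('poem_transformer.h5',
                      custom_objects={'TransformerBlock': TransformerBlock})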
