赞
踩
-
- model = model_class.from_pretrained("t5-small")
- # print(model)
- #t5 = transformers.T5ForConditionalGeneration.from_pretrained("t5-small")
- #model = src.model.FiDT5(t5.config)
- for mod in model.decoder.block:
-
- print(mod)
- '''
- # 调用了t5的decoder,t5-small的decoder.block中共有6个T5Block,
- (0): T5LayerSelfAttention
- (1): T5LayerCrossAttention(
- (2): T5LayerFF(
- 具体如下:
- (decoder): T5Stack(
- (embed_tokens): Embedding(32128, 512)
- (block): ModuleList(
- (0): T5Block(
- ......
-
- (5): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerCrossAttention(
- (EncDecAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (2): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- (final_layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (lm_head): Linear(in_features=512, out_features=32128, bias=False)
- )
- '''
- FiDT5(
- (shared): Embedding(32128, 512)
- (encoder): EncoderWrapper(
- (encoder): T5Stack(
- (embed_tokens): Embedding(32128, 512)
- (block): ModuleList(
- (0): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- (relative_attention_bias): Embedding(32, 8)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- ...
-
- (5): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- )
- (final_layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- FiDT5(
- (shared): Embedding(32128, 512)
- (encoder): EncoderWrapper(
- (encoder): T5Stack(
- (embed_tokens): Embedding(32128, 512)
- (block): ModuleList(
- (0): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- (relative_attention_bias): Embedding(32, 8)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- (1): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- (2): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- (3): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- (4): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- (5): CheckpointWrapper(
- (module): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- )
- (final_layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- (decoder): T5Stack(
- (embed_tokens): Embedding(32128, 512)
- (block): ModuleList(
- (0): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- (relative_attention_bias): Embedding(32, 8)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerCrossAttention(
- (EncDecAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (2): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- (1): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerCrossAttention(
- (EncDecAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (2): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- (2): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerCrossAttention(
- (EncDecAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (2): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- (3): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerCrossAttention(
- (EncDecAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (2): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- (4): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerCrossAttention(
- (EncDecAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (2): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- (5): T5Block(
- (layer): ModuleList(
- (0): T5LayerSelfAttention(
- (SelfAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (1): T5LayerCrossAttention(
- (EncDecAttention): T5Attention(
- (q): Linear(in_features=512, out_features=512, bias=False)
- (k): Linear(in_features=512, out_features=512, bias=False)
- (v): Linear(in_features=512, out_features=512, bias=False)
- (o): Linear(in_features=512, out_features=512, bias=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (2): T5LayerFF(
- (DenseReluDense): T5DenseReluDense(
- (wi): Linear(in_features=512, out_features=2048, bias=False)
- (wo): Linear(in_features=2048, out_features=512, bias=False)
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- )
- )
- )
- (final_layer_norm): T5LayerNorm()
- (dropout): Dropout(p=0.1, inplace=False)
- )
- (lm_head): Linear(in_features=512, out_features=32128, bias=False)
- )
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。