当前位置:   article > 正文

T5-small的encoder,decoder模型结构

t5-small

 decoder结构

  1. model = model_class.from_pretrained("t5-small")
  2. # print(model)
  3. #t5 = transformers.T5ForConditionalGeneration.from_pretrained("t5-small")
  4. #model = src.model.FiDT5(t5.config)
  5. for mod in model.decoder.block:
  6.     print(mod)
  7. '''
  8. # 调用了t5的decoder,t5-small的decoder.block中共有6个T5Block,
  9. (0): T5LayerSelfAttention(
  10. (1): T5LayerCrossAttention(
  11. (2): T5LayerFF(
  12. 具体如下:
  13. (decoder): T5Stack(
  14. (embed_tokens): Embedding(32128, 512)
  15. (block): ModuleList(
  16. (0): T5Block(
  17. ......
  18. (5): T5Block(
  19. (layer): ModuleList(
  20. (0): T5LayerSelfAttention(
  21. (SelfAttention): T5Attention(
  22. (q): Linear(in_features=512, out_features=512, bias=False)
  23. (k): Linear(in_features=512, out_features=512, bias=False)
  24. (v): Linear(in_features=512, out_features=512, bias=False)
  25. (o): Linear(in_features=512, out_features=512, bias=False)
  26. )
  27. (layer_norm): T5LayerNorm()
  28. (dropout): Dropout(p=0.1, inplace=False)
  29. )
  30. (1): T5LayerCrossAttention(
  31. (EncDecAttention): T5Attention(
  32. (q): Linear(in_features=512, out_features=512, bias=False)
  33. (k): Linear(in_features=512, out_features=512, bias=False)
  34. (v): Linear(in_features=512, out_features=512, bias=False)
  35. (o): Linear(in_features=512, out_features=512, bias=False)
  36. )
  37. (layer_norm): T5LayerNorm()
  38. (dropout): Dropout(p=0.1, inplace=False)
  39. )
  40. (2): T5LayerFF(
  41. (DenseReluDense): T5DenseReluDense(
  42. (wi): Linear(in_features=512, out_features=2048, bias=False)
  43. (wo): Linear(in_features=2048, out_features=512, bias=False)
  44. (dropout): Dropout(p=0.1, inplace=False)
  45. )
  46. (layer_norm): T5LayerNorm()
  47. (dropout): Dropout(p=0.1, inplace=False)
  48. )
  49. )
  50. )
  51. )
  52. (final_layer_norm): T5LayerNorm()
  53. (dropout): Dropout(p=0.1, inplace=False)
  54. )
  55. (lm_head): Linear(in_features=512, out_features=32128, bias=False)
  56. )
  57. '''

encoder结构

  1. FiDT5(
  2. (shared): Embedding(32128, 512)
  3. (encoder): EncoderWrapper(
  4. (encoder): T5Stack(
  5. (embed_tokens): Embedding(32128, 512)
  6. (block): ModuleList(
  7. (0): CheckpointWrapper(
  8. (module): T5Block(
  9. (layer): ModuleList(
  10. (0): T5LayerSelfAttention(
  11. (SelfAttention): T5Attention(
  12. (q): Linear(in_features=512, out_features=512, bias=False)
  13. (k): Linear(in_features=512, out_features=512, bias=False)
  14. (v): Linear(in_features=512, out_features=512, bias=False)
  15. (o): Linear(in_features=512, out_features=512, bias=False)
  16. (relative_attention_bias): Embedding(32, 8)
  17. )
  18. (layer_norm): T5LayerNorm()
  19. (dropout): Dropout(p=0.1, inplace=False)
  20. )
  21. (1): T5LayerFF(
  22. (DenseReluDense): T5DenseReluDense(
  23. (wi): Linear(in_features=512, out_features=2048, bias=False)
  24. (wo): Linear(in_features=2048, out_features=512, bias=False)
  25. (dropout): Dropout(p=0.1, inplace=False)
  26. )
  27. (layer_norm): T5LayerNorm()
  28. (dropout): Dropout(p=0.1, inplace=False)
  29. )
  30. )
  31. )
  32. )
  33. ...
  34. (5): CheckpointWrapper(
  35. (module): T5Block(
  36. (layer): ModuleList(
  37. (0): T5LayerSelfAttention(
  38. (SelfAttention): T5Attention(
  39. (q): Linear(in_features=512, out_features=512, bias=False)
  40. (k): Linear(in_features=512, out_features=512, bias=False)
  41. (v): Linear(in_features=512, out_features=512, bias=False)
  42. (o): Linear(in_features=512, out_features=512, bias=False)
  43. )
  44. (layer_norm): T5LayerNorm()
  45. (dropout): Dropout(p=0.1, inplace=False)
  46. )
  47. (1): T5LayerFF(
  48. (DenseReluDense): T5DenseReluDense(
  49. (wi): Linear(in_features=512, out_features=2048, bias=False)
  50. (wo): Linear(in_features=2048, out_features=512, bias=False)
  51. (dropout): Dropout(p=0.1, inplace=False)
  52. )
  53. (layer_norm): T5LayerNorm()
  54. (dropout): Dropout(p=0.1, inplace=False)
  55. )
  56. )
  57. )
  58. )
  59. )
  60. (final_layer_norm): T5LayerNorm()
  61. (dropout): Dropout(p=0.1, inplace=False)
  62. )
  63. )

全部的结构

  1. FiDT5(
  2. (shared): Embedding(32128, 512)
  3. (encoder): EncoderWrapper(
  4. (encoder): T5Stack(
  5. (embed_tokens): Embedding(32128, 512)
  6. (block): ModuleList(
  7. (0): CheckpointWrapper(
  8. (module): T5Block(
  9. (layer): ModuleList(
  10. (0): T5LayerSelfAttention(
  11. (SelfAttention): T5Attention(
  12. (q): Linear(in_features=512, out_features=512, bias=False)
  13. (k): Linear(in_features=512, out_features=512, bias=False)
  14. (v): Linear(in_features=512, out_features=512, bias=False)
  15. (o): Linear(in_features=512, out_features=512, bias=False)
  16. (relative_attention_bias): Embedding(32, 8)
  17. )
  18. (layer_norm): T5LayerNorm()
  19. (dropout): Dropout(p=0.1, inplace=False)
  20. )
  21. (1): T5LayerFF(
  22. (DenseReluDense): T5DenseReluDense(
  23. (wi): Linear(in_features=512, out_features=2048, bias=False)
  24. (wo): Linear(in_features=2048, out_features=512, bias=False)
  25. (dropout): Dropout(p=0.1, inplace=False)
  26. )
  27. (layer_norm): T5LayerNorm()
  28. (dropout): Dropout(p=0.1, inplace=False)
  29. )
  30. )
  31. )
  32. )
  33. (1): CheckpointWrapper(
  34. (module): T5Block(
  35. (layer): ModuleList(
  36. (0): T5LayerSelfAttention(
  37. (SelfAttention): T5Attention(
  38. (q): Linear(in_features=512, out_features=512, bias=False)
  39. (k): Linear(in_features=512, out_features=512, bias=False)
  40. (v): Linear(in_features=512, out_features=512, bias=False)
  41. (o): Linear(in_features=512, out_features=512, bias=False)
  42. )
  43. (layer_norm): T5LayerNorm()
  44. (dropout): Dropout(p=0.1, inplace=False)
  45. )
  46. (1): T5LayerFF(
  47. (DenseReluDense): T5DenseReluDense(
  48. (wi): Linear(in_features=512, out_features=2048, bias=False)
  49. (wo): Linear(in_features=2048, out_features=512, bias=False)
  50. (dropout): Dropout(p=0.1, inplace=False)
  51. )
  52. (layer_norm): T5LayerNorm()
  53. (dropout): Dropout(p=0.1, inplace=False)
  54. )
  55. )
  56. )
  57. )
  58. (2): CheckpointWrapper(
  59. (module): T5Block(
  60. (layer): ModuleList(
  61. (0): T5LayerSelfAttention(
  62. (SelfAttention): T5Attention(
  63. (q): Linear(in_features=512, out_features=512, bias=False)
  64. (k): Linear(in_features=512, out_features=512, bias=False)
  65. (v): Linear(in_features=512, out_features=512, bias=False)
  66. (o): Linear(in_features=512, out_features=512, bias=False)
  67. )
  68. (layer_norm): T5LayerNorm()
  69. (dropout): Dropout(p=0.1, inplace=False)
  70. )
  71. (1): T5LayerFF(
  72. (DenseReluDense): T5DenseReluDense(
  73. (wi): Linear(in_features=512, out_features=2048, bias=False)
  74. (wo): Linear(in_features=2048, out_features=512, bias=False)
  75. (dropout): Dropout(p=0.1, inplace=False)
  76. )
  77. (layer_norm): T5LayerNorm()
  78. (dropout): Dropout(p=0.1, inplace=False)
  79. )
  80. )
  81. )
  82. )
  83. (3): CheckpointWrapper(
  84. (module): T5Block(
  85. (layer): ModuleList(
  86. (0): T5LayerSelfAttention(
  87. (SelfAttention): T5Attention(
  88. (q): Linear(in_features=512, out_features=512, bias=False)
  89. (k): Linear(in_features=512, out_features=512, bias=False)
  90. (v): Linear(in_features=512, out_features=512, bias=False)
  91. (o): Linear(in_features=512, out_features=512, bias=False)
  92. )
  93. (layer_norm): T5LayerNorm()
  94. (dropout): Dropout(p=0.1, inplace=False)
  95. )
  96. (1): T5LayerFF(
  97. (DenseReluDense): T5DenseReluDense(
  98. (wi): Linear(in_features=512, out_features=2048, bias=False)
  99. (wo): Linear(in_features=2048, out_features=512, bias=False)
  100. (dropout): Dropout(p=0.1, inplace=False)
  101. )
  102. (layer_norm): T5LayerNorm()
  103. (dropout): Dropout(p=0.1, inplace=False)
  104. )
  105. )
  106. )
  107. )
  108. (4): CheckpointWrapper(
  109. (module): T5Block(
  110. (layer): ModuleList(
  111. (0): T5LayerSelfAttention(
  112. (SelfAttention): T5Attention(
  113. (q): Linear(in_features=512, out_features=512, bias=False)
  114. (k): Linear(in_features=512, out_features=512, bias=False)
  115. (v): Linear(in_features=512, out_features=512, bias=False)
  116. (o): Linear(in_features=512, out_features=512, bias=False)
  117. )
  118. (layer_norm): T5LayerNorm()
  119. (dropout): Dropout(p=0.1, inplace=False)
  120. )
  121. (1): T5LayerFF(
  122. (DenseReluDense): T5DenseReluDense(
  123. (wi): Linear(in_features=512, out_features=2048, bias=False)
  124. (wo): Linear(in_features=2048, out_features=512, bias=False)
  125. (dropout): Dropout(p=0.1, inplace=False)
  126. )
  127. (layer_norm): T5LayerNorm()
  128. (dropout): Dropout(p=0.1, inplace=False)
  129. )
  130. )
  131. )
  132. )
  133. (5): CheckpointWrapper(
  134. (module): T5Block(
  135. (layer): ModuleList(
  136. (0): T5LayerSelfAttention(
  137. (SelfAttention): T5Attention(
  138. (q): Linear(in_features=512, out_features=512, bias=False)
  139. (k): Linear(in_features=512, out_features=512, bias=False)
  140. (v): Linear(in_features=512, out_features=512, bias=False)
  141. (o): Linear(in_features=512, out_features=512, bias=False)
  142. )
  143. (layer_norm): T5LayerNorm()
  144. (dropout): Dropout(p=0.1, inplace=False)
  145. )
  146. (1): T5LayerFF(
  147. (DenseReluDense): T5DenseReluDense(
  148. (wi): Linear(in_features=512, out_features=2048, bias=False)
  149. (wo): Linear(in_features=2048, out_features=512, bias=False)
  150. (dropout): Dropout(p=0.1, inplace=False)
  151. )
  152. (layer_norm): T5LayerNorm()
  153. (dropout): Dropout(p=0.1, inplace=False)
  154. )
  155. )
  156. )
  157. )
  158. )
  159. (final_layer_norm): T5LayerNorm()
  160. (dropout): Dropout(p=0.1, inplace=False)
  161. )
  162. )
  163. (decoder): T5Stack(
  164. (embed_tokens): Embedding(32128, 512)
  165. (block): ModuleList(
  166. (0): T5Block(
  167. (layer): ModuleList(
  168. (0): T5LayerSelfAttention(
  169. (SelfAttention): T5Attention(
  170. (q): Linear(in_features=512, out_features=512, bias=False)
  171. (k): Linear(in_features=512, out_features=512, bias=False)
  172. (v): Linear(in_features=512, out_features=512, bias=False)
  173. (o): Linear(in_features=512, out_features=512, bias=False)
  174. (relative_attention_bias): Embedding(32, 8)
  175. )
  176. (layer_norm): T5LayerNorm()
  177. (dropout): Dropout(p=0.1, inplace=False)
  178. )
  179. (1): T5LayerCrossAttention(
  180. (EncDecAttention): T5Attention(
  181. (q): Linear(in_features=512, out_features=512, bias=False)
  182. (k): Linear(in_features=512, out_features=512, bias=False)
  183. (v): Linear(in_features=512, out_features=512, bias=False)
  184. (o): Linear(in_features=512, out_features=512, bias=False)
  185. )
  186. (layer_norm): T5LayerNorm()
  187. (dropout): Dropout(p=0.1, inplace=False)
  188. )
  189. (2): T5LayerFF(
  190. (DenseReluDense): T5DenseReluDense(
  191. (wi): Linear(in_features=512, out_features=2048, bias=False)
  192. (wo): Linear(in_features=2048, out_features=512, bias=False)
  193. (dropout): Dropout(p=0.1, inplace=False)
  194. )
  195. (layer_norm): T5LayerNorm()
  196. (dropout): Dropout(p=0.1, inplace=False)
  197. )
  198. )
  199. )
  200. (1): T5Block(
  201. (layer): ModuleList(
  202. (0): T5LayerSelfAttention(
  203. (SelfAttention): T5Attention(
  204. (q): Linear(in_features=512, out_features=512, bias=False)
  205. (k): Linear(in_features=512, out_features=512, bias=False)
  206. (v): Linear(in_features=512, out_features=512, bias=False)
  207. (o): Linear(in_features=512, out_features=512, bias=False)
  208. )
  209. (layer_norm): T5LayerNorm()
  210. (dropout): Dropout(p=0.1, inplace=False)
  211. )
  212. (1): T5LayerCrossAttention(
  213. (EncDecAttention): T5Attention(
  214. (q): Linear(in_features=512, out_features=512, bias=False)
  215. (k): Linear(in_features=512, out_features=512, bias=False)
  216. (v): Linear(in_features=512, out_features=512, bias=False)
  217. (o): Linear(in_features=512, out_features=512, bias=False)
  218. )
  219. (layer_norm): T5LayerNorm()
  220. (dropout): Dropout(p=0.1, inplace=False)
  221. )
  222. (2): T5LayerFF(
  223. (DenseReluDense): T5DenseReluDense(
  224. (wi): Linear(in_features=512, out_features=2048, bias=False)
  225. (wo): Linear(in_features=2048, out_features=512, bias=False)
  226. (dropout): Dropout(p=0.1, inplace=False)
  227. )
  228. (layer_norm): T5LayerNorm()
  229. (dropout): Dropout(p=0.1, inplace=False)
  230. )
  231. )
  232. )
  233. (2): T5Block(
  234. (layer): ModuleList(
  235. (0): T5LayerSelfAttention(
  236. (SelfAttention): T5Attention(
  237. (q): Linear(in_features=512, out_features=512, bias=False)
  238. (k): Linear(in_features=512, out_features=512, bias=False)
  239. (v): Linear(in_features=512, out_features=512, bias=False)
  240. (o): Linear(in_features=512, out_features=512, bias=False)
  241. )
  242. (layer_norm): T5LayerNorm()
  243. (dropout): Dropout(p=0.1, inplace=False)
  244. )
  245. (1): T5LayerCrossAttention(
  246. (EncDecAttention): T5Attention(
  247. (q): Linear(in_features=512, out_features=512, bias=False)
  248. (k): Linear(in_features=512, out_features=512, bias=False)
  249. (v): Linear(in_features=512, out_features=512, bias=False)
  250. (o): Linear(in_features=512, out_features=512, bias=False)
  251. )
  252. (layer_norm): T5LayerNorm()
  253. (dropout): Dropout(p=0.1, inplace=False)
  254. )
  255. (2): T5LayerFF(
  256. (DenseReluDense): T5DenseReluDense(
  257. (wi): Linear(in_features=512, out_features=2048, bias=False)
  258. (wo): Linear(in_features=2048, out_features=512, bias=False)
  259. (dropout): Dropout(p=0.1, inplace=False)
  260. )
  261. (layer_norm): T5LayerNorm()
  262. (dropout): Dropout(p=0.1, inplace=False)
  263. )
  264. )
  265. )
  266. (3): T5Block(
  267. (layer): ModuleList(
  268. (0): T5LayerSelfAttention(
  269. (SelfAttention): T5Attention(
  270. (q): Linear(in_features=512, out_features=512, bias=False)
  271. (k): Linear(in_features=512, out_features=512, bias=False)
  272. (v): Linear(in_features=512, out_features=512, bias=False)
  273. (o): Linear(in_features=512, out_features=512, bias=False)
  274. )
  275. (layer_norm): T5LayerNorm()
  276. (dropout): Dropout(p=0.1, inplace=False)
  277. )
  278. (1): T5LayerCrossAttention(
  279. (EncDecAttention): T5Attention(
  280. (q): Linear(in_features=512, out_features=512, bias=False)
  281. (k): Linear(in_features=512, out_features=512, bias=False)
  282. (v): Linear(in_features=512, out_features=512, bias=False)
  283. (o): Linear(in_features=512, out_features=512, bias=False)
  284. )
  285. (layer_norm): T5LayerNorm()
  286. (dropout): Dropout(p=0.1, inplace=False)
  287. )
  288. (2): T5LayerFF(
  289. (DenseReluDense): T5DenseReluDense(
  290. (wi): Linear(in_features=512, out_features=2048, bias=False)
  291. (wo): Linear(in_features=2048, out_features=512, bias=False)
  292. (dropout): Dropout(p=0.1, inplace=False)
  293. )
  294. (layer_norm): T5LayerNorm()
  295. (dropout): Dropout(p=0.1, inplace=False)
  296. )
  297. )
  298. )
  299. (4): T5Block(
  300. (layer): ModuleList(
  301. (0): T5LayerSelfAttention(
  302. (SelfAttention): T5Attention(
  303. (q): Linear(in_features=512, out_features=512, bias=False)
  304. (k): Linear(in_features=512, out_features=512, bias=False)
  305. (v): Linear(in_features=512, out_features=512, bias=False)
  306. (o): Linear(in_features=512, out_features=512, bias=False)
  307. )
  308. (layer_norm): T5LayerNorm()
  309. (dropout): Dropout(p=0.1, inplace=False)
  310. )
  311. (1): T5LayerCrossAttention(
  312. (EncDecAttention): T5Attention(
  313. (q): Linear(in_features=512, out_features=512, bias=False)
  314. (k): Linear(in_features=512, out_features=512, bias=False)
  315. (v): Linear(in_features=512, out_features=512, bias=False)
  316. (o): Linear(in_features=512, out_features=512, bias=False)
  317. )
  318. (layer_norm): T5LayerNorm()
  319. (dropout): Dropout(p=0.1, inplace=False)
  320. )
  321. (2): T5LayerFF(
  322. (DenseReluDense): T5DenseReluDense(
  323. (wi): Linear(in_features=512, out_features=2048, bias=False)
  324. (wo): Linear(in_features=2048, out_features=512, bias=False)
  325. (dropout): Dropout(p=0.1, inplace=False)
  326. )
  327. (layer_norm): T5LayerNorm()
  328. (dropout): Dropout(p=0.1, inplace=False)
  329. )
  330. )
  331. )
  332. (5): T5Block(
  333. (layer): ModuleList(
  334. (0): T5LayerSelfAttention(
  335. (SelfAttention): T5Attention(
  336. (q): Linear(in_features=512, out_features=512, bias=False)
  337. (k): Linear(in_features=512, out_features=512, bias=False)
  338. (v): Linear(in_features=512, out_features=512, bias=False)
  339. (o): Linear(in_features=512, out_features=512, bias=False)
  340. )
  341. (layer_norm): T5LayerNorm()
  342. (dropout): Dropout(p=0.1, inplace=False)
  343. )
  344. (1): T5LayerCrossAttention(
  345. (EncDecAttention): T5Attention(
  346. (q): Linear(in_features=512, out_features=512, bias=False)
  347. (k): Linear(in_features=512, out_features=512, bias=False)
  348. (v): Linear(in_features=512, out_features=512, bias=False)
  349. (o): Linear(in_features=512, out_features=512, bias=False)
  350. )
  351. (layer_norm): T5LayerNorm()
  352. (dropout): Dropout(p=0.1, inplace=False)
  353. )
  354. (2): T5LayerFF(
  355. (DenseReluDense): T5DenseReluDense(
  356. (wi): Linear(in_features=512, out_features=2048, bias=False)
  357. (wo): Linear(in_features=2048, out_features=512, bias=False)
  358. (dropout): Dropout(p=0.1, inplace=False)
  359. )
  360. (layer_norm): T5LayerNorm()
  361. (dropout): Dropout(p=0.1, inplace=False)
  362. )
  363. )
  364. )
  365. )
  366. (final_layer_norm): T5LayerNorm()
  367. (dropout): Dropout(p=0.1, inplace=False)
  368. )
  369. (lm_head): Linear(in_features=512, out_features=32128, bias=False)
  370. )

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Gausst松鼠会/article/detail/356957
推荐阅读
相关标签
  

闽ICP备14008679号