赞
踩
- from transformers import AutoTokenizer
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- from transformers import AutoModel
-
- text = "I don't really like working, but I need the money!"
-
- tokens = tokenizer.tokenize(text)
- print(tokens)
-
- dic = tokenizer(text, padding="max_length", truncation=True, max_length=39, return_tensors="pt")
- # tokens ['i', 'don', "'", 't', 'really', 'like', 'working', ',', 'but', 'i', 'need', 'the', 'money', '!']
- #'input_ids': [ 101, 1045, 2123, 1005, 1056, 2428, 2066, 2551, 1010, 2021, 1045, 2342,
- # 1996, 2769, 999, 102, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
- # 这里说明了tokenizer获取input_ids时,是自带了分词功能的。当然,只是分词的话,调用.tokenize就行
-
- # Batch example: a list of short sentences is tokenized in one call.
- text= ['I like he', 'like he', 'he but', 'but he', 'he like she', 'like', 'she like I', 'I he she']
-
- # [bs]
-
- tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-
- dic = tokenizer(text, padding="max_length", truncation=True, max_length=10, return_tensors="pt")
-
- input_ids = dic['input_ids'] # [bs, max_length]; 101 is added at the start and 102 at the end
-
- token_type_ids = dic['token_type_ids'] # [bs, max_length]
-
- attention_mask = dic['attention_mask']# [bs, max_length]
- # The comments below are only meant to show what these tensors look like.
- # NOTE(review): the ids in the input_ids example look illustrative rather than
- # real tokenizer output - confirm by printing input_ids.
- # attention_mask
-
- # [[1,1,1,1,1,0,0,0,0,0],
-
- # [1,1,1,1,0,0,0,0,0,0]
-
- # ....]
- # input_ids
-
- # [[101,1045,1001,2006,102,0,0,0,0,0],
-
- # [101,1001,2006,102,0,0,0,0,0,0]
-
- # ....]
- class PretrainedLanguageModel(nn.Module):
- def __init__(self, pretrained_language_model_name):
- super(PretrainedLanguageModel, self).__init__()
- self.model = AutoModel.from_pretrained(pretrained_language_model_name)
-
- def forward(self, input_ids, token_type_ids, attention_mask):
- output = self.model(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask).last_hidden_state
- return output
-
-
- # Build the wrapper around bert-base-uncased and run it on the batch tokenized above.
- model = PretrainedLanguageModel('bert-base-uncased')
-
- output = model(input_ids, token_type_ids, attention_mask)
- # [bs, max_length, hidden_dim] - the result always has this shape; it is the representation of the text.
- print(input_ids.shape)
- print(output.shape)
![](https://csdnimg.cn/release/blogv2/dist/pc/img/newCodeMoreWhite.png)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。