1. My plan is to take an open-source model and fine-tune it on my own dataset.
2. I started from training code that already runs correctly with bert-base-chinese and swapped the model for Chinese_Chat_T5_Base.
3. It now fails with: has no attribute 'batch_encode_plus'. Full console output and traceback:
C:\Users\Admin\anaconda3\envs\hug_gpu2_fenlei\python.exe C:/Users/Admin/PycharmProjects/hug3/test2/hug/data/chat5_muban.py
=======================#1. Configure local model===========================
=======================# Local model configured===========================
=======================#2. Test / encode sample sentences===========================
=======================# Test / encode sample sentences===========================
=======================#3. Chapter 7 / encode sample sentences===========================
Traceback (most recent call last):
  File "C:\Users\Admin\PycharmProjects\hug3\test2\hug\data\chat5_muban.py", line 34, in <module>
    out = token.batch_encode_plus(
  File "C:\Users\Admin\anaconda3\envs\hug_gpu2_fenlei\lib\site-packages\torch\nn\modules\module.py", line 778, in __getattr__
    raise ModuleAttributeError("'{}' object has no attribute '{}'".format(
torch.nn.modules.module.ModuleAttributeError: 'T5ForConditionalGeneration' object has no attribute 'batch_encode_plus'

Process finished with exit code 1
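Note on the traceback: batch_encode_plus() is a method of tokenizer objects, not of models. In the script below (item 4), the variable token is created with AutoModelForSeq2SeqLM.from_pretrained(), which returns a T5ForConditionalGeneration, and that is exactly the object named in the error. A minimal sketch of the intended split, assuming C:/bert/Chinese_Chat_T5_Base/ contains both tokenizer and model files (this is not the original script):

# Minimal sketch: load the tokenizer and the model separately.
# batch_encode_plus() lives on the tokenizer, not on the model.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_PATH = 'C:/bert/Chinese_Chat_T5_Base/'
token = AutoTokenizer.from_pretrained(MODEL_PATH)           # tokenizer -> has batch_encode_plus
model2 = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)  # T5ForConditionalGeneration

out = token.batch_encode_plus(
    batch_text_or_text_pairs=['从明天起,做一个幸福的人。', '喂马,劈柴,周游世界。'],
    truncation=True,
    padding='max_length',
    max_length=17,
    return_tensors='pt',
)
print(out['input_ids'].shape)

(On recent transformers versions, calling the tokenizer directly, token(...), is the preferred equivalent of batch_encode_plus.)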
4. The source code:
import numpy as np
import torch
from transformers import BertTokenizer, BertConfig, BertForMaskedLM, BertForNextSentencePrediction
from transformers import BertModel
import os
import numpy as np
import torch
# from transformers import BertTokenizer, BertConfig, BertForMaskedLM, BertForNextSentencePrediction
from transformers import BertModel

os.environ["CUDA_VISIBLE_DEVICES"] = '0'

import torch
from torch import cuda
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

print("=======================#1. Configure local model===========================")
model_name = 'Chinese_Chat_T5_Base'
MODEL_PATH = 'C:/bert/Chinese_Chat_T5_Base/'
token = AutoModelForSeq2SeqLM.from_pretrained("C:/bert/Chinese_Chat_T5_Base/")
model2 = AutoModelForSeq2SeqLM.from_pretrained("C:/bert/Chinese_Chat_T5_Base/")
print("=======================# Local model configured===========================")

print("=======================#2. Test / encode sample sentences===========================")
# model2 = BertModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
# # out2=model2.batc
print("=======================# Test / encode sample sentences===========================")

print("=======================#3. Chapter 7 / encode sample sentences===========================")
out = token.batch_encode_plus(
    batch_text_or_text_pairs=['从明天起,做一个幸福的人。', '喂马,劈柴,周游世界。'],
    truncation=True,
    padding='max_length',
    max_length=17,
    return_tensors='pt',
    return_length=True
)
# print(out)
# print(out.last_hidden_state)

# Inspect the encoded output
# for k, v in out.items():
#     print(k, v.shape)

# Decode the ids back into a sentence
# print(token.decode(out['input_ids'][0]))
print("=======================#3. Chapter 7 / encode sample sentences done===========================")

print("=======================#4. Chapter 7 / define the dataset===========================")
import torch
from datasets import load_from_disk


class Dataset(torch.utils.data.Dataset):
    def __init__(self, split):
        self.dataset = load_from_disk('../data/ChnSentiCorp')[split]

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, i):
        text = self.dataset[i]['text']
        label = self.dataset[i]['label']
        return text, label


dataset = Dataset('train')
len(dataset), dataset[20]
print("=======================#4. Chapter 7 / define the dataset done===========================")

print("=======================#5. Chapter 7 / choose the compute device===========================")
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
    print('we will use the GPU: ', torch.cuda.get_device_name(0))
print("=======================#5. Chapter 7 / choose the compute device done===========================")

print("=======================#6. Chapter 7 / collate function===========================")
def collate_fn(data):
    sents = [i[0] for i in data]
    labels = [i[1] for i in data]
    # Encode the batch
    data = token.batch_encode_plus(batch_text_or_text_pairs=sents,
                                   truncation=True,
                                   padding='max_length',
                                   max_length=500,
                                   return_tensors='pt',
                                   return_length=True)
    # input_ids: token ids after encoding
    # attention_mask: 0 at padded positions, 1 elsewhere
    input_ids = data['input_ids']
    attention_mask = data['attention_mask']
    token_type_ids = data['token_type_ids']
    labels = torch.LongTensor(labels)
    # Move the tensors to the compute device
    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)
    token_type_ids = token_type_ids.to(device)
    labels = labels.to(device)
    return input_ids, attention_mask, token_type_ids, labels
print("=======================#6. Chapter 7 / collate function done===========================")

print("=======================#7. Chapter 7 / collate function dry run===========================")
# A mock batch of data
data = [
    ('你站在桥上看风景', 1),
    ('看风景的人在楼上看你', 0),
    ('明月装饰了你的窗子', 1),
    ('你装饰了别人的梦', 0),
]
# Dry run
input_ids, attention_mask, token_type_ids, labels = collate_fn(data)
input_ids.shape, attention_mask.shape, token_type_ids.shape, labels
print("=======================#7. Chapter 7 / collate function dry run done===========================")

print("=======================#8. Chapter 7 / data loader===========================")
loader = torch.utils.data.DataLoader(dataset=dataset,
                                     batch_size=16,
                                     collate_fn=collate_fn,
                                     shuffle=True,
                                     drop_last=True)
len(loader)
print("=======================#8. Chapter 7 / data loader done===========================")

print("=======================#9. Chapter 7 / inspect a data sample===========================")
for i, (input_ids, attention_mask, token_type_ids, labels) in enumerate(loader):
    break
input_ids.shape, attention_mask.shape, token_type_ids.shape, labels
print("=======================#9. Chapter 7 / inspect a data sample done===========================")

print("=======================#10. Chapter 7 / load the pretrained model===========================")
from transformers import BertModel

print("===============How do I point this at a local model?========# Chapter 7 / download the pretrained model===========================")
pretrained = BertModel.from_pretrained(model_name)
# model: the pretrained model
model2 = AutoModelForSeq2SeqLM.from_pretrained("C:/bert/Chinese_Chat_T5_Base/")
# Count the parameters (in units of 10,000)
sum(i.numel() for i in pretrained.parameters()) / 10000
print("=======================#10. Chapter 7 / load the pretrained model done===========================")

print("=======================#11. Chapter 7 / freeze the pretrained model, no gradients needed===========================")
for param in pretrained.parameters():
    param.requires_grad_(False)
print("=======================#11. Chapter 7 / freeze the pretrained model, no gradients needed done===========================")

print("=======================#12. Chapter 7 / pretrained model dry run===========================")
# Move to the compute device
pretrained.to(device)
# model2 dry run
model2.to(device)
# Forward pass
out = pretrained(input_ids=input_ids,
                 attention_mask=attention_mask,
                 token_type_ids=token_type_ids)
out.last_hidden_state.shape
# out.last_hidden_state.shape
# print(out.last_hidden_state.shape)
# "Traceback (most recent call last):
#   File "C:/Users/Admin/PycharmProjects/hug3/test2/hug/data/fenlei.py", line 136, in <module>
#     out.last_hidden_state.shape
# AttributeError: 'tuple' object has no attribute 'last_hidden_state'"
print("=======================#12. Chapter 7 / pretrained model dry run done===========================")

print("=======================#13. Chapter 7 / define the downstream model===========================")
class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(in_features=768, out_features=2)

    def forward(self, input_ids, attention_mask, token_type_ids):
        # Use the frozen pretrained model to extract features
        with torch.no_grad():
            out = pretrained(input_ids=input_ids,
                             attention_mask=attention_mask,
                             token_type_ids=token_type_ids)
        # Classify using only the first token's hidden state
        out = self.fc(out.last_hidden_state[:, 0])
        out = out.softmax(dim=1)
        return out


model = Model()
# Move to the compute device
model.to(device)
# Dry run
model(input_ids=input_ids,
      attention_mask=attention_mask,
      token_type_ids=token_type_ids).shape
print("=======================#13. Chapter 7 / define the downstream model done===========================")

print("=======================#14. Chapter 7 / training===========================")
from transformers import AdamW
# from transformers.optimization import get_constant_schedule
from transformers.optimization import get_scheduler


def train():
    # Define the optimizer
    optimizer = AdamW(model.parameters(), lr=5e-4)
    # Define the loss function
    criterion = torch.nn.CrossEntropyLoss()
    # Define the learning-rate scheduler
    scheduler = get_scheduler(name='linear',
                              num_warmup_steps=0,
                              num_training_steps=len(loader),
                              optimizer=optimizer)
    # Switch the model to training mode
    model.train()
    # Iterate over the training set in batches
    for i, (input_ids, attention_mask, token_type_ids, labels) in enumerate(loader):
        # Forward pass
        out = model(input_ids=input_ids,
                    attention_mask=attention_mask,
                    token_type_ids=token_type_ids)
        # Compute the loss and optimize the parameters with gradient descent
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        # Periodically report loss, learning rate, and accuracy
        if i % 10 == 0:
            out = out.argmax(dim=1)
            accuracy = (out == labels).sum().item() / len(labels)
            lr = optimizer.state_dict()['param_groups'][0]['lr']
            print(i, loss.item(), lr, accuracy)


train()
print("=======================#14. Chapter 7 / training done===========================")

print("=======================#15. Chapter 7 / testing===========================")
def test():
    # Define the test-set data loader
    loader_test = torch.utils.data.DataLoader(dataset=Dataset('test'),
                                              batch_size=32,
                                              collate_fn=collate_fn,
                                              shuffle=True,
                                              drop_last=True)
    # Switch the downstream model to eval mode
    model.eval()
    correct = 0
    total = 0
    # Iterate over the test set in batches
    for i, (input_ids, attention_mask, token_type_ids, labels) in enumerate(loader_test):
        # 5 batches are enough; no need to run the whole set
        if i == 5:
            break
        print(i)
        # Forward pass without gradients
        with torch.no_grad():
            out = model(input_ids=input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids)
        # Tally the accuracy
        out = out.argmax(dim=1)
        correct += (out == labels).sum().item()
        total += len(labels)
    print(correct / total)


test()
print("=======================#15. Chapter 7 / testing done===========================")
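One further note on the script above: even after token is re-created with AutoTokenizer (see the sketch after the traceback), the collate_fn indexes data['token_type_ids'], but T5 tokenizers normally return only input_ids and attention_mask, and the T5 encoder does not accept token_type_ids, so that lookup would likely be the next failure. A hedged sketch of a collate function without token_type_ids, assuming token and device are defined as in the script:

import torch

# Hedged sketch: collate function for a tokenizer that returns no token_type_ids
def collate_fn_t5(data):
    sents = [i[0] for i in data]
    labels = [i[1] for i in data]
    # Calling the tokenizer directly is equivalent to batch_encode_plus
    enc = token(sents,
                truncation=True,
                padding='max_length',
                max_length=500,
                return_tensors='pt')
    input_ids = enc['input_ids'].to(device)
    attention_mask = enc['attention_mask'].to(device)
    labels = torch.LongTensor(labels).to(device)
    return input_ids, attention_mask, labels

The later pretrained = BertModel.from_pretrained(model_name) call and the token_type_ids arguments in the downstream Model.forward would need the same kind of adjustment, since a BERT model class cannot load T5 weights and the T5 encoder takes no token_type_ids; the 768-dimensional classifier head is an assumption that the encoder's hidden size is 768.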