The class is like a factory that produces bert_lstm objects. It contains one initializer (__init__) and two instance methods (forward and init_hidden).
An instance method operates on a bert_lstm that the factory has already initialized.
There is also the notion of a class method, which operates on the factory (the class) itself; it is not used here.
Constructing a bert_lstm requires output_size, n_layers, hidden_dim, bidirectional, the LSTM layer, and dropout (everything stored on self).
To call an instance method we must know which bert_lstm it works on, but when the class is defined no instance exists yet, so the first parameter slot is held open and named self. Once concrete initialization arguments are supplied and an instance has been created, the methods operate on that instance through self.
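To make the roles of __init__, instance methods, and self concrete, here is a minimal sketch with a hypothetical Counter class (not part of this project):

class Counter:
    def __init__(self, start):
        # self is the instance currently being constructed
        self.value = start

    def increment(self, step):
        # instance method: operates on one particular Counter through self
        self.value += step

    @classmethod
    def from_zero(cls):
        # class method: operates on the class (the "factory") itself; not used in bert_lstm
        return cls(0)

c = Counter(10)   # __init__ runs with self bound to the new instance
c.increment(5)    # equivalent to Counter.increment(c, 5): the open self slot is filled with c
print(c.value)    # 15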
class bert_lstm(nn.Module):
    def __init__(self, hidden_dim, output_size, n_layers, bidirectional=True, drop_prob=0.5):
        super(bert_lstm, self).__init__()
drop_prob: during training, network units are temporarily dropped with the given probability. The randomness helps prevent overfitting; 0.5 is the usual default.
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional

        # Bert ---------------- key point: the pretrained BERT model is embedded inside the custom model
        self.bert = BertModel.from_pretrained("bert_base_chinese")
        for param in self.bert.parameters():
            param.requires_grad = True

        # LSTM layers
        self.lstm = nn.LSTM(768, hidden_dim, n_layers, batch_first=True, bidirectional=bidirectional)

        # dropout layer
        self.dropout = nn.Dropout(drop_prob)

        # linear and sigmoid layers
        if bidirectional:
            self.fc = nn.Linear(hidden_dim * 2, output_size)
        else:
            self.fc = nn.Linear(hidden_dim, output_size)
        # self.sig = nn.Sigmoid()

    def forward(self, x, hidden):
        batch_size = x.size(0)

        # generate BERT character embeddings
        x = self.bert(x)[0]  # BERT character embeddings
For a tensor x, x.size(0) returns the length of its first dimension. Here the input has shape (batch_size, seq_length), so position 0 gives the batch size, which is stored in batch_size.
        # lstm_out
        # x = x.float()
        lstm_out, (hidden_last, cn_last) = self.lstm(x, hidden)
        # print("3shape:")
        # print(lstm_out.shape)      # [32, 100, 768]
        # print(hidden_last.shape)   # [4, 32, 384]
        # print(cn_last.shape)       # [4, 32, 384]

        # the bidirectional case needs separate handling
        if self.bidirectional:
            # forward direction, last layer, last time step
            hidden_last_L = hidden_last[-2]
            # print("hidden_last_L.shape:")
            # print(hidden_last_L.shape)   # [32, 384]
            # backward direction, last layer, last time step
            hidden_last_R = hidden_last[-1]
            # print("hidden_last_R.shape:")
            # print(hidden_last_R.shape)   # [32, 384]
            # concatenate the two directions
            hidden_last_out = torch.cat([hidden_last_L, hidden_last_R], dim=-1)
            # print("hidden_last_out.shape:")
            # print(hidden_last_out.shape, 'hidden_last_out')   # [32, 768]
        else:
            hidden_last_out = hidden_last[-1]   # [32, 384]

        # dropout and fully-connected layer
        out = self.dropout(hidden_last_out)
        # print('out.shape:')
        # print(out.shape)   # [32, 768]
        out = self.fc(out)

        return out
Calling the init_hidden method defined below returns the initial hidden state (h_0, c_0) passed to the LSTM.
    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        number = 1
        if self.bidirectional:
            number = 2
        if (USE_CUDA):
            hidden = (weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float().cuda(),
                      weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float().cuda()
                      )
        else:
            hidden = (weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float(),
                      weight.new(self.n_layers * number, batch_size, self.hidden_dim).zero_().float()
                      )
        return hidden
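As a sanity check on what init_hidden returns, here is a minimal sketch with a plain nn.LSTM using the same toy sizes as the shape comments above (it stands alone and does not load BERT): for a bidirectional LSTM, the hidden and cell states have shape (n_layers * 2, batch_size, hidden_dim).

import torch
import torch.nn as nn

n_layers, batch_size, hidden_dim = 2, 32, 384
lstm = nn.LSTM(768, hidden_dim, n_layers, batch_first=True, bidirectional=True)

# zero-initialized states, matching what init_hidden builds with weight.new(...).zero_()
h0 = torch.zeros(n_layers * 2, batch_size, hidden_dim)
c0 = torch.zeros(n_layers * 2, batch_size, hidden_dim)

x = torch.randn(batch_size, 100, 768)      # (batch, seq_len, BERT embedding size)
lstm_out, (hn, cn) = lstm(x, (h0, c0))
print(lstm_out.shape)  # torch.Size([32, 100, 768]) -- hidden_dim * 2 because of bidirectional
print(hn.shape)        # torch.Size([4, 32, 384])   -- n_layers * 2 directions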
With the class defined above, supplying the parameters constructs a bert_lstm:
output_size = 1
hidden_dim = 384 # 768/2
n_layers = 2
bidirectional = True  # True selects a bidirectional LSTM
net = bert_lstm(hidden_dim, output_size, n_layers, bidirectional)
The prediction function (it is not part of class bert_lstm):
The main class involved is BasicTokenizer, which performs basic operations such as case handling, Unicode normalization, punctuation splitting, lowercasing, Chinese character splitting, and accent stripping, and finally returns a list of tokens (for Chinese, a list of individual characters).
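A minimal sketch of this behaviour (assuming the pretrained files are available under the same "bert_base_chinese" path used by the code below; the corresponding Hugging Face hub name is "bert-base-chinese"):

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert_base_chinese")
sentence = "这家饭店的饭还不错"
print(tokenizer.tokenize(sentence))          # a list of single Chinese characters
encoded = tokenizer(sentence, return_tensors='pt')
print(encoded['input_ids'])                  # ids looked up in vocab.txt, with [CLS]/[SEP] added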
def predict(net, test_comments):
    comments_list = pretreatment(test_comments)  # preprocessing: strip punctuation
    # convert to character ids
    tokenizer = BertTokenizer.from_pretrained("bert_base_chinese")
Using the vocab.txt dictionary shipped with bert_base_chinese, the tokenizer splits a sentence into individual characters and assigns each one a numeric id.
    comments_list_id = tokenizer(comments_list, padding=True, truncation=True, max_length=120, return_tensors='pt')
    tokenizer_id = comments_list_id['input_ids']
    inputs = tokenizer_id
    batch_size = inputs.size(0)

    # initialize hidden state
    h = net.init_hidden(batch_size)

    if (USE_CUDA):
        inputs = inputs.cuda()

    net.eval()
Call net.train() before training the model and net.eval() before testing it.
    with torch.no_grad():
Inside torch.no_grad() no gradients are tracked for the tensors involved, so no computation graph is built; this saves memory during inference.
        output = net(inputs, h)
When output is a single-element tensor, .item() extracts its value as a plain Python number.
    if output.item() >= 0:
        print("预测结果为:正向")   # "Prediction: positive"
    else:
        print("预测结果为:负向")   # "Prediction: negative"
if __name__ == '__main__':
    np.random.seed(2020)
    torch.manual_seed(2020)            # seed the CPU RNG so results are reproducible
    USE_CUDA = torch.cuda.is_available()
    if USE_CUDA:
        torch.cuda.manual_seed(2020)   # seed the current GPU RNG

    path = 'ChnSentiCorp_htl_8000.csv'   # path to the training data
    data = pd.read_csv(path, encoding='gbk')
    # print(data)
    comments_list = pretreatment(list(data['review'].values))   # preprocess the 'review' column of the csv
    lenth = len(comments_list)
    print('the length of data set is:', lenth)
    print('the raw comments_list:', comments_list)
    comments_list[:1]   # note: slicing without assignment does not modify comments_list
    print('after [:1] :', comments_list)

    tokenizer = BertTokenizer.from_pretrained("bert_base_chinese")
    comments_list_id = tokenizer(comments_list, padding=True, truncation=True, max_length=200, return_tensors='pt')

    # split the data into test, validation, and training sets at a ratio of 1.5 : 1.5 : 7
    X = comments_list_id['input_ids']                        # X: the comments (as token ids)
    y = torch.from_numpy(data['label'].values).float()       # y: the labels
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, stratify=y, random_state=2020)
    X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test, test_size=0.5, shuffle=True, stratify=y_test, random_state=2020)
What the shape function returns: LINK
(1) When the argument is a single number, it returns an empty shape.
(2) When the argument is a one-dimensional array, it returns the length of the first dimension.
(3) When the argument is a two-dimensional matrix, it returns the lengths of the first and second dimensions.
The three cases are illustrated in the short example below.
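A quick numpy illustration of the three cases (torch tensors' .shape behaves the same way):

import numpy as np

print(np.shape(3))                   # ()     : a single number has an empty shape
print(np.array([1, 2, 3]).shape)     # (3,)   : length of the first dimension
print(np.zeros((4, 5)).shape)        # (4, 5) : first and second dimension lengths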
    # shape reads the matrix dimensions
    X_train.shape
    # print(X_train.shape)
    y_train.shape
    # print(y_train.shape)

    # create Tensor datasets for the training, validation, and test sets
    train_data = TensorDataset(X_train, y_train)
    valid_data = TensorDataset(X_valid, y_valid)
    test_data = TensorDataset(X_test, y_test)

    # number of samples loaded per batch
    batch_size = 5

    # build the loaders; make sure you SHUFFLE your training data
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)
    valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size, drop_last=True)
    test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size, drop_last=True)
    # print('valid_loader', valid_loader)

    # take one batch from the training set and inspect it
    dataiter = iter(train_loader)
    sample_x, sample_y = next(dataiter)   # dataiter.next() in older PyTorch versions
    print('Sample comment size: ', sample_x.size())   # batch_size, seq_length
    print('Sample comment: \n', sample_x)             # the actual comment token ids
    print()
    print('Sample label size: ', sample_y.size())     # batch_size
    print('Sample label: \n', sample_y)               # the actual labels

    # build the model
    if (USE_CUDA):
        print('Training on GPU.')
    else:
        print('No GPU available, training on CPU.')

    # construct the bert_lstm
    output_size = 1
    hidden_dim = 384   # 768/2
    n_layers = 2
    bidirectional = True   # True selects a bidirectional LSTM

    net = bert_lstm(hidden_dim, output_size, n_layers, bidirectional)

    # train the model
    # learning rate, loss function, optimizer
    lr = 0.00001
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    # training hyperparameters
    epochs = 2
    # validation interval (in steps)
    print_every = 5
    clip = 5   # gradient clipping, to avoid exploding gradients

    # move the model from CPU to GPU for training
    if (USE_CUDA):
        net.cuda()

    net.train()   # call train() before training, eval() before testing

    # train on the training split for the configured number of epochs
    for e in range(epochs):
        # initialize hidden state
        h = net.init_hidden(batch_size)
        counter = 0

        # iterate over the training set batch by batch
        for inputs, labels in train_loader:
            counter += 1

            if (USE_CUDA):
                inputs, labels = inputs.cuda(), labels.cuda()
            h = tuple([each.data for each in h])

            # free cached memory to mitigate out-of-memory errors
            if hasattr(torch.cuda, 'empty_cache'):
                torch.cuda.empty_cache()

            # zero the parameter gradients
            net.zero_grad()
            output = net(inputs, h)
            # print('label:')
            # print(labels.long())
            # print(labels.size)

            # wang: debugging prints used to check that the model trains correctly
            '''
            print('output', output)                                               # wang
            print('output_sigmoid', torch.sigmoid(output.squeeze()))              # wang
            print('labels', labels)                                               # wang
            print('train_max', torch.max(torch.sigmoid(output.squeeze()), 0)[1])
            print('pred_max', torch.max(torch.nn.Softmax(dim=1)(output), 1)[1])   # softmax makes the values along dim sum to 1
            '''
            '''
            out_max = torch.sigmoid(output.squeeze())   # wang
            print('out_before', out_max)                # wang
            out_max = torch.max(out_max)                # wang
            print('out_after', out_max.item())          # wang
            '''

            loss = criterion(torch.sigmoid(output.squeeze()), labels.float())
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()

            if hasattr(torch.cuda, 'empty_cache'):
                torch.cuda.empty_cache()

            # every print_every steps, compute the validation loss
            if counter % print_every == 0:
                net.eval()
                with torch.no_grad():
                    val_h = net.init_hidden(batch_size)
                    val_losses = []
                    for inputs, labels in valid_loader:   # run over the validation set
                        val_h = tuple([each.data for each in val_h])
                        if hasattr(torch.cuda, 'empty_cache'):
                            torch.cuda.empty_cache()
                        if (USE_CUDA):
                            inputs, labels = inputs.cuda(), labels.cuda()
                        output = net(inputs, val_h)
                        val_loss = criterion(torch.sigmoid(output.squeeze()), labels.float())
                        # print('loss:')
                        # print(val_loss)
                        val_losses.append(val_loss.item())
                        if hasattr(torch.cuda, 'empty_cache'):
                            torch.cuda.empty_cache()
                net.train()
                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.6f}...".format(loss.item()),            # training loss
                      "Val Loss: {:.6f}".format(np.mean(val_losses)))   # validation loss

    # testing
    test_losses = []   # track loss
    num_correct = 0

    # init hidden state
    h = net.init_hidden(batch_size)
    net.eval()

    # compute the loss on the test set
    for inputs, labels in test_loader:
        h = tuple([each.data for each in h])
        if (USE_CUDA):
            inputs, labels = inputs.cuda(), labels.cuda()
        output = net(inputs, h)
        test_loss = criterion(torch.sigmoid(output.squeeze()), labels.float())
        test_losses.append(test_loss.item())

        output = torch.nn.Softmax(dim=1)(output)
        prediction = torch.max(output, 1)[1]
        # print('prediction_type', type(prediction))   # wang
        # print('prediction_size', pred.size())        # wang

        # compare predictions to true labels
        correct_tensor = prediction.eq(labels.long().view_as(prediction))   # reshape labels to match prediction so they can be compared
        correct = np.squeeze(correct_tensor.numpy()) if not USE_CUDA else np.squeeze(correct_tensor.cpu().numpy())   # a CPU tensor can be read directly; a GPU tensor must first be moved to the CPU with .cpu()
        num_correct += np.sum(correct)   # count the correct predictions

    print("Test loss: {:.3f}".format(np.mean(test_losses)))

    # compute test-set accuracy
    test_acc = num_correct / len(test_loader.dataset)
    print("Test accuracy: {:.3f}".format(test_acc))

    # try the model on a few sample comments
    comment1 = ['exo私生饭尾随成员进宿舍,公司以后能不能加大监管力度?这都什么事情啊']
    predict(net, comment1)
    comment2 = ['同期最好的一部电影,真实不虚伪']
    predict(net, comment2)
    comment3 = ['有一说一,上晚班回来吃上一顿烧烤简直太幸福了']
    predict(net, comment3)
    comment4 = ['电影很值得,要二刷']
    predict(net, comment4)
    comment5 = ['警察暴力执法?黑人的命就不是命吗,这个世界还能继续恶臭吗']
    predict(net, comment5)
    comment6 = ['这家饭店的饭还不错,就是价格有点小高']
    predict(net, comment6)
    comment7 = ['有没有好心人愿意收养这只猫咪啊,很乖很漂亮的']
    predict(net, comment7)
    comment8 = ['娱乐圈的风气真的该严整一下了,偷税漏税的人一定不止一个']
    predict(net, comment8)

    # save the model
    torch.save(net.state_dict(), './酒店评论二分类_parameters.pth')

    output_size = 1
    hidden_dim = 384   # 768/2
    n_layers = 2
    bidirectional = True   # True selects a bidirectional LSTM

    net = bert_lstm(hidden_dim, output_size, n_layers, bidirectional)
    net.load_state_dict(torch.load('./酒店评论二分类_parameters.pth'))   # load the saved parameters

    # move model to GPU, if available
    if (USE_CUDA):
        net.cuda()

    comment1 = ['日本要将核污水排入大海,命运共同体不是说着玩的,大多数人只看到了盈利的共同体,但是灾难大家也要一起承担啊!!']
    predict(net, comment1)