赞
踩
今天在使用Bert进行文本情感分析时,需要调用Bert的预训练模型,结果出现报错:“OSError: We couldn't connect to 'https://huggingface.co' to load this file”
意思是无法访问这个网址,点进这个网址,浏览器访问,依旧失败
原因是这个网址,国内的IP已经没办法访问了。 于是想到用离线模式下载
方法一:安全上网,访问该网址
方法二:使用镜像网址
非常好用的镜像网站:
不需要安全上网也能访问!
接下来是解决方法:
1、搜索 bert-base-uncased
2、下载所需要的文件(配置文件 config.json、词表 vocab.txt 以及权重文件 pytorch_model.bin 等)
3、将下载的文件保存到项目下面
4、更改源代码中的地址
主要是在加载预训练模型处进行更改:把 from_pretrained() 中的模型名称改成本地保存这些文件的文件夹路径(例如下面demo中的 'uncased')
5、成功运行!
附上Bert的一个简单运行demo
import pickle

import torch
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

# Minimal demo: fine-tune a locally stored BERT checkpoint for 3-class
# sequence classification and report validation accuracy after each epoch.

# Load the data.
# NOTE: pickle.load can execute arbitrary code while deserializing; only open
# pickle files that come from a trusted source.
pickle_file = 'data/Belt_and_Road.pickle'
with open(pickle_file, 'rb') as f:
    pickle_data = pickle.load(f)  # deserialize; the inverse of pickle.dump
X_train = pickle_data['train_dataset']
y_train = pickle_data['train_labels']
X_val = pickle_data['test_dataset']
y_val = pickle_data['test_labels']
del pickle_data  # free memory

print('Data and modules loaded.')

# Load the pre-trained BERT model and tokenizer from a local directory, so
# that no connection to huggingface.co is required.
bert_model_path = 'uncased'
tokenizer = BertTokenizer.from_pretrained(bert_model_path)
model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=3)  # 3 = number of output classes

# Tokenize and pad the texts.
max_length = 128
train_encodings = tokenizer(X_train, truncation=True, padding=True, max_length=max_length)
val_encodings = tokenizer(X_val, truncation=True, padding=True, max_length=max_length)

# Convert to PyTorch tensors.
# Labels are forced to int64: the cross-entropy loss computed inside the model
# requires class indices of dtype long, and the dtype of the pickled labels is
# not guaranteed (e.g. a NumPy int32 array would otherwise fail).
train_dataset = TensorDataset(
    torch.tensor(train_encodings['input_ids']),
    torch.tensor(train_encodings['attention_mask']),
    torch.tensor(y_train, dtype=torch.long),
)
val_dataset = TensorDataset(
    torch.tensor(val_encodings['input_ids']),
    torch.tensor(val_encodings['attention_mask']),
    torch.tensor(y_val, dtype=torch.long),
)

# Define the data loaders.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Define the optimizer. No separate loss function is needed: the model returns
# the classification loss itself when `labels` is passed to the forward call.
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

# Train the model.
num_epochs = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for input_ids, attention_mask, labels in train_loader:
        input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Evaluate on the validation set at the end of every epoch.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for input_ids, attention_mask, labels in val_loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            predicted = outputs.logits.argmax(dim=1)  # index of the highest-scoring class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Validation Accuracy: {correct/total:.2%}')
都看到这里了~给个小心心❤❤呗~
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。