赞
踩
当处理自然语言处理任务时,可以使用PyTorch来实现LSTM模型。下面是一个简单的示例代码,用于情感分类任务。
首先,导入所需的库:
- import torch
- import torch.nn as nn
- import torch.optim as optim
- from torchtext.data import Field, TabularDataset, BucketIterator
定义模型类:
class LSTMModel(nn.Module):
    """LSTM text classifier: embedding -> LSTM -> linear head.

    Args:
        input_dim: vocabulary size (number of embedding rows).
        embedding_dim: dimensionality of each token embedding.
        hidden_dim: LSTM hidden-state size.
        output_dim: number of output classes.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        """Classify a batch of token-index sequences.

        Args:
            text: LongTensor of shape (seq_len, batch) — nn.LSTM default
                is sequence-first (batch_first=False).

        Returns:
            Logits of shape (batch, output_dim).
        """
        embedded = self.embedding(text)
        output, (hidden, cell) = self.lstm(embedded)
        # hidden is (num_layers, batch, hidden_dim); take the last layer's state.
        hidden = hidden[-1, :, :]
        # BUG FIX: the original applied .squeeze(0) here, which silently
        # collapsed the batch dimension whenever batch == 1, yielding a 1-D
        # tensor that CrossEntropyLoss rejects. Logits are already
        # (batch, output_dim), so no squeeze is needed.
        return self.fc(hidden)
定义数据预处理和加载数据函数:
def preprocess_data(path='data_path', train_file='train.csv', test_file='test.csv',
                    batch_size=64, vectors='glove.6B.100d', device=None):
    """Load CSV sentiment data and build torchtext iterators.

    Args:
        path: directory containing the CSV files.
        train_file: training CSV name (columns: text, label).
        test_file: test CSV name.
        batch_size: batch size for both iterators.
        vectors: name of the pretrained word vectors to attach to the vocab.
        device: torch.device for the batches; defaults to CUDA when
            available, else CPU. (The original hard-coded 'cuda', which
            crashes on CPU-only machines.)

    Returns:
        (train_iterator, test_iterator, pretrained_embedding_matrix)
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Field objects describe how each CSV column is tokenized/numericalized.
    TEXT = Field(tokenize='spacy', lower=True)
    LABEL = Field(sequential=False, is_target=True)

    # Load the train/test splits from disk.
    train_data, test_data = TabularDataset.splits(
        path=path,
        train=train_file,
        test=test_file,
        format='csv',
        fields=[('text', TEXT), ('label', LABEL)]
    )

    # Build vocabularies; TEXT additionally attaches pretrained vectors.
    TEXT.build_vocab(train_data, vectors=vectors)
    LABEL.build_vocab(train_data)

    # BucketIterator groups examples of similar length to minimize padding.
    train_iterator, test_iterator = BucketIterator.splits(
        (train_data, test_data),
        batch_size=batch_size,
        sort_within_batch=True,
        sort_key=lambda x: len(x.text),
        device=device
    )

    return train_iterator, test_iterator, TEXT.vocab.vectors
定义训练函数:
def train(model, iterator, optimizer, criterion):
    """Run one training epoch over *iterator*.

    Each batch must expose ``batch.text`` (model input) and
    ``batch.label`` (targets for *criterion*).

    Args:
        model: the nn.Module to train (switched to train mode).
        iterator: iterable of batches.
        optimizer: optimizer stepping *model*'s parameters.
        criterion: loss function, e.g. nn.CrossEntropyLoss.

    Returns:
        Average batch loss for the epoch as a float (0.0 for an empty
        iterator). The original discarded the loss entirely, leaving
        callers no way to monitor training progress.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    for batch in iterator:
        optimizer.zero_grad()
        text, label = batch.text, batch.label
        predictions = model(text)
        loss = criterion(predictions, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Guard against division by zero on an empty iterator.
    return total_loss / num_batches if num_batches else 0.0
定义评估函数:
def evaluate(model, iterator, criterion):
    """Return (mean loss, mean batch accuracy) over *iterator*.

    Runs in eval mode with gradients disabled. Each batch must expose
    ``batch.text`` and ``batch.label``.
    """
    model.eval()
    loss_sum = 0.0
    accuracy_sum = 0.0

    with torch.no_grad():
        for batch in iterator:
            logits = model(batch.text)
            loss_sum += criterion(logits, batch.label).item()
            # argmax over the class dimension == indices from torch.max(..., 1)
            predicted = logits.argmax(dim=1)
            accuracy_sum += (predicted == batch.label).float().mean().item()

    batch_count = len(iterator)
    return loss_sum / batch_count, accuracy_sum / batch_count
最后,实例化模型并进行训练和评估:
# Hyperparameters.
num_epochs = 5  # BUG FIX: was used at the training loop but never defined.
hidden_dim = 256
output_dim = 2

# BUG FIX: the original read len(TEXT.vocab) and TEXT.vocab.vectors here,
# but TEXT is a local variable inside preprocess_data() and does not exist
# at module scope — that code raised NameError. Load the data FIRST and
# derive vocabulary size / embedding dim from the returned vector matrix.
train_iterator, test_iterator, pretrained_embeddings = preprocess_data()

# Embedding matrix is (vocab_size, embedding_dim).
input_dim = pretrained_embeddings.shape[0]
embedding_dim = pretrained_embeddings.shape[1]

# Instantiate the model and initialize its embedding layer with the
# pretrained word vectors.
model = LSTMModel(input_dim, embedding_dim, hidden_dim, output_dim)
model.embedding.weight.data.copy_(pretrained_embeddings)

# Loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Train and evaluate once per epoch.
for epoch in range(num_epochs):
    train(model, train_iterator, optimizer, criterion)
    test_loss, test_accuracy = evaluate(model, test_iterator, criterion)
    print(f'Epoch: {epoch+1}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')
以上代码是一个简单的LSTM模型用于情感分类任务的示例。你可以根据自己的具体任务和数据进行相应的修改和调整。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。