First, a caveat: this post is really just a short note. If you have read the earlier posts in this series, you can safely skip it; most of it calls ready-made components rather than implementing anything from scratch, and it is intended only as a personal record. The code below implements a recurrent neural network in PyTorch, using a long short-term memory (LSTM) network as its core component. The model is designed to process sequence data and can be applied to any scenario that requires sequence prediction.
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01

# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
This code is a PyTorch implementation of a recurrent neural network (RNN) for the MNIST dataset; specifically, it uses a long short-term memory (LSTM) network as its core component. The key parts of the code are explained below:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
Import PyTorch and its related modules for building the network and handling the data.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Select the compute device: use the GPU if one is available, otherwise fall back to the CPU.
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01
Set the model and training hyper-parameters: sequence length, input size, hidden size, number of layers, number of classes, batch size, number of epochs, and learning rate.
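To see why sequence_length and input_size are both 28: each 28x28 MNIST image is fed to the LSTM row by row, so the 28 rows become 28 time steps with 28 features each. A minimal sketch (not part of the original script):

import torch

dummy = torch.randn(100, 1, 28, 28)   # a fake MNIST batch: (batch, channels, height, width)
seq = dummy.reshape(-1, 28, 28)       # (batch, sequence_length, input_size)
print(seq.shape)                      # torch.Size([100, 28, 28])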
train_dataset = torchvision.datasets.MNIST(...)
test_dataset = torchvision.datasets.MNIST(...)
Load the MNIST training and test sets.
train_loader = torch.utils.data.DataLoader(...)
test_loader = torch.utils.data.DataLoader(...)
Create the data loaders used for training and testing.
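To verify what the loaders actually yield, one can peek at a single batch (an illustrative check, not in the original script):

images, labels = next(iter(train_loader))
print(images.shape)   # torch.Size([100, 1, 28, 28])
print(labels.shape)   # torch.Size([100])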
class RNN(nn.Module):
    # Define the RNN model with an LSTM layer and a fully connected layer.
Define an RNN model consisting of an LSTM layer and a fully connected classification layer.
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
Instantiate the RNN model and move it to the configured device.
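As a quick sanity check on the shapes flowing through the model, a dummy batch can be pushed through the untrained network (illustrative only; the LSTM output is (batch, 28, 128), of which only the last time step feeds the fully connected layer):

x = torch.randn(4, sequence_length, input_size).to(device)   # 4 fake sequences
logits = model(x)
print(logits.shape)   # torch.Size([4, 10]), one logit per class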
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Define the cross-entropy loss function and the Adam optimizer.
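Note that nn.CrossEntropyLoss expects raw logits of shape (batch_size, num_classes) and integer class labels of shape (batch_size,); softmax is applied internally. A tiny illustration:

dummy_logits = torch.randn(4, num_classes)    # raw, unnormalized scores
dummy_labels = torch.tensor([3, 0, 9, 1])     # integer class ids, not one-hot
print(criterion(dummy_logits, dummy_labels))  # a scalar loss tensor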
for epoch in range(num_epochs):
    # Training loop
Run the training loop: for each batch, do a forward pass, compute the loss, backpropagate, and update the parameters.
model.eval()
with torch.no_grad():
    # Test loop
Evaluate the model on the test set in evaluation mode, with gradient tracking disabled.
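One detail in the test loop: torch.max(outputs.data, 1) returns a (values, indices) pair, and the indices are the predicted class ids. An equivalent and arguably clearer form (illustrative):

outputs = torch.randn(4, num_classes)   # stand-in for a batch of model outputs
predicted = outputs.argmax(dim=1)       # same result as torch.max(outputs, 1)[1]
print(predicted)                        # tensor of 4 predicted class ids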
torch.save(model.state_dict(), 'model.ckpt')
Save the trained model parameters.
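To restore the checkpoint later, rebuild the model with the same hyper-parameters and load the saved state dict (a sketch; the file name follows the script above):

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
model.load_state_dict(torch.load('model.ckpt', map_location=device))
model.eval()   # switch to evaluation mode before inference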
Below, various visualizations are added on top of this: dataset samples, the model architecture, the training process, and the test predictions. The complete code follows.
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import matplotlib.pyplot as plt
import numpy as np

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 5
learning_rate = 0.01

# Data preprocessing and loading
transform = transforms.Compose([
    transforms.Pad(2),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(28),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True,
                                           transform=transform,
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False,
                                          transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Define the RNN model
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Visualize dataset samples
def visualize_data(loader):
    plt.figure(figsize=(10, 3))
    for images, labels in loader:
        for j in range(10):
            plt.subplot(1, 10, j+1)
            plt.imshow(images[j].squeeze(), cmap='gray')
            plt.title(f'Label: {labels[j]}')
            plt.axis('off')
        plt.show()
        break

visualize_data(train_loader)

# Model architecture visualization
print(model)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    train_losses, test_accuracies = [], []
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        correct_pred, total_samples = 0, 0
        for images, labels in train_loader:
            images = images.reshape(-1, sequence_length, input_size).to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            correct_pred += (predicted == labels).sum().item()
            total_samples += labels.size(0)

        train_losses.append(total_train_loss / len(train_loader))
        accuracy = correct_pred / total_samples

        # Evaluate on the test set after each epoch so the accuracy curve
        # below has data (the original never filled test_accuracies, which
        # left the second plot empty)
        model.eval()
        correct_test, total_test = 0, 0
        with torch.no_grad():
            for images, labels in test_loader:
                images = images.reshape(-1, sequence_length, input_size).to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                correct_test += (predicted == labels).sum().item()
                total_test += labels.size(0)
        test_accuracies.append(correct_test / total_test)

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_losses[-1]:.4f}, '
              f'Train Accuracy: {accuracy:.4f}, Test Accuracy: {test_accuracies[-1]:.4f}')

    # Visualize the training process
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.title('Training Loss Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(test_accuracies, label='Test Accuracy')
    plt.title('Test Accuracy Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

train_model(model, train_loader, criterion, optimizer, num_epochs)

# Test the model
def test_model(model, test_loader):
    model.eval()
    correct_pred, total_samples = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.reshape(-1, sequence_length, input_size).to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            correct_pred += (predicted == labels).sum().item()
            total_samples += labels.size(0)
    test_accuracy = correct_pred / total_samples
    print(f'Test Accuracy of the model on the test images: {test_accuracy * 100:.2f} %')

test_model(model, test_loader)

# Visualize test results
def visualize_test_results(model, test_loader):
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            labels = labels.to(device)
            # Reshape to (batch, sequence_length, input_size) before the LSTM;
            # feeding the raw 4D image tensor would raise the ValueError
            # discussed below
            inputs = images.reshape(-1, sequence_length, input_size).to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            plt.figure(figsize=(15, 5))
            for j in range(10):
                plt.subplot(2, 10, j+1)
                plt.imshow(images[j].squeeze(), cmap='gray')
                plt.title(f'Predicted: {predicted[j].item()}, True: {labels[j].item()}')
                plt.axis('off')
            plt.show()
            break

visualize_test_results(model, test_loader)

# Save the model
torch.save(model.state_dict(), 'rnn_mnist.pth')
This code first imports the necessary libraries and sets up the device, hyper-parameters, and data preprocessing and loading. It then defines an RNN model, visualizes dataset samples and the model architecture, and trains the model while tracking the training process. After training, it evaluates the model on the test set and visualizes a few predictions. Finally, it saves the model weights.
Note that the torchsummary library must be installed to use the summary function; it can be installed with pip install torchsummary. The visualization parts of the code use the matplotlib library to draw the images and plots.
The error message ValueError: LSTM: Expected input to be 2D or 3D, got 4D instead indicates that the input passed to the LSTM layer has the wrong number of dimensions: the LSTM layer expects a 2D or 3D tensor, but received a 4D one.
In PyTorch, with batch_first=True as in this code, the input to an LSTM layer should have one of the following shapes:
(sequence_length, input_size) for a single, unbatched sequence
(batch_size, sequence_length, input_size) for a batch of sequences
The error occurs because the input was passed as a 4D tensor, for example (batch_size, channels, height, width), which is the typical shape of image data.
To resolve this, the data must be reshaped before it is handed to the LSTM. In this code, the image tensor should be converted from 4D to 3D before being passed to the model. Here is an example of how to reshape the data:
# Suppose images is a 4D tensor of shape (batch_size, channels, height, width).
# We need to reshape it to (batch_size, sequence_length, input_size).
# For 28x28 MNIST images, treat each of the 28 image rows as one time step,
# matching the hyper-parameters above (sequence_length = 28, input_size = 28).
sequence_length = images.shape[2]   # height -> number of time steps
input_size = images.shape[3]        # width  -> features per time step

# Reshape images to match the LSTM input
images = images.view(images.size(0), sequence_length, -1)  # -1 infers input_size

# images now has shape (batch_size, sequence_length, input_size)
# and can be passed to the model
Note that the code above assumes the image data is already a 4D tensor and turns its spatial dimensions into a sequence. In the case of MNIST, each image is 28x28 pixels, so it can either be read row by row (28 time steps of 28 features each, as above) or flattened into a single 784-dimensional step.
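The two serializations differ only in how the 784 pixels are split into time steps; both produce a 3D tensor the LSTM accepts. A sketch, assuming images is a (batch_size, 1, 28, 28) MNIST batch as above:

# Option 1: each of the 28 rows is one time step (the convention used in this post)
images_rows = images.reshape(-1, 28, 28)    # (batch_size, 28, 28)

# Option 2: the whole flattened image is a single 784-dimensional time step
images_flat = images.reshape(-1, 1, 784)    # (batch_size, 1, 784)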
In the training and test code, the data should be reshaped before being passed to the model, like this:
for images, labels in train_loader:
    # Reshape images to match the LSTM input
    images = images.reshape(-1, sequence_length, input_size).to(device)
    # ... the remaining training or test steps
    ...
As long as the input is reshaped correctly inside the training and test loops, the LSTM layer will receive input of the expected shape.