赞
踩
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 8.00 GiB (GPU 0; 7.92 GiB total capacity; 1.48 MiB already allocated; 6.91 GiB free; 22.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
遇到这个错误后,按照网上的建议把 batch_size 改得很小,依然报错。
后来改小了网络结构,问题解决。
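错误原因是全连接层的输入特征数(in_features)与前一层实际输出的shape不一致,写得过大。全连接层的权重大小为 in_features × out_features,与 batch_size 无关,所以只改小 batch_size 不起作用。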
错误的shape(全连接层的输入特征数):
128 * 128 * 8 = 131072
修改后(展平层与第一个全连接层):
torch.nn.Flatten(),
torch.nn.Linear(in_features=8 * 8 * 8, out_features=512),
- import torch
- from matplotlib import pyplot as plt
- from torch import nn, optim
- # from torch.autograd import Variable
- from torch.utils.data import DataLoader
- from torchvision import datasets, transforms
- from tqdm import tqdm
- from matplotlib.ticker import MaxNLocator
-
# Hyper-parameters
batch_size = 128        # number of samples per mini-batch
learning_rate = 0.0001  # step size handed to the optimizer
epochs = 20             # number of passes over the training set
channels = 3            # number of image channels
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Pre-processing: convert each picture to a tensor scaled to 0-1, then
# normalise it with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])
path = './data/'  # directory the dataset is downloaded to / read from

# Training split (downloaded on demand) and test split of CIFAR-10.
trainData = datasets.CIFAR10(path, train=True, transform=transform, download=True)
testData = datasets.CIFAR10(path, train=False, transform=transform)

# Preview the first 20 training images on a 4 x 5 grid inside a
# 20 x 10 inch figure, each labelled with its class name.
preview = plt.figure(figsize=(20, 10))
first_images = trainData.data[:20]
first_targets = trainData.targets[:20]
for cell, (picture, target) in enumerate(zip(first_images, first_targets), start=1):
    axes = preview.add_subplot(4, 5, cell)
    # Hide ticks and grid lines so only the picture and its label remain.
    axes.set_xticks([])
    axes.set_yticks([])
    axes.grid(False)
    axes.imshow(picture, cmap=plt.cm.binary)
    axes.set_xlabel(class_names[target])
plt.show()


# Wrap both splits in data loaders; only the training split is shuffled.
trainDataLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True)
testDataLoader = DataLoader(testData, batch_size=batch_size)
-
-
# Build the CNN model
class cnn(torch.nn.Module):
    """Small convolutional classifier for 32x32 images.

    Four 3x3 convolutions (the first two each followed by 2x2 max-pooling)
    reduce a 32x32 input to an 8-channel 8x8 feature map, which is flattened
    and passed through two fully connected layers.

    The first fully connected layer is hard-wired to ``8 * 8 * 8`` input
    features, so the network only accepts inputs whose spatial size is
    32x32 (e.g. CIFAR-10).

    Args:
        in_channels: Number of channels of the input images. Defaults to 3.
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()
        self.model = torch.nn.Sequential(
            # Feature map: 32x32 -> 16x16 after pooling
            torch.nn.Conv2d(in_channels=in_channels, out_channels=32,
                            kernel_size=(3, 3), stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),

            # Feature map: 16x16 -> 8x8 after pooling
            torch.nn.Conv2d(in_channels=32, out_channels=64,
                            kernel_size=(3, 3), stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),

            # Feature map stays 8x8 (padding=1, no pooling)
            torch.nn.Conv2d(in_channels=64, out_channels=128,
                            kernel_size=(3, 3), stride=1, padding=1),
            torch.nn.ReLU(),

            # Feature map stays 8x8; channels are squeezed to 8 so the
            # flattened vector has only 8 * 8 * 8 = 512 entries.
            torch.nn.Conv2d(in_channels=128, out_channels=8,
                            kernel_size=(3, 3), stride=1, padding=1),
            torch.nn.ReLU(),

            torch.nn.Flatten(),
            # in_features must equal channels * height * width of the last
            # feature map (8 * 8 * 8).
            torch.nn.Linear(in_features=8 * 8 * 8, out_features=512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),  # randomly drop units to curb over-fitting
            # Raw logits are returned: nn.CrossEntropyLoss applies
            # log-softmax itself, so no Softmax layer is added here.
            torch.nn.Linear(in_features=512, out_features=num_classes),
        )

    def forward(self, input):
        """Return the class logits, shape ``(batch, num_classes)``, for ``input``."""
        return self.model(input)
-
-
def _evaluate(model, data_loader, criterion, device):
    """Return ``(mean batch loss, accuracy)`` of ``model`` over ``data_loader``.

    The model is switched to evaluation mode and gradients are disabled.
    The loss is averaged over batches; the accuracy over samples.
    """
    model.eval()
    loss_sum, correct = 0.0, 0
    with torch.no_grad():
        for images, targets in data_loader:
            images = images.to(device)
            targets = targets.to(device)
            logits = model(images)
            loss_sum += criterion(logits, targets).item()
            correct += (torch.argmax(logits, dim=1) == targets).sum().item()
    return loss_sum / len(data_loader), correct / len(data_loader.dataset)


def _plot_history(test_values, train_values, metric, title, legend_loc, filename):
    """Plot per-epoch test/train curves, save the figure to ``filename`` and show it."""
    # Epochs are numbered from 1, so plot against 1..N instead of list indices.
    epoch_axis = list(range(1, len(test_values) + 1))
    plt.plot(epoch_axis, test_values, color='red', label='Test ' + metric)
    plt.plot(epoch_axis, train_values, label='Train ' + metric)
    plt.grid(True)
    plt.xlabel('Epoch')
    plt.xlim([0, epochs])
    plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.ylabel(title)
    plt.title('Train and Test ' + metric.upper())
    plt.legend(loc=legend_loc)
    plt.savefig(filename)
    plt.show()


# Pick the device once: the GPU when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the model and move it to the chosen device.
model = cnn().to(device)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model and record the metrics of every epoch.
history = {'Train Loss': [],
           'Test Loss': [],
           'Train Acc': [],
           'Test Acc': []}
for epoch in range(1, epochs + 1):
    processBar = tqdm(trainDataLoader, unit='step')
    model.train()
    # Running sums are plain Python numbers (via .item()), not tensors, so
    # they do not keep each step's autograd history alive during the epoch.
    train_loss, train_correct = 0.0, 0
    for step, (train_imgs, labels) in enumerate(processBar):
        train_imgs = train_imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()               # clear gradients of the previous step
        outputs = model(train_imgs)         # forward pass
        loss = criterion(outputs, labels)   # batch loss
        loss.backward()                     # back-propagation
        optimizer.step()                    # parameter update

        batch_loss = loss.item()
        batch_correct = (torch.argmax(outputs, dim=1) == labels).sum().item()
        batch_accuracy = batch_correct / labels.shape[0]
        processBar.set_description("[%d/%d] Loss: %.4f, Acc: %.4f" %
                                   (epoch, epochs, batch_loss, batch_accuracy))

        train_loss += batch_loss
        train_correct += batch_correct

        # After the last training batch of the epoch, evaluate on the test
        # set while the progress bar is still open so it shows the summary.
        if step == len(processBar) - 1:
            test_loss, test_accuracy = _evaluate(model, testDataLoader, criterion, device)

            train_accuracy = train_correct / len(trainDataLoader.dataset)
            train_loss = train_loss / len(trainDataLoader)  # mean loss per step

            history['Train Loss'].append(train_loss)
            history['Train Acc'].append(train_accuracy)
            history['Test Loss'].append(test_loss)
            history['Test Acc'].append(test_accuracy)

            processBar.set_description("[%d/%d] Loss: %.4f, Acc: %.4f, Test Loss: %.4f, Test Acc: %.4f" %
                                       (epoch, epochs, train_loss, train_accuracy, test_loss,
                                        test_accuracy))
    processBar.close()

# Visualise the loss curves.
_plot_history(history['Test Loss'], history['Train Loss'],
              'Loss', 'Loss', 'upper right', 'LOSS')

# Visualise the accuracy curves.
_plot_history(history['Test Acc'], history['Train Acc'],
              'Acc', 'Accuracy', 'lower right', 'ACC')

# NOTE(review): this pickles the whole module object, so loading the file
# requires the `cnn` class to be importable; saving model.state_dict() is the
# more portable alternative, but the on-disk format is left unchanged here.
torch.save(model, './model.pth')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。