赞
踩
基于卷积神经网络实现手写数字识别。具体过程如下:
(1) 定义ConvNet结构类及其前向传播方式
(2) 设置超参数以及导入相关的包。
(3) 定义训练网络函数和绘图函数,并在main函数中完成调用过程
import os

import numpy as np
from matplotlib import pyplot as plt
import torch
from torchvision.datasets import mnist
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable  # noqa: F401  (no longer used; tensors work directly)

# Directory containing this script; checkpoints are stored relative to it.
BASE_PATH = os.path.dirname(os.path.abspath(__file__))

# Training hyper-parameters.
EPOCHS = 50
SAVE_PATH = './models'


class ConvNet(torch.nn.Module):
    """Small CNN mapping 1x28x28 images to 10 class logits.

    Two conv/pool stages are followed by three fully connected layers.
    The first linear layer expects 500 features, which is what a 28x28
    input produces (20 channels x 5 x 5) after the two stages.
    """

    def __init__(self):
        super().__init__()
        # 1x28x28 -> conv(k=5, pad=1) -> 10x26x26 -> pool(2) -> 10x13x13
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 10, 5, 1, 1),
            torch.nn.MaxPool2d(2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(10),
        )
        # 10x13x13 -> conv(k=5, pad=1) -> 20x11x11 -> pool(2) -> 20x5x5
        self.conv2 = torch.nn.Sequential(
            torch.nn.Conv2d(10, 20, 5, 1, 1),
            torch.nn.MaxPool2d(2),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(20),
        )
        self.fc1 = torch.nn.Sequential(
            torch.nn.Linear(500, 60),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
        )
        self.fc2 = torch.nn.Sequential(
            torch.nn.Linear(60, 20),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
        )
        self.fc3 = torch.nn.Linear(20, 10)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten per sample so the batch dimension is never silently
        # reshaped when the input size is not the expected one.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


class AlexNet(torch.nn.Module):
    """AlexNet-style network for single-channel images.

    The classifier expects 256 * 6 * 6 features, which is what a 28x28
    input yields after the feature extractor.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = torch.nn.Sequential(
            torch.nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(kernel_size=3, stride=1),
            torch.nn.Conv2d(64, 192, kernel_size=3, padding=2),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(kernel_size=3, stride=2),
            torch.nn.Conv2d(192, 384, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(384, 256, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Conv2d(256, 256, kernel_size=3, padding=1),
            torch.nn.ReLU(inplace=True),
            torch.nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = torch.nn.Sequential(
            torch.nn.Dropout(),
            torch.nn.Linear(256 * 6 * 6, 4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


def _run_epoch(net, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``, training only if ``optimizer`` is given.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats, both averaged over
        samples so a short final batch is not over-weighted.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    training = optimizer is not None
    net.train(training)

    total_loss = 0.0
    total_correct = 0
    total_seen = 0
    # Gradients are only needed while training; skipping them during
    # evaluation avoids building the autograd graph.
    with torch.set_grad_enabled(training):
        for image, label in loader:
            image = image.to(device)
            label = label.to(device)

            out = net(image)
            loss = criterion(out, label)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = label.size(0)
            # The criterion returns the batch mean; scale back to a sum.
            total_loss += loss.item() * batch_size
            total_correct += (out.argmax(dim=1) == label).sum().item()
            total_seen += batch_size

    if total_seen == 0:
        raise ValueError('data loader yielded no samples')
    return total_loss / total_seen, total_correct / total_seen


def train_net(net, train_data, test_data, epochs=None, save_dir=None):
    """Train ``net`` with SGD and evaluate it after every epoch.

    A checkpoint of the model's ``state_dict`` is written after each epoch.
    Batches are moved to whatever device the model's parameters live on.

    Args:
        net: Model to train (modified in place).
        train_data: Iterable of ``(image, label)`` training batches.
        test_data: Iterable of ``(image, label)`` evaluation batches.
        epochs: Number of epochs; defaults to the module-level ``EPOCHS``.
        save_dir: Checkpoint directory; defaults to ``SAVE_PATH`` under
            ``BASE_PATH``. It is created if missing.

    Returns:
        ``(eval_losses, eval_acces)``: two lists of floats holding the
        evaluation loss and accuracy for each epoch.
    """
    if epochs is None:
        epochs = EPOCHS
    if save_dir is None:
        save_dir = os.path.join(BASE_PATH, SAVE_PATH)
    # torch.save does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    # Cross-entropy loss, plain SGD with a learning rate of 1e-2.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), 1e-2)
    device = next(net.parameters()).device

    eval_losses = []
    eval_acces = []
    for e in range(epochs):
        train_loss_rate, train_acc_rate = _run_epoch(
            net, train_data, criterion, device, optimizer)
        eval_loss_rate, eval_acc_rate = _run_epoch(
            net, test_data, criterion, device)

        eval_losses.append(eval_loss_rate)
        eval_acces.append(eval_acc_rate)
        print('epoch:{}, Train Loss: {:.6f}, Train Acc:{:.6f}, Eval Loss:{:.6f}, Eval Acc:{:.6f}'.format(
            e, train_loss_rate, train_acc_rate, eval_loss_rate, eval_acc_rate))
        torch.save(net.state_dict(),
                   os.path.join(save_dir, 'Alex_model_epoch' + str(e) + '.pkl'))
    return eval_losses, eval_acces


def draw_result(eval_losses, eval_acces):
    """Plot per-epoch evaluation loss (left axis) and accuracy (right axis).

    Args:
        eval_losses: Evaluation loss per epoch.
        eval_acces: Evaluation accuracy per epoch.
    """
    # Derive the x range from the data so partial runs plot correctly.
    x = range(1, len(eval_losses) + 1)
    _, left_axis = plt.subplots()
    left_axis.plot(x, eval_losses, 'ro-')
    right_axis = left_axis.twinx()
    right_axis.plot(x, eval_acces, 'bo-')
    plt.xticks(x, rotation=0)

    # Axis ranges and tick spacing.
    left_axis.set_ylim(0, 0.5)
    left_axis.set_yticks(np.arange(0, 0.5, 0.1))
    right_axis.set_ylim(0.9, 1.01)
    right_axis.set_yticks(np.arange(0.9, 1.01, 0.02))

    # Axis labels and colours.
    left_axis.set_xlabel('Epoch')
    left_axis.set_ylabel('Loss', color='r')
    left_axis.tick_params(axis='y', colors='r')
    right_axis.set_ylabel('Accuracy', color='b')
    right_axis.tick_params(axis='y', colors='b')

    plt.show()


def main():
    """Download MNIST, train a ConvNet on it and plot the evaluation curves."""
    print("基于卷积神经网络实现手写数字识别")
    # The images must be converted to tensors before they can be batched.
    train_set = mnist.MNIST('./data', train=True, download=True,
                            transform=transforms.ToTensor())
    test_set = mnist.MNIST('./data', train=False, download=True,
                           transform=transforms.ToTensor())
    train_data = DataLoader(train_set, batch_size=64, shuffle=True)
    test_data = DataLoader(test_set, batch_size=64, shuffle=False)

    net = ConvNet()
    eval_losses, eval_acces = train_net(net, train_data, test_data)
    draw_result(eval_losses, eval_acces)


if __name__ == '__main__':
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。