赞
踩
目录
在过去的几十年里,手写体识别一直是计算机视觉和模式识别领域的重要课题。随着深度学习技术的兴起,特别是卷积神经网络(Convolutional Neural Networks, CNNs)的发展,我们已经能够以前所未有的精度和效率识别手写字符。本文将深入探讨如何使用PyTorch这一强大的深度学习框架,实现手写体识别,并介绍一些最新的技术进步。
PyTorch是由Facebook的人工智能研究实验室开发的一个开源机器学习库。它提供了动态计算图,使得构建和调整复杂的深度学习模型变得直观而高效。PyTorch的灵活性和易用性使其成为学术界和工业界广泛使用的工具之一。
手写体识别的传统方法依赖于特征工程和基于规则的系统,但这些方法往往无法处理手写体的多样性和复杂性。相比之下,深度学习模型,尤其是CNNs,能够自动学习和提取图像中的特征,无需显式的人工特征设计。这使得它们在手写体识别任务上取得了显著的成功。
我们将使用经典的MNIST数据集作为案例研究,这是一个包含60,000个训练样本和10,000个测试样本的手写数字数据集。下面是如何使用PyTorch构建一个基本的CNN模型的步骤:
- 使用 `torchvision.datasets.MNIST` 加载并分割训练和测试数据。
- 使用 `transforms` 对图像进行归一化和张量化处理。

近年来,手写体识别领域的一些最新进展包括:
使用 PyTorch 实现手写字体的识别。本算法最终识别率在 97.76% 左右。
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Hyperparameters and checkpoint path referenced throughout the script.
BATCH_SIZE = 64
NUM_EPOCHS = 30
LEARNING_RATE = 0.0009
HIDDEN_SIZE_1 = 128
HIDDEN_SIZE_2 = 64
MODEL_PATH = r'F:\ai\moudle.ckpt'


def _load_mnist_csv(path):
    """Load one comma-separated MNIST file into feature and label tensors.

    Every row is parsed as integers. Column 0 is taken as the label and
    columns 1..784 as pixel values, which are divided by 255.0.

    Args:
        path: Path of the CSV file. No rows are skipped while reading.

    Returns:
        A ``(features, labels)`` tuple: a float32 tensor holding the scaled
        pixel columns and an int64 tensor holding the label column.
    """
    # Parse the file once instead of once per column group; ndmin=2 keeps a
    # one-row file two-dimensional so the column slicing below still works.
    data = np.loadtxt(path, delimiter=',', dtype=int, ndmin=2)
    features = torch.from_numpy(data[:, 1:785] / 255.0).float()
    # Copy the label column so the tensor does not keep the whole table alive.
    labels = torch.from_numpy(data[:, 0].copy()).long()
    return features, labels


# Training data.
filename = r"F:\BaiduNetdiskDownload\mnist_train.csv"
train_features, train_labels = _load_mnist_csv(filename)

# Test data.
fileTestname = r"F:\BaiduNetdiskDownload\mnist_test.csv"
test_features, test_labels = _load_mnist_csv(fileTestname)
class ExcelDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        features: Indexable collection of samples (must support ``len``
            and ``[]``).
        labels: Indexable collection of targets, one per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        # A mismatch would otherwise surface later as an IndexError or as
        # silently ignored labels, far away from the actual mistake.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.features[idx], self.labels[idx]
# Wrap the feature/label tensors in dataset objects.
train_dataset = ExcelDataset(train_features, train_labels)
test_dataset = ExcelDataset(test_features, test_labels)

# Batch the data; only the training set is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)
class MnistModel(nn.Module):
    """Fully connected classifier: 784 -> hidden 1 -> hidden 2 -> 32 -> 10.

    A ReLU follows every linear layer except the last, so the network
    returns raw class scores.

    Args:
        hidden_size_1: Width of the first hidden layer. Defaults to the
            module-level ``HIDDEN_SIZE_1``.
        hidden_size_2: Width of the second hidden layer. Defaults to the
            module-level ``HIDDEN_SIZE_2``.
    """

    def __init__(self, hidden_size_1=None, hidden_size_2=None):
        super().__init__()
        # Resolve defaults at call time so MnistModel() keeps using the
        # script-level constants.
        if hidden_size_1 is None:
            hidden_size_1 = HIDDEN_SIZE_1
        if hidden_size_2 is None:
            hidden_size_2 = HIDDEN_SIZE_2
        # Layer order is unchanged, so state_dict keys (fc.0, fc.2, ...)
        # stay compatible with previously saved checkpoints.
        self.fc = nn.Sequential(
            nn.Linear(784, hidden_size_1),
            nn.ReLU(),
            nn.Linear(hidden_size_1, hidden_size_2),
            nn.ReLU(),
            nn.Linear(hidden_size_2, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        )

    def forward(self, x):
        """Flatten each sample in the batch and return its 10 class scores."""
        # reshape (unlike view) also accepts non-contiguous input.
        x = x.reshape(x.size(0), -1)
        return self.fc(x)
model = MnistModel()

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop.
model.train()
for epoch in range(NUM_EPOCHS):
    for i, (images, labels) in enumerate(train_loader):
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward pass, then parameter update.
        loss.backward()
        optimizer.step()

        # Report progress every 100 batches.
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# Save the trained weights.
torch.save(model.state_dict(), MODEL_PATH)

# Reload the saved weights and switch to inference mode for testing.
model.load_state_dict(torch.load(MODEL_PATH))
model.eval()

# Evaluate on the test set without tracking gradients.
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the highest score is the predicted class.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Test Accuracy: {} %'.format(100 * correct / total))
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

# Hyperparameters and checkpoint path.
BATCH_SIZE = 64
NUM_EPOCHS = 30
LEARNING_RATE = 0.0009
HIDDEN_SIZE_1 = 128
HIDDEN_SIZE_2 = 64
MODEL_PATH = r'F:\ai\moudle.ckpt'


def _load_mnist_csv(path):
    """Load one comma-separated MNIST file into feature and label tensors.

    Every row is parsed as integers. Column 0 is taken as the label and
    columns 1..784 as pixel values, which are divided by 255.0.

    Args:
        path: Path of the CSV file. No rows are skipped while reading.

    Returns:
        A ``(features, labels)`` tuple: a float32 tensor holding the scaled
        pixel columns and an int64 tensor holding the label column.
    """
    # Parse the file once instead of once per column group; ndmin=2 keeps a
    # one-row file two-dimensional so the column slicing below still works.
    data = np.loadtxt(path, delimiter=',', dtype=int, ndmin=2)
    features = torch.from_numpy(data[:, 1:785] / 255.0).float()
    # Copy the label column so the tensor does not keep the whole table alive.
    labels = torch.from_numpy(data[:, 0].copy()).long()
    return features, labels


# Training data.
filename = r"F:\BaiduNetdiskDownload\mnist_train.csv"
train_features, train_labels = _load_mnist_csv(filename)

# Test data.
fileTestname = r"F:\BaiduNetdiskDownload\mnist_test.csv"
test_features, test_labels = _load_mnist_csv(fileTestname)
class ExcelDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        features: Indexable collection of samples (must support ``len``
            and ``[]``).
        labels: Indexable collection of targets, one per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        # A mismatch would otherwise surface later as an IndexError or as
        # silently ignored labels, far away from the actual mistake.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.features[idx], self.labels[idx]
# Wrap the feature/label tensors in dataset objects.
train_dataset = ExcelDataset(train_features, train_labels)
test_dataset = ExcelDataset(test_features, test_labels)

# Batch the data; only the training set is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)
class MnistModel(nn.Module):
    """Fully connected classifier: 784 -> hidden 1 -> hidden 2 -> 32 -> 10.

    A ReLU follows every linear layer except the last, so the network
    returns raw class scores.

    Args:
        hidden_size_1: Width of the first hidden layer. Defaults to the
            module-level ``HIDDEN_SIZE_1``.
        hidden_size_2: Width of the second hidden layer. Defaults to the
            module-level ``HIDDEN_SIZE_2``.
    """

    def __init__(self, hidden_size_1=None, hidden_size_2=None):
        super().__init__()
        # Resolve defaults at call time so MnistModel() keeps using the
        # script-level constants.
        if hidden_size_1 is None:
            hidden_size_1 = HIDDEN_SIZE_1
        if hidden_size_2 is None:
            hidden_size_2 = HIDDEN_SIZE_2
        # Layer order is unchanged, so state_dict keys (fc.0, fc.2, ...)
        # stay compatible with previously saved checkpoints.
        self.fc = nn.Sequential(
            nn.Linear(784, hidden_size_1),
            nn.ReLU(),
            nn.Linear(hidden_size_1, hidden_size_2),
            nn.ReLU(),
            nn.Linear(hidden_size_2, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        )

    def forward(self, x):
        """Flatten each sample in the batch and return its 10 class scores."""
        # reshape (unlike view) also accepts non-contiguous input.
        x = x.reshape(x.size(0), -1)
        return self.fc(x)
model = MnistModel()

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop.
model.train()
for epoch in range(NUM_EPOCHS):
    for i, (images, labels) in enumerate(train_loader):
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward pass, then parameter update.
        loss.backward()
        optimizer.step()

        # Report progress every 100 batches.
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')

# Save the trained weights.
torch.save(model.state_dict(), MODEL_PATH)

# Evaluation is disabled in this listing. Uncomment the lines below to
# reload the checkpoint and print the test accuracy.
# model.load_state_dict(torch.load(MODEL_PATH))
# model.eval()
#
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#     print('Test Accuracy: {} %'.format(100 * correct / total))
手写体识别是深度学习技术应用的一个生动例子,展示了AI在理解和解析人类创造的内容方面的能力。随着算法和硬件的进步,我们可以期待未来在手写体识别和其他相关领域看到更多令人兴奋的成果。
需要训练集的同学可以访问以下链接获取:
链接:https://pan.baidu.com/s/1afPQFahKy9Ei8IjNk8o8pw?pwd=so5x
提取码:so5x
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。