1. Background
This project is often used as an introductory exercise in machine learning because it is relatively simple yet touches many fundamental concepts. Handwritten digit recognition can be seen as the "Hello World" of machine learning: it involves the basic steps of data collection, feature extraction, model selection, training, and evaluation. That makes it an excellent starting point.
2. The MNIST Dataset
MNIST ships with most major frameworks, so there are several ways to load it. With Keras:
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# The downloaded files are cached under ~/.keras/datasets/
With scikit-learn, via OpenML:
from sklearn.datasets import fetch_openml
mnist = fetch_openml(name='mnist_784', version=1)
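Note that mnist_784 comes pre-flattened: each sample is a 784-dimensional row rather than a 28x28 image, which is also the layout the SVM code below expects. A quick sanity check (recent scikit-learn versions return a pandas DataFrame here unless as_frame=False is passed):
X, y = mnist.data, mnist.target
print(X.shape, y.shape)  # expected: (70000, 784) (70000,)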
With PyTorch, via torchvision:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

# Define the data transforms
transform = transforms.Compose([
    transforms.ToTensor(),                # Convert images to tensors in [0, 1]
    transforms.Normalize((0.5,), (0.5,))  # Normalize the images
])

# Download the training set
train_dataset = MNIST(root='./data', train=True, transform=transform, download=True)
# Download the test set
test_dataset = MNIST(root='./data', train=False, transform=transform, download=True)
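The DataLoader imported above is what these datasets are typically wrapped in for mini-batch training; a minimal sketch (the batch size of 64 is an arbitrary choice):
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Peek at one batch: images have shape (64, 1, 28, 28), labels (64,)
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)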
3. Hands-On Practice
This section walks through the two main approaches: traditional machine learning and deep learning.
First, import the required libraries and modules: numpy for array operations, sklearn for machine-learning tools, PIL for image handling, matplotlib for plotting, and os for file-path manipulation.
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt
from PIL import Image as Im
import os
Next, define a loaddata function for loading the data. It takes a directory path and a load rate, and returns two lists, x and y, holding the features and labels respectively. The function walks the subfolders under the given path, reads each image in each subfolder, and flattens it into a one-dimensional array.
def loaddata(dir_path, load_rate):
    dirs = os.listdir(dir_path)
    x = []
    y = []
    for tag in dirs:
        image_names = os.listdir(os.path.join(dir_path, tag))
        nums = int(len(image_names) * load_rate)
        for image_name in image_names[:nums]:
            image_path = os.path.join(dir_path, tag, image_name)
            img = Im.open(image_path, "r")
            mat = np.array(img)
            mat_size = np.shape(mat)[0] * np.shape(mat)[1]
            x.append(np.reshape(mat, mat_size))
            y.append(int(tag))
    return [x, y]
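The cells below refer to train_x, train_y, test_x, and test_y without defining them; presumably they come from loaddata. A sketch of the missing step, assuming the images are organized as one subfolder per digit under hypothetical ./mnist_train and ./mnist_test directories:
# Hypothetical layout: ./mnist_train/0/ ... ./mnist_train/9/, same for ./mnist_test
train_x, train_y = loaddata('./mnist_train', 1.0)
test_x, test_y = loaddata('./mnist_test', 1.0)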
Then create a support vector machine (SVM) classifier with a linear kernel ('linear') and a one-vs-one multiclass strategy ('ovo'). Train the model by calling fit with the training features train_x and labels train_y.
from sklearn import svm
svm_classifier = svm.SVC(kernel='linear', decision_function_shape='ovo')
svm_classifier.fit(train_x, train_y)
Compute and print the model's accuracy on the training set.
train_accuracy = svm_classifier.score(train_x, train_y)
print(f"训练集精度:{train_accuracy:.2%}")
Predict on the test set and compute the model's accuracy there.
test_predictions = svm_classifier.predict(test_x)
test_accuracy = svm_classifier.score(test_x, test_y)
print(f"测试集精度:{test_accuracy:.2%}")
Predict the 5001st sample of the test set (index 5000), and show the predicted label, the true label, and the sample's image.
index_to_predict = 5000
predicted_label = svm_classifier.predict([test_x[index_to_predict]])[0]
true_label = test_y[index_to_predict]
print(f"预测结果:{predicted_label}")
print(f"真实结果:{true_label}")
image_to_display = np.reshape(test_x[index_to_predict], (28, 28))
plt.imshow(image_to_display, cmap='gray')
plt.title("测试集第5001个数据")
plt.show()
For each of several kernel functions (linear, polynomial, radial basis function), build an SVM model and evaluate its performance on the training and test sets.
kernel_functions = ['linear', 'poly', 'rbf']
for kernel_func in kernel_functions:
    svm_classifier = svm.SVC(kernel=kernel_func, decision_function_shape='ovo')
    svm_classifier.fit(train_x, train_y)
    train_accuracy = svm_classifier.score(train_x, train_y)
    test_accuracy = svm_classifier.score(test_x, test_y)
    print(f"\nKernel: {kernel_func}")
    print(f"Training accuracy: {train_accuracy:.2%}")
    print(f"Test accuracy: {test_accuracy:.2%}")
Now the deep learning approach, starting with a convolutional neural network in TensorFlow/Keras:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load and preprocess the data
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# Build the convolutional neural network
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=5, batch_size=64,
          validation_data=(test_images, test_labels))

# Evaluate on the test set
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_acc * 100:.2f}%')
By default, TensorFlow will try to use an available GPU (if there is one) for computation, provided your TensorFlow build supports GPUs and the GPU driver and CUDA are correctly installed on your system.
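One way to verify where operations actually land is device-placement logging, a minimal check using TensorFlow's tf.debugging.set_log_device_placement:
import tensorflow as tf

# Log the device (CPU/GPU) each operation is placed on
tf.debugging.set_log_device_placement(True)

# Any op executed now logs its placement, e.g. ".../device:GPU:0" when a GPU is used
a = tf.constant([[1.0, 2.0], [3.0, 4.0]])
b = tf.matmul(a, a)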
An equivalent CNN in PyTorch:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define a simple convolutional neural network
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define the transforms and data loaders
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 5
for epoch in range(epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluate on the test set after each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    print(f'Epoch {epoch + 1}/{epochs}, Test Accuracy: {accuracy * 100:.2f}%')
Note that this code runs on the CPU by default. If you have a GPU and want to use it, move both the model and the data onto the GPU, typically with .to(device), as shown below.
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Check whether a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define a simple convolutional neural network
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Move the model to the GPU
model = SimpleCNN().to(device)

# Define the transforms and data loaders
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 5
for epoch in range(epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluate on the test set after each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total
    print(f'Epoch {epoch + 1}/{epochs}, Test Accuracy: {accuracy * 100:.2f}%')
In TensorFlow, to run on a specific device, use the with tf.device('/device:GPU:0'): context manager.
import tensorflow as tf

# Check whether a GPU is available (tf.config.list_physical_devices replaces the
# deprecated tf.test.is_gpu_available)
if tf.config.list_physical_devices('GPU'):
    with tf.device('/device:GPU:0'):
        # Code that runs on the GPU
        model = tf.keras.models.Sequential(...)
else:
    # Code that runs on the CPU
    model = tf.keras.models.Sequential(...)
Finally, the same task in PaddlePaddle, using its high-level Model API:
import numpy as np
import paddle
import paddle.vision.transforms as T
import paddle.vision.datasets as datasets
import paddle.nn as nn
import paddle.optimizer as optimizer

# Data preparation
transform = T.Normalize(mean=[127.5], std=[127.5])
train_dataset = datasets.MNIST(mode='train', transform=transform)
test_dataset = datasets.MNIST(mode='test', transform=transform)

# Model definition
class Net(nn.Layer):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2D(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2D(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2)
        self.fc = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = self.pool2(x)
        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
        x = self.fc(x)
        return x

model = paddle.Model(Net())

# Training
model.prepare(optimizer=optimizer.Adam(learning_rate=0.001, parameters=model.parameters()),
              loss=nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())
model.fit(train_dataset, epochs=5, batch_size=64, verbose=1)

# Evaluation
result = model.evaluate(test_dataset, verbose=1)
print(result)

# Save and reload the model
model.save('mnist_model')
model.load('mnist_model')

# Prediction: Model.predict returns one list per model output,
# each holding per-batch arrays of logits
pred = model.predict(test_dataset)
first_logits = pred[0][0][0]  # logits for the first test image
print("Predicted digit:", int(np.argmax(first_logits)))
Like TensorFlow, PaddlePaddle runs on the CPU by default but will automatically detect an available GPU and try to compute there. On a machine with a GPU, this lets you exploit its parallelism to speed up both training and inference.
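If you would rather pin the device explicitly than rely on auto-detection, Paddle exposes paddle.device.set_device; a minimal sketch (call it before building the model):
import paddle

# Use the GPU when this Paddle build has CUDA support, otherwise fall back to CPU
if paddle.device.is_compiled_with_cuda():
    paddle.device.set_device('gpu:0')
else:
    paddle.device.set_device('cpu')

print(paddle.device.get_device())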
4. Summary
Traditional machine-learning algorithms have the advantage of being simple to implement and relatively insensitive to missing values and outliers, but they can fall short on large-scale data and complex tasks. Deep-learning algorithms excel at large-scale data and complex tasks, but they demand more data and compute, and tuning them is harder.
PaddlePaddle suits users in China: it is easy to install and use, with plenty of documentation and example code to draw on. PyTorch leads in academic research, with concise, easy-to-use code. TensorFlow leads in industrial applications and has a rich ecosystem.
That covers the basics of handwritten digit recognition. Of course, there are more interesting things it can do; stay tuned for the next installment.