赞
踩
先简单记录下,后期有空再补充。
1. 定义 CNN 模型
- import torch.nn as nn
- import torch
- from torchvision import transforms,models
-
class CNN(torch.nn.Module):
    """Small convolutional classifier for 28x28 single-channel digit images."""

    def __init__(self):
        super(CNN, self).__init__()
        # One conv stage: 1 -> 32 channels, 5x5 kernel; padding=2 keeps the
        # 28x28 spatial size, then the 2x2 max-pool halves it to 14x14.
        self.conv = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=5, padding=2),
            torch.nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
        )
        # Flattened 32 x 14 x 14 feature map mapped to the 10 digit classes.
        self.fc = torch.nn.Linear(14 * 14 * 32, 10)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, 10)."""
        features = self.conv(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)
2.制作自己的数据集
- import os
- import torch
- from torch.utils import data
- from PIL import Image
- from torchvision import transforms
- from torch.utils.data import Dataset
-
# Map each class sub-directory name ('0'..'9') to its integer label.
species = {str(digit): digit for digit in range(10)}
-
class MyDataset(Dataset):
    """Labelled image-folder dataset.

    `root` (e.g. 'mnist\\test' or 'mnist\\train') contains one sub-directory
    per class ('0'..'9'), each holding that digit's image files.
    """

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.data = []
        # Each sub-directory name IS the class label key.
        for sub_root in os.listdir(self.root):
            sub_dir = os.path.join(self.root, sub_root)
            for image_name in os.listdir(sub_dir):
                image_path = os.path.join(sub_dir, image_name)
                # BUG FIX: the label used to be recovered from the full path
                # with image_path.split('\\')[-2], which only works on Windows
                # (os.path.join uses '/' elsewhere). The sub-directory name is
                # already the label key, so use it directly.
                label = species[sub_root]
                # Store (image path, integer label) tuples.
                self.data.append((image_path, label))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image_path, label = self.data[index]
        image = Image.open(image_path).convert('RGB')
        # Robustness: honour the transform=None default instead of crashing
        # with "'NoneType' object is not callable".
        if self.transform is not None:
            image = self.transform(image)
        return image, label
class MyDataset_pre(Dataset):
    """Unlabelled prediction dataset: `root` (e.g. 'test_images') is a flat
    directory of image files; items are transformed images with no label."""

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        # One full path per file in the directory.
        self.data = [os.path.join(self.root, name) for name in os.listdir(self.root)]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image_path = self.data[index]
        image = Image.open(image_path).convert('RGB')
        # Robustness: honour the transform=None default instead of crashing
        # with "'NoneType' object is not callable".
        if self.transform is not None:
            image = self.transform(image)
        return image
-
3.模型训练
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- import torch.optim as optim
- from torchvision import transforms
- from torch.utils.data import DataLoader
-
- from preprocess_dataset import MyDataset
- from model import CNN
-
BATCH_SIZE = 32
EPOCHS = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ToTensor first, then collapse RGB to one grayscale channel to match the
# CNN's Conv2d(1, ...) input.
trans = transforms.Compose([transforms.ToTensor(), transforms.Grayscale()])
train_dataset = MyDataset('mnist1_new\\train', transform=trans)
test_dataset = MyDataset('mnist1_new\\test', transform=trans)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

model = CNN()
net = model.to(DEVICE)

# Loss function and optimizer.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

# BUG FIX: the best-accuracy tracker must live OUTSIDE the epoch loop; it was
# reset to 0.0 every epoch, so the "best model" checkpoint was overwritten on
# every epoch regardless of accuracy.
best_acc = 0.0

# One epoch = one full pass over the training set.
for epoch in range(EPOCHS):
    net.train()
    for i, (images, labels) in enumerate(train_loader):
        # BUG FIX: move each batch to the model's device; with CUDA available
        # the original crashed on a CPU/CUDA tensor mismatch.
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = net(images)
        loss = criterion(outputs, labels)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print("epoch is {}, batch is{}/{}, loss is {}".format(
            epoch + 1, i, len(train_dataset) / BATCH_SIZE, loss.item()))

    # Evaluate accuracy and loss on the test set after each epoch.
    net.eval()
    loss_test = 0.0
    correct = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = net(images)
            # .item() avoids accumulating a chain of tensors.
            loss_test += criterion(outputs, labels).item()
            _, pred = outputs.max(1)
            # Count correct predictions for the accuracy.
            correct += (pred == labels).sum().item()
    accuracy = correct / len(test_dataset)
    loss_test = loss_test / (len(test_dataset) / BATCH_SIZE)
    print("epoch is {}, accuracy is {}, loss test is {}".format(epoch + 1, accuracy, loss_test))
    # Checkpoint only when the test accuracy improves.
    if accuracy > best_acc:
        best_acc = accuracy
        torch.save(net, "model/mnist_model_nn.pkl")
        print('accuancy', best_acc)
4(前置).对图片进行预处理:将一串数字的图片按照从左到右切分成单个数字图片。(仅供参考)
- import cv2
- import numpy as np
- import os,torch
- from torchvision.transforms import transforms
- import numpy
- from PIL import Image
-
def sort_contours(cnts, method='left-to-right'):
    """Sort contours by bounding-box position.

    method: one of 'left-to-right' (default), 'right-to-left',
    'top-to-bottom', 'bottom-to-top'.
    Returns (sorted_contours, sorted_bounding_boxes).
    """
    # Descending order for right-to-left / bottom-to-top.
    reverse = method in ('right-to-left', 'bottom-to-top')
    # Compare y coordinates for vertical orderings, x otherwise.
    axis = 1 if method in ('bottom-to-top', 'top-to-bottom') else 0
    boxes = [cv2.boundingRect(c) for c in cnts]
    ordered = sorted(zip(cnts, boxes), key=lambda pair: pair[1][axis], reverse=reverse)
    cnts, boxes = zip(*ordered)
    return (cnts, boxes)
-
def cut_image_sign():
    """Split each digit-strip image under output_me/ into per-digit crops.

    Source images are black digits on a white background; each crop is
    written to output_me_cut/ as me_cut{n}.png, ordered left to right.
    """
    root_dir = 'output_me\\'
    for im_name in os.listdir(root_dir):
        src = cv2.imread(os.path.join(root_dir, im_name))
        h, w = src.shape[:2]
        # Normalize every strip to a fixed 256x138 canvas.
        SIZE = 138
        w_size = 256
        src = cv2.resize(src, (w_size, SIZE))

        # Binarize, then dilate to solidify strokes before contour detection.
        gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
        threshold, binary = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
        binary = cv2.dilate(binary, (15, 15))
        binary_copy = binary.copy()
        contours, hierarchy = cv2.findContours(binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
        # Order the digit contours left to right.
        (contours, boxes) = sort_contours(contours)

        count = 0
        for contour in contours:
            # Keep only contours whose area looks like a single digit
            # (filters out noise and the full-strip outline).
            area = cv2.contourArea(contour)
            if 300 < area < 4000:
                count += 1
                x, y, cw, ch = cv2.boundingRect(contour)
                crop = src[y:y + ch, x:x + cw]
                out_path = os.path.join('output_me_cut', 'me_cut{}.png'.format(count))
                cv2.imwrite(out_path, crop)
-
def image_address():
    """Normalize each cropped digit in output_me_cut/ for the MNIST-style model.

    Resizes to 28x28, Otsu-binarizes, inverts to white-on-black, and writes
    the result to output_me_cut_black/ as 0_{n}.png.
    """
    root_dir = 'output_me_cut\\'
    count = 0
    for im_name in os.listdir(root_dir):
        count += 1
        image = Image.open(os.path.join(root_dir, im_name))
        resized = transforms.Resize((28, 28))(image)
        arr = np.array(resized)
        gray = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)
        # Otsu chooses the threshold automatically (the 0 here is ignored).
        ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Invert: training data is white digits on a black background.
        inverted = cv2.bitwise_not(binary)
        # Dead code removed: the original also computed
        # cv2.erode(inverted, (15, 15)) but never used the result — the
        # un-eroded image is what was (and still is) written to disk.
        cv2.imwrite(os.path.join('output_me_cut_black\\', '0_{}.png'.format(count)), inverted)
4.预测单张图片
- import os
- import torch
- from PIL import Image
- from torch import nn
- from torchvision import transforms, models
- from torch.utils.data import DataLoader
- from preprocess_dataset import MyDataset_pre
- from torchvision.transforms import ToPILImage
- import cv2
# Index-to-class lookup: prediction index i maps to digit character species[i].
species = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# NOTE(review): torch.load unpickles arbitrary code — only load model files
# from a trusted source.
model = torch.load("model\\mnist_model_nn.pkl", map_location=torch.device("cpu"))
# Match the training pre-processing: 28x28, tensor, single grayscale channel.
trans = transforms.Compose([transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Grayscale()])
# Input images should already be pre-processed like the training data:
# white digits on a black background, 28x28.
predict_dataset = MyDataset_pre('output_me_cut_black', transform=trans)
predict_loader = DataLoader(predict_dataset, batch_size=32)
model.eval()

# BUG FIX: `predict` used to be re-created INSIDE the batch loop, so with
# more than one batch only the last batch's digits were printed. Accumulate
# across all batches instead.
predict = []
with torch.no_grad():
    for images in predict_loader:
        output = model(images)
        _, pred = torch.max(output, 1)
        predict.extend(species[int(p.item())] for p in pred)

# Print the recognized digits as one string, in dataset order.
print(''.join(predict))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。