Before implementing the steps above, we need to set up a Python environment (this article uses Python 3.9). For the setup process, see: https://blog.csdn.net/weixin_43366149/article/details/132206526
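The scripts below rely on a handful of packages; in a fresh environment they can be installed with something like:

pip install torch torchvision tensorboard ultralytics pillow tqdm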
This article saves the following code, which defines the LeNet network, as LeNet.py.
import torch
import torch.nn as nn

class LeNet(nn.Module):
    def __init__(self, num_classes=10):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)   # 1 input channel (grayscale), 6 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)  # 6 input channels, 16 output channels, 5x5 kernel
        self.fc1 = nn.Linear(16 * 5 * 5, 120)         # fully connected layer: input size 16*5*5, output size 120
        self.fc2 = nn.Linear(120, 84)                 # fully connected layer: input size 120, output size 84
        self.fc3 = nn.Linear(84, num_classes)         # fully connected layer: input size 84, output size num_classes

    def forward(self, x):
        # conv layer 1, ReLU activation, then 2x2 max pooling
        x = torch.max_pool2d(torch.relu(self.conv1(x)), (2, 2))
        # conv layer 2, ReLU activation, then 2x2 max pooling
        x = torch.max_pool2d(torch.relu(self.conv2(x)), (2, 2))
        # flatten the feature maps into one vector per sample
        x = x.view(-1, 16 * 5 * 5)
        # fully connected layer 1 with ReLU
        x = torch.relu(self.fc1(x))
        # fully connected layer 2 with ReLU
        x = torch.relu(self.fc2(x))
        # fully connected layer 3 (class logits)
        x = self.fc3(x)
        return x

# # Create a LeNet model instance
# model = LeNet(num_classes=10)
# # Print the model structure
# print(model)
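The 16 * 5 * 5 input size of fc1 assumes 32x32 inputs (32 -> 28 -> 14 through conv1 and pooling, then 14 -> 10 -> 5 through conv2 and pooling). A minimal shape check, assuming LeNet.py is importable:

import torch
from LeNet import LeNet

model = LeNet(num_classes=10)
dummy = torch.randn(1, 1, 32, 32)  # (batch, channels, height, width)
print(model(dummy).shape)          # expected: torch.Size([1, 10])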
Next is the custom dataset class, which this article names Mydata.py.
import os
from PIL import Image
from torch.utils.data import Dataset


class MyDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        """
        Initialize the MyDataset class.
        Args:
            data_dir (str): Path to the data directory.
            transform (callable, optional): Transform applied to each image.
        """
        self.data_dir = data_dir
        self.transform = transform

        # Collect the class folders and class names
        self.classes = sorted([d.name for d in os.scandir(data_dir) if d.is_dir()])
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}

        # Collect the image file paths and their class labels
        self.image_paths = []
        self.labels = []

        for class_name in self.classes:
            class_dir = os.path.join(data_dir, class_name)
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                self.image_paths.append(img_path)
                self.labels.append(self.class_to_idx[class_name])

    def __len__(self):
        """
        Return the number of samples in the dataset.
        """
        return len(self.image_paths)

    def __getitem__(self, index):
        """
        Fetch one sample (image and label) by index.
        Args:
            index (int): Index of the sample.
        Returns:
            tuple: (image, label).
        """
        # Look up the image path and label
        img_path = self.image_paths[index]
        label = self.labels[index]

        # Open the image and apply preprocessing
        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)

        return image, label
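A quick usage sketch, assuming the layout of one subfolder per character class (e.g. 4_Recognize/训练集/人/*.jpg):

from torchvision import transforms
from Mydata import MyDataset

dataset = MyDataset("4_Recognize/训练集", transform=transforms.ToTensor())
print(len(dataset))          # number of images found
print(dataset.class_to_idx)  # class-name -> integer-label mapping
image, label = dataset[0]    # a single (tensor, int) sample

Note that the integer labels follow the sorted order of the class folder names; anything else that maps indices back to names must use the same ordering.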
The training script below is named train_lenet.py.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from LeNet import LeNet
from Mydata import MyDataset
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

# Training parameters
batch_size = 64
learning_rate = 0.001
num_epochs = 5

# Use CUDA acceleration if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the dataset instance
data_dir = "4_Recognize/训练集"
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.Grayscale(num_output_channels=1),  # convert to grayscale
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset = MyDataset(data_dir, transform=transform)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Create the LeNet model instance
model = LeNet(num_classes=76)
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Create the SummaryWriter instance
writer = SummaryWriter()

# Train the model
total_steps = len(data_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(data_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{total_steps}], Loss: {loss.item():.4f}')

        # Log the loss
        writer.add_scalar('Loss/train', loss.item(), epoch * total_steps + i)

print('Finished Training')

# Close the SummaryWriter
writer.close()

# Save the model
torch.save(model.state_dict(), 'lenet_model.pth')
Finally, just save the model file. The logged loss curve can be viewed with tensorboard --logdir=runs (SummaryWriter writes to runs/ by default). This article focuses on the method and the overall implementation approach and does not evaluate the trained model; interested readers can add code to check the model's accuracy.
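A minimal accuracy check might look like this, assuming a held-out folder (the path 4_Recognize/验证集 is hypothetical) with the same per-class layout as the training set:

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from LeNet import LeNet
from Mydata import MyDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Same preprocessing as during training
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
# "4_Recognize/验证集" is an assumed held-out folder, not from the original article
val_loader = DataLoader(MyDataset("4_Recognize/验证集", transform=transform), batch_size=64)

model = LeNet(num_classes=76)
model.load_state_dict(torch.load("lenet_model.pth", map_location=device))
model.to(device)
model.eval()

correct = total = 0
with torch.no_grad():
    for images, labels in val_loader:
        preds = model(images.to(device)).argmax(1)
        correct += (preds == labels.to(device)).sum().item()
        total += labels.size(0)
print(f"Accuracy: {correct / total:.4f}")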
Referencing https://blog.csdn.net/wlh156423/article/details/118861987, this article adapts it into the following implementation:
import os
import json
import shutil
from PIL import Image

def convert_bbox(img_size, box):
    # Convert a pixel-space (xmin, ymin, xmax, ymax) box into the
    # normalized YOLO (x_center, y_center, width, height) format
    dw = 1. / img_size[0]
    dh = 1. / img_size[1]
    x = (box[0] + box[2]) / 2.0 - 1
    y = (box[1] + box[3]) / 2.0 - 1
    w = box[2] - box[0]
    h = box[3] - box[1]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)

def get_image_size(image_path):
    with Image.open(image_path) as img:
        width, height = img.size
        return width, height

def extract_data_from_json(json_folder, train_folder):
    images_folder = os.path.join(train_folder, 'images')
    labels_folder = os.path.join(train_folder, 'labels')
    os.makedirs(images_folder, exist_ok=True)
    os.makedirs(labels_folder, exist_ok=True)

    json_files = [f for f in os.listdir(json_folder) if f.endswith('.json')]

    for json_file in json_files:
        with open(os.path.join(json_folder, json_file)) as f:
            data = json.load(f)
            img_name = data.get('img_name')
            ann = data.get('ann')

            # Copy the image alongside its converted label file
            img_path = os.path.join('2_Train', img_name + '.jpg')
            img_w, img_h = get_image_size(img_path)
            shutil.copy(img_path, os.path.join(images_folder, img_name + '.jpg'))

            txt_path = os.path.join(labels_folder, img_name + '.txt')
            with open(txt_path, 'w') as txt_file:
                for bbox in ann:
                    bbox = convert_bbox((img_w, img_h), bbox[:-1])  # ignore the class label for now
                    txt_file.write(f"0 {' '.join(map(str, bbox))}\n")  # write class index 0 as a placeholder

if __name__ == "__main__":
    json_folder_path = '2_Train'
    train_folder_path = 'train'
    extract_data_from_json(json_folder_path, train_folder_path)
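As a quick sanity check of convert_bbox (the file name json2yolo.py is assumed here, since the article does not name this script):

from json2yolo import convert_bbox  # hypothetical file name for the script above

# A (xmin, ymin, xmax, ymax) box of (10, 20, 110, 220) in a 500x400 image:
print(convert_bbox((500, 400), [10, 20, 110, 220]))
# -> (0.118, 0.2975, 0.2, 0.5), i.e. normalized (x_center, y_center, w, h);
# the "- 1" in convert_bbox carries over from the classic VOC conversion
# script, which treats pixel coordinates as 1-based.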
This converts the JSON files into the YOLO TXT files we need and saves them under the train folder of the same project. The txt data looks like this (one box per line: class x_center y_center width height):
0 0.3091482649842271 0.5269662921348315 0.22082018927444794 0.1865168539325843
0 0.555205047318612 0.5730337078651686 0.17034700315457413 0.39550561797752815
0 0.7302839116719243 0.5808988764044944 0.167192429022082 0.402247191011236
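At this point every box still carries the placeholder class 0. The next script crops each box out of its image, classifies the crop with the LeNet model trained earlier, and rewrites the class index accordingly: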
import torch
import torchvision.transforms as transforms
from PIL import Image
from LeNet import LeNet
import os

# Preprocessing: should match what the model saw during training
transform = transforms.Compose([
    transforms.Grayscale(),       # convert to grayscale
    transforms.Resize((32, 32)),  # resize to fit the LeNet input
    transforms.ToTensor(),        # convert to a tensor
    transforms.Normalize((0.5,), (0.5,)),  # same normalization as training
])

# Initialize the model
model = LeNet(num_classes=76)
model.load_state_dict(torch.load("lenet_model.pth"))
model.eval()  # switch to evaluation mode

# File and folder paths
label_dir = 'B题/labels/train'
image_dir = 'B题/images/train'
output_dir = 'new_data'

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Iterate over the label folder
for label_file in os.listdir(label_dir):
    label_path = os.path.join(label_dir, label_file)
    image_path = os.path.join(image_dir, label_file.replace('.txt', '.jpg'))  # image names are assumed to match the labels, differing only in extension

    if os.path.exists(image_path):
        img = Image.open(image_path)  # load the image
        with open(label_path, 'r') as file:
            lines = file.readlines()

        new_lines = []
        for line in lines:
            parts = line.strip().split()
            # Label format: class x_center y_center width height (normalized)
            _, x_center, y_center, width, height = map(float, parts)
            bbox = [
                (x_center - width / 2) * img.width,
                (y_center - height / 2) * img.height,
                width * img.width,
                height * img.height
            ]  # convert to pixel coordinates and sizes

            # Crop and transform the character region
            cropped_img = img.crop((bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]))
            input_tensor = transform(cropped_img).unsqueeze(0)

            # Model prediction
            with torch.no_grad():
                output = model(input_tensor)
                predicted_class = output.argmax(1).item()

            # Rewrite the line with the predicted class
            new_line = f"{predicted_class} {x_center} {y_center} {width} {height}\n"
            new_lines.append(new_line)

        # Save the new label file
        new_label_path = os.path.join(output_dir, label_file)
        with open(new_label_path, 'w') as new_file:
            new_file.writelines(new_lines)
This code processes the original labels and saves the results as txt files in the new_data folder; remember to change the paths to your own when running it. After conversion, an image's txt information looks like this:
59 0.3091482649842271 0.5269662921348315 0.22082018927444794 0.1865168539325843
69 0.555205047318612 0.5730337078651686 0.17034700315457413 0.39550561797752815
6 0.7302839116719243 0.5808988764044944 0.167192429022082 0.402247191011236
Training on this dataset with YOLOv8 requires the class names, which this article extracts with the following code:
import os

# Folder containing one subfolder per class
folder_path = '4_Recognize/训练集'

# Write the subfolder names in "index: name" format; listing only
# directories and sorting keeps the indices consistent with the
# class order used by MyDataset above
with open('name.txt', 'w') as file:
    subfolders = sorted(d.name for d in os.scandir(folder_path) if d.is_dir())
    for idx, subfolder in enumerate(subfolders):
        file.write(f"{idx}: {subfolder}\n")
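The resulting name.txt starts with lines such as 0: 万, 1: 丘, 2: 丙, and can be pasted directly into the names section of the yaml file below.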
The training procedure can follow https://blog.csdn.net/qq_42452134/article/details/135149531. The implementation is as follows.
Create a Python file and enter the following code:
from ultralytics import YOLO

# Load the model
model = YOLO('yolov8n.yaml').load('yolov8n.pt')  # build from YAML and transfer the pretrained weights

if __name__ == '__main__':
    # Train the model
    results = model.train(data='fnal.yaml', epochs=10, imgsz=256)

    metrics = model.val()
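By default, ultralytics writes training outputs to runs/detect/train (the run index in the folder name may vary across runs), with the best checkpoint at weights/best.pt; model.val() then reports validation metrics such as mAP.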
Create the corresponding .yaml file; this is where the extracted class names are needed. (This article names it fnal.yaml.)
# path must point to the dataset directory in your project
path: E:/dell/比赛/new/train/
# train, val, and test point to your training, validation, and test images;
# only the image folders are needed, but image and label file names must correspond
train: images/train  # train images (relative to 'path')
val: images/val  # val images (relative to 'path')
test: images/test  # test images (optional)

# Classes
names:
  0: 万
  1: 丘
  2: 丙
  3: 丧
  4: 乘
  5: 亦
  6: 人
  7: 今
  8: 介
  9: 从
  10: 令
  11: 以
  12: 伊
  13: 何
  14: 余
  15: 允
  16: 元
  17: 兄
  18: 光
  19: 兔
  20: 入
  21: 凤
  22: 化
  23: 北
  24: 印
  25: 及
  26: 取
  27: 口
  28: 吉
  29: 囚
  30: 夫
  31: 央
  32: 宗
  33: 宾
  34: 尞
  35: 巳
  36: 帽
  37: 并
  38: 彘
  39: 往
  40: 御
  41: 微
  42: 旨
  43: 昃
  44: 木
  45: 朿
  46: 涎
  47: 灾
  48: 焦
  49: 爽
  50: 牝
  51: 牡
  52: 牧
  53: 生
  54: 田
  55: 疑
  56: 祝
  57: 福
  58: 立
  59: 羊
  60: 羌
  61: 翌
  62: 翼
  63: 老
  64: 艰
  65: 艺
  66: 若
  67: 莫
  68: 获
  69: 衣
  70: 逆
  71: 门
  72: 降
  73: 陟
  74: 雍
  75: 鹿
Also pay attention to where the folders point; the folder naming matters as well. For the specifics, see what this blogger explains at https://blog.csdn.net/qq_42452134/article/details/135181244
With that, training can begin.
After training, the resulting metric plots can be viewed in the runs folder under the project path, and the Python console shows whether training ran to completion.
Once the model is trained, how do we make predictions? See the official documentation: https://docs.ultralytics.com/modes/predict/#key-features-of-predict-mode
This article gives two corresponding prediction scripts.
from PIL import Image
from ultralytics import YOLO

# Load a YOLOv8n model (replace with your trained weights, e.g. runs/detect/train/weights/best.pt)
model = YOLO('yolov8n.pt')

# Run inference on two test-set images
results = model(['4_Recognize/测试集/w01790.jpg', '4_Recognize/测试集/w01791.jpg'])  # results list

# Visualize the results
for i, r in enumerate(results):
    # Plot the results image
    im_bgr = r.plot()  # BGR-order numpy array
    im_rgb = Image.fromarray(im_bgr[..., ::-1])  # RGB-order PIL image

    # Show results on screen (in supported environments)
    r.show()

    # Save results to disk
    r.save(filename=f'results{i}.jpg')
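The second script batch-processes the whole test folder and saves the rendered predictions: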
import os
from PIL import Image
from tqdm import tqdm
from glob import glob
from ultralytics import YOLO

# Test image folder and folder for the predicted images
test_images_folder = "4_Recognize/测试集"
output_folder = "new_images"

# Load the trained YOLOv8 model
model = YOLO("yolov8n.pt")  # replace with the path to your trained model

# Make sure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# Collect the paths of all images in the test set
test_image_paths = glob(os.path.join(test_images_folder, "*.jpg")) + glob(os.path.join(test_images_folder, "*.png"))

# Predict on each image and save the rendered result
for image_path in tqdm(test_image_paths, desc="Predicting"):
    # Load the image
    image = Image.open(image_path)

    # Run prediction on the image
    predicted_images = model.predict(image)

    # Handle each prediction result and save it to the output folder
    for idx, predicted_image in enumerate(predicted_images):
        # Build the output path; files are named after the source image plus the result index
        output_path = os.path.join(output_folder, f"{os.path.basename(image_path)}_{idx}.jpg")

        # Save the rendered prediction
        predicted_image.save(filename=output_path)

print("Prediction finished; the predicted images have been saved to the 'new_images' folder.")
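If YOLO-format txt output is needed instead of rendered images, each result exposes its detected boxes; a minimal sketch, assuming the trained weights sit at runs/detect/train/weights/best.pt:

import os
from ultralytics import YOLO

model = YOLO("runs/detect/train/weights/best.pt")  # assumed path to the trained weights
results = model.predict("4_Recognize/测试集/w01790.jpg")

os.makedirs("pred_labels", exist_ok=True)
for r in results:
    name = os.path.splitext(os.path.basename(r.path))[0]
    with open(os.path.join("pred_labels", name + ".txt"), "w") as f:
        # r.boxes.cls holds class indices; r.boxes.xywhn holds normalized (x, y, w, h)
        for cls, xywhn in zip(r.boxes.cls.tolist(), r.boxes.xywhn.tolist()):
            f.write(f"{int(cls)} {' '.join(map(str, xywhn))}\n")

Alternatively, passing save_txt=True to model.predict() asks ultralytics itself to write txt labels under the run folder.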
This article only offers one possible approach, along with the processing applied to the corresponding dataset. In image-recognition work the most painful part is usually the dataset: the dataset in this competition, for example, is fairly complex to process, and without familiarity with the YOLO data format and the conversions between formats it is hard to get the problem off the ground.
The author also learned a great deal from this competition; although this write-up came after the fact, it hopefully offers readers a workable approach and some of the details of working with YOLOv8.
This is my first article; I hope it is of help.