赞
踩
下面图片是模仿coco128.yaml文件改写的mydata.yaml;
以下代码将voc数据集转换成yolo格式:将voc数据集的图片存放到images文件夹中,并按8:1:1的比例划分为训练集,验证集和测试集;同时将voc数据集标注文件由原本的xml格式转换为txt格式并存放在labels文件夹中,其中的标签文件与images文件夹中训练集,验证集和测试集的图片一一对应。
注意:需在输出目录(output_dir)中提前准备好images文件夹和labels文件夹,并分别在其中创建train,val,test子文件夹。如果不需要测试集,将代码中与test相关的部分删除,并把对应的比例划分给train或者val。
# VOC -> YOLO dataset conversion
import os
import random
import shutil
import xml.etree.ElementTree as ET

# Location of the VOC dataset to read and of the YOLO dataset to write.
voc_dir = './cfg/datasets/VOC2012'  # source dataset root
output_dir = './cfg/datasets/VOC'  # destination dataset root

# Create the YOLO folder layout, including one sub-folder per subset, so the
# copy/convert steps below never hit a missing directory.
yolo_subdirs = ['images', 'labels']
for subdir in yolo_subdirs:
    for subset in ('train', 'val', 'test'):
        os.makedirs(os.path.join(output_dir, subdir, subset), exist_ok=True)

# Collect the base names (no extension) of the JPEG images. Other files that
# may sit in the folder are ignored because the copy step appends '.jpg' to
# every name. Sorting makes the starting order independent of os.listdir(),
# so seeding `random` beforehand yields a reproducible split.
image_files = sorted(
    stem
    for stem, ext in map(
        os.path.splitext, os.listdir(os.path.join(voc_dir, 'JPEGImages'))
    )
    if ext.lower() == '.jpg'
)

# Shuffle so that every subset is a random sample of the dataset.
random.shuffle(image_files)

# Split ratios (train : val : test = 8 : 1 : 1); test receives the remainder.
total_images = len(image_files)
train_ratio = 0.8
val_ratio = 0.1

train_split = int(total_images * train_ratio)
val_split = int(total_images * (train_ratio + val_ratio))

# Slice the shuffled list into the three subsets.
train_images = image_files[:train_split]
val_images = image_files[train_split:val_split]
test_images = image_files[val_split:]
# Copy image files into the YOLO folder layout.
def copy_images(image_list, subset, src_dir=None, dest_dir=None):
    """Copy the JPEG files of one subset into ``<dest_dir>/images/<subset>``.

    Args:
        image_list: Image base names without extension; ``'.jpg'`` is
            appended to each name.
        subset: Name of the destination sub-folder, e.g. ``'train'``.
        src_dir: Dataset root containing ``JPEGImages``. Defaults to the
            module-level ``voc_dir``.
        dest_dir: Output root containing ``images``. Defaults to the
            module-level ``output_dir``.

    Raises:
        FileNotFoundError: If ``<name>.jpg`` is missing from ``JPEGImages``.
    """
    if src_dir is None:
        src_dir = voc_dir
    if dest_dir is None:
        dest_dir = output_dir

    # Create the subset folder on demand instead of requiring the user to
    # prepare it by hand.
    subset_dir = os.path.join(dest_dir, 'images', subset)
    os.makedirs(subset_dir, exist_ok=True)

    for image_name in image_list:
        file_name = image_name + '.jpg'
        shutil.copy(
            os.path.join(src_dir, 'JPEGImages', file_name),
            os.path.join(subset_dir, file_name),
        )
# Copy the images of every subset into its YOLO folder.
copy_images(train_images, 'train')
copy_images(val_images, 'val')
copy_images(test_images, 'test')
# Maps each VOC class name to the integer class index written to the labels.
class_to_idx = {
    'aeroplane': 0,
    'bicycle': 1,
    'bird': 2,
    'boat': 3,
    'bottle': 4,
    'bus': 5,
    'car': 6,
    'cat': 7,
    'chair': 8,
    'cow': 9,
    'diningtable': 10,
    'dog': 11,
    'horse': 12,
    'motorbike': 13,
    'person': 14,
    'pottedplant': 15,
    'sheep': 16,
    'sofa': 17,
    'train': 18,
    'tvmonitor': 19,
    # Add further entries here if your dataset has more classes.
}
# Convert the XML annotations to YOLO format and save them as txt files.
def convert_voc_to_yolo_label(image_list, subset, src_dir=None, dest_dir=None,
                              class_map=None):
    """Write one YOLO label file per image into ``<dest_dir>/labels/<subset>``.

    Each ``<src_dir>/Annotations/<name>.xml`` is read and every object whose
    class name is present in ``class_map`` becomes one line
    ``"<class> <x_center> <y_center> <width> <height>"``, with the box values
    divided by the image width/height taken from the ``size`` element.
    Objects with an unknown or missing class name are skipped.

    Args:
        image_list: Image base names without extension.
        subset: Name of the destination sub-folder, e.g. ``'train'``.
        src_dir: Dataset root containing ``Annotations``. Defaults to the
            module-level ``voc_dir``.
        dest_dir: Output root containing ``labels``. Defaults to the
            module-level ``output_dir``.
        class_map: Mapping from class name to class index. Defaults to the
            module-level ``class_to_idx``.

    Raises:
        FileNotFoundError: If the annotation file of an image is missing.
        xml.etree.ElementTree.ParseError: If an annotation is not valid XML.
    """
    if src_dir is None:
        src_dir = voc_dir
    if dest_dir is None:
        dest_dir = output_dir
    if class_map is None:
        class_map = class_to_idx

    # Create the subset folder on demand instead of requiring the user to
    # prepare it by hand.
    labels_dir = os.path.join(dest_dir, 'labels', subset)
    os.makedirs(labels_dir, exist_ok=True)

    for image_name in image_list:
        label_path_src = os.path.join(src_dir, 'Annotations', image_name + '.xml')
        label_path_dest = os.path.join(labels_dir, image_name + '.txt')

        # Parse before opening the output file so that an unreadable
        # annotation never leaves an empty label file behind.
        root = ET.parse(label_path_src).getroot()
        img_size = root.find('size')
        img_width = float(img_size.find('width').text)
        img_height = float(img_size.find('height').text)

        lines = []
        for obj in root.findall('object'):
            class_name = (obj.findtext('name') or '').strip()
            if class_name not in class_map:
                continue

            class_idx = class_map[class_name]
            bbox = obj.find('bndbox')
            xmin = float(bbox.find('xmin').text)
            ymin = float(bbox.find('ymin').text)
            xmax = float(bbox.find('xmax').text)
            ymax = float(bbox.find('ymax').text)

            # YOLO boxes are (centre x, centre y, width, height), each
            # normalised by the image size.
            x_center = (xmin + xmax) / (2 * img_width)
            y_center = (ymin + ymax) / (2 * img_height)
            width = (xmax - xmin) / img_width
            height = (ymax - ymin) / img_height

            lines.append(
                f"{class_idx} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
            )

        # An image without any mapped object still gets an (empty) label file.
        with open(label_path_dest, 'w', encoding='utf-8') as label_file:
            label_file.writelines(lines)
# Convert the labels of the training, validation and test subsets.
convert_voc_to_yolo_label(train_images, 'train')
convert_voc_to_yolo_label(val_images, 'val')
convert_voc_to_yolo_label(test_images, 'test')

print("数据集转换完成,YOLO格式的数据集保存在", output_dir)

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。