赞
踩
目录
2.2.3 将大的txt分解成多个小txt,txt以图片名字命名
2.2.4 将txt中对应的路径的图片复制到images/train
3.1、设置官方配置文件:default.yaml,可自行修改。
本文是使用YOLOv8-Detect训练自己的数据集,数据集包含COCO数据集的人猫狗数据以及自己制作的人猫狗数据集,类别为0:person、1:cat、2:dog三类,大家可根据自己的数据集类别进行调整。
可以参考这篇博客:深度学习环境搭建-CSDN博客
本文环境:
- mydata
- ______images
- ____________train
- _________________001.jpg
- ____________val
- _________________002.jpg
- ______labels
- ____________train
- _________________001.txt
- ____________val
- _________________002.txt
本人的数据都存放在mydata文件夹中(自定义命名)
目录结构如下:images存放训练集和验证集图片,labels存放训练集和验证集txt
从官网下载COCO数据集的json标注文件(例如instances_train2017.json),再用下面的脚本将其转换为按类别分文件夹存放的VOC格式xml文件:
- import argparse, json
- import cytoolz
- from lxml import etree, objectify
- import os, re
-
def instance2xml_base(anno):
    """Build the image-level part of a VOC-style ``<annotation>`` tree.

    ``anno`` is a mapping that provides ``category_id``, ``file_name``,
    ``coco_url``, ``width`` and ``height``. The returned element carries
    the folder, filename, source, size and segmented children; ``<object>``
    children are not added here.
    """
    maker = objectify.ElementMaker(annotate=False)
    folder_node = maker.folder('VOC2014_instance/{}'.format(anno['category_id']))
    filename_node = maker.filename(anno['file_name'])
    source_node = maker.source(
        maker.database('MS COCO 2014'),
        maker.annotation('MS COCO 2014'),
        maker.image('Flickr'),
        maker.url(anno['coco_url']),
    )
    size_node = maker.size(
        maker.width(anno['width']),
        maker.height(anno['height']),
        maker.depth(3),  # depth is hard-coded to 3
    )
    return maker.annotation(
        folder_node,
        filename_node,
        source_node,
        size_node,
        maker.segmented(0),
    )
-
def instance2xml_bbox(anno, bbox_type='xyxy'):
    """Build one ``<object>`` element from a single annotation record.

    ``bbox_type`` selects what ends up in the ``bndbox`` tags:

    * ``'xyxy'`` - ``anno['bbox']`` is read as (xmin, ymin, width, height)
      and converted to corners (xmin, ymin, xmax, ymax);
    * ``'xywh'`` - the four values of ``anno['bbox']`` are written unchanged
      into the xmin / ymin / xmax / ymax tags.

    The object name is ``anno['category_id']`` and the ``difficult`` flag is
    taken from ``anno['iscrowd']``.
    """
    assert bbox_type in ['xyxy', 'xywh']
    left, top, third, fourth = anno['bbox']
    if bbox_type == 'xyxy':
        right = left + third
        bottom = top + fourth
    else:
        right, bottom = third, fourth

    maker = objectify.ElementMaker(annotate=False)
    box_node = maker.bndbox(
        maker.xmin(left),
        maker.ymin(top),
        maker.xmax(right),
        maker.ymax(bottom),
    )
    return maker.object(
        maker.name(anno['category_id']),
        box_node,
        maker.difficult(anno['iscrowd']),
    )
-
-
def parse_instance(content, outdir):
    """Write VOC-style XML files for every image referenced by annotations.

    ``content`` is the loaded annotation JSON and must contain the
    ``categories``, ``images`` and ``annotations`` lists. For each image, one
    XML tree holding all of its objects is built and written once into the
    sub-directory of every category that occurs in that image
    (``outdir/<category name with spaces replaced by '_'>/<image stem>.xml``).
    The category sub-directories must already exist.
    """
    categories = {d['id']: d['name'] for d in content['categories']}
    # Merge images and annotations: ``id`` in images vs ``image_id`` in
    # annotations.
    merged_info_list = list(map(
        cytoolz.merge,
        cytoolz.join('id', content['images'], 'image_id', content['annotations']),
    ))

    # Convert category id to category name.
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]

    # Group by file name to pool all boxes that belong to the same image.
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        xml_name = os.path.splitext(name)[0] + ".xml"
        # An image with several objects of the same category would otherwise
        # list (and rewrite) the same path once per object; a dict keeps the
        # first-seen order while dropping the duplicates.
        filenames = {}
        for group in groups:
            category_dir = re.sub(" ", "_", group['category_id'])
            filenames[os.path.join(outdir, category_dir, xml_name)] = None
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        document = etree.ElementTree(anno_tree)
        for filename in filenames:
            document.write(filename, pretty_print=True)
        print("Formating instance xml file {} done!".format(name))
-
def main(args):
    """Convert an annotation JSON file into per-category XML files.

    ``args`` must expose ``anno_file`` (path of the annotation JSON),
    ``type`` and ``output_dir``. The output directory is always created.
    Only ``type == 'instance'`` triggers a conversion: one sub-directory per
    category name (spaces replaced by ``_``) is created and
    ``parse_instance`` is run. Any other type loads the JSON and does
    nothing else.
    """
    os.makedirs(args.output_dir, exist_ok=True)
    # Use a context manager so the handle is closed right after loading, and
    # read as UTF-8 rather than the platform's locale encoding.
    with open(args.anno_file, 'r', encoding='utf-8') as anno_fh:
        content = json.load(anno_fh)
    if args.type == 'instance':
        # Make the category sub-directories that parse_instance writes into.
        sub_dirs = [re.sub(" ", "_", cate['name']) for cate in content['categories']]
        for sub_dir in sub_dirs:
            os.makedirs(os.path.join(args.output_dir, str(sub_dir)), exist_ok=True)
        parse_instance(content, args.output_dir)
-
-
-
if __name__ == "__main__":
    # Command-line entry point: all three options have defaults, so the
    # script can be run without arguments once the paths below are edited.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--anno_file",
        help="annotation file for object instance/keypoint",
        default=r'D:\CoCoData\annotations_14_17\instances_train2017.json',
    )
    parser.add_argument(
        "--type",
        type=str,
        default='instance',
        help="object instance or keypoint",
        choices=['instance', 'keypoint'],
    )
    parser.add_argument(
        "--output_dir",
        help="output directory for voc annotation xml file",
        default=r'D:\CoCoData\CoCoXml',
    )
    main(parser.parse_args())
修改文件存放的路径和保存xml文件的路径。
运行后得到80个文件夹,分别为80类的xml文件:
再将需要类别的xml文件整合,得到训练集和验证集xml
- import os
- import random
- import xml.etree.ElementTree as ET
-
- import glob
-
-
# Class names to keep; the position in this list is the class id that gets
# written to the label files.
classes = ['person', 'cat', 'dog']


def convert(size, box):
    """Convert a pixel corner box into a normalised (x, y, w, h) tuple.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, ymin, xmax, ymax)`` in pixels.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the
        matching image dimension.

    The corners are clipped to the image before the centre and extent are
    computed, so a box that partly leaves the image yields its visible part
    and every returned value stays within [0, 1]. Boxes that already lie
    inside the image are unaffected.
    """
    img_w, img_h = size[0], size[1]
    xmin = min(max(box[0], 0), img_w)
    ymin = min(max(box[1], 0), img_h)
    xmax = min(max(box[2], 0), img_w)
    ymax = min(max(box[3], 0), img_h)

    dw = 1. / img_w
    dh = 1. / img_h
    x = (xmin + xmax) / 2.0
    y = (ymin + ymax) / 2.0
    w = xmax - xmin
    h = ymax - ymin
    return (x * dw, y * dh, w * dw, h * dh)
def convert_annotation(xml, list_file):
    """Append the kept objects of one annotation XML to an open list file.

    Args:
        xml: path of the annotation XML file.
        list_file: writable text file object; for every kept object the
            text ``" <class id>,<x>,<y>,<w>,<h>"`` is written to it, with no
            trailing newline.

    Objects whose name is not in the module-level ``classes`` list, or whose
    ``difficult`` flag equals 1, are skipped. Box values come from
    ``convert`` using the image size stored in the XML.
    """
    # Parse inside a context manager so the XML handle is closed again;
    # this function is called once per annotation file.
    with open(xml, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    xmlsize = root.find('size')
    w = int(xmlsize.find('width').text)
    h = int(xmlsize.find('height').text)

    for obj in root.iter('object'):
        # A missing or empty <difficult> tag counts as "not difficult".
        difficult = 0
        difficult_node = obj.find('difficult')
        if difficult_node is not None:
            difficult_text = (difficult_node.text or '').strip()
            if difficult_text:
                difficult = difficult_text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            print(cls, "------------------------------------\n")
            continue
        cls_id = classes.index(cls)
        print(cls, cls_id)

        xmlbox = obj.find('bndbox')
        x0 = float(xmlbox.find('xmin').text)
        y0 = float(xmlbox.find('ymin').text)
        x1 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymax').text)
        # Order the corners so that swapped min/max values are tolerated.
        b = (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
        bb = convert((w, h), b)

        list_file.write(" " + str(cls_id) + "," + ",".join([str(a) for a in bb]))
-
-
if __name__ == "__main__":
    random.seed(0)

    # Directory holding the images
    cocoImgPath = r'D:\Yolov8\Yolov8_dataset\selfXml\err04'
    # Directory holding the XML annotations
    cocoXmlPath = r'D:\Yolov8\Yolov8_dataset\selfXml\err04'
    # Directory the merged list file is written to
    txtsavepath = r'D:\Yolov8\ultralytics-main\datasets\mydata\selfv8txt'
    # The list file is named after the last component of the image
    # directory; the paths above are Windows paths, hence the '\\' split.
    fileWriteTxt = txtsavepath + '\\' + cocoImgPath.split('\\')[-1] + '_v8.txt'
    xmls = glob.glob(os.path.join(cocoXmlPath, '*.xml'))

    os.makedirs(txtsavepath, exist_ok=True)
    # The context manager closes (and flushes) the list file even when an
    # annotation fails to parse half-way through.
    with open(fileWriteTxt, 'w', encoding='utf-8') as list_file:
        for xml in xmls:
            # Derive the image path from the file stem instead of replacing
            # substrings, so '.xml' elsewhere in the path cannot be mangled.
            stem = os.path.splitext(os.path.basename(xml))[0]
            img = os.path.join(cocoImgPath, stem + '.jpg')
            if not os.path.exists(img):
                print(img, ' is not exit')
                continue
            # One line per image: the image path followed by its boxes.
            list_file.write(img)
            convert_annotation(xml, list_file)
            list_file.write('\n')
-
运行以上代码可得到txt,每行对应一张图片,内容如下:图片路径 classID,x_center,y_center,w,h(坐标已按图片宽高归一化;同一张图片的多个目标之间用空格分隔)
-
- import glob
- import os
-
-
# Merged list file to split: one line per image, formatted as
# "<image path> <cls,x,y,w,h> <cls,x,y,w,h> ...".
# NOTE: fields are split on single spaces, so image paths must not contain
# spaces.
txtpathdir = r'D:\Yolov8\ultralytics-main\datasets\mydata\selfv8txt\err04_v8.txt'
# Directory that receives one label txt per image
savetxtpath = r'D:\Yolov8\ultralytics-main\datasets\mydata\labels\train'

os.makedirs(savetxtpath, exist_ok=True)

with open(txtpathdir, 'r', encoding='utf-8') as list_file:
    for line in list_file:
        line = line.split('\n')[0]
        if not line.strip():
            # A blank line has no image path; skip it instead of trying to
            # open the output directory itself as a file.
            continue
        imgdir, *bboxinfo = line.split(' ')
        # The label file is named after the image; the list holds Windows
        # paths, hence the '\\' split.
        image_name = imgdir.split('\\')[-1]
        savetxtdir = os.path.join(savetxtpath, os.path.splitext(image_name)[0] + '.txt')
        # Each label file is closed as soon as it is written, so handles do
        # not pile up over a large dataset.
        with open(savetxtdir, 'w', encoding='utf-8') as label_file:
            for entry in bboxinfo:
                if not entry:
                    continue
                # "cls,x,y,w,h" -> "cls x y w h"
                info1 = ' '.join(entry.split(','))
                print(info1)
                label_file.write(info1 + '\n')
-
运行以上代码,即得到需要的txt
(根据自己需求,因为我的图片类别多,存放位置也不同,所以需要找到对应的图片用于训练)
-
- import glob
- import os
- import shutil
-
# List file whose lines start with the path of an image to copy
txtpathdir = r'D:\Yolov8\ultralytics-main\datasets\mydata\cocotxt\train2017_v8.txt'
# Destination directory for the copied images
saveimgpath = r'D:\Yolov8\ultralytics-main\datasets\mydata\images\train'

os.makedirs(saveimgpath, exist_ok=True)

with open(txtpathdir, 'r', encoding='utf-8') as list_file:
    for line in list_file:
        line = line.split('\n')[0]
        if not line.strip():
            # Blank lines carry no image path.
            continue
        # The image path is the first space-separated field of the line.
        imgdir = line.split(' ')[0]
        print('imgdir', imgdir)
        # Keep the file name only; the list holds Windows paths.
        saveimgdir = os.path.join(saveimgpath, imgdir.split('\\')[-1])
        print(saveimgdir)

        shutil.copy(imgdir, saveimgdir)
-
-
上述步骤可根据实际情况进行调整,这样就得到了可用于训练的数据,train中存放训练数据,val存放验证集。
labels中txt存放的数据格式如下(与官方一致):
分别代表:类别、框的中心点xy坐标以及框的宽高(进行了归一化处理)
所有参数参见:https://docs.ultralytics.com/usage/cfg/
根据自己的数据集位置进行修改和配置。
# Dataset configuration for the custom person / cat / dog dataset.
path: D:\Yolov8\ultralytics-main\datasets\mydata  # dataset root dir
train: images/train  # train images (relative to 'path')
val: images/val  # val images (relative to 'path')
# test: test-dev2017.txt  # optional test split, not used here

# Classes: the ids must match the class ids written to the label txt files
names:
  0: person
  1: cat
  2: dog
nc: 3
根据自己想使用的权重进行选择,我这里采用的是yolov8s.pt进行训练,类别数为3。
- # Ultralytics YOLO
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Monodyee/article/detail/337732
推荐阅读
相关标签
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。