当前位置:   article > 正文

yolov5 8 labelme labelimg数据标注 并生成训练数据集 训练_yolov5带label标记的数据集

yolov5带label标记的数据集

一.数据集准备 文件夹结构,数据集标注

  1. 创建一个data 文件夹
  2. 在data文件夹下创建一个images 文件夹
  3. 将所有图片数据放入images文件夹下

使用labelme或者labelimg标注数据

  1. python环境下使用 pip install labelme 安装labelme,使用 pip install labelimg 安装labelimg
  2. 在cmd 中使用命令 labelme 或者 labelimg命令打开软件
  3. 进行标注,将标注文件保存在图片的相同目录下面
  4. 将标注文件和原图都放在images文件夹

标注完成后 images 文件夹下 存在原图和标注的json 文件或者xml文件

在这里插入图片描述

二.转换为yolo 数据集

然后运行以下代码,运行前请将类别和划分比例修改为自己的设置。
运行后会直接生成 labels 标签文件,以及 train.txt、val.txt 两个列表文件。

1. labelimg 转换

import json
import cv2
import numpy as np
import glob
import os
import xml.etree.ElementTree as ET

def split_by_ratio(arr, *ratios):
    """
    Randomly shuffle ``arr`` and split it into consecutive parts by ratio.

    :param arr: sequence to split; it is shuffled with ``np.random.permutation``.
    :param ratios: one ratio per returned part, e.g. ``0.5, 0.5`` yields two
        parts of 50% each. Items beyond the sum of the ratios are discarded.
    :return: a list with ``len(ratios)`` lists of items.
    """
    arr = np.random.permutation(arr)
    raw = np.add.accumulate(np.array(ratios, dtype=float) * len(arr))
    # Floating-point sums can land just below an integer (0.7 * 3 + 0.3 * 3 is
    # 2.9999999999999996); truncating that directly would silently drop the
    # last item. Rounding away the noise first keeps floor semantics intact.
    ind = np.floor(np.round(raw, 6)).astype(int)
    return [x.tolist() for x in np.split(arr, ind)][:len(ratios)]

def convert_annotation(t):
    """
    Convert one labelImg (Pascal VOC) XML file into a YOLO label file.

    Objects whose ``name`` is listed in the module-level ``class_names`` are
    written to ``labels/<basename>.txt`` as ``class_id x_center y_center w h``
    lines, normalised by the image size recorded in the XML. Objects with any
    other name are ignored.

    :param t: path of the XML annotation file.
    :return: True if a label file was written; False if the file has no object
        of a known class or records a non-positive image size.
    """
    # File name without directories (either separator), cut at the first dot.
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]
    with open(t, 'r', encoding='utf-8') as ft:
        root = ET.parse(ft).getroot()

    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    # Collect the boxes of known classes in a single pass.
    boxes = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in class_names:
            continue
        xmlbox = obj.find('bndbox')
        boxes.append((
            class_names.index(cls),
            float(xmlbox.find('xmin').text),
            float(xmlbox.find('xmax').text),
            float(xmlbox.find('ymin').text),
            float(xmlbox.find('ymax').text),
        ))
    if not boxes:
        return False

    # A zero size cannot be normalised against; skip the file instead of
    # aborting the whole conversion with a ZeroDivisionError.
    if width <= 0 or height <= 0:
        print(f"Skipping {t}: invalid image size {width}x{height}")
        return False

    with open("labels/" + basename + ".txt", 'w') as fa:
        for class_id, x1, x2, y1, y2 in boxes:
            x_center = (x1 + x2) / 2 / width
            y_center = (y1 + y2) / 2 / height
            w = abs(x2 - x1) / width
            h = abs(y2 - y1) / height
            fa.write(f"{class_id} {x_center} {y_center} {w} {h}\n")

    return True

# Replace with your own class names
class_names = ['persona']

if __name__=="__main__":
    # Gather every annotation file and split them randomly into train / val
    xml_list = glob.glob("images/*.xml")
    np.random.shuffle(xml_list)
    trains,vals = split_by_ratio(xml_list,0.7,0.3)

    # Output folder for the YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Each list file receives the image paths of the annotations in its
    # subset that contain at least one object of a known class.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as listing:
            for xml_path in subset:
                stem = xml_path.split("/")[-1].split("\\")[-1].split(".")[0]
                if convert_annotation(xml_path):
                    listing.write("../data/images/" + stem + ".jpg\n")




  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94

2. labelme 转换 目标检测数据集

import json
import cv2
import numpy as np
import glob
import os

def split_by_ratio(arr, *ratios):
    """
    Randomly shuffle ``arr`` and split it into consecutive parts by ratio.

    :param arr: sequence to split; it is shuffled with ``np.random.permutation``.
    :param ratios: one ratio per returned part, e.g. ``0.5, 0.5`` yields two
        parts of 50% each. Items beyond the sum of the ratios are discarded.
    :return: a list with ``len(ratios)`` lists of items.
    """
    arr = np.random.permutation(arr)
    raw = np.add.accumulate(np.array(ratios, dtype=float) * len(arr))
    # Floating-point sums can land just below an integer (0.7 * 3 + 0.3 * 3 is
    # 2.9999999999999996); truncating that directly would silently drop the
    # last item. Rounding away the noise first keeps floor semantics intact.
    ind = np.floor(np.round(raw, 6)).astype(int)
    return [x.tolist() for x in np.split(arr, ind)][:len(ratios)]

def convert_json(t):
    """
    Convert one labelme JSON file with rectangle shapes into a YOLO label file.

    Each shape is written to ``labels/<basename>.txt`` as
    ``class_id x_center y_center w h``, normalised by the ``imageWidth`` and
    ``imageHeight`` stored in the JSON. The class id is the label's index in
    the module-level ``class_names``.

    :param t: path of the labelme JSON file.
    :return: True if a label file was written; False if no shape carries a
        label from ``class_names``.
    """
    # File name without directories (either separator), cut at the first dot.
    basename = t.split("/")[-1].split("\\")[-1].split(".")[0]
    with open(t, 'r', encoding='utf-8') as ft:
        data = json.load(ft)

    shapes = data['shapes']
    if not any(shape['label'] in class_names for shape in shapes):
        return False

    img_height = data["imageHeight"]
    img_width = data["imageWidth"]
    with open("labels/" + basename + ".txt", 'w') as fa:
        for shape in shapes:
            assert shape['label'] in class_names, f"Error: {shape['label']} not found in {class_names}"
            class_id = class_names.index(shape['label'])

            # The first two points are used as opposite corners of the box.
            x1, y1 = shape['points'][0]
            x2, y2 = shape['points'][1]
            x_center = (x1 + x2) / 2 / img_width
            y_center = (y1 + y2) / 2 / img_height
            # Separate names for the box size: reusing the image-size
            # variables here would corrupt every following shape.
            box_w = abs(x2 - x1) / img_width
            box_h = abs(y2 - y1) / img_height

            fa.write(f"{class_id} {x_center} {y_center} {box_w} {box_h}\n")

    return True
# Class names
class_names = ['glass']

if __name__=="__main__":
    # Gather every labelme file and split them randomly into train / val
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains,vals = split_by_ratio(json_list,0.9,0.1)

    # Output folder for the YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Each list file receives the image paths of the annotations in its
    # subset that contain at least one shape of a known class.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as listing:
            for json_path in subset:
                stem = json_path.split("/")[-1].split("\\")[-1].split(".")[0]
                if convert_json(json_path):
                    listing.write("../data/images/" + stem + ".jpg\n")


  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81

3. labelme 转换 目标分割数据集

import json
import cv2
import numpy as np
import glob
import os

def split_by_ratio(arr, *ratios):
    """
    Randomly shuffle ``arr`` and split it into consecutive parts by ratio.

    :param arr: sequence to split; it is shuffled with ``np.random.permutation``.
    :param ratios: one ratio per returned part, e.g. ``0.5, 0.5`` yields two
        parts of 50% each. Items beyond the sum of the ratios are discarded.
    :return: a list with ``len(ratios)`` lists of items.
    """
    arr = np.random.permutation(arr)
    raw = np.add.accumulate(np.array(ratios, dtype=float) * len(arr))
    # Floating-point sums can land just below an integer (0.7 * 3 + 0.3 * 3 is
    # 2.9999999999999996); truncating that directly would silently drop the
    # last item. Rounding away the noise first keeps floor semantics intact.
    ind = np.floor(np.round(raw, 6)).astype(int)
    return [x.tolist() for x in np.split(arr, ind)][:len(ratios)]

def convert_json(t):
    """
    Convert one labelme JSON file into a YOLO segmentation label file.

    Every shape becomes one line in ``labels/<basename>.txt``: the class id
    (index of the label in the module-level ``class_names``) followed by the
    polygon's x/y coordinates divided by the image width/height.

    :param t: path of the labelme JSON file.
    :return: True if a label file was written; False if no shape carries a
        label from ``class_names``.
    """
    stem = t.split("/")[-1].split("\\")[-1].split(".")[0]
    with open(t, 'r', encoding='utf-8') as src:
        data = json.load(src)

        # Nothing to export unless at least one shape has a known label.
        known = [shape['label'] in class_names for shape in data['shapes']]
        if True not in known:
            return False

        img_h = data["imageHeight"]
        img_w = data["imageWidth"]
        with open("labels/" + stem + ".txt", 'w') as out:
            rows = []  # one text line per shape
            for shape in data["shapes"]:
                assert shape['label'] in class_names, f"Error: {shape['label']} not found in {class_names}"
                fields = [str(class_names.index(shape['label']))]
                for pt in shape["points"]:
                    fields.append(str(pt[0] / img_w))
                    fields.append(str(pt[1] / img_h))
                rows.append(" ".join(fields) + "\n")

            out.write("".join(rows))

    return True
# Class names
class_names = ['glass']

if __name__=="__main__":
    # Gather every labelme file and split them randomly into train / val
    json_list = glob.glob("images/*.json")
    np.random.shuffle(json_list)
    trains,vals = split_by_ratio(json_list,0.7,0.3)

    # Output folder for the YOLO label files
    if not os.path.exists("labels"):
        os.makedirs("labels")

    # Each list file receives the image paths of the annotations in its
    # subset that contain at least one shape of a known class.
    for list_name, subset in (('train.txt', trains), ('val.txt', vals)):
        with open(list_name, 'w') as listing:
            for json_path in subset:
                stem = json_path.split("/")[-1].split("\\")[-1].split(".")[0]
                if convert_json(json_path):
                    listing.write("../data/images/" + stem + ".jpg\n")



  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84

三. 数据集yaml 文件

yolov5的数据配置yaml文件

train: ../data/train.txt   # 此路径为相对路径, 如果运行路径在yolov5 文件夹下 就不需要path 路径
val: ../data/val.txt  

nc: 1 # number of classes
names: ['bird']  # class names

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6

yolov8的数据配置yaml文件

因为yolov8 采用pip直接安装 所以需要知道path 路径,此时的train 文件就是相对于path路径。
如果是像yolov5一样clone仓库 也可以采用yolov5 的写法,是通用的。

path: E:\\Backup\\Desktop\\yolov8-acne\\data 
train: train.txt  
val: val.txt  

# Classes
names:
  0: acne_white
  1: acne_red
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8

四. 训练

1. yolov5


python train.py --weights yolov5n.pt --data data/my.yaml --cfg models/yolov5n.yaml --imgsz 640 --batch-size -1 --epochs 300  --cos-lr --patience 10  --name yolov5n

  • 1
  • 2
  • 3

2. yolov8

采用 python 文件的方式训练。
注意:自定义网络结构的 yaml 文件命名时不需要带后面的 n、s 等后缀;在代码里引用时加上后缀,程序会自动识别。

from ultralytics import YOLO
if __name__ == '__main__':
    # Build exactly ONE model. Assigning several in a row only keeps the last
    # one and constructs the others for nothing, so the alternatives are
    # listed here for reference; swap the active line for the one you need.
    #
    # Object detection:
    #   YOLO('yolov8n.yaml')                         build a new model from YAML
    #   YOLO('yolov8n.pt')                           load a pretrained model (recommended for training)
    #   YOLO('yolov8n.yaml').load('yolov8n.pt')      build from YAML and transfer weights
    #
    # Segmentation:
    #   YOLO('yolov8n-seg.yaml')                     build a new model from YAML
    #   YOLO('yolov8n-seg.pt')                       load a pretrained model (recommended for training)
    #   YOLO('yolov8n-seg.yaml').load('yolov8n.pt')  build from YAML and transfer weights
    model = YOLO('yolov8n-seg.yaml').load('yolov8n.pt')  # build from YAML and transfer weights

    # Train the model on the dataset described by data/my.yaml for 300 epochs
    results = model.train(data='data/my.yaml', epochs=300, batch=16, workers=1, imgsz=640)

    # Evaluate the model's performance on the validation set
    results = model.val()

    # Export the model to ONNX format
    success = model.export(format='onnx', opset=12, imgsz=640, simplify=True, half=True)


  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24

训练的参数
在这里插入图片描述

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/知新_RL/article/detail/418033
推荐阅读
相关标签
  

闽ICP备14008679号