赞
踩
X-AnyLabeling
是一款全新的交互式自动标注工具,其基于AnyLabeling
进行构建和二次开发,在此基础上扩展并支持了许多的模型和功能,并借助Segment Anything
和YOLO
等主流模型提供强大的 AI 支持。无须任何复杂配置,下载即用,支持自定义模型,极大提升用户标注效率!相较于LabelImg 和 Labelme 效率更高。
https://github.com/CVHub520/X-AnyLabeling
使用教程:https://zhuanlan.zhihu.com/p/636164570
标签文件的格式是json, 会存放在图片所在的文件夹里。
注意:不要使用脚本直接将json文件转化成txt文件。
在这段代码中,需要根据实际情况更改路径。Annotations文件夹会自动创建。如果你的图像数据集不是jpg,也需要修改代码。
import os
import xml.dom
import numpy as np
import codecs
import json
import glob
import cv2
import shutil

# 1. Paths — change these to your own dataset locations.
labelme_path = r"D:/SoftWareSpace/stu_py/stu_py/img/goodApple"  # dir with images + X-AnyLabeling json files
saved_path = r"D:/SoftWareSpace/stu_py/stu_py/img/Annotations"  # output dir for Pascal VOC xml files
if not os.path.exists(saved_path):
    os.makedirs(saved_path)

IMAGE_EXT = ".jpg"  # change this if your images are not .jpg

# 2. Collect the json annotation files to convert.
files = glob.glob("%s/*.json" % (labelme_path))

# 3. Read each annotation and write the corresponding Pascal VOC xml.
for json_filename in files:
    # Close the json file promptly instead of leaking the handle.
    with open(json_filename, "r", encoding="utf-8") as f:
        json_file = json.load(f)

    # Image that belongs to this annotation (same basename, image extension).
    img_name = json_filename.replace(".json", IMAGE_EXT)
    image = cv2.imread(img_name)
    if image is None:
        # cv2.imread returns None for missing/unreadable files; the original
        # code crashed here with an AttributeError on .shape.
        print("image not found or unreadable, skipping:", img_name)
        continue
    height, width, channels = image.shape

    # Output xml path. os.path.basename handles both / and \ separators;
    # the original split("\\") broke for the forward-slash paths glob returns.
    xmlName = os.path.join(saved_path,
                           os.path.basename(json_filename).replace(".json", ".xml"))

    with codecs.open(xmlName, "w", "utf-8") as xml:
        xml.write('<annotation>\n')
        xml.write('\t<folder>' + 'jpg' + '</folder>\n')
        xml.write('\t<filename>' + img_name + '</filename>\n')
        xml.write('\t<source>\n')
        xml.write('\t\t<database>hulan</database>\n')
        xml.write('\t</source>\n')
        xml.write('\t<size>\n')
        xml.write('\t\t<width>' + str(width) + '</width>\n')
        xml.write('\t\t<height>' + str(height) + '</height>\n')
        xml.write('\t\t<depth>' + str(channels) + '</depth>\n')
        xml.write('\t</size>\n')
        xml.write('\t\t<segmented>0</segmented>\n')

        # One <object> element per labelled shape; degenerate (zero-area)
        # boxes are skipped.
        for multi in json_file["shapes"]:
            points = np.array(multi["points"])
            xmin = min(points[:, 0])
            xmax = max(points[:, 0])
            ymin = min(points[:, 1])
            ymax = max(points[:, 1])
            label = multi["label"]
            if xmax <= xmin or ymax <= ymin:
                continue
            xml.write('\t<object>\n')
            # Use this shape's own label; the original re-indexed
            # json_file["shapes"][i] with a manual counter.
            xml.write('\t\t<name>' + label + '</name>\n')
            xml.write('\t\t<pose>Unspecified</pose>\n')
            xml.write('\t\t<truncated>0</truncated>\n')
            xml.write('\t\t<difficult>0</difficult>\n')
            xml.write('\t\t<bndbox>\n')
            xml.write('\t\t\t<xmin>' + str(xmin) + '</xmin>\n')
            xml.write('\t\t\t<ymin>' + str(ymin) + '</ymin>\n')
            xml.write('\t\t\t<xmax>' + str(xmax) + '</xmax>\n')
            xml.write('\t\t\t<ymax>' + str(ymax) + '</ymax>\n')
            xml.write('\t\t</bndbox>\n')
            xml.write('\t</object>\n')
            print(json_filename, xmin, ymin, xmax, ymax, label)
        xml.write('</annotation>')
注意:路径需要根据实际路径进行更改。
import xml.dom.minidom as xmldom
import os

# Collect every distinct object class name found in a VOC Annotation folder.
annotation_path = "D:/SoftWareSpace/stu_py/stu_py/img/Annotation/"  # change to your actual path

annotation_names = [os.path.join(annotation_path, name)
                    for name in os.listdir(annotation_path)]

labels = []
for xml_path in annotation_names:
    document = xmldom.parse(xml_path)
    # Root element of the VOC xml document.
    root = document.documentElement
    # Each <object> child carries one <name> tag with the class label.
    for obj in root.getElementsByTagName("object"):
        class_name = obj.getElementsByTagName("name")[0].firstChild.data
        if class_name not in labels:
            labels.append(class_name)
print(labels)
注意:这里存放txt文件的文件夹需要自己创建,classes需要修改成2.3代码所获取的labels。
- import xml.etree.ElementTree as ET
- import os
-
classes = ['apple']  # YOLO class names; replace with the labels collected in step 2.3
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this script
-
def convert(size, box):
    """Convert a VOC pixel box to a normalized YOLO box.

    size: (image_width, image_height) in pixels.
    box:  (x_min, x_max, y_min, y_max) in pixels.
    Returns (x_center, y_center, width, height), each scaled to [0, 1].
    """
    img_w, img_h = size
    x_min, x_max, y_min, y_max = box
    dw = 1. / img_w
    dh = 1. / img_h
    x_center = (x_min + x_max) / 2.0 * dw
    y_center = (y_min + y_max) / 2.0 * dh
    box_w = (x_max - x_min) * dw
    box_h = (y_max - y_min) * dh
    return (x_center, y_center, box_w, box_h)
-
def convert_annotation(image_id):
    """Read the VOC xml for *image_id* and write a YOLO-format txt label file.

    Objects whose class is not listed in the module-level ``classes`` are
    skipped. One line is written per kept object:
    ``<class_id> <x_center> <y_center> <width> <height>`` (normalized).
    """
    # Forward slashes throughout; the original embedded a literal backslash
    # ('Annotation\%s.xml'), producing mixed separators.
    xml_file = 'D:/SoftWareSpace/stu_py/stu_py/img/Annotation/%s.xml' % (image_id)
    txt_file = 'D:/SoftWareSpace/stu_py/stu_py/img/labels/%s.txt' % (image_id)

    # 'with' guarantees both handles are closed (the original leaked them).
    with open(xml_file, encoding='UTF-8') as in_file, open(txt_file, 'w') as out_file:
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:  # ignore classes we are not training on
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
-
# Annotation directory. The original os.path.join(CURRENT_DIR, 'D:/...')
# discarded CURRENT_DIR anyway, because joining onto an absolute path
# keeps only the absolute path.
xml_path = 'D:/SoftWareSpace/stu_py/stu_py/img/Annotation'

# Convert every xml annotation in the directory to a YOLO txt label.
img_xmls = os.listdir(xml_path)
for img_xml in img_xmls:
    if not img_xml.endswith('.xml'):
        continue  # ignore stray non-xml entries
    # splitext is robust to basenames that contain extra dots,
    # unlike the original split('.')[0].
    label_name = os.path.splitext(img_xml)[0]
    print(label_name)
    convert_annotation(label_name)
这段代码会把数据集拆开,拆成train,val,test
新建文件夹 datasets/data/images
然后将图片和标签全部放到这个文件夹下。
在datasets下新建一个分配数据集数据的脚本文件train.py
- import os
- import random
- import shutil
-
def _copy_with_label(files, src_dir, dst_dir):
    """Copy each image in *files* from src_dir to dst_dir, together with its
    same-named YOLO .txt label when one exists."""
    for file in files:
        shutil.copy(os.path.join(src_dir, file), os.path.join(dst_dir, file))
        # splitext handles names with extra dots; the original chained
        # str.replace calls could corrupt such names.
        label = os.path.splitext(file)[0] + '.txt'
        src_label = os.path.join(src_dir, label)
        if os.path.exists(src_label):
            shutil.copy(src_label, os.path.join(dst_dir, label))
        else:
            # Background images without a label file should not crash the split.
            print(f"warning: no label file for {file}")

def split_dataset(data_dir, train_val_test_dir, train_ratio, val_ratio, test_ratio):
    """Randomly split the images in data_dir into train/val/test folders.

    data_dir holds images (.jpg/.png) plus their YOLO .txt labels; the three
    split folders are created under train_val_test_dir. The test split gets
    everything left after the train and val fractions, so the ratios need not
    sum exactly to 1.
    """
    train_dir = os.path.join(train_val_test_dir, 'train')
    val_dir = os.path.join(train_val_test_dir, 'val')
    test_dir = os.path.join(train_val_test_dir, 'test')
    for d in (train_dir, val_dir, test_dir):
        os.makedirs(d, exist_ok=True)

    # Only image files participate in the split; labels follow their image.
    files = os.listdir(data_dir)
    image_files = [f for f in files if f.endswith('.jpg') or f.endswith('.png')]
    random.shuffle(image_files)

    num_files = len(image_files)
    num_train = int(num_files * train_ratio)
    num_val = int(num_files * val_ratio)

    train_files = image_files[:num_train]
    val_files = image_files[num_train:num_train + num_val]
    test_files = image_files[num_train + num_val:]

    _copy_with_label(train_files, data_dir, train_dir)
    _copy_with_label(val_files, data_dir, val_dir)
    _copy_with_label(test_files, data_dir, test_dir)

    print("数据集分离完成!")
    print(f"训练集数量:{len(train_files)}")
    print(f"验证集数量:{len(val_files)}")
    print(f"测试集数量:{len(test_files)}")
-
def move_files(data_dir):
    """Move images into images/ and YOLO .txt labels into labels/ under data_dir.

    Produces the per-split directory layout Ultralytics YOLO expects
    (e.g. train/images, train/labels).
    """
    images_dir = os.path.join(data_dir, 'images')
    labels_dir = os.path.join(data_dir, 'labels')
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    files = os.listdir(data_dir)

    # Match both .jpg and .png: the original only matched .jpg and silently
    # left .png images behind even though split_dataset accepts both.
    image_files = [f for f in files if f.endswith('.jpg') or f.endswith('.png')]
    for file in image_files:
        src_path = os.path.join(data_dir, file)
        dst_path = os.path.join(images_dir, file)
        shutil.move(src_path, dst_path)

    # Move the YOLO label files.
    txt_files = [f for f in files if f.endswith('.txt')]
    for file in txt_files:
        src_path = os.path.join(data_dir, file)
        dst_path = os.path.join(labels_dir, file)
        shutil.move(src_path, dst_path)

    print(f"{data_dir}文件移动完成!")
    print(f"总共移动了 {len(image_files)} 个图片文件到images文件夹")
    print(f"总共移动了 {len(txt_files)} 个TXT文件到labels文件夹")
-
-
# Dataset location and split ratios.
data_dir = './data/images'  # directory holding both the images and their YOLO txt labels
train_val_test_dir= './data'  # destination root; train/val/test are created under it
train_ratio = 0.7  # fraction of images for training
val_ratio = 0.2  # fraction of images for validation
test_ratio = 0.1  # fraction of images for testing

# Split the dataset into train/val/test folders.
split_dataset(data_dir, train_val_test_dir,train_ratio, val_ratio, test_ratio)
# Rearrange each split into images/ and labels/ subfolders (YOLO layout).
move_files(os.path.join(train_val_test_dir, 'train'))
move_files(os.path.join(train_val_test_dir, 'val'))
move_files(os.path.join(train_val_test_dir, 'test'))
-
3.在datasets同级目录下新建一个mydata.yaml文件
# Image directories for each split (paths are relative to where training runs).
train: ./data/train/images
val: ./data/val/images
test: ./data/test/images

# Number of classes.
nc: 2

# Class names, indexed by class id.
names: ["goodapple","badapple"]
在终端执行以下命令
yolo task=detect mode=train model=yolov8.yaml data=mydata.yaml epochs=1000 batch=16 device=0
data:需要改成mydata.yaml文件的路径,
epochs:指的是训练轮数,可以根据实际修改
device:0指的是GPU,如果没有GPU,也可以将0改成cpu
yolo detect predict model=yolov8n.pt source='badimg'
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。