当前位置:   article > 正文

YOLov5 分割数据集的制作+划分(详细过程+代码)_yolo人像分割数据集


1.labelme 的使用





  # Clone the first labelme annotation so that every image gets its own JSON file.
  import shutil

  # Template annotation and the folder that receives the copies.
  source_file = 'E:\\data_seg\\quanjingCameraPicture\\segdata0001.json'
  target_folder = 'E:\\data_seg\\quanjingCameraPicture\\'

  # Images 2..2275 each receive a copy named segdataNNNN.json (zero-padded to 4 digits).
  for index in range(2, 2276):
      destination = '{}segdata{:04d}.json'.format(target_folder, index)
      shutil.copyfile(source_file, destination)



由于这些 json 文件都是直接复制第一张图片的标注文件生成的,所以其中的字段 imagePath 和 imageData 需要替换为各自对应图片的信息。


1. 修改 imagePath —— 改为相对应的图片名称
  # Point every copied annotation at its own image by rewriting `imagePath`.
  import json
  import os

  # Folder holding the copied labelme annotations.
  folder_path = 'E:\\data_seg\\quanjingCameraPicture\\'

  # Annotations 2..2275 are clones of segdata0001.json, so each one still
  # references the first image until it is patched here.
  for i in range(2, 2276):
      stem = 'segdata' + str(i).zfill(4)
      file_path = os.path.join(folder_path, stem + '.json')

      # labelme stores annotations as UTF-8; relying on the platform default
      # (e.g. GBK on Chinese Windows) breaks files with non-ASCII labels.
      with open(file_path, 'r', encoding='utf-8') as json_file:
          data = json.load(json_file)

      # The image shares the annotation's stem and lives next to it.
      data['imagePath'] = stem + '.jpg'

      # ensure_ascii=False keeps non-ASCII labels readable instead of \uXXXX escapes.
      with open(file_path, 'w', encoding='utf-8') as json_file:
          json.dump(data, json_file, indent=4, ensure_ascii=False)
2. 修改 imageData —— 改为相对应图片的 Base64 编码字节流
  # Embed each image into its own annotation by rewriting `imageData`.
  import base64
  import io
  import json
  import os

  from PIL import Image

  # Folder holding both the images and the copied labelme annotations.
  folder_path = 'E:\\data_seg\\quanjingCameraPicture\\'

  # Annotations 2..2275 are clones of segdata0001.json, so each one still
  # embeds the first image until it is patched here.
  for i in range(2, 2276):
      stem = 'segdata' + str(i).zfill(4)
      json_file_path = os.path.join(folder_path, stem + '.json')
      img_file_path = os.path.join(folder_path, stem + '.jpg')

      # Serialise the image to an in-memory JPEG and Base64-encode the bytes.
      # NOTE(review): this re-compresses the picture, so the embedded data is
      # not byte-identical to the file on disk.
      with Image.open(img_file_path) as img:
          buf = io.BytesIO()
          img.save(buf, format='JPEG')
      base64_data = base64.b64encode(buf.getvalue()).decode()

      # labelme stores annotations as UTF-8; relying on the platform default
      # (e.g. GBK on Chinese Windows) breaks files with non-ASCII labels.
      with open(json_file_path, 'r', encoding='utf-8') as json_file:
          data = json.load(json_file)

      data['imageData'] = base64_data

      # ensure_ascii=False keeps non-ASCII labels readable instead of \uXXXX escapes.
      with open(json_file_path, 'w', encoding='utf-8') as json_file:
          json.dump(data, json_file, indent=4, ensure_ascii=False)


3. Labelme格式数据转为COCO格式

这部分,首先需要将Labelme标注的数据(Json格式数据)转换为COCO格式数据(将所有的Labelme标注的Json数据合并到一个COCO json文件中)。废话不多说,直接上代码labelme2coco.py:

  # Convert a folder of labelme JSON annotations into one COCO-format dataset.
  import argparse
  import collections
  import datetime
  import glob
  import json
  import os
  import os.path as osp
  import sys
  import uuid

  import imgviz
  import numpy as np

  import labelme

  try:
      import pycocotools.mask
  except ImportError:
      print("Please install pycocotools:\n\n pip install pycocotools\n")
      sys.exit(1)


  def _load_categories(labels_path):
      """Read the labels file and build the COCO category table.

      Each non-empty line is one class name; its category id is the
      zero-based line number.

      Returns:
          (class_name_to_id, categories): the name -> id mapping and the
          list of COCO category dicts, in file order.
      """
      class_name_to_id = {}
      categories = []
      # NOTE(review): the file is read with the platform default encoding;
      # the names must match the labels stored in the labelme JSON files.
      with open(labels_path) as f:
          for class_id, line in enumerate(f):
              class_name = line.strip()
              if not class_name:
                  # A blank line would otherwise become a nameless category.
                  continue
              class_name_to_id[class_name] = class_id
              categories.append(
                  dict(
                      supercategory=None,
                      id=class_id,
                      name=class_name,
                  )
              )
      return class_name_to_id, categories


  def _shape_to_polygon(points, shape_type):
      """Return one labelme shape as a flat [x0, y0, x1, y1, ...] polygon."""
      if shape_type == "rectangle":
          # Expand the two corner points into the four rectangle corners.
          (x1, y1), (x2, y2) = points
          x1, x2 = sorted([x1, x2])
          y1, y2 = sorted([y1, y2])
          points = [x1, y1, x2, y1, x2, y2, x1, y2]
      if shape_type == "circle":
          # The first point is the centre, the second lies on the circle.
          (x1, y1), (x2, y2) = points
          r = np.linalg.norm([x2 - x1, y2 - y1])
          # r(1-cos(a/2))<x, a=2*pi/N => N>pi/arccos(1-x/r)
          # x: tolerance of the gap between the arc and the line segment
          if r >= 0.5:
              n_points_circle = max(int(np.pi / np.arccos(1 - 1 / r)), 12)
          else:
              # arccos is undefined for 1 - 1/r < -1 (and r == 0 divides by
              # zero); fall back to the minimum point count.
              n_points_circle = 12
          i = np.arange(n_points_circle)
          x = x1 + r * np.sin(2 * np.pi / n_points_circle * i)
          y = y1 + r * np.cos(2 * np.pi / n_points_circle * i)
          return np.stack((x, y), axis=1).flatten().tolist()
      return np.asarray(points).flatten().tolist()


  def main():
      """Merge all labelme JSON files of --input_dir into a COCO dataset.

      Writes <output_dir>/annotations.json, the decoded images under
      <output_dir>/JPEGImages and, unless --noviz is given, mask overlays
      under <output_dir>/Visualization. Exits with status 1 when the output
      directory already exists. Files that labelme cannot load are skipped
      and counted; the count is printed at the end.
      """
      parser = argparse.ArgumentParser(
          formatter_class=argparse.ArgumentDefaultsHelpFormatter
      )
      # --input_dir: folder containing the labelme JSON annotations
      parser.add_argument("--input_dir", type=str, default="E:/data_seg/quanjingCameraPicture",
                          help="input annotated directory")
      # --output_dir: folder that receives the COCO-format output
      parser.add_argument("--output_dir", type=str,
                          default="E:/data_seg/quanjingCameraPicture/coco_json",
                          help="output dataset directory")
      # --labels: text file listing every annotated class, one per line
      parser.add_argument("--labels", type=str, default="E:/data_seg/quanjingCameraPicture/labels.txt", help="labels file")
      parser.add_argument(
          "--noviz", help="no visualization", action="store_true"
      )
      args = parser.parse_args()

      if osp.exists(args.output_dir):
          print("Output directory already exists:", args.output_dir)
          sys.exit(1)

      # Read the labels before touching the disk, so a bad --labels path does
      # not leave a half-created output directory that blocks the next run.
      class_name_to_id, categories = _load_categories(args.labels)

      os.makedirs(args.output_dir)
      os.makedirs(osp.join(args.output_dir, "JPEGImages"))
      if not args.noviz:
          os.makedirs(osp.join(args.output_dir, "Visualization"))
      print("Creating dataset:", args.output_dir)

      now = datetime.datetime.now()
      data = dict(
          info=dict(
              description=None,
              url=None,
              version=None,
              year=now.year,
              contributor=None,
              date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
          ),
          licenses=[
              dict(
                  url=None,
                  id=0,
                  name=None,
              )
          ],
          images=[
              # license, url, file_name, height, width, date_captured, id
          ],
          type="instances",
          annotations=[
              # segmentation, area, iscrowd, image_id, bbox, category_id, id
          ],
          categories=categories,  # supercategory, id, name
      )

      errcnt = 0  # number of annotation files that failed to load
      out_ann_file = osp.join(args.output_dir, "annotations.json")
      # Sorted so that image ids are reproducible across runs and platforms.
      label_files = sorted(glob.glob(osp.join(args.input_dir, "*.json")))

      for image_id, filename in enumerate(label_files):
          print("Generating dataset from:", filename)
          try:
              label_file = labelme.LabelFile(filename=filename)
          except Exception as exc:
              # Skip unreadable files but report which one failed and why.
              errcnt += 1
              print("出现异常", filename, exc)
              continue

          base = osp.splitext(osp.basename(filename))[0]
          out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")

          img = labelme.utils.img_data_to_arr(label_file.imageData)
          imgviz.io.imsave(out_img_file, img)
          data["images"].append(
              dict(
                  license=0,
                  url=None,
                  file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                  height=img.shape[0],
                  width=img.shape[1],
                  date_captured=None,
                  id=image_id,
              )
          )

          masks = {}  # for area
          segmentations = collections.defaultdict(list)  # for segmentation
          for shape in label_file.shapes:
              points = shape["points"]
              label = shape["label"]
              group_id = shape.get("group_id")
              shape_type = shape.get("shape_type", "polygon")
              mask = labelme.utils.shape_to_mask(
                  img.shape[:2], points, shape_type
              )

              # Shapes without a group id each form their own instance.
              if group_id is None:
                  group_id = uuid.uuid1()

              instance = (label, group_id)

              # Shapes sharing (label, group_id) are merged into one mask.
              if instance in masks:
                  masks[instance] = masks[instance] | mask
              else:
                  masks[instance] = mask

              segmentations[instance].append(
                  _shape_to_polygon(points, shape_type)
              )
          segmentations = dict(segmentations)

          for instance, mask in masks.items():
              cls_name, group_id = instance
              if cls_name not in class_name_to_id:
                  # Labels missing from the labels file are not exported.
                  continue
              cls_id = class_name_to_id[cls_name]

              mask = np.asfortranarray(mask.astype(np.uint8))
              mask = pycocotools.mask.encode(mask)
              area = float(pycocotools.mask.area(mask))
              bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

              data["annotations"].append(
                  dict(
                      id=len(data["annotations"]),
                      image_id=image_id,
                      category_id=cls_id,
                      segmentation=segmentations[instance],
                      area=area,
                      bbox=bbox,
                      iscrowd=0,
                  )
              )

          if not args.noviz:
              viz = img
              known = [
                  (class_name_to_id[cnm], cnm, msk)
                  for (cnm, gid), msk in masks.items()
                  if cnm in class_name_to_id
              ]
              # Unpacking zip(*[]) raises ValueError, so only draw overlays
              # when at least one instance has a known label.
              if known:
                  labels, captions, viz_masks = zip(*known)
                  viz = imgviz.instances2rgb(
                      image=img,
                      labels=labels,
                      masks=viz_masks,
                      captions=captions,
                      font_size=15,
                      line_width=2,
                  )
              out_viz_file = osp.join(
                  args.output_dir, "Visualization", base + ".jpg"
              )
              imgviz.io.imsave(out_viz_file, viz)

      with open(out_ann_file, "w") as f:
          json.dump(data, f)
      # Number of annotation files that were skipped.
      print(errcnt)


  if __name__ == "__main__":
      main()







general_json2yolo.py 第二处修改 



另外,utils.py 中也有需要修改的地方。





  import glob
  import os
  import random
  import shutil


  def split_dataset(
      txt_path=r"E://yolov5-6.0//json2yolo-master//new_dir//images",
      images_base_dir=r"E://yolov5-6.0//json2yolo-master//new_dir//outputs//images",
      labels_base_dir=r"E://yolov5-6.0//json2yolo-master//new_dir//outputs//labels",
      val_rate=0.2,
      seed=None,
  ):
      """Copy image/label pairs into train and val sub-folders.

      Args:
          txt_path: folder holding the .txt labels and their .jpg images
              side by side (same file stem).
          images_base_dir: output folder; images go to its train/ and val/.
          labels_base_dir: output folder; labels go to its train/ and val/.
          val_rate: fraction of the pairs placed in the validation set,
              between 0 and 1.
          seed: optional seed for a reproducible split; None draws a
              different split on every run.

      Raises:
          ValueError: if val_rate is outside [0, 1].
      """
      if not 0 <= val_rate <= 1:
          raise ValueError(
              "val_rate must be between 0 and 1, got {!r}".format(val_rate)
          )

      # subset name -> (image folder, label folder)
      out_dirs = {
          subset: (
              os.path.join(images_base_dir, subset),
              os.path.join(labels_base_dir, subset),
          )
          for subset in ("train", "val")
      }
      for images_dir, labels_dir in out_dirs.values():
          # makedirs + exist_ok: create missing parents and allow re-runs.
          os.makedirs(images_dir, exist_ok=True)
          os.makedirs(labels_dir, exist_ok=True)

      # Collect the (label, image) pairs; sorted so a seed gives the same split.
      pairs = []
      for txt_ori_path in sorted(glob.glob(os.path.join(txt_path, "*.txt"))):
          # Swap only the extension; str.replace would also rewrite a ".txt"
          # that appears elsewhere in the path.
          img_ori_path = os.path.splitext(txt_ori_path)[0] + ".jpg"
          if not os.path.isfile(img_ori_path):
              print("Skipping label without a matching image:", txt_ori_path)
              continue
          pairs.append((txt_ori_path, img_ori_path))

      # Shuffle, then cut: the first n_val pairs form the validation set, so
      # the requested ratio is honoured for any val_rate.
      random.Random(seed).shuffle(pairs)
      n_val = int(round(len(pairs) * val_rate))

      for index, (txt_ori_path, img_ori_path) in enumerate(pairs):
          images_dir, labels_dir = out_dirs["val" if index < n_val else "train"]
          # Files are copied, not moved; the source folder stays intact.
          shutil.copy(
              txt_ori_path,
              os.path.join(labels_dir, os.path.basename(txt_ori_path)),
          )
          shutil.copy(
              img_ori_path,
              os.path.join(images_dir, os.path.basename(img_ori_path)),
          )


  if __name__ == "__main__":
      split_dataset()

后续将更新如何在 Qt 中使用。

