
Custom COCO, VOC, and YOLO Format Datasets Anyone Can Make


Contents

1. Preparation

2. Annotating the images

3. Converting to COCO format

4. Converting to VOC format

5. Converting to YOLO format


1. Preparation

    (1) Install the labelme software yourself; installation is not covered here (a typical install command is shown at the end of this list).

    (2) Prepare the source images. This article uses 10 images, shown in the figure below, and labels them with labelme.
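
For step (1): labelme is published on PyPI, so on most setups a plain pip install is enough (this assumes Python and pip are already set up):

    pip install labelme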

2. Annotating the images

         First open labelme, then use Open Dir to locate the folder containing your source images.

                               

 Annotate with rectangular boxes. Only two classes are labeled here: mask and person.

 

After annotating an image, save it; the resulting json file must have the same name as the image. Click Next Image to move on to the next image, and repeat until every image is annotated.

 Once all 10 images are labeled you get the result shown below. The json files then need to be converted into COCO, VOC, and YOLO format datasets.
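
For reference, each json file saved by labelme stores the image size, the base64-encoded image itself, and a list of annotated shapes. The values below are made up, but these are the fields the three conversion scripts read:

    {
      "shapes": [
        {
          "label": "mask",
          "points": [[102.0, 85.0], [241.0, 230.0]],
          "shape_type": "rectangle"
        }
      ],
      "imagePath": "img-001.jpg",
      "imageData": "...(base64 string)...",
      "imageHeight": 480,
      "imageWidth": 640
    }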

 

3. Converting to COCO format

     To convert to COCO format, copy the json files above into the original image folder source-img, because the images need to be split into a training set and a validation set.

 Below is the conversion code (json -> coco):

import os
import json
import numpy as np
import glob
import shutil
import cv2
from sklearn.model_selection import train_test_split

np.random.seed(41)

classname_to_id = {
    "mask": 0,    # change these to your own classes
    "person": 1
}


class Lableme2CoCo:

    def __init__(self):
        self.images = []
        self.annotations = []
        self.categories = []
        self.img_id = 0
        self.ann_id = 0

    def save_coco_json(self, instance, save_path):
        json.dump(instance, open(save_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=1)  # indent=2 prints more readably

    # build the COCO structure from a list of labelme json files
    def to_coco(self, json_path_list):
        self._init_categories()
        for json_path in json_path_list:
            obj = self.read_jsonfile(json_path)
            self.images.append(self._image(obj, json_path))
            shapes = obj['shapes']
            for shape in shapes:
                annotation = self._annotation(shape)
                self.annotations.append(annotation)
                self.ann_id += 1
            self.img_id += 1
        instance = {}
        instance['info'] = 'spytensor created'
        instance['license'] = ['license']
        instance['images'] = self.images
        instance['annotations'] = self.annotations
        instance['categories'] = self.categories
        return instance

    # build the categories field
    def _init_categories(self):
        for k, v in classname_to_id.items():
            category = {}
            category['id'] = v
            category['name'] = k
            self.categories.append(category)

    # build the COCO image field
    def _image(self, obj, path):
        image = {}
        from labelme import utils
        img_x = utils.img_b64_to_arr(obj['imageData'])
        h, w = img_x.shape[:-1]
        image['height'] = h
        image['width'] = w
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace(".json", ".jpg")
        return image

    # build the COCO annotation field
    def _annotation(self, shape):
        # print('shape', shape)
        label = shape['label']
        points = shape['points']
        annotation = {}
        annotation['id'] = self.ann_id
        annotation['image_id'] = self.img_id
        annotation['category_id'] = int(classname_to_id[label])
        annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
        annotation['bbox'] = self._get_box(points)
        annotation['iscrowd'] = 0
        annotation['area'] = 1.0
        return annotation

    # read a json file and return the parsed object
    def read_jsonfile(self, path):
        with open(path, "r", encoding='utf-8') as f:
            return json.load(f)

    # COCO bbox format: [x1, y1, w, h]
    def _get_box(self, points):
        min_x = min_y = np.inf
        max_x = max_y = 0
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]


# If training fails with "Index put requires the source and destination dtypes match,
# got Long for the destination and Int for the source",
# see: https://github.com/open-mmlab/mmdetection/issues/6706
if __name__ == '__main__':
    labelme_path = "./source-img"   # directory holding the json files and images
    saved_coco_path = "./data-"     # prefix of the folder that will hold the COCO-format data
    print('reading...')
    # create the output directories
    if not os.path.exists("%scoco/annotations/" % saved_coco_path):
        os.makedirs("%scoco/annotations/" % saved_coco_path)
    if not os.path.exists("%scoco/images/train/" % saved_coco_path):
        os.makedirs("%scoco/images/train" % saved_coco_path)
    if not os.path.exists("%scoco/images/val/" % saved_coco_path):
        os.makedirs("%scoco/images/val" % saved_coco_path)
    # collect all json files in the source directory
    print(labelme_path + "/*.json")
    json_list_path = glob.glob(labelme_path + "/*.json")
    print('json_list_path: ', len(json_list_path))
    # split the data; there are no val2017/train2017 directories here, images go under the images directory
    train_path, val_path = train_test_split(json_list_path, test_size=0.2, train_size=0.8)
    print("train_n:", len(train_path), 'val_n:', len(val_path))
    # convert the training set to COCO json format
    l2c_train = Lableme2CoCo()
    train_instance = l2c_train.to_coco(train_path)
    l2c_train.save_coco_json(train_instance, '%scoco/annotations/instances_train.json' % saved_coco_path)
    for file in train_path:
        # shutil.copy(file.replace("json", "jpg"), "%scoco/images/train2017/" % saved_coco_path)
        img_name = file.replace('json', 'jpg')
        temp_img = cv2.imread(img_name)
        try:
            cv2.imwrite("{}coco/images/train/{}".format(saved_coco_path, img_name.split('\\')[-1].replace('png', 'jpg')), temp_img)
        except Exception as e:
            print(e)
            print('Wrong Image:', img_name)
            continue
        print(img_name + '-->', img_name.replace('png', 'jpg'))
    for file in val_path:
        # shutil.copy(file.replace("json", "jpg"), "%scoco/images/val2017/" % saved_coco_path)
        img_name = file.replace('json', 'jpg')
        temp_img = cv2.imread(img_name)
        try:
            cv2.imwrite("{}coco/images/val/{}".format(saved_coco_path, img_name.split('\\')[-1].replace('png', 'jpg')), temp_img)
        except Exception as e:
            print(e)
            print('Wrong Image:', img_name)
            continue
        print(img_name + '-->', img_name.replace('png', 'jpg'))
    # convert the validation set to COCO json format
    l2c_val = Lableme2CoCo()
    val_instance = l2c_val.to_coco(val_path)
    l2c_val.save_coco_json(val_instance, '%scoco/annotations/instances_val.json' % saved_coco_path)

 The generated COCO dataset folder is data-coco. In the code the train/val ratio is 8:2 (the ratio can be changed in the code), so train contains 8 images and val contains 2.
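
As an optional sanity check (this assumes the pycocotools package is installed; the conversion script itself does not need it), the generated annotation file can be opened with the COCO API:

    from pycocotools.coco import COCO

    # load the annotations produced above and print a quick summary
    coco = COCO('./data-coco/annotations/instances_train.json')
    print('images:', len(coco.imgs))
    print('annotations:', len(coco.anns))
    print('categories:', coco.loadCats(coco.getCatIds()))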

4. Converting to VOC format

        The conversion code is at the end of this section. The script takes 3 arguments:

    input dataset folder    output folder name    --labels    label file

        Input dataset folder: the source-img folder from above.

        Output folder name: the directory where the VOC-format data will be saved. The script creates this directory itself, so it must not already exist; we only pass the name to the script.

         Label file: a text file listing the class names, in the format shown below.

         For this article the arguments are:   source-img  voc-data   --labels   labels.txt

       PyCharm run configuration parameters:

         Contents of the label file:

__ignore__
_background_
mask
person
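
If the script is run from a terminal rather than PyCharm, the call would look roughly like this (labelme2voc.py is just an assumed file name for the script below; use whatever name you save it under):

    python labelme2voc.py source-img voc-data --labels labels.txt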

        VOC conversion script:

#!/usr/bin/env python

from __future__ import print_function

import argparse
import glob
import os
import os.path as osp
import sys

import imgviz
import numpy as np

import labelme

# three arguments are passed: input data folder, output folder name, --labels label-file
# e.g.  source-img voc-data --labels labels.txt
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClass"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClassPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationClassVisualization")
        )
    os.makedirs(osp.join(args.output_dir, "SegmentationObject"))
    os.makedirs(osp.join(args.output_dir, "SegmentationObjectPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationObjectVisualization")
        )
    print("Creating dataset:", args.output_dir)

    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)
    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_cls_file = osp.join(
            args.output_dir, "SegmentationClass", base + ".npy"
        )
        out_clsp_file = osp.join(
            args.output_dir, "SegmentationClassPNG", base + ".png"
        )
        if not args.noviz:
            out_clsv_file = osp.join(
                args.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )
        out_ins_file = osp.join(
            args.output_dir, "SegmentationObject", base + ".npy"
        )
        out_insp_file = osp.join(
            args.output_dir, "SegmentationObjectPNG", base + ".png"
        )
        if not args.noviz:
            out_insv_file = osp.join(
                args.output_dir,
                "SegmentationObjectVisualization",
                base + ".jpg",
            )

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        cls, ins = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        ins[cls == -1] = 0  # ignore it.

        # class label
        labelme.utils.lblsave(out_clsp_file, cls)
        np.save(out_cls_file, cls)
        if not args.noviz:
            clsv = imgviz.label2rgb(
                cls,
                imgviz.rgb2gray(img),
                label_names=class_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_clsv_file, clsv)

        # instance label
        labelme.utils.lblsave(out_insp_file, ins)
        np.save(out_ins_file, ins)
        if not args.noviz:
            instance_ids = np.unique(ins)
            instance_names = [str(i) for i in range(max(instance_ids) + 1)]
            insv = imgviz.label2rgb(
                ins,
                imgviz.rgb2gray(img),
                label_names=instance_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_insv_file, insv)


if __name__ == "__main__":
    main()
5. Converting to YOLO format

        Just specify the classes, the path to the json files produced by labelme, and the output path directly in the code, then run it.

  

 

import json
import os

# list every class you annotated here
name2id = {'mask': 0, 'person': 1}


def convert(img_size, box):
    # convert an absolute (x1, y1, x2, y2) box to normalized YOLO (x_center, y_center, w, h)
    dw = 1. / (img_size[0])
    dh = 1. / (img_size[1])
    x = (box[0] + box[2]) / 2.0 - 1
    y = (box[1] + box[3]) / 2.0 - 1
    w = box[2] - box[0]
    h = box[3] - box[1]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)


def decode_json(json_folder_path, json_name):
    # where the converted labels are written
    txt_name = 'D:/deep_learn/user-define-data/yolo-data/' + json_name[0:-5] + '.txt'
    txt_file = open(txt_name, 'w')

    json_path = os.path.join(json_folder_path, json_name)
    data = json.load(open(json_path, 'r', encoding='gb2312'))

    img_w = data['imageWidth']
    img_h = data['imageHeight']
    for i in data['shapes']:
        label_name = i['label']
        if (i['shape_type'] == 'rectangle'):
            x1 = int(i['points'][0][0])
            y1 = int(i['points'][0][1])
            x2 = int(i['points'][1][0])
            y2 = int(i['points'][1][1])
            bb = (x1, y1, x2, y2)
            bbox = convert((img_w, img_h), bb)
            txt_file.write(str(name2id[label_name]) + " " + " ".join([str(a) for a in bbox]) + '\n')


if __name__ == "__main__":
    # path to the json files produced by labelme
    json_folder_path = 'D:/deep_learn/user-define-data/labelme-data'
    json_names = os.listdir(json_folder_path)
    for json_name in json_names:
        decode_json(json_folder_path, json_name)

        Below is the YOLO-format data after conversion.
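
As a made-up illustration of what one line in a generated txt file looks like: for a hypothetical 640x480 image with a mask box from (100, 120) to (300, 360), the convert function above writes class id 0 followed by the normalized center and size, (200 - 1)/640 = 0.3109375, (240 - 1)/480 ≈ 0.4979, 200/640 = 0.3125, 240/480 = 0.5 (the -1 offset is part of the code above; the actual file stores full float precision):

    0 0.3109375 0.4979 0.3125 0.5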

 
