当前位置:   article > 正文

labelme转coco数据改良_labelme2coco

labelme2coco

问题:用labelme转换得到的coco文件,在swin-transformer模型训练时会遇到无法找到图片文件的问题。

解决方案:修改labelme转coco代码。

问题原因:json文件中的路径分隔符‘\\’无法被识别,需要改成‘/’。其实这里也存在疑惑:按理来说‘\\’也是可以识别的,希望有大佬可以解释一下。

  1. # 命令行执行: python labelme2coco.py --input_dir images --output_dir coco --labels labels.txt
  2. # 输出文件夹必须为空文件夹
  3. import argparse
  4. import collections
  5. import datetime
  6. import glob
  7. import json
  8. import os
  9. import os.path as osp
  10. import sys
  11. import uuid
  12. import imgviz
  13. import numpy as np
  14. import labelme
  15. from sklearn.model_selection import train_test_split
  16. try:
  17. import pycocotools.mask
  18. except ImportError:
  19. print("Please install pycocotools:\n\n pip install pycocotools\n")
  20. sys.exit(1)
  21. def to_coco(args, label_files, train):
  22. # 创建 总标签data
  23. now = datetime.datetime.now()
  24. data = dict(
  25. info=dict(
  26. description=None,
  27. url=None,
  28. version=None,
  29. year=now.year,
  30. contributor=None,
  31. date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
  32. ),
  33. licenses=[dict(url=None, id=0, name=None, )],
  34. images=[
  35. # license, url, file_name, height, width, date_captured, id
  36. ],
  37. type="instances",
  38. annotations=[
  39. # segmentation, area, iscrowd, image_id, bbox, category_id, id
  40. ],
  41. categories=[
  42. # supercategory, id, name
  43. ],
  44. )
  45. # 创建一个 {类名 : id} 的字典,并保存到 总标签data 字典中。
  46. class_name_to_id = {}
  47. for i, line in enumerate(open(args.labels).readlines()):
  48. class_id = i - 1 # starts with -1
  49. class_name = line.strip() # strip() 方法用于移除字符串头尾指定的字符(默认为空格或换行符)或字符序列。
  50. if class_id == -1:
  51. assert class_name == "__ignore__" # background:0, class1:1, ,,
  52. continue
  53. class_name_to_id[class_name] = class_id
  54. data["categories"].append(
  55. dict(supercategory=None, id=class_id, name=class_name, )
  56. )
  57. if train:
  58. out_ann_file = osp.join(args.output_dir, "annotations", "instances_train2017.json")
  59. else:
  60. out_ann_file = osp.join(args.output_dir, "annotations", "instances_val2017.json")
  61. for image_id, filename in enumerate(label_files):
  62. label_file = labelme.LabelFile(filename=filename)
  63. base = osp.splitext(osp.basename(filename))[0] # 文件名不带后缀
  64. if train:
  65. out_img_file = osp.join(args.output_dir, "train2017", base + ".jpg")
  66. out_img_file1 = '../' + "train2017/"+ base + ".jpg"#增加这一行
  67. else:
  68. out_img_file = osp.join(args.output_dir, "val2017", base + ".jpg")
  69. out_img_file1 = '../' + "val2017/"+ base + ".jpg"#增加这一行
  70. print("| ", out_img_file)
  71. # ************************** 对图片的处理开始 *******************************************
  72. # 将标签文件对应的图片进行保存到对应的 文件夹。train保存到 train2017/ test保存到 val2017/
  73. img = labelme.utils.img_data_to_arr(label_file.imageData) # .json文件中包含图像,用函数提出来
  74. imgviz.io.imsave(out_img_file, img) # 将图像保存到输出路径
  75. # ************************** 对图片的处理结束 *******************************************
  76. # ************************** 对标签的处理开始 *******************************************
  77. data["images"].append(
  78. dict(
  79. license=0,
  80. url=None,
  81. # file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),#改这里osp.relpath(path,[,start])
  82. # file_name='../'+base+'/' + ".jpg",
  83. file_name=out_img_file1,
  84. # out_img_file = "/coco/train2017/1.jpg"
  85. # out_ann_file = "/coco/annotations/annotations_train2017.json"
  86. # osp.dirname(out_ann_file) = "/coco/annotations"
  87. # file_name = ..\train2017\1.jpg out_ann_file文件所在目录下 找 out_img_file 的相对路径
  88. height=img.shape[0],
  89. width=img.shape[1],
  90. date_captured=None,
  91. id=image_id,
  92. )
  93. )
  94. masks = {} # for area
  95. segmentations = collections.defaultdict(list) # for segmentation
  96. for shape in label_file.shapes:
  97. points = shape["points"]
  98. label = shape["label"]
  99. group_id = shape.get("group_id")
  100. shape_type = shape.get("shape_type", "polygon")
  101. mask = labelme.utils.shape_to_mask(
  102. img.shape[:2], points, shape_type
  103. )
  104. if group_id is None:
  105. group_id = uuid.uuid1()
  106. instance = (label, group_id)
  107. if instance in masks:
  108. masks[instance] = masks[instance] | mask
  109. else:
  110. masks[instance] = mask
  111. if shape_type == "rectangle":
  112. (x1, y1), (x2, y2) = points
  113. x1, x2 = sorted([x1, x2])
  114. y1, y2 = sorted([y1, y2])
  115. points = [x1, y1, x2, y1, x2, y2, x1, y2]
  116. else:
  117. points = np.asarray(points).flatten().tolist()
  118. segmentations[instance].append(points)
  119. segmentations = dict(segmentations)
  120. for instance, mask in masks.items():
  121. cls_name, group_id = instance
  122. if cls_name not in class_name_to_id:
  123. continue
  124. cls_id = class_name_to_id[cls_name]
  125. mask = np.asfortranarray(mask.astype(np.uint8))
  126. mask = pycocotools.mask.encode(mask)
  127. area = float(pycocotools.mask.area(mask))
  128. bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
  129. data["annotations"].append(
  130. dict(
  131. id=len(data["annotations"]),
  132. image_id=image_id,
  133. category_id=cls_id,
  134. segmentation=segmentations[instance],
  135. area=area,
  136. bbox=bbox,
  137. iscrowd=0,
  138. )
  139. )
  140. # ************************** 对标签的处理结束 *******************************************
  141. # ************************** 可视化的处理开始 *******************************************
  142. if not args.noviz:
  143. labels, captions, masks = zip(
  144. *[
  145. (class_name_to_id[cnm], cnm, msk)
  146. for (cnm, gid), msk in masks.items()
  147. if cnm in class_name_to_id
  148. ]
  149. )
  150. viz = imgviz.instances2rgb(
  151. image=img,
  152. labels=labels,
  153. masks=masks,
  154. captions=captions,
  155. font_size=15,
  156. line_width=2,
  157. )
  158. out_viz_file = osp.join(
  159. args.output_dir, "visualization", base + ".jpg"
  160. )
  161. # out_viz_file = '../' + "visualization/" + base + ".jpg"
  162. imgviz.io.imsave(out_viz_file, viz)
  163. # ************************** 可视化的处理结束 *******************************************
  164. with open(out_ann_file, "w") as f: # 将每个标签文件汇总成data后,保存总标签data文件
  165. json.dump(data, f)
  166. # 主程序执行
  167. def main():
  168. parser = argparse.ArgumentParser(
  169. formatter_class=argparse.ArgumentDefaultsHelpFormatter
  170. )
  171. parser.add_argument("--input_dir", help="input annotated directory")
  172. parser.add_argument("--output_dir", help="output dataset directory")
  173. parser.add_argument("--labels", help="labels file", required=True)
  174. parser.add_argument("--noviz", help="no visualization", action="store_true")
  175. args = parser.parse_args()
  176. if osp.exists(args.output_dir):
  177. print("Output directory already exists:", args.output_dir)
  178. sys.exit(1)
  179. os.makedirs(args.output_dir)
  180. print("| Creating dataset dir:", args.output_dir)
  181. if not args.noviz:
  182. os.makedirs(osp.join(args.output_dir, "visualization"))
  183. # 创建保存的文件夹
  184. if not os.path.exists(osp.join(args.output_dir, "annotations")):
  185. os.makedirs(osp.join(args.output_dir, "annotations"))
  186. if not os.path.exists(osp.join(args.output_dir, "train2017")):
  187. os.makedirs(osp.join(args.output_dir, "train2017"))
  188. if not os.path.exists(osp.join(args.output_dir, "val2017")):
  189. os.makedirs(osp.join(args.output_dir, "val2017"))
  190. # 获取目录下所有的.jpg文件列表
  191. feature_files = glob.glob(osp.join(args.input_dir, "*.jpg"))
  192. print('| Image number: ', len(feature_files))
  193. # 获取目录下所有的joson文件列表
  194. label_files = glob.glob(osp.join(args.input_dir, "*.json"))
  195. print('| Json number: ', len(label_files))
  196. # feature_files:待划分的样本特征集合 label_files:待划分的样本标签集合 test_size:测试集所占比例
  197. # x_train:划分出的训练集特征 x_test:划分出的测试集特征 y_train:划分出的训练集标签 y_test:划分出的测试集标签
  198. x_train, x_test, y_train, y_test = train_test_split(feature_files, label_files, test_size=0.3)
  199. print("| Train number:", len(y_train), '\t Value number:', len(y_test))
  200. # 把训练集标签转化为COCO的格式,并将标签对应的图片保存到目录 /train2017/
  201. print("—" * 50)
  202. print("| Train images:")
  203. to_coco(args, y_train, train=True)
  204. # 把测试集标签转化为COCO的格式,并将标签对应的图片保存到目录 /val2017/
  205. print("—" * 50)
  206. print("| Test images:")
  207. to_coco(args, y_test, train=False)
  208. if __name__ == "__main__":
  209. print("—" * 50)
  210. main()
  211. print("—" * 50)

 创建如下文件

images文件夹里面是训练的图片和json文件如下图

新建一个labels的txt文件

在终端中输入python labelme2coco.py --input_dir images --output_dir coco --labels labels.txt

运行成功会生成coco文件夹

如有错误请各位大佬批评指正!!!欢迎交流!!!

本文参考gy-7大佬的模板,大佬博客链接:labelme转coco数据集-CSDN博客

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Gausst松鼠会/article/detail/542956
推荐阅读
相关标签
  

闽ICP备14008679号