当前位置:   article > 正文

labelme生成的标注数据转换成yolov5格式_labelme yolo

labelme yolo

       本文中的代码旨在将labelme标注的数据一键转换为yolov5可以直接训练的数据集格式。

       使用labelme标注的json数据会生成在标注时图像文件所在的路径下,数据形式大概是这样的:

        json文件和图像数据同名。

        而yolov5实际训练时使用的数据格式是这样的:

         网上大部分代码都是将yolov5标注格式的txt生成在根目录下,这样在生成txt文件后还需要手动整理成yolov5可训练的文件形式,下面的代码旨在减少人工处理的时间,一键生成可直接训练的文件形式。

  1. # -*- coding: utf-8 -*-
  2. """
  3. Time: 2021.10.26
  4. Author: Athrunsunny
  5. Version: V 0.1
  6. File: toyolo.py
  7. Describe: Functions in this file is change the dataset format to yolov5
  8. """
  9. import os
  10. import numpy as np
  11. import json
  12. from glob import glob
  13. import cv2
  14. import shutil
  15. import yaml
  16. from sklearn.model_selection import train_test_split
  17. from tqdm import tqdm
  18. ROOT_DIR = os.getcwd()  # working directory captured once at import time; default label_path of the functions below
  19. def change_image_format(label_path=ROOT_DIR, suffix='.jpg'):
  20. """
  21. 统一当前文件夹下所有图像的格式,如'.jpg'
  22. :param suffix: 图像文件后缀
  23. :param label_path:当前文件路径
  24. :return:
  25. """
  26. externs = ['png', 'jpg', 'JPEG', 'BMP', 'bmp']
  27. files = list()
  28. for extern in externs:
  29. files.extend(glob(label_path + "\\*." + extern))
  30. for file in files:
  31. name = ''.join(file.split('.')[:-1])
  32. file_suffix = file.split('.')[-1]
  33. if file_suffix != suffix.split('.')[-1]:
  34. new_name = name + suffix
  35. image = cv2.imread(file)
  36. cv2.imwrite(new_name, image)
  37. os.remove(file)
  38. def get_all_class(file_list, label_path=ROOT_DIR):
  39. """
  40. 从json文件中获取当前数据的所有类别
  41. :param file_list:当前路径下的所有文件名
  42. :param label_path:当前文件路径
  43. :return:
  44. """
  45. classes = list()
  46. for filename in tqdm(file_list):
  47. json_path = os.path.join(label_path, filename + '.json')
  48. json_file = json.load(open(json_path, "r", encoding="utf-8"))
  49. for item in json_file["shapes"]:
  50. label_class = item['label']
  51. if label_class not in classes:
  52. classes.append(label_class)
  53. print('read file done')
  54. return classes
  55. def split_dataset(label_path, test_size=0.3, isUseTest=False, useNumpyShuffle=False):
  56. """
  57. 将文件分为训练集,测试集和验证集
  58. :param useNumpyShuffle: 使用numpy方法分割数据集
  59. :param test_size: 分割测试集或验证集的比例
  60. :param isUseTest: 是否使用测试集,默认为False
  61. :param label_path:当前文件路径
  62. :return:
  63. """
  64. files = glob(label_path + "\\*.json")
  65. files = [i.replace("\\", "/").split("/")[-1].split(".json")[0] for i in files]
  66. if useNumpyShuffle:
  67. file_length = len(files)
  68. index = np.arange(file_length)
  69. np.random.seed(32)
  70. np.random.shuffle(index)
  71. test_files = None
  72. if isUseTest:
  73. trainval_files, test_files = np.array(files)[index[:int(file_length * (1 - test_size))]], np.array(files)[
  74. index[int(file_length * (1 - test_size)):]]
  75. else:
  76. trainval_files = files
  77. train_files, val_files = np.array(trainval_files)[index[:int(len(trainval_files) * (1 - test_size))]], \
  78. np.array(trainval_files)[index[int(len(trainval_files) * (1 - test_size)):]]
  79. else:
  80. test_files = None
  81. if isUseTest:
  82. trainval_files, test_files = train_test_split(files, test_size=test_size, random_state=55)
  83. else:
  84. trainval_files = files
  85. train_files, val_files = train_test_split(trainval_files, test_size=test_size, random_state=55)
  86. return train_files, val_files, test_files, files
  87. def create_save_file(label_path=ROOT_DIR):
  88. """
  89. 按照训练时的图像和标注路径创建文件夹
  90. :param label_path:当前文件路径
  91. :return:
  92. """
  93. # 生成训练集
  94. train_image = os.path.join(label_path, 'train', 'images')
  95. if not os.path.exists(train_image):
  96. os.makedirs(train_image)
  97. train_label = os.path.join(label_path, 'train', 'labels')
  98. if not os.path.exists(train_label):
  99. os.makedirs(train_label)
  100. # 生成验证集
  101. val_image = os.path.join(label_path, 'valid', 'images')
  102. if not os.path.exists(val_image):
  103. os.makedirs(val_image)
  104. val_label = os.path.join(label_path, 'valid', 'labels')
  105. if not os.path.exists(val_label):
  106. os.makedirs(val_label)
  107. # 生成测试集
  108. test_image = os.path.join(label_path, 'test', 'images')
  109. if not os.path.exists(test_image):
  110. os.makedirs(test_image)
  111. test_label = os.path.join(label_path, 'test', 'labels')
  112. if not os.path.exists(test_label):
  113. os.makedirs(test_label)
  114. return train_image, train_label, val_image, val_label, test_image, test_label
  115. def convert(size, box):
  116. dw = 1. / (size[0])
  117. dh = 1. / (size[1])
  118. x = (box[0] + box[1]) / 2.0 - 1
  119. y = (box[2] + box[3]) / 2.0 - 1
  120. w = box[1] - box[0]
  121. h = box[3] - box[2]
  122. x = x * dw
  123. w = w * dw
  124. y = y * dh
  125. h = h * dh
  126. return x, y, w, h
  127. def push_into_file(file, images, labels, label_path=ROOT_DIR, suffix='.jpg'):
  128. """
  129. 最终生成在当前文件夹下的所有文件按image和label分别存在到训练集/验证集/测试集路径的文件夹下
  130. :param file: 文件名列表
  131. :param images: 存放images的路径
  132. :param labels: 存放labels的路径
  133. :param label_path: 当前文件路径
  134. :param suffix: 图像文件后缀
  135. :return:
  136. """
  137. for filename in file:
  138. image_file = os.path.join(label_path, filename + suffix)
  139. label_file = os.path.join(label_path, filename + '.txt')
  140. if not os.path.exists(os.path.join(images, filename + suffix)):
  141. try:
  142. shutil.move(image_file, images)
  143. except OSError:
  144. pass
  145. if not os.path.exists(os.path.join(labels, filename + suffix)):
  146. try:
  147. shutil.move(label_file, labels)
  148. except OSError:
  149. pass
  150. def json2txt(classes, txt_Name='allfiles', label_path=ROOT_DIR, suffix='.jpg'):
  151. """
  152. 将json文件转化为txt文件,并将json文件存放到指定文件夹
  153. :param classes: 类别名
  154. :param txt_Name:txt文件,用来存放所有文件的路径
  155. :param label_path:当前文件路径
  156. :param suffix:图像文件后缀
  157. :return:
  158. """
  159. store_json = os.path.join(label_path, 'json')
  160. if not os.path.exists(store_json):
  161. os.makedirs(store_json)
  162. _, _, _, files = split_dataset(label_path)
  163. if not os.path.exists(os.path.join(label_path, 'tmp')):
  164. os.makedirs(os.path.join(label_path, 'tmp'))
  165. list_file = open('tmp/%s.txt' % txt_Name, 'w')
  166. for json_file_ in tqdm(files):
  167. json_filename = os.path.join(label_path, json_file_ + ".json")
  168. imagePath = os.path.join(label_path, json_file_ + suffix)
  169. list_file.write('%s\n' % imagePath)
  170. out_file = open('%s/%s.txt' % (label_path, json_file_), 'w')
  171. json_file = json.load(open(json_filename, "r", encoding="utf-8"))
  172. if os.path.exists(imagePath):
  173. height, width, channels = cv2.imread(imagePath).shape
  174. for multi in json_file["shapes"]:
  175. if len(multi["points"][0]) == 0:
  176. out_file.write('')
  177. continue
  178. points = np.array(multi["points"])
  179. xmin = min(points[:, 0]) if min(points[:, 0]) > 0 else 0
  180. xmax = max(points[:, 0]) if max(points[:, 0]) > 0 else 0
  181. ymin = min(points[:, 1]) if min(points[:, 1]) > 0 else 0
  182. ymax = max(points[:, 1]) if max(points[:, 1]) > 0 else 0
  183. label = multi["label"]
  184. if xmax <= xmin:
  185. pass
  186. elif ymax <= ymin:
  187. pass
  188. else:
  189. cls_id = classes.index(label)
  190. b = (float(xmin), float(xmax), float(ymin), float(ymax))
  191. bb = convert((width, height), b)
  192. out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
  193. # print(json_filename, xmin, ymin, xmax, ymax, cls_id)
  194. if not os.path.exists(os.path.join(store_json, json_file_ + '.json')):
  195. try:
  196. shutil.move(json_filename, store_json)
  197. except OSError:
  198. pass
  199. def create_yaml(classes, label_path, isUseTest=False):
  200. nc = len(classes)
  201. if not isUseTest:
  202. desired_caps = {
  203. 'path': label_path,
  204. 'train': 'train/images',
  205. 'val': 'valid/images',
  206. 'nc': nc,
  207. 'names': classes
  208. }
  209. else:
  210. desired_caps = {
  211. 'path': label_path,
  212. 'train': 'train/images',
  213. 'val': 'valid/images',
  214. 'test': 'test/images',
  215. 'nc': nc,
  216. 'names': classes
  217. }
  218. yamlpath = os.path.join(label_path, "data" + ".yaml")
  219. # 写入到yaml文件
  220. with open(yamlpath, "w+", encoding="utf-8") as f:
  221. for key, val in desired_caps.items():
  222. yaml.dump({key: val}, f, default_flow_style=False)
  223. # 首先确保当前文件夹下的所有图片统一后缀,如.jpg,如果为其他后缀,将suffix改为对应的后缀,如.png
  224. def ChangeToYolo5(label_path=ROOT_DIR, suffix='.jpg', test_size=0.1, isUseTest=False):
  225. """
  226. 生成最终标准格式的文件
  227. :param test_size: 分割测试集或验证集的比例
  228. :param label_path:当前文件路径
  229. :param suffix: 文件后缀名
  230. :param isUseTest: 是否使用测试集
  231. :return:
  232. """
  233. change_image_format(label_path)
  234. train_files, val_files, test_file, files = split_dataset(label_path, test_size=test_size, isUseTest=isUseTest)
  235. classes = get_all_class(files)
  236. json2txt(classes)
  237. create_yaml(classes, label_path, isUseTest=isUseTest)
  238. train_image, train_label, val_image, val_label, test_image, test_label = create_save_file(label_path)
  239. push_into_file(train_files, train_image, train_label, suffix=suffix)
  240. push_into_file(val_files, val_image, val_label, suffix=suffix)
  241. if test_file is not None:
  242. push_into_file(test_file, test_image, test_label, suffix=suffix)
  243. print('create dataset done')
  244. if __name__ == "__main__":  # run the conversion only when executed as a script, not on import
  245. ChangeToYolo5()  # all defaults: label_path=ROOT_DIR, suffix='.jpg', test_size=0.1, no test split

        在保存图像的目录下,创建toyolo.py文件,将以上代码拷贝粘贴。

        运行前请先确保代码中用到的库(numpy、opencv-python、pyyaml、scikit-learn、tqdm)已经安装,运行后生成的文件目录如下:

         生成的data.yaml可以直接复制到yolov5/data目录下;tmp目录存放记录所有图像路径的txt文件(默认为allfiles.txt),json目录存放labelme标注生成的原始json文件。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/花生_TL007/article/detail/257242
推荐阅读
相关标签
  

闽ICP备14008679号