当前位置:   article > 正文

将labelme标注的json文件转换为yolo形式_labelme2yolo

labelme2yolo

        最近因为在跑yolov5模型,需要使用数据集进行训练。有时候对数据集进行文件格式转换也是很重要的一步,所以这里讲一下如何将labelme标注的json文件转换为yolo形式。个人也是刚接触,表述上可能会有错误,还请包涵。

        这是原先未修改前的代码(会出现一点小问题):

labelme2yolo.py:

  1. '''
  2. Created on Aug 18, 2021
  3. @author: xiaosonh
  4. '''
  5. import os
  6. import sys
  7. import argparse
  8. import shutil
  9. import math
  10. from collections import OrderedDict
  11. import json
  12. import cv2
  13. import PIL.Image
  14. from sklearn.model_selection import train_test_split
  15. from labelme import utils
  16. class Labelme2YOLO(object):
  17. def __init__(self, json_dir):
  18. self._json_dir = json_dir
  19. self._label_id_map = self._get_label_id_map(self._json_dir)
  20. def _make_train_val_dir(self):
  21. self._label_dir_path = os.path.join(self._json_dir,
  22. 'YOLODataset/labels/')
  23. self._image_dir_path = os.path.join(self._json_dir,
  24. 'YOLODataset/images/')
  25. for yolo_path in (os.path.join(self._label_dir_path + 'train/'),
  26. os.path.join(self._label_dir_path + 'val/'),
  27. os.path.join(self._image_dir_path + 'train/'),
  28. os.path.join(self._image_dir_path + 'val/')):
  29. if os.path.exists(yolo_path):
  30. shutil.rmtree(yolo_path)
  31. os.makedirs(yolo_path)
  32. def _get_label_id_map(self, json_dir):
  33. label_set = set()
  34. for file_name in os.listdir(json_dir):
  35. if file_name.endswith('json'):
  36. json_path = os.path.join(json_dir, file_name)
  37. data = json.load(open(json_path))
  38. for shape in data['shapes']:
  39. label_set.add(shape['label'])
  40. return OrderedDict([(label, label_id) \
  41. for label_id, label in enumerate(label_set)])
  42. def _train_test_split(self, folders, json_names, val_size):
  43. if len(folders) > 0 and 'train' in folders and 'val' in folders:
  44. train_folder = os.path.join(self._json_dir, 'train/')
  45. train_json_names = [train_sample_name + '.json' \
  46. for train_sample_name in os.listdir(train_folder) \
  47. if os.path.isdir(os.path.join(train_folder, train_sample_name))]
  48. val_folder = os.path.join(self._json_dir, 'val/')
  49. val_json_names = [val_sample_name + '.json' \
  50. for val_sample_name in os.listdir(val_folder) \
  51. if os.path.isdir(os.path.join(val_folder, val_sample_name))]
  52. return train_json_names, val_json_names
  53. train_idxs, val_idxs = train_test_split(range(len(json_names)),
  54. test_size=val_size)
  55. train_json_names = [json_names[train_idx] for train_idx in train_idxs]
  56. val_json_names = [json_names[val_idx] for val_idx in val_idxs]
  57. return train_json_names, val_json_names
  58. def convert(self, val_size):
  59. json_names = [file_name for file_name in os.listdir(self._json_dir) \
  60. if os.path.isfile(os.path.join(self._json_dir, file_name)) and \
  61. file_name.endswith('.json')]
  62. folders = [file_name for file_name in os.listdir(self._json_dir) \
  63. if os.path.isdir(os.path.join(self._json_dir, file_name))]
  64. train_json_names, val_json_names = self._train_test_split(folders, json_names, val_size)
  65. self._make_train_val_dir()
  66. # convert labelme object to yolo format object, and save them to files
  67. # also get image from labelme json file and save them under images folder
  68. for target_dir, json_names in zip(('train/', 'val/'),
  69. (train_json_names, val_json_names)):
  70. for json_name in json_names:
  71. json_path = os.path.join(self._json_dir, json_name)
  72. json_data = json.load(open(json_path))
  73. print('Converting %s for %s ...' % (json_name, target_dir.replace('/', '')))
  74. img_path = self._save_yolo_image(json_data,
  75. json_name,
  76. self._image_dir_path,
  77. target_dir)
  78. yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
  79. self._save_yolo_label(json_name,
  80. self._label_dir_path,
  81. target_dir,
  82. yolo_obj_list)
  83. print('Generating dataset.yaml file ...')
  84. self._save_dataset_yaml()
  85. def convert_one(self, json_name):
  86. json_path = os.path.join(self._json_dir, json_name)
  87. json_data = json.load(open(json_path))
  88. print('Converting %s ...' % json_name)
  89. img_path = self._save_yolo_image(json_data, json_name,
  90. self._json_dir, '')
  91. yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
  92. self._save_yolo_label(json_name, self._json_dir,
  93. '', yolo_obj_list)
  94. def _get_yolo_object_list(self, json_data, img_path):
  95. yolo_obj_list = []
  96. img_h, img_w, _ = cv2.imread(img_path).shape
  97. for shape in json_data['shapes']:
  98. # labelme circle shape is different from others
  99. # it only has 2 points, 1st is circle center, 2nd is drag end point
  100. if shape['shape_type'] == 'circle':
  101. yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w)
  102. else:
  103. yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w)
  104. yolo_obj_list.append(yolo_obj)
  105. return yolo_obj_list
  106. def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
  107. obj_center_x, obj_center_y = shape['points'][0]
  108. radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 +
  109. (obj_center_y - shape['points'][1][1]) ** 2)
  110. obj_w = 2 * radius
  111. obj_h = 2 * radius
  112. yolo_center_x= round(float(obj_center_x / img_w), 6)
  113. yolo_center_y = round(float(obj_center_y / img_h), 6)
  114. yolo_w = round(float(obj_w / img_w), 6)
  115. yolo_h = round(float(obj_h / img_h), 6)
  116. label_id = self._label_id_map[shape['label']]
  117. return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
  118. def _get_other_shape_yolo_object(self, shape, img_h, img_w):
  119. def __get_object_desc(obj_port_list):
  120. __get_dist = lambda int_list: max(int_list) - min(int_list)
  121. x_lists = [port[0] for port in obj_port_list]
  122. y_lists = [port[1] for port in obj_port_list]
  123. return min(x_lists), __get_dist(x_lists), min(y_lists), __get_dist(y_lists)
  124. obj_x_min, obj_w, obj_y_min, obj_h = __get_object_desc(shape['points'])
  125. yolo_center_x= round(float((obj_x_min + obj_w / 2.0) / img_w), 6)
  126. yolo_center_y = round(float((obj_y_min + obj_h / 2.0) / img_h), 6)
  127. yolo_w = round(float(obj_w / img_w), 6)
  128. yolo_h = round(float(obj_h / img_h), 6)
  129. label_id = self._label_id_map[shape['label']]
  130. return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
  131. def _save_yolo_label(self, json_name, label_dir_path, target_dir, yolo_obj_list):
  132. txt_path = os.path.join(label_dir_path,
  133. target_dir,
  134. json_name.replace('.json', '.txt'))
  135. with open(txt_path, 'w+') as f:
  136. for yolo_obj_idx, yolo_obj in enumerate(yolo_obj_list):
  137. yolo_obj_line = '%s %s %s %s %s\n' % yolo_obj \
  138. if yolo_obj_idx + 1 != len(yolo_obj_list) else \
  139. '%s %s %s %s %s' % yolo_obj
  140. f.write(yolo_obj_line)
  141. def _save_yolo_image(self, json_data, json_name, image_dir_path, target_dir):
  142. img_name = json_name.replace('.json', '.png')
  143. img_path = os.path.join(image_dir_path, target_dir,img_name)
  144. if not os.path.exists(img_path):
  145. img = utils.img_b64_to_arr(json_data['imageData'])
  146. PIL.Image.fromarray(img).save(img_path)
  147. return img_path
  148. def _save_dataset_yaml(self):
  149. yaml_path = os.path.join(self._json_dir, 'YOLODataset/', 'dataset.yaml')
  150. with open(yaml_path, 'w+') as yaml_file:
  151. yaml_file.write('train: %s\n' % \
  152. os.path.join(self._image_dir_path, 'train/'))
  153. yaml_file.write('val: %s\n\n' % \
  154. os.path.join(self._image_dir_path, 'val/'))
  155. yaml_file.write('nc: %i\n\n' % len(self._label_id_map))
  156. names_str = ''
  157. for label, _ in self._label_id_map.items():
  158. names_str += "'%s', " % label
  159. names_str = names_str.rstrip(', ')
  160. yaml_file.write('names: [%s]' % names_str)
  161. if __name__ == '__main__':
  162. parser = argparse.ArgumentParser()
  163. parser.add_argument('--json_dir',type=str,
  164. help='Please input the path of the labelme json files.')
  165. parser.add_argument('--val_size',type=float, nargs='?', default=None,
  166. help='Please input the validation dataset size, for example 0.1 ')
  167. parser.add_argument('--json_name',type=str, nargs='?', default=None,
  168. help='If you put json name, it would convert only one json file to YOLO.')
  169. args = parser.parse_args(sys.argv[1:])
  170. convertor = Labelme2YOLO(args.json_dir)
  171. if args.json_name is None:
  172. convertor.convert(val_size=args.val_size)
  173. else:
  174. convertor.convert_one(args.json_name)

        yaml文件中要填写最后转换出来存放的位置,记得要确保路径是存在的。

        首先是将代码直接进行运行,会出现这样的错误:

        因为指向这个位置,让我一直纠结在这个地方进行修改,然而一旦修改了ntpath.py文件后,python环境就遭到破坏,出现了一系列问题,查找了很多方法都没能解决。

        后面通过查找了解到,可以通过命令行去执行该程序。命令行命令参考labelme2yolo官网:labelme2yolo · PyPI

        因为我执行了 labelme2yolo --json_dir + json文件路径 + --val_size0.15 --test_size 0.15出现下面的错误:

        后面我使用python命令来对labelme2yolo.py程序进行执行,执行命令为:python labelme2yolo.py --json_dir + json文件路径 + --val_size 0.15 --test_size 0.15

        由于我的labelme2yolo.py文件中没有test_size所以这里把它去掉,否则会出现下面的错误:

        因此最后我的执行命令为:python labelme2yolo.py --json_dir + json文件路径 + --val_size 0.15

        这里又出现了这样的错误:

        通过去print这个data的数据知道data是空的,所以导致在这里获取不到shapes的值,所以程序报错误。

        错误代码位置:

        通过一步步调试知道,是在使用json.load()方法读取文件数据的时候出现了问题,读取不到数据,后面通过改变读取文件数据的方式得以解决:先用指定utf-8编码的open()打开文件,用read()方法把文件内容读成字符串,再用json.loads()进行解析。代码如下(将data = json.load(open(json_path))换掉):

  1. with open(json_path, 'r', encoding='utf-8') as fp:
  2. data = fp.read()
  3. data = json.loads(data)

         再次执行程序,这里出现了问题:

        后面了解到可能是没有对文件内容进行解析,所以出现了读取不到的情况,于是对json_data使用json.loads()方法进行解析。json.loads()和json.load()方法的区别如下:json.load()接收一个文件对象,直接从文件中读取并解析JSON;json.loads()接收一个字符串,把字符串形式的JSON解析成Python对象。

        再次执行程序可以发现该部分问题解决,然而在下一个地方出现同样获取不到文件数据的情况,所以第一反应就是使用同样的方法进行替换,但并没有得到解决。这时就想着是不是因为不同的方法对使用的文件等某些方面有一定的限制,所以查找了其他的获取方法,最后在使用readline()方法时得以解决。(注意:readline()只会读取文件的第一行,因此这种写法只适用于整个json内容都写在同一行的文件;如果json文件是带缩进的多行格式,应改用read()读取全部内容后再解析。)

        在这里就把所有的问题解决了,在解决过程中也是很波折,所以想通过写文章的形式记录下来,以供自己后面回顾。最后运行出来的结果是(图片会生成在相应的路径中):

        最终代码:

labelme2yolo.py:

  1. '''
  2. Created on Aug 18, 2021
  3. @author: xiaosonh
  4. '''
  5. import os
  6. import sys
  7. import argparse
  8. import shutil
  9. import math
  10. from collections import OrderedDict
  11. import json
  12. import cv2
  13. import PIL.Image
  14. from sklearn.model_selection import train_test_split
  15. from labelme import utils
  16. class Labelme2YOLO(object):
  17. def __init__(self, json_dir):
  18. self._json_dir = json_dir
  19. self._label_id_map = self._get_label_id_map(self._json_dir)
  20. def _make_train_val_dir(self):
  21. self._label_dir_path = os.path.join(self._json_dir,
  22. 'D:/wc/YOLODataset/labels/')
  23. self._image_dir_path = os.path.join(self._json_dir,
  24. 'D:/wc/YOLODataset/images/')
  25. for yolo_path in (os.path.join(self._label_dir_path + 'train/'),
  26. os.path.join(self._label_dir_path + 'val/'),
  27. os.path.join(self._image_dir_path + 'train/'),
  28. os.path.join(self._image_dir_path + 'val/')):
  29. if os.path.exists(yolo_path):
  30. shutil.rmtree(yolo_path)
  31. os.makedirs(yolo_path)
  32. def _get_label_id_map(self, json_dir):
  33. label_set = set()
  34. for file_name in os.listdir(json_dir):
  35. # print(file_name)
  36. if file_name.endswith('json'):
  37. json_path = os.path.join(json_dir, file_name)
  38. with open(json_path, 'r', encoding='utf-8') as fp:
  39. # print(type(fp))
  40. data = fp.read()
  41. # print(type(data))
  42. # data = json.load(open(json_path))
  43. # print(data)
  44. # data["shapes"] = json.loads(data["shapes"])
  45. data = json.loads(data)
  46. for shape in data['shapes']:
  47. label_set.add(shape['label'])
  48. return OrderedDict([(label, label_id) \
  49. for label_id, label in enumerate(label_set)])
  50. def _train_test_split(self, folders, json_names, val_size):
  51. if len(folders) > 0 and 'train' in folders and 'val' in folders:
  52. train_folder = os.path.join(self._json_dir, 'train/')
  53. train_json_names = [train_sample_name + '.json' \
  54. for train_sample_name in os.listdir(train_folder) \
  55. if os.path.isdir(os.path.join(train_folder, train_sample_name))]
  56. val_folder = os.path.join(self._json_dir, 'val/')
  57. val_json_names = [val_sample_name + '.json' \
  58. for val_sample_name in os.listdir(val_folder) \
  59. if os.path.isdir(os.path.join(val_folder, val_sample_name))]
  60. return train_json_names, val_json_names
  61. train_idxs, val_idxs = train_test_split(range(len(json_names)),
  62. test_size=val_size)
  63. train_json_names = [json_names[train_idx] for train_idx in train_idxs]
  64. val_json_names = [json_names[val_idx] for val_idx in val_idxs]
  65. return train_json_names, val_json_names
  66. def convert(self, val_size):
  67. json_names = [file_name for file_name in os.listdir(self._json_dir) \
  68. if os.path.isfile(os.path.join(self._json_dir, file_name)) and \
  69. file_name.endswith('.json')]
  70. folders = [file_name for file_name in os.listdir(self._json_dir) \
  71. if os.path.isdir(os.path.join(self._json_dir, file_name))]
  72. train_json_names, val_json_names = self._train_test_split(folders, json_names, val_size)
  73. self._make_train_val_dir()
  74. # convert labelme object to yolo format object, and save them to files
  75. # also get image from labelme json file and save them under images folder
  76. for target_dir, json_names in zip(('train/', 'val/'),
  77. (train_json_names, val_json_names)):
  78. for json_name in json_names:
  79. json_path = os.path.join(self._json_dir, json_name)
  80. # print(json_path)
  81. with open(json_path) as f:
  82. json_data = f.readline()
  83. # print(json_data)
  84. # json_data = json.load(open(json_path))
  85. print('Converting %s for %s ...' % (json_name, target_dir.replace('/', '')))
  86. img_path = self._save_yolo_image(json_data,
  87. json_name,
  88. self._image_dir_path,
  89. target_dir)
  90. yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
  91. self._save_yolo_label(json_name,
  92. self._label_dir_path,
  93. target_dir,
  94. yolo_obj_list)
  95. print('Generating dataset.yaml file ...')
  96. self._save_dataset_yaml()
  97. def convert_one(self, json_name):
  98. json_path = os.path.join(self._json_dir, json_name)
  99. json_data = json.load(open(json_path))
  100. print('Converting %s ...' % json_name)
  101. img_path = self._save_yolo_image(json_data, json_name,
  102. self._json_dir, '')
  103. yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
  104. self._save_yolo_label(json_name, self._json_dir,
  105. '', yolo_obj_list)
  106. def _get_yolo_object_list(self, json_data, img_path):
  107. yolo_obj_list = []
  108. img_h, img_w, _ = cv2.imread(img_path).shape
  109. # print(json_data)
  110. json_data = json.loads(json_data)
  111. # print(json_data)
  112. for shape in json_data['shapes']:
  113. # labelme circle shape is different from others
  114. # it only has 2 points, 1st is circle center, 2nd is drag end point
  115. if shape['shape_type'] == 'circle':
  116. yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w)
  117. else:
  118. yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w)
  119. yolo_obj_list.append(yolo_obj)
  120. return yolo_obj_list
  121. def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
  122. obj_center_x, obj_center_y = shape['points'][0]
  123. radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 +
  124. (obj_center_y - shape['points'][1][1]) ** 2)
  125. obj_w = 2 * radius
  126. obj_h = 2 * radius
  127. yolo_center_x= round(float(obj_center_x / img_w), 6)
  128. yolo_center_y = round(float(obj_center_y / img_h), 6)
  129. yolo_w = round(float(obj_w / img_w), 6)
  130. yolo_h = round(float(obj_h / img_h), 6)
  131. label_id = self._label_id_map[shape['label']]
  132. return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
  133. def _get_other_shape_yolo_object(self, shape, img_h, img_w):
  134. def __get_object_desc(obj_port_list):
  135. __get_dist = lambda int_list: max(int_list) - min(int_list)
  136. x_lists = [port[0] for port in obj_port_list]
  137. y_lists = [port[1] for port in obj_port_list]
  138. return min(x_lists), __get_dist(x_lists), min(y_lists), __get_dist(y_lists)
  139. obj_x_min, obj_w, obj_y_min, obj_h = __get_object_desc(shape['points'])
  140. yolo_center_x= round(float((obj_x_min + obj_w / 2.0) / img_w), 6)
  141. yolo_center_y = round(float((obj_y_min + obj_h / 2.0) / img_h), 6)
  142. yolo_w = round(float(obj_w / img_w), 6)
  143. yolo_h = round(float(obj_h / img_h), 6)
  144. label_id = self._label_id_map[shape['label']]
  145. return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h
  146. def _save_yolo_label(self, json_name, label_dir_path, target_dir, yolo_obj_list):
  147. txt_path = os.path.join(label_dir_path,
  148. target_dir,
  149. json_name.replace('.json', '.txt'))
  150. with open(txt_path, 'w+') as f:
  151. for yolo_obj_idx, yolo_obj in enumerate(yolo_obj_list):
  152. yolo_obj_line = '%s %s %s %s %s\n' % yolo_obj \
  153. if yolo_obj_idx + 1 != len(yolo_obj_list) else \
  154. '%s %s %s %s %s' % yolo_obj
  155. f.write(yolo_obj_line)
  156. def _save_yolo_image(self, json_data, json_name, image_dir_path, target_dir):
  157. img_name = json_name.replace('.json', '.png')
  158. img_path = os.path.join(image_dir_path, target_dir,img_name)
  159. if not os.path.exists(img_path):
  160. # print(json_data)
  161. json_data = json.loads(json_data)
  162. # print(json_data)
  163. img = utils.img_b64_to_arr(json_data['imageData'])
  164. PIL.Image.fromarray(img).save(img_path)
  165. return img_path
  166. def _save_dataset_yaml(self):
  167. yaml_path = os.path.join(self._json_dir, 'D:/convert/', 'dataset.yaml')
  168. with open(yaml_path, 'w+') as yaml_file:
  169. yaml_file.write('train: %s\n' % \
  170. os.path.join(self._image_dir_path, 'train/'))
  171. yaml_file.write('val: %s\n\n' % \
  172. os.path.join(self._image_dir_path, 'val/'))
  173. yaml_file.write('nc: %i\n\n' % len(self._label_id_map))
  174. names_str = ''
  175. for label, _ in self._label_id_map.items():
  176. names_str += "'%s', " % label
  177. names_str = names_str.rstrip(', ')
  178. yaml_file.write('names: [%s]' % names_str)
  179. if __name__ == '__main__':
  180. parser = argparse.ArgumentParser()
  181. parser.add_argument('--json_dir',type=str,
  182. help='Please input the path of the labelme json files.')
  183. parser.add_argument('--val_size',type=float, nargs='?', default=None,
  184. help='Please input the validation dataset size, for example 0.1 ')
  185. parser.add_argument('--json_name',type=str, nargs='?', default=None,
  186. help='If you put json name, it would convert only one json file to YOLO.')
  187. args = parser.parse_args(sys.argv[1:])
  188. convertor = Labelme2YOLO(args.json_dir)
  189. if args.json_name is None:
  190. convertor.convert(val_size=args.val_size)
  191. else:
  192. convertor.convert_one(args.json_name)

        写得比较匆忙,相对也比较潦草,希望可以帮助到大家!!

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/你好赵伟/article/detail/665743
推荐阅读
相关标签
  

闽ICP备14008679号