当前位置:   article > 正文

Python实现将LabelMe生成的JSON格式转换成YOLOv8支持的TXT格式_labelme的json转txt

labelme的json转txt

      标注工具 LabelMe 生成的标注文件为JSON格式,而YOLOv8中支持的为TXT文件格式。以下Python代码实现3个功能

     1.将JSON格式转换成TXT格式;

     2.将数据集进行随机拆分,生成YOLOv8支持的目录结构;

     3.生成YOLOv8支持的YAML文件。

     代码test_labelme2yolov8.py如下:

  1. import os
  2. import json
  3. import argparse
  4. import colorama
  5. import random
  6. import shutil
  7. def parse_args():
  8. parser = argparse.ArgumentParser(description="json(LabelMe) to txt(YOLOv8)")
  9. parser.add_argument("--dir", required=True, type=str, help="images, json files, and generated txt files, all in the same directory")
  10. parser.add_argument("--labels", required=True, type=str, help="txt file that hold indexes and labels, one label per line, for example: face 0")
  11. parser.add_argument("--val_size", default=0.2, type=float, help="the proportion of the validation set to the overall dataset:[0., 0.5]")
  12. parser.add_argument("--name", required=True, type=str, help="the name of the dataset")
  13. args = parser.parse_args()
  14. return args
  15. def get_labels_index(name):
  16. labels = {} # key,value
  17. with open(name, "r") as file:
  18. for line in file:
  19. # print("line:", line)
  20. key_value = []
  21. for v in line.split(" "):
  22. # print("v:", v)
  23. key_value.append(v.replace("\n", "")) # remove line breaks(\n) at the end of the line
  24. if len(key_value) != 2:
  25. print(colorama.Fore.RED + "Error: each line should have only two values(key value):", len(key_value))
  26. continue
  27. labels[key_value[0]] = key_value[1]
  28. with open(name, "r") as file:
  29. line_num = len(file.readlines())
  30. if line_num != len(labels):
  31. print(colorama.Fore.RED + "Error: there may be duplicate lables:", line_num, len(labels))
  32. return labels
  33. def get_json_files(dir):
  34. jsons = []
  35. for x in os.listdir(dir):
  36. if x.endswith(".json"):
  37. jsons.append(x)
  38. return jsons
  39. def parse_json(name):
  40. with open(name, "r") as file:
  41. data = json.load(file)
  42. width = data["imageWidth"]
  43. height = data["imageHeight"]
  44. # print(f"width: {width}; height: {height}")
  45. objects=[]
  46. for shape in data["shapes"]:
  47. if shape["shape_type"] != "rectangle":
  48. print(colorama.Fore.YELLOW + "Warning: only the rectangle type is supported:", shape["shape_type"])
  49. continue
  50. object = []
  51. object.append(shape["label"])
  52. object.append(shape["points"])
  53. objects.append(object)
  54. return width, height, objects
  55. def get_box_width_height(box):
  56. dist = lambda val: max(val) - min(val)
  57. x = [pt[0] for pt in box]
  58. y = [pt[1] for pt in box]
  59. return min(x), min(y), dist(x), dist(y)
  60. def bounding_box_normalization(width, height, objects, labels):
  61. boxes = []
  62. for object in objects:
  63. box = [] # class x_center y_center width height
  64. box.append(labels[object[0]])
  65. # print("point:", object[1])
  66. x_min, y_min, box_w, box_h = get_box_width_height(object[1])
  67. box.append(round((float(x_min + box_w / 2.0) / width), 6))
  68. box.append(round((float(y_min + box_h / 2.0) / height), 6))
  69. box.append(round(float(box_w / width), 6))
  70. box.append(round(float(box_h / height), 6))
  71. boxes.append(box)
  72. return boxes
  73. def write_to_txt(dir, json, width, height, objects, labels):
  74. boxes = bounding_box_normalization(width, height, objects, labels)
  75. # print("boxes:", boxes)
  76. name = json[:-len(".json")] + ".txt"
  77. # print("name:", name)
  78. with open(dir + "/" + name, "w") as file:
  79. for item in boxes:
  80. # print("item:", item)
  81. if len(item) != 5:
  82. print(colorama.Fore.RED + "Error: the length must be 5:", len(item))
  83. continue
  84. string = item[0] + " " + str(item[1]) + " " + str(item[2]) + " " + str(item[3]) + " " + str(item[4]) + "\r"
  85. file.write(string)
  86. def json_to_txt(dir, jsons, labels):
  87. for json in jsons:
  88. name = dir + "/" + json
  89. # print("name:", name)
  90. width, height, objects = parse_json(name)
  91. # print(f"width: {width}; height: {height}; objects: {objects}")
  92. write_to_txt(dir, json, width, height, objects, labels)
  93. def is_in_range(value, a, b):
  94. return a <= value <= b
  95. def get_random_sequence(length, val_size):
  96. numbers = list(range(0, length))
  97. val_sequence = random.sample(numbers, int(length*val_size))
  98. # print("val_sequence:", val_sequence)
  99. train_sequence = [x for x in numbers if x not in val_sequence]
  100. # print("train_sequence:", train_sequence)
  101. return train_sequence, val_sequence
  102. def get_files_number(dir):
  103. count = 0
  104. for file in os.listdir(dir):
  105. if os.path.isfile(os.path.join(dir, file)):
  106. count += 1
  107. return count
  108. def split_train_val(dir, jsons, name, val_size):
  109. if is_in_range(val_size, 0., 0.5) is False:
  110. print(colorama.Fore.RED + "Error: the interval for val_size should be:[0., 0.5]:", val_size)
  111. raise
  112. dst_dir_images_train = "datasets/" + name + "/images/train"
  113. dst_dir_images_val = "datasets/" + name + "/images/val"
  114. dst_dir_labels_train = "datasets/" + name + "/labels/train"
  115. dst_dir_labels_val = "datasets/" + name + "/labels/val"
  116. try:
  117. os.makedirs(dst_dir_images_train) #, exist_ok=True
  118. os.makedirs(dst_dir_images_val)
  119. os.makedirs(dst_dir_labels_train)
  120. os.makedirs(dst_dir_labels_val)
  121. except OSError as e:
  122. print(colorama.Fore.RED + "Error: cannot create directory:", e.strerror)
  123. raise
  124. # supported image formats
  125. img_formats = (".bmp", ".jpeg", ".jpg", ".png", ".webp")
  126. # print("jsons:", jsons)
  127. train_sequence, val_sequence = get_random_sequence(len(jsons), val_size)
  128. for index in train_sequence:
  129. for format in img_formats:
  130. file = dir + "/" + jsons[index][:-len(".json")] + format
  131. # print("file:", file)
  132. if os.path.isfile(file):
  133. shutil.copy(file, dst_dir_images_train)
  134. break
  135. file = dir + "/" + jsons[index][:-len(".json")] + ".txt"
  136. if os.path.isfile(file):
  137. shutil.copy(file, dst_dir_labels_train)
  138. for index in val_sequence:
  139. for format in img_formats:
  140. file = dir + "/" + jsons[index][:-len(".json")] + format
  141. if os.path.isfile(file):
  142. shutil.copy(file, dst_dir_images_val)
  143. break
  144. file = dir + "/" + jsons[index][:-len(".json")] + ".txt"
  145. if os.path.isfile(file):
  146. shutil.copy(file, dst_dir_labels_val)
  147. num_images_train = get_files_number(dst_dir_images_train)
  148. num_images_val = get_files_number(dst_dir_images_val)
  149. num_labels_train = get_files_number(dst_dir_labels_train)
  150. num_labels_val = get_files_number(dst_dir_labels_val)
  151. if num_images_train + num_images_val != len(jsons) or num_labels_train + num_labels_val != len(jsons):
  152. print(colorama.Fore.RED + "Error: the number of files is inconsistent:", num_images_train, num_images_val, num_labels_train, num_labels_val, len(jsons))
  153. raise
  154. def generate_yaml_file(labels, name):
  155. path = os.path.join("datasets", name, name+".yaml")
  156. # print("path:", path)
  157. with open(path, "w") as file:
  158. file.write("path: ../datasets/%s # dataset root dir\n" % name)
  159. file.write("train: images/train # train images (relative to 'path')\n")
  160. file.write("val: images/val # val images (relative to 'path')\n")
  161. file.write("test: # test images (optional)\n\n")
  162. file.write("# Classes\n")
  163. file.write("names:\n")
  164. for key, value in labels.items():
  165. # print(f"key: {key}; value: {value}")
  166. file.write(" %d: %s\n" % (int(value), key))
  167. if __name__ == "__main__":
  168. colorama.init()
  169. args = parse_args()
  170. # 1. parse JSON file and write it to a TXT file
  171. labels = get_labels_index(args.labels)
  172. # print("labels:", labels)
  173. jsons = get_json_files(args.dir)
  174. # print("jsons:", jsons)
  175. json_to_txt(args.dir, jsons, labels)
  176. # 2. split the dataset
  177. split_train_val(args.dir, jsons, args.name, args.val_size)
  178. # 3. generate a YAML file
  179. generate_yaml_file(labels, args.name)
  180. print(colorama.Fore.GREEN + "====== execution completed ======")

      代码有些多,主要函数说明如下:

     1.函数parse_args:解析输入参数;

     2.函数get_labels_index:解析labels文件,该文件列出数据集中的所有类别及其对应的索引,生成YOLOv8的YAML文件时也需要此文件;labels.txt的格式如下所示:

  1. face 0
  2. hand 1
  3. eye 2
  4. mouth 3
  5. horse 4
  6. tree 5
  7. bridge 6
  8. house 7

     3.函数get_json_files:获取指定目录下的所有json文件;

     4.函数parse_json:解析json文件,将txt文件中需要的数据提取出来;

     5.函数bounding_box_normalization:将bounding box值归一化到[0,1]区间;

     6.函数write_to_txt:将最终结果写入txt文件;

     7.函数split_train_val:将数据集随机拆分为训练集和验证集,并按YOLOv8支持的目录结构存放,根目录为datasets,接着是指定的数据集名,例如为fake,与YOLOv8中数据集coco8目录结构完全一致

     8.函数generate_yaml_file:生成YOLOv8支持的yaml文件,存放在datasets/数据集名下,例如为fake.yaml

      test_labelme2yolov8.py接收4个参数:参数dir为存放数据集的目录;参数labels指定labels文件;参数val_size指定验证集所占的比例,取值范围为[0, 0.5],默认为0.2;参数name指定新生成的YOLOv8数据集的名字

      这里从网上随机下载了10幅图像,使用LabelMe进行了标注,执行结果如下图所示:

     生成的fake.yaml文件如下图所示:

  1. path: ../datasets/fake # dataset root dir
  2. train: images/train # train images (relative to 'path')
  3. val: images/val # val images (relative to 'path')
  4. test: # test images (optional)
  5. # Classes
  6. names:
  7. 0: face
  8. 1: hand
  9. 2: eye
  10. 3: mouth
  11. 4: horse
  12. 5: tree
  13. 6: bridge
  14. 7: house

      将生成的fake数据集进行训练,测试代码test_yolov8_detect.py如下:

  1. import argparse
  2. import colorama
  3. from ultralytics import YOLO
  4. def parse_args():
  5. parser = argparse.ArgumentParser(description="YOLOv8 object detect")
  6. parser.add_argument("--yaml", required=True, type=str, help="yaml file")
  7. parser.add_argument("--epochs", required=True, type=int, help="number of training")
  8. args = parser.parse_args()
  9. return args
  10. def train(yaml, epochs):
  11. model = YOLO("yolov8n.pt") # load a pretrained model
  12. results = model.train(data=yaml, epochs=epochs, imgsz=640) # train the model
  13. metrics = model.val() # It'll automatically evaluate the data you trained, no arguments needed, dataset and settings remembered
  14. model.export(format="onnx", dynamic=True) # export the model
  15. if __name__ == "__main__":
  16. colorama.init()
  17. args = parse_args()
  18. train(args.yaml, args.epochs)
  19. print(colorama.Fore.GREEN + "====== execution completed ======")

      目前此测试代码接收2个参数:参数yaml指定yaml文件;参数epochs指定训练次数。执行结果如下图所示,由结果可知,生成的新数据集无需做任何改动即可进行训练

      GitHub:https://github.com/fengbingchun/NN_Test

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/2023面试高手/article/detail/665748
推荐阅读
相关标签
  

闽ICP备14008679号