当前位置:   article > 正文

将CoCo数据集Json格式转成训练Yolov8-seg分割的txt格式_yolov8分割json转txt

yolov8分割json转txt

最近在训练 YOLOv8-seg 时遇到一个问题:如何把 COCO 数据集的 JSON 标注文件转换成可用于 YOLOv8-seg 训练的 txt 文件,并且只保留自己想要训练的类别。COCO 数据集共有 80 类,而我只需要其中的某几类,例如 person、cat、dog 等。

Yolov8-seg训练数据目录结构如下:images存放训练集和验证集图片,labels存放训练集和验证集txt

  1. mydata
  2. ______images
  3. ____________train
  4. _________________001.jpg
  5. ____________val
  6. _________________002.jpg
  7. ______labels
  8. ____________train
  9. _________________001.txt
  10. ____________val
  11. _________________002.txt

具体代码如下:分别是utils.py 、cocojson2segtxt.py

utils.py

  1. import glob
  2. import os
  3. import shutil
  4. from pathlib import Path
  5. import numpy as np
  6. from PIL import ExifTags
  7. from tqdm import tqdm
  8. # Parameters
  9. img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes
  10. vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
  11. # Get orientation exif tag
  12. for orientation in ExifTags.TAGS.keys():
  13. if ExifTags.TAGS[orientation] == 'Orientation':
  14. break
  15. def exif_size(img):
  16. # Returns exif-corrected PIL size
  17. s = img.size # (width, height)
  18. try:
  19. rotation = dict(img._getexif().items())[orientation]
  20. if rotation in [6, 8]: # rotation 270
  21. s = (s[1], s[0])
  22. except:
  23. pass
  24. return s
  25. def split_rows_simple(file='../data/sm4/out.txt'): # from utils import *; split_rows_simple()
  26. # splits one textfile into 3 smaller ones based upon train, test, val ratios
  27. with open(file) as f:
  28. lines = f.readlines()
  29. s = Path(file).suffix
  30. lines = sorted(list(filter(lambda x: len(x) > 0, lines)))
  31. i, j, k = split_indices(lines, train=0.9, test=0.1, validate=0.0)
  32. for k, v in {'train': i, 'test': j, 'val': k}.items(): # key, value pairs
  33. if v.any():
  34. new_file = file.replace(s, f'_{k}{s}')
  35. with open(new_file, 'w') as f:
  36. f.writelines([lines[i] for i in v])
  37. def split_files(out_path, file_name, prefix_path=''): # split training data
  38. file_name = list(filter(lambda x: len(x) > 0, file_name))
  39. file_name = sorted(file_name)
  40. i, j, k = split_indices(file_name, train=0.9, test=0.1, validate=0.0)
  41. datasets = {'train': i, 'test': j, 'val': k}
  42. for key, item in datasets.items():
  43. if item.any():
  44. with open(f'{out_path}_{key}.txt', 'a') as file:
  45. for i in item:
  46. file.write('%s%s\n' % (prefix_path, file_name[i]))
  47. def split_indices(x, train=0.9, test=0.1, validate=0.0, shuffle=True): # split training data
  48. n = len(x)
  49. v = np.arange(n)
  50. if shuffle:
  51. np.random.shuffle(v)
  52. i = round(n * train) # train
  53. j = round(n * test) + i # test
  54. k = round(n * validate) + j # validate
  55. return v[:i], v[i:j], v[j:k] # return indices
  56. def make_dirs(dir='new_dir/'):
  57. # Create folders
  58. dir = Path(dir)
  59. if dir.exists():
  60. shutil.rmtree(dir) # delete dir
  61. for p in dir, dir / 'labels', dir / 'images':
  62. p.mkdir(parents=True, exist_ok=True) # make dir
  63. return dir
  64. def write_data_data(fname='data.data', nc=80):
  65. # write darknet *.data file
  66. lines = ['classes = %g\n' % nc,
  67. 'train =../out/data_train.txt\n',
  68. 'valid =../out/data_test.txt\n',
  69. 'names =../out/data.names\n',
  70. 'backup = backup/\n',
  71. 'eval = coco\n']
  72. with open(fname, 'a') as f:
  73. f.writelines(lines)
  74. def image_folder2file(folder='images/'): # from utils import *; image_folder2file()
  75. # write a txt file listing all imaged in folder
  76. s = glob.glob(f'{folder}*.*')
  77. with open(f'{folder[:-1]}.txt', 'w') as file:
  78. for l in s:
  79. file.write(l + '\n') # write image list
  80. def add_coco_background(path='../data/sm4/', n=1000): # from utils import *; add_coco_background()
  81. # add coco background to sm4 in outb.txt
  82. p = f'{path}background'
  83. if os.path.exists(p):
  84. shutil.rmtree(p) # delete output folder
  85. os.makedirs(p) # make new output folder
  86. # copy images
  87. for image in glob.glob('../coco/images/train2014/*.*')[:n]:
  88. os.system(f'cp {image} {p}')
  89. # add to outb.txt and make train, test.txt files
  90. f = f'{path}out.txt'
  91. fb = f'{path}outb.txt'
  92. os.system(f'cp {f} {fb}')
  93. with open(fb, 'a') as file:
  94. file.writelines(i + '\n' for i in glob.glob(f'{p}/*.*'))
  95. split_rows_simple(file=fb)
  96. def create_single_class_dataset(path='../data/sm3'): # from utils import *; create_single_class_dataset('../data/sm3/')
  97. # creates a single-class version of an existing dataset
  98. os.system(f'mkdir {path}_1cls')
  99. def flatten_recursive_folders(path='../../Downloads/data/sm4/'): # from utils import *; flatten_recursive_folders()
  100. # flattens nested folders in path/images and path/JSON into single folders
  101. idir, jdir = f'{path}images/', f'{path}json/'
  102. nidir, njdir = Path(f'{path}images_flat/'), Path(f'{path}json_flat/')
  103. n = 0
  104. # Create output folders
  105. for p in [nidir, njdir]:
  106. if os.path.exists(p):
  107. shutil.rmtree(p) # delete output folder
  108. os.makedirs(p) # make new output folder
  109. for parent, dirs, files in os.walk(idir):
  110. for f in tqdm(files, desc=parent):
  111. f = Path(f)
  112. stem, suffix = f.stem, f.suffix
  113. if suffix.lower()[1:] in img_formats:
  114. n += 1
  115. stem_new = '%g_' % n + stem
  116. image_new = nidir / (stem_new + suffix) # converts all formats to *.jpg
  117. json_new = njdir / f'{stem_new}.json'
  118. image = parent / f
  119. json = Path(parent.replace('images', 'json')) / str(f).replace(suffix, '.json')
  120. os.system("cp '%s' '%s'" % (json, json_new))
  121. os.system("cp '%s' '%s'" % (image, image_new))
  122. # cv2.imwrite(str(image_new), cv2.imread(str(image)))
  123. print('Flattening complete: %g jsons and images' % n)
  124. def coco91_to_coco80_class(): # converts 80-index (val2014) to 91-index (paper)
  125. # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
  126. x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25, None,
  127. None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
  128. 51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
  129. None, 73, 74, 75, 76, 77, 78, 79, None]
  130. return x

cocojson2segtxt.py

  1. import contextlib
  2. import json
  3. import cv2
  4. import pandas as pd
  5. from PIL import Image
  6. from collections import defaultdict
  7. from utils import *
  8. classname = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train',
  9. 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign',
  10. 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse',
  11. 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe',
  12. 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
  13. 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat',
  14. 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',
  15. 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana',
  16. 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog',
  17. 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant',
  18. 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse',
  19. 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster',
  20. 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors',
  21. 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
  22. def convert_coco_json(json_dir,savepath, selfclasses,use_segments=False, cls91to80=False):
  23. save_dir = make_dirs(savepath) # output directory
  24. coco80 = coco91_to_coco80_class()
  25. # print('coco80',coco80)
  26. # Import json
  27. for json_file in sorted(Path(json_dir).resolve().glob('*.json')):
  28. fn = Path(save_dir) / 'labels' / json_file.stem.replace('instances_', '') # folder name
  29. fn.mkdir()
  30. with open(json_file) as f:
  31. data = json.load(f)
  32. # Create image dict
  33. images = {'%g' % x['id']: x for x in data['images']}
  34. # Create image-annotations dict
  35. imgToAnns = defaultdict(list)
  36. for ann in data['annotations']:
  37. # print(ann)
  38. imgToAnns[ann['image_id']].append(ann)
  39. # Write labels file
  40. for img_id, anns in tqdm(imgToAnns.items(), desc=f'Annotations {json_file}'):
  41. img = images['%g' % img_id]
  42. h, w, f = img['height'], img['width'], img['file_name']
  43. bboxes = []
  44. segments = []
  45. for ann in anns:
  46. if ann['iscrowd']:
  47. continue
  48. # The COCO box format is [top left x, top left y, width, height]
  49. box = np.array(ann['bbox'], dtype=np.float64)
  50. box[:2] += box[2:] / 2 # xy top-left corner to center
  51. box[[0, 2]] /= w # normalize x
  52. box[[1, 3]] /= h # normalize y
  53. if box[2] <= 0 or box[3] <= 0: # if w <= 0 and h <= 0
  54. continue
  55. # print(ann)
  56. cls = coco80[ann['category_id'] - 1] if cls91to80 else ann['category_id'] - 1 # class
  57. box = [cls] + box.tolist()
  58. if box not in bboxes:
  59. bboxes.append(box)
  60. clsname = classname[cls]
  61. if clsname in selfclasses:
  62. if clsname == 'person':
  63. cls = 0
  64. if clsname == 'cat':
  65. cls = 1
  66. if clsname == 'dog':
  67. cls = 2
  68. # Segments
  69. if use_segments:
  70. if len(ann['segmentation']) > 1:
  71. s = merge_multi_segment(ann['segmentation'])
  72. s = (np.concatenate(s, axis=0) / np.array([w, h])).reshape(-1).tolist()
  73. else:
  74. s = [j for i in ann['segmentation'] for j in i] # all segments concatenated
  75. s = (np.array(s).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
  76. s = [cls] + s
  77. if s not in segments:
  78. segments.append(s)
  79. # Write
  80. if len(segments)>0:
  81. with open((fn / f).with_suffix('.txt'), 'a') as file:
  82. for i in range(len(segments)):
  83. # print(len(segments[i]))
  84. line = *(segments[i] if use_segments else bboxes[i]), # cls, box or segments
  85. # print(line)
  86. file.write(('%g ' * len(line)).rstrip() % line + '\n')
  87. def min_index(arr1, arr2):
  88. """Find a pair of indexes with the shortest distance.
  89. Args:
  90. arr1: (N, 2).
  91. arr2: (M, 2).
  92. Return:
  93. a pair of indexes(tuple).
  94. """
  95. dis = ((arr1[:, None, :] - arr2[None, :, :]) ** 2).sum(-1)
  96. return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
  97. def merge_multi_segment(segments):
  98. """Merge multi segments to one list.
  99. Find the coordinates with min distance between each segment,
  100. then connect these coordinates with one thin line to merge all
  101. segments into one.
  102. Args:
  103. segments(List(List)): original segmentations in coco's json file.
  104. like [segmentation1, segmentation2,...],
  105. each segmentation is a list of coordinates.
  106. """
  107. s = []
  108. segments = [np.array(i).reshape(-1, 2) for i in segments]
  109. idx_list = [[] for _ in range(len(segments))]
  110. # record the indexes with min distance between each segment
  111. for i in range(1, len(segments)):
  112. idx1, idx2 = min_index(segments[i - 1], segments[i])
  113. idx_list[i - 1].append(idx1)
  114. idx_list[i].append(idx2)
  115. # use two round to connect all the segments
  116. for k in range(2):
  117. # forward connection
  118. if k == 0:
  119. for i, idx in enumerate(idx_list):
  120. # middle segments have two indexes
  121. # reverse the index of middle segments
  122. if len(idx) == 2 and idx[0] > idx[1]:
  123. idx = idx[::-1]
  124. segments[i] = segments[i][::-1, :]
  125. segments[i] = np.roll(segments[i], -idx[0], axis=0)
  126. segments[i] = np.concatenate([segments[i], segments[i][:1]])
  127. # deal with the first segment and the last one
  128. if i in [0, len(idx_list) - 1]:
  129. s.append(segments[i])
  130. else:
  131. idx = [0, idx[1] - idx[0]]
  132. s.append(segments[i][idx[0]:idx[1] + 1])
  133. else:
  134. for i in range(len(idx_list) - 1, -1, -1):
  135. if i not in [0, len(idx_list) - 1]:
  136. idx = idx_list[i]
  137. nidx = abs(idx[1] - idx[0])
  138. s.append(segments[i][nidx:])
  139. return s
  140. if __name__ == '__main__':
  141. source = 'COCO'
  142. cocojsonpath = r'G:\XRW\Data\yolodata\json'
  143. savepath = r'G:\XRW\Data\yolodata\save'
  144. selfclasses = ['person', 'cat', 'dog']
  145. if source == 'COCO':
  146. convert_coco_json(cocojsonpath, # directory with *.json
  147. savepath,
  148. selfclasses,
  149. use_segments=True,
  150. cls91to80=True)
  • cocojsonpath:CoCo数据集json文件存放路径
  • savepath:生成的txt存放路径
  • selfclasses:自己想要训练的类别

 运行cocojson2segtxt.py

运行完成后得到的 txt 文件数量会少于上图所示的图片数量,因为只有包含 person、cat、dog 类别标注的图片才会生成对应的 txt。

txt存放的数据格式如下(与官方一致):

<class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>

<class-index>是对象类的索引,<x1> <y1> <x2> <y2> ... <xn> <yn>是对象分割掩码的边界坐标。坐标由空格分隔。(进行了归一化处理)

注意:我这里将person、cat、dog3类分别对应成0、1、2,可自行修改

以上步骤只生成了 txt 标注文件,还需要把对应的图片复制到相应的目录中。

  1. import glob
  2. import os
  3. import shutil
  4. imgpath = r'G:\CoCoData\val2017'
  5. txtpath = r'G:\Yolov8\ultralytics-main\datasets\mysegdata\labels\val2017'
  6. savepath = r'G:\Yolov8\ultralytics-main\datasets\mysegdata\images\val2017'
  7. imglist = glob.glob(os.path.join(imgpath,'*.jpg'))
  8. txtlist = glob.glob(os.path.join(txtpath,'*.txt'))
  9. for img in imglist:
  10. name = txtpath + '\\'+img.split('\\')[-1].split('.')[0]+'.txt'
  11. if name in txtlist:
  12. shutil.copy(img,savepath)
  • imgpath CoCo数据集图片路径
  • txtpath 生成的人猫狗txt路径
  • savepath 保存图片的路径

CoCo数据

人猫狗类别的txt

人猫狗类别的图片

这样CoCo数据集的人猫狗类别的Yolov8分割数据集就制作完成了。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/474281
推荐阅读
相关标签
  

闽ICP备14008679号