当前位置:   article > 正文

YOLO交通标志识别数据集准备_交通识别数据

交通识别数据

index (tsinghua.edu.cn)

  

 

模板转换 jinja2 包链接:https://pan.baidu.com/s/1ycb_zf8oS88HF0FvpXrYFg?pwd=ym9n 
提取码:ym9n 

  1. import os
  2. from jinja2 import Environment, PackageLoader
  class xml_fill:
      """Collect Pascal-VOC-style annotation fields for one image and render them.

      The text is produced by the Jinja2 template ``voc_template.xml``, loaded
      from the ``XML_template`` directory of the ``source`` package.
      """

      def __init__(self, path, width, height, depth=3, database='Unknown', segmented=0):
          """Load the template and record the image-level template parameters.

          Args:
              path: Path of the image file; stored as an absolute path, and its
                  base name / parent directory name become ``filename`` / ``folder``.
              width: Passed to the template unchanged.
              height: Passed to the template unchanged.
              depth: Passed to the template unchanged (default 3).
              database: Passed to the template unchanged.
              segmented: Passed to the template unchanged.
          """
          environment = Environment(
              loader=PackageLoader('source', 'XML_template'),
              keep_trailing_newline=True,
          )
          self.annotation_template = environment.get_template('voc_template.xml')

          abspath = os.path.abspath(path)
          self.template_parameters = {
              'path': abspath,
              'filename': os.path.basename(abspath),
              'folder': os.path.basename(os.path.dirname(abspath)),
              'width': width,
              'height': height,
              'depth': depth,
              'database': database,
              'segmented': segmented,
              'objects': [],
          }

      def add_obj_box(self, name, xmin, ymin, xmax, ymax, pose='Unspecified', truncated=0, difficult=0):
          """Append one object entry (name, box corners and flags) to ``objects``."""
          self.template_parameters['objects'].append({
              'name': name,
              'xmin': xmin,
              'ymin': ymin,
              'xmax': xmax,
              'ymax': ymax,
              'pose': pose,
              'truncated': truncated,
              'difficult': difficult,
          })

      def save_xml(self, annotation_path):
          """Render the template with the collected parameters into *annotation_path*."""
          # Render before opening the file so that a template error does not
          # leave an empty, truncated annotation behind.
          content = self.annotation_template.render(**self.template_parameters)
          # Write UTF-8 explicitly instead of the platform default encoding.
          # NOTE(review): the template is not visible here; UTF-8 is what an XML
          # parser assumes when the document carries no encoding declaration.
          with open(annotation_path, 'w', encoding='utf-8') as file:
              file.write(content)

 

  1. import json
  2. import os
  3. from PIL import Image
  4. from voc_xml_generator import xml_fill
  # Location that contains the "TT100K" folder; the dataset and output paths below are built from it.
  5. tt100k_parent_dir = "G:\\"
  def find_image_size(filename):
      """Return ``(width, height, depth)`` of the image stored at *filename*.

      Args:
          filename: Path of an image file readable by PIL.

      Returns:
          A tuple ``(width, height, depth)`` where width and height come from
          ``img.size`` and depth is the number of bands of the image.
      """
      with Image.open(filename) as img:
          img_width, img_height = img.size
          # Count the bands instead of assuming 3, so that e.g. greyscale
          # images are not recorded as three-channel.
          img_depth = len(img.getbands())
      return img_width, img_height, img_depth
  def load_mask(annos, datadir, imgid, filler):
      """Copy every annotated object of image *imgid* into *filler*.

      Args:
          annos: Parsed annotation dict; ``annos["imgs"][imgid]["objects"]`` is
              iterated and each entry must provide ``category`` and a ``bbox``
              dict with ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
          datadir: Unused; kept so existing callers keep working.
          imgid: Key of the image inside ``annos["imgs"]``.
          filler: Object exposing ``add_obj_box(name, xmin, ymin, xmax, ymax)``.
      """
      # The image's 'path' entry is deliberately not read: it was never used
      # and only made the function fail on entries that lack it.
      for obj in annos["imgs"][imgid]['objects']:
          box = obj['bbox']
          # int() truncates fractional coordinates toward zero.
          xmin, ymin, xmax, ymax = (
              int(box[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')
          )
          filler.add_obj_box(obj['category'], xmin, ymin, xmax, ymax)
  # Output layout: <parent>\TT100K\VOCdevkit\VOC20230102\{JPEGImages,Annotations}.
  work_sapce_dir = os.path.join(tt100k_parent_dir, "TT100K\\VOCdevkit\\")
  work_sapce_dir = os.path.join(work_sapce_dir, "VOC20230102\\")
  jpeg_images_path = os.path.join(work_sapce_dir, 'JPEGImages')
  annotations_path = os.path.join(work_sapce_dir, 'Annotations')
  # makedirs creates the missing parents as well and tolerates existing
  # directories, replacing the chain of isdir()/mkdir() checks.
  for directory in (jpeg_images_path, annotations_path):
      os.makedirs(directory, exist_ok=True)

  # Inputs: the id list of the training split and the dataset annotations.
  datadir = tt100k_parent_dir + "TT100K\\data"
  filedir = datadir + "\\annotations.json"
  # Context managers close both files; the originals were left open.
  with open(datadir + "\\train\\ids.txt") as ids_file:
      ids = ids_file.read().splitlines()
  with open(filedir) as annos_file:
      annos = json.loads(annos_file.read())

  # Write one annotation XML per training image id.
  for imgid in ids:
      filename = datadir + "\\train\\" + imgid + ".jpg"
      width, height, depth = find_image_size(filename)
      filler = xml_fill(filename, width, height, depth)
      load_mask(annos, datadir, imgid, filler)
      filler.save_xml(annotations_path + '\\' + imgid + '.xml')
      print("%s.xml saved\n" % imgid)

 

  1. import xml.etree.ElementTree as ET
  2. import os
  3. import random
  4. from shutil import move
  # Sign categories kept for training. A label's class id is its index in this
  # list; `type45` and `classes` are two names for the same list object.
  classes = type45 = (
      "i2,i4,i5,il100,il60,il80,io,ip,p10,p11,p12,p19,p23,p26,p27,p3,p5,p6,pg,ph4,ph4.5,ph5,pl100,pl120,pl20,pl30,pl40,pl5,pl50,pl60,pl70,pl80,pm20,pm30,pm55,pn,pne,po,pr40,w13,w32,w55,w57,w59,wo"
  ).split(',')

  # Threshold compared with a random draw from 1..100 when splitting train/val.
  TRAIN_RATIO = 80
  def clear_hidden_files(path):
      """Recursively delete every file whose name starts with ``._`` under *path*.

      Args:
          path: Directory to scan. Entries that are not regular files are
              descended into with a recursive call.
      """
      base = os.path.abspath(path)
      for entry in os.listdir(path):
          full_path = os.path.join(base, entry)
          if not os.path.isfile(full_path):
              clear_hidden_files(full_path)
          elif entry.startswith("._"):
              os.remove(full_path)
  def convert(size, box):
      """Convert a pixel box into a normalised centre/size tuple.

      Args:
          size: ``(width, height)`` of the image.
          box: ``(xmin, xmax, ymin, ymax)`` -- note the x values come first.

      Returns:
          ``(x_center, y_center, width, height)``, each divided by the
          corresponding image dimension.
      """
      scale_x = 1. / size[0]
      scale_y = 1. / size[1]
      x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]

      center_x = (x_min + x_max) / 2.0 * scale_x
      box_w = (x_max - x_min) * scale_x
      center_y = (y_min + y_max) / 2.0 * scale_y
      box_h = (y_max - y_min) * scale_y
      return (center_x, center_y, box_w, box_h)
  def convert_annotation(image_id):
      """Convert one VOC annotation XML into a YOLO label text file.

      Reads ``VOC/2022/ANNOTATIONS/<image_id>.xml`` and writes
      ``VOC/2022/YOLOLabels/<image_id>.txt`` (both relative to the current
      working directory) with one ``class_id x y w h`` line per kept object.
      Objects whose name is not in ``classes`` or whose ``difficult`` flag is 1
      are skipped.

      Args:
          image_id: File name without extension shared by the XML and the label.
      """
      # Hand the file name to ElementTree so the parser opens the file itself,
      # decodes it as XML rather than with the locale encoding, and closes it.
      tree = ET.parse('VOC/2022/ANNOTATIONS/%s.xml' % image_id)
      root = tree.getroot()
      size = root.find('size')
      w = int(size.find('width').text)
      h = int(size.find('height').text)

      lines = []
      for obj in root.iter('object'):
          difficult = obj.find('difficult').text
          cls = obj.find('name').text
          if cls not in classes or int(difficult) == 1:
              continue
          cls_id = classes.index(cls)
          xmlbox = obj.find('bndbox')
          # convert() expects the order (xmin, xmax, ymin, ymax).
          b = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
          bb = convert((w, h), b)
          lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

      # The output is opened only after parsing succeeded, so a broken XML no
      # longer leaves an empty label file, and the handle is always closed.
      with open('VOC/2022/YOLOLabels/%s.txt' % image_id, 'w') as out_file:
          out_file.writelines(lines)
  # Directory layout, rooted at the current working directory:
  #   VOC/2022/{ANNOTATIONS,IMAGE,YOLOLabels}   inputs and intermediate labels
  #   VOC/{images,labels}/{train,val}           final split
  wd = os.getcwd()
  data_base_dir = os.path.join(wd, "VOC/")
  work_sapce_dir = os.path.join(data_base_dir, "2022/")
  annotation_dir = os.path.join(work_sapce_dir, "ANNOTATIONS/")
  image_dir = os.path.join(work_sapce_dir, "IMAGE/")
  yolo_labels_dir = os.path.join(work_sapce_dir, "YOLOLabels/")
  yolov5_images_dir = os.path.join(data_base_dir, "images/")
  yolov5_labels_dir = os.path.join(data_base_dir, "labels/")
  yolov5_images_train_dir = os.path.join(yolov5_images_dir, "train/")
  yolov5_images_test_dir = os.path.join(yolov5_images_dir, "val/")
  yolov5_labels_train_dir = os.path.join(yolov5_labels_dir, "train/")
  yolov5_labels_test_dir = os.path.join(yolov5_labels_dir, "val/")

  # The two container directories are only created.
  for directory in (data_base_dir, work_sapce_dir):
      os.makedirs(directory, exist_ok=True)

  # Every other directory is created if missing and then purged of "._*"
  # files, in the same order as before.
  for directory in (
      annotation_dir,
      image_dir,
      yolo_labels_dir,
      yolov5_images_dir,
      yolov5_labels_dir,
      yolov5_images_train_dir,
      yolov5_images_test_dir,
      yolov5_labels_train_dir,
      yolov5_labels_test_dir,
  ):
      os.makedirs(directory, exist_ok=True)
      clear_hidden_files(directory)
  # Split the annotated images into train/val: convert each annotation to a
  # YOLO label, move image and label into the split directories, and record
  # the image path in yolov5_train.txt / yolov5_val.txt.
  list_imgs = os.listdir(image_dir)  # list image files
  # Mode 'w' already truncates, so the files are opened once; the context
  # manager closes them even if a conversion fails.
  with open(os.path.join(wd, "yolov5_train.txt"), 'w') as train_file, \
          open(os.path.join(wd, "yolov5_val.txt"), 'w') as test_file:
      for voc_path in list_imgs:
          image_path = image_dir + voc_path
          if not os.path.isfile(image_path):
              # Skip sub-directories instead of reusing the previous
              # iteration's names.
              continue
          nameWithoutExtention = os.path.splitext(voc_path)[0]
          annotation_path = os.path.join(annotation_dir, nameWithoutExtention + '.xml')
          if not os.path.exists(annotation_path):
              continue  # images without an annotation are left in place
          label_name = nameWithoutExtention + '.txt'
          label_path = os.path.join(yolo_labels_dir, label_name)
          convert_annotation(nameWithoutExtention)  # convert label

          # randint(1, 100) is inclusive on both ends, so "<=" selects exactly
          # TRAIN_RATIO values out of 100 ("<" selected one fewer).
          if random.randint(1, 100) <= TRAIN_RATIO:  # train dataset
              list_file = train_file
              images_dst, labels_dst = yolov5_images_train_dir, yolov5_labels_train_dir
          else:  # val dataset
              list_file = test_file
              images_dst, labels_dst = yolov5_images_test_dir, yolov5_labels_test_dir

          moved_image = images_dst + voc_path
          move(image_path, moved_image)
          move(label_path, labels_dst + label_name)
          # Record where the image lives after the move; the source path
          # written previously no longer exists once the file is moved.
          list_file.write(moved_image + '\n')

 

 

  class LoadImgLabels(Dataset):
      """Dataset skeleton that lists the images of one split and their label files."""

      # root = "YOLO/VOC"
      def __init__(self, root, mode):
          """Collect image paths under ``<root>/images/<mode>`` and derive label paths.

          Args:
              root: Dataset root directory.
              mode: Split sub-directory name joined below ``images``.
          """
          super(LoadImgLabels, self).__init__()
          self.root = root
          self.mode = mode
          img_path = get_path(os.path.join(root, 'images', self.mode))
          self.img_files = get_file(img_path)
          self.label_files = img2label_paths(self.img_files)

      def __len__(self):
          """Return the number of image files (the bare ``len()`` call raised TypeError)."""
          return len(self.img_files)

      def __getitem__(self, item):
          # Placeholder: item loading is not implemented in this draft.
          return None

# Get the path in the form used by the current operating system.
def get_path(path):
    """Return *path* as a string after passing it through ``Path``."""
    return str(Path(path))
# Collect the files found directly under a directory.
def get_file(path):
    """Return the entries of directory *path* that match ``*.*``.

    Args:
        path: Directory to list.

    Returns:
        A new list of matching paths, or an empty list when *path* is not a
        directory.
    """
    if not os.path.isdir(path):
        return []
    return list(glob.iglob(path + os.sep + '*.*'))
# Derive the label-file path that corresponds to each image-file path.
def img2label_paths(img_paths):
    """Map image paths to label paths.

    The first ``<sep>images<sep>`` component is replaced by ``<sep>labels<sep>``
    and the file extension is replaced by ``.txt``.

    Args:
        img_paths: Iterable of image path strings.

    Returns:
        List of label path strings, in the same order.
    """
    sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep
    # splitext() touches only the final extension. The previous str.replace()
    # rewrote every occurrence of the extension text in the path, and mangled
    # the whole string when the file had no extension at all.
    return [os.path.splitext(x.replace(sa, sb, 1))[0] + '.txt' for x in img_paths]
# Cache the labels.
def cache_labels(img_files, label_files, path='labels.cache'):
    """Read every image's size and label rows and save them with ``torch.save``.

    Args:
        img_files: Image paths.
        label_files: Label paths, paired with *img_files* by position.
        path: Destination of the cache file.

    Returns:
        Dict mapping each readable image path to ``[labels, shape]``; ``labels``
        is a float32 array (shape ``(0, 5)`` when the label file is missing or
        empty) and ``shape`` is the image's ``size``.
    """
    x = {}
    pbar = tqdm(zip(img_files, label_files), desc='Scanning images', total=len(img_files))
    for img, label in pbar:
        try:
            # The context manager releases the image handle after the check.
            with Image.open(img) as im:
                im.verify()
                shape = im.size
            l = []
            if os.path.isfile(label):
                with open(label, 'r') as f:
                    l = np.array([row.split() for row in f.read().splitlines()], dtype=np.float32)
            if len(l) == 0:
                l = np.zeros((0, 5), dtype=np.float32)
            x[img] = [l, shape]
        except Exception as err:
            # Still best-effort (the entry is skipped), but the failure is now
            # reported, and KeyboardInterrupt/SystemExit are no longer swallowed.
            print('WARNING: skipping %s: %s' % (img, err))
    torch.save(x, path)
    return x

  class LoadImgLabels(Dataset):
      """Dataset over the images of one split, with labels loaded from a cache file.

      ``__getitem__`` is still a placeholder that returns 0.
      """

      # root = "../VOC"
      def __init__(self, root, mode, img_size):
          """Load (or build) the label cache and derive file lists from it.

          Args:
              root: Dataset root directory.
              mode: Split sub-directory name joined below ``images``.
              img_size: Input image resolution; stored on the instance.

          Raises:
              ValueError: If the cache holds no entries.
          """
          super(LoadImgLabels, self).__init__()
          self.root = root
          self.mode = mode
          self.img_size = img_size  # input image resolution
          img_path = get_path(os.path.join(root, 'images', self.mode))
          # One cache per root and split. A single fixed 'labels.cache' in the
          # working directory made every split load whichever cache came first.
          cache_path = os.path.join(root, 'labels_' + mode + '.cache')
          if os.path.isfile(cache_path):
              print("读取缓存标签文件'%s'" % cache_path)
              # SECURITY: torch.load unpickles the file; only load caches that
              # this code wrote itself.
              # NOTE(review): recent PyTorch releases may default torch.load to
              # weights_only=True, which could reject the NumPy arrays stored
              # here -- confirm against the installed version.
              cache = torch.load(cache_path)
          else:
              print("生成缓存标签文件'%s'" % cache_path)
              self.img_files = get_file(img_path)
              self.label_files = img2label_paths(self.img_files)
              cache = cache_labels(self.img_files, self.label_files, cache_path)
          if not cache:
              # zip(*...) below raised the same exception type with an opaque
              # "not enough values to unpack" message.
              raise ValueError('no images found for %r (cache %r is empty)' % (img_path, cache_path))
          labels, shapes = zip(*cache.values())
          self.labels = list(labels)
          self.shapes = np.array(shapes, dtype=np.float64)
          self.img_files = list(cache.keys())
          self.label_files = img2label_paths(cache.keys())

      def __len__(self):
          """Return the number of cached images."""
          return len(self.img_files)

      def __getitem__(self, index):
          # Placeholder: item loading is not implemented yet.
          return 0


# 加载图片

并根据设定的输入大小与图片原大小的比例ratio进行resize;

if img_size = 640:(1080, 1920)———>(360, 640)

  def load_image(img_files, img_size, index):  # img_size = 640
      """Read one image and scale it so that its longer side equals *img_size*.

      Args:
          img_files: Sequence of image paths.
          img_size: Target length of the longer image side.
          index: Position of the image in *img_files*.

      Returns:
          ``(img, (h0, w0), (h, w))``: the (possibly resized) image, its original
          height/width and its height/width after resizing,
          e.g. (1080, 1920) -> (360, 640) for ``img_size=640``.

      Raises:
          FileNotFoundError: If the image cannot be read.
      """
      path = img_files[index]
      img = cv2.imread(path)
      if img is None:
          # Without this check a missing or unreadable file failed below with
          # an AttributeError on None, hiding the actual cause.
          raise FileNotFoundError('Image not found or unreadable: %s' % path)
      h0, w0 = img.shape[:2]
      r = img_size / max(h0, w0)
      if r != 1:
          # INTER_AREA when shrinking, INTER_LINEAR when enlarging.
          interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
          img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
      return img, (h0, w0), img.shape[:2]  # (1080, 1920) -> (360, 640)
# Letterbox: keep the image's aspect ratio and fill the remainder with grey.
def Make_squqre(img, new_shape=(640, 640), color=(114, 114, 114)):
    """Scale *img* to fit inside *new_shape* and pad it to exactly that shape.

    Args:
        img: Image array whose first two dimensions are (height, width).
        new_shape: Target ``(height, width)``, or a single int for a square.
        color: Border fill value.

    Returns:
        ``(img, ratio, (dw, dh))``: the padded image, the scale factor as an
        ``(r, r)`` pair, and the per-side horizontal/vertical padding (possibly
        fractional, before rounding).
    """
    shape = img.shape[:2]  # current (height, width)
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # ---------------- compute scale and padding ----------------
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    ratio = r, r
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # total padding
    # Split the padding between the two sides.
    dw /= 2
    dh /= 2

    # Apply the scale the padding was computed for. Without this step an input
    # with r != 1 came back with the wrong size even though `ratio` reported it
    # as scaled.
    if shape[::-1] != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -/+0.1 offsets send an odd leftover pixel to the bottom/right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

    # ---------------- pad ----------------
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, ratio, (dw, dh)

对标签的处理

# 1根据pad调整框的标签坐标

# 2调整框的标签,xyxy->xywh

# 3归一化标签0 - 1

  # Fragment from the dataset's item loading; `self`, `index`, `ratio`, `w`,
  # `h`, `pad` and `img` must be defined by the surrounding code.
  labels = []
  x = self.labels[index]
  if x.size > 0:
      # Turn (class, x_center, y_center, width, height) rows into corner
      # coordinates, scaled by ratio * image size and shifted by the padding.
      # NOTE(review): multiplying by w and h implies the stored values are
      # normalised; the original comment claimed they were not -- confirm.
      labels = x.copy()
      half_w = x[:, 3] / 2
      half_h = x[:, 4] / 2
      labels[:, 1] = ratio[0] * w * (x[:, 1] - half_w) + pad[0]
      labels[:, 2] = ratio[1] * h * (x[:, 2] - half_h) + pad[1]
      labels[:, 3] = ratio[0] * w * (x[:, 1] + half_w) + pad[0]
      labels[:, 4] = ratio[1] * h * (x[:, 2] + half_h) + pad[1]

  nL = len(labels)
  # Output rows have six columns; column 0 is left at zero here.
  labels_out = torch.zeros((nL, 6))
  if nL:
      # Back to centre/size form, then normalise to 0-1 by the image size.
      labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
      labels[:, [2, 4]] /= img.shape[0]  # normalised by height
      labels[:, [1, 3]] /= img.shape[1]  # normalised by width
      labels_out[:, 1:] = torch.from_numpy(labels)
# Convert corner coordinates (top-left, bottom-right) to centre + width/height.
def xyxy2xywh(x):
    """Convert nx4 boxes from ``[x1, y1, x2, y2]`` to ``[x_center, y_center, w, h]``.

    Args:
        x: ``torch.Tensor`` or NumPy array of boxes; not modified.

    Returns:
        A new tensor/array of the same type as *x*.
    """
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    top_left = x[:, [0, 1]]
    bottom_right = x[:, [2, 3]]
    y[:, [0, 1]] = (top_left + bottom_right) / 2  # centre
    y[:, [2, 3]] = bottom_right - top_left  # width, height
    return y

dataloader

  1. import torch
  2. from contextlib import contextmanager
  3. from tqdm import tqdm
  4. from YOLO.dataset.dataset import LoadImgLabels
  5. # 定义生成器 _RepeatSampler
  6. class _RepeatSampler(object):
  7. def __init__(self, sampler):
  8. self.sampler = sampler
  9. def __iter__(self):
  10. while True:
  11. yield from iter(self.sampler)
  # DataLoader variant (used as a Python generator) that reuses one iterator.
  class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
      """DataLoader whose batch sampler repeats forever and whose iterator is created once."""

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          # Set through object.__setattr__ rather than plain assignment --
          # presumably because DataLoader guards this attribute after
          # initialisation; confirm against the torch version in use.
          object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
          self.iterator = super().__iter__()

      def __len__(self):
          # Length of the wrapped (finite) batch sampler.
          return len(self.batch_sampler.sampler)

      def __iter__(self):  # an object implementing __iter__ is iterable
          # Hand out exactly len(self) batches from the shared iterator.
          remaining = len(self)
          while remaining > 0:
              yield next(self.iterator)
              remaining -= 1
  @contextmanager
  def torch_distributed_zero_first(local_rank: int):
      """Context manager that lets local rank 0 run the ``with`` body first.

      Ranks other than -1 and 0 wait at a barrier before entering the body;
      rank 0 calls the barrier after its body has finished. With
      ``local_rank == -1`` no barrier is called at all.

      Args:
          local_rank: Rank of the current process.
      """
      if local_rank not in [-1, 0]:
          torch.distributed.barrier()  # synchronizes all processes
      yield
      if local_rank == 0:
          torch.distributed.barrier()
  # Build a dataloader on top of the custom dataset (LoadImgLabels).
  def create_dataloader(path, mode, imgsz, batch_size, rank=-1):
      """Create the dataset for one split and an ``InfiniteDataLoader`` over it.

      Args:
          path: Dataset root passed to ``LoadImgLabels``.
          mode: Split name passed to ``LoadImgLabels``.
          imgsz: Image size passed to ``LoadImgLabels``.
          batch_size: Requested batch size; capped at the dataset length.
          rank: Process rank handed to ``torch_distributed_zero_first``.

      Returns:
          ``(dataloader, dataset)``.
      """
      with torch_distributed_zero_first(rank):
          dataset = LoadImgLabels(path, mode, imgsz)
      loader_options = dict(
          batch_size=min(batch_size, len(dataset)),
          shuffle=True,
          collate_fn=LoadImgLabels.collate_fn,
          pin_memory=True,
      )
      return InfiniteDataLoader(dataset, **loader_options), dataset  # torch.utils.data.DataLoader
  # Smoke test: iterate the training loader once and print the batch shapes.
  # Raw string: "\V" is not a valid escape sequence, so the plain literal
  # relied on a deprecated fallback; the resulting path is unchanged.
  dataloader, dataset = create_dataloader(r"G:\VOC", 'train', 640, 2)
  nb = len(dataloader)
  pbar = tqdm(enumerate(dataloader), total=nb)
  for i, (imgs, targets, path) in pbar:
      # Presumably a running batch counter with the epoch fixed at 1; it is
      # not used further in this listing.
      ni = i + nb * 1
      imgs = imgs / 255.0
      print(imgs.size(), targets.size())

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/832116
推荐阅读
相关标签
  

闽ICP备14008679号