当前位置:   article > 正文

yolo数据集 标签格式转换以及数据集划分_yolo的标签

yolo的标签

1 xml格式转txt格式

  1. import xml.etree.ElementTree as ET
  2. import pickle
  3. import os
  4. from os import listdir, getcwd
  5. from os.path import join
  6. import glob
  7. classes = ["", "", "", ""] # First place to edit: fill in your own class names here -- no more and no fewer than your dataset uses
  8. def convert(size, box):
  9. dw = 1.0 / size[0]
  10. dh = 1.0 / size[1]
  11. x = (box[0] + box[1]) / 2.0
  12. y = (box[2] + box[3]) / 2.0
  13. w = box[1] - box[0]
  14. h = box[3] - box[2]
  15. x = x * dw
  16. w = w * dw
  17. y = y * dh
  18. h = h * dh
  19. return (x, y, w, h)
  20. def convert_annotation(image_name):
  21. in_file = open("D:\\data set\\Pepper_3.0\\test\\xmllabels\\" + image_name[:-3] + 'xml') # 这里是需要改的第二个地方,输入xml文件的路径
  22. out_file = open("D:\\data set\\Pepper_3.0\\test\\labels\\" + image_name[:-3] + 'txt', 'w') # 这里是需要改的第三个地方,输入用于存放转换后的txt文件的路径
  23. f = open("D:\\data set\\Pepper_3.0\\test\\xmllabels\\"+ image_name[:-3] + 'xml')
  24. xml_text = f.read()
  25. root = ET.fromstring(xml_text)
  26. f.close()
  27. size = root.find('size')
  28. w = int(size.find('width').text)
  29. h = int(size.find('height').text)
  30. for obj in root.iter('object'):
  31. cls = obj.find('name').text
  32. if cls not in classes:
  33. print(cls)
  34. continue
  35. cls_id = classes.index(cls)
  36. xmlbox = obj.find('bndbox')
  37. b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
  38. float(xmlbox.find('ymax').text))
  39. bb = convert((w, h), b)
  40. out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
  41. wd = getcwd()
  42. if __name__ == '__main__':
  43. for image_path in glob.glob("D:\\data set\\Pepper_3.0\\test\\images\\*.jpg"): # 这里是需要改的最后一个地方,输入图片的路径,每一张图图片都要有一个xml文件对应,没有对应xml文件的图片要删除,当然,这里图片的命名要和对应xml文件的命名一致,之后就可以输出对应命名的txt文件
  44. image_name = image_path.split('\\')[-1]
  45. convert_annotation(image_name)
运行

2 txt格式转xml格式

  1. from xml.dom.minidom import Document
  2. import os
  3. import cv2
  4. def makexml(picPath, txtPath, xmlPath): # txt所在文件夹路径,xml文件保存路径,图片所在文件夹路径
  5. """此函数用于将yolo格式txt标注文件转换为voc格式xml标注文件
  6. 在自己的标注图片文件夹下建三个子文件夹,分别命名为picture、txt、xml
  7. """
  8. # 创建字典用来对类型进行转换,要与classes.txt文件中的类对应,且顺序要一致
  9. dic = {'0': "Capsicum anthracnose", '1': "Viral diseases", '2': "Bacterial diseases", '3': "Umbilical rot"}
  10. files = os.listdir(txtPath)
  11. for i, name in enumerate(files):
  12. xmlBuilder = Document()
  13. annotation = xmlBuilder.createElement("annotation") # 创建annotation标签
  14. xmlBuilder.appendChild(annotation)
  15. txtFile = open(txtPath + name)
  16. txtList = txtFile.readlines()
  17. img = cv2.imread(picPath + name[0:-4] + ".jpg") # 注意这里的图片后缀,.jpg/.png
  18. Pheight, Pwidth, Pdepth = img.shape
  19. folder = xmlBuilder.createElement("folder") # folder标签
  20. foldercontent = xmlBuilder.createTextNode("datasetRGB")
  21. folder.appendChild(foldercontent)
  22. annotation.appendChild(folder)
  23. filename = xmlBuilder.createElement("filename") # filename标签
  24. filenamecontent = xmlBuilder.createTextNode(name[0:-4] + ".jpg")
  25. filename.appendChild(filenamecontent)
  26. annotation.appendChild(filename)
  27. size = xmlBuilder.createElement("size") # size标签
  28. width = xmlBuilder.createElement("width") # size子标签width
  29. widthcontent = xmlBuilder.createTextNode(str(Pwidth))
  30. width.appendChild(widthcontent)
  31. size.appendChild(width)
  32. height = xmlBuilder.createElement("height") # size子标签height
  33. heightcontent = xmlBuilder.createTextNode(str(Pheight))
  34. height.appendChild(heightcontent)
  35. size.appendChild(height)
  36. depth = xmlBuilder.createElement("depth") # size子标签depth
  37. depthcontent = xmlBuilder.createTextNode(str(Pdepth))
  38. depth.appendChild(depthcontent)
  39. size.appendChild(depth)
  40. annotation.appendChild(size)
  41. for j in txtList:
  42. oneline = j.strip().split(" ")
  43. object = xmlBuilder.createElement("object") # object 标签
  44. picname = xmlBuilder.createElement("name") # name标签
  45. namecontent = xmlBuilder.createTextNode(dic[oneline[0]])
  46. picname.appendChild(namecontent)
  47. object.appendChild(picname)
  48. pose = xmlBuilder.createElement("pose") # pose标签
  49. posecontent = xmlBuilder.createTextNode("Unspecified")
  50. pose.appendChild(posecontent)
  51. object.appendChild(pose)
  52. truncated = xmlBuilder.createElement("truncated") # truncated标签
  53. truncatedContent = xmlBuilder.createTextNode("0")
  54. truncated.appendChild(truncatedContent)
  55. object.appendChild(truncated)
  56. difficult = xmlBuilder.createElement("difficult") # difficult标签
  57. difficultcontent = xmlBuilder.createTextNode("0")
  58. difficult.appendChild(difficultcontent)
  59. object.appendChild(difficult)
  60. bndbox = xmlBuilder.createElement("bndbox") # bndbox标签
  61. xmin = xmlBuilder.createElement("xmin") # xmin标签
  62. mathData = int(((float(oneline[1])) * Pwidth + 1) - (float(oneline[3])) * 0.5 * Pwidth)
  63. xminContent = xmlBuilder.createTextNode(str(mathData))
  64. xmin.appendChild(xminContent)
  65. bndbox.appendChild(xmin)
  66. ymin = xmlBuilder.createElement("ymin") # ymin标签
  67. mathData = int(((float(oneline[2])) * Pheight + 1) - (float(oneline[4])) * 0.5 * Pheight)
  68. yminContent = xmlBuilder.createTextNode(str(mathData))
  69. ymin.appendChild(yminContent)
  70. bndbox.appendChild(ymin)
  71. xmax = xmlBuilder.createElement("xmax") # xmax标签
  72. mathData = int(((float(oneline[1])) * Pwidth + 1) + (float(oneline[3])) * 0.5 * Pwidth)
  73. xmaxContent = xmlBuilder.createTextNode(str(mathData))
  74. xmax.appendChild(xmaxContent)
  75. bndbox.appendChild(xmax)
  76. ymax = xmlBuilder.createElement("ymax") # ymax标签
  77. mathData = int(((float(oneline[2])) * Pheight + 1) + (float(oneline[4])) * 0.5 * Pheight)
  78. ymaxContent = xmlBuilder.createTextNode(str(mathData))
  79. ymax.appendChild(ymaxContent)
  80. bndbox.appendChild(ymax)
  81. object.appendChild(bndbox) # bndbox标签结束
  82. annotation.appendChild(object)
  83. f = open(xmlPath + name[0:-4] + ".xml", 'w')
  84. xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
  85. f.close()
  86. if __name__ == "__main__":
  87. picPath = "D:\\data set\\d1_2.0\\d1_2.0\\" # 图片所在文件夹路径,后面的\\一定要带上
  88. txtPath = "D:\\data set\\d1_2.0\\d1_2.0txt\\" # txt所在文件夹路径,后面的\\一定要带上
  89. xmlPath = "D:\\data set\\d1_2.0\\d1_2.0xml\\" # xml文件保存路径,后面的\\一定要带上
  90. makexml(picPath, txtPath, xmlPath)

3 数据集划分训练集验证集及测试集

  1. import os
  2. import shutil
  3. import random
  4. random.seed(0)  # fixed seed so the random.shuffle used for the split is repeatable between runs
  5. def split_data(file_path,xml_path, new_file_path, train_rate, val_rate, test_rate):
  6. each_class_image = []
  7. each_class_label = []
  8. for image in os.listdir(file_path):
  9. each_class_image.append(image)
  10. for label in os.listdir(xml_path):
  11. each_class_label.append(label)
  12. data=list(zip(each_class_image,each_class_label))
  13. total = len(each_class_image)
  14. random.shuffle(data)
  15. each_class_image,each_class_label=zip(*data)
  16. train_images = each_class_image[0:int(train_rate * total)]
  17. val_images = each_class_image[int(train_rate * total):int((train_rate + val_rate) * total)]
  18. test_images = each_class_image[int((train_rate + val_rate) * total):]
  19. train_labels = each_class_label[0:int(train_rate * total)]
  20. val_labels = each_class_label[int(train_rate * total):int((train_rate + val_rate) * total)]
  21. test_labels = each_class_label[int((train_rate + val_rate) * total):]
  22. for image in train_images:
  23. print(image)
  24. old_path = file_path + '/' + image
  25. new_path1 = new_file_path + '/' + 'train' + '/' + 'images'
  26. if not os.path.exists(new_path1):
  27. os.makedirs(new_path1)
  28. new_path = new_path1 + '/' + image
  29. shutil.copy(old_path, new_path)
  30. for label in train_labels:
  31. print(label)
  32. old_path = xml_path + '/' + label
  33. new_path1 = new_file_path + '/' + 'train' + '/' + 'labels'
  34. if not os.path.exists(new_path1):
  35. os.makedirs(new_path1)
  36. new_path = new_path1 + '/' + label
  37. shutil.copy(old_path, new_path)
  38. for image in val_images:
  39. old_path = file_path + '/' + image
  40. new_path1 = new_file_path + '/' + 'val' + '/' + 'images'
  41. if not os.path.exists(new_path1):
  42. os.makedirs(new_path1)
  43. new_path = new_path1 + '/' + image
  44. shutil.copy(old_path, new_path)
  45. for label in val_labels:
  46. old_path = xml_path + '/' + label
  47. new_path1 = new_file_path + '/' + 'val' + '/' + 'labels'
  48. if not os.path.exists(new_path1):
  49. os.makedirs(new_path1)
  50. new_path = new_path1 + '/' + label
  51. shutil.copy(old_path, new_path)
  52. for image in test_images:
  53. old_path = file_path + '/' + image
  54. new_path1 = new_file_path + '/' + 'test' + '/' + 'images'
  55. if not os.path.exists(new_path1):
  56. os.makedirs(new_path1)
  57. new_path = new_path1 + '/' + image
  58. shutil.copy(old_path, new_path)
  59. for label in test_labels:
  60. old_path = xml_path + '/' + label
  61. new_path1 = new_file_path + '/' + 'test' + '/' + 'labels'
  62. if not os.path.exists(new_path1):
  63. os.makedirs(new_path1)
  64. new_path = new_path1 + '/' + label
  65. shutil.copy(old_path, new_path)
  66. if __name__ == '__main__':
  67. file_path = "D:/data set/Pepper_data/image"
  68. xml_path = "D:/data set/Pepper_data/label"
  69. new_file_path = "D:/data set/Pepper_3.0"
  70. split_data(file_path, xml_path, new_file_path, train_rate=0.6, val_rate=0.2, test_rate=0.2)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/喵喵爱编程/article/detail/869521
推荐阅读
相关标签
  

闽ICP备14008679号