当前位置:   article > 正文

yolo数据集划分:对图片和txt类型标签数据集进行自定义比例划分_txt格式的图像标签如何进行数据集划分

txt格式的图像标签如何进行数据集划分

第一步:填写自己的图片目录和标签目录
 

  1. # 原始路径 分别写自己的图片和标签目录
  2. image_original_path = "D:\VOC_1186\VOC\\base1186_X5\\images\\"
  3. label_original_path = "D:\VOC_1186\VOC\\base1186_X5\\labels\\"

 第二步:填写训练集、验证集、测试集三类输出路径;目录不存在时会自动新建,已存在时会先清空其中的文件

  1. # 训练集路径
  2. train_image_path = os.path.join(cur_path, "D:\datasets_yolo\images\\train\\")
  3. train_label_path = os.path.join(cur_path, "D:\datasets_yolo\labels\\train\\")
  4. # 验证集路径
  5. val_image_path = os.path.join(cur_path, "D:\datasets_yolo\images\\val\\")
  6. val_label_path = os.path.join(cur_path, "D:\datasets_yolo\labels\\val\\")
  7. # 测试集路径
  8. test_image_path = os.path.join(cur_path, "D:\datasets_yolo\images\\test\\")
  9. test_label_path = os.path.join(cur_path, "D:\datasets_yolo\labels\\test\\")
  10. # 训练集目录
  11. list_train = os.path.join(cur_path, "D:\datasets_yolo\\train.txt")
  12. list_val = os.path.join(cur_path, "D:\datasets_yolo\\val.txt")
  13. list_test = os.path.join(cur_path, "D:\datasets_yolo\\test.txt")

 第三步:自定义比例系数

  1. # Custom split ratios for the training, validation and test sets
  2. train_percent = 0.8
  3. val_percent = 0.1
  4. test_percent = 0.1

 代码支持 jpg、png 等图片格式和 txt 标签格式;示例代码使用的是 png,请将代码中的 '.png' 换成自己的图片格式。

以下为完整源码,修改完路径后直接运行即可。

  1. # 将图片和标注数据按比例切分为 训练集和测试集
  2. import shutil
  3. import random
  4. import os
  5. # 原始路径 分别写自己的图片和标签目录
  6. image_original_path = "D:\VOC_1186\VOC\\base1186_X5\\images\\"
  7. label_original_path = "D:\VOC_1186\VOC\\base1186_X5\\labels\\"
  8. cur_path = os.getcwd()
  9. # 训练集路径
  10. train_image_path = os.path.join(cur_path, "D:\datasets_yolo\images\\train\\")
  11. train_label_path = os.path.join(cur_path, "D:\datasets_yolo\labels\\train\\")
  12. # 验证集路径
  13. val_image_path = os.path.join(cur_path, "D:\datasets_yolo\images\\val\\")
  14. val_label_path = os.path.join(cur_path, "D:\datasets_yolo\labels\\val\\")
  15. # 测试集路径
  16. test_image_path = os.path.join(cur_path, "D:\datasets_yolo\images\\test\\")
  17. test_label_path = os.path.join(cur_path, "D:\datasets_yolo\labels\\test\\")
  18. # 训练集目录
  19. list_train = os.path.join(cur_path, "D:\datasets_yolo\\train.txt")
  20. list_val = os.path.join(cur_path, "D:\datasets_yolo\\val.txt")
  21. list_test = os.path.join(cur_path, "D:\datasets_yolo\\test.txt")
  22. #自定义比例
  23. train_percent = 0.8
  24. val_percent = 0.1
  25. test_percent = 0.1
  26. def del_file(path):
  27. for i in os.listdir(path):
  28. file_data = path + "\\" + i
  29. os.remove(file_data)
  30. def mkdir():
  31. if not os.path.exists(train_image_path):
  32. os.makedirs(train_image_path)
  33. else:
  34. del_file(train_image_path)
  35. if not os.path.exists(train_label_path):
  36. os.makedirs(train_label_path)
  37. else:
  38. del_file(train_label_path)
  39. if not os.path.exists(val_image_path):
  40. os.makedirs(val_image_path)
  41. else:
  42. del_file(val_image_path)
  43. if not os.path.exists(val_label_path):
  44. os.makedirs(val_label_path)
  45. else:
  46. del_file(val_label_path)
  47. if not os.path.exists(test_image_path):
  48. os.makedirs(test_image_path)
  49. else:
  50. del_file(test_image_path)
  51. if not os.path.exists(test_label_path):
  52. os.makedirs(test_label_path)
  53. else:
  54. del_file(test_label_path)
  55. def clearfile():
  56. if os.path.exists(list_train):
  57. os.remove(list_train)
  58. if os.path.exists(list_val):
  59. os.remove(list_val)
  60. if os.path.exists(list_test):
  61. os.remove(list_test)
  62. def main():
  63. mkdir()
  64. clearfile()
  65. file_train = open(list_train, 'w')
  66. file_val = open(list_val, 'w')
  67. file_test = open(list_test, 'w')
  68. total_txt = os.listdir(label_original_path)
  69. num_txt = len(total_txt)
  70. list_all_txt = range(num_txt)
  71. num_train = int(num_txt * train_percent)
  72. num_val = int(num_txt * val_percent)
  73. num_test = num_txt - num_train - num_val
  74. train = random.sample(list_all_txt, num_train)
  75. # train从list_all_txt取出num_train个元素
  76. # 所以list_all_txt列表只剩下了这些元素
  77. val_test = [i for i in list_all_txt if not i in train]
  78. # 再从val_test取出num_val个元素,val_test剩下的元素就是test
  79. val = random.sample(val_test, num_val)
  80. print("训练集数目:{}, 验证集数目:{}, 测试集数目:{}".format(len(train), len(val), len(val_test) - len(val)))
  81. for i in list_all_txt:
  82. name = total_txt[i][:-4]
  83. srcImage = image_original_path + name + '.png'
  84. srcLabel = label_original_path + name + ".txt"
  85. if i in train:
  86. dst_train_Image = train_image_path + name + '.png'
  87. dst_train_Label = train_label_path + name + '.txt'
  88. shutil.copyfile(srcImage, dst_train_Image)
  89. shutil.copyfile(srcLabel, dst_train_Label)
  90. file_train.write(dst_train_Image + '\n')
  91. elif i in val:
  92. dst_val_Image = val_image_path + name + '.png'
  93. dst_val_Label = val_label_path + name + '.txt'
  94. shutil.copyfile(srcImage, dst_val_Image)
  95. shutil.copyfile(srcLabel, dst_val_Label)
  96. file_val.write(dst_val_Image + '\n')
  97. else:
  98. dst_test_Image = test_image_path + name + '.png'
  99. dst_test_Label = test_label_path + name + '.txt'
  100. shutil.copyfile(srcImage, dst_test_Image)
  101. shutil.copyfile(srcLabel, dst_test_Label)
  102. file_test.write(dst_test_Image + '\n')
  103. file_train.close()
  104. file_val.close()
  105. file_test.close()
  106. if __name__ == "__main__":
  107. main()

声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号