当前位置:   article > 正文

HRSC2016_dataset 旋转框 + 划分数据集_hrsc2016数据集

hrsc2016数据集

一、下载HRSC2016_dataset数据集,只需解压part01即可。

打开解压后的文件夹,只需要用到其中的 Test 和 Train 两个文件夹。两个文件夹的目录结构相同:

AllImages 为 bmp 格式的原图;Annotations 为 xml 格式的标签。

二、旋转框代码

hrsc2dota.py。运行后可得到 DOTA 格式的标签(保存在 DOTA_labels 文件夹中),通过后面的 dota_drawed.py 可以查看 labels 是否正确。注意需要自行先创建 DOTA_labels 文件夹。此代码需运行两次,Train、Test 各一次(每次运行前修改代码中的路径),DOTA_labels 文件夹也需要在 Train 和 Test 下各创建一次。

  1. import xml.etree.ElementTree as ET
  2. import os
  3. import math
  4. import cv2
  5. import numpy as np
  6. def get_label(xml_path):
  7. in_file = open(xml_path)
  8. tree=ET.parse(in_file)
  9. root = tree.getroot()
  10. labels = []
  11. for obj in root.iter('HRSC_Object'):
  12. difficult = obj.find('difficult').text
  13. class_id = int(obj.find('Class_ID').text) % 100
  14. # class_id = 0 # 标签对应关系自行修改
  15. # if int(difficult) == 1:
  16. # continue
  17. mbox_cx, mbox_cy, mbox_w, mbox_h, mbox_ang = (
  18. float(obj.find('mbox_cx').text),
  19. float(obj.find('mbox_cy').text),
  20. float(obj.find('mbox_w').text),
  21. float(obj.find('mbox_h').text),
  22. float(obj.find('mbox_ang').text)
  23. )
  24. labels.append([class_id,mbox_cx, mbox_cy, mbox_w, mbox_h,mbox_ang])
  25. return labels
  26. # 计算旋转框四个顶点的坐标
  27. def get_rotated_box_vertices(labels,label_path):
  28. with open(label_path,'w') as f:
  29. for i in range(len(labels)):
  30. class_id,mbox_cx, mbox_cy, mbox_w, mbox_h,angle_rad= labels[i]
  31. rotation_matrix = np.array([[np.cos(angle_rad), -np.sin(angle_rad)],
  32. [np.sin(angle_rad), np.cos(angle_rad)]])
  33. box_half_width = mbox_w / 2
  34. box_half_height = mbox_h / 2
  35. box_vertices = np.array([[-box_half_width, -box_half_height],
  36. [box_half_width, -box_half_height],
  37. [box_half_width, box_half_height],
  38. [-box_half_width, box_half_height]])
  39. rotated_vertices = np.dot(box_vertices, rotation_matrix.T)
  40. rotated_vertices[:, 0] += mbox_cx
  41. rotated_vertices[:, 1] += mbox_cy
  42. rotated_vertices = np.round(rotated_vertices).astype(np.int32)
  43. # print(rotated_vertices)
  44. # f.write(" ".join([str(a) for a in rotated_vertices]) + '\n')
  45. rotated_vertices = rotated_vertices.reshape(-1)
  46. f.write(" ".join([str(a) for a in rotated_vertices]) + " " + str(class_id) + '\n')
  47. # return rotated_vertices_list
  48. xml_root = r"HRSC2016\Test\Annotations"
  49. txt_root = r"HRSC2016\Test\DOTA_labels"
  50. xml_name = os.listdir(xml_root)
  51. # print(len(xml_name))
  52. for i in range(len(xml_name)):
  53. xml_path = os.path.join(xml_root,xml_name[i])
  54. txt_path = os.path.join(txt_root,xml_name[i].split('.')[0]+'.txt')
  55. get_rotated_box_vertices(get_label(xml_path),txt_path)

dota_drawed.py。运行后可得到旋转框的图片,同样需要先创建dota_labels_drawed文件夹,运行程序后能在文件夹内得到有旋转框的bmp格式图片。

  1. import xml.etree.ElementTree as ET
  2. import os
  3. import math
  4. import cv2
  5. import numpy as np
  6. import dota_utils as util
  7. import random
  8. # 手动输入cx cy w h angle进行绘制
  9. # from HRSC_to_DOTA import get_rotated_box_vertices
  10. # cx = 569.5045
  11. # cy = 263.4875
  12. # w = 261.0578
  13. # h = 65.08137
  14. # angle = -1.562451
  15. # vertices = get_rotated_box_vertices(cx, cy, w, h, angle)
  16. # vertices = np.array(vertices,dtype=np.int32)
  17. # img = cv2.imread(r'AllImages\100000640.bmp')
  18. # cv2.polylines(img,[vertices], isClosed=True, color=(255, 0, 0), thickness=2)
  19. # cv2.imshow('test',img)
  20. # cv2.waitKey(0)
  21. # cv2.destroyAllWindows()
  22. img_root = r"HRSC2016\Train\AllImages"
  23. label_root = r"HRSC2016\Train\DOTA_labels"
  24. drawed_img_root = r"HRSC2016\Train\DOTA_labels_drawed"
  25. image_name = os.listdir(img_root)
  26. for i in range(len(image_name)):
  27. img_path = os.path.join(img_root,image_name[i])
  28. label_path = os.path.join(label_root,image_name[i].split('.')[0]+'.txt')
  29. drawed_img_path = os.path.join(drawed_img_root,image_name[i])
  30. objects = util.parse_dota_poly(label_path)
  31. print(objects)
  32. img = cv2.imread(img_path)
  33. poly = []
  34. for i in range(len(objects)):
  35. poly.append(np.array(objects[i]['poly'],dtype=np.int32))
  36. print(poly)
  37. cv2.polylines(img,poly, isClosed=True, color=(255, 0, 0), thickness=2)
  38. cv2.imwrite(drawed_img_path,img)

dota2yolo.py。将dota格式转成yolo格式,形式上把类别放在最前面了,然后做了一个normalization。这个文件里的众多函数中就用到了get_normalization_hrsc()。原作者的文件这一部分我没有看懂,为什么要+14?将其注释掉。

  1. import xml.etree.ElementTree as ET
  2. import os
  3. import math
  4. import cv2
  5. import dota_utils
  6. """
  7. get_normalization_dota:DOTA转YOLO v8格式,具体格式参照官网:https://docs.ultralytics.com/zh/datasets/obb/
  8. get_normalization_hrsc:HRSC转换后的DOTA转YOLO v8格式,先配合HRSC_to_DOTA使用
  9. """
  10. def get_hrsc_wh(xml_path):
  11. in_file = open(xml_path)
  12. tree=ET.parse(in_file)
  13. root = tree.getroot()
  14. image_width = int(root.find('Img_SizeWidth').text)
  15. image_height = int(root.find('Img_SizeHeight').text)
  16. return image_width,image_height
  17. def get_dota_wh(img_path):
  18. img = cv2.imread(img_path)
  19. image_height, image_width, channels = img.shape
  20. return image_width,image_height
  21. def get_normalization_hrsc(image_width,image_height,dota_label_path,yolo_label_path):
  22. with open(dota_label_path,'r') as f:
  23. lines = f.readlines()
  24. #print(lines)
  25. normalized_data = []
  26. aircraft_carrier = [2,5,6,12,13,31,32,33]
  27. warcraft = [3,7,8,9,10,11,14,15,16,17,19,28,29]
  28. merchant_ship = [4,18,20,22,24,25,26,30]
  29. submarine = [27]
  30. #aircraft_carrier = [x + 14 for x in aircraft_carrier]
  31. #print(aircraft_carrier)
  32. #warcraft = [x + 14 for x in warcraft]
  33. #merchant_ship = [x + 14 for x in merchant_ship]
  34. #submarine = [x + 14 for x in submarine]
  35. for line in lines:
  36. data = line.strip().split()
  37. x1, y1, x2, y2, x3, y3, x4, y4, class_label = map(int, data)
  38. if class_label in aircraft_carrier:
  39. class_label = 1
  40. elif class_label in warcraft:
  41. class_label = 2
  42. elif class_label in merchant_ship:
  43. class_label = 3
  44. elif class_label in submarine:
  45. class_label = 4
  46. else:
  47. continue
  48. x1_normalized = x1 / image_width
  49. y1_normalized = y1 / image_height
  50. x2_normalized = x2 / image_width
  51. y2_normalized = y2 / image_height
  52. x3_normalized = x3 / image_width
  53. y3_normalized = y3 / image_height
  54. x4_normalized = x4 / image_width
  55. y4_normalized = y4 / image_height
  56. normalized_line = "{} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f}\n".format(
  57. class_label,x1_normalized, y1_normalized, x2_normalized, y2_normalized,
  58. x3_normalized, y3_normalized, x4_normalized, y4_normalized,
  59. )
  60. #print(normalized_line)
  61. normalized_data.append(normalized_line)
  62. with open(yolo_label_path,'w') as f:
  63. f.writelines(normalized_data)
  64. def get_normalization_dota(image_width,image_height,dota_label_path,yolo_label_path):
  65. with open(dota_label_path,'r') as f:
  66. lines = f.readlines()
  67. normalized_data = []
  68. for line in lines[2:]:
  69. data = line.strip().split()
  70. if data[-2] in dota_utils.wordname_14_noship and data[-2] != 'ship':
  71. data[-2] = dota_utils.wordname_14_noship.index(data[-2])
  72. elif data[-2] == 'ship':
  73. continue
  74. else:
  75. print("发生重大错误,格式\标注不正确")
  76. print(data[-2])
  77. break
  78. x1, y1, x2, y2, x3, y3, x4, y4, class_label,difficult = map(int, data)
  79. x1_normalized = x1 / image_width
  80. y1_normalized = y1 / image_height
  81. x2_normalized = x2 / image_width
  82. y2_normalized = y2 / image_height
  83. x3_normalized = x3 / image_width
  84. y3_normalized = y3 / image_height
  85. x4_normalized = x4 / image_width
  86. y4_normalized = y4 / image_height
  87. normalized_line = "{:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {}\n".format(
  88. class_label,x1_normalized, y1_normalized, x2_normalized, y2_normalized,
  89. x3_normalized, y3_normalized, x4_normalized, y4_normalized,
  90. )
  91. normalized_data.append(normalized_line)
  92. with open(yolo_label_path,'w') as f:
  93. f.writelines(normalized_data)
  94. if __name__ == '__main__':
  95. """
  96. HRSC
  97. """
  98. hrsc_root = r"HRSC2016\Train\Annotations"
  99. dota_root = r"HRSC2016\Train\DOTA_labels"
  100. yolo_root = r"HRSC2016\Train\YOLO_labels"
  101. dota_label_names = os.listdir(dota_root)
  102. for i in range(len(dota_label_names)):
  103. dota_label_name = dota_label_names[i]
  104. hrsc_label_path = os.path.join(hrsc_root,dota_label_name.split('.')[0]+'.xml')
  105. dota_label_path = os.path.join(dota_root,dota_label_name)
  106. yolo_root_path = os.path.join(yolo_root,dota_label_name.split('.')[0]+'.txt')
  107. image_width,image_height = get_hrsc_wh(hrsc_label_path)
  108. get_normalization_hrsc(image_width,image_height,dota_label_path,yolo_root_path)
  109. # if __name__ == "__main__":
  110. # """
  111. # DOTA
  112. # """
  113. # dota_root = r"labelTxt-v1.0\labelTxt"
  114. # yolo_root = r"labelTxt-v1.0\YOLO_labels"
  115. # img_root = r"images\images"
  116. # dota_label_names = os.listdir(dota_root)
  117. # for i in range(len(dota_label_names)):
  118. # dota_label_name = dota_label_names[i]
  119. # img_path = os.path.join(img_root,dota_label_name.split('.')[0]+'.png')
  120. # dota_label_path = os.path.join(dota_root,dota_label_name)
  121. # yolo_root_path = os.path.join(yolo_root,dota_label_name.split('.')[0]+'.txt')
  122. # image_width,image_height = get_dota_wh(img_path)
  123. # get_normalization_dota(image_width,image_height,dota_label_path,yolo_root_path)

同样的需要自己先建一个Yolo_labels的文件夹。生成的txt文件长这样,第一个数表示类别,后面表示坐标。注意在这个文件中将类别进行了修改,删除了ship这个大类,将所有小类合并成了四个类。

yolo_drawed.py文件可以可视化yolo的标签结果。点击打开文件,第一次选择原图片文件夹AllImages,第二次选择yolo标签文件夹。

原文链接:HRSC数据集解读,DOTA数据集转YOLO旋转框格式,HRSC数据集转YOLO旋转框格式(CSDN博客):https://blog.csdn.net/weixin_52450371/article/details/135687628

三、划分数据集

新建mydataset文件夹,把刚刚分别得到的 Train 和 Test 放进该文件夹。然后将split.py文件放在 Train 文件夹下,用来将其中的数据集划分成训练集和验证集。

split.py

  1. # 将标签格式为xml的数据集按照8:2的比例划分为训练集和验证集
  2. import os
  3. import shutil
  4. import random
  5. from tqdm import tqdm
  6. def split_img(img_path, label_path, split_list):
  7. try: # 创建数据集文件夹
  8. Data = 'D:\Mydataset\DataSetparts'
  9. os.mkdir(Data)
  10. train_img_dir = Data + '/images/train'
  11. val_img_dir = Data + '/images/val'
  12. # test_img_dir = Data + '/images/test'
  13. train_label_dir = Data + '/labels/train'
  14. val_label_dir = Data + '/labels/val'
  15. # test_label_dir = Data + '/labels/test'
  16. # 创建文件夹
  17. os.makedirs(train_img_dir)
  18. os.makedirs(train_label_dir)
  19. os.makedirs(val_img_dir)
  20. os.makedirs(val_label_dir)
  21. # os.makedirs(test_img_dir)
  22. # os.makedirs(test_label_dir)
  23. except:
  24. print('文件目录已存在')
  25. train, val = split_list
  26. all_img = os.listdir(img_path)
  27. all_img_path = [os.path.join(img_path, img) for img in all_img]
  28. # all_label = os.listdir(label_path)
  29. # all_label_path = [os.path.join(label_path, label) for label in all_label]
  30. train_img = random.sample(all_img_path, int(train * len(all_img_path)))
  31. train_img_copy = [os.path.join(train_img_dir, img.split('\\')[-1]) for img in train_img]
  32. train_label = [toLabelPath(img, label_path) for img in train_img]
  33. train_label_copy = [os.path.join(train_label_dir, label.split('\\')[-1]) for label in train_label]
  34. for i in tqdm(range(len(train_img)), desc='train ', ncols=80, unit='img'):
  35. _copy(train_img[i], train_img_dir)
  36. _copy(train_label[i], train_label_dir)
  37. all_img_path.remove(train_img[i])
  38. val_img = all_img_path
  39. val_label = [toLabelPath(img, label_path) for img in val_img]
  40. for i in tqdm(range(len(val_img)), desc='val ', ncols=80, unit='img'):
  41. _copy(val_img[i], val_img_dir)
  42. _copy(val_label[i], val_label_dir)
  43. def _copy(from_path, to_path):
  44. shutil.copy(from_path, to_path)
  45. def toLabelPath(img_path, label_path):
  46. img = img_path.split('\\')[-1]
  47. label = img.split('.bmp')[0] + '.xml'
  48. return os.path.join(label_path, label)
  49. def main():
  50. img_path = "D:\Mydataset\AllImages"
  51. label_path = "D:\Mydataset\Yolo_labels"
  52. split_list = [0.8, 0.2] # 数据集划分比例[train:val]
  53. split_img(img_path, label_path, split_list)
  54. if __name__ == '__main__':
  55. main()

注意:如果输出文件夹(代码中的 DataSetparts)已经生成过,运行时会报错;运行前要保证该目录下没有同名的文件夹。

此时在 Train 文件夹下的 dataset 文件夹中就会有划分好的数据集,按照yolo数据集文件整理我们已经得到的文件。

images 中按照 test、train、val 分类存放原 AllImages 文件夹中的图片;同理,labels 中存放原 Yolo_labels 文件夹中的标签文件。

声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号