赞
踩
点击解压后文件,只需要用到其中的 Test 和 Train 文件。两个文件的目录相同:
AllImages为bmp格式的原图;Annotations为xml格式的标签。
hrsc2dota.py。运行后可得到DOTA_labels,通过后面的dota_drawed.py可以查看labels是否正确。注意需要自行先创建DOTA_labels文件夹。此代码需运行两次,Train、Test各一次,DOTA_labels文件夹也需要分别创建两次。
- import xml.etree.ElementTree as ET
- import os
- import math
- import cv2
- import numpy as np
def get_label(xml_path):
    """Read the rotated-box annotations from one HRSC2016 XML file.

    Args:
        xml_path: Path of the annotation XML file.

    Returns:
        A list with one ``[class_id, cx, cy, w, h, angle]`` entry per
        ``HRSC_Object`` element. ``class_id`` is ``Class_ID % 100``; the
        remaining five values are the ``mbox_cx``, ``mbox_cy``, ``mbox_w``,
        ``mbox_h`` and ``mbox_ang`` fields converted to float. The
        ``difficult`` flag is not consulted, so every object is returned.
    """
    # Handing the path to ElementTree lets it open *and close* the file;
    # the previous explicit open() never released its handle.
    root = ET.parse(xml_path).getroot()

    box_fields = ('mbox_cx', 'mbox_cy', 'mbox_w', 'mbox_h', 'mbox_ang')
    labels = []
    for obj in root.iter('HRSC_Object'):
        # Only the last two digits of Class_ID are kept; change the mapping
        # here if a different label scheme is wanted.
        class_id = int(obj.find('Class_ID').text) % 100
        labels.append([class_id] + [float(obj.find(name).text) for name in box_fields])
    return labels
- # 计算旋转框四个顶点的坐标
def get_rotated_box_vertices(labels,label_path):
    """Write every rotated box as four corner points, one line per box.

    Args:
        labels: Sequence of ``[class_id, cx, cy, w, h, angle]`` entries; the
            angle is fed directly to ``np.cos`` / ``np.sin``.
        label_path: Text file to (over)write. Each line reads
            ``x1 y1 x2 y2 x3 y3 x4 y4 class_id`` with the coordinates
            rounded to integers.
    """
    with open(label_path, 'w') as out:
        for class_id, cx, cy, w, h, theta in labels:
            cos_t, sin_t = np.cos(theta), np.sin(theta)
            rot = np.array([[cos_t, -sin_t],
                            [sin_t, cos_t]])
            half_w, half_h = w / 2, h / 2
            # Corners of the un-rotated box, centred on the origin.
            corners = np.array([[-half_w, -half_h],
                                [half_w, -half_h],
                                [half_w, half_h],
                                [-half_w, half_h]])
            # Rotate about the origin, then shift to the box centre.
            corners = np.dot(corners, rot.T)
            corners[:, 0] += cx
            corners[:, 1] += cy
            flat = np.round(corners).astype(np.int32).reshape(-1)
            out.write(" ".join(map(str, flat)) + " " + str(class_id) + '\n')
-
-
- # return rotated_vertices_list
-
# Convert every HRSC2016 annotation under xml_root into a DOTA-style .txt
# label under txt_root (run once for Train and once for Test).
xml_root = r"HRSC2016\Test\Annotations"
txt_root = r"HRSC2016\Test\DOTA_labels"

# Create the output folder up front so the first open(..., 'w') cannot fail
# with FileNotFoundError.
os.makedirs(txt_root, exist_ok=True)

# Only .xml entries are annotations; anything else in the folder would make
# the XML parser raise.
xml_name = [name for name in os.listdir(xml_root) if name.lower().endswith('.xml')]
for name in xml_name:
    xml_path = os.path.join(xml_root, name)
    # splitext keeps dots inside the stem, unlike split('.')[0].
    txt_path = os.path.join(txt_root, os.path.splitext(name)[0] + '.txt')
    get_rotated_box_vertices(get_label(xml_path), txt_path)
dota_drawed.py。运行后可得到画有旋转框的图片,同样需要先创建DOTA_labels_drawed文件夹,运行程序后能在该文件夹内得到带旋转框的bmp格式图片。
- import xml.etree.ElementTree as ET
- import os
- import math
- import cv2
- import numpy as np
- import dota_utils as util
- import random
-
- # 手动输入cx cy w h angle进行绘制
- # from HRSC_to_DOTA import get_rotated_box_vertices
- # cx = 569.5045
- # cy = 263.4875
- # w = 261.0578
- # h = 65.08137
- # angle = -1.562451
- # vertices = get_rotated_box_vertices(cx, cy, w, h, angle)
- # vertices = np.array(vertices,dtype=np.int32)
- # img = cv2.imread(r'AllImages\100000640.bmp')
- # cv2.polylines(img,[vertices], isClosed=True, color=(255, 0, 0), thickness=2)
- # cv2.imshow('test',img)
- # cv2.waitKey(0)
- # cv2.destroyAllWindows()
-
# Draw the polygons of every DOTA label file onto its image and save the
# result under drawed_img_root.
img_root = r"HRSC2016\Train\AllImages"
label_root = r"HRSC2016\Train\DOTA_labels"
drawed_img_root = r"HRSC2016\Train\DOTA_labels_drawed"

# cv2.imwrite signals failure only through its return value, so a missing
# output folder would otherwise drop every picture without any error.
os.makedirs(drawed_img_root, exist_ok=True)

image_name = os.listdir(img_root)
for name in image_name:
    img_path = os.path.join(img_root, name)
    label_path = os.path.join(label_root, os.path.splitext(name)[0] + '.txt')
    drawed_img_path = os.path.join(drawed_img_root, name)
    objects = util.parse_dota_poly(label_path)
    print(objects)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for files it cannot decode; drawing on
        # None would raise inside cv2.polylines.
        print("skipped unreadable image: {}".format(img_path))
        continue
    # A separate loop variable: the original inner loop reused the outer index.
    poly = [np.array(obj['poly'], dtype=np.int32) for obj in objects]
    print(poly)
    cv2.polylines(img, poly, isClosed=True, color=(255, 0, 0), thickness=2)
    cv2.imwrite(drawed_img_path, img)
dota2yolo.py。将dota格式转成yolo格式,形式上把类别放在最前面了,然后做了一个normalization。这个文件里的众多函数中就用到了get_normalization_hrsc()。原作者的文件这一部分我没有看懂,为什么要+14?将其注释掉。
- import xml.etree.ElementTree as ET
- import os
- import math
- import cv2
- import dota_utils
- """
- get_normalization_dota:DOTA转YOLO v8格式,具体格式参照官网:https://docs.ultralytics.com/zh/datasets/obb/
- get_normalization_hrsc:HRSC转换后的DOTA格式转YOLO v8格式,需先配合HRSC_to_DOTA使用
- """
def get_hrsc_wh(xml_path):
    """Return ``(width, height)`` as recorded in an HRSC2016 annotation file.

    Args:
        xml_path: Path of the annotation XML file.

    Returns:
        Tuple ``(image_width, image_height)`` of ints taken from the
        ``Img_SizeWidth`` and ``Img_SizeHeight`` elements.
    """
    # Let ElementTree open and close the file itself; the previous explicit
    # open() leaked the handle.
    root = ET.parse(xml_path).getroot()
    image_width = int(root.find('Img_SizeWidth').text)
    image_height = int(root.find('Img_SizeHeight').text)
    return image_width, image_height
-
def get_dota_wh(img_path):
    """Return ``(width, height)`` of the image stored at ``img_path``.

    Args:
        img_path: Path of the image file, loaded with ``cv2.imread``.

    Returns:
        Tuple ``(image_width, image_height)``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising; fail with a message
        # that names the offending file.
        raise FileNotFoundError("cannot read image: {}".format(img_path))
    # shape is (rows, cols, ...): rows are the height, cols the width.
    image_height, image_width = img.shape[:2]
    return image_width, image_height
-
def get_normalization_hrsc(image_width,image_height,dota_label_path,yolo_label_path):
    """Convert one HRSC-derived DOTA label file into a YOLO OBB label file.

    Every non-blank input line must hold nine integers,
    ``x1 y1 x2 y2 x3 y3 x4 y4 class_id``. The fine-grained class id is
    merged into one of four classes (1 aircraft carrier, 2 warcraft,
    3 merchant ship, 4 submarine); lines whose id belongs to none of them
    are dropped. x values are divided by ``image_width`` and y values by
    ``image_height``.

    Args:
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        dota_label_path: Input label file.
        yolo_label_path: Output file; each line is the merged class followed
            by the eight normalized coordinates with six decimals.

    Raises:
        ValueError: If a non-blank line does not consist of nine integers.
    """
    merged_groups = (
        (1, (2, 5, 6, 12, 13, 31, 32, 33)),                       # aircraft carrier
        (2, (3, 7, 8, 9, 10, 11, 14, 15, 16, 17, 19, 28, 29)),    # warcraft
        (3, (4, 18, 20, 22, 24, 25, 26, 30)),                     # merchant ship
        (4, (27,)),                                               # submarine
    )
    # NOTE: an earlier variant shifted every fine-grained id by +14; that
    # offset is intentionally not applied here.
    merged_class = {fine: merged for merged, fines in merged_groups for fine in fines}

    normalized_data = []
    with open(dota_label_path, 'r') as f:
        for line in f:
            data = line.split()
            if not data:
                # Blank (e.g. trailing) lines used to abort the conversion.
                continue
            x1, y1, x2, y2, x3, y3, x4, y4, class_label = map(int, data)
            merged = merged_class.get(class_label)
            if merged is None:
                continue
            ratios = (
                x1 / image_width, y1 / image_height,
                x2 / image_width, y2 / image_height,
                x3 / image_width, y3 / image_height,
                x4 / image_width, y4 / image_height,
            )
            normalized_data.append(
                "{} {}\n".format(merged, " ".join("{:.6f}".format(r) for r in ratios))
            )

    # The input is fully read and closed before the output is opened.
    with open(yolo_label_path, 'w') as f:
        f.writelines(normalized_data)
-
def get_normalization_dota(image_width,image_height,dota_label_path,yolo_label_path):
    """Convert one DOTA label file into a YOLO OBB label file.

    The first two lines of the input are skipped. Each remaining non-blank
    line is read as ``x1 y1 x2 y2 x3 y3 x4 y4 category difficult``. Lines
    whose category is ``ship`` are dropped; any other category is replaced
    by its index in ``dota_utils.wordname_14_noship``. A category outside
    that list stops the conversion at that line, and the lines converted so
    far are still written.

    Args:
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        dota_label_path: Input DOTA label file.
        yolo_label_path: Output file; each line is the class index followed
            by the eight normalized coordinates with six decimals.
    """
    with open(dota_label_path, 'r') as f:
        lines = f.readlines()

    normalized_data = []
    for line in lines[2:]:
        data = line.strip().split()
        if not data:
            # A blank line has no category field to inspect.
            continue
        category = data[-2]
        if category == 'ship':
            continue
        if category not in dota_utils.wordname_14_noship:
            # Same message as before; the backslash is now escaped explicitly
            # instead of relying on an invalid escape sequence.
            print("发生重大错误,格式\\标注不正确")
            print(category)
            break
        class_label = dota_utils.wordname_14_noship.index(category)
        # float() also accepts coordinates written with a decimal point.
        x1, y1, x2, y2, x3, y3, x4, y4 = map(float, data[:8])
        ratios = (
            x1 / image_width, y1 / image_height,
            x2 / image_width, y2 / image_height,
            x3 / image_width, y3 / image_height,
            x4 / image_width, y4 / image_height,
        )
        # Class first, coordinates after: the previous template applied
        # "{:.6f}" to the class and a bare "{}" to the last coordinate.
        normalized_data.append(
            "{} {}\n".format(class_label, " ".join("{:.6f}".format(r) for r in ratios))
        )

    with open(yolo_label_path, 'w') as f:
        f.writelines(normalized_data)
-
-
if __name__ == '__main__':
    # HRSC2016: convert every DOTA-style label into a YOLO OBB label, taking
    # the image size from the matching HRSC annotation XML.
    hrsc_root = r"HRSC2016\Train\Annotations"
    dota_root = r"HRSC2016\Train\DOTA_labels"
    yolo_root = r"HRSC2016\Train\YOLO_labels"

    # Create the output folder so writing the first label cannot fail.
    os.makedirs(yolo_root, exist_ok=True)

    dota_label_names = os.listdir(dota_root)
    for dota_label_name in dota_label_names:
        # splitext keeps dots inside the stem, unlike split('.')[0].
        stem = os.path.splitext(dota_label_name)[0]
        hrsc_label_path = os.path.join(hrsc_root, stem + '.xml')
        dota_label_path = os.path.join(dota_root, dota_label_name)
        yolo_root_path = os.path.join(yolo_root, stem + '.txt')
        image_width, image_height = get_hrsc_wh(hrsc_label_path)
        get_normalization_hrsc(image_width, image_height, dota_label_path, yolo_root_path)
-
- # if __name__ == "__main__":
- # """
- # DOTA
- # """
- # dota_root = r"labelTxt-v1.0\labelTxt"
- # yolo_root = r"labelTxt-v1.0\YOLO_labels"
- # img_root = r"images\images"
- # dota_label_names = os.listdir(dota_root)
- # for i in range(len(dota_label_names)):
- # dota_label_name = dota_label_names[i]
- # img_path = os.path.join(img_root,dota_label_name.split('.')[0]+'.png')
- # dota_label_path = os.path.join(dota_root,dota_label_name)
- # yolo_root_path = os.path.join(yolo_root,dota_label_name.split('.')[0]+'.txt')
- # image_width,image_height = get_dota_wh(img_path)
- # get_normalization_dota(image_width,image_height,dota_label_path,yolo_root_path)
-
-
-
-
-
-
同样的需要自己先建一个Yolo_labels的文件夹。生成的txt文件长这样,第一个数表示类别,后面表示坐标。注意在这个文件中将类别进行了修改,删除了ship这个大类,将所有小类合并成了四个类。
yolo_drawed.py文件可以可视化yolo的标签结果。点击打开文件,第一次选择原图片文件夹AllImages,第二次选择yolo标签文件夹。
新建mydataset文件夹,把刚刚分别得到的 Train 和 Test 放进该文件夹。然后将split.py文件放在 Train 文件夹下,用来将其中的数据集划分成训练集和验证集。
split.py
- # 将标签格式为xml的数据集按照8:2的比例划分为训练集和验证集
-
- import os
- import shutil
- import random
- from tqdm import tqdm
-
-
def split_img(img_path, label_path, split_list, output_dir=r'D:\Mydataset\DataSetparts'):
    """Randomly split images and their labels into train and val folders.

    Files are copied (not moved) into ``images/train``, ``images/val``,
    ``labels/train`` and ``labels/val`` below ``output_dir``.

    Args:
        img_path: Folder holding the images.
        label_path: Folder holding the label files; the label belonging to
            an image is located with ``toLabelPath``.
        split_list: Two-element sequence ``[train, val]``. Only the train
            fraction is used; every image not sampled for training goes to
            the validation set.
        output_dir: Root folder of the generated dataset.
    """
    train_img_dir = os.path.join(output_dir, 'images', 'train')
    val_img_dir = os.path.join(output_dir, 'images', 'val')
    train_label_dir = os.path.join(output_dir, 'labels', 'train')
    val_label_dir = os.path.join(output_dir, 'labels', 'val')

    # exist_ok avoids the old failure mode: when the dataset folder already
    # existed, the bare except swallowed the error before the directory
    # variables were assigned and the function died later with a NameError.
    for folder in (train_img_dir, val_img_dir, train_label_dir, val_label_dir):
        os.makedirs(folder, exist_ok=True)

    train, _val = split_list
    all_img_path = [os.path.join(img_path, img) for img in os.listdir(img_path)]
    train_img = random.sample(all_img_path, int(train * len(all_img_path)))

    # Everything not drawn for training is validation data, in listing order.
    picked = set(train_img)
    val_img = [path for path in all_img_path if path not in picked]

    for img in tqdm(train_img, desc='train ', ncols=80, unit='img'):
        _copy(img, train_img_dir)
        _copy(toLabelPath(img, label_path), train_label_dir)
    for img in tqdm(val_img, desc='val ', ncols=80, unit='img'):
        _copy(img, val_img_dir)
        _copy(toLabelPath(img, label_path), val_label_dir)
-
-
- def _copy(from_path, to_path):
- shutil.copy(from_path, to_path)
-
-
def toLabelPath(img_path, label_path, label_ext='.txt'):
    """Return the path of the label file that belongs to an image.

    Args:
        img_path: Path of the image; both ``\\`` and ``/`` separators are
            understood.
        label_path: Folder holding the label files.
        label_ext: Extension of the label files. The default is ``.txt``
            because the label folder used by ``main`` is ``Yolo_labels``;
            pass ``'.xml'`` for XML annotations.

    Returns:
        ``label_path`` joined with the image's base name, its extension
        replaced by ``label_ext``.
    """
    # ntpath.basename splits on both separators, so Windows-style paths are
    # handled on every platform.
    import ntpath

    stem = os.path.splitext(ntpath.basename(img_path))[0]
    return os.path.join(label_path, stem + label_ext)
-
-
def main():
    """Split the images and YOLO labels under D:\\Mydataset into train and val sets."""
    # Raw strings: "\M", "\A" and "\Y" are not valid escape sequences and
    # trigger a warning in recent Python versions. The runtime values are
    # unchanged.
    img_path = r"D:\Mydataset\AllImages"
    label_path = r"D:\Mydataset\Yolo_labels"
    split_list = [0.8, 0.2]  # split ratio [train, val]
    split_img(img_path, label_path, split_list)


if __name__ == '__main__':
    main()
注意这里的dataset文件如果生成过了,运行时会报错,运行前要保证该目录下没有这个名字的文件。
此时在 Train 文件夹下的 dataset 文件夹中就会有划分好的数据集,按照yolo数据集文件整理我们已经得到的文件。
images中按照 test 、train 、val 分类放原AllImages文件夹中的图片,同理labels中放原Yolo_labels文件夹中的标签文件。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。