赞
踩
目录
3. VOC格式转YOLO格式(xml_to_yolo.py)
4.2 将所有训练集、测试集数据名称(去掉后缀)分别写入到train.txt、test.txt
- 先标注好自己的数据集
- 具体可见——目标检测——0.3 标注数据
- 准备VOCdevkit文件夹及子文件夹
- 各个子文件夹——见下1
- VOCdevkit-VOC2012-Annotations——存放标注后的xml文件
- VOCdevkit-VOC2012-JPEGImages——存放标注后的图像文件
- 准备data文件夹及子文件
- data-pascal_voc_classes.json——
json文件中的内容为想要识别的各个种类
具体文件夹目录见下1、具体内容见下2
├── data
│   ├── pascal_voc_classes.json   # 用以保存识别类别信息
│
├── VOCdevkit
│   ├── VOC2012
│   │   ├── Annotations   # 用以存放所有xml标签文件
│   │   ├── JPEGImages    # 用以存放所有标注好的图片
│   │   ├── ImageSets
│   │   │   ├── Main      # 用以存放所有txt文件(注意此时该文件夹是空的)
{ "dec_1_pass": 1, "dec_1_fail": 2, "dec_2_pass": 3, "dec_2_fail": 4 }
- 根据2.1准备好的数据集,制作train.txt与val.txt
- 生成的train.txt与val.txt和当前python文件在同一目录下,之后要挪到2.1中VOCdevkit-VOC2012-ImageSets-Main
- 要设置的参数有两个
- files_path——指定xml标签文件路径
- val_rate——指定验证集占数据集的比例
- """
- 1.针对xml文件(即Pascal VOC的Annotations文件夹里的文件)运行后生成train.txt与val.txt
- 2.生成的train.txt与val.txt存放当前目录下,之后要转存到指定文件夹(VOCdevkit-VOC2012-ImageSets-Main)
- 3.使用设置——
- 该文件要设定的地方有两处
- (1)files_path——指定xml文件路径;如 files_path = "./VOCdevkit/VOC2012/Annotations"
- (2)val_rate——指定验证集比例,如 val_rate = 0.3 为30%为验证集
- """
- import os
- import random
-
def main(files_path="./VOCdevkit/VOC2012/Annotations", val_rate=0.3):
    """Split the annotation files into a training list and a validation list.

    The extension-less names of all ``.xml`` files found in ``files_path`` are
    randomly divided and written, one name per line, to ``train.txt`` and
    ``val.txt`` in the current working directory.

    Args:
        files_path: Directory that holds the xml annotation files.
        val_rate: Fraction of the samples that goes into the validation list
            (0.3 means 30%).

    Raises:
        AssertionError: If ``files_path`` does not exist.
        SystemExit: With exit code 1 if ``train.txt`` or ``val.txt`` already
            exists; existing lists are never overwritten.
    """
    random.seed(0)  # fixed seed so that the split is reproducible
    assert os.path.exists(files_path), "path: '{}' does not exist.".format(files_path)

    # os.path.splitext removes only the final extension, so names that contain
    # dots themselves (e.g. "scene_E122.3_N29.9.xml") are kept intact.
    # Anything that is not an xml annotation is ignored.
    files_name = sorted(
        os.path.splitext(file)[0]
        for file in os.listdir(files_path)
        if file.lower().endswith(".xml")
    )
    files_num = len(files_name)

    # A set gives O(1) membership tests; the sampled indices are the same as
    # with a list because the random call itself is unchanged.
    val_index = set(random.sample(range(0, files_num), k=int(files_num * val_rate)))
    train_files = [name for index, name in enumerate(files_name) if index not in val_index]
    val_files = [name for index, name in enumerate(files_name) if index in val_index]

    # Check both outputs before creating either one, otherwise an empty
    # train.txt would be left behind when only val.txt already exists.
    existing = [name for name in ("train.txt", "val.txt") if os.path.exists(name)]
    if existing:
        print("File exists: {}".format(", ".join(existing)))
        raise SystemExit(1)

    try:
        # "x" (exclusive creation) still guards against a file appearing
        # between the check above and the open call.
        with open("train.txt", "x") as train_f, open("val.txt", "x") as eval_f:
            train_f.write("\n".join(train_files))
            eval_f.write("\n".join(val_files))
    except FileExistsError as e:
        print(e)
        raise SystemExit(1)


if __name__ == '__main__':
    main()
- 注意——文件路径
- 运行结果
- 运行后生成文件如下1
- 运行后总文件目录如下2
- 生成的my_data_label.names文件内容如下3
- ├── my_yolo_dataset 自定义数据集根目录
- │ ├── train 训练集目录
- │ │ ├── images 训练集图像目录
- │ │ └── labels 训练集标签目录
- │ └── val 验证集目录
- │ ├── images 验证集图像目录
- │ └── labels 验证集标签目录
- ├── data
- │ ├── my_data_label.names
- ├── data
- │ ├── pascal_voc_classes.json # 用以保存识别类别信息
- │ ├── my_data_label.names
- │
- ├── VOCdevkit
- │ ├── VOC2012
- │ │ ├── Annotations # 用以存放所有xml标签文件
- │ │ ├── JPEGImages # 用以存放所有标注好的图片
- │ │ ├── ImageSets
- │ │ │ ├── Main # 用以存放所有txt文件
- │
- ├── my_yolo_dataset #自定义数据集根目录
- │ ├── train #训练集目录
- │ │ ├── images #训练集图像目录
- │ │ └── labels #训练集标签目录
- │ └── val #验证集目录
- │ ├── images #验证集图像目录
- │ └── labels #验证集标签目录
- dec_1_pass
- dec_1_fail
- dec_2_pass
- dec_2_fail
- """
- 本脚本有两个功能:
- 1.将voc数据集标注信息(.xml)转为yolo标注格式(.txt),并将图像文件复制到相应文件夹
- 2.根据json标签文件,生成对应names标签(my_data_label.names),该文件在data-my_data_label.names
- """
- import os
- from tqdm import tqdm
- from lxml import etree
- import json
- import shutil
-
-
# Root directory of the VOC dataset and the dataset version.
voc_root = r"E:\Code Collection\make my dataset2\VOCdevkit"
voc_version = "VOC2012"

# Name lists (inside ImageSets/Main) for the training and validation sets.
train_txt = "train.txt"
val_txt = "val.txt"

# Directory that receives the converted YOLO dataset.
save_file_root = "./my_yolo_dataset"

# JSON file that maps every class name to its (1-based) index.
label_json_path = r'E:\Code Collection\make my dataset2\data\pascal_voc_classes.json'

# Derived VOC locations: images, xml annotations and the two name lists.
voc_images_path = os.path.join(voc_root, voc_version, "JPEGImages")
voc_xml_path = os.path.join(voc_root, voc_version, "Annotations")
train_txt_path = os.path.join(voc_root, voc_version, "ImageSets", "Main", train_txt)
val_txt_path = os.path.join(voc_root, voc_version, "ImageSets", "Main", val_txt)

# Nothing below this line normally needs to be edited.

# Fail early if any required input file or directory is missing.
assert os.path.exists(voc_images_path), "VOC images path not exist..."
assert os.path.exists(voc_xml_path), "VOC xml path not exist..."
assert os.path.exists(train_txt_path), "VOC train txt file not exist..."
assert os.path.exists(val_txt_path), "VOC val txt file not exist..."
assert os.path.exists(label_json_path), "label_json_path does not exist..."
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(save_file_root, exist_ok=True)
-
-
def parse_xml_to_dict(xml):
    """Recursively turn an XML element into nested dictionaries.

    Modelled on TensorFlow's ``recursive_parse_xml_to_dict``.

    Args:
        xml: An element of a parsed XML tree (e.g. from ``lxml.etree``).

    Returns:
        A one-entry dict ``{tag: content}``. For an element without children
        the content is its text; otherwise it is a dict of the children.
        Children tagged ``object`` may occur several times, so they are
        gathered in a list instead of overwriting each other.
    """
    if not len(xml):  # leaf element: nothing to recurse into
        return {xml.tag: xml.text}

    parsed = {}
    for node in xml:
        value = parse_xml_to_dict(node)[node.tag]
        if node.tag == 'object':
            parsed.setdefault(node.tag, []).append(value)
        else:
            parsed[node.tag] = value
    return {xml.tag: parsed}
-
-
def translate_info(file_names: list, save_root: str, class_dict: dict, train_val='train'):
    """Convert VOC xml annotations to YOLO txt labels and copy the images.

    For every name in ``file_names`` the image ``<name>.jpg`` and the
    annotation ``<name>.xml`` are looked up in the module-level
    ``voc_images_path`` / ``voc_xml_path`` directories. A label file
    ``<save_root>/<train_val>/labels/<name>.txt`` is written with one
    ``class x_center y_center width height`` line per bounding box (all
    coordinates relative to the image size), and the image is copied to
    ``<save_root>/<train_val>/images``.

    :param file_names: sample names without extension
    :param save_root: root directory of the YOLO dataset to create
    :param class_dict: mapping from class name to 1-based class index
    :param train_val: sub-directory name, e.g. 'train' or 'val'
    :return: None
    """
    save_txt_path = os.path.join(save_root, train_val, "labels")
    save_images_path = os.path.join(save_root, train_val, "images")
    os.makedirs(save_txt_path, exist_ok=True)
    os.makedirs(save_images_path, exist_ok=True)

    for file in tqdm(file_names, desc="translate {} file...".format(train_val)):
        # Both the image and its annotation must exist.
        img_path = os.path.join(voc_images_path, file + ".jpg")
        assert os.path.exists(img_path), "file:{} not exist...".format(img_path)

        xml_path = os.path.join(voc_xml_path, file + ".xml")
        assert os.path.exists(xml_path), "file:{} not exist...".format(xml_path)

        # Parse from bytes: lxml refuses a str that carries an XML encoding
        # declaration, while bytes let the parser honour that declaration.
        with open(xml_path, "rb") as fid:
            xml = etree.fromstring(fid.read())
        data = parse_xml_to_dict(xml)["annotation"]
        img_height = int(data["size"]["height"])
        img_width = int(data["size"]["width"])

        # parse_xml_to_dict only creates the "object" key when at least one
        # object exists, so a missing key means "no objects": skip the sample.
        objects = data.get("object")
        if not objects:
            print("Warning: in '{}' xml, there are no objects.".format(xml_path))
            continue

        label_lines = []
        for obj in objects:
            xmin = float(obj["bndbox"]["xmin"])
            xmax = float(obj["bndbox"]["xmax"])
            ymin = float(obj["bndbox"]["ymin"])
            ymax = float(obj["bndbox"]["ymax"])
            class_name = obj["name"]
            class_index = class_dict[class_name] - 1  # YOLO class ids start at 0

            # Boxes with zero/negative width or height would make the
            # regression loss NaN during training, so they are dropped.
            if xmax <= xmin or ymax <= ymin:
                print("Warning: in '{}' xml, there are some bbox w/h <=0".format(xml_path))
                continue

            # Corner format -> centre/size format, in absolute pixels.
            xcenter = xmin + (xmax - xmin) / 2
            ycenter = ymin + (ymax - ymin) / 2
            w = xmax - xmin
            h = ymax - ymin

            # Absolute -> relative coordinates, rounded to 6 decimals.
            xcenter = round(xcenter / img_width, 6)
            ycenter = round(ycenter / img_height, 6)
            w = round(w / img_width, 6)
            h = round(h / img_height, 6)

            label_lines.append(" ".join(str(i) for i in [class_index, xcenter, ycenter, w, h]))

        # Joining the collected lines avoids the leading blank line that
        # appeared when the first box of a file had been skipped.
        with open(os.path.join(save_txt_path, file + ".txt"), "w", encoding='utf-8') as f:
            f.write("\n".join(label_lines))

        # Copy the image unless it is already present in the target folder.
        path_copy_to = os.path.join(save_images_path, os.path.basename(img_path))
        if not os.path.exists(path_copy_to):
            shutil.copyfile(img_path, path_copy_to)
-
-
def create_class_names(class_dict: dict):
    """Write the class names to ``./data/my_data_label.names``, one per line.

    The names are ordered by their index in ``class_dict`` so that line ``i``
    of the file is the class whose YOLO id is ``i`` (the label files use
    ``class_dict[name] - 1`` as the id), regardless of the key order of the
    JSON file. No newline is written after the last name.

    :param class_dict: mapping from class name to 1-based class index
    :return: None
    """
    names = sorted(class_dict, key=class_dict.get)
    os.makedirs("./data", exist_ok=True)  # the folder may not exist in the working directory
    with open("./data/my_data_label.names", "w", encoding='utf-8') as w:
        w.write("\n".join(names))
-
-
def main():
    """Convert the train and val splits to YOLO format and write the names file.

    Reads the class mapping from ``label_json_path`` and the sample names from
    ``train_txt_path`` / ``val_txt_path`` (blank lines are ignored), converts
    both splits with ``translate_info`` and finally creates
    ``my_data_label.names`` with ``create_class_names``.
    """
    # The context manager closes the JSON file; JSON text is UTF-8.
    with open(label_json_path, 'r', encoding='utf-8') as json_file:
        class_dict = json.load(json_file)

    for txt_path, split in ((train_txt_path, "train"), (val_txt_path, "val")):
        with open(txt_path, "r") as r:
            file_names = [i for i in r.read().splitlines() if len(i.strip()) > 0]
        # Convert the annotations and copy the images of this split.
        translate_info(file_names, save_file_root, class_dict, split)

    create_class_names(class_dict)


if __name__ == "__main__":
    main()
- # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
- path: E:\Code Collection\yolov5-6.1\my_yolo_dataset
- train: # train images (relative to 'path') 16551 images
- - train/images
- val: # val images (relative to 'path') 4952 images
- - val/images
-
-
- # Classes
- nc: 4 # number of classes
- names: ['dec_1_pass', 'dec_1_fail', "dec_2_pass", "dec_2_fail"] # class names
- ├── data
- │ ├── pascal_voc_classes.json # 用以保存识别类别信息
- │ ├── my_data_label.names # 用以保存识别类别信息
- │ ├── my_yaml.yaml # 用以存放路径信息与类别信息
- │
- ├── VOCdevkit
- │ ├── VOC2012
- │ │ ├── Annotations # 用以存放所有xml标签文件
- │ │ ├── JPEGImages # 用以存放所有标注好的图片
- │ │ ├── ImageSets
- │ │ │ ├── Main # 用以存放所有txt文件
- │
- ├── my_yolo_dataset # 自定义数据集根目录
- │ ├── train # 训练集目录
- │ │ ├── images # 训练集图像目录
- │ │ └── labels # 训练集标签目录
- │ └── val # 验证集目录
- │ ├── images # 验证集图像目录
- │ └── labels # 验证集标签目录
- # -*- coding:UTF-8 -*-
- """
- @Time :2023/5/17 10:32
- @Author :zjy
- @File :zjy1.py
- @Aim :
- """
- import xml.dom.minidom
- import glob
- from PIL import Image
- from math import ceil
- import shutil
- import os
-
# Folder with the YOLO-format .txt label files.
yolo_file = r'E:\资料备份\小论文相关文件\数据集\YOLO格式数据集——18000+200\18000+200数据集\train\labels'
# Folder that receives the generated VOC .xml files.
turn_xml_file = r'E:\资料备份\小论文相关文件\数据集\VOC格式数据集——18000+200\train\xml'
# Folder with the .jpg images.
img_file = r'E:\资料备份\小论文相关文件\数据集\YOLO格式数据集——18000+200\18000+200数据集\train\images'

# Class names; a YOLO class id is used as an index into this list.
labels = ["aeroplane",
          "bicycle",
          "bird",
          "boat",
          "bottle",
          "bus",
          "car",
          "cat",
          "chair",
          "cow",
          "diningtable",
          "dog",
          "horse",
          "motorbike",
          "person",
          "pottedplant",
          "sheep",
          "sofa",
          "train",
          "tvmonitor"]
src_img_dir = img_file
src_txt_dir = yolo_file
src_xml_dir = turn_xml_file

# Folder that receives the generated name list (train.txt).
path = r'E:\资料备份\小论文相关文件\数据集\VOC格式数据集——18000+200'
# Source and destination folders for the image copy step.
base_src = r'/home/dwt/DataSets/MSAR/MSAR-1.0/val/images'
base_dst = r'/home/dwt/MyCode/pycharm_projects/YOLOX_offical/datasets/VOCdevkit/VOC2011/JPEGImages'


def list_stems(folder, pattern):
    """Return the sorted, extension-less names of the files in *folder* matching *pattern*."""
    return sorted(
        os.path.splitext(os.path.basename(found))[0]
        for found in glob.glob(os.path.join(folder, pattern))
    )


def yolo_line_to_voc_object(line, width, height):
    """Convert one YOLO label line to ``(class_name, xmin, ymin, xmax, ymax)``.

    *line* holds ``class x_center y_center box_width box_height`` with the four
    coordinates relative to the image size; the fields may be separated by
    whitespace or by commas. The returned corners are integer pixels clamped
    to the image, because rounding can push a box at the border outside it.
    """
    fields = line.replace(',', ' ').split()
    class_name = str(labels[int(fields[0])])
    center_x = round(float(fields[1]) * width)
    center_y = round(float(fields[2]) * height)
    bbox_width = round(float(fields[3]) * width)
    bbox_height = round(float(fields[4]) * height)
    xmin = max(0, int(center_x - bbox_width / 2))
    ymin = max(0, int(center_y - bbox_height / 2))
    xmax = min(width, int(center_x + bbox_width / 2))
    ymax = min(height, int(center_y + bbox_height / 2))
    return class_name, xmin, ymin, xmax, ymax


def build_voc_xml(img, width, height, label_lines):
    """Return the VOC annotation text for image *img* (name without extension).

    One ``<object>`` element is produced per non-blank entry of *label_lines*;
    with no entries only the header (folder, filename, size) is produced.
    """
    parts = [
        '<annotation>\n',
        ' <folder>VOC2007</folder>\n',
        ' <filename>' + str(img) + '.jpg' + '</filename>\n',
        ' <size>\n',
        ' <width>' + str(width) + '</width>\n',
        ' <height>' + str(height) + '</height>\n',
        ' <depth>3</depth>\n',
        ' </size>\n',
    ]
    for line in label_lines:
        if not line.strip():  # tolerate blank lines in the label file
            continue
        class_name, xmin, ymin, xmax, ymax = yolo_line_to_voc_object(line, width, height)
        parts.extend([
            ' <object>\n',
            ' <name>' + class_name + '</name>\n',
            ' <pose>Unspecified</pose>\n',
            ' <truncated>0</truncated>\n',
            ' <difficult>0</difficult>\n',
            ' <bndbox>\n',
            ' <xmin>' + str(xmin) + '</xmin>\n',
            ' <ymin>' + str(ymin) + '</ymin>\n',
            ' <xmax>' + str(xmax) + '</xmax>\n',
            ' <ymax>' + str(ymax) + '</ymax>\n',
            ' </bndbox>\n',
            ' </object>\n',
        ])
    parts.append('</annotation>')
    return ''.join(parts)


def convert_labels_to_xml():
    """Write one VOC xml file into ``src_xml_dir`` for every jpg in ``src_img_dir``."""
    img_names = list_stems(src_img_dir, '*.jpg')
    total_num = len(img_names)
    for count, img in enumerate(img_names, start=1):
        if count % 1000 == 0:
            print("当前转换进度{}/{}".format(count, total_num))
        # Context managers close every handle; with thousands of images the
        # unclosed files of a plain open() would pile up.
        with Image.open(os.path.join(src_img_dir, img + '.jpg')) as im:
            width, height = im.size
        with open(os.path.join(src_txt_dir, img + '.txt')) as label_file:
            gt = label_file.read().splitlines()
        # UTF-8 is what XML parsers assume when there is no declaration.
        with open(os.path.join(src_xml_dir, img + '.xml'), 'w', encoding='utf-8') as xml_file:
            xml_file.write(build_voc_xml(img, width, height, gt))


def write_name_list(list_name='train.txt'):
    """Write the names of all xml files in ``src_xml_dir`` to ``path/list_name``.

    Change *list_name* when generating the list for another split (val/test).
    """
    with open(os.path.join(path, list_name), 'w') as txt_file:
        txt_file.writelines(str(item) + '\n' for item in list_stems(src_xml_dir, '*.xml'))


def copy_images():
    """Copy every jpg from ``base_src`` to ``base_dst``."""
    file_names = sorted(os.path.basename(item) for item in glob.glob(os.path.join(base_src, '*.jpg')))
    total_num = len(file_names)
    for c, item in enumerate(file_names, start=1):
        if c % 1000 == 0:
            print('当前已复制{}/{}'.format(c, total_num))
        shutil.copyfile(os.path.join(base_src, item), os.path.join(base_dst, item))


if __name__ == '__main__':
    convert_labels_to_xml()
    write_name_list('train.txt')
    copy_images()
- # -*- coding:UTF-8 -*-
- """
- @Time :2023/5/17 10:32
- @Author :zjy
- @File :zjy1.py
- @Aim :
- """
- import xml.dom.minidom
- import glob
- from PIL import Image
- from math import ceil
- import shutil
- import os
- import glob
-
# Folder with the YOLO-format .txt label files.
yolo_file = r'E:\资料备份\小论文相关文件\数据集\YOLO格式数据集——18000+200\18000+200数据集\test\labels'
# Folder that receives the generated VOC .xml files.
turn_xml_file = r'E:\资料备份\小论文相关文件\数据集\VOC格式数据集——18000+200\新建文件夹'
# Folder with the .jpg images.
img_file = r'E:\资料备份\小论文相关文件\数据集\YOLO格式数据集——18000+200\18000+200数据集\test\images'
# Folder that receives the generated name list (test.txt).
path = r'E:\资料备份\小论文相关文件\数据集\VOC格式数据集——18000+200\新建文件夹'
# Source and destination folders for the image copy step.
base_src = r'E:\资料备份\小论文相关文件\数据集\YOLO格式数据集——18000+200\18000+200数据集\test\images'
base_dst = r'E:\资料备份\小论文相关文件\数据集\VOC格式数据集——18000+200\新建文件夹'

# Class names; a YOLO class id is used as an index into this list.
labels = ["aeroplane",
          "bicycle",
          "bird",
          "boat",
          "bottle",
          "bus",
          "car",
          "cat",
          "chair",
          "cow",
          "diningtable",
          "dog",
          "horse",
          "motorbike",
          "person",
          "pottedplant",
          "sheep",
          "sofa",
          "train",
          "tvmonitor"]
src_img_dir = img_file
src_txt_dir = yolo_file
src_xml_dir = turn_xml_file


def list_stems(folder, pattern):
    """Return the sorted, extension-less names of the files in *folder* matching *pattern*."""
    return sorted(
        os.path.splitext(os.path.basename(found))[0]
        for found in glob.glob(os.path.join(folder, pattern))
    )


def yolo_line_to_voc_object(line, width, height):
    """Convert one YOLO label line to ``(class_name, xmin, ymin, xmax, ymax)``.

    *line* holds ``class x_center y_center box_width box_height`` with the four
    coordinates relative to the image size; the fields may be separated by
    whitespace or by commas. The returned corners are integer pixels clamped
    to the image, because rounding can push a box at the border outside it.
    """
    fields = line.replace(',', ' ').split()
    class_name = str(labels[int(fields[0])])
    center_x = round(float(fields[1]) * width)
    center_y = round(float(fields[2]) * height)
    bbox_width = round(float(fields[3]) * width)
    bbox_height = round(float(fields[4]) * height)
    xmin = max(0, int(center_x - bbox_width / 2))
    ymin = max(0, int(center_y - bbox_height / 2))
    xmax = min(width, int(center_x + bbox_width / 2))
    ymax = min(height, int(center_y + bbox_height / 2))
    return class_name, xmin, ymin, xmax, ymax


def build_voc_xml(img, width, height, label_lines):
    """Return the VOC annotation text for image *img* (name without extension).

    One ``<object>`` element is produced per non-blank entry of *label_lines*;
    with no entries only the header (folder, filename, size) is produced.
    """
    parts = [
        '<annotation>\n',
        ' <folder>VOC2007</folder>\n',
        ' <filename>' + str(img) + '.jpg' + '</filename>\n',
        ' <size>\n',
        ' <width>' + str(width) + '</width>\n',
        ' <height>' + str(height) + '</height>\n',
        ' <depth>3</depth>\n',
        ' </size>\n',
    ]
    for line in label_lines:
        if not line.strip():  # tolerate blank lines in the label file
            continue
        class_name, xmin, ymin, xmax, ymax = yolo_line_to_voc_object(line, width, height)
        parts.extend([
            ' <object>\n',
            ' <name>' + class_name + '</name>\n',
            ' <pose>Unspecified</pose>\n',
            ' <truncated>0</truncated>\n',
            ' <difficult>0</difficult>\n',
            ' <bndbox>\n',
            ' <xmin>' + str(xmin) + '</xmin>\n',
            ' <ymin>' + str(ymin) + '</ymin>\n',
            ' <xmax>' + str(xmax) + '</xmax>\n',
            ' <ymax>' + str(ymax) + '</ymax>\n',
            ' </bndbox>\n',
            ' </object>\n',
        ])
    parts.append('</annotation>')
    return ''.join(parts)


def convert_labels_to_xml():
    """Write one VOC xml file into ``src_xml_dir`` for every jpg in ``src_img_dir``."""
    img_names = list_stems(src_img_dir, '*.jpg')
    total_num = len(img_names)
    for count, img in enumerate(img_names, start=1):
        if count % 1000 == 0:
            print("当前转换进度{}/{}".format(count, total_num))
        # Context managers close every handle; with thousands of images the
        # unclosed files of a plain open() would pile up.
        with Image.open(os.path.join(src_img_dir, img + '.jpg')) as im:
            width, height = im.size
        with open(os.path.join(src_txt_dir, img + '.txt')) as label_file:
            gt = label_file.read().splitlines()
        # UTF-8 is what XML parsers assume when there is no declaration.
        with open(os.path.join(src_xml_dir, img + '.xml'), 'w', encoding='utf-8') as xml_file:
            xml_file.write(build_voc_xml(img, width, height, gt))


def write_name_list(list_name='test.txt'):
    """Write the names of all xml files in ``src_xml_dir`` to ``path/list_name``.

    Change *list_name* when generating the list for another split (train/val).
    """
    with open(os.path.join(path, list_name), 'w') as txt_file:
        txt_file.writelines(str(item) + '\n' for item in list_stems(src_xml_dir, '*.xml'))


def copy_images():
    """Copy every jpg from ``base_src`` to ``base_dst``."""
    file_names = sorted(os.path.basename(item) for item in glob.glob(os.path.join(base_src, '*.jpg')))
    total_num = len(file_names)
    for c, item in enumerate(file_names, start=1):
        if c % 1000 == 0:
            print('当前已复制{}/{}'.format(c, total_num))
        shutil.copyfile(os.path.join(base_src, item), os.path.join(base_dst, item))


if __name__ == '__main__':
    convert_labels_to_xml()
    write_name_list('test.txt')
    copy_images()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。