赞
踩
VOC数据格式,会直接把每张图片标注的标签信息保存到一个xml文件中。
xml中的信息如下:
- <annotation>
- <folder>矿区图像</folder>
- <filename>0066.jpg</filename>
- <path>/home/zhy/Documents/智能驾驶项目/标注/矿区图像/0066.jpg</path>
- <source>
- <database>Unknown</database>
- </source>
- <size>
- <width>1280</width>
- <height>720</height>
- <depth>3</depth>
- </size>
- <segmented>0</segmented>
- <object>
- <name>car</name>
- <pose>Unspecified</pose>
- <truncated>1</truncated>
- <difficult>0</difficult>
- <bndbox>
- <xmin>812</xmin>
- <ymin>223</ymin>
- <xmax>1280</xmax>
- <ymax>557</ymax>
- </bndbox>
- </object>
- </annotation>

xml文件中的关键信息说明:
0066.jpg: 是图片名称,则xml文件名为0066.xml;
/home/zhy/Documents/智能驾驶项目/标注/矿区图像/0066.jpg: 是存放该图片的绝对路径;
1280*720: 是图片分辨率,3代表三通道图片;
car: 是类别名;
xmin,ymin,xmax,ymax,定义了每个目标的标定框坐标:即左上角的坐标和右下角的坐标;
YOLO标签格式,会直接把每张图片标注的标签信息保存到一个txt文件中。
图片名称为1.jpg,则对应的txt文件名称为1.txt。
txt中的信息如下:
3 0.286328 0.475694 0.132031 0.123611
txt文件中的关键信息说明:
每一行代表标注的一个目标;这张图中只标注了一个目标,所以只有一行;
第一个数字(本例中为3)代表标注目标的类别编号(class_id);
后面四个数字代表标注框的中心坐标和标注框的相对宽和高(进行了归一化处理);
五个数据从左到右依次为:(class_id, x_center, y_center, width, height)
同时还会生成一个classes.txt,里面内容如下:
car
voctoyolo.py的目的就是把voc数据格式转换为yolo格式:
voc格式标签:图片的实际宽高,标注框的左上角和右下角坐标;
yolo格式标签:标注框的中心坐标(归一化),标注框的宽和高(归一化)。
voc格式转换为yolo格式计算公式:
框中心的实际坐标(x_center,y_center);有些实现(包括下面的voctoyolo.py)还会在此基础上再减1:
x_center=(xmax+xmin)/2
y_center=(ymax+ymin)/2
归一化以后的中心坐标(x,y)
x=x_center/width
y=y_center/height
框的高和宽(归一化后)
w=(xmax-xmin)/width
h=(ymax-ymin)/height
voc格式的xml标签文件转化yolo格式的txt标签文件代码:voctoyolo.py
- # -*- coding:utf8 -*-
- import os
- import xml.etree.ElementTree as ET
- import io
-
- find_path = '/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/label/3/' # directory that holds the VOC .xml annotation files
- savepath = '/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/label/4/' # directory where the converted .txt label files are written
-
- classes = ['car','Truck','person','bicycle','bus']
-
-
class Voc_Yolo(object):
    """Convert Pascal VOC ``.xml`` annotations into YOLO ``.txt`` label files.

    Each ``<object>`` of an annotation becomes one output line of the form
    ``class_id x_center y_center width height``, where the four box values
    are divided by the image width/height read from ``<size>``.

    Args:
        find_path: Directory that is walked (recursively) for ``.xml`` files.
        save_path: Directory the ``.txt`` files are written to. When omitted,
            the module-level ``savepath`` is used.
        class_names: Ordered list of class names; the index of a name is the
            class id written to the label file. When omitted, the
            module-level ``classes`` list is used.
    """

    def __init__(self, find_path, save_path=None, class_names=None):
        self.find_path = find_path
        # Fall back to the module-level settings so that the historical
        # ``Voc_Yolo(find_path)`` call keeps working unchanged.
        self.save_path = savepath if save_path is None else save_path
        self.class_names = classes if class_names is None else class_names

    def Make_txt(self, outfile):
        """Create (or truncate) ``outfile`` and return the open file object.

        The caller is responsible for closing the returned file.
        """
        out = open(outfile, 'w')
        print("创建成功:{}".format(outfile))
        return out

    def Work(self, count):
        """Convert every ``.xml`` file found under ``self.find_path``.

        Args:
            count: Starting value of the processed-file counter.

        Returns:
            ``count`` plus the number of ``.xml`` files that were converted.

        Raises:
            xml.etree.ElementTree.ParseError: If an annotation is not
                well-formed XML.
        """
        for current_dir, _subdirs, files in os.walk(self.find_path):
            for file in files:
                # Anything that is not an annotation (images, notes, ...) would
                # make ET.parse fail, so it is ignored and not counted.
                if not file.lower().endswith('.xml'):
                    continue
                count += 1

                # Build the input path from the directory actually being
                # walked, so nested folders and non-default find_path work.
                input_file = os.path.join(current_dir, file)
                outfile = os.path.join(
                    self.save_path, os.path.splitext(file)[0] + '.txt')

                annotation = ET.parse(input_file).getroot()
                size = annotation.find('size')
                w_image = float(size.find('width').text)
                h_image = float(size.find('height').text)

                # ``with`` guarantees the label file is closed even if one of
                # the objects below is malformed.
                with self.Make_txt(outfile) as out:
                    for obj in annotation.iter('object'):
                        classname = obj.find('name').text
                        difficult = obj.findtext('difficult')
                        is_difficult = (
                            difficult is not None and difficult.strip() == '1')
                        # Skip classes we do not train on and objects flagged
                        # as difficult.
                        if classname not in self.class_names or is_difficult:
                            continue

                        cls_id = self.class_names.index(classname)
                        xmlbox = obj.find('bndbox')
                        x_min = float(xmlbox.find('xmin').text)
                        x_max = float(xmlbox.find('xmax').text)
                        y_min = float(xmlbox.find('ymin').text)
                        y_max = float(xmlbox.find('ymax').text)

                        # The "- 1" offset on the centre is kept from the
                        # original script; the reverse conversion adds it back.
                        x_center = ((x_min + x_max) / 2 - 1) / w_image
                        y_center = ((y_min + y_max) / 2 - 1) / h_image
                        w = (x_max - x_min) / w_image
                        h = (y_max - y_min) / h_image

                        fields = (cls_id, x_center, y_center, w, h)
                        out.write(" ".join(str(v) for v in fields) + '\n')
        return count
-
-
if __name__ == "__main__":
    # Convert every annotation below ``find_path`` and report how many
    # files were handled, starting the counter at zero.
    converter = Voc_Yolo(find_path)
    processed = converter.Work(0)
    print(processed)

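voc格式中保存的信息为:xmin,ymin,xmax,ymax,所以只要把上面的公式反过来,就可以由yolo标签推导出这四个值:xmin=(x-w/2)*width,xmax=(x+w/2)*width,ymin=(y-h/2)*height,ymax=(y+h/2)*height(下面的代码还会再加1,以抵消正向转换时减去的1)。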
yolo格式的txt标签文件转化voc格式的xml标签文件代码:yolotovoc.py
- # -*- coding:utf8 -*-
- from xml.dom.minidom import Document
- import os
- import cv2
-
-
def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, attach it to ``parent`` and return it."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath):
    """Convert YOLO ``.txt`` label files into Pascal VOC ``.xml`` annotations.

    For every label file in ``txtPath`` the image with the same stem and a
    ``.jpg`` extension is read from ``picPath`` to obtain its size, and an
    ``.xml`` file with the same stem is written to ``xmlPath``.

    Args:
        picPath: Directory containing the ``.jpg`` images.
        txtPath: Directory containing the YOLO label files.
        xmlPath: Directory the generated ``.xml`` files are written to.

    Raises:
        FileNotFoundError: If the image belonging to a label file cannot be
            read.
        KeyError: If a label line uses a class id missing from ``dic``.
        ValueError: If a label line does not hold four numeric box values.
    """
    # YOLO class id (as it appears in the label file) -> VOC class name.
    dic = {'0': "car",
           '1': "lightTruck",
           '2': "person",
           '3': "tipperTruck",
           '4': "construction",
           '5': "tricycle",
           '6': "train",
           '7': "bicycle",
           }
    for name in sorted(os.listdir(txtPath)):
        stem, ext = os.path.splitext(name)
        # Only real label files are converted; classes.txt lives in the same
        # folder but lists class names and has no matching image.
        if ext.lower() != ".txt" or name == "classes.txt":
            continue
        print(name)

        with open(os.path.join(txtPath, name)) as txtFile:
            txtList = txtFile.readlines()

        image_path = os.path.join(picPath, stem + ".jpg")
        img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(
                "cannot read image for label file {}: {}".format(name, image_path))
        Pheight, Pwidth, Pdepth = img.shape

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder",
                             "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", str(Pwidth))
        _append_text_element(xmlBuilder, size, "height", str(Pheight))
        _append_text_element(xmlBuilder, size, "depth", str(Pdepth))
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if not oneline:
                # Tolerate blank lines (e.g. a trailing newline at file end).
                continue
            x_center, y_center, box_w, box_h = (float(v) for v in oneline[1:5])

            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", dic[oneline[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            # De-normalise the box. The "+ 1" on the centre mirrors the
            # "- 1" applied when VOC boxes are converted to YOLO; int()
            # truncates the result to whole pixels.
            bndbox = xmlBuilder.createElement("bndbox")
            corners = (
                ("xmin", int((x_center * Pwidth + 1) - box_w * 0.5 * Pwidth)),
                ("ymin", int((y_center * Pheight + 1) - box_h * 0.5 * Pheight)),
                ("xmax", int((x_center * Pwidth + 1) + box_w * 0.5 * Pwidth)),
                ("ymax", int((y_center * Pheight + 1) + box_h * 0.5 * Pheight)),
            )
            for tag, value in corners:
                _append_text_element(xmlBuilder, bndbox, tag, str(value))
            obj.appendChild(bndbox)

            annotation.appendChild(obj)

        # Write with the same encoding that the XML declaration announces.
        with open(os.path.join(xmlPath, stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t',
                                encoding='utf-8')
-
-
if __name__ == "__main__":
    # Each of the three directory paths below must keep its trailing "/".
    # Folder that holds the images.
    picPath = "/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/image/"
    # Folder that holds the YOLO .txt label files.
    txtPath = "/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/labels/"
    # Folder the generated .xml files are saved to.
    xmlPath = "/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/xml/"
    makexml(picPath=picPath, txtPath=txtPath, xmlPath=xmlPath)

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。