当前位置:   article > 正文

深度学习训练数据集标注格式转换:xml转txt;txt转xml

xml转txt

1、xml转txt

怎么运行和得到结果(小白式教学):

在桌面新建一个xml2txt.py文件,输入以下代码,把其中的classes(类别名)以及输入、输出路径(pre_dir、target_dir)改成自己的,然后右键->运行python->在终端中运行python文件(我用的是VScode)即可。

(如果报错:['VOCdevkit\\VOC2007/JPEGImages\\0.bmp', 'company', '640', '640', 'black stain', '258', '150', '287', '185', 'broken', '175', '243', '206', '282', 'broken', '319', '234', '341', '275', 'broken', '228', '236', '251', '284', 'broken', '362', '232', '384', '272', 'black stain', '287', '348', '308', '364']
Traceback (most recent call last):
  File "c:/Users/DELL/Desktop/xml2txt/xml2txt.py", line 52, in <module>
    dw=1/int(width)
ValueError: invalid literal for int() with base 10: 'company'

解决方法:搜索所有文件,将以下代码替换为空即可

  1. <owner>
  2. <flickrid>NULL</flickrid>
  3. <name>company</name>
  4. </owner>

  1. # 以东北大学钢铁数据集标签为例
  2. import xml.etree.ElementTree as ET#xml 是python自带的package
  3. import os
  4. classes=['crazing','inclusion','patches','pitted_surface','rolled-in_scale','scratches']#写自己的分类名
  5. pre_dir=r'C:\Users\Bin\Desktop\Deeplearning\NEU surface defect database\labels'#xml文件所在文件夹
  6. target_dir=r'C:\Users\Bin\Desktop\Deeplearning\NEU surface defect database\labeltxt'#想要存储txt文件的文件夹
  7. path=os.listdir(pre_dir)
  8. for path1 in path:
  9. # path1=r'C:\Users\loadlicb\Desktop\chrome_RTJOXXsYHM.xml'#xml文件路径
  10. tree=ET.parse(os.path.join(pre_dir,path1))
  11. root=tree.getroot()#这两个步骤将xml文件拆出来了
  12. oo=[]
  13. for child in root:
  14. if child.tag == 'filename':#tag对应的是《》中的内容,text对应的是两个《》中间的部分内容
  15. oo.append(child.text)#获得xml文件的名
  16. # print(child.text)
  17. for i in child:
  18. if i.tag == 'width':#获得图片的w
  19. oo.append(i.text)
  20. # print(i.text)
  21. if i.tag == 'height':#获得图片的h
  22. oo.append(i.text)
  23. # print(i.text)
  24. if i.tag == 'name':#获得当前框的class
  25. oo.append(i.text)
  26. # print(i.text)
  27. for j in i:
  28. if j.tag == 'xmin':#获得当前框的两个对角线上的点的两组坐标
  29. oo.append(j.text)
  30. # print(j.text)
  31. if j.tag == 'ymin':
  32. oo.append(j.text)
  33. # print(j.text)
  34. if j.tag == 'xmax':
  35. oo.append(j.text)
  36. # print(j.text)
  37. if j.tag == 'ymax':
  38. oo.append(j.text)
  39. # print(j.text)
  40. print(oo)
  41. filename=oo[0]#读取图片的名和宽高
  42. filename=os.path.split(filename)
  43. # print(filename)
  44. name,extension=os.path.splitext(filename[1])#获取xml名和后缀
  45. width=oo[1]
  46. dw=1/int(width)
  47. height=oo[2]
  48. dh=1/int(height)
  49. oo.pop(0)
  50. oo.pop(0)
  51. oo.pop(0)#删除三次oolist的0号元素
  52. back=[]
  53. # print((len(oo))%5)
  54. for i in range(len(oo)//5):
  55. for p in range(len(classes)):#划定class的序号
  56. if classes[p] == oo[5*i]:#str == str
  57. cl=p
  58. back.append(cl)
  59. x=(int(oo[5*i+1])+int(oo[5*i+3]))/2#oo里的所有元素都是str,数字也是
  60. y = (int(oo[5 * i + 2]) + int(oo[5 * i + 4])) / 2#计算标注框的中心点的xy坐标
  61. w=int(oo[5*i+3])-int(oo[5*i+1])
  62. h=int(oo[5*i+4])-int(oo[5*i+2])#计算标注框的宽高
  63. back.append('{:.4f}'.format(x*dw))
  64. back.append('{:.4f}'.format(y * dh))
  65. back.append('{:.4f}'.format(w * dw))
  66. back.append('{:.4f}'.format(h * dh))
  67. # back.append(y*dh)
  68. # back.append(w*dw)
  69. # back.append(h*dh)#转换到0-1区间
  70. print(back)
  71. # dir=r'C:\Users\loadlicb\Desktop'#label文件夹名
  72. file=open(os.path.join(target_dir,name+'.txt'),'w')
  73. for i in range(len(back)):
  74. l=' '
  75. if (i+1)%5==0:
  76. l='\n'
  77. file.writelines(str(back[i])+l)#完成了,现在进行批量操作修改
  78. #完成

2、txt转xml

怎么运行和得到结果(小白式教学):

在桌面建立一个名为txt2xml的文件夹,里面有三个文件夹和一个.py文件,它们分别是:picture、txt、xml、txt2xml.py

txt2xml.py文件中输入以下代码:

  1. # 将txt格式转换成xml格式数据集
  2. from xml.dom.minidom import Document
  3. import os
  4. import cv2
  5. def makexml(picPath, txtPath, xmlPath): # txt所在文件夹路径,xml文件保存路径,图片所在文件夹路径
  6. """此函数用于将yolo格式txt标注文件转换为voc格式xml标注文件
  7. 在自己的标注图片文件夹下建三个子文件夹,分别命名为picture、txt、xml
  8. """
  9. #创建字典用来对类型进行转换,要与classes.txt文件中的类对应,且顺序要一致
  10. dic = {'0': "missing pin", '1': "broken", '2': "scratch", '3': "cataclasm",
  11. '4': "cross fracture", '5': "missing lead", '6': "black stain", '7': "foreign matter"}
  12. files = os.listdir(txtPath)
  13. for i, name in enumerate(files):
  14. xmlBuilder = Document()
  15. annotation = xmlBuilder.createElement("annotation") # 创建annotation标签
  16. xmlBuilder.appendChild(annotation)
  17. txtFile = open(txtPath + name)
  18. txtList = txtFile.readlines()
  19. img = cv2.imread(picPath + name[0:-4] + ".bmp") # 注意这里的图片后缀,.jpg/.png
  20. Pheight, Pwidth, Pdepth = img.shape
  21. folder = xmlBuilder.createElement("folder") # folder标签
  22. foldercontent = xmlBuilder.createTextNode("datasetRGB")
  23. folder.appendChild(foldercontent)
  24. annotation.appendChild(folder)
  25. filename = xmlBuilder.createElement("filename") # filename标签
  26. filenamecontent = xmlBuilder.createTextNode(name[0:-4] + ".bmp")
  27. filename.appendChild(filenamecontent)
  28. annotation.appendChild(filename)
  29. size = xmlBuilder.createElement("size") # size标签
  30. width = xmlBuilder.createElement("width") # size子标签width
  31. widthcontent = xmlBuilder.createTextNode(str(Pwidth))
  32. width.appendChild(widthcontent)
  33. size.appendChild(width)
  34. height = xmlBuilder.createElement("height") # size子标签height
  35. heightcontent = xmlBuilder.createTextNode(str(Pheight))
  36. height.appendChild(heightcontent)
  37. size.appendChild(height)
  38. depth = xmlBuilder.createElement("depth") # size子标签depth
  39. depthcontent = xmlBuilder.createTextNode(str(Pdepth))
  40. depth.appendChild(depthcontent)
  41. size.appendChild(depth)
  42. annotation.appendChild(size)
  43. for j in txtList:
  44. oneline = j.strip().split(" ")
  45. object = xmlBuilder.createElement("object") # object 标签
  46. picname = xmlBuilder.createElement("name") # name标签
  47. namecontent = xmlBuilder.createTextNode(dic[oneline[0]])
  48. picname.appendChild(namecontent)
  49. object.appendChild(picname)
  50. pose = xmlBuilder.createElement("pose") # pose标签
  51. posecontent = xmlBuilder.createTextNode("Unspecified")
  52. pose.appendChild(posecontent)
  53. object.appendChild(pose)
  54. truncated = xmlBuilder.createElement("truncated") # truncated标签
  55. truncatedContent = xmlBuilder.createTextNode("0")
  56. truncated.appendChild(truncatedContent)
  57. object.appendChild(truncated)
  58. difficult = xmlBuilder.createElement("difficult") # difficult标签
  59. difficultcontent = xmlBuilder.createTextNode("0")
  60. difficult.appendChild(difficultcontent)
  61. object.appendChild(difficult)
  62. bndbox = xmlBuilder.createElement("bndbox") # bndbox标签
  63. xmin = xmlBuilder.createElement("xmin") # xmin标签
  64. mathData = int(((float(oneline[1])) * Pwidth + 1) - (float(oneline[3])) * 0.5 * Pwidth)
  65. xminContent = xmlBuilder.createTextNode(str(mathData))
  66. xmin.appendChild(xminContent)
  67. bndbox.appendChild(xmin)
  68. ymin = xmlBuilder.createElement("ymin") # ymin标签
  69. mathData = int(((float(oneline[2])) * Pheight + 1) - (float(oneline[4])) * 0.5 * Pheight)
  70. yminContent = xmlBuilder.createTextNode(str(mathData))
  71. ymin.appendChild(yminContent)
  72. bndbox.appendChild(ymin)
  73. xmax = xmlBuilder.createElement("xmax") # xmax标签
  74. mathData = int(((float(oneline[1])) * Pwidth + 1) + (float(oneline[3])) * 0.5 * Pwidth)
  75. xmaxContent = xmlBuilder.createTextNode(str(mathData))
  76. xmax.appendChild(xmaxContent)
  77. bndbox.appendChild(xmax)
  78. ymax = xmlBuilder.createElement("ymax") # ymax标签
  79. mathData = int(((float(oneline[2])) * Pheight + 1) + (float(oneline[4])) * 0.5 * Pheight)
  80. ymaxContent = xmlBuilder.createTextNode(str(mathData))
  81. ymax.appendChild(ymaxContent)
  82. bndbox.appendChild(ymax)
  83. object.appendChild(bndbox) # bndbox标签结束
  84. annotation.appendChild(object)
  85. f = open(xmlPath + name[0:-4] + ".xml", 'w')
  86. xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
  87. f.close()
  88. if __name__ == "__main__":
  89. picPath = "C:\\Users\\Bin\\Desktop\\txt2xml\\picture\\" # 图片所在文件夹路径,后面的\\一定要带上
  90. txtPath = "C:\\Users\\Bin\\Desktop\\txt2xml\\txt\\" # txt所在文件夹路径,后面的\\一定要带上
  91. xmlPath = "C:\\Users\\Bin\\Desktop\\txt2xml\\xml\\" # xml文件保存路径,后面的\\一定要带上
  92. makexml(picPath, txtPath, xmlPath)

 .py文件做好了,文件夹也做好之后,重新打开VScode->打开文件夹->选择txt2xml文件夹->点击txt2xml.py->右键运行python->在终端中运行python文件(我用的是VScode)即可。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/空白诗007/article/detail/807194
推荐阅读
相关标签
  

闽ICP备14008679号