赞
踩
最近涉及较多根据数据生成xml格式的内容,所以想建立一套简单的规则用于关联xml格式和python数据格式之间的关系,进而提高创建和解析xml代码的通用性。
xml结构与python数据之间的关系如下所示:
用字典表示节点,元组表示节点属性,字符串表示节点文本。
字典的键为节点的标签,字典的值为列表,列表中的每个元素对应一个同名节点,以满足多个同名节点的需求。每个元素本身也是一个列表,用于存放该节点的各类信息(属性、文本、子节点)。
from xml.etree import ElementTree as ET
from xml.dom import minidom
def createXmlData(pyData) -> ET.Element:
    """Build an ElementTree structure from nested Python data.

    The expected layout is ``{tag: [[item, ...], ...]}``: each key is a tag
    name and each value is a list with one entry per element carrying that
    tag. Every entry is itself a list of items, where an item is one of:

    * ``tuple`` -- an attribute: ``(name, value)``, or ``(name,)`` when the
      attribute value equals its name;
    * ``str`` -- the text of the element;
    * ``dict`` -- child elements, in the same layout (handled recursively).

    Values and items of any other type are ignored. All generated elements
    are attached under a new element named ``root``, which is returned.
    """

    def _add_nodes(info_dict, parent):
        # One sub-element per entry of the list stored under each tag.
        for tag, entries in info_dict.items():
            if not isinstance(entries, list):
                continue
            for entry in entries:
                element = ET.SubElement(parent, tag)
                if isinstance(entry, list):
                    for item in entry:
                        _apply_item(item, element)

    def _apply_item(item, element):
        # isinstance (rather than an exact type match) lets dict/tuple/str
        # subclasses such as OrderedDict be handled instead of dropped.
        if isinstance(item, dict):
            _add_nodes(item, element)
        elif isinstance(item, tuple):
            if len(item) == 1:
                # Attribute whose name and value are identical.
                element.set(str(item[0]), str(item[0]))
            elif len(item) == 2:
                element.set(str(item[0]), str(item[1]))
        elif isinstance(item, str):
            element.text = item

    root = ET.Element('root')
    _add_nodes(pyData, root)
    return root
用递归的方式实现多级子节点的转换
# 生成Xml类型文件
def writeXmlFile(fpath, XmlData, method=1) -> bool:
    """Write an ElementTree element to an XML file.

    Args:
        fpath: Path of the file to write.
        XmlData: Root ``ET.Element`` to serialize.
        method: ``1`` (default) serializes with ElementTree and re-parses the
            result with minidom so the output is indented with line breaks;
            ``2`` writes directly with ElementTree, which emits no line
            breaks.

    Returns:
        ``True`` when the file was written, ``False`` when writing failed or
        ``method`` is not one of the supported values.
    """
    try:
        if method == 1:
            pretty = minidom.parseString(ET.tostring(XmlData))
            with open(fpath, 'w', encoding='utf-8') as f:
                pretty.writexml(f, addindent=' ', newl='\n', encoding='utf-8')
        elif method == 2:
            tree = ET.ElementTree(XmlData)
            tree.write(fpath, encoding='utf-8', method='xml', xml_declaration=True)
        else:
            # Nothing was written, so do not report success.
            return False
    except Exception:
        # Keep the boolean contract for callers, but no longer swallow
        # KeyboardInterrupt / SystemExit as the bare ``except`` did.
        return False
    return True
# 读取xml类型文件
def readXmlFile(fpath) -> ET.Element:
    """Read an XML file and return its root element.

    The file is first parsed directly with ``ET.parse``. If that fails, the
    content is read as text (UTF-8 first, then the platform default encoding
    when UTF-8 decoding fails) and parsed with ``ET.fromstring``.

    Args:
        fpath: Path of the XML file to read.

    Returns:
        The root ``ET.Element`` of the parsed document.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        xml.etree.ElementTree.ParseError: If the text fallback cannot be
            parsed either.
    """
    try:
        return ET.parse(fpath).getroot()
    except Exception:
        # Best-effort fallback: decode the file ourselves and parse the text.
        try:
            with open(fpath, 'r', encoding='utf-8') as f:
                text = f.read()
        except UnicodeDecodeError:
            # Only a decoding failure can be helped by retrying with the
            # platform default encoding; other errors propagate as-is.
            with open(fpath, 'r') as f:
                text = f.read()
        return ET.fromstring(text)
# 解析xml格式的数据
def parseXmlData(XmlData) -> dict:
    """Convert the children of an XML element into nested Python data.

    The result has the layout ``{tag: [[item, ...], ...]}``: each child tag
    maps to a list with one entry per child element of that tag. Every entry
    lists, in this order, the attributes as ``(name, value)`` tuples, the
    stripped text (if non-empty), and a dict of the element's own children
    (if any), built recursively.

    Args:
        XmlData: Element whose children are converted; the element itself
            (typically the document root) is not included in the result.

    Returns:
        The nested dict described above; empty when there are no children.
    """

    def _collect(children):
        info = {}
        for child in children:
            node = [(name, value) for name, value in child.attrib.items()]
            # Element.text is None when the element has no text at all
            # (e.g. ``<c/>`` or an element that starts with a child).
            text = (child.text or '').strip()
            if text:
                node.append(text)
            sub_nodes = list(child)
            if sub_nodes:
                node.append(_collect(sub_nodes))
            info.setdefault(child.tag, []).append(node)
        return info

    return _collect(XmlData)
通过递归的方法,逐级解析各级子节点
if __name__ == '__main__':
    # Demo: build XML from Python data, write it to disk, then read it back
    # and convert it to Python data again.
    sample = {
        'a': [
            ['name', ('ID1', '1'), ('ID2', '1')],
            [('ID1', '2'), ('ID2', '2'), {'c': [['onlyone']]}],
        ],
        'b': [[{'a': [['a']]}]],
        'c': [['onlyone']],
    }
    print('原始数据:', sample)

    # Python data -> XML element tree -> file.
    tree_root = createXmlData(sample)
    target = r'D:\test.xml'
    writeXmlFile(target, tree_root)
    print('xml格式:', ET.tostring(tree_root))

    # File -> XML element tree -> Python data.
    print('解析结果:', parseXmlData(readXmlFile(target)))
输出结果:
原始数据: {'a': [['name', ('ID1', '1'), ('ID2', '1')], [('ID1', '2'), ('ID2', '2'), {'c': [['onlyone']]}]], 'b': [[{'a': [['a']]}]], 'c': [['onlyone']]}
xml格式: b'<root><a ID1="1" ID2="1">name</a><a ID1="2" ID2="2"><c>onlyone</c></a><b><a>a</a></b><c>onlyone</c></root>'
解析结果: {'a': [[('ID1', '1'), ('ID2', '1'), 'name'], [('ID1', '2'), ('ID2', '2'), {'c': [['onlyone']]}]], 'b': [[{'a': [['a']]}]], 'c': [['onlyone']]}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。