当前位置:   article > 正文

常用文件类型 -- XML文件介绍_典型的xml文件类型

典型的xml文件类型

XML文件介绍

1.1 介绍

XML是一种数据存储、交换、表达的标准:

  • 存储:优势在于半结构化,可以自定义schema,相比关系型二维表,不用遵循第一范式(可以有嵌套关系);
  • 交换:可以通过schema实现异构数据集成;
  • 表达:本身就可以作为阅读文档,当然还可以使用XSLT之类的进行解析和再显示。

缺点是schema验证复杂;与后来出现的JSON等格式相比,XML较为冗余,解析性能也不够好。不过,作为半结构化文档的代表性标准,它体现了早期的一种数据组织思想。

鉴于以上缺点,本人不对xml做过多研究,直接上封装的工具类。

1.2 封装工具类

from collections import defaultdict
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, ElementTree


class XML2Dict(object):
    """Convert an XML document into nested dictionaries.

    Every element becomes a ``{tag: value}`` mapping where ``value`` is:

    * ``None`` for an element without attributes, children or text;
    * the stripped text for an element that has only text;
    * a dict otherwise, holding child elements keyed by tag (repeated
      tags are collected into a list), attributes keyed by
      ``attr_prefix + name`` and the element's own text under ``'#text'``.

    Behaviour is tuned through instance attributes:

    * ``remove_ns``: strip the ``{namespace}`` part from tag names.
    * ``contains_attr``: copy XML attributes into the result.
    * ``attr_prefix``: prefix prepended to attribute names.
    """

    def __init__(self, coding='UTF-8'):
        # Kept for backward compatibility; nothing in this class reads it.
        self._coding = coding
        self.remove_ns = False
        self.contains_attr = True
        self.attr_prefix = '@'

    def _parse_node(self, t):
        """Recursively convert element ``t`` into a ``{tag: value}`` dict."""
        ttag = self._remove_namespace(t.tag) if self.remove_ns else t.tag

        children = list(t)
        if children:
            # Group converted children by tag so repeated tags become a list.
            grouped = defaultdict(list)
            for child in children:
                for k, v in self._parse_node(child).items():
                    grouped[k].append(v)
            value = {k: v[0] if len(v) == 1 else v for k, v in grouped.items()}
        elif t.attrib:
            # Needs a dict so attributes and '#text' have somewhere to go.
            value = {}
        else:
            value = None
        d = {ttag: value}

        if self.contains_attr and t.attrib:
            d[ttag].update(
                (self.attr_prefix + k, v) for k, v in t.attrib.items()
            )

        # ``t.text`` is None for empty elements and for elements whose first
        # child directly follows the start tag; treat that as "no text"
        # instead of raising AttributeError.
        text = (t.text or '').strip()
        if text:
            if children or t.attrib:
                d[ttag]['#text'] = text
            else:
                d[ttag] = text  # text-only element: the text is the value
        return d

    def parse(self, xml_file):
        """Read the file at path ``xml_file`` and return it as a dict.

        The file is read as bytes so that the XML parser decodes it using
        the encoding declared in the document itself rather than the
        platform's default text encoding.
        """
        with open(xml_file, 'rb') as fp:
            return self.fromstring(fp.read())

    def fromstring(self, xml_str):
        """Parse XML given as ``str`` or ``bytes`` and return it as a dict."""
        t = ET.fromstring(xml_str)
        return self._parse_node(t)

    def _remove_namespace(self, tag):
        """Return ``tag`` without its ``{namespace}`` part, if it has one."""
        start = tag.find("{")
        end = tag.find("}")
        if start >= 0 and end >= 0:
            return tag[:start] + tag[end + 1:]
        return tag


class XMLHelper(object):
    """Static convenience helpers for writing a dict to XML and reading XML back."""

    @staticmethod
    def dict2xml(path, my_dict, root='root', encoding='utf8'):
        """Write ``my_dict`` to ``path`` as a one-level XML document.

        A root element named ``root`` is created and each key of
        ``my_dict`` becomes a direct child whose text is ``str(value)``.
        Values are not expanded recursively.
        """
        top = Element(root)
        for tag_name, value in my_dict.items():
            node = Element(tag_name)
            node.text = str(value)
            top.append(node)

        document = ElementTree(top)
        document.write(path, encoding=encoding)

    @staticmethod
    def xml2dict(path, remove_ns=True, contains_attr=True, attr_prefix=''):
        """Parse the XML file at ``path`` and return it as a nested dict.

        The three keyword options are copied onto a fresh ``XML2Dict``
        converter before parsing. With the default ``attr_prefix=''``,
        attribute names are stored without any prefix.
        """
        converter = XML2Dict()
        converter.remove_ns = remove_ns
        converter.contains_attr = contains_attr
        converter.attr_prefix = attr_prefix
        result = converter.parse(path)
        return result
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
'
运行

1.3 工具类调用举例

  • config.xml文件:
<?xml version="1.0" encoding="utf-8" ?>
<!-- This is list of customers -->
<customers>
  <customer ID="C001">
    <name>Acme Inc.</name>
    <phone>12345</phone>
    <comments>
      <![CDATA[Regular customer since 1995]]>
    </comments>
  </customer>
  <customer ID="C002">
    <name>Star Wars Inc.</name>
    <phone>23456</phone>
    <comments>
      <![CDATA[A small but healthy company.]]>
    </comments>
  </customer>
</customers>
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 示例代码
from test_xml.XMLHelper import  XMLHelper


def demo():
    """Show both helpers: print config.xml as a dict, then write a dict to db.xml."""
    # Read the XML file and print it as a dictionary.
    xml_path = 'config.xml'
    print(XMLHelper.xml2dict(xml_path))
    # NOTE(review): assuming the imported XMLHelper is the class shown in this
    # article, xml2dict defaults to attr_prefix='', so the attribute key is
    # 'ID' (pass attr_prefix='@' to get '@ID'). Expected output:
    # {'customers': {'customer': [{'name': 'Acme Inc.', 'phone': '12345', 'comments': 'Regular customer since 1995', 'ID': 'C001'},
    # {'name': 'Star Wars Inc.', 'phone': '23456', 'comments': 'A small but healthy company.', 'ID': 'C002'}]}}

    # Write a dictionary to an XML file.
    db_dict = {'user': 'zet', 'password': '123456'}
    XMLHelper.dict2xml('db.xml', db_dict, root='db')
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号