当前位置:   article > 正文

python:xml.etree,用 xmltodict 转换为json数据,生成jstree所需的文件_python elementtree 转json

python elementtree 转json

请参阅:java : pdfbox 读取 PDF文件内书签 或者 python:从PDF中提取目录

请注意:书的目录.txt 编码:UTF-8,推荐用 Notepad++ 转换编码。

xml 是 python 标准库,在 D:\Python39\Lib\xml\etree

pip install xmltodict

python 用 xml.etree.ElementTree,用 xmltodict 转换为json数据。

编写 txt_xml_etree_json.py  如下

  1. # -*- coding: utf-8 -*-
  2. """ 读目录.txt文件,用 xmltodict转换为json数据 """
  3. import os
  4. import sys
  5. import codecs
  6. import json
  7. import xml.etree.ElementTree as et
  8. import xmltodict
  9. if len(sys.argv) ==2:
  10. f1 = sys.argv[1]
  11. else:
  12. print('usage: python txt_xml_etree_json.py file1.txt')
  13. sys.exit(1)
  14. if not os.path.exists(f1):
  15. print(f"ERROR: {f1} not found.")
  16. sys.exit(1)
  17. fn,ext = os.path.splitext(f1)
  18. if ext.lower() != '.txt':
  19. print('ext is not .txt')
  20. sys.exit(2)
  21. fp = codecs.open(f1, mode="r", encoding="utf-8")
  22. # 读取第一行:书名
  23. title = fp.readline()
  24. # 创建主题节点
  25. root = et.Element("node")
  26. root.set("id", '1')
  27. root.set("text", title.strip())
  28. # 定义状态:
  29. state = et.SubElement(root, "state")
  30. state.set("opened", 'true')
  31. state.set("disabled", 'true')
  32. # 用缩排表现层级关系,假设最多5个层级
  33. indent1 = ' '*2
  34. indent2 = ' '*4
  35. indent3 = ' '*6
  36. indent4 = ' '*8
  37. n = 2
  38. for line in fp:
  39. txt = line.strip()
  40. if len(txt) ==0:
  41. continue
  42. txt = txt[0:-3] # 去掉行尾的页数
  43. if len(txt) >0 and line[0] !=' ':
  44. # 创建主题的子节点(1级节点)
  45. node1 = et.SubElement(root, "children")
  46. node1.set("id", str(n))
  47. node1.set("text", txt)
  48. p_node = node1 # 寄存父节点
  49. elif line.startswith(indent1) and line[2] !=' ':
  50. # 创建node1的子节点(2级节点)
  51. try: type(node1)
  52. except NameError: node2 = et.SubElement(root, "children")
  53. else: node2 = et.SubElement(node1, "children")
  54. node2.set("id", str(n))
  55. node2.set("text", txt)
  56. p_node = node2
  57. elif line.startswith(indent2) and line[4] !=' ':
  58. # 创建node2的子节点(3级节点)
  59. try: type(node2)
  60. except NameError: node3 = et.SubElement(node1, "children")
  61. else: node3 = et.SubElement(node2, "children")
  62. node3.set("id", str(n))
  63. node3.set("text", txt)
  64. p_node = node3
  65. elif line.startswith(indent3) and line[6] !=' ':
  66. # 创建node3的子节点(4级节点)
  67. try: type(node3)
  68. except NameError: node4 = et.SubElement(node2, "children")
  69. else: node4 = et.SubElement(node3, "children")
  70. node4.set("id", str(n))
  71. node4.set("text", txt)
  72. p_node = node4
  73. elif line.startswith(indent4) and line[8] !=' ':
  74. # 创建node4的子节点(5级节点)
  75. try: type(node4)
  76. except NameError: node5 = et.SubElement(p_node, "children")
  77. else: node5 = et.SubElement(node4, "children")
  78. node5.set("id", str(n))
  79. node5.set("text", txt)
  80. else:
  81. print(txt)
  82. n += 1
  83. fp.close()
  84. print(f"line number: {n}")
  85. # 转换成 str,方便导出
  86. root_bytes = et.tostring(root, encoding="utf-8")
  87. xml_str = root_bytes.decode()
  88. try:
  89. json_dict = xmltodict.parse(xml_str, encoding='utf-8')
  90. json_str = json.dumps(json_dict['node'], indent=2)
  91. except:
  92. print("xmltodict.parse error!")
  93. # 去掉'@'
  94. json_str = '['+ json_str.replace('\"@','"') +']'
  95. #print(json_str)
  96. # 导出.json文件
  97. f2 = fn +'.json'
  98. with codecs.open(f2, 'w', encoding='utf8') as fp:
  99. fp.write(json_str)

python 用 xml.etree.ElementTree,用 xmltodict 转换为json数据,jinja2 生成jstree模板所需的文件(需要先安装:pip install jinja2)。

编写 txt_xml_etree_htm.py  如下

  1. # -*- coding: utf-8 -*-
  2. """ 读目录.txt文件,用 xmltodict转换为json数据,生成jstree所需的文件 """
  3. import os
  4. import sys
  5. import codecs
  6. import json
  7. import xml.etree.ElementTree as et
  8. import xmltodict
  9. from jinja2 import Environment,FileSystemLoader
  10. if len(sys.argv) ==2:
  11. f1 = sys.argv[1]
  12. else:
  13. print('usage: python txt_xml_etree_htm.py file1.txt')
  14. sys.exit(1)
  15. if not os.path.exists(f1):
  16. print(f"ERROR: {f1} not found.")
  17. sys.exit(1)
  18. fn,ext = os.path.splitext(f1)
  19. if ext.lower() != '.txt':
  20. print('ext is not .txt')
  21. sys.exit(2)
  22. fp = codecs.open(f1, mode="r", encoding="utf-8")
  23. # 读取第一行:书名
  24. title = fp.readline()
  25. # 创建主题节点
  26. root = et.Element("node")
  27. root.set("id", '1')
  28. root.set("text", title.strip())
  29. # 定义状态:
  30. state = et.SubElement(root, "state")
  31. state.set("opened", 'true')
  32. state.set("disabled", 'true')
  33. # 用缩排表现层级关系,假设最多5个层级
  34. indent1 = ' '*2
  35. indent2 = ' '*4
  36. indent3 = ' '*6
  37. indent4 = ' '*8
  38. n = 2
  39. for line in fp:
  40. txt = line.strip()
  41. if len(txt) ==0:
  42. continue
  43. txt = txt[0:-3] # 去掉行尾的页数
  44. if len(txt) >0 and line[0] !=' ':
  45. # 创建主题的子节点(1级节点)
  46. node1 = et.SubElement(root, "children")
  47. node1.set("id", str(n))
  48. node1.set("text", txt)
  49. p_node = node1 # 寄存父节点
  50. elif line.startswith(indent1) and line[2] !=' ':
  51. # 创建node1的子节点(2级节点)
  52. try: type(node1)
  53. except NameError: node2 = et.SubElement(root, "children")
  54. else: node2 = et.SubElement(node1, "children")
  55. node2.set("id", str(n))
  56. node2.set("text", txt)
  57. p_node = node2
  58. elif line.startswith(indent2) and line[4] !=' ':
  59. # 创建node2的子节点(3级节点)
  60. try: type(node2)
  61. except NameError: node3 = et.SubElement(node1, "children")
  62. else: node3 = et.SubElement(node2, "children")
  63. node3.set("id", str(n))
  64. node3.set("text", txt)
  65. p_node = node3
  66. elif line.startswith(indent3) and line[6] !=' ':
  67. # 创建node3的子节点(4级节点)
  68. try: type(node3)
  69. except NameError: node4 = et.SubElement(node2, "children")
  70. else: node4 = et.SubElement(node3, "children")
  71. node4.set("id", str(n))
  72. node4.set("text", txt)
  73. p_node = node4
  74. elif line.startswith(indent4) and line[8] !=' ':
  75. # 创建node4的子节点(5级节点)
  76. try: type(node4)
  77. except NameError: node5 = et.SubElement(p_node, "children")
  78. else: node5 = et.SubElement(node4, "children")
  79. node5.set("id", str(n))
  80. node5.set("text", txt)
  81. else:
  82. print(txt)
  83. n += 1
  84. fp.close()
  85. print(f"line number: {n}")
  86. # 转换成 str,方便导出
  87. root_bytes = et.tostring(root, encoding="utf-8")
  88. xml_str = root_bytes.decode()
  89. try:
  90. json_dict = xmltodict.parse(xml_str, encoding='utf-8')
  91. json_str = json.dumps(json_dict['node'], indent=2)
  92. except:
  93. print("xmltodict.parse error!")
  94. # 去掉'@'
  95. json_str = '['+ json_str.replace('\"@','"') +']'
  96. #print(json_str)
  97. # 使用 jinja2 对html模板文件进行数据替换
  98. env = Environment(loader=FileSystemLoader('d:/python/'))
  99. tpl = env.get_template('jstree_template.htm')
  100. # 导出.html文件
  101. f2 = fn +'.htm'
  102. with codecs.open(f2, 'w', encoding='utf8') as fp:
  103. content = tpl.render(title=title.strip(), mydir=json_str)
  104. fp.write(content)

https://gitee.com/ 搜索 jstree 下载
https://gitee.com/mirrors/jstree?_from=gitee_search
git clone https://gitee.com/mirrors/jstree.git

编写 jstree 模板文件:jstree_template.htm

  1. <!DOCTYPE html>
  2. <html lang="en">
  3. <head>
  4. <meta charset="UTF-8">
  5. <meta http-equiv="X-UA-Compatible" content="IE=Edge">
  6. <meta name="viewport" content="width=device-width, initial-scale=1">
  7. <title>{{title}}</title>
  8. <script src="../js/jquery-3.2.1.min.js"></script>
  9. <link rel="stylesheet" href="../js/jstree/dist/themes/default/style.css" />
  10. <script src="../js/jstree/dist/jstree.min.js"></script>
  11. </head>
  12. <body>
  13. <!-- 搜索框 -->
  14. <div class="search_input">
  15. <input type="text" id="search_a" />
  16. <img src="../js/jstree/dist/search.png" />
  17. </div>
  18. <div id="treeview1" class="treeview">
  19. </div>
  20. <script type="text/javascript">
  21. var mydir = {{mydir}};
  22. $("#treeview1").jstree({
  23. 'core' : {
  24. "multiple" : false,
  25. 'data' : mydir,
  26. 'dblclick_toggle': true
  27. },
  28. "plugins" : ["search"]
  29. });
  30. //输入框输入时自动搜索
  31. var tout = false;
  32. $('#search_a').keyup(function(){
  33. if (tout) clearTimeout(tout);
  34. tout = setTimeout(function(){
  35. $('#treeview1').jstree(true).search($('#search_a').val());
  36. }, 250);
  37. });
  38. </script>
  39. </body>
  40. </html>

运行 python txt_xml_etree_htm.py your_pdf_dir.txt

生成 your_pdf_dir.htm

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/1003799
推荐阅读
相关标签
  

闽ICP备14008679号