赞
踩
#pip install python-docx
import docx
# Open the Word document. `path` is not defined in this snippet; it must
# already hold the location of a .docx file when this code runs.
doc = docx.Document(path)

# Fetch the paragraph list once and reuse it, instead of re-reading
# `doc.paragraphs` on every loop iteration.
# NOTE(review): python-docx appears to rebuild this list on each access --
# confirm against the library documentation.
paragraphs = doc.paragraphs

# Print the text of every paragraph.
for para in paragraphs:
    print(para.text)

# Print every paragraph preceded by its zero-based index.
for i, para in enumerate(paragraphs):
    print(str(i), para.text)
# Replace a target string in every paragraph of 'test.docx', including the
# case where the target is split across several consecutive runs.
# The modified document is only changed in memory; this snippet does not
# save it.
doc = docx.Document('test.docx')
for paragraph in doc.paragraphs:
    tmp = ''  # text accumulated from the runs seen since the last replacement
    runs = paragraph.runs
    for i, run in enumerate(runs):
        tmp += run.text  # merge the run strings
        if '需要替换的字符串' in tmp:
            # The accumulated text contains the target: store the merged,
            # substituted text in the current run and start accumulating
            # afresh. Earlier runs were already emptied, so nothing is
            # duplicated.
            run.text = tmp.replace('需要替换的字符串', '我是替换后的字符串')
            tmp = ''
        elif i == len(runs) - 1:
            # Last run and no pending match: flush whatever is still
            # accumulated so that no text is lost. This branch must not run
            # after a successful match, otherwise the replacement just
            # written would be overwritten with the (now empty) buffer.
            run.text = tmp
        else:
            # No match yet: empty this run; its text lives on in `tmp`.
            run.text = ''
def _mask_sensitive_runs(paragraphs):
    """Mask detected sensitive substrings in every run of *paragraphs*.

    For each run the text is whitespace-normalised (every whitespace
    sequence becomes one space) and passed to ``re_tmp`` and
    ``name_identify``. Every string they return is replaced in the run text
    by a sequence of ``'X'`` of the same length. Runs with no detections are
    left untouched (their whitespace is not normalised either).

    NOTE(review): ``re_tmp`` and ``name_identify`` are defined elsewhere;
    this code assumes both return sequences of substrings that support
    ``len()`` and ``+`` with each other -- confirm.
    """
    for paragraph in paragraphs:
        for run in paragraph.runs:
            text = re.sub(r"\s+", " ", run.text)
            sensitive_datas = re_tmp(text)
            names = name_identify([text])
            if len(sensitive_datas) > 0:
                names = names + sensitive_datas
            if len(names) == 0:
                continue
            for name in names:
                text = text.replace(name, 'X' * len(name))
            run.text = text


def docx_inplace_replace(file):
    """Write a masked copy of a .docx file and return the copy's path.

    Sensitive strings are masked run by run in the body paragraphs and in
    the paragraphs of every cell of the document's tables
    (``doc.tables``). The result is saved next to the input with ``_.docx``
    appended to the name returned by ``rreplace(file, '.docx', '', 1)``, and
    ``remove_header_footer`` is then applied to that new file in place.

    Because detection works on one run at a time, a sensitive string that is
    split across two runs is not seen as a whole.

    NOTE(review): ``rreplace`` presumably strips the last ``.docx`` from the
    path -- it is defined elsewhere, confirm.

    Args:
        file: Path of the source .docx document.

    Returns:
        Path of the newly written, masked document.
    """
    new_file = rreplace(file, '.docx', '', 1) + '_.docx'
    doc = docx.Document(file)

    # Body paragraphs.
    _mask_sensitive_runs(doc.paragraphs)

    # Paragraphs inside every table cell.
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                _mask_sensitive_runs(cell.paragraphs)

    # Save the document, then post-process the saved copy.
    doc.save(new_file)
    remove_header_footer(new_file, new_file)
    return new_file
import zipfile
import os
import re
import tempfile
import shutil
from functools import reduce


# 1. Read the XML string.
def getXml(docxFilename):
    """Return the main document part of a .docx file as text.

    Args:
        docxFilename: Path of the .docx (zip) file to read.

    Returns:
        The content of ``word/document.xml`` decoded as UTF-8.

    Raises:
        KeyError: If the archive has no ``word/document.xml`` entry.
    """
    # The context manager closes the archive and its file handle.
    with zipfile.ZipFile(docxFilename) as archive:
        return archive.read("word/document.xml").decode('utf-8')


# 2. TODO: apply the replacements to the XML string.


# 3. Package the XML back into a .docx file.
def createNewDocx(originalDocx, xmlContent, newFilename):
    """Write a copy of a .docx file with ``word/document.xml`` replaced.

    Every entry of the original archive is copied to the new archive in the
    original order, keeping each entry's metadata and compression method;
    only the content of ``word/document.xml`` is substituted.

    All entries are read into memory before the output is opened, so
    ``newFilename`` may be the same path as ``originalDocx``, and no
    temporary directory is needed.

    Args:
        originalDocx: Path of the source .docx file.
        xmlContent: New content for ``word/document.xml`` (``str``); it is
            written UTF-8 encoded.
        newFilename: Path of the .docx file to create or overwrite.

    Raises:
        KeyError: If the source archive has no ``word/document.xml`` entry,
            since the new content would otherwise be silently dropped.
    """
    document_part = "word/document.xml"

    with zipfile.ZipFile(originalDocx) as source:
        members = [(info, source.read(info)) for info in source.infolist()]

    if all(info.filename != document_part for info, _ in members):
        raise KeyError(
            "There is no item named %r in the archive" % document_part
        )

    new_document = xmlContent.encode('utf-8')
    with zipfile.ZipFile(newFilename, "w") as target:
        for info, data in members:
            if info.filename == document_part:
                data = new_document
            # Passing the original ZipInfo keeps name, timestamp and
            # compression method; sizes and CRC are recomputed on write.
            target.writestr(info, data)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。