赞
踩
python-docx模块可以创建、修改Word(.docx)文件;
此模块儿不属于python标准库,需要单独安装;
python-docx官方文档:https://python-docx.readthedocs.io/
我们在安装此模块儿使用的是pip install python-docx,但是在导入的时候是import docx;
注意:所有修改都只发生在内存中,每次操作完成后必须调用doc.save()保存,否则修改不会写入文件,等于白做;
有一个这样的docx文件,我们想要提取其中的文字,应该怎么做?代码如下:
# Print the text of every paragraph in an existing .docx file.
from docx import Document

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)
all_paragraphs = doc.paragraphs

# First show the list of paragraph objects, then the text of each one.
print(all_paragraphs)
for para in all_paragraphs:
    print(para.text)
结果如下:
# Inspect the runs (text blocks) that make up the first two paragraphs of a
# .docx file: for each paragraph print its list of run objects, then the text
# of every run.
from docx import Document

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)
paragraphs = doc.paragraphs
print(paragraphs)

# Slicing (rather than indexing [0] and [1]) keeps the script from raising
# IndexError when the document holds fewer than two paragraphs.
for index, paragraph in enumerate(paragraphs[:2]):
    if index:
        # Divider between the two paragraphs' output. The original snippet
        # had a bare line of dashes here, which is not valid Python.
        print("-" * 30)
    runs = paragraph.runs
    print(runs)
    for run in runs:
        print(run.text)
结果如下:
# Append two plain paragraphs to an existing document and save it in place.
from docx import Document

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)

# NOTE(review): the original author reported that
#     print(doc.add_heading("一级标题", level=1))
# raised an error at this point and left it unresolved. Possibly the opened
# file has no "Heading 1" style defined -- confirm against the document.

# Keep the object returned by each add_paragraph() call in a variable so the
# paragraph's formatting can be adjusted later.
paragraph1, paragraph2 = (
    doc.add_paragraph(text) for text in ("这是一个段落", "这是第二个段落")
)

doc.save(DOCX_PATH)
结果如下:
# Build one paragraph out of three runs: bold, plain and italic.
from docx import Document

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)

# add_paragraph() with no text creates an empty paragraph; its content is
# supplied by the runs appended below.
paragraph3 = doc.add_paragraph()

bold_run = paragraph3.add_run("我被加粗了文字块儿")
bold_run.bold = True

paragraph3.add_run(",我是普通文字块儿,")

italic_run = paragraph3.add_run("我是斜体文字块儿")
italic_run.italic = True

doc.save(DOCX_PATH)
结果如下:
# Append a page break to the end of an existing document and save it in place.
from docx import Document

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"

doc = Document(DOCX_PATH)
doc.add_page_break()
doc.save(DOCX_PATH)
结果如下:
# Insert a picture with an explicit size into an existing document.
from docx import Document
from docx.shared import Cm

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"
PICTURE_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\sun_wu_kong.png"

doc = Document(DOCX_PATH)

# Cm (from docx.shared) expresses a length in centimetres; it is used here to
# set the picture size. Width and height are both 5 cm.
side = Cm(5)
doc.add_picture(PICTURE_PATH, width=side, height=side)

doc.save(DOCX_PATH)
结果如下:
# Append two tables, separated by a line of dashes, to an existing document.
from docx import Document

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"


def _add_table_from_rows(document, rows):
    """Append a table to *document* and fill it from *rows*.

    The table size is taken from the data itself (one table row per item of
    *rows*, one column per item of the first row), so the data can change
    without any loop bounds having to be updated by hand.

    Args:
        document: the python-docx document to append the table to.
        rows: non-empty sequence of equally long sequences of cell values;
            every value is converted with ``str()`` before being written.

    Returns:
        The table object returned by ``document.add_table``.
    """
    table = document.add_table(rows=len(rows), cols=len(rows[0]))
    for table_row, values in zip(table.rows, rows):
        for cell, value in zip(table_row.cells, values):
            cell.text = str(value)
    return table


doc = Document(DOCX_PATH)

# The first inner list of each data set is the header row.
list1 = [
    ["姓名", "性别", "家庭地址"],
    ["唐僧", "男", "湖北省"],
    ["孙悟空", "男", "北京市"],
    ["猪八戒", "男", "广东省"],
    ["沙和尚", "男", "湖南省"],
]
list2 = [
    ["姓名", "性别", "家庭地址"],
    ["貂蝉", "女", "河北省"],
    ["杨贵妃", "女", "贵州省"],
    ["西施", "女", "山东省"],
]

table1 = _add_table_from_rows(doc, list1)
# A paragraph of dashes visually separates the two tables.
doc.add_paragraph("-----------------------------------------------------------")
table2 = _add_table_from_rows(doc, list2)

doc.save(DOCX_PATH)
结果如下:
# Copy the first table of a Word document into a new Excel workbook.
from docx import Document
from openpyxl import Workbook

DOCX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test2.docx"
XLSX_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\来自word中的表.xlsx"

doc = Document(DOCX_PATH)
t0 = doc.tables[0]

workbook = Workbook()
sheet = workbook.active

row_count = len(t0.rows)
column_count = len(t0.columns)

# Each Word table row becomes one appended worksheet row of cell texts.
for row_index in range(row_count):
    row_texts = [
        t0.cell(row_index, column_index).text
        for column_index in range(column_count)
    ]
    sheet.append(row_texts)

workbook.save(filename=XLSX_PATH)
结果如下:
# Apply the same character formatting to every run of every paragraph and
# save the result under a new file name.
from docx import Document
from docx.shared import Pt, RGBColor
from docx.oxml.ns import qn

SOURCE_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test2.docx"
OUTPUT_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\_test1.docx"
FONT_NAME = "宋体"

doc = Document(SOURCE_PATH)

for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        font = run.font
        font.bold = True
        font.italic = True
        font.underline = True
        font.strike = True
        font.shadow = True
        font.size = Pt(18)
        font.color.rgb = RGBColor(255, 255, 0)
        font.name = FONT_NAME
        # Per the original author's note, a Chinese font such as 宋体 also
        # needs the w:eastAsia attribute set on the run's rFonts element;
        # setting font.name alone is not enough.
        run._element.rPr.rFonts.set(qn("w:eastAsia"), FONT_NAME)

doc.save(OUTPUT_PATH)
结果如下:
# Centre the first paragraph of a document and save the result as a new file.
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH

SOURCE_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"
OUTPUT_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\对齐样式.docx"

doc = Document(SOURCE_PATH)

first_paragraph = doc.paragraphs[0]
print(first_paragraph.text)

# Centre alignment. Alignment names listed by the original author:
# LEFT, CENTER, RIGHT, JUSTIFY, DISTRIBUTE, JUSTIFY_MED, JUSTIFY_HI,
# JUSTIFY_LOW, THAI_JUSTIFY.
first_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

doc.save(OUTPUT_PATH)
结果如下:
# Set the line spacing of every paragraph and save the result as a new file.
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH

SOURCE_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\test1.docx"
OUTPUT_PATH = r"G:\6Tipdm\7python办公自动化\concat_word\行间距.docx"
LINE_SPACING = 5.0

doc = Document(SOURCE_PATH)

for para in doc.paragraphs:
    para.paragraph_format.line_spacing = LINE_SPACING

doc.save(OUTPUT_PATH)
结果如下:
这里提供代码,自行下去检验
- 往期精彩回顾
-
-
-
- 适合初学者入门人工智能的路线及资料下载机器学习及深度学习笔记等资料打印机器学习在线手册深度学习笔记专辑《统计学习方法》的代码复现专辑
- AI基础下载机器学习的数学基础专辑
- 本站qq群704220115,加入微信群请扫码:
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。