赞
踩
使用win32com库函数
import win32com.client

# Create the Word COM automation object.
word_app = win32com.client.Dispatch('Word.Application')
try:
    # input_path is the document to open; it must be defined by the surrounding code.
    doc = word_app.Documents.Open(input_path)
    try:
        ...  # perform the desired operations on doc here
    finally:
        # Close the document even if one of the operations above raised.
        doc.Close()
finally:
    # Always quit Word, even on failure.
    word_app.Quit()
补充说明:
# Batch-convert .doc / .docx files to PDF.
import os
import shutil

import win32com.client

# Word's WdSaveFormat value for PDF output (wdFormatPDF).
WD_FORMAT_PDF = 17

doc_path = ""  # root directory that is scanned for Word documents
pdf_path = ""  # prefix prepended to every generated PDF name (include the trailing separator)
pdf_tmp = ""  # prefix for the scratch files used by the retry path (include the trailing separator)

# Collect the path of every file below doc_path.
fs = [
    os.path.join(roots, file)
    for roots, _, files in os.walk(doc_path)
    for file in files
]
print(len(fs))


def convert_to_pdf(input_path, output_path, word_app):
    """Open input_path in Word and save it as a PDF at output_path.

    Args:
        input_path: Path of the .doc/.docx file to convert.
        output_path: Path of the PDF file to write.
        word_app: A Word.Application COM object used to open the document.

    Raises:
        Exception: Whatever the COM calls raise; the opened document is
            closed before the error propagates.
    """
    print(input_path)
    doc = word_app.Documents.Open(input_path)
    try:
        print(doc)
        doc.SaveAs(output_path, FileFormat=WD_FORMAT_PDF)
    finally:
        # Close the document even when SaveAs fails so it does not stay open in Word.
        doc.Close()


word_ap = win32com.client.Dispatch('Word.Application')
try:
    for f in fs:
        filename = os.path.basename(f)
        # Only handle Word documents; skip names starting with "~$"
        # (Word's temporary owner files).
        if not filename.endswith(('.doc', '.docx')) or filename.startswith('~$'):
            continue

        # Swap the extension for "pdf".
        output_file = filename.rsplit('.', 1)[0] + '.pdf'
        try:
            convert_to_pdf(f, pdf_path + output_file, word_ap)
        except Exception:
            # Retry: move the source to the scratch location under a name
            # without spaces, convert it to a fixed temporary PDF name, then
            # move that PDF to its final destination.
            tmp_source = pdf_tmp + filename.replace(' ', '1')
            tmp_pdf = pdf_tmp + 'tmp_file.pdf'
            try:
                shutil.move(f, tmp_source)
                convert_to_pdf(tmp_source, tmp_pdf, word_ap)
                shutil.move(tmp_pdf, pdf_path + output_file)
            except Exception:
                # Both attempts failed: report the file and record it in error.txt.
                print(f)
                with open(pdf_path + "error.txt", mode="a", encoding='utf-8') as error_log:
                    error_log.write(f + '\n')
finally:
    # Always quit Word, even if the loop was interrupted by an error.
    word_ap.Quit()
读取excel—pandas
import pandas as pd

data = pd.read_excel('example.xlsx', engine='openpyxl')
print(data['A'])  # print the data in column 'A'
print(data.at[0, 'A'])  # print the cell at row label 0, column 'A'
data['type'] = 'A类'  # create a new column holding the same value in every row
data.values.tolist()  # convert the table to [[...], [...], ...]; each inner list is one row
读取excel—openpyxl
import openpyxl

workbook = openpyxl.load_workbook('example.xlsx')
ws = workbook['Sheet1']  # select the worksheet by name
rows = ws.rows  # all rows; equivalent to ws.iter_rows()
for row in rows:
    for cell in row:
        # Print the cells of the sheet separated by single spaces.
        print(cell.value, end=' ')
写入excel:
# 1) Build a DataFrame from data shaped like [[...], [...], ...] (one inner
#    list per row) and write it to an Excel file.
df = pd.DataFrame(data, columns=['列名1', '列名2'])
df.to_excel('aaaaaa.xlsx', index=False, engine='openpyxl')
# 2) Write an existing DataFrame directly; here data is assumed to already be
#    a DataFrame (e.g. the result of pd.read_excel).
data.to_excel('aaaa.xlsx', index=False, engine='openpyxl')
3)新增一列:
df = pd.read_excel('example.xlsx')
# Add a new column whose value is the same in every row.
df['new_column'] = '1'
# Add a new column with per-row values, inserted as the first column;
# the list needs exactly one entry per row of df.
new_values = [1, 2, 3, 4]
df.insert(loc=0, column='new_colunm', value=new_values)
df.to_excel('aaa.xlsx', index=False)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。