赞
踩
该代码可以将多页 PDF 的每一页转换为图像(每页同时保存一张灰度图和一张彩色图)。
- import datetime
- import os
- import fitz # fitz就是pip install PyMuPDF
- import cv2
- import numpy as np
-
-
def pix_to_image(pix):
    """Convert a PyMuPDF pixmap into an OpenCV-style BGR ``uint8`` image.

    Args:
        pix: Object exposing ``samples`` (raw pixel bytes), ``height`` and
            ``width``. If it also exposes ``n`` (bytes per pixel) and
            ``alpha``, they are honoured; otherwise the buffer is treated
            as 3-channel RGB without alpha, as the original code assumed.

    Returns:
        A new, C-contiguous ``numpy.ndarray`` of shape
        ``(height, width, 3)`` holding the pixels in BGR order.

    Raises:
        ValueError: If the buffer size does not match
            ``height * width * n`` or the colour layout is neither
            grayscale nor RGB (each optionally followed by alpha).
    """
    channels = getattr(pix, "n", 3)
    has_alpha = bool(getattr(pix, "alpha", False))

    flat = np.frombuffer(pix.samples, dtype=np.uint8)
    img = flat.reshape(pix.height, pix.width, channels)

    # The alpha byte, when present, is the last one of each pixel.
    color_channels = channels - 1 if has_alpha else channels
    if color_channels == 3:
        # Reverse R,G,B -> B,G,R (and drop alpha if there is one).
        bgr = img[:, :, 2::-1]
    elif color_channels == 1:
        # Replicate the single gray channel so callers always get BGR.
        bgr = np.repeat(img[:, :, :1], 3, axis=2)
    else:
        raise ValueError(
            "unsupported pixmap layout: n=%d, alpha=%s" % (channels, has_alpha)
        )

    # Copy into a writable, contiguous buffer; the reversed slice is only a
    # negative-stride view over the read-only sample bytes.
    return np.ascontiguousarray(bgr)
-
def pyMuPDF_fitz(pdfPath, zoom_x=4, zoom_y=4, rotate=0):
    """Render every page of a document into a PyMuPDF pixmap.

    Args:
        pdfPath: Path of the document passed to ``fitz.open``.
        zoom_x: Horizontal scale factor applied when rendering. The
            default of 4 is the value the script previously hard-coded
            (the original author noted 1.33333333 -> 1056x816 and
            2 -> 1584x1224 for their pages).
        zoom_y: Vertical scale factor, default 4.
        rotate: Rotation in degrees handed to ``Matrix.prerotate``,
            default 0.

    Returns:
        A list with one pixmap per page, in page order, rendered
        without an alpha channel.
    """
    # The transform is the same for every page, so build it once.
    mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
    # Use the document as a context manager so the file handle is released
    # even if rendering a page raises.
    with fitz.open(pdfPath) as pdfDoc:
        return [page.get_pixmap(matrix=mat, alpha=False) for page in pdfDoc]
-
def get_files(path):
    """Return the names of the regular files directly inside *path*.

    Sub-directories are skipped and the listing is not recursive. Names
    are returned without the directory prefix, sorted so the result does
    not depend on the order the operating system reports entries in.

    Args:
        path: Directory to list.

    Returns:
        A sorted list of file names (not full paths).

    Raises:
        OSError: Propagated from ``os.listdir`` when *path* cannot be
            listed (for example, it does not exist).
    """
    return sorted(
        entry
        for entry in os.listdir(path)
        if os.path.isfile(os.path.join(path, entry))
    )
-
def main():
    """Convert every file in the input folder into per-page PNG images.

    For each page two files are written to the output folder: a
    grayscale image (``<name>_<page>pdf_.png``) and a colour image
    (``<name>_<page>pdfcolor_.png``), where ``<name>`` is the input file
    name including its extension and ``<page>`` is the 0-based page index.
    """
    # 1. Folder containing the input PDFs.
    pdfPath_totall = 'C:/code/box_word/PDF_BOX/pdf/'
    # 2. Folder the rendered images are written to.
    imagePath = 'C:/code/box_word/PDF_BOX/pdf_images/'
    # cv2.imwrite reports failure through its return value instead of
    # raising, so make sure the target folder exists up front.
    os.makedirs(imagePath, exist_ok=True)

    # Only regular files: a sub-directory would make fitz.open fail.
    file_list = get_files(pdfPath_totall)
    print(file_list)

    for name in file_list:
        pdfPath = os.path.join(pdfPath_totall, name)
        print(pdfPath)
        # Render the document; multi-page files yield one pixmap per page.
        pix_totall = pyMuPDF_fitz(pdfPath)
        print("图像的总数为:", len(pix_totall))

        # Convert each pixmap to an OpenCV image and save both variants.
        for page_index, pix in enumerate(pix_totall):
            image_pfd = pix_to_image(pix)
            gray_image = cv2.cvtColor(image_pfd, cv2.COLOR_BGR2GRAY)
            out_prefix = os.path.join(imagePath, name + '_' + str(page_index))
            cv2.imwrite(out_prefix + 'pdf_.png', gray_image)
            cv2.imwrite(out_prefix + 'pdfcolor_.png', image_pfd)


if __name__ == "__main__":
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。