当前位置:   article > 正文

基于python的批量PDF文件转图像_pdfdoc = fitz.open(pdfpath) totalpage=pdfdoc.pages

pdfdoc = fitz.open(pdfpath); totalpage = pdfdoc.page_count

一、基于python代码将批量的pdf转换为opencv的图像

该代码可以实现多页PDF的图像转换

  1. import datetime
  2. import os
  3. import fitz # fitz就是pip install PyMuPDF
  4. import cv2
  5. import numpy as np
  6. def pix_to_image(pix):
  7. bytes = np.frombuffer(pix.samples, dtype=np.uint8)
  8. img = bytes.reshape(pix.height, pix.width, 3)
  9. cv_image = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
  10. return cv_image
  11. def pyMuPDF_fitz(pdfPath):
  12. pdfDoc = fitz.open(pdfPath)
  13. pix_totall=[]
  14. for pg in range(pdfDoc.page_count):
  15. page = pdfDoc[pg]
  16. rotate = int(0)
  17. # zoom_x = 2.33333333 # (1.33333333-->1056x816) (2-->1584x1224)
  18. # zoom_y = 2.33333333
  19. zoom_x = 4 # (1.33333333-->1056x816) (2-->1584x1224)
  20. zoom_y = 4
  21. mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
  22. pix = page.get_pixmap(matrix=mat, alpha=False)
  23. pix_totall.append(pix)
  24. return pix_totall
  25. def get_files(path):
  26. """ 获取指定路径下所有文件名称 """
  27. files = []
  28. for filename in os.listdir(path):
  29. if os.path.isfile(os.path.join(path, filename)):
  30. files.append(filename)
  31. return files
  32. if __name__ == "__main__":
  33. # 1、PDF地址
  34. pdfPath_totall = 'C:/code/box_word/PDF_BOX/pdf/'
  35. file_list = os.listdir(pdfPath_totall)
  36. imagePath = 'C:/code/box_word/PDF_BOX/pdf_images/'
  37. print(file_list)
  38. # # 2、需要储存图片的目录
  39. for i, name in enumerate(file_list):
  40. pdfPath=pdfPath_totall+name
  41. # image_save_Path=imagePath+'pdf3'+'.bmp'
  42. print(pdfPath)
  43. #将pdf转换成Buffer,多张图像依旧可以转换
  44. pix_totall=pyMuPDF_fitz(pdfPath)
  45. print("图像的总数为:", len(pix_totall))
  46. #将buffer转换成opencv的图像格式
  47. for i in range(len(pix_totall)):
  48. image_pfd=pix_to_image(pix_totall[i])
  49. gray_image = cv2.cvtColor(image_pfd, cv2.COLOR_BGR2GRAY)
  50. cv2.imwrite(imagePath + name+'_'+str(i)+'pdf_.png', gray_image)
  51. cv2.imwrite(imagePath + name+'_'+str(i)+'pdfcolor_.png', image_pfd)

声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号