当前位置:   article > 正文

Python拆分PDF、Python合并PDF_fitz库合并pdf

fitz库合并pdf

WPS 可以拆分、合并 PDF,但需要输入编辑密码,而我没有这个密码。因此写了个脚本来做拆分,顺便附上合并的代码。

拆分的代码如下(extract.py,依赖 PyMuPDF:`pip install PyMuPDF`):

  1. #!/usr/bin/env python
  2. """PDF拆分脚本(需要Python3.10+)
  3. Usage::
  4. $ python extract.py <pdf-file>
  5. """
  6. import os
  7. import sys
  8. from pathlib import Path
  9. # pip install PyMuPDF
  10. import fitz # type:ignore[import-untyped]
  11. SRC_FILE = Path.home() / "Downloads" / "yasi.pdf"
  12. def new_one(pdf: fitz.Document, page_num: int, parent: Path | None = None) -> Path:
  13. target = Path(f"{page_num}.pdf")
  14. if parent is not None:
  15. target = parent / target.name
  16. new_pdf = fitz.Document()
  17. # 用第page_num页生成新的PDF文件
  18. index = page_num - 1
  19. new_pdf.insert_pdf(pdf, from_page=index, to_page=index)
  20. new_pdf.save(target)
  21. return target
  22. def extract(
  23. file: Path,
  24. num: int | None = None,
  25. ) -> Path:
  26. """拆分PDF
  27. :param file: 文件路径
  28. :param num: 要拆分出哪一页,如果传None或不传,则每一页都拆分出来
  29. """
  30. with fitz.open(file) as f:
  31. if num is None:
  32. folder = Path(file.stem)
  33. if not folder.exists():
  34. print(f"Directory {folder} created!")
  35. folder.mkdir()
  36. print(f"Total pages of {file} is {f.page_count}.")
  37. for num in range(1, f.page_count + 1):
  38. new_one(f, num, folder)
  39. return folder
  40. else:
  41. return new_one(f, num)
  42. def main() -> None:
  43. file = SRC_FILE
  44. page_num: int | None = None
  45. if sys.argv[1:]:
  46. if (a := sys.argv[1]).isdigit():
  47. page_num = int(a)
  48. elif (_p := Path(a)).is_file():
  49. file = _p
  50. if sys.argv[2:] and sys.argv[2].isdigit():
  51. page_num = int(sys.argv[2])
  52. elif _p.suffix.lower() == ".pdf":
  53. print(f"文件`{_p}`不存在!")
  54. elif not file.exists():
  55. while True:
  56. a = input("请输入要拆分的PDF文件路径:").strip()
  57. if "~" in a:
  58. a = os.path.expanduser(a)
  59. if (_p := Path(a)).is_file():
  60. file = _p
  61. break
  62. else:
  63. print(f"文件{_p}不存在,请重新输入。\n")
  64. dst = extract(file, page_num)
  65. if dst.is_file():
  66. print(f"Save file to {dst}")
  67. else:
  68. print(f"Save files to {dst}{os.sep}")
  69. if __name__ == "__main__": # pragma: no cover
  70. main()

合并的代码如下:

  1. from pathlib import Path
  2. import fitz
  3. def merge(*files: str, new_name: str | None = None, verbose=True) -> Path:
  4. ps = [Path(i) for i in files]
  5. if new_name is None:
  6. new_name = '_'.join(i.stem for i in ps) + '.pdf'
  7. target = Path(new_name)
  8. new_pdf = fitz.Document()
  9. for p in ps:
  10. with fitz.open(p) as f:
  11. new_pdf.insert_pdf(f)
  12. new_pdf.save(target)
  13. if verbose:
  14. print(f'Save file to {target}')
  15. return target
  16. merge('1.pdf', '2.pdf')

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/代码探险家/article/detail/879992
推荐阅读
相关标签
  

闽ICP备14008679号