赞
踩
WPS 能拆分、合并 PDF,但需要输入编辑密码,而我没有密码。因此写了个脚本来做拆分,顺便附上合并的代码。
代码如下(extract.py)
- #!/usr/bin/env python
- """PDF拆分脚本(需要Python3.10+)
- Usage::
- $ python extract.py <pdf-file>
- """
- import os
- import sys
- from pathlib import Path
-
- # pip install PyMuPDF
- import fitz # type:ignore[import-untyped]
-
- SRC_FILE = Path.home() / "Downloads" / "yasi.pdf"
-
-
def new_one(pdf: fitz.Document, page_num: int, parent: Path | None = None) -> Path:
    """Save one page of ``pdf`` as a standalone single-page PDF file.

    :param pdf: the opened source document
    :param page_num: 1-based number of the page to extract
    :param parent: directory to write into; when None the file is written
        relative to the current working directory
    :return: path of the file written, named ``<page_num>.pdf``
    :raises ValueError: if ``page_num`` is outside ``1..pdf.page_count``
    """
    total = pdf.page_count
    if not 1 <= page_num <= total:
        # Validate up front: index -1 is insert_pdf's "last page" default,
        # so page_num=0 would not fail cleanly on its own.
        raise ValueError(f"page_num must be between 1 and {total}, got {page_num}")
    target = Path(f"{page_num}.pdf")
    if parent is not None:
        target = parent / target.name
    # Build the new PDF from page `page_num`; insert_pdf is given the 0-based index.
    index = page_num - 1
    # The context manager closes the new document once it has been saved.
    with fitz.Document() as new_pdf:
        new_pdf.insert_pdf(pdf, from_page=index, to_page=index)
        new_pdf.save(target)
    return target
-
-
def extract(
    file: Path,
    num: int | None = None,
) -> Path:
    """Split a PDF into single-page files.

    :param file: path of the PDF to split
    :param num: 1-based page to extract; if None or omitted, every page is
        written into a directory named after the file's stem
    :return: the single file written, or the directory holding all pages
    """
    with fitz.open(file) as doc:
        # Single-page request: nothing else to set up.
        if num is not None:
            return new_one(doc, num)

        out_dir = Path(file.stem)
        if not out_dir.exists():
            print(f"Directory {out_dir} created!")
            out_dir.mkdir()
        total = doc.page_count
        print(f"Total pages of {file} is {total}.")
        for page_no in range(1, total + 1):
            new_one(doc, page_no, out_dir)
        return out_dir
-
-
def main() -> None:
    """Command-line entry point: split a PDF into single-page files.

    Accepted forms of ``sys.argv``:

    * no arguments -- split every page of ``SRC_FILE``
    * ``<page-num>`` -- extract that page from ``SRC_FILE``
    * ``<pdf-file> [<page-num>]`` -- split the given file, or extract one page

    If the file to split does not exist and no path was given on the command
    line, the user is prompted until an existing file is entered. A path given
    on the command line that does not exist is reported and the process exits
    with status 1.
    """
    file = SRC_FILE
    page_num: int | None = None
    args = sys.argv[1:]
    if args:
        first = args[0]
        if first.isdigit():
            page_num = int(first)
        else:
            candidate = Path(first)
            if not candidate.is_file():
                # Stop here instead of falling through and silently
                # splitting the default file the user did not ask for.
                print(f"文件`{candidate}`不存在!")
                sys.exit(1)
            file = candidate
            if args[1:] and args[1].isdigit():
                page_num = int(args[1])
    # Also covers "page number only" calls when the default file is missing.
    while not file.is_file():
        answer = input("请输入要拆分的PDF文件路径:").strip()
        candidate = Path(os.path.expanduser(answer))
        if candidate.is_file():
            file = candidate
        else:
            print(f"文件{candidate}不存在,请重新输入。\n")
    dst = extract(file, page_num)
    if dst.is_file():
        print(f"Save file to {dst}")
    else:
        # A directory was returned: every page was written into it.
        print(f"Save files to {dst}{os.sep}")
-
# Run the splitter only when executed as a script, not when imported.
if __name__ == "__main__":  # pragma: no cover
    main()
合并的代码如下:
- from pathlib import Path
-
- import fitz
-
-
- def merge(*files: str, new_name: str | None = None, verbose=True) -> Path:
- ps = [Path(i) for i in files]
- if new_name is None:
- new_name = '_'.join(i.stem for i in ps) + '.pdf'
- target = Path(new_name)
- new_pdf = fitz.Document()
- for p in ps:
- with fitz.open(p) as f:
- new_pdf.insert_pdf(f)
- new_pdf.save(target)
- if verbose:
- print(f'Save file to {target}')
- return target
-
-
# Guarded so that importing this module does not trigger a merge.
if __name__ == "__main__":
    # Example: merge 1.pdf and 2.pdf; the default output name is 1_2.pdf.
    merge('1.pdf', '2.pdf')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。