赞
踩
本文以Java示例展示PDF分页读取、分页拆分的方法。
在pom.xml中添加PDFBox的Maven依赖,如下:
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.15</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.15</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/jempbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jempbox</artifactId>
<version>1.8.16</version>
</dependency>
- //String fileName: PDF路径;
- //int from:要读取得PDF开始页码;
- //int end:要读取得PDF结束页码;
- //每页之间分割字符串:“PDF解析第\d+页”
- public static String readPdfByPage(String fileName, int from, int end) {
- String result = "";
- File file = new File(fileName);
- FileInputStream in = null;
- try {
- in = new FileInputStream(fileName);
- // 新建一个PDF解析器对象
- PDFParser parser = new PDFParser(new RandomAccessFile(file,"rw"));
- // 对PDF文件进行解析
- parser.parse();
- // 获取解析后得到的PDF文档对象
- PDDocument pdfdocument = parser.getPDDocument();
- int size = pdfdocument.getNumberOfPages();
- // 新建一个PDF文本剥离器
- PDFTextStripper stripper = new PDFTextStripper();
- stripper.setSortByPosition(false); //sort:设置为true则按照行进行读取,默认是false
- //一页一页读取
- for(int i = from ; i <= end;i++ ){
- // 设置起始页
- stripper.setStartPage(i);
- // 设置结束页
- stripper.setEndPage(i);
- // 从PDF文档对象中剥离文本
- String pageStr = stripper.getText(pdfdocument);
- result = result + pageStr + "\n" + "PDF解析第"+ i + "页\n";
- }
-
- //一次读取完
- // 设置起始页
- // stripper.setStartPage(from);
- // 设置结束页
- // stripper.setEndPage(end);
- // 从PDF文档对象中剥离文本
- // String result = stripper.getText(pdfdocument);
-
- } catch (Exception e) {
- System.out.println("读取PDF文件" + file.getAbsolutePath() + "生失败!" + e);
- e.printStackTrace();
- } finally {
- if (in != null) {
- try {
- in.close();
- } catch (IOException e1) {
- }
- }
- }
- return result;
- }

在pom.xml中添加iText的Maven依赖,如下:
<dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.5.1</version> <type>jar</type> </dependency>
2.2.1 代码
- public static void pdfToSub(String filePath,String newFile, int from, int end) {
- Document document = null;
- PdfCopy copy = null;
- try {
- PdfReader reader = new PdfReader(filePath);
- //总页数
- int n = reader.getNumberOfPages();
- if (end == 0) {
- end = n;
- }
- document = new Document(reader.getPageSize(1));
- copy = new PdfCopy(document, new FileOutputStream(newFile));
- document.open();
- for (int j = from; j <= end; j++) {
- document.newPage();
- PdfImportedPage page = copy.getImportedPage(reader, j);
- copy.addPage(page);
- }
- document.close();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }

不再赘述!
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。