赞
踩
将一个PDF文件按页拆分成多个PDF,或者将某一页中某一部分内容单独拆分出来作为一个新的PDF
本文使用pdfbox实现,因此需要先导入pdfbox的依赖
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.25</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>jbig2-imageio</artifactId>
<version>3.0.2</version>
</dependency>
代码如下(示例):
/**
 * Splits a PDF into one-page files written next to the source file.
 *
 * @param sourceFile the PDF to split
 * @throws IOException if the source cannot be read or an output file cannot be written
 */
public static void splitPdf(File sourceFile) throws IOException {
    splitPdf(sourceFile, 1);
}

/**
 * Splits a PDF into one-page files written to {@code targetPath}.
 *
 * @param sourceFile the PDF to split
 * @param targetPath the output directory
 * @throws IOException if the source cannot be read or an output file cannot be written
 */
public static void splitPdf(File sourceFile, String targetPath) throws IOException {
    splitPdf(sourceFile, targetPath, 1);
}

/**
 * Splits a PDF into files of {@code pageStep} pages each, written next to the source file.
 *
 * @param sourceFile the PDF to split
 * @param pageStep   number of pages per output file; must be at least 1
 * @throws IOException if the source cannot be read or an output file cannot be written
 */
public static void splitPdf(File sourceFile, Integer pageStep) throws IOException {
    // A bare relative name such as "a.pdf" has no parent; fall back to the working directory.
    File parent = sourceFile.getParentFile();
    String targetPath = parent != null ? parent.getPath() : ".";
    // Forward the caller's step (the previous version always passed 1 here).
    splitPdf(sourceFile, targetPath, pageStep);
}

/**
 * Splits a PDF into consecutive chunks of {@code pageStep} pages. Each chunk is saved as
 * {@code <sourceName>-<startPage>_<endPage>.pdf} (1-based, inclusive page numbers) in
 * {@code targetPath}. The last chunk may contain fewer pages.
 *
 * @param sourceFile the PDF to split
 * @param targetPath the output directory
 * @param pageStep   number of pages per output file; must be at least 1
 * @throws IllegalArgumentException if {@code pageStep} is null or less than 1
 * @throws IOException              if the source cannot be read or an output file cannot be written
 */
public static void splitPdf(File sourceFile, String targetPath, Integer pageStep) throws IOException {
    if (pageStep == null || pageStep < 1) {
        throw new IllegalArgumentException("pageStep must be a positive integer, got: " + pageStep);
    }
    String fileName = stripPdfExtension(sourceFile.getName());
    try (PDDocument document = PDDocument.load(sourceFile)) {
        int pageSize = document.getNumberOfPages();
        int count = pageSize / pageStep + (pageSize % pageStep == 0 ? 0 : 1);
        log.info("pageSize : {}, pageStep : {}, count : {}", pageSize, pageStep, count);
        for (int i = 0; i < count; i++) {
            int startPage = i * pageStep;
            // Clamp the final chunk to the end of the document.
            int endPage = startPage + Math.min(pageStep, pageSize - startPage);
            String splitName = fileName + "-" + (startPage + 1) + "_" + endPage + ".pdf";
            // A fresh document per chunk, closed even when saving fails. The source document
            // stays open until every chunk has been saved because the pages are shared, not copied.
            try (PDDocument chunk = new PDDocument()) {
                for (int index = startPage; index < endPage; index++) {
                    chunk.addPage(document.getPage(index));
                }
                chunk.save(new File(targetPath, splitName));
            }
        }
    }
}

/**
 * Extracts a single page into a new PDF.
 *
 * @param sourceFile the PDF to read
 * @param targetFile path of the PDF file to create (a file path, not a directory)
 * @param pageIndex  1-based number of the page to extract
 * @throws IOException if the source cannot be read or the target cannot be written
 */
public static void splitPdfWithPageIndex(File sourceFile, String targetFile, Integer pageIndex) throws IOException {
    splitPdfWithPageIndex(sourceFile, targetFile, pageIndex, 1);
}

/**
 * Extracts {@code totalPages} consecutive pages, starting at page {@code pageIndex}, into a new PDF.
 *
 * @param sourceFile the PDF to read
 * @param targetFile path of the PDF file to create (a file path, not a directory)
 * @param pageIndex  1-based number of the first page to extract
 * @param totalPages number of pages to extract; must be at least 1
 * @throws IllegalArgumentException  if {@code pageIndex} or {@code totalPages} is null, or
 *                                   {@code totalPages} is less than 1
 * @throws IndexOutOfBoundsException if the requested range is not fully inside the document
 * @throws IOException               if the source cannot be read or the target cannot be written
 */
public static void splitPdfWithPageIndex(File sourceFile, String targetFile, Integer pageIndex, Integer totalPages) throws IOException {
    if (pageIndex == null || totalPages == null) {
        throw new IllegalArgumentException("pageIndex and totalPages must not be null");
    }
    if (totalPages < 1) {
        throw new IllegalArgumentException("totalPages must be a positive integer, got: " + totalPages);
    }
    try (PDDocument document = PDDocument.load(sourceFile);
         PDDocument doc = new PDDocument()) {
        int pageCount = document.getNumberOfPages();
        // long arithmetic so a huge totalPages cannot wrap around and slip past the check.
        long lastPage = (long) pageIndex + totalPages - 1;
        if (pageIndex < 1 || lastPage > pageCount) {
            throw new IndexOutOfBoundsException(
                    "Requested pages " + pageIndex + ".." + lastPage + " but document has " + pageCount + " page(s)");
        }
        for (int index = pageIndex; index <= lastPage; index++) {
            // Callers pass 1-based page numbers; PDFBox pages are 0-based.
            doc.addPage(document.getPage(index - 1));
        }
        doc.save(targetFile);
    }
}

/** Removes a trailing ".pdf" (any letter case) from a file name; other names are returned unchanged. */
private static String stripPdfExtension(String name) {
    String ext = ".pdf";
    int cut = name.length() - ext.length();
    boolean hasExtension = cut >= 0 && name.regionMatches(true, cut, ext, 0, ext.length());
    return hasExtension ? name.substring(0, cut) : name;
}
代码解析:
splitPdf:splitPdf的作用在于将一个pdf以pageStep为步长,按照每pageStep页拆分一个PDF的规则将整个PDF文件全部拆出。
splitPdfWithPageIndex:splitPdfWithPageIndex的作用在于将一个PDF文件从第pageIndex页(页码从1开始)起,连续取totalPages页,生成一个新的PDF文件。
将PDF中某一部分内容拆分出来,这一部分的处理思路是先将PDF文件转换成图片,然后采用截图的方式将需要的内容截取出来生成新的图片,然后再将这个图片转换成PDF文件。
PDF转换图片的部分在java使用pdfbox将PDF转化为图片这一篇中有描述,感兴趣的可以去看看
代码如下(示例):
/**
 * Describes one region to cut out of a PDF page: the page it is on and the
 * start/end coordinates of the cut along the Y axis.
 */
public class CutInfo {
    /** Page number. */
    private Integer pageNo;
    /** Y-axis coordinate where the cut starts. */
    private Float start;
    /** Y-axis coordinate where the cut ends. */
    private Float end;

    /** @return the page number, or {@code null} if not set */
    public Integer getPageNo() {
        return pageNo;
    }

    /** @param pageNo the page number */
    public void setPageNo(Integer pageNo) {
        this.pageNo = pageNo;
    }

    /** @return the Y-axis start coordinate of the cut, or {@code null} if not set */
    public Float getStart() {
        return start;
    }

    /** @param start the Y-axis start coordinate of the cut */
    public void setStart(Float start) {
        this.start = start;
    }

    /** @return the Y-axis end coordinate of the cut, or {@code null} if not set */
    public Float getEnd() {
        return end;
    }

    /** @param end the Y-axis end coordinate of the cut */
    public void setEnd(Float end) {
        this.end = end;
    }
}
/**
 * Renders one page of a PDF to an image. Despite the name, nothing is split here.
 *
 * @param document the PDF to render from
 * @param page     1-based page number
 * @return the rendered page
 * @throws IOException if rendering fails
 */
public static BufferedImage splitPdfWithImage(PDDocument document, Integer page) throws IOException {
    PDFRenderer renderer = new PDFRenderer(document);
    // Scale factor 5; PDFBox documents a scale of 1 as 72 DPI, so this should be about 360 DPI.
    return renderer.renderImage(page - 1, 5);
}

/**
 * Cuts a region out of a page using the settings carried by {@code cutInfo}
 * (output file, page, receipt count and cut range).
 *
 * @param document the PDF to cut from
 * @param cutInfo  the cut settings
 * @throws IOException if rendering or writing fails
 */
public static void cutPdfWithImage(PDDocument document, CutInfoEntity cutInfo) throws IOException {
    File file = new File(cutInfo.getOutFile());
    cutPdfWithImage(document, cutInfo.getPage(), cutInfo.getReceiptCount(),
            cutInfo.getCutRange().getStart(), cutInfo.getCutRange().getEnd(), file);
}

/**
 * Same as {@link #cutPdfWithImage(PDDocument, Integer, Integer, Integer, Integer, File)},
 * taking the output location as a path string.
 *
 * @param outFile path of the PDF file to create
 * @throws IOException if rendering or writing fails
 */
public static void cutPdfWithImage(PDDocument document, Integer page, Integer receiptCount, Integer start, Integer end, String outFile) throws IOException {
    File file = new File(outFile);
    cutPdfWithImage(document, page, receiptCount, start, end, file);
}

/**
 * Cuts a full-width horizontal band out of one page and saves it as a single-page A4 PDF.
 * The page is rendered to an image, the band between rows {@code start} and {@code end}
 * is cropped, and the crop is drawn at the top of the A4 page, stretched to the full page
 * width and to {@code 1 / receiptCount} of the page height. {@code document} is not closed
 * by this method.
 *
 * @param document     the PDF to cut from
 * @param page         1-based number of the page containing the region
 * @param receiptCount how many receipts one source page holds; sets the height of the band
 *                     on the output page and must be at least 1
 * @param start        first row of the band, in pixels of the rendered image, measured along
 *                     the Y axis from the top of the image
 * @param end          row where the band ends (exclusive), in the same units as {@code start}
 * @param outFile      the PDF file to create; missing parent directories are created
 * @throws IllegalArgumentException if {@code receiptCount} is null or less than 1
 * @throws IOException              if rendering or writing fails
 */
public static void cutPdfWithImage(PDDocument document, Integer page, Integer receiptCount, Integer start, Integer end, File outFile) throws IOException {
    if (receiptCount == null || receiptCount < 1) {
        // Zero would silently give an infinite band height in the float division below.
        throw new IllegalArgumentException("receiptCount must be a positive integer, got: " + receiptCount);
    }
    BufferedImage rendered = splitPdfWithImage(document, page);
    BufferedImage band = rendered.getSubimage(0, start, rendered.getWidth(), end - start);

    try (PDDocument outDocument = new PDDocument()) {
        PDImageXObject imageXObject = LosslessFactory.createFromImage(outDocument, band);
        PDPage pdPage = new PDPage(PDRectangle.A4);

        // Carry the source page's fonts over to the new page, as the original code did.
        PDResources sourceResources = document.getPage(page - 1).getResources();
        if (sourceResources != null) { // a page may have no resources dictionary
            PDResources targetResources = pdPage.getResources();
            if (targetResources == null) {
                targetResources = new PDResources();
                pdPage.setResources(targetResources);
            }
            for (COSName fontName : sourceResources.getFontNames()) {
                targetResources.put(fontName, sourceResources.getFont(fontName));
            }
        }
        outDocument.addPage(pdPage);

        PDRectangle mediaBox = pdPage.getMediaBox();
        float height = mediaBox.getHeight() / receiptCount;
        // PDF coordinates start at the bottom-left corner, so this puts the band at the top.
        float y = mediaBox.getHeight() - height;
        try (PDPageContentStream pageContentStream = new PDPageContentStream(outDocument, pdPage)) {
            pageContentStream.drawImage(imageXObject, 0, y, mediaBox.getWidth(), height);
        }

        // A bare relative file name has no parent directory to create.
        File parentDir = outFile.getParentFile();
        if (parentDir != null && !parentDir.exists()) {
            parentDir.mkdirs();
        }
        outDocument.save(outFile);
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。