赞
踩
Apache PDFBox 库是一个开源、用于操作 PDF 文档的 Java 工具库。PDFBox 允许创建新的 PDF 文档、操作现有文档,以及从文档中提取内容。
可从 Apache PDFBox 官方下载页面获取该库,目前版本为 2.0.25,核心 jar 是 pdfbox-2.0.25.jar,其余几个 jar 可以根据需要导入。
在“jar包下载与源码学习”页面可以下载全部的 jar 和一些命令行工具;下载 pdfbox-2.0.25-src.zip 源码后,可以看到 examples 文件夹里有不少示例代码。
pdf转word
所需jar包下载 无需积分
如果帮助到你了麻烦点个赞或收藏哦,会不断更新的
package pdfUtity;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import java.io.File;
import java.io.IOException;
import java.util.Scanner;
//import org.apache.pdfbox.util.PDFMergerUtility;
/**
 * Console tool that merges the PDF files found directly inside a folder
 * (entered on standard input) into one document named {@code javaweb2020.pdf}.
 */
public class pdfMerge {
    /**
     * Lists the names of the PDF files located directly inside a folder.
     *
     * <p>Only regular files whose name ends with {@code .pdf} (case-insensitive)
     * are returned, so sub-directories and unrelated files are never handed to
     * the merger. The names are sorted so the merge order is deterministic.
     *
     * @param fileAddress path of the folder to scan
     * @return sorted file names (not full paths) of the PDFs in the folder; may be empty
     * @throws IOException if the path is not a directory or cannot be listed
     */
    private static String[] getPdfs(String fileAddress) throws IOException {
        File folder = new File(fileAddress);
        if (!folder.isDirectory()) {
            throw new IOException("输入的路径有问题");
        }
        String[] pdfs = folder.list(new java.io.FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.toLowerCase(java.util.Locale.ROOT).endsWith(".pdf")
                        && new File(dir, name).isFile();
            }
        });
        // File.list returns null when the directory cannot be read.
        if (pdfs == null) {
            throw new IOException("输入的路径有问题");
        }
        java.util.Arrays.sort(pdfs);
        return pdfs;
    }

    /**
     * Reads a folder path from standard input and merges all PDFs in it.
     *
     * @param args unused
     * @throws Exception if the folder is invalid or the merge fails
     */
    public static void main(String[] args) throws Exception {
        Scanner in = new Scanner(System.in);
        try {
            PDFMergerUtility mergePdf = new PDFMergerUtility();
            System.out.println("请输入要合并的PDF文件所在的文件夹路径");
            String fileAddress = in.nextLine();
            System.out.println("你输入的路径是:" + fileAddress);
            String destinationFileName = "javaweb2020.pdf";
            String[] pdfs = getPdfs(fileAddress);
            if (pdfs.length == 0) {
                // Nothing to merge: avoid producing an empty/invalid output file.
                System.out.println("该文件夹中没有找到PDF文件");
                return;
            }
            for (String pdf : pdfs) {
                mergePdf.addSource(fileAddress + File.separator + pdf);
            }
            mergePdf.setDestinationFileName(destinationFileName);
            System.out.println("合并比较费时间,请等待个几分钟吧!");
            mergePdf.mergeDocuments();
            System.out.print("合并完成");
        } finally {
            in.close();
        }
    }
}
package pdfUtity;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Iterator;
/**
 * Splits a PDF into one document per page and saves each page as
 * {@code <n>.pdf} (numbered from 1) in the output folder.
 */
public class pdfSplitter {
    /**
     * Loads the source PDF, splits it and writes every resulting document to disk.
     *
     * @param args unused
     * @throws IOException if the source cannot be loaded or a page cannot be saved
     */
    public static void main(String[] args) throws IOException {
        // Load the PDF that is to be split.
        File file = new File("d://desktopfile//pdfs//1.pdf");
        PDDocument document = PDDocument.load(file);
        try {
            Splitter splitter = new Splitter();
            // Each element is a separate document produced by the splitter.
            List<PDDocument> pages = splitter.split(document);
            try {
                int pageNumber = 1;
                for (PDDocument page : pages) {
                    page.save("d://desktopfile//pdfPhotos//" + pageNumber + ".pdf");
                    pageNumber++;
                }
            } finally {
                // Release every split document, even when a save failed part-way.
                for (PDDocument page : pages) {
                    try {
                        page.close();
                    } catch (IOException ignored) {
                        // Best-effort cleanup: a close failure must not mask the
                        // original error nor stop the remaining documents closing.
                    }
                }
            }
            System.out.println("pdf拆分成功");
        } finally {
            // The source is closed last, after all split documents were written.
            document.close();
        }
    }
}
package pdfUtity;/*
* 读取 pdf,将其中的某一页另存为 png 图片
*/
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
/**
 * Renders every page of a PDF at 300 DPI and writes each page as
 * {@code <pageIndex>.png} (index starting at 0) into the output folder.
 */
public class PDFSavePNG
{
    /**
     * Opens the source PDF and exports all of its pages as PNG images.
     * I/O errors (including a password-protected source) are printed to stderr.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        try
        {
            // Open the source PDF.
            PDDocument pdfDocument = PDDocument.load(new File("d://desktopfile//pdfs//1.pdf"));
            try
            {
                PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument);
                // Resolution used both for rendering and for the PNG metadata.
                int dpi = 300;
                int pageCount = pdfDocument.getNumberOfPages();
                for (int pageNumber = 0; pageNumber < pageCount; pageNumber++)
                {
                    // Render the page into an in-memory RGB image.
                    BufferedImage buffImage = pdfRenderer.renderImageWithDPI(pageNumber, dpi, ImageType.RGB);
                    // Write the rendered image as PNG.
                    String outputPath = "d://desktopfile//pdfPhotos//" + pageNumber + ".png";
                    boolean written = ImageIOUtil.writeImage(buffImage, outputPath, dpi);
                    if (!written)
                    {
                        // writeImage reports failure through its return value; surface it.
                        System.err.println("图片写入失败:" + outputPath);
                    }
                }
            }
            finally
            {
                // Always release the document, even when rendering or writing fails.
                pdfDocument.close();
            }
        }
        catch (InvalidPasswordException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
package pdfUtity;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
/**
 * Combines the images of a folder into a single PDF, one image per page.
 *
 * @author: Serendipity
 * Date: 2022/3/16 22:40
 */
public class photoToPDF {
    /**
     * File suffixes (lower case, without the dot) accepted as images.
     */
    private static final List<String> IMAGE_SUFFIX = Arrays.asList("jpg", "png", "jpeg");

    /**
     * Tells whether a file name carries one of the accepted image suffixes.
     * The comparison ignores case, so {@code A.JPG} is accepted as well.
     */
    private static boolean hasImageSuffix(String fileName) {
        int index = fileName.lastIndexOf(".");
        if (index == -1) {
            return false;
        }
        String suffix = fileName.substring(index + 1).toLowerCase(java.util.Locale.ROOT);
        return IMAGE_SUFFIX.contains(suffix);
    }

    /**
     * Merges the images of a folder into one PDF. Every image becomes one page
     * whose size equals the pixel dimensions of the image. Files are processed
     * in sorted path order; non-files, files without an accepted suffix and
     * files that cannot be decoded as images are skipped.
     *
     * @param imgFolder folder containing the images, e.g. "D:\\image\\"
     * @param target    path of the PDF to write, e.g. "D:\\image\\merge.pdf"
     * @throws IOException if the folder cannot be listed, an image cannot be read,
     *                     or the PDF cannot be written
     */
    public static void manyImageToOnePdf(String imgFolder, String target) throws IOException {
        File folder = new File(imgFolder);
        // List the folder once: listFiles() returns null for a missing/unreadable
        // directory, and re-listing per iteration is wasteful and racy.
        File[] files = folder.listFiles();
        if (files == null) {
            throw new IOException("Cannot list image folder: " + imgFolder);
        }
        // listFiles() gives no ordering guarantee; sort for a stable page order.
        Arrays.sort(files);

        PDDocument doc = new PDDocument();
        try {
            for (File imageFile : files) {
                if (!imageFile.isFile() || !hasImageSuffix(imageFile.getName())) {
                    continue;
                }
                BufferedImage bufferedImage = ImageIO.read(imageFile);
                if (bufferedImage == null) {
                    // ImageIO.read returns null when no reader can decode the file.
                    System.err.println("Skipping unreadable image: " + imageFile);
                    continue;
                }
                PDImageXObject pdImage = LosslessFactory.createFromImage(doc, bufferedImage);
                float w = pdImage.getWidth();
                float h = pdImage.getHeight();
                // Page sized to the image so it is drawn without scaling.
                PDPage page = new PDPage(new PDRectangle(w, h));
                PDPageContentStream contents = new PDPageContentStream(doc, page);
                try {
                    contents.drawImage(pdImage, 0, 0, w, h);
                    System.out.println("Image inserted");
                } finally {
                    contents.close();
                }
                doc.addPage(page);
            }
            // Write the finished PDF.
            doc.save(target);
        } finally {
            // Release the document even when reading or saving failed.
            doc.close();
        }
    }

    /**
     * Example run: merges the images in {@code d://图片} into {@code photo.pdf}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            manyImageToOnePdf("d://图片","photo.pdf");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。