赞
踩
程序猿最近需要提取 PDF 文件中的信息,技术选型上用了 itextpdf 工具包,整体比较简单,在此记录一下。
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf --> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.5.13</version> </dependency> <!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian --> <!-- 处理中文字符 --> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itext-asian</artifactId> <version>5.2.0</version> </dependency> <!-- https://mvnrepository.com/artifact/org.bouncycastle/bcprov-jdk15 --> <!-- 设置pdf查看密码使用 --> <!-- <dependency> 这个没用 <groupId>org.bouncycastle</groupId> <artifactId>bcprov-jdk15</artifactId> <version>1.46</version> </dependency> --> <!-- https://mvnrepository.com/artifact/org.bouncycastle/bcprov-jdk15on --> <dependency> <groupId>org.bouncycastle</groupId> <artifactId>bcprov-jdk15on</artifactId> <version>1.62</version> </dependency>
package com.chl.tools; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.itextpdf.text.BaseColor; import com.itextpdf.text.Document; import com.itextpdf.text.DocumentException; import com.itextpdf.text.Font; import com.itextpdf.text.PageSize; import com.itextpdf.text.Paragraph; import com.itextpdf.text.Rectangle; import com.itextpdf.text.pdf.BaseFont; import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.PdfWriter; import com.itextpdf.text.pdf.parser.PdfTextExtractor; public class PdfParse { private static Logger logger = LoggerFactory.getLogger(PdfParse.class); private static String pdfPath = "/Users/chenhailong/Desktop/hellopdf.pdf"; public static void main(String[] args) { createPdf(); } /** * 创建pdf文件 * 1.创建Document文档 * - 设置doc的多种属性 * 2.PdfWriter初始化 * 3.打开文档 * 4.写入内容 * 5.关闭文档 * * 添加 itext-asian.jar 处理 中文问题 */ private static void createPdf() { try { //设置pdf的纸张大小、背景色 Rectangle rect = new Rectangle(PageSize.A4.rotate()); rect.setBackgroundColor(BaseColor.WHITE); //设置doc属性 Document document = new Document(rect); document.addAuthor("作者"); document.addCreator("创作者"); document.addCreationDate(); document.addKeywords("pdf"); document.addSubject("主题"); document.addTitle("标题"); document.setMargins(10, 20, 30, 40); PdfWriter pw = PdfWriter.getInstance(document, new FileOutputStream(pdfPath)); document.open(); document.add(new Paragraph("Hello World wh")); //设置新的一页 document.newPage(); pw.setPageEmpty(false); document.newPage(); document.add(new Paragraph("New page")); document.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (DocumentException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
// Set the password required to open (read) the PDF.
// Arguments: user password, owner password, permission flags, encryption type.
// Both passwords are the same literal here.
// The call goes through PdfEncryption, which needs the BouncyCastle classes on the classpath.
// NOTE(review): iText presumably requires this call before document.open() — confirm against the PdfWriter docs.
//userPassword the user password. Can be null or empty
//ownerPassword the owner password. Can be null or empty
pw.setEncryption("123456".getBytes(), "123456".getBytes(),PdfWriter.ALLOW_SCREENREADERS,PdfWriter.STANDARD_ENCRYPTION_128);
如果依赖包引入得不对(例如缺少 bouncycastle,或引入的版本不匹配),可能会引发以下异常:
Exception in thread "main" java.lang.NoClassDefFoundError: org/bouncycastle/asn1/ASN1Primitive
at com.itextpdf.text.pdf.PdfEncryption.<init>(PdfEncryption.java:147)
at com.itextpdf.text.pdf.PdfWriter.setEncryption(PdfWriter.java:2132)
at com.chl.tools.PdfParse.createPdf(PdfParse.java:59)
at com.chl.tools.PdfParse.main(PdfParse.java:27)
Caused by: java.lang.ClassNotFoundException: org.bouncycastle.asn1.ASN1Primitive
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 4 more
或
Exception in thread "main" java.lang.SecurityException: class "org.bouncycastle.asn1.ASN1Primitive"'s signer information does not match signer information of other classes in the same package
at java.lang.ClassLoader.checkCerts(ClassLoader.java:898)
at java.lang.ClassLoader.preDefineClass(ClassLoader.java:668)
at java.lang.ClassLoader.defineClass(ClassLoader.java:761)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at com.itextpdf.text.pdf.PdfEncryption.<init>(PdfEncryption.java:147)
at com.itextpdf.text.pdf.PdfWriter.setEncryption(PdfWriter.java:2132)
at com.chl.tools.PdfParse.createPdf(PdfParse.java:59)
at com.chl.tools.PdfParse.main(PdfParse.java:27)
// Chinese text needs an explicitly configured font; this relies on the itext-asian dependency.
// The base font is created as NOT_EMBEDDED, then wrapped in a Font and passed to the Paragraph.
BaseFont bfChinese = BaseFont.createFont( "STSongStd-Light" ,"UniGB-UCS2-H",BaseFont.NOT_EMBEDDED);
Font font = new Font(bfChinese);
document.add(new Paragraph("这是中文汉字",font));
/**
 * Reads a PDF file and prints the extracted text of every page to standard output.
 *
 * <p>Failures while opening or parsing the file are logged and not propagated. The
 * reader is closed on every path.
 *
 * @param pdfPath path of the PDF file to read, e.g.
 *                {@code /Users/chenhailong/Desktop/中华人民共和国刑法修正案(九)全文.pdf}
 */
private static void readPdf(String pdfPath) {
    PdfReader pdf = null;
    try {
        // Use the caller-supplied path; it was previously overwritten by a hard-coded one.
        pdf = new PdfReader(pdfPath);
        int pageCount = pdf.getNumberOfPages();
        // Pages are addressed starting from 1.
        for (int page = 1; page <= pageCount; page++) {
            System.out.println(PdfTextExtractor.getTextFromPage(pdf, page));
        }
    } catch (IOException e) {
        // The trailing Throwable is logged with its stack trace; the placeholder gets the path.
        logger.error("read failed! {}", pdfPath, e);
    } finally {
        if (pdf != null) {
            pdf.close();
        }
    }
}
针对pdf的写入操作,包含很多设置,请参考官网
操作 PDF 的工具有 itextpdf、Apache PDFBox 等(Apache POI 主要面向 Office 文档),各工具之间的对比可参考相关资料。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。