赞
踩
1.使用jacob(Java COM Bridge)操作office的方式。基于这种方式,无论是水印还是格式都可以完美转换;但是这种方式只能在Windows下使用,而有些项目需要部署到Linux下,因此这种方式有很大的局限性。
2.利用apache poi方式进行转换,可以支持Linux和Windows系统。但局限性在于,这种方法是先将word转换成html格式,再将html转成pdf,因此转换后的格式会存在一定差异。
import com.itextpdf.text.BaseColor; import com.itextpdf.text.Font; import com.itextpdf.text.FontProvider; import com.itextpdf.text.PageSize; import com.itextpdf.text.pdf.BaseFont; import com.itextpdf.text.pdf.PdfWriter; import com.itextpdf.tool.xml.XMLWorkerHelper; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.PictureType; import org.apache.poi.xwpf.converter.core.BasicURIResolver; import org.apache.poi.xwpf.converter.core.FileImageExtractor; import org.apache.poi.xwpf.converter.core.utils.StringUtils; import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.jsoup.Jsoup; import org.jsoup.nodes.Element; import org.jsoup.nodes.Entities; import org.jsoup.select.Elements; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.*; import java.nio.charset.Charset; /** * 使用poi+itextpdf进行word转pdf * 先将word转成html,再将html转成pdf * * @author :yuwenke * @date :Created in 2021/10/12 22:41 */ public class TestPoi { /** * 将doc格式文件转成html * * @param docPath doc文件路径 * @param imageDir doc文件中图片存储目录 * @return html */ public static String doc2Html(String docPath, final String imageDir) { String content = null; ByteArrayOutputStream baos = null; try { HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(docPath)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); wordToHtmlConverter.setPicturesManager(new PicturesManager() { public String savePicture(byte[] content, PictureType 
pictureType, String suggestedName, float widthInches, float heightInches) { File file = new File(imageDir + suggestedName); FileOutputStream fos = null; try { fos = new FileOutputStream(file); fos.write(content); } catch (IOException e) { e.printStackTrace(); } finally { try { if (fos != null) { fos.close(); } } catch (Exception e) { e.printStackTrace(); } } return imageDir + suggestedName; } }); wordToHtmlConverter.processDocument(wordDocument); Document htmlDocument = wordToHtmlConverter.getDocument(); DOMSource domSource = new DOMSource(htmlDocument); baos = new ByteArrayOutputStream(); StreamResult streamResult = new StreamResult(baos); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); } catch (Exception e) { e.printStackTrace(); } finally { try { if (baos != null) { content = new String(baos.toByteArray(), "utf-8"); baos.close(); } } catch (Exception e) { e.printStackTrace(); } } return content; } /** * 将docx格式文件转成html * * @param docxPath docx文件路径 * @param imageDir docx文件中图片存储目录 * @return html */ public static String docx2Html(String docxPath, String imageDir) { String content = null; FileInputStream in = null; ByteArrayOutputStream baos = null; try { // 1> 加载文档到XWPFDocument in = new FileInputStream(new File(docxPath)); XWPFDocument document = new XWPFDocument(in); // 2> 解析XHTML配置(这里设置IURIResolver来设置图片存放的目录) XHTMLOptions options = XHTMLOptions.create(); // 存放word中图片的目录 options.setExtractor(new FileImageExtractor(new File(imageDir))); options.URIResolver(new BasicURIResolver(imageDir)); options.setIgnoreStylesIfUnused(false); options.setFragment(true); // 3> 将XWPFDocument转换成XHTML baos = new ByteArrayOutputStream(); XHTMLConverter.getInstance().convert(document, baos, options); } catch 
(Exception e) { e.printStackTrace(); } finally { try { if (in != null) { in.close(); } if (baos != null) { content = new String(baos.toByteArray(), "utf-8"); baos.close(); } } catch (Exception e) { e.printStackTrace(); } } return content; } /** * 使用jsoup规范化html * * @param html html内容 * @return 规范化后的html */ private static String formatHtml(String html) { org.jsoup.nodes.Document doc = Jsoup.parse(html); // 去除过大的宽度 String style = doc.attr("style"); if (StringUtils.isNotEmpty(style) && style.contains("width")) { doc.attr("style", ""); } Elements divs = doc.select("div"); for (Element div : divs) { String divStyle = div.attr("style"); if (StringUtils.isNotEmpty(divStyle) && divStyle.contains("width")) { div.attr("style", ""); } } // jsoup生成闭合标签 doc.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml); doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml); return doc.html(); } /** * html转成pdf * * @param html html * @param outputPdfPath 输出pdf路径 */ private static void htmlToPdf(String html, String outputPdfPath) { com.itextpdf.text.Document document = null; try { // 纸 document = new com.itextpdf.text.Document(PageSize.A4); // 笔 PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream(outputPdfPath)); document.open(); // html转pdf XMLWorkerHelper.getInstance().parseXHtml(writer, document, new ByteArrayInputStream(html.getBytes()), Charset.forName("UTF-8"), new FontProvider() { public boolean isRegistered(String s) { return false; } public Font getFont(String s, String s1, boolean embedded, float size, int style, BaseColor baseColor) { // 配置字体 Font font = null; try { // 方案一:使用本地字体(本地需要有字体) // BaseFont bf = BaseFont.createFont("c:/Windows/Fonts/simsun.ttc,0", BaseFont.IDENTITY_H, BaseFont.EMBEDDED); // 方案二:使用jar包:iTextAsian,这样只需一个jar包就可以了 BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED); font = new Font(bf, size, style, baseColor); font.setColor(baseColor); } catch (Exception e) { 
e.printStackTrace(); } return font; } }); } catch (Exception e) { e.printStackTrace(); } finally { if (document != null) { document.close(); } } } public static void main(String[] args) throws Exception { String basePath = "F://"; // String docPath = basePath + "AA.doc"; String docxPath = basePath + "555.docx"; String pdfPath = basePath + "5.pdf"; String imageDir = "F:/test/pdf/image/"; // 测试doc转pdf // String docHtml = doc2Html(docPath, imageDir); // docHtml = formatHtml(docHtml); // htmlToPdf(docHtml, pdfPath); // 测试docx转pdf String docxHtml = docx2Html(docxPath, imageDir); docxHtml = formatHtml(docxHtml); docxHtml = docxHtml.replace("___", "22"); htmlToPdf(docxHtml, pdfPath); } }
依赖
<!-- poi --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.14</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.14</version> </dependency> <!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/xdocreport --> <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>xdocreport</artifactId> <version>1.0.6</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>ooxml-schemas</artifactId> <version>1.3</version> </dependency> <!-- itextpdf --> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.5.13</version> </dependency> <!-- https://mvnrepository.com/artifact/com.itextpdf.tool/xmlworker --> <dependency> <groupId>com.itextpdf.tool</groupId> <artifactId>xmlworker</artifactId> <version>5.5.11</version> </dependency> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itext-asian</artifactId> <version>5.2.0</version> </dependency> <dependency> <groupId>org.xhtmlrenderer</groupId> <artifactId>flying-saucer-pdf-itext5</artifactId> <version>9.0.3</version> </dependency> <!-- jsoup --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency>
3.使用aspose的方式进行转换,采用此种方式只需要引入两个jar包即可完美转换,并且可以跨平台支持Linux。
jar包 :aspectjweaver-1.9.1.jar
aspose-words-14.9.0-jdk16.jar
import com.aspose.words.*; import com.aspose.words.Shape; import java.awt.*; import java.io.*; public class Test01 { public static void main(String[] args) throws Exception { Test01 test01 = new Test01(); test01.file2pdf("F:\\","AA",".docx"); } /** * * @param toFilePath 文件夹路径 * @param fileName 文件名 * @param type 文件类型 * @return * @throws Exception */ public String file2pdf(String toFilePath, String fileName, String type ) throws Exception { String htmFileName; //获取转换成PDF之后文件名 if(".doc".equals(type)){ htmFileName = fileName+".pdf"; }else if(".docx".equals(type)){ htmFileName = fileName+".pdf"; }else{ return null; } //通过转换之后的PDF文件名,创建PDF文件 File htmlOutputFile = new File(toFilePath + File.separatorChar + htmFileName); //获取文件输出流 FileOutputStream os = new FileOutputStream(htmlOutputFile); //获取Doc文档对象模型 Document doc = new Document(toFilePath+ File.separatorChar + fileName+type); //为doc文档添加水印 insertWatermarkText(doc, "于文珂"); //将doc文旦转换成PDF文件并输出到之前创建好的pdf文件中 doc.save(os, SaveFormat.PDF); //关闭输出流 if(os!=null){ os.close(); } return htmFileName; } /** * 为word文档添加水印 * @param doc word文档模型 * @param watermarkText 需要添加的水印字段 * @throws Exception */ private static void insertWatermarkText(Document doc, String watermarkText) throws Exception { Shape watermark = new Shape(doc, ShapeType.TEXT_PLAIN_TEXT); //水印内容 watermark.getTextPath().setText(watermarkText); //水印字体 watermark.getTextPath().setFontFamily("宋体"); //水印宽度 watermark.setWidth(500); //水印高度 watermark.setHeight(100); //旋转水印 watermark.setRotation(-40); //水印颜色 watermark.getFill().setColor(Color.lightGray); watermark.setStrokeColor(Color.lightGray); watermark.setRelativeHorizontalPosition(RelativeHorizontalPosition.PAGE); watermark.setRelativeVerticalPosition(RelativeVerticalPosition.PAGE); watermark.setWrapType(WrapType.NONE); watermark.setVerticalAlignment(VerticalAlignment.CENTER); watermark.setHorizontalAlignment(HorizontalAlignment.CENTER); Paragraph watermarkPara = new Paragraph(doc); watermarkPara.appendChild(watermark); for 
(Section sect : doc.getSections()) { insertWatermarkIntoHeader(watermarkPara, sect, HeaderFooterType.HEADER_PRIMARY); insertWatermarkIntoHeader(watermarkPara, sect, HeaderFooterType.HEADER_FIRST); insertWatermarkIntoHeader(watermarkPara, sect, HeaderFooterType.HEADER_EVEN); } System.out.println("Watermark Set"); } /** * 在页眉中插入水印 * @param watermarkPara * @param sect * @param headerType * @throws Exception */ private static void insertWatermarkIntoHeader(Paragraph watermarkPara, Section sect, int headerType) throws Exception{ HeaderFooter header = sect.getHeadersFooters().getByHeaderFooterType(headerType); if (header == null) { header = new HeaderFooter(sect.getDocument(), headerType); sect.getHeadersFooters().add(header); } header.appendChild(watermarkPara.deepClone(true)); } }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。