赞
踩
本文记录使用 itextpdf 5.3.2 获取 PDF 中文字对应 XY 坐标,以及使用 spire.pdf 3.11.6 获取 PDF 中文字对应 XY 坐标的样例代码。
本文记录的两个 jar 包分别是 itextpdf 和 spire.pdf。首先是 itextpdf:
- <dependency>
- <groupId>com.itextpdf</groupId>
- <artifactId>itextpdf</artifactId>
- <version>5.3.2</version>
- </dependency>
- import com.itextpdf.awt.geom.Rectangle2D;
- import com.itextpdf.text.pdf.PdfReader;
- import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
- import java.io.IOException;
- import java.util.List;
- import java.util.Map;
- public class pdfTest {
-
- public static void main(String[] args) {
- try {
- PdfReader reader = new PdfReader("C:\\Users\\T440P\\Desktop\\DHF文档.pdf");
- //新建一个PDF解析对象
- PdfReaderContentParser parser = new PdfReaderContentParser(reader);
- //包含了PDF页面的信息,作为处理的对象
- //PdfStamper stamper = new PdfStamper(reader, new FileOutputStream("d:/test.pdf"));
- for (int i=1; i<reader.getNumberOfPages(); i++){
- //新建一个ImageRenderListener对象,该对象实现了RenderListener接口,作为处理PDF的主要类
- TestRenderListener listener = new TestRenderListener();
- //解析PDF,并处理里面的文字
- parser.processContent(i, listener);
- //获取文字的矩形边框
- List<Rectangle2D.Float> rectText = listener.rectText;
- List<String> textList = listener.textList;
- List<Float> listY = listener.listY;
- List<Map<String, Rectangle2D.Float>> list_text = listener.rows_text_rect;
- for(int k = 0;k < list_text.size();k++){
- Map<String,Rectangle2D.Float> map = list_text.get(k);
- System.out.println(">>>map"+map);
- }
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- }
-
-
- import com.itextpdf.awt.geom.Rectangle2D;
- import com.itextpdf.awt.geom.RectangularShape;
- import com.itextpdf.text.pdf.parser.ImageRenderInfo;
- import com.itextpdf.text.pdf.parser.RenderListener;
- import com.itextpdf.text.pdf.parser.TextRenderInfo;
-
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
- /**
-  * {@link RenderListener} that records the text rendered on a page together with
-  * bounding rectangles, both per rendered chunk and merged per line (chunks that
-  * share the same y value).
-  */
- public class TestRenderListener implements RenderListener {
-     // Merged bounding rectangle of each line; one entry per distinct y value.
-     List<Rectangle2D.Float> rectText = new ArrayList<Rectangle2D.Float>();
-     // Concatenated text of each line, index-aligned with rectText.
-     List<String> textList = new ArrayList<String>();
-     // y value of each line, index-aligned with rectText.
-     List<Float> listY = new ArrayList<Float>();
-     // One single-entry map (text -> rectangle) per rendered chunk, in render order.
-     List<Map<String,Rectangle2D.Float>> rows_text_rect = new ArrayList<>();
-     // Path of the PDF file; never read or written by this class.
-     protected String filepath = null;
-
-     public TestRenderListener() {
-     }
-
-     /** Called when a text block begins (the "BT" operator); nothing to do here. */
-     @Override
-     public void beginTextBlock() {
-     }
-
-     /**
-      * Records one rendered text chunk.
-      *
-      * <p>The chunk rectangle spans from the baseline's minimum corner to the ascent
-      * line's maximum corner, padded by 1 unit below and above. The chunk is appended to
-      * {@code rows_text_rect}; it is also merged into the line entry with the same y
-      * value, or starts a new line entry if there is none.
-      *
-      * @param renderInfo the chunk being rendered; chunks with no text are ignored
-      */
-     @Override
-     public void renderText(TextRenderInfo renderInfo) {
-         String text = renderInfo.getText();
-         if (text == null || text.isEmpty()) {
-             return;
-         }
-
-         // Rectangle around the baseline (bottom edge of the glyphs).
-         RectangularShape rectBase = renderInfo.getBaseline().getBoundingRectange();
-         // Rectangle around the ascent line (top edge of the glyphs).
-         Rectangle2D.Float rectAscen = renderInfo.getAscentLine().getBoundingRectange();
-
-         float leftX = (float) rectBase.getMinX();
-         float leftY = (float) rectBase.getMinY() - 1;
-         float rightX = (float) rectAscen.getMaxX();
-         float rightY = (float) rectAscen.getMaxY() + 1;
-
-         Rectangle2D.Float rect = new Rectangle2D.Float(leftX, leftY, rightX - leftX, rightY - leftY);
-
-         System.out.println("text:"+text+"--x:"+rect.x + "--y:"+rect.y + "--width:"+rect.width + "--height:"+rect.height);
-
-         int index = listY.indexOf(rect.y);
-         if (index >= 0) {
-             // Grow the line rectangle to the union of the old extent and the new chunk.
-             // Summing the widths would ignore gaps between chunks and double-count overlaps.
-             Rectangle2D.Float line = rectText.get(index);
-             float minX = Math.min(line.x, rect.x);
-             float maxX = Math.max(line.x + line.width, rect.x + rect.width);
-             float lineHeight = Math.max(line.height, rect.height);
-             rectText.set(index, new Rectangle2D.Float(minX, rect.y, maxX - minX, lineHeight));
-             textList.set(index, textList.get(index) + text);
-         } else {
-             rectText.add(rect);
-             textList.add(text);
-             listY.add(rect.y);
-         }
-
-         Map<String,Rectangle2D.Float> map = new HashMap<>();
-         map.put(text, rect);
-         rows_text_rect.add(map);
-     }
-
-     /** Called when a text block ends (the "ET" operator); nothing to do here. */
-     @Override
-     public void endTextBlock() {
-     }
-
-     /**
-      * Image callback: prints the length of the image start-point vector followed by a
-      * fixed marker line. Images are not otherwise recorded.
-      *
-      * @param renderInfo the image being rendered
-      */
-     @Override
-     public void renderImage(ImageRenderInfo renderInfo) {
-         System.out.println(renderInfo.getStartPoint().length());
-         System.out.println("测试");
-     }
- }
=====================================分隔符===============================
spire 能解决 CorelDRAW、AutoCAD 转出的 PDF 以及 CAD 图纸转出的 PDF 无法读取文字及其对应 XY 坐标的问题。
spire:
- <repositories>
- <repository>
- <id>com.e-iceblue</id>
- <url>http://repo.e-iceblue.cn/repository/maven-public/</url>
- </repository>
- </repositories>
- <dependencies>
- <dependency>
- <groupId>e-iceblue</groupId>
- <artifactId>spire.pdf</artifactId>
- <version>3.11.6</version>
- </dependency>
- </dependencies>
- /**
-  * Demo: uses Spire.PDF to find every piece of text on one page of a PDF and report
-  * its bounding rectangle.
-  */
- public class pdfTest {
-     /**
-      * Prints the text/rectangle pairs found on the first page (index 0) of the
-      * hard-coded PDF.
-      *
-      * @param args unused
-      * @throws Exception whatever {@link #getCroeXYForString(String, int)} throws
-      */
-     public static void main(String[] args) throws Exception {
-         List<Map<String, Rectangle2D.Float>> list_text = getCroeXYForString("C:\\Users\\T440P\\Desktop\\test.pdf", 0);
-
-         for (Map<String, Rectangle2D.Float> siteMap : list_text) {
-             // Each map holds one matched string and its rectangle.
-             System.out.println(">>>map" + siteMap);
-         }
-     }
-
-     /**
-      * Collects every text match on one page together with its bounding rectangle.
-      *
-      * <p>The rectangle's y is flipped against the page height and padded by 1 unit
-      * below and above, so {@code y} is the lower edge and {@code height} is positive.
-      * The page's extracted text and each matched string are also printed to stdout.
-      *
-      * @param filePath path of the PDF to open
-      * @param i        index of the page to scan, as accepted by {@code pdf.getPages().get(i)}
-      * @return one single-entry map (matched text -> rectangle) per match, in match order
-      * @throws Exception if the document cannot be opened or processed
-      */
-     public static List<Map<String,Rectangle2D.Float>> getCroeXYForString(String filePath,int i) throws Exception {
-         PdfDocument pdf = new PdfDocument(filePath);
-         try {
-             List<Map<String,Rectangle2D.Float>> mapList = new ArrayList<>();
-             PdfPageBase page = pdf.getPages().get(i);
-             double height = page.getSize().getHeight();
-             // Dump the page text for reference.
-             String s = page.extractText(true);
-             System.out.println(">>>>page.extractText"+s);
-             PdfTextFind[] findss = page.findAllText().getFinds();
-
-             for (PdfTextFind find : findss) {
-                 float leftX = (float) find.getBounds().getMinX();
-                 float rightX = (float) find.getBounds().getMaxX();
-                 // NOTE(review): assumes getBounds() is measured from the top of the page,
-                 // as the height-minus-y flip implies; confirm against the Spire docs.
-                 // After the flip, maxY gives the lower edge and minY the upper edge;
-                 // pairing them the other way round produces a negative height.
-                 float leftY = (float) (height - find.getBounds().getMaxY()) - 1;
-                 float rightY = (float) (height - find.getBounds().getMinY()) + 1;
-                 Rectangle2D.Float rect = new Rectangle2D.Float(leftX, leftY, rightX - leftX, rightY - leftY);
-
-                 String str = find.getMatchText();
-                 System.out.println(str);
-                 Map<String,Rectangle2D.Float> xyMap = new HashMap<>();
-                 xyMap.put(str, rect);
-                 mapList.add(xyMap);
-             }
-             return mapList;
-         } finally {
-             // Close the document even when extraction fails.
-             pdf.close();
-         }
-     }
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。