当前位置:   article > 正文

itextpdf获取PDFXY----spire.pdf获取XY_itextpdf根据坐标获取pdf内容

itextpdf根据坐标获取pdf内容

本文记录使用 itextpdf 5.3.2 获取 PDF 中文字对应的 XY 坐标,以及使用 spire.pdf 3.11.6 获取 PDF 中文字对应 XY 坐标的样例代码。

本文记录的两个 jar 包分别是:itextpdf 和 spire.pdf。首先是 itextpdf:

  <!-- Maven coordinates for the iText core library, pinned to version 5.3.2. -->
  1. <dependency>
  2. <groupId>com.itextpdf</groupId>
  3. <artifactId>itextpdf</artifactId>
  4. <version>5.3.2</version>
  5. </dependency>

  1. import com.itextpdf.awt.geom.Rectangle2D;
  2. import com.itextpdf.text.pdf.PdfReader;
  3. import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
  4. import java.io.IOException;
  5. import java.util.List;
  6. import java.util.Map;
  7. public class pdfTest {
  8. public static void main(String[] args) {
  9. try {
  10. PdfReader reader = new PdfReader("C:\\Users\\T440P\\Desktop\\DHF文档.pdf");
  11. //新建一个PDF解析对象
  12. PdfReaderContentParser parser = new PdfReaderContentParser(reader);
  13. //包含了PDF页面的信息,作为处理的对象
  14. //PdfStamper stamper = new PdfStamper(reader, new FileOutputStream("d:/test.pdf"));
  15. for (int i=1; i<reader.getNumberOfPages(); i++){
  16. //新建一个ImageRenderListener对象,该对象实现了RenderListener接口,作为处理PDF的主要类
  17. TestRenderListener listener = new TestRenderListener();
  18. //解析PDF,并处理里面的文字
  19. parser.processContent(i, listener);
  20. //获取文字的矩形边框
  21. List<Rectangle2D.Float> rectText = listener.rectText;
  22. List<String> textList = listener.textList;
  23. List<Float> listY = listener.listY;
  24. List<Map<String, Rectangle2D.Float>> list_text = listener.rows_text_rect;
  25. for(int k = 0;k < list_text.size();k++){
  26. Map<String,Rectangle2D.Float> map = list_text.get(k);
  27. System.out.println(">>>map"+map);
  28. }
  29. }
  30. } catch (IOException e) {
  31. e.printStackTrace();
  32. }
  33. }
  34. }
  1. import com.itextpdf.awt.geom.Rectangle2D;
  2. import com.itextpdf.awt.geom.RectangularShape;
  3. import com.itextpdf.text.pdf.parser.ImageRenderInfo;
  4. import com.itextpdf.text.pdf.parser.RenderListener;
  5. import com.itextpdf.text.pdf.parser.TextRenderInfo;
  6. import java.util.ArrayList;
  7. import java.util.HashMap;
  8. import java.util.List;
  9. import java.util.Map;
  10. public class TestRenderListener implements RenderListener {
  11. //用来存放文字的矩形
  12. List<Rectangle2D.Float> rectText = new ArrayList<Rectangle2D.Float>();
  13. //用来存放文字
  14. List<String> textList = new ArrayList<String>();
  15. //用来存放文字的y坐标
  16. List<Float> listY = new ArrayList<Float>();
  17. //用来存放每一行文字的坐标位置
  18. List<Map<String,Rectangle2D.Float>> rows_text_rect = new ArrayList<>();
  19. //PDF文件的路径
  20. protected String filepath = null;
  21. public TestRenderListener() {
  22. }
  23. //step 2,遇到"BT"执行
  24. @Override
  25. public void beginTextBlock() {
  26. // TODO Auto-generated method stub
  27. }
  28. //step 3
  29. /**
  30. * 文字主要处理方法
  31. */
  32. @Override
  33. public void renderText(TextRenderInfo renderInfo) {
  34. //获取文字的下面的矩形
  35. //Rectangle2D.Float rectBase = renderInfo.getBaseline().getBoundingRectange();
  36. String text = renderInfo.getText();
  37. if(text.length() > 0){
  38. RectangularShape rectBase = renderInfo.getBaseline().getBoundingRectange();
  39. // 获取文字下面的矩形
  40. Rectangle2D.Float rectAscen = renderInfo.getAscentLine().getBoundingRectange();
  41. // 计算出文字的边框矩形
  42. float leftX = (float) rectBase.getMinX();
  43. float leftY = (float) rectBase.getMinY()-1;
  44. float rightX = (float) rectAscen.getMaxX();
  45. float rightY = (float) rectAscen.getMaxY()+1;
  46. Rectangle2D.Float rect = new Rectangle2D.Float(leftX, leftY, rightX - leftX, rightY - leftY);
  47. System.out.println("text:"+text+"--x:"+rect.x + "--y:"+rect.y + "--width:"+rect.width + "--height:"+rect.height);
  48. if(listY.contains(rect.y)){
  49. int index = listY.indexOf(rect.y);
  50. float tempx = rect.x > rectText.get(index).x ? rectText.get(index).x : rect.x;
  51. rectText.set(index,new Rectangle2D.Float(tempx,rect.y,rect.width + rectText.get(index).width,rect.height));
  52. textList.set(index,textList.get(index) + text);
  53. }else{
  54. rectText.add(rect);
  55. textList.add(text);
  56. listY.add(rect.y);
  57. }
  58. Map<String,Rectangle2D.Float> map = new HashMap<>();
  59. map.put(text,rect);
  60. rows_text_rect.add(map);
  61. }
  62. }
  63. //step 4(最后执行的,只执行一次),遇到“ET”执行
  64. @Override
  65. public void endTextBlock() {
  66. // TODO Auto-generated method stub
  67. }
  68. //step 1(图片处理方法)
  69. @Override
  70. public void renderImage(ImageRenderInfo renderInfo) {
  71. System.out.println(renderInfo.getStartPoint().length());
  72. System.out.println("测试");
  73. }
  74. }

=====================================分隔符===============================

spire 能解决 CorelDRAW、AutoCAD 转出的 PDF(例如 CAD 图纸转出的 PDF)无法读取文字以及对应 XY 坐标的问题。

spire:

  <!-- E-iceblue's own Maven repository. HTTPS is used because Maven 3.8.1+ blocks
       plain-http repositories by default. -->
  <repositories>
      <repository>
          <id>com.e-iceblue</id>
          <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
      </repository>
  </repositories>
  <dependencies>
      <dependency>
          <!-- groupId written without surrounding whitespace. -->
          <groupId>e-iceblue</groupId>
          <artifactId>spire.pdf</artifactId>
          <version>3.11.6</version>
      </dependency>
  </dependencies>
  1. public class pdfTest {
  2. public static void main(String[] args) throws Exception {
  3. List<Map<String, Rectangle2D.Float>> list_text = getCroeXYForString("C:\\Users\\T440P\\Desktop\\test.pdf", 0);
  4. for(int k = 0;k < list_text.size();k++) {
  5. //需要匹配位置的String
  6. Map<String, Rectangle2D.Float> siteMap = list_text.get(k);
  7. System.out.println(">>>map"+siteMap);
  8. }
  9. }
  10. public static List<Map<String,Rectangle2D.Float>> getCroeXYForString(String filePath,int i) throws Exception {
  11. PdfDocument pdf = new PdfDocument(filePath);
  12. List<Map<String,Rectangle2D.Float>> mapList=new ArrayList<>();
  13. //遍历PDF文档中每页
  14. PdfPageBase page;
  15. page = pdf.getPages().get(i);
  16. double height = page.getSize().getHeight();
  17. //调用extractText()方法提取文本
  18. String s = page.extractText(true);
  19. System.out.println(">>>>page.extractText"+s);
  20. PdfTextFindCollection allText = page.findAllText();
  21. PdfTextFind[] findss = allText.getFinds();
  22. for(PdfTextFind find : findss) {
  23. //计算出文字的边框矩形
  24. float leftX = (float) find.getBounds().getMinX();
  25. float leftY = (float) (height-find.getBounds().getMinY())-1;
  26. float rightX = (float) find.getBounds().getMaxX();
  27. float rightY = (float) (height-find.getBounds().getMaxY())+1;
  28. Rectangle2D.Float rect = new Rectangle2D.Float(leftX, leftY, rightX - leftX, rightY - leftY);
  29. System.out.println(find.getMatchText());
  30. Map<String,Rectangle2D.Float> xyMap=new HashMap<>();
  31. String str = find.getMatchText();
  32. // String unicode = stringToUnicode(str);
  33. // System.out.println("字符串转unicode结果:" + unicode);
  34. xyMap.put(str,rect);
  35. mapList.add(xyMap);
  36. }
  37. pdf.close();
  38. return mapList;
  39. }
  40. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/天景科技苑/article/detail/744762
推荐阅读
相关标签
  

闽ICP备14008679号