当前位置:   article > 正文

java使用POI读取Excel表格中的图片(包含嵌入单元格图片,浮动图片,xls-2003,xlsx-2007)_workbook解析excel中的图片

workbook解析excel中的图片

1.测试文件如下,xls-2003,xlsx-2007

2.读取表格中的浮动图片

2.1读取表格前需要了解,以下是读取整个工作簿中图片文件方法,但是真实读取图片数据往往需要和其他数据进行对应,比如当有多个工作表(Sheet)时,测试中的那一张图片是vivo手机的图片,是非常不好对应的,这就需要从工作表(Sheet进行图片读取)。

  1. //xls-2003,低版本EXCEL
  2. HSSFWorkbook book1 = new HSSFWorkbook(new FileInputStream(file));
  3. //获取工作簿中所有图片
  4. List<HSSFPictureData> allPictures = book1.getAllPictures();
  5. //获取工作簿中所有文档对象
  6. List<HSSFObjectData> allEmbeddedObjects = book1.getAllEmbeddedObjects();
  7. //xlsx-2007,高版本EXCEL
  8. XSSFWorkbook book = new XSSFWorkbook(new FileInputStream(file));
  9. //获取工作簿中所有图片
  10. List<XSSFPictureData> allPictures1 = book.getAllPictures();
  11. //获取工作簿中所有包数据
  12. List<PackagePart> allEmbeddedParts = book.getAllEmbeddedParts();

2.2读取单个工作表(Sheet)中图片。

  1. public static void main(String[] args) throws Exception {
  2. File file= new File("C:/Users/Administrator/Desktop/excel/测试.xls");
  3. //File file = new File("C:\\Users\\Administrator\\Desktop\\excel\\测试.xlsx");
  4. read(file);
  5. }

2.2.1读取xls文件中图片(2003)

  1. public static void read(File file) throws Exception {
  2. HSSFWorkbook book1 = new HSSFWorkbook(new FileInputStream(file));
  3. //方式1 获取sheet数量,采用下标方式遍历读取每个工作表数据
  4. int sheetsNos = book1.getNumberOfSheets();
  5. for (int sheetNo = 0; sheetNo < sheetsNos; sheetNo++) {
  6. //HSSFSheet sheet = book1.getSheetAt(sheetNo);
  7. Sheet sheet = book1.getSheetAt(sheetNo);
  8. }
  9. //方式2 获取sheet数量,直接遍历读取每个工作表数据
  10. for (Sheet sheet : book1) {
  11. HSSFSheet hssSheet = (HSSFSheet) sheet;
  12. //获取工作表中绘图包
  13. HSSFPatriarch drawingPatriarch = hssSheet.getDrawingPatriarch();
  14. if (drawingPatriarch != null) {
  15. //获取所有图像形状
  16. List<HSSFShape> shapes = drawingPatriarch.getChildren();
  17. if (shapes != null) {
  18. //遍历所有形状
  19. for (HSSFShape shape : shapes) {
  20. //获取形状在工作表中的顶点位置信息(anchor锚点)
  21. HSSFClientAnchor anchor = (HSSFClientAnchor) shape.getAnchor();
  22. if (shape instanceof HSSFPicture) {
  23. //形状获取对应的图片数据
  24. HSSFPicture pic = (HSSFPicture) shape;
  25. HSSFPictureData picData = pic.getPictureData();
  26. //图片形状在工作表中的位置, 所在行列起点和终点位置
  27. short c1 = anchor.getCol1();
  28. //short c2 = anchor.getCol2();
  29. int r1 = anchor.getRow1();
  30. //int r2 = anchor.getRow2();
  31. String key = r1 + "行," + c1 + "列";
  32. //TODO 此处可以将图片位置和数据存入缓存中,以便解析表格数据进行对应操作及保存
  33. //保存图片到本地
  34. byte[] data = picData.getData();
  35. //文件扩展名
  36. String suffix = picData.suggestFileExtension();
  37. File dir = new File("C:/Users/Administrator/Desktop/excel/img2003/");
  38. if (!dir.exists()) {
  39. dir.mkdirs();
  40. }
  41. FileUtils.writeByteArrayToFile(new File(dir.getPath() + "/" + key + "." + suffix), data);
  42. }
  43. }
  44. }
  45. }
  46. }
  47. }

下图可以看见成功读取到xls中的浮动图片,嵌入图片并未被读取到

2.2.2读取xlsx文件中图片(2007)

  1. public static void read(File file) throws Exception {
  2. XSSFWorkbook book = new XSSFWorkbook(new FileInputStream(file));
  3. //方式1 获取sheet数量,采用下标方式遍历读取每个工作表数据
  4. int sheetsNos = book.getNumberOfSheets();
  5. for (int sheetNo = 0; sheetNo < sheetsNos; sheetNo++) {
  6. //XSSFSheet sheet = book1.getSheetAt(sheetNo);
  7. Sheet sheet = book.getSheetAt(sheetNo);
  8. }
  9. //方式2 获取sheet数量,直接遍历读取每个工作表数据
  10. for (Sheet sheet : book) {
  11. XSSFSheet xssSheet = (XSSFSheet) sheet;
  12. //获取工作表中绘图包
  13. XSSFDrawing drawing = xssSheet.getDrawingPatriarch();
  14. if (drawing != null) {
  15. //获取所有图像形状
  16. List<XSSFShape> shapes = drawing.getShapes();
  17. if (shapes != null) {
  18. //遍历所有形状
  19. for (XSSFShape shape : shapes) {
  20. //获取形状在工作表中的顶点位置信息(anchor锚点)
  21. XSSFClientAnchor anchor = (XSSFClientAnchor) shape.getAnchor();
  22. if (shape instanceof XSSFPicture) {
  23. //形状获取对应的图片数据
  24. XSSFPicture pic = (XSSFPicture) shape;
  25. XSSFPictureData picData = pic.getPictureData();
  26. //图片形状在工作表中的位置, 所在行列起点和终点位置
  27. short c1 = anchor.getCol1();
  28. //short c2 = anchor.getCol2();
  29. int r1 = anchor.getRow1();
  30. //int r2 = anchor.getRow2();
  31. String key = r1 + "行," + c1 + "列";
  32. //TODO 此处可以将图片位置和数据存入缓存中,以便解析表格数据进行对应操作及保存
  33. //保存图片到本地
  34. byte[] data = picData.getData();
  35. //文件扩展名
  36. String suffix = picData.suggestFileExtension();
  37. File dir = new File("C:/Users/Administrator/Desktop/excel/img2007/");
  38. if (!dir.exists()) {
  39. dir.mkdirs();
  40. }
  41. FileUtils.writeByteArrayToFile(new File(dir.getPath() + "/" + key + "." + suffix), data);
  42. }
  43. }
  44. }
  45. }
  46. }
  47. }

下图可以看见成功读取到xls中的浮动图片,嵌入图片并未被读取到

获取xlsx-2007表格中浮动图片,方式2

  1. public static void read(File file) throws Exception {
  2. XSSFWorkbook book = new XSSFWorkbook(new FileInputStream(file));
  3. //方式1 获取sheet数量,采用下标方式遍历读取每个工作表数据
  4. int sheetsNos = book.getNumberOfSheets();
  5. for (int sheetNo = 0; sheetNo < sheetsNos; sheetNo++) {
  6. //XSSFSheet sheet = book1.getSheetAt(sheetNo);
  7. Sheet sheet = book.getSheetAt(sheetNo);
  8. }
  9. //方式2 获取sheet数量,直接遍历读取每个工作表数据
  10. for (Sheet sheet : book) {
  11. XSSFSheet xssSheet = (XSSFSheet) sheet;
  12. List<POIXMLDocumentPart> list = xssSheet.getRelations();
  13. for (POIXMLDocumentPart part : list) {
  14. if (part instanceof XSSFDrawing) {
  15. //获取工作表中绘图包
  16. XSSFDrawing drawing = (XSSFDrawing) part;
  17. if (drawing != null) {
  18. //获取所有图像形状
  19. List<XSSFShape> shapes = drawing.getShapes();
  20. if (shapes != null) {
  21. //遍历所有形状
  22. for (XSSFShape shape : shapes) {
  23. //获取形状在工作表中的顶点位置信息(anchor锚点)
  24. XSSFClientAnchor anchor = (XSSFClientAnchor) shape.getAnchor();
  25. if (shape instanceof XSSFPicture) {
  26. //形状获取对应的图片数据
  27. XSSFPicture pic = (XSSFPicture) shape;
  28. XSSFPictureData picData = pic.getPictureData();
  29. //图片形状在工作表中的位置, 所在行列起点和终点位置
  30. short c1 = anchor.getCol1();
  31. //short c2 = anchor.getCol2();
  32. int r1 = anchor.getRow1();
  33. //int r2 = anchor.getRow2();
  34. String key = r1 + "行," + c1 + "列";
  35. //TODO 此处可以将图片位置和数据存入缓存中,以便解析表格数据进行对应操作及保存
  36. //保存图片到本地
  37. byte[] data = picData.getData();
  38. //文件扩展名
  39. String suffix = picData.suggestFileExtension();
  40. File dir = new File("C:/Users/Administrator/Desktop/excel/img2007/");
  41. if (!dir.exists()) {
  42. dir.mkdirs();
  43. }
  44. FileUtils.writeByteArrayToFile(new File(dir.getPath() + "/" + key + "." + suffix), data);
  45. }
  46. }
  47. }
  48. }
  49. }
  50. }
  51. }
  52. }

2.3获取嵌入表格中的图片数据

2.3.1读取xlsx文件中嵌入单元格图片(2007)

效果如下:

实现原理:

(1)将xlsx文件扩展名改为zip,变成压缩文件,再解压,如下图

(2)以下3个关键文件信息,定位表格单元格位置,图片ID,图片编号ID,图片文件地址,如下图。

/xl/_rels/cellimages.xml.rels  rid与图片文件地址关系
/xl/cellimages.xml rid与图片编号ID关系
/xl/worksheets/sheet1.xml 工作表与图片关系(/xl/worksheets/目录下有多个sheet.xml文件,分别代表不同的工作表,sheet1.xml表示是第一个工作表)

由上知道相关信息,以及图片文件,于是可以读取嵌入单元格思路及方法为:

a.将xlsx变为压缩包再读取压缩包中的数据,直接读取xlsx中压缩包数据

b.解析压缩包中关键xml文件数据,获取图片与表格单元格关系,以及图片文件信息

c.解析表格数据,对应图片及保存图片

以下示例中,使用PackageHelper直接打开的xlsx压缩包文件,SAX解析XML文件,且最后并未进行单元格列位置与图片映射。

关键代码片段:

  1. public static void read(File file) throws Exception {
  2. //包管理工具打开压缩包
  3. OPCPackage opc = PackageHelper.open(new FileInputStream(file));
  4. //获取所有包文件
  5. List<PackagePart> parts = opc.getParts();
  6. //获取每个工作表中的包文件
  7. Map<Integer, List<PackagePart>> picturePath = getEmbedPictures(parts);
  8. for (Integer key : picturePath.keySet()) {
  9. List<PackagePart> rows = picturePath.get(key);
  10. for (int i = 0; i < rows.size(); i++) {
  11. PackagePart part = rows.get(i);
  12. //System.out.println("sheetNo" + key + "\t第" + i + "行\t" + part);
  13. if (part != null) {
  14. InputStream imgIs = part.getInputStream();
  15. String name = part.getPartName().getName();
  16. File dir = new File("C:/Users/Administrator/Desktop/excel/img2007_embed/");
  17. if (!dir.exists()) {
  18. dir.mkdirs();
  19. }
  20. FileUtils.copyInputStreamToFile(imgIs, new File(dir.getPath() + "/工作表" + key + "," + i + "行_" + name.substring(name.lastIndexOf("/") + 1)));
  21. }
  22. }
  23. }
  24. try {
  25. opc.close();
  26. } catch (NullPointerException | IOException e) {
  27. }
  28. }

核心代码片段

  1. private static Map<Integer, List<PackagePart>> getEmbedPictures(List<PackagePart> parts) throws JDOMException, IOException, ParserConfigurationException, SAXException {
  2. Map<String, Set<String>> mapImg = new HashMap<>();
  3. Map<String, String> mapImgPath = new HashMap<>();
  4. Map<Integer, List<String>> dataMap = new HashMap<>();
  5. for (PackagePart part : parts) {
  6. // System.out.println(part.getPartName());
  7. PackagePartName partName = part.getPartName();
  8. String name = partName.getName();
  9. if ("/xl/cellimages.xml".equals(name)) {
  10. SAXBuilder builder = new SAXBuilder();
  11. // 获取文档
  12. Document doc = builder.build(part.getInputStream());
  13. // 获取根节点
  14. Element root = doc.getRootElement();
  15. List<Element> cellImageList = root.getChildren();
  16. for (Element imgEle : cellImageList) {
  17. Element xdrPic = imgEle.getChildren().get(0);
  18. Element xdrNvPicPr = xdrPic.getChildren().get(0);
  19. Element xdrBlipFill = xdrPic.getChildren().get(1);
  20. Element aBlip = xdrBlipFill.getChildren().get(0);
  21. Attribute attr = aBlip.getAttributes().get(0);
  22. String imgId = xdrNvPicPr.getChildren().get(0).getAttributeValue("name");
  23. String id = attr.getValue();
  24. // if (id.equals("rId12")) {
  25. // System.out.println(attr.getValue() + "\t" + imgId);
  26. // }
  27. if (mapImg.containsKey(id)) {
  28. mapImg.get(id).add(imgId);
  29. } else {
  30. Set<String> set = new HashSet<>();
  31. set.add(imgId);
  32. mapImg.put(id, set);
  33. }
  34. }
  35. }
  36. if ("/xl/_rels/cellimages.xml.rels".equals(name)) {
  37. SAXBuilder builder = new SAXBuilder();
  38. // 获取文档
  39. Document doc = builder.build(part.getInputStream());
  40. // 获取根节点
  41. Element root = doc.getRootElement();
  42. List<Element> relationshipList = root.getChildren();
  43. /*
  44. <Relationship Id="rId999" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1000.jpeg"/>
  45. */
  46. for (Element relationship : relationshipList) {
  47. String id = relationship.getAttributeValue("Id");
  48. String target = relationship.getAttributeValue("Target");
  49. mapImgPath.put(id, target);
  50. // if (id.equals("rId12")) {
  51. // System.out.println(id + "\t" + target);
  52. // }
  53. }
  54. }
  55. if (name.contains("/xl/worksheets/sheet")) {
  56. // SAXBuilder builder = new SAXBuilder();
  57. // 获取文档
  58. String sheetNoStr = name.replace("/xl/worksheets/sheet", "").replace(".xml", "");
  59. Integer sheetNo = Integer.valueOf(sheetNoStr) - 1;
  60. // 步骤1:创建SAXParserFactory实例
  61. SAXParserFactory factory = SAXParserFactory.newInstance();
  62. // 步骤2:创建SAXParser实例
  63. SAXParser parser = factory.newSAXParser();
  64. MySAXParserHandler handler = new MySAXParserHandler();
  65. parser.parse(part.getInputStream(), handler);
  66. List<String> rows = handler.getRows();
  67. dataMap.put(sheetNo, rows);
  68. }
  69. }
  70. // for (Integer sheetNo : dataMap.keySet()) {
  71. // System.out.println(sheetNo + "\t" + dataMap.get(sheetNo).size());
  72. // }
  73. Map<String, String> imgMap = new HashMap<>();
  74. for (String id : mapImg.keySet()) {
  75. Set<String> imgIds = mapImg.get(id);
  76. String path = mapImgPath.get(id);
  77. for (String imgId : imgIds) {
  78. imgMap.put(imgId, path);
  79. }
  80. }
  81. for (Integer key : dataMap.keySet()) {
  82. List<String> rows = dataMap.get(key);
  83. for (int i = 0; i < rows.size(); i++) {
  84. String imgId = rows.get(i);
  85. if (imgMap.containsKey(imgId)) {
  86. rows.set(i, imgMap.get(imgId));
  87. }
  88. }
  89. }
  90. Map<Integer, List<PackagePart>> map = new HashMap<>();
  91. for (Integer key : dataMap.keySet()) {
  92. List<PackagePart> list = new ArrayList<>();
  93. map.put(key, list);
  94. List<String> pathList = dataMap.get(key);
  95. for (int i = 0; i < pathList.size(); i++) {
  96. list.add(i, null);
  97. String path = pathList.get(i);
  98. if (StringUtils.isNotEmpty(path)) {
  99. for (PackagePart part : parts) {
  100. PackagePartName partName = part.getPartName();
  101. String name = partName.getName();
  102. // /xl/media/image373.jpeg = media/image702.jpeg
  103. if (name.contains(path)) {
  104. list.set(i, part);
  105. break;
  106. }
  107. }
  108. }
  109. }
  110. }
  111. return map;
  112. }

插件代码(XML解析),采用的是SAX解析XML

  1. package com.sx.jz.modules.tm.utils.vo;
  2. import org.xml.sax.Attributes;
  3. import org.xml.sax.SAXException;
  4. import org.xml.sax.helpers.DefaultHandler;
  5. import java.util.ArrayList;
  6. import java.util.List;
  7. /**
  8. * xml解析
  9. *
  10. * @author NoClient
  11. * @date 2023/8/15 14:24
  12. * @since 1.0
  13. */
  14. public class MySAXParserHandler extends DefaultHandler {
  15. String value = null;
  16. List<String> rows = new ArrayList<>();
  17. int rowIndex = 0;
  18. public List<String> getRows() {
  19. return rows;
  20. }
  21. /**
  22. * 用来标识解析开始
  23. */
  24. @Override
  25. public void startDocument() throws SAXException {
  26. // TODO Auto-generated method stub
  27. super.startDocument();
  28. // System.out.println("SAX解析开始");
  29. }
  30. /**
  31. * 用来标识解析结束
  32. */
  33. @Override
  34. public void endDocument() throws SAXException {
  35. // TODO Auto-generated method stub
  36. super.endDocument();
  37. // System.out.println("SAX解析结束");
  38. }
  39. /**
  40. * 解析xml元素
  41. */
  42. @Override
  43. public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
  44. // 调用DefaultHandler类的startElement方法
  45. super.startElement(uri, localName, qName, attributes);
  46. if (qName.equals("row")) {
  47. value = "";
  48. }
  49. }
  50. @Override
  51. public void endElement(String uri, String localName, String qName)
  52. throws SAXException {
  53. //调用DefaultHandler类的endElement方法
  54. super.endElement(uri, localName, qName);
  55. if (qName.equals("row")) {
  56. if (value != null && value.contains("DISPIMG")) {
  57. value = value.substring(value.lastIndexOf("DISPIMG(")).replace("DISPIMG(\"", "");
  58. value = value.substring(0, value.indexOf("\""));
  59. rows.add(rowIndex, value);
  60. } else {
  61. rows.add(rowIndex, null);
  62. }
  63. rowIndex++;
  64. value = "";
  65. }
  66. }
  67. @Override
  68. public void characters(char[] ch, int start, int length)
  69. throws SAXException {
  70. super.characters(ch, start, length);
  71. value += new String(ch, start, length);
  72. }
  73. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Gausst松鼠会/article/detail/273091
推荐阅读
相关标签
  

闽ICP备14008679号