赞
踩
ofd发票解析
什么是ofd格式
OFD(Open Fixed-layout Document)是一种开放的版式文档格式,常用于电子发票等金融票据,可以包含各种类型的票面信息。OFD 文件本身是一个 ZIP 压缩包,内部数据通常以 XML 格式进行存储,因此我们可以先解压,再使用 Java 中的 XML 解析器来解析 OFD 文件并提取其中的数据。
在 Java 中,我们可以使用 DOM 风格的解析器(本文使用 dom4j)来解析 XML 文件,然后编写代码读取 OFD 文件并解析其中的数据。
一般情况下根据“key”信息与value信息一一映射,可恢复票面上所有字段
以下信息有所删减
Tags/CustomTag.xml:存放“key”信息【例如InvoiceNo、IssueDate、Note】
This XML file does not appear to have any style information associated with it. The document tree is shown below. <ofd:root xmlns:ofd="http://www.ofdspec.org/2016" version="1.0"> <ofd:InvoiceNo> <ofd:ObjectRef PageRef="61">6922</ofd:ObjectRef> </ofd:InvoiceNo> <ofd:IssueDate> <ofd:ObjectRef PageRef="61">6923</ofd:ObjectRef> </ofd:IssueDate> <ofd:TaxInclusiveTotalAmount> <ofd:ObjectRef PageRef="61">6935</ofd:ObjectRef> <ofd:ObjectRef PageRef="61">6936</ofd:ObjectRef> </ofd:TaxInclusiveTotalAmount> <ofd:Note> <ofd:ObjectRef PageRef="61">6944</ofd:ObjectRef> <ofd:ObjectRef PageRef="61">6945</ofd:ObjectRef> <ofd:ObjectRef PageRef="61">6946</ofd:ObjectRef> <ofd:ObjectRef PageRef="61">6947</ofd:ObjectRef> </ofd:Note> </ofd:root>
Pages/Page_0/Content.xml:存放“value”信息【根据 CustomTag.xml 中 ObjectRef 的 ID 查找对应 TextObject 的文本】
This XML file does not appear to have any style information associated with it. The document tree is shown below. <ofd:Page xmlns:ofd="http://www.ofdspec.org/2016"> <ofd:Area> <ofd:PhysicalBox>0 0 210 140</ofd:PhysicalBox> </ofd:Area> <ofd:Template TemplateID="1" ZOrder="Background"/> <ofd:Content> <ofd:Layer ID="6948"> <ofd:TextObject ID="6922" Boundary="170 10.3 38 5" Font="6919" Size="3.175"> <ofd:TextCode X="0" Y="3.6414" DeltaX="g 19 1.5875">发票编号</ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6923" Boundary="170 16.4 38 5" Font="6919" Size="3.175"> <ofd:TextCode X="0" Y="3.6414" DeltaX="g 4 1.5875 3.175 g 2 1.5875 3.175 g 2 1.5875">发票日期</ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6934" Boundary="62.9471 96 82.5189 7.4102" Font="6919" Size="3.175"> <ofd:TextCode X="0" Y="4.8465" DeltaX="g 5 3.175">发票金额中文</ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6935" Boundary="155.5164 96.0681 48.665 7.6749" Font="6925" Size="3.8806"> <ofd:TextCode X="0" Y="4.9221">¥</ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6936" Boundary="155.5164 96.0681 48.665 7.6749" Font="6919" Size="3.8806"> <ofd:TextCode X="2.3284" Y="4.9221" DeltaX="g 6 1.9403">发票金额阿拉伯数字</ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6944" Boundary="11.1083 104.5369 193 3.3" Font="6919" Size="3.175"> <ofd:TextCode X="0" Y="2.7273" DeltaX="g 6 3.175 1.5875 g 10 3.175 g 5 1.5875 g 4 3.175 g 20 1.5875">购方开户银行:XXX支行; 银行账号:1234567890; </ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6945" Boundary="11.1083 107.8369 193 3.3" Font="6919" Size="3.175"> <ofd:TextCode X="0" Y="2.7273" DeltaX="g 6 3.175 1.5875 g 16 3.175 g 5 1.5875 g 4 3.175 g 24 1.5875">销方开户银行:XXX支行; 银行账号:1234567890; </ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6946" Boundary="11.1083 111.1369 193 3.3" Font="6919" Size="3.175"> <ofd:TextCode X="0" Y="2.7273" DeltaX="g 3 1.5875"> </ofd:TextCode> </ofd:TextObject> <ofd:TextObject ID="6947" Boundary="11.1083 114.4369 193 3.3" Font="6919" Size="3.175"> 
<ofd:TextCode X="0" Y="2.7273" DeltaX="g 3 3.175 1.5875 g 2 3.175 g 5 1.5875 g 3 3.175 1.5875 g 2 3.175 g 4 1.5875">收款人:XX; 复核人:XX; </ofd:TextCode> </ofd:TextObject> <ofd:ImageObject ID="6921" CTM="20 0 0 20 0 0" Boundary="6.5 6 20 20" ResourceID="6920"/> </ofd:Layer> </ofd:Content> </ofd:Page>
添加Maven依赖
<dependency>
<groupId>org.dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>2.1.3</version>
</dependency>
仅有一个实体类Invoice
package com.example.ofd.entity;
import lombok.Data;
/**
 * Holder for the invoice fields this project reads out of an OFD document.
 * All four fields are plain strings; accessors are expected to come from
 * Lombok's {@code @Data} (e.g. {@code setInvoiceNo} is called by the parser).
 */
@Data
public class Invoice {
private String invoiceNo;// invoice number
private String issueDate;// issue date
private String totalAmount;// invoice amount
private String note;// invoice remarks
}
package com.example.ofd.utils;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import com.example.ofd.entity.Invoice;
import lombok.extern.slf4j.Slf4j;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.springframework.util.StreamUtils;
import org.springframework.web.multipart.MultipartFile;

/**
 * Helper dedicated to recognising electronic invoices stored as OFD packages.
 *
 * <p>An OFD file is opened as a ZIP archive. {@code Doc_0/Tags/CustomTag.xml} maps
 * field names ("keys") to object IDs, and {@code Doc_0/Pages/Page_0/Content.xml}
 * holds the text objects ("values") those IDs point at. Only the first document
 * and its first page are consulted.
 */
@Slf4j
public class OfdInvoice {

    /** ZIP entry holding the key -> object-ID mapping. */
    private static final String CUSTOM_TAG_ENTRY = "Doc_0/Tags/CustomTag.xml";

    /** ZIP entry holding the text objects of the first page. */
    private static final String PAGE_CONTENT_ENTRY = "Doc_0/Pages/Page_0/Content.xml";

    /**
     * Stores an uploaded file in a temporary file, parses it with
     * {@link #extract(File)} and always removes the temporary file afterwards.
     *
     * <p>This method is best-effort: any failure (unsupported or missing file
     * name, I/O error, malformed package) is logged and an empty {@link Invoice}
     * is returned instead of an exception.
     *
     * @param file the uploaded file; must not be {@code null}
     * @return the parsed invoice, or an empty invoice when parsing failed
     */
    public static Invoice parseOfdFile(MultipartFile file) {
        Invoice invoice = new Invoice();

        String originalFilename = file.getOriginalFilename();
        String suffix = tempSuffixFor(originalFilename);
        if (suffix == null) {
            log.warn("Unsupported upload '{}': expected a file name ending in .ofd or .pdf",
                    originalFilename);
            return invoice;
        }

        Path tempFile = null;
        try {
            tempFile = Files.createTempFile("tempPrefix", suffix);

            // Stream the upload to disk instead of materialising it as one byte array.
            try (InputStream in = file.getInputStream();
                 FileOutputStream out = new FileOutputStream(tempFile.toFile())) {
                StreamUtils.copy(in, out);
            }

            // extract() only understands OFD (ZIP) packages; a file that is not a
            // ZIP archive fails here and falls through to the catch below.
            invoice = extract(tempFile.toFile());
        } catch (Exception e) {
            log.error("Failed to parse invoice file '{}'", originalFilename, e);
        } finally {
            deleteQuietly(tempFile);
        }
        return invoice;
    }

    /**
     * Extracts invoice number, issue date, tax-inclusive total and note from an
     * OFD package and returns them as an {@link Invoice}.
     *
     * <ul>
     *   <li>{@code InvoiceNo} / {@code IssueDate}: the first {@code ObjectRef} child is used.</li>
     *   <li>{@code TaxInclusiveTotalAmount}: only the last child is used, and only
     *       if it is an {@code ObjectRef} (the preceding one is the currency sign).</li>
     *   <li>{@code Note}: the texts of all {@code ObjectRef} children are concatenated;
     *       references that resolve to no text are skipped.</li>
     * </ul>
     * A field whose tag is absent is left {@code null}.
     *
     * @param file the OFD package on disk
     * @return the populated invoice
     * @throws IOException       if the archive cannot be read or a required entry is missing
     * @throws DocumentException if one of the XML entries is not well-formed
     */
    public static Invoice extract(File file) throws IOException, DocumentException {
        String tagXml;
        String pageXml;
        // try-with-resources closes the archive even when reading an entry fails.
        try (ZipFile zipFile = new ZipFile(file)) {
            tagXml = readEntry(zipFile, CUSTOM_TAG_ENTRY);
            pageXml = readEntry(zipFile, PAGE_CONTENT_ENTRY);
        }

        Element tagRoot = DocumentHelper.parseText(tagXml).getRootElement();
        Element pageRoot = DocumentHelper.parseText(pageXml).getRootElement();

        Invoice invoice = new Invoice();

        String invoiceNoId = firstObjectRefId(tagRoot.element("InvoiceNo"));
        if (invoiceNoId != null) {
            invoice.setInvoiceNo(getContent(pageRoot, invoiceNoId));
        }

        String issueDateId = firstObjectRefId(tagRoot.element("IssueDate"));
        if (issueDateId != null) {
            invoice.setIssueDate(getContent(pageRoot, issueDateId));
        }

        String totalAmountId = lastObjectRefId(tagRoot.element("TaxInclusiveTotalAmount"));
        if (totalAmountId != null) {
            invoice.setTotalAmount(getContent(pageRoot, totalAmountId));
        }

        Element noteTag = tagRoot.element("Note");
        if (noteTag != null) {
            StringBuilder note = new StringBuilder();
            for (Element ref : noteTag.elements("ObjectRef")) {
                String line = getContent(pageRoot, ref.getTextTrim());
                // Skip dangling references so the note never contains the text "null".
                if (line != null) {
                    note.append(line);
                }
            }
            invoice.setNote(note.toString());
        }

        return invoice;
    }

    /**
     * Looks up the text of the {@code TextObject} with the given ID.
     *
     * <p>Searches {@code Content/Layer/TextObject} below {@code root} and returns
     * the trimmed text of the first {@code TextCode} child of the first matching
     * object that has one.
     *
     * @param root root element of a page {@code Content.xml}
     * @param id   object ID to look for
     * @return the trimmed text, or {@code null} if nothing matches (or an argument is {@code null})
     */
    public static String getContent(Element root, String id) {
        if (root == null || id == null) {
            return null;
        }
        Element content = root.element("Content");
        if (content == null) {
            return null;
        }
        for (Element layer : content.elements("Layer")) {
            for (Element textObject : layer.elements("TextObject")) {
                if (!id.equals(textObject.attributeValue("ID"))) {
                    continue;
                }
                Element textCode = textObject.element("TextCode");
                if (textCode != null) {
                    return textCode.getTextTrim();
                }
            }
        }
        return null;
    }

    /** Returns the temp-file suffix for a supported file name, or {@code null} if unsupported. */
    private static String tempSuffixFor(String filename) {
        if (filename == null) {
            return null;
        }
        String lower = filename.toLowerCase(Locale.ROOT);
        if (lower.endsWith(".ofd")) {
            return ".ofd";
        }
        if (lower.endsWith(".pdf")) {
            return ".pdf";
        }
        return null;
    }

    /** Deletes the temporary file if it exists, logging (not throwing) on failure. */
    private static void deleteQuietly(Path tempFile) {
        if (tempFile == null) {
            return;
        }
        try {
            Files.deleteIfExists(tempFile);
        } catch (IOException e) {
            log.warn("Could not delete temporary file {}", tempFile, e);
        }
    }

    /** Reads one ZIP entry fully as UTF-8 text, failing clearly when the entry is absent. */
    private static String readEntry(ZipFile zipFile, String entryName) throws IOException {
        ZipEntry entry = zipFile.getEntry(entryName);
        if (entry == null) {
            throw new IOException("Entry '" + entryName + "' not found in " + zipFile.getName());
        }
        try (InputStream in = zipFile.getInputStream(entry)) {
            return StreamUtils.copyToString(in, StandardCharsets.UTF_8);
        }
    }

    /** Returns the trimmed text of the first {@code ObjectRef} child, or {@code null} if there is none. */
    private static String firstObjectRefId(Element tag) {
        if (tag == null) {
            return null;
        }
        Element ref = tag.element("ObjectRef");
        return ref == null ? null : ref.getTextTrim();
    }

    /** Returns the trimmed text of the last child if it is an {@code ObjectRef}, otherwise {@code null}. */
    private static String lastObjectRefId(Element tag) {
        if (tag == null) {
            return null;
        }
        List<Element> children = tag.elements();
        if (children.isEmpty()) {
            return null;
        }
        Element last = children.get(children.size() - 1);
        return "ObjectRef".equals(last.getName()) ? last.getTextTrim() : null;
    }
}
package com.example.ofd.controller; import com.example.ofd.entity.Invoice; import com.example.ofd.service.InvoiceService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; @RestController @RequestMapping("/invoice") public class InvoiceController { @Autowired InvoiceService invoiceService; /** * @param */ @CrossOrigin(origins = "http://localhost:8081", allowedHeaders = "*", allowCredentials = "true") @PostMapping("/upload") public ResponseEntity<Object> uploadFile(@RequestParam("file") MultipartFile file) { try { // 调用你的文件解析服务 Invoice parsedData = invoiceService.parseOfdFile(file); // 返回解析后的数据 return ResponseEntity.ok(parsedData); } catch (Exception e) { return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("Error parsing file"); } } }
InvoiceServiceImpl
package com.example.ofd.service.impl;

import com.example.ofd.entity.Invoice;
import com.example.ofd.service.InvoiceService;
import com.example.ofd.utils.OfdInvoice;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;

/**
 * {@link InvoiceService} implementation that forwards to the static
 * {@link OfdInvoice} helper.
 */
@Service
public class InvoiceServiceImpl implements InvoiceService {

    /**
     * Parses the uploaded file by delegating to {@link OfdInvoice#parseOfdFile(MultipartFile)}.
     *
     * @param file the uploaded file
     * @return whatever the helper returns for this file
     */
    @Override
    public Invoice parseOfdFile(MultipartFile file) {
        return OfdInvoice.parseOfdFile(file);
    }
}
InvoiceService
package com.example.ofd.service;
import com.example.ofd.entity.Invoice;
import org.springframework.web.multipart.MultipartFile;
/**
 * Service contract for turning an uploaded invoice file into an {@link Invoice}.
 */
public interface InvoiceService {
/**
 * Parses the given uploaded file into an invoice.
 *
 * @param file the uploaded file
 * @return the parsed invoice
 */
Invoice parseOfdFile(MultipartFile file);
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。