当前位置:   article > 正文

java实现识别图片上的文字(OCR识别身份证等证件信息)_java ocr识别

java ocr识别

利用第三方jar包,实现识别图片上的文字。第三方支持地址:Spire.OCR for Java(专业的图文识别组件,用以读取图片格式中的文本)https://www.e-iceblue.cn/Introduce/Spire-OCR-JAVA.html 。Spire.OCR for Java 是专为 Java 开发者设计的强大OCR库,提供高效的文字识别功能,能够从图像中准确识别和提取文本。

1.maven项目,添加依赖

  <!-- Place inside the <dependencies> element of pom.xml -->
  <dependency>
    <groupId>e-iceblue</groupId>
    <artifactId>spire.ocr</artifactId>
    <version>1.9.0</version>
  </dependency>

  <!-- Place directly under <project>: repository that hosts the e-iceblue artifacts -->
  <repositories>
    <repository>
      <id>com.e-iceblue</id>
      <name>e-iceblue</name>
      <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
    </repository>
  </repositories>

2.根据服务器类型,下载相关依赖文件

linux

windows

3.编写demo代码

  1. @Test
  2. public void getText() throws OcrException {
  3. //指定依赖文件的路径
  4. String dependencies = "dependencies/";
  5. //指定要需要扫描的图片的路径
  6. String imageFile = "C:\\Users\\user\\Desktop\\3.jpg";
  7. //指定输出文件的路径
  8. String outputFile = "test.txt";
  9. //创建OcrScanner对象,并设置其依赖文件路径
  10. OcrScanner scanner = new OcrScanner();
  11. scanner.setDependencies(dependencies);
  12. //扫描指定的图像文件
  13. scanner.scan(imageFile);
  14. //获取扫描的文本内容
  15. String scannedText = scanner.getText().toString();
  16. System.out.println(scannedText);
  17. }

4.扩展

根据图片OCR识别,来获取身份证等证件图片上的信息,并用正则表达式解析识别的文字

  1. @Test
  2. public void testIdcard() throws OcrException {
  3. String filePath = "C:\\Users\\85753\\user\\idcard\\";
  4. for (int i = 1; i < 11; i++) {
  5. long startTime = System.currentTimeMillis();
  6. HashMap<String, String> hashMap = idCardOCR(filePath + i + ".jpg");
  7. long endTime = System.currentTimeMillis();
  8. System.out.println(hashMap);
  9. System.out.println("一共耗时:" + (startTime - endTime) + "毫秒!");
  10. }
  11. }
  12. public HashMap<String, String> idCardOCR(String filePath) throws OcrException {
  13. //指定依赖文件的路径
  14. String dependencies = "dependencies/";
  15. //指定要需要扫描的图片的路径
  16. String imageFile = filePath;
  17. //创建OcrScanner对象,并设置其依赖文件路径
  18. OcrScanner scanner = new OcrScanner();
  19. scanner.setDependencies(dependencies);
  20. scanner.scan(imageFile);//扫描指定的图像文件
  21. //获取扫描的文本内容
  22. String scannedText = scanner.getText().toString();
  23. // 去除换行,去除无用数据
  24. String result = scannedText.replaceAll("\r", "").replaceAll("\n", "");
  25. result = result.replaceAll("Evaluation Warning : The version can be used only for evaluation purpose...", "");
  26. System.out.println(result);
  27. HashMap<String, String> map = new HashMap<>();
  28. map = this.dealIDCardText(result);
  29. return map;
  30. }
  31. /**
  32. * 正则处理识别的身份证数据
  33. *
  34. * @param result
  35. * @return
  36. */
  37. public static HashMap<String, String> dealIDCardText(String result) {
  38. HashMap<String, String> map = new HashMap<>();
  39. String name = "";
  40. String sex = "";
  41. String nation = "";
  42. String birth = "";
  43. String address = "";
  44. String idCard = "";
  45. // 处理数据(正则表达式获取身份证信息,满足以下一条即可)
  46. String regex = "姓名(\\S+)民族(\\S+)性别(\\S+)出生(\\d{4}年\\d{1,2}月\\d{1,2}日)住址(\\S+)公民身份号码(\\d{17}[0-9Xx])";
  47. Pattern pattern = Pattern.compile(regex);
  48. Matcher matcher = pattern.matcher(result);
  49. String regex2 = "(\\S+)姓名民族(\\S+)性别(\\S+)出生(\\d{4}年\\d{1,2}月\\d{1,2}日)住址(\\S+)(\\d{17}[0-9Xx])公民身份号码";
  50. Pattern pattern2 = Pattern.compile(regex2);
  51. Matcher matcher2 = pattern2.matcher(result);
  52. String regex3 = "姓名(\\S+)性别(\\S+)民族(\\S+)出生(\\d{4}年\\d{1,2}月\\d{1,2}日)住址(\\S+)公民身份号码(\\d{17}[0-9Xx])";
  53. Pattern pattern3 = Pattern.compile(regex3);
  54. Matcher matcher3 = pattern3.matcher(result);
  55. String regex4 = "姓名(\\S+)性别(\\S+)民族(\\S+)出生(\\d{4}年\\d{1,2}月\\d{1,2}日)住址(\\S+)(\\d{17}[0-9Xx])公民身份号码";
  56. Pattern pattern4 = Pattern.compile(regex4);
  57. Matcher matcher4 = pattern4.matcher(result);
  58. String regex5 = "姓名(\\S+)民族(\\S+)性别(\\S+)出生(\\d{4}年\\d{1,2}月\\d{1,2}日)住址(\\S+)(\\d{17}[0-9Xx])公民身份号码";
  59. Pattern pattern5 = Pattern.compile(regex5);
  60. Matcher matcher5 = pattern5.matcher(result);
  61. String regex6 = "(\\S+)姓名性别(\\S+)民族(\\S+)出生(\\d{4}年\\d{1,2}月\\d{1,2}日)住址(\\S+)(\\d{17}[0-9Xx])公民身份号码";
  62. Pattern pattern6 = Pattern.compile(regex6);
  63. Matcher matcher6 = pattern6.matcher(result);
  64. if (matcher.find()) {
  65. name = matcher.group(1);
  66. nation = matcher.group(2);
  67. sex = matcher.group(3);
  68. birth = matcher.group(4);
  69. address = matcher.group(5);
  70. idCard = matcher.group(6);
  71. } else if (matcher2.find()) {
  72. name = matcher2.group(1);
  73. nation = matcher2.group(2);
  74. sex = matcher2.group(3);
  75. birth = matcher2.group(4);
  76. address = matcher2.group(5);
  77. idCard = matcher2.group(6);
  78. } else if (matcher3.find()) {
  79. name = matcher3.group(1);
  80. sex = matcher3.group(2);
  81. nation = matcher3.group(3);
  82. birth = matcher3.group(4);
  83. address = matcher3.group(5);
  84. idCard = matcher3.group(6);
  85. } else if (matcher4.find()) {
  86. name = matcher4.group(1);
  87. sex = matcher4.group(2);
  88. nation = matcher4.group(3);
  89. birth = matcher4.group(4);
  90. address = matcher4.group(5);
  91. idCard = matcher4.group(6);
  92. } else if (matcher5.find()) {
  93. name = matcher5.group(1);
  94. nation = matcher5.group(2);
  95. sex = matcher5.group(3);
  96. birth = matcher5.group(4);
  97. address = matcher5.group(5);
  98. idCard = matcher5.group(6);
  99. } else if (matcher6.find()) {
  100. name = matcher6.group(1);
  101. sex = matcher6.group(2);
  102. nation = matcher6.group(3);
  103. birth = matcher6.group(4);
  104. address = matcher6.group(5);
  105. idCard = matcher6.group(6);
  106. }
  107. map.put("name", name);
  108. map.put("sex", sex);
  109. map.put("nation", nation);
  110. map.put("birth", birth);
  111. map.put("address", address);
  112. map.put("idCard", idCard);
  113. return map;
  114. }

注意:因图片识别后的身份证数据顺序不固定,经过多次测试,将可能的结果通过正则表达式来解析。如果符合其中一个正则,就将匹配的数据返回。

参考:如何在 Java 项目中扫描识别图片中的文字

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号