当前位置:   article > 正文

Springboot实现OCR文字识别_springboot在线图片识别文字

springboot在线图片识别文字
  1. 创建Config
    1. package com.tesseract.tesseract.Config;
    2. import net.sourceforge.tess4j.Tesseract;
    3. import org.springframework.beans.factory.annotation.Value;
    4. import org.springframework.context.annotation.Bean;
    5. import org.springframework.context.annotation.Configuration;
    6. @Configuration
    7. public class TesseractOcrConfig {
    8. @Value("${tesseract.dataPath}")
    9. private static String dataPath;
    10. @Bean
    11. public static Tesseract tesseract() {
    12. Tesseract tesseract = new Tesseract();
    13. // 设置训练数据文件夹路径
    14. tesseract.setDatapath(dataPath);
    15. // 设置为中文简体
    16. tesseract.setLanguage("chi_sim");
    17. return tesseract;
    18. }
    19. }

  2. 创建Service

    1. package com.tesseract.tesseract.Service;
    2. import net.sourceforge.tess4j.TesseractException;
    3. import org.springframework.web.multipart.MultipartFile;
    4. import java.io.IOException;
    5. public interface OcrService {
    6. public String recognizeText(MultipartFile imageFile)throws IOException, TesseractException;
    7. }

    这个是接口,下面是实现

    1. package com.tesseract.tesseract.Service;
    2. import net.sourceforge.tess4j.Tesseract;
    3. import net.sourceforge.tess4j.TesseractException;
    4. import org.springframework.stereotype.Service;
    5. import org.springframework.web.multipart.MultipartFile;
    6. import javax.imageio.ImageIO;
    7. import java.awt.image.BufferedImage;
    8. import java.io.ByteArrayInputStream;
    9. import java.io.IOException;
    10. import java.io.InputStream;
    11. @Service
    12. public class OcrServiceImpl implements OcrService {
    13. private final Tesseract tesseract;
    14. public OcrServiceImpl(Tesseract tesseract) {
    15. this.tesseract = tesseract;
    16. }
    17. /**
    18. *
    19. * @param imageFile 要识别的图片
    20. * @return
    21. */
    22. @Override
    23. public String recognizeText(MultipartFile imageFile) throws IOException, TesseractException {
    24. // 转换
    25. InputStream sbs = new ByteArrayInputStream(imageFile.getBytes());
    26. BufferedImage bufferedImage = ImageIO.read(sbs);
    27. // 对图片进行文字识别
    28. return tesseract.doOCR(bufferedImage);
    29. }
    30. }

  3. 最后就是最重要的Controller了

    1. package com.tesseract.tesseract.Controller;
    2. import com.tesseract.tesseract.Service.OcrService;
    3. import lombok.extern.slf4j.Slf4j;
    4. import net.sourceforge.tess4j.TesseractException;
    5. import org.springframework.http.MediaType;
    6. import org.springframework.web.bind.annotation.*;
    7. import org.springframework.web.multipart.MultipartFile;
    8. import java.io.IOException;
    9. @RestController
    10. @RequestMapping(value = "/api", method = {RequestMethod.GET,RequestMethod.POST})
    11. @Slf4j
    12. public class OcrController {
    13. private final OcrService ocrService;
    14. public OcrController(OcrService ocrService) {
    15. this.ocrService = ocrService;
    16. }
    17. @PostMapping(value = "/recognize", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    18. public String recognizeImage(@RequestParam(value = "file") MultipartFile file) throws TesseractException, IOException {
    19. log.info(ocrService.recognizeText(file));
    20. // 调用OcrService中的方法进行文字识别
    21. return ocrService.recognizeText(file);
    22. }
    23. }

  4. 最后到网页实现

    <!DOCTYPE html>
    <html lang="zh-CN" xmlns:th="http://www.thymeleaf.org">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
    </head>
    <body>
    <!-- Posts the chosen image as multipart form data to the OCR endpoint.
         The part name "file" must match the controller's request parameter. -->
    <form action="/api/recognize" method="post" enctype="multipart/form-data">
        <!-- "required" stops an empty submit; "accept" limits the picker to images. -->
        <input type="file" name="file" accept="image/*" required>
        <input type="submit" value="上传">
    </form>
    </body>
    </html>

    然后就是APP

    1. package com.tesseract.tesseract;
    2. import org.springframework.boot.SpringApplication;
    3. import org.springframework.boot.autoconfigure.SpringBootApplication;
    4. @SpringBootApplication
    5. public class TesseractApplication {
    6. public static void main(String[] args) {
    7. SpringApplication.run(TesseractApplication.class, args);
    8. }
    9. }

    最后看一下需要添加的依赖

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>com.tesseract</groupId>
        <artifactId>tesseract</artifactId>
        <version>0.0.1-SNAPSHOT</version>
        <name>Tesseract</name>
        <description>Demo project for Spring Boot</description>
        <properties>
            <java.version>1.8</java.version>
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
            <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
            <spring-boot.version>2.6.13</spring-boot.version>
        </properties>
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-thymeleaf</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-web</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-test</artifactId>
                <scope>test</scope>
            </dependency>
            <!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
            <dependency>
                <groupId>net.sourceforge.tess4j</groupId>
                <artifactId>tess4j</artifactId>
                <version>5.3.0</version>
            </dependency>
            <!-- Lombok is declared once; it was previously listed twice (with and without a
                 version), which Maven reports as a non-unique dependency. The version is
                 taken from the spring-boot-dependencies BOM imported below. -->
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
            </dependency>
        </dependencies>
        <dependencyManagement>
            <dependencies>
                <dependency>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-dependencies</artifactId>
                    <version>${spring-boot.version}</version>
                    <type>pom</type>
                    <scope>import</scope>
                </dependency>
            </dependencies>
        </dependencyManagement>
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.8.1</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                        <encoding>UTF-8</encoding>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                    <version>${spring-boot.version}</version>
                    <configuration>
                        <mainClass>com.tesseract.tesseract.TesseractApplication</mainClass>
                        <!-- NOTE(review): skip=true presumably disables the repackage goal
                             below, so no executable jar is built - confirm this is intended. -->
                        <skip>true</skip>
                    </configuration>
                    <executions>
                        <execution>
                            <id>repackage</id>
                            <goals>
                                <goal>repackage</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    </project>

最最后就是application的配置了

  1. # THYMELEAF (ThymeleafAutoConfiguration)
  2. # 开启模板缓存(默认值: true )
  3. spring.thymeleaf.cache=true
  4. # 检查模板是否存在,然后再呈现
  5. spring.thymeleaf.check-template=true
  6. # 检查模板位置是否正确(默认值 :true )
  7. spring.thymeleaf.check-template-location=true
  8. #Content-Type 的值(默认值: text/html )
  9. spring.thymeleaf.content-type=text/html
  10. # 开启 MVC Thymeleaf 视图解析(默认值: true )
  11. spring.thymeleaf.enabled=true
  12. # 模板编码
  13. spring.thymeleaf.encoding=UTF-8
  14. # 要被排除在解析之外的视图名称列表,用逗号分隔
  15. spring.thymeleaf.excluded-view-names=
  16. # 要运用于模板之上的模板模式。另见 StandardTemplate-ModeHandlers( 默认值: HTML5)
  17. spring.thymeleaf.mode=HTML5
  18. # 在构建 URL 时添加到视图名称前的前缀(默认值: classpath:/templates/ )
  19. spring.thymeleaf.prefix=classpath:/templates/
  20. # 在构建 URL 时添加到视图名称后的后缀(默认值: .html )
  21. spring.thymeleaf.suffix=.html
  22. # 应用服务 WEB 访问端口
  23. server.port=8080
  24. # Tesseract OCR的数据路径配置,确保指向存放 .traineddata 文件的目录
  25. tesseract.dataPath=classpath:/tessdata/

然后不要忘记添加环境变量(例如 TESSDATA_PREFIX,指向存放 .traineddata 文件的 tessdata 目录)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/AllinToyou/article/detail/619972
推荐阅读
相关标签
  

闽ICP备14008679号