赞
踩
- package com.tesseract.tesseract.Config;
-
- import net.sourceforge.tess4j.Tesseract;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.context.annotation.Bean;
- import org.springframework.context.annotation.Configuration;
-
- /**
-  * Spring configuration that publishes a {@link Tesseract} OCR engine as a bean.
-  */
- @Configuration
- public class TesseractOcrConfig {
-
-     /**
-      * Training-data folder, taken from the {@code tesseract.dataPath} property.
-      *
-      * <p>This is deliberately an instance field: Spring does not inject
-      * {@code @Value} into static fields, so a static field would still be
-      * {@code null} when the bean below is created.
-      */
-     @Value("${tesseract.dataPath}")
-     private String dataPath;
-
-     /**
-      * Builds the OCR engine used by the application.
-      *
-      * <p>The method is non-static so that it runs on the fully injected
-      * configuration instance and therefore sees {@link #dataPath}.
-      *
-      * @return a {@link Tesseract} instance configured with the data path and
-      *         the {@code chi_sim} language
-      */
-     @Bean
-     public Tesseract tesseract() {
-         Tesseract tesseract = new Tesseract();
-         // Folder that holds the training data files.
-         tesseract.setDatapath(dataPath);
-         // Recognize Simplified Chinese.
-         tesseract.setLanguage("chi_sim");
-         return tesseract;
-     }
-
- }
-
-
创建Service
- package com.tesseract.tesseract.Service;
-
- import net.sourceforge.tess4j.TesseractException;
- import org.springframework.web.multipart.MultipartFile;
-
- import java.io.IOException;
-
- /**
-  * Contract for turning an uploaded image into text.
-  */
- public interface OcrService {
-
-     /**
-      * Runs text recognition on the given upload.
-      *
-      * @param imageFile the image to recognize
-      * @return the recognized text
-      * @throws IOException        declared for implementations that read the upload
-      * @throws TesseractException declared for implementations that call the OCR engine
-      */
-     String recognizeText(MultipartFile imageFile) throws IOException, TesseractException;
- }
这个是接口,下面是实现
- package com.tesseract.tesseract.Service;
-
- import net.sourceforge.tess4j.Tesseract;
- import net.sourceforge.tess4j.TesseractException;
- import org.springframework.stereotype.Service;
- import org.springframework.web.multipart.MultipartFile;
-
- import javax.imageio.ImageIO;
- import java.awt.image.BufferedImage;
- import java.io.ByteArrayInputStream;
- import java.io.IOException;
- import java.io.InputStream;
-
- /**
-  * {@link OcrService} implementation that delegates recognition to the injected
-  * {@link Tesseract} engine.
-  */
- @Service
- public class OcrServiceImpl implements OcrService {
-
-     // NOTE(review): a single engine instance is shared by every request;
-     // confirm whether Tesseract#doOCR may be called concurrently.
-     private final Tesseract tesseract;
-
-     public OcrServiceImpl(Tesseract tesseract) {
-         this.tesseract = tesseract;
-     }
-
-     /**
-      * Decodes the uploaded file as an image and runs OCR on it.
-      *
-      * @param imageFile the image to recognize
-      * @return the text returned by the OCR engine
-      * @throws IOException        if no data was uploaded, the upload cannot be read,
-      *                            or its content is not an image format ImageIO can decode
-      * @throws TesseractException if the OCR engine fails
-      */
-     @Override
-     public String recognizeText(MultipartFile imageFile) throws IOException, TesseractException {
-         if (imageFile == null || imageFile.isEmpty()) {
-             throw new IOException("No image data was uploaded");
-         }
-
-         BufferedImage bufferedImage;
-         // Stream the upload instead of copying it into a byte array first.
-         try (InputStream in = imageFile.getInputStream()) {
-             bufferedImage = ImageIO.read(in);
-         }
-
-         // ImageIO.read returns null (it does not throw) when no registered
-         // reader understands the data; fail clearly instead of passing null on.
-         if (bufferedImage == null) {
-             throw new IOException(
-                     "Uploaded file is not a readable image: " + imageFile.getOriginalFilename());
-         }
-
-         return tesseract.doOCR(bufferedImage);
-     }
- }
-
-
最后就是最重要的Controller了
- package com.tesseract.tesseract.Controller;
-
- import com.tesseract.tesseract.Service.OcrService;
- import lombok.extern.slf4j.Slf4j;
- import net.sourceforge.tess4j.TesseractException;
- import org.springframework.http.MediaType;
- import org.springframework.web.bind.annotation.*;
- import org.springframework.web.multipart.MultipartFile;
-
- import java.io.IOException;
-
- @RestController
- @RequestMapping(value = "/api", method = {RequestMethod.GET,RequestMethod.POST})
- @Slf4j
- public class OcrController {
- private final OcrService ocrService;
-
- public OcrController(OcrService ocrService) {
- this.ocrService = ocrService;
- }
-
- @PostMapping(value = "/recognize", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
- public String recognizeImage(@RequestParam(value = "file") MultipartFile file) throws TesseractException, IOException {
-
- log.info(ocrService.recognizeText(file));
-
- // 调用OcrService中的方法进行文字识别
- return ocrService.recognizeText(file);
- }
- }
-
最后到网页实现
- <!DOCTYPE html>
- <!-- lang matches the language of the visible UI text (Simplified Chinese). -->
- <html lang="zh-CN" xmlns:th="http://www.thymeleaf.org">
- <head>
-     <meta charset="UTF-8">
-     <title>Title</title>
- </head>
- <body>
- <!-- Upload form: posts the chosen image as the multipart part "file" to /api/recognize. -->
- <form action="/api/recognize" method="post" enctype="multipart/form-data">
-     <!-- "required" blocks an empty submit; "accept" narrows the file picker to images. -->
-     <input type="file" name="file" accept="image/*" required>
-     <input type="submit" value="上传">
- </form>
- </body>
- </html>
然后就是APP
- package com.tesseract.tesseract;
-
- import org.springframework.boot.SpringApplication;
- import org.springframework.boot.autoconfigure.SpringBootApplication;
-
- /**
-  * Spring Boot entry point of the OCR demo application.
-  */
- @SpringBootApplication
- public class TesseractApplication {
-
-     /**
-      * Starts the Spring application with this class as its primary source.
-      *
-      * @param args command-line arguments handed on to Spring Boot
-      */
-     public static void main(String[] args) {
-         new SpringApplication(TesseractApplication.class).run(args);
-     }
-
- }
最后看一下添加的依赖
- <?xml version="1.0" encoding="UTF-8"?>
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
-     <modelVersion>4.0.0</modelVersion>
-     <groupId>com.tesseract</groupId>
-     <artifactId>tesseract</artifactId>
-     <version>0.0.1-SNAPSHOT</version>
-     <name>Tesseract</name>
-     <description>Demo project for Spring Boot</description>
-     <properties>
-         <java.version>1.8</java.version>
-         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-         <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-         <spring-boot.version>2.6.13</spring-boot.version>
-     </properties>
-     <dependencies>
-         <dependency>
-             <groupId>org.springframework.boot</groupId>
-             <artifactId>spring-boot-starter-thymeleaf</artifactId>
-         </dependency>
-         <dependency>
-             <groupId>org.springframework.boot</groupId>
-             <artifactId>spring-boot-starter-web</artifactId>
-         </dependency>
-
-         <dependency>
-             <groupId>org.springframework.boot</groupId>
-             <artifactId>spring-boot-starter-test</artifactId>
-             <scope>test</scope>
-         </dependency>
-         <!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
-         <dependency>
-             <groupId>net.sourceforge.tess4j</groupId>
-             <artifactId>tess4j</artifactId>
-             <version>5.3.0</version>
-         </dependency>
-         <!-- Lombok is declared exactly once (it used to appear twice with
-              conflicting versions); the version comes from the imported
-              spring-boot-dependencies BOM. It is only needed at compile time,
-              hence optional. -->
-         <dependency>
-             <groupId>org.projectlombok</groupId>
-             <artifactId>lombok</artifactId>
-             <optional>true</optional>
-         </dependency>
-     </dependencies>
-     <dependencyManagement>
-         <dependencies>
-             <dependency>
-                 <groupId>org.springframework.boot</groupId>
-                 <artifactId>spring-boot-dependencies</artifactId>
-                 <version>${spring-boot.version}</version>
-                 <type>pom</type>
-                 <scope>import</scope>
-             </dependency>
-         </dependencies>
-     </dependencyManagement>
-
-     <build>
-         <plugins>
-             <plugin>
-                 <groupId>org.apache.maven.plugins</groupId>
-                 <artifactId>maven-compiler-plugin</artifactId>
-                 <version>3.8.1</version>
-                 <configuration>
-                     <source>1.8</source>
-                     <target>1.8</target>
-                     <encoding>UTF-8</encoding>
-                 </configuration>
-             </plugin>
-             <plugin>
-                 <groupId>org.springframework.boot</groupId>
-                 <artifactId>spring-boot-maven-plugin</artifactId>
-                 <version>${spring-boot.version}</version>
-                 <configuration>
-                     <mainClass>com.tesseract.tesseract.TesseractApplication</mainClass>
-                     <!-- <skip>true</skip> was removed: it disabled the repackage
-                          execution declared below, so no executable jar was built. -->
-                 </configuration>
-                 <executions>
-                     <execution>
-                         <id>repackage</id>
-                         <goals>
-                             <goal>repackage</goal>
-                         </goals>
-                     </execution>
-                 </executions>
-             </plugin>
-         </plugins>
-     </build>
-
- </project>
最最后就是application的配置了
# THYMELEAF (ThymeleafAutoConfiguration)
# Enable template caching (default: true)
spring.thymeleaf.cache=true
# Check that the template exists before rendering it
spring.thymeleaf.check-template=true
# Check that the template location is correct (default: true)
spring.thymeleaf.check-template-location=true
# Content-Type value (default: text/html).
# Spring Boot 2.x reads this from the "servlet." sub-namespace; the older
# spring.thymeleaf.content-type key is not bound any more.
spring.thymeleaf.servlet.content-type=text/html
# Enable Thymeleaf view resolution for Spring MVC (default: true)
spring.thymeleaf.enabled=true
# Template encoding
spring.thymeleaf.encoding=UTF-8
# Comma-separated list of view names to exclude from resolution
spring.thymeleaf.excluded-view-names=
# Template mode applied to templates. Thymeleaf 3 names this mode HTML;
# the former value HTML5 is a deprecated alias for it.
spring.thymeleaf.mode=HTML
# Prefix prepended to view names when building a URL (default: classpath:/templates/)
spring.thymeleaf.prefix=classpath:/templates/
# Suffix appended to view names when building a URL (default: .html)
spring.thymeleaf.suffix=.html
# Web port the application listens on
server.port=8080
# Tesseract OCR data path; make sure it points to the directory that holds
# the .traineddata files.
# NOTE(review): this string is passed unchanged to Tesseract#setDatapath, which
# presumably expects a file-system directory rather than a Spring "classpath:"
# location - confirm and replace with a real path if so.
tesseract.dataPath=classpath:/tessdata/
然后不要忘记添加环境变量（例如 TESSDATA_PREFIX，指向 tessdata 所在的目录）。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。