当前位置:   article > 正文

保姆级——Java调用百度OCR实现身份证识别_通用ocr调用

通用ocr调用

具体实现功能和参数,可以看百度的API开发文档:https://ai.baidu.com/ai-doc/OCR/rk3h7xzck

其实我是看不懂API文档的

文章中的请求参数

调用百度的OCR需要做的准备工作

先注册一个百度智能云账号,然后在首页选择“创建应用”。

创建完成之后在应用列表就可以找到你刚创建的应用,就可以获取到你需要用到的APPID、API_KEY 、SECRET_KEY 

 然后你可以在首页免费领取你需要用到的资源

 

接下来就是实现

有两种实现

1.通用OCR文字识别

这种OCR只能识别图片中的文字,并且按行返回识别结果,精度较低。

有点类似于word文档的导入

首先引入需要的依赖包

  1. <dependency>
  2. <groupId>com.baidu.aip</groupId>
  3. <artifactId>java-sdk</artifactId>
  4. <version>4.6.0</version>
  5. </dependency>

实现工具类

  1. import java.util.HashMap;
  2. import com.baidu.aip.ocr.AipOcr;
  3. import org.json.JSONObject;
  4. /**
  5. * @author sun 通用OCR文字识别
  6. * @date 2022-11-11 20:10
  7. * @Decsription: com.ocr.util
  8. * @version: 1.0
  9. */
  10. public class OcrApi {
  11. private static final String APP_ID = "";
  12. private static final String API_KEY = "";
  13. private static final String SECRET_KEY = "";
  14. private static AipOcr getAipClient() {
  15. return getAipClient(API_KEY, SECRET_KEY);
  16. }
  17. public static AipOcr getAipClient(String apiKey, String secretKey) {
  18. AipOcr client = new AipOcr(APP_ID, apiKey, secretKey);
  19. // 可选:设置网络连接参数
  20. client.setConnectionTimeoutInMillis(2000);
  21. client.setSocketTimeoutInMillis(60000);
  22. return client;
  23. }
  24. public static String result(AipOcr client) {
  25. // 传入可选参数调用接口
  26. HashMap<String, String> options = new HashMap<>();
  27. options.put("language_type", "CHN_ENG");
  28. options.put("detect_direction", "true");
  29. options.put("detect_language", "true");
  30. options.put("probability", "true");
  31. JSONObject res = client.basicGeneralUrl(
  32. "图片路径", options);
  33. return res.toString(2);
  34. }
  35. public static void main(String[] args) {
  36. System.out.println(result(getAipClient()));
  37. }
  38. }

2.高精度OCR识别身份证信息 

这种就比较高精度,且按照分类显示,返回数据更友好,高可用。

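1.准备pom文件(注意:下面 AccessTokenUtils 用到的 org.json.JSONObject 并不包含在这两个依赖里;如果项目中没有引入上文第一种方式的 java-sdk,还需要另外添加 org.json 依赖)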

  1. <dependency>
  2. <groupId>org.apache.httpcomponents</groupId>
  3. <artifactId>httpclient</artifactId>
  4. <version>4.5.5</version>
  5. </dependency>
  6. <dependency>
  7. <groupId>com.alibaba</groupId>
  8. <artifactId>fastjson</artifactId>
  9. <version>1.2.28</version>
  10. </dependency>

2.获取Access_token
由于Access_token会过期,为了更好的实现需求,我们每次都获取最新的Access_token,获取的方式也非常的简单。

  1. import java.io.BufferedReader;
  2. import java.io.InputStreamReader;
  3. import java.net.HttpURLConnection;
  4. import java.net.URL;
  5. import java.util.List;
  6. import java.util.Map;
  7. import org.json.JSONObject;
  8. public class AccessTokenUtils {
  9. private static String APIKEY = "";
  10. private static String SecretKEY = "";
  11. // 获取Token路径
  12. private static String PATH = "https://aip.baidubce.com/oauth/2.0/token?";
  13. public static String getAuth() {
  14. // 获取token地址
  15. String getAccessTokenUrl = PATH
  16. // 1. grant_type为固定参数
  17. + "grant_type=client_credentials"
  18. // 2. 官网获取的 API Key
  19. + "&client_id=" + APIKEY
  20. // 3. 官网获取的 Secret Key
  21. + "&client_secret=" + SecretKEY;
  22. try {
  23. URL realUrl = new URL(getAccessTokenUrl);
  24. // 打开和URL之间的连接
  25. HttpURLConnection connection = (HttpURLConnection) realUrl.openConnection();
  26. connection.setRequestMethod("GET");
  27. connection.connect();
  28. // 获取所有响应头字段
  29. Map<String, List<String>> map = connection.getHeaderFields();
  30. // 定义 BufferedReader输入流来读取URL的响应
  31. BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
  32. String result = "";
  33. String line;
  34. while ((line = in.readLine()) != null) {
  35. result += line;
  36. }
  37. JSONObject jsonObject = new JSONObject(result);
  38. String access_token = jsonObject.getString("access_token");
  39. return access_token;
  40. } catch (Exception e) {
  41. System.err.printf("获取token失败!");
  42. e.printStackTrace(System.err);
  43. }
  44. return null;
  45. }
  46. }

3.本地上传需要将图片转为Base64码,Url图片可以直接传网络地址

该方法只能传本地图片,oss的图片路径无法使用

  1. import java.io.FileInputStream;
  2. import java.io.IOException;
  3. import java.io.InputStream;
  4. import sun.misc.BASE64Encoder;
  /** Helper that turns a local image file into Base64 text for use in a form-encoded request. */
  public class BaseImg64Utils {
    /**
     * Reads a local image file and returns its content as single-line Base64 text in which every
     * {@code '+'} is replaced by {@code "%2B"}, so the text survives form decoding.
     *
     * <p>Uses {@code java.util.Base64} instead of the internal {@code sun.misc.BASE64Encoder}
     * (the {@code sun.misc.BASE64Encoder} import of this file is therefore no longer needed).
     * The basic encoder never inserts line breaks, so the result is identical on every platform.
     * Only {@code '+'} is escaped; {@code '/'} and {@code '='} are returned unchanged.
     *
     * @param imgPath path of the local image file
     * @return the Base64 text of the file with {@code '+'} escaped as {@code "%2B"}
     * @throws java.io.UncheckedIOException if the file cannot be read
     */
    public static String getImageStrFromPath(String imgPath) {
      final byte[] data;
      try {
        // Reads the complete file and closes it; a single InputStream.read() is not guaranteed
        // to fill the buffer.
        data = java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(imgPath));
      } catch (IOException e) {
        throw new java.io.UncheckedIOException("Cannot read image file: " + imgPath, e);
      }
      String base64 = java.util.Base64.getEncoder().encodeToString(data);
      return base64.replace("+", "%2B");
    }
  }

4.调用API接口的方法,获取识别结果

  1. import java.io.File;
  2. import java.io.IOException;
  3. import java.net.URI;
  4. import java.net.URISyntaxException;
  5. import org.apache.http.HttpResponse;
  6. import org.apache.http.client.HttpClient;
  7. import org.apache.http.client.methods.HttpPost;
  8. import org.apache.http.entity.StringEntity;
  9. import org.apache.http.impl.client.HttpClientBuilder;
  10. import org.apache.http.util.EntityUtils;
  11. public class XszOcrUtils {
  12. private static final String POST_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/idcard?access_token="+ AccessTokenUtils.getAuth();
  13. //必传参数:id_card_side=frontfront:身份证含照片的一面 -back:身份证带国徽的一面 自动检测身份证正反面,如果传参指定方向与图片相反,支持正常识别,返回参数image_status字段为"reversed_side"
  14. /**
  15. * 识别本地图片的文字
  16. *
  17. * @param path 本地图片地址
  18. * @return 识别结果,为json格式
  19. * @throws URISyntaxException URI打开异常
  20. * @throws IOException io流异常
  21. */
  22. public static String checkFile(String path) throws URISyntaxException, IOException {
  23. File file = new File(path);
  24. if (!file.exists()) {
  25. throw new NullPointerException("图片不存在");
  26. }
  27. String image = BaseImg64Utils.getImageStrFromPath(path);
  28. String param = "image=" + image + "&id_card_side=front";
  29. return post(param);
  30. }
  31. /**
  32. * @param url 图片url
  33. * @return 识别结果,为json格式
  34. */
  35. public static String checkUrl(String url) throws IOException, URISyntaxException {
  36. String param = "url=" + url;
  37. return post(param);
  38. }
  39. /**
  40. * 通过传递参数:url和image进行文字识别
  41. *
  42. * @param param 区分是url还是image识别
  43. * @return 识别结果
  44. * @throws URISyntaxException URI打开异常
  45. * @throws IOException IO流异常
  46. */
  47. private static String post(String param) throws URISyntaxException, IOException {
  48. // 开始搭建post请求
  49. HttpClient httpClient = HttpClientBuilder.create().build();
  50. HttpPost post = new HttpPost();
  51. URI url = new URI(POST_URL);
  52. post.setURI(url);
  53. // 设置请求头,请求头必须为application/x-www-form-urlencoded,因为是传递一个很长的字符串,不能分段发送
  54. post.setHeader("Content-Type", "application/x-www-form-urlencoded");
  55. StringEntity entity = new StringEntity(param);
  56. post.setEntity(entity);
  57. HttpResponse response = httpClient.execute(post);
  58. if (response.getStatusLine().getStatusCode() == 200) {
  59. String str;
  60. try {
  61. /* 读取服务器返回过来的json字符串数据 */
  62. str = EntityUtils.toString(response.getEntity());
  63. return str;
  64. } catch (Exception e) {
  65. e.printStackTrace();
  66. return null;
  67. }
  68. }
  69. return null;
  70. }
  71. public static void main(String[] args) throws URISyntaxException, IOException {
  72. String checkFile = checkFile("E:\\tmp_40d75be6049049f841cbbee213743430d699596c8c08ae47ac76a60f36de6189.jpeg");
  73. System.out.println("========" + checkFile);
  74. }

5.识别结果(正面)

  1. {
  2. "log_id": 2648325511,
  3. "direction": 0,
  4. "image_status": "normal",
  5. "photo": "/9j/4AAQSkZJRgABA......",
  6. "photo_location": {
  7. "width": 1189,
  8. "top": 638,
  9. "left": 2248,
  10. "height": 1483
  11. },
  12. "card_image": "/9j/4AAQSkZJRgABA......",
  13. "card_location": {
  14. "top": 328,
  15. "left": 275,
  16. "width": 1329,
  17. "height": 571
  18. },
  19. "words_result": {
  20. "住址": {
  21. "location": {
  22. "left": 267,
  23. "top": 453,
  24. "width": 459,
  25. "height": 99
  26. },
  27. "words": "南京市江宁区弘景大道3889号"
  28. },
  29. "公民身份号码": {
  30. "location": {
  31. "left": 443,
  32. "top": 681,
  33. "width": 589,
  34. "height": 45
  35. },
  36. "words": "330881199904173914"
  37. },
  38. "出生": {
  39. "location": {
  40. "left": 270,
  41. "top": 355,
  42. "width": 357,
  43. "height": 45
  44. },
  45. "words": "19990417"
  46. },
  47. "姓名": {
  48. "location": {
  49. "left": 267,
  50. "top": 176,
  51. "width": 152,
  52. "height": 50
  53. },
  54. "words": "伍云龙"
  55. },
  56. "性别": {
  57. "location": {
  58. "left": 269,
  59. "top": 262,
  60. "width": 33,
  61. "height": 52
  62. },
  63. "words": "男"
  64. },
  65. "民族": {
  66. "location": {
  67. "left": 492,
  68. "top": 279,
  69. "width": 30,
  70. "height": 37
  71. },
  72. "words": "汉"
  73. }
  74. },
  75. "words_result_num": 6
  76. }

反面

  1. {
  2. "words_result": {
  3. "失效日期": {
  4. "words": "20390711",
  5. "location": {
  6. "top": 445,
  7. "left": 523,
  8. "width": 153,
  9. "height": 38
  10. }
  11. },
  12. "签发机关": {
  13. "words": "陆丰市公安局",
  14. "location": {
  15. "top": 377,
  16. "left": 339,
  17. "width": 195,
  18. "height": 38
  19. }
  20. },
  21. "签发日期": {
  22. "words": "20190606",
  23. "location": {
  24. "top": 445,
  25. "left": 343,
  26. "width": 152,
  27. "height": 38
  28. }
  29. }
  30. },
  31. "log_id": "1559208562721579328",
  32. "words_result_num": 3,
  33. "error_code": 0,
  34. "image_status": "normal"
  35. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小小林熬夜学编程/article/detail/231316?site
推荐阅读
相关标签
  

闽ICP备14008679号