当前位置:   article > 正文

paddle ocr v4 2.6.1实战笔记_paddle:2.6.1-jupyter

paddle:2.6.1-jupyter

目录

效果图:

安装

模型权重是自动下载,如果提前下载会报错。

OCR识别,并用opencv可视化结果,支持中文可视化

官方原版预测可视化:


效果图:

安装

注意:安装 paddlepaddle-gpu 2.5.2 时识别结果为空,请安装 2.6.1:

pip install paddlepaddle-gpu==2.6.1

模型权重是自动下载,如果提前下载会报错。

测试代码:

  # Smoke test: run PP-OCRv4 once on a single image and print every recognized line.
  import os
  import time

  from paddleocr import PaddleOCR

  # Number of timed inference passes; the printed latency is averaged over this count.
  NUM_RUNS = 1

  filepath = r"weights/123.jpg"
  ocr_model = PaddleOCR(
      use_angle_cls=True,
      lang="ch",
      use_gpu=True,
      show_log=1,
      det_db_box_thresh=0.1,
      use_dilation=True,
      det_model_dir='weight/ch_PP-OCRv4_det_server_infer.tar',
      cls_model_dir='weight/ch_ppocr_mobile_v2.0_cls_infer.tar',
      rec_model_dir='weight/ch_PP-OCRv4_rec_server_infer.tar',
  )

  t1 = time.time()
  for _ in range(NUM_RUNS):
      # ocr() returns one entry per image/page; a single image is passed, so take entry 0.
      result = ocr_model.ocr(img=filepath, det=True, rec=True, cls=True)[0]
  t2 = time.time()
  # Average seconds per pass: the divisor has to match the loop count.
  print((t2 - t1) / NUM_RUNS)

  # The entry for an image without any detected text is None, so guard the loop.
  for res_str in result or []:
      print(res_str)

OCR识别,并用opencv可视化结果,支持中文可视化

  # Run PP-OCRv4 on one image, then draw the selected text boxes and their
  # (Chinese) labels on the image and show it in an OpenCV window.
  import codecs
  import os
  import time

  import cv2
  import numpy as np
  from PIL import ImageFont
  from PIL import Image
  from PIL import ImageDraw
  from paddleocr import PaddleOCR

  # Number of timed inference passes; the printed latency is averaged over this count.
  NUM_RUNS = 1
  # A box is drawn only when, for one of these (lo, hi) pixel ranges, the x of
  # its first corner is > lo and the x of its third corner is < hi.
  COLUMN_X_RANGES = ((36, 84), (346, 391), (658, 705))

  filepath = r"weights/124.jpg"
  ocr_model = PaddleOCR(
      use_angle_cls=True,
      lang="ch",
      use_gpu=True,
      show_log=1,
      det_db_box_thresh=0.1,
      use_dilation=True,
      det_model_dir='weight/ch_PP-OCRv4_det_server_infer.tar',
      cls_model_dir='weight/ch_ppocr_mobile_v2.0_cls_infer.tar',
      rec_model_dir='weight/ch_PP-OCRv4_rec_server_infer.tar',
  )

  t1 = time.time()
  for _ in range(NUM_RUNS):
      result = ocr_model.ocr(img=filepath, det=True, rec=True, cls=True)[0]
  t2 = time.time()
  # Average seconds per pass: the divisor has to match the loop count.
  print((t2 - t1) / NUM_RUNS)

  font_path = 'simhei.ttf'  # replace with the path of a font that contains Chinese glyphs
  font = ImageFont.truetype(font_path, 24)


  def cv2AddChineseText(img, text, position, textColor=(0, 255, 0), textSize=30):
      """Draw ``text`` on a BGR image through Pillow and return a new BGR image.

      Args:
          img: BGR image array as produced by ``cv2.imread``.
          text: String to render.
          position: ``(x, y)`` anchor handed to ``ImageDraw.text``.
          textColor: Fill colour handed to ``ImageDraw.text``.
          textSize: Accepted for call compatibility only; the glyph size is
              fixed by the module-level ``font`` object.

      Returns:
          The annotated image converted back to BGR channel order.
      """
      img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
      draw = ImageDraw.Draw(img)
      draw.text(position, text, textColor, font=font)
      return cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)


  image = cv2.imread(filepath)
  if image is None:
      # cv2.imread signals an unreadable file by returning None instead of raising.
      raise FileNotFoundError("cannot read image: {}".format(filepath))

  # The entry for an image without detected text is None; treat it as "no boxes".
  for ocr_index, res_str in enumerate(result or []):
      box = res_str[0]
      text = res_str[1][0]
      first_x, third_x = box[0][0], box[2][0]
      if not any(lo < first_x and third_x < hi for lo, hi in COLUMN_X_RANGES):
          continue
      print(ocr_index, res_str)
      points = np.array(box, dtype=np.int32).reshape((-1, 1, 2))
      cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
      # Nudge the label 20 px below the first corner of the box.
      text_position = (int(points[0][0][0]), int(points[0][0][1] + 20))
      image = cv2AddChineseText(image, text, text_position, textColor=(0, 255, 0), textSize=30)

  cv2.imshow('Image with Rectangle and Text', image)
  cv2.waitKey(0)

官方原版预测可视化

  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import sys
  16. import importlib
  17. __dir__ = os.path.dirname(__file__)
  18. import paddle
  19. from paddle.utils import try_import
  20. sys.path.append(os.path.join(__dir__, ""))
  21. import cv2
  22. import logging
  23. import numpy as np
  24. from pathlib import Path
  25. import base64
  26. from io import BytesIO
  27. from PIL import Image, ImageFont, ImageDraw
  28. from tools.infer import predict_system
  def _import_file(module_name, file_path, make_importable=False):
      """Load a Python source file and return it as a module object.

      Args:
          module_name: Name given to the module that is created.
          file_path: Path of the source file to execute.
          make_importable: When true, also register the module in
              ``sys.modules`` under ``module_name`` so later ``import``
              statements resolve to it.

      Returns:
          The module object, after its code has been executed.

      Raises:
          ImportError: If no import spec/loader can be built for ``file_path``.
      """
      # ``import importlib`` alone does not guarantee that the ``util``
      # submodule has been loaded, so import it explicitly here.
      import importlib.util

      spec = importlib.util.spec_from_file_location(module_name, file_path)
      if spec is None or spec.loader is None:
          raise ImportError("cannot load module {!r} from {!r}".format(module_name, file_path))
      module = importlib.util.module_from_spec(spec)
      spec.loader.exec_module(module)
      if make_importable:
          sys.modules[module_name] = module
      return module
  # Load the local ``tools`` package that sits next to this file and register it
  # in sys.modules, so the ``from tools...`` import further down resolves to it.
  tools = _import_file(
      "tools",
      os.path.join(__dir__, "tools/__init__.py"),
      make_importable=True,
  )
  ppocr = importlib.import_module("ppocr", "paddleocr")
  ppstructure = importlib.import_module("ppstructure", "paddleocr")

  from ppocr.utils.logging import get_logger

  logger = get_logger()

  from ppocr.utils.utility import (
      check_and_read,
      get_image_file_list,
      alpha_to_color,
      binarize_img,
  )
  from ppocr.utils.network import (
      maybe_download,
      download_with_progressbar,
      is_link,
      confirm_model_dir_url,
  )
  from tools.infer.utility import draw_ocr, str2bool, check_gpu
  from ppstructure.utility import init_args, draw_structure_result
  from ppstructure.predict_system import StructureSystem, save_structure_res, to_excel

  logger = get_logger()

  # Names exported by ``from <this module> import *``.
  __all__ = [
      "PaddleOCR",
      "PPStructure",
      "draw_ocr",
      "draw_structure_result",
      "save_structure_res",
      "download_with_progressbar",
      "to_excel",
  ]

  VERSION = "2.8.0"
  # Root directory under which model directories are placed by default.
  BASE_DIR = os.path.expanduser("~/.paddleocr/")

  # Algorithms accepted for the detection and recognition stages.
  SUPPORT_DET_MODEL = ["DB"]
  SUPPORT_REC_MODEL = ["CRNN", "SVTR_LCNet"]

  # OCR model generations: the accepted values and the fallback.
  SUPPORT_OCR_MODEL_VERSION = ["PP-OCR", "PP-OCRv2", "PP-OCRv3", "PP-OCRv4"]
  DEFAULT_OCR_MODEL_VERSION = "PP-OCRv4"

  # Structure (table/layout) model generations: the accepted values and the fallback.
  SUPPORT_STRUCTURE_MODEL_VERSION = ["PP-Structure", "PP-StructureV2"]
  DEFAULT_STRUCTURE_MODEL_VERSION = "PP-StructureV2"
  # Registry of downloadable inference models, indexed as
  # MODEL_URLS[task][version][model_type][lang], where task is "OCR" or "STRUCTURE".
  # Every leaf dict carries a "url"; "rec", "table" and "layout" leaves also carry
  # a "dict_path" pointing at the matching dictionary file.
  56. MODEL_URLS = {"OCR": {"PP-OCRv4": {"det": {"ch": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar", }, "en": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar", },
  57. "ml": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar"}, },
  58. "rec": {"ch": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/ppocr_keys_v1.txt", }, "en": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/english/en_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/en_dict.txt", },
  59. "korean": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/korean_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/dict/korean_dict.txt", },
  60. "japan": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/japan_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/dict/japan_dict.txt", },
  61. "chinese_cht": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/chinese_cht_dict.txt", },
  62. "ta": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ta_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/dict/ta_dict.txt", },
  63. "te": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/te_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/dict/te_dict.txt", },
  64. "ka": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ka_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/dict/ka_dict.txt", },
  65. "latin": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/latin_dict.txt", },
  66. "arabic": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/arabic_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/dict/arabic_dict.txt", },
  67. "cyrillic": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/cyrillic_dict.txt", },
  68. "devanagari": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/devanagari_PP-OCRv4_rec_infer.tar", "dict_path": "./ppocr/utils/dict/devanagari_dict.txt", }, }, "cls": {"ch": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar", }}, },
  69. "PP-OCRv3": {"det": {"ch": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar", }, "en": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar", },
  70. "ml": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar"}, },
  71. "rec": {"ch": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/ppocr_keys_v1.txt", }, "en": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/en_dict.txt", },
  72. "korean": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/korean_dict.txt", },
  73. "japan": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/japan_dict.txt", },
  74. "chinese_cht": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/chinese_cht_dict.txt", },
  75. "ta": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/ta_dict.txt", },
  76. "te": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/te_dict.txt", },
  77. "ka": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/ka_dict.txt", },
  78. "latin": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/latin_dict.txt", },
  79. "arabic": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/arabic_dict.txt", },
  80. "cyrillic": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/cyrillic_dict.txt", },
  81. "devanagari": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tar", "dict_path": "./ppocr/utils/dict/devanagari_dict.txt", }, }, "cls": {"ch": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar", }}, },
  82. "PP-OCRv2": {"det": {"ch": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar", }, }, "rec": {"ch": {"url": "https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar", "dict_path": "./ppocr/utils/ppocr_keys_v1.txt", }},
  83. "cls": {"ch": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar", }}, }, "PP-OCR": {
  84. "det": {"ch": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar", }, "en": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar", },
  85. "structure": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar"}, }, "rec": {"ch": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/ppocr_keys_v1.txt", },
  86. "en": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/en_dict.txt", },
  87. "french": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/french_dict.txt", },
  88. "german": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/german_dict.txt", },
  89. "korean": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/korean_dict.txt", },
  90. "japan": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/japan_dict.txt", },
  91. "chinese_cht": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/chinese_cht_dict.txt", },
  92. "ta": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/ta_dict.txt", },
  93. "te": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/te_dict.txt", },
  94. "ka": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/ka_dict.txt", },
  95. "latin": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/latin_dict.txt", },
  96. "arabic": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/arabic_dict.txt", },
  97. "cyrillic": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/cyrillic_dict.txt", },
  98. "devanagari": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar", "dict_path": "./ppocr/utils/dict/devanagari_dict.txt", },
  99. "structure": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar", "dict_path": "ppocr/utils/dict/table_dict.txt", }, }, "cls": {"ch": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar", }}, }, },
  100. "STRUCTURE": {"PP-Structure": {"table": {"en": {"url": "https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar", "dict_path": "ppocr/utils/dict/table_structure_dict.txt", }}}, "PP-StructureV2": {
  101. "table": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar", "dict_path": "ppocr/utils/dict/table_structure_dict.txt", },
  102. "ch": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar", "dict_path": "ppocr/utils/dict/table_structure_dict_ch.txt", }, },
  103. "layout": {"en": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar", "dict_path": "ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt", },
  104. "ch": {"url": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar", "dict_path": "ppocr/utils/dict/layout_dict/layout_cdla_dict.txt", }, }, }, }, }
  def parse_args(mMain=True):
      """Build the paddleocr option parser and return the resolved options.

      The base parser comes from ``init_args()``; this function adds the
      package-level switches (``--lang``, ``--det``, ``--rec``, ``--type``,
      ``--savefile``, ``--ocr_version``, ``--structure_version``).

      Args:
          mMain: When true, parse the process command line. When false, do not
              parse anything and return a namespace holding every option's
              default value.

      Returns:
          An ``argparse.Namespace`` with one attribute per registered option.
      """
      import argparse

      parser = init_args()
      parser.add_help = mMain
      parser.add_argument("--lang", type=str, default="ch")
      parser.add_argument("--det", type=str2bool, default=True)
      parser.add_argument("--rec", type=str2bool, default=True)
      parser.add_argument("--type", type=str, default="ocr")
      parser.add_argument("--savefile", type=str2bool, default=False)
      parser.add_argument(
          "--ocr_version",
          type=str,
          choices=SUPPORT_OCR_MODEL_VERSION,
          default=DEFAULT_OCR_MODEL_VERSION,
          # Adjacent literals are concatenated, so each item must end with its
          # own separator or the numbered items run into each other.
          help="OCR Model version, the current model support list is as follows: "
          "1. PP-OCRv4/v3 Support Chinese and English detection and recognition model, and direction classifier model. "
          "2. PP-OCRv2 Support Chinese detection and recognition model. "
          "3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.",
      )
      parser.add_argument(
          "--structure_version",
          type=str,
          choices=SUPPORT_STRUCTURE_MODEL_VERSION,
          default=DEFAULT_STRUCTURE_MODEL_VERSION,
          help="Model version, the current model support list is as follows:"
          " 1. PP-Structure Support en table structure model."
          " 2. PP-StructureV2 Support ch and en table structure model.",
      )

      # Clear the dictionary-path defaults: the engine constructors test these
      # for None and then fill in the dictionary that matches the chosen model.
      dict_path_options = ("rec_char_dict_path", "table_char_dict_path", "layout_dict_path")
      for action in parser._actions:
          if action.dest in dict_path_options:
              action.default = None

      if mMain:
          return parser.parse_args()
      defaults = {action.dest: action.default for action in parser._actions}
      return argparse.Namespace(**defaults)
  def parse_lang(lang):
      """Translate a language code into recognition and detection model keys.

      Codes belonging to a script family are first collapsed to the family name
      ("latin", "arabic", "cyrillic" or "devanagari"). The result must be a key
      of the default OCR version's "rec" table, otherwise the assertion fails.

      Args:
          lang: Language code supplied by the caller.

      Returns:
          ``(lang, det_lang)``: the recognition key, and the detection key
          ("ch", "structure", "en" or "ml").
      """
      script_families = (
          ("latin", ("af", "az", "bs", "cs", "cy", "da", "de", "es", "et", "fr", "ga", "hr", "hu", "id", "is", "it", "ku", "la", "lt", "lv", "mi", "ms", "mt", "nl", "no", "oc", "pi", "pl", "pt", "ro", "rs_latin", "sk", "sl", "sq", "sv", "sw", "tl", "tr", "uz", "vi", "french", "german")),
          ("arabic", ("ar", "fa", "ug", "ur")),
          ("cyrillic", ("ru", "rs_cyrillic", "be", "bg", "uk", "mn", "abq", "ady", "kbd", "ava", "dar", "inh", "che", "lbe", "lez", "tab")),
          ("devanagari", ("hi", "mr", "ne", "bh", "mai", "ang", "bho", "mah", "sck", "new", "gom", "sa", "bgc")),
      )
      for family_name, member_codes in script_families:
          if lang in member_codes:
              lang = family_name
              break

      rec_table = MODEL_URLS["OCR"][DEFAULT_OCR_MODEL_VERSION]["rec"]
      assert (lang in rec_table), "param lang must in {}, but got {}".format(rec_table.keys(), lang)

      # Detection models exist per script group rather than per language;
      # anything not listed here uses the multilingual ("ml") detector.
      det_by_rec = {"ch": "ch", "structure": "structure", "en": "en", "latin": "en"}
      det_lang = det_by_rec.get(lang, "ml")
      return lang, det_lang
  def get_model_config(type, version, model_type, lang):
      """Look up the MODEL_URLS entry for a model, falling back to the default version.

      The requested ``version`` is replaced by the task's default version when
      the version is unknown, when it lacks ``model_type``, or when it lacks
      ``lang`` for that model type. If the default version cannot satisfy the
      request either, an error is logged and the process exits with status -1.

      Args:
          type: Task family, "OCR" or "STRUCTURE".
          version: Requested model version.
          model_type: Model kind key inside the version table (e.g. "det").
          lang: Language key inside the model-type table.

      Returns:
          The leaf dict stored in MODEL_URLS for the resolved combination.

      Raises:
          NotImplementedError: If ``type`` is neither "OCR" nor "STRUCTURE".
      """
      if type not in ("OCR", "STRUCTURE"):
          raise NotImplementedError
      fallback_version = DEFAULT_OCR_MODEL_VERSION if type == "OCR" else DEFAULT_STRUCTURE_MODEL_VERSION

      catalog = MODEL_URLS[type]
      if version not in catalog:
          version = fallback_version

      if model_type not in catalog[version]:
          if model_type not in catalog[fallback_version]:
              logger.error("{} models is not support, we only support {}".format(model_type, catalog[fallback_version].keys()))
              sys.exit(-1)
          version = fallback_version

      if lang not in catalog[version][model_type]:
          if lang not in catalog[fallback_version][model_type]:
              logger.error("lang {} is not support, we only support {} for {} models".format(lang, catalog[fallback_version][model_type].keys(), model_type, ))
              sys.exit(-1)
          version = fallback_version

      return catalog[version][model_type][lang]
  def img_decode(content: bytes):
      """Decode an encoded image byte string with OpenCV.

      The bytes are viewed as a uint8 buffer and passed to ``cv2.imdecode``
      with ``IMREAD_UNCHANGED``; that call's result is returned as is.
      """
      raw_buffer = np.frombuffer(content, dtype=np.uint8)
      decoded = cv2.imdecode(raw_buffer, cv2.IMREAD_UNCHANGED)
      return decoded
  def check_img(img, alpha_color=(255, 255, 255)):
      """Load and normalise an image input for the inference pipeline.

      The inference network requires three-channel images, so the following
      channel conversions are applied to ndarray inputs:
          single-channel image: expanded with ``cv2.COLOR_GRAY2BGR``
          four-channel image: flattened with ``alpha_to_color``

      Args:
          img: Image data. Either encoded bytes, a string (local path or a
              link, which is first downloaded to "tmp.jpg"), or any other
              object, which is passed through untouched apart from the channel
              conversions above.
          alpha_color: Background colour used when flattening four-channel
              images.

      Returns:
          ``(img, flag_gif, flag_pdf)``. The two flags are the ones reported by
          ``check_and_read`` for string inputs and False otherwise. ``img`` is
          None when a path could not be loaded (an error is logged).
      """
      flag_gif, flag_pdf = False, False
      if isinstance(img, bytes):
          img = img_decode(img)
      if isinstance(img, str):
          # download net image
          if is_link(img):
              download_with_progressbar(img, "tmp.jpg")
              img = "tmp.jpg"
          image_file = img
          img, flag_gif, flag_pdf = check_and_read(image_file)
          if not flag_gif and not flag_pdf:
              with open(image_file, "rb") as f:
                  img_str = f.read()
              img = img_decode(img_str)
              if img is None:
                  # OpenCV could not decode the file: let Pillow open it,
                  # re-encode it as JPEG in memory and decode that instead.
                  try:
                      buf = BytesIO()
                      Image.open(BytesIO(img_str)).convert("RGB").save(buf, "jpeg")
                      img_array = np.frombuffer(buf.getvalue(), np.uint8)
                      img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
                  except Exception:
                      # Best-effort fallback: report and return "no image"
                      # instead of propagating the decoder's error.
                      logger.error("error in loading image:{}".format(image_file))
                      return None, flag_gif, flag_pdf
          if img is None:
              logger.error("error in loading image:{}".format(image_file))
              return None, flag_gif, flag_pdf
      # single channel image array.shape:h,w
      if isinstance(img, np.ndarray) and len(img.shape) == 2:
          img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
      # four channel image array.shape:h,w,c
      if isinstance(img, np.ndarray) and len(img.shape) == 3 and img.shape[2] == 4:
          img = alpha_to_color(img, alpha_color)
      return img, flag_gif, flag_pdf
  class PaddleOCR(predict_system.TextSystem):
      """Text detection / angle classification / recognition engine."""

      def __init__(self, **kwargs):
          """Resolve options and model locations, then initialise the pipeline.

          args:
              **kwargs: overrides for any option; the full list is shown by
                  ``paddleocr --help``
          """
          params = parse_args(mMain=False)
          params.__dict__.update(**kwargs)
          assert (params.ocr_version in SUPPORT_OCR_MODEL_VERSION), "ocr_version must in {}, but get {}".format(SUPPORT_OCR_MODEL_VERSION, params.ocr_version)
          params.use_gpu = check_gpu(params.use_gpu)

          if not params.show_log:
              # Raise the threshold to INFO so debug output (such as the
              # parameter dump below) is suppressed.
              logger.setLevel(logging.INFO)
          self.use_angle_cls = params.use_angle_cls
          lang, det_lang = parse_lang(params.lang)

          # init model dir
          det_model_config = get_model_config("OCR", params.ocr_version, "det", det_lang)
          params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir, os.path.join(BASE_DIR, "whl", "det", det_lang), det_model_config["url"], )
          rec_model_config = get_model_config("OCR", params.ocr_version, "rec", lang)
          params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir, os.path.join(BASE_DIR, "whl", "rec", lang), rec_model_config["url"], )
          cls_model_config = get_model_config("OCR", params.ocr_version, "cls", "ch")
          params.cls_model_dir, cls_url = confirm_model_dir_url(params.cls_model_dir, os.path.join(BASE_DIR, "whl", "cls"), cls_model_config["url"], )

          # The v3/v4 recognizers are configured with a 48-pixel-high input;
          # the older generations with 32.
          if params.ocr_version in ["PP-OCRv3", "PP-OCRv4"]:
              params.rec_image_shape = "3, 48, 320"
          else:
              params.rec_image_shape = "3, 32, 320"

          # download model if using paddle infer
          if not params.use_onnx:
              maybe_download(params.det_model_dir, det_url)
              maybe_download(params.rec_model_dir, rec_url)
              maybe_download(params.cls_model_dir, cls_url)

          if params.det_algorithm not in SUPPORT_DET_MODEL:
              logger.error("det_algorithm must in {}".format(SUPPORT_DET_MODEL))
              sys.exit(0)
          if params.rec_algorithm not in SUPPORT_REC_MODEL:
              logger.error("rec_algorithm must in {}".format(SUPPORT_REC_MODEL))
              sys.exit(0)

          if params.rec_char_dict_path is None:
              params.rec_char_dict_path = str(Path(__file__).parent / rec_model_config["dict_path"])

          logger.debug(params)
          # init det_model and rec_model
          super().__init__(params)
          self.page_num = params.page_num

      def ocr(self, img, det=True, rec=True, cls=True, bin=False, inv=False, alpha_color=(255, 255, 255), ):
          """
          OCR with PaddleOCR
          args:
              img: img for OCR, support ndarray, img_path and list or ndarray
              det: use text detection or not. If False, only rec will be exec. Default is True
              rec: use text recognition or not. If False, only det will be exec. Default is True
              cls: use angle classifier or not. Default is True. If True, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
              bin: binarize image to black and white. Default is False.
              inv: invert image colors. Default is False.
              alpha_color: set RGB color Tuple for transparent parts replacement. Default is pure white.
          return:
              A list with one entry per processed image/page.
              det and rec: each entry is a list of [box, rec_result] pairs, or None when nothing was found.
              det only: each entry is a list of boxes, or None when no box was found.
              rec only: each entry is the recognizer output for that image.
              neither det nor rec: the collected angle-classifier outputs.
          """
          assert isinstance(img, (np.ndarray, list, str, bytes))
          if isinstance(img, list) and det:
              logger.error("When input a list of images, det must be false")
              # sys.exit rather than the bare ``exit`` helper, which is injected
              # by the ``site`` module and is not guaranteed to exist.
              sys.exit(0)
          if cls and not self.use_angle_cls:
              logger.warning("Since the angle classifier is not initialized, it will not be used during the forward process")

          img, flag_gif, flag_pdf = check_img(img, alpha_color)
          # for infer pdf file
          if isinstance(img, list) and flag_pdf:
              # page_num == 0 (or more pages than the file has) means "all pages".
              if self.page_num > len(img) or self.page_num == 0:
                  imgs = img
              else:
                  imgs = img[: self.page_num]
          else:
              imgs = [img]

          def preprocess_image(_image):
              """Flatten alpha, then optionally invert and binarize one image."""
              _image = alpha_to_color(_image, alpha_color)
              if inv:
                  _image = cv2.bitwise_not(_image)
              if bin:
                  _image = binarize_img(_image)
              return _image

          if det and rec:
              ocr_res = []
              for img in imgs:
                  img = preprocess_image(img)
                  dt_boxes, rec_res, _ = self.__call__(img, cls)
                  if not dt_boxes and not rec_res:
                      ocr_res.append(None)
                      continue
                  tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
                  ocr_res.append(tmp_res)
              return ocr_res

          if det and not rec:
              ocr_res = []
              for img in imgs:
                  img = preprocess_image(img)
                  dt_boxes, elapse = self.text_detector(img)
                  if dt_boxes.size == 0:
                      ocr_res.append(None)
                      continue
                  tmp_res = [box.tolist() for box in dt_boxes]
                  ocr_res.append(tmp_res)
              return ocr_res

          # det is off: the inputs are treated as already-cropped text images.
          ocr_res = []
          cls_res = []
          for img in imgs:
              if not isinstance(img, list):
                  img = [preprocess_image(img)]
              if self.use_angle_cls and cls:
                  img, cls_res_tmp, elapse = self.text_classifier(img)
                  if not rec:
                      cls_res.append(cls_res_tmp)
              if rec:
                  # Only run the recognizer when its output is actually returned.
                  rec_res, elapse = self.text_recognizer(img)
                  ocr_res.append(rec_res)
          return ocr_res if rec else cls_res
  class PPStructure(StructureSystem):
      """Document structure engine built on top of StructureSystem."""

      def __init__(self, **kwargs):
          """Resolve options and the det/rec/table/layout models, then init the base class.

          args:
              **kwargs: overrides applied on top of the default options
          """
          opts = parse_args(mMain=False)
          opts.__dict__.update(**kwargs)
          assert (opts.structure_version in SUPPORT_STRUCTURE_MODEL_VERSION), "structure_version must in {}, but get {}".format(SUPPORT_STRUCTURE_MODEL_VERSION, opts.structure_version)
          opts.use_gpu = check_gpu(opts.use_gpu)
          opts.mode = "structure"

          if not opts.show_log:
              logger.setLevel(logging.INFO)
          lang, det_lang = parse_lang(opts.lang)
          # Table models are selected as "ch" or "en" only.
          table_lang = "ch" if lang == "ch" else "en"
          if opts.structure_version == "PP-Structure":
              opts.merge_no_span_structure = False

          # init model dir
          whl_root = os.path.join(BASE_DIR, "whl")
          det_cfg = get_model_config("OCR", opts.ocr_version, "det", det_lang)
          opts.det_model_dir, det_url = confirm_model_dir_url(
              opts.det_model_dir, os.path.join(whl_root, "det", det_lang), det_cfg["url"]
          )
          rec_cfg = get_model_config("OCR", opts.ocr_version, "rec", lang)
          opts.rec_model_dir, rec_url = confirm_model_dir_url(
              opts.rec_model_dir, os.path.join(whl_root, "rec", lang), rec_cfg["url"]
          )
          table_cfg = get_model_config("STRUCTURE", opts.structure_version, "table", table_lang)
          opts.table_model_dir, table_url = confirm_model_dir_url(
              opts.table_model_dir, os.path.join(whl_root, "table"), table_cfg["url"]
          )
          layout_cfg = get_model_config("STRUCTURE", opts.structure_version, "layout", lang)
          opts.layout_model_dir, layout_url = confirm_model_dir_url(
              opts.layout_model_dir, os.path.join(whl_root, "layout"), layout_cfg["url"]
          )

          # download model
          if not opts.use_onnx:
              download_plan = (
                  (opts.det_model_dir, det_url),
                  (opts.rec_model_dir, rec_url),
                  (opts.table_model_dir, table_url),
                  (opts.layout_model_dir, layout_url),
              )
              for model_dir, model_url in download_plan:
                  maybe_download(model_dir, model_url)

          # Fill in every dictionary path the caller left unset from the
          # matching model configuration, relative to this file's directory.
          package_dir = Path(__file__).parent
          dict_defaults = (
              ("rec_char_dict_path", rec_cfg),
              ("table_char_dict_path", table_cfg),
              ("layout_dict_path", layout_cfg),
          )
          for option_name, model_cfg in dict_defaults:
              if getattr(opts, option_name) is None:
                  setattr(opts, option_name, str(package_dir / model_cfg["dict_path"]))

          logger.debug(opts)
          super().__init__(opts)

      def __call__(self, img, return_ocr_result_in_table=False, img_idx=0, alpha_color=(255, 255, 255), ):
          """Analyse one image, or every page when ``check_img`` reports a PDF page list.

          Returns the base-class result for a single image, or a list with one
          such result per page for a PDF.
          """
          img, flag_gif, flag_pdf = check_img(img, alpha_color)
          if not (isinstance(img, list) and flag_pdf):
              single_res, _ = super().__call__(img, return_ocr_result_in_table, img_idx=img_idx)
              return single_res

          page_results = []
          page_total = len(img)
          for page_no, page_img in enumerate(img):
              logger.info("processing {}/{} page:".format(page_no + 1, page_total))
              page_res, _ = super().__call__(page_img, return_ocr_result_in_table, img_idx=page_no)
              page_results.append(page_res)
          return page_results
  def cv2AddChineseText(img, text, position, textColor=(0, 255, 0), textSize=30):
      """Render ``text`` on a BGR image through Pillow and return a BGR array.

      Drawing uses the module-level ``font`` object, which has to exist before
      the first call; ``textSize`` is accepted but not consulted by the body.
      """
      rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      canvas = Image.fromarray(rgb_pixels)
      ImageDraw.Draw(canvas).text(position, text, textColor, font=font)
      annotated = np.asarray(canvas)
      return cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR)
  if __name__ == '__main__':
      # Command-line entry point: run OCR (or structure analysis) on the input
      # images; in OCR mode also draw boxes and labels and show the result.
      font_path = 'simhei.ttf'  # replace with the path of a font that contains Chinese glyphs
      # Module-level name read by cv2AddChineseText when drawing labels.
      font = ImageFont.truetype(font_path, 24)
      # for cmd
      args = parse_args(mMain=True)
      # NOTE: the input list is hard-coded; --image_dir is not consulted here.
      image_file_list = ['weights/123.jpg']
      if args.type == "ocr":
          engine = PaddleOCR(**(args.__dict__))
      elif args.type == "structure":
          engine = PPStructure(**(args.__dict__))
      else:
          raise NotImplementedError

      # Annotated image shown at the end; stays None when nothing was loaded
      # (for example in structure mode).
      image = None
      for img_path in image_file_list:
          img_name = os.path.basename(img_path).split(".")[0]
          logger.info("{}{}{}".format("*" * 10, img_path, "*" * 10))
          if args.type == "ocr":
              image = cv2.imread(img_path)
              result = engine.ocr(img_path, det=args.det, rec=args.rec, cls=args.use_angle_cls, bin=args.binarize, inv=args.invert, alpha_color=args.alphacolor, )
              if result is not None:
                  lines = []
                  for res in result:
                      # A page without any detected text is reported as None.
                      if res is None:
                          continue
                      for line in res:
                          logger.info(line)
                          # Only the det+rec mode yields [box, (text, score)]
                          # entries, which the drawing/export code relies on.
                          if not (args.det and args.rec):
                              continue
                          box, text, score = line[0], line[1][0], line[1][1]
                          if image is not None:
                              points = np.array(box, dtype=np.int32).reshape((-1, 1, 2))
                              cv2.polylines(image, [points], isClosed=True, color=(255, 0, 0), thickness=2)
                              # Nudge the label 20 px below the first corner of the box.
                              text_position = (int(points[0][0][0]), int(points[0][0][1] + 20))
                              image = cv2AddChineseText(image, text, text_position, textColor=(0, 255, 0), textSize=30)
                          # One output row: "[x1,y1,x2,y2,...],text,score".
                          coords = ",".join(str(pt[0]) + "," + str(pt[1]) for pt in box)
                          lines.append("[" + coords + "]," + text + "," + str(score) + "\n")
                  if args.savefile:
                      os.makedirs(args.output, exist_ok=True)
                      outfile = os.path.join(args.output, img_name + ".txt")
                      with open(outfile, "w", encoding="utf-8") as f:
                          f.writelines(lines)
          elif args.type == "structure":
              img, flag_gif, flag_pdf = check_and_read(img_path)
              if not flag_gif and not flag_pdf:
                  img = cv2.imread(img_path)
              if not flag_pdf:
                  if img is None:
                      logger.error("error in loading image:{}".format(img_path))
                      continue
                  img_paths = [[img_path, img]]
              else:
                  # Write every PDF page out as its own JPEG before analysis.
                  img_paths = []
                  for index, pdf_img in enumerate(img):
                      os.makedirs(os.path.join(args.output, img_name), exist_ok=True)
                      pdf_img_path = os.path.join(args.output, img_name, img_name + "_" + str(index) + ".jpg")
                      cv2.imwrite(pdf_img_path, pdf_img)
                      img_paths.append([pdf_img_path, pdf_img])

              all_res = []
              for index, (new_img_path, img) in enumerate(img_paths):
                  logger.info("processing {}/{} page:".format(index + 1, len(img_paths)))
                  result = engine(img, img_idx=index)
                  save_structure_res(result, args.output, img_name, index)
                  if args.recovery and result != []:
                      from copy import deepcopy
                      from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes
                      h, w, _ = img.shape
                      result_cp = deepcopy(result)
                      result_sorted = sorted_layout_boxes(result_cp, w)
                      all_res += result_sorted

              if args.recovery and all_res != []:
                  try:
                      from ppstructure.recovery.recovery_to_doc import convert_info_docx
                      convert_info_docx(img, all_res, args.output, img_name)
                  except Exception as ex:
                      logger.error("error in layout recovery image:{}, err msg: {}".format(img_name, ex))
                      continue

              for item in all_res:
                  # Drop the bulky fields before logging the remaining metadata.
                  item.pop("img")
                  item.pop("res")
                  logger.info(item)
              logger.info("result save to {}".format(args.output))

      # Nothing to display in structure mode or when the image failed to load.
      if image is not None:
          cv2.imshow('image', image)
          cv2.waitKey(0)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家自动化/article/detail/593603
推荐阅读
相关标签
  

闽ICP备14008679号