当前位置:   article > 正文

使用Python+OpenCV2进行图片中的文字分割(支持竖版)

使用Python+OpenCV2进行图片中的文字分割(支持竖版)

抠字和分割

把图片中的文字识别出来,并将每个字单独抠成一张图片。

  1. import cv2
  2. import numpy as np
  # Threshold passed to scan() for the whole-image vertical projection:
  # a pixel column counts as "text" only when it holds more than HIOG
  # foreground pixels.
  HIOG = 50
  # Threshold passed to scan() for the horizontal projection of each column
  # strip: a pixel row counts as "text" when it holds more than VIOG
  # foreground pixels.
  VIOG = 3
  # Character boxes collected by DOIT(), each stored as [x1, y1, x2, y2].
  Position = []
  def getHProjection(image):
      """Return the horizontal projection of a binary image.

      For every row, count how many pixels are exactly 255.

      Args:
          image: two-dimensional array (rows x columns).

      Returns:
          list[int]: one count per row, ordered top to bottom.

      Raises:
          ValueError: if ``image`` is not two-dimensional.
      """
      pixels = np.asarray(image)
      if pixels.ndim != 2:
          raise ValueError(
              "expected a 2-D image, got %d dimension(s)" % pixels.ndim
          )
      # One vectorised reduction replaces the per-pixel Python loops; the
      # projection picture the old code painted was never used or returned.
      return np.count_nonzero(pixels == 255, axis=1).tolist()
  def getVProjection(image):
      """Return the vertical projection of a binary image.

      For every column, count how many pixels are exactly 255.

      Args:
          image: two-dimensional array (rows x columns).

      Returns:
          list[int]: one count per column, ordered left to right.

      Raises:
          ValueError: if ``image`` is not two-dimensional.
      """
      pixels = np.asarray(image)
      if pixels.ndim != 2:
          raise ValueError(
              "expected a 2-D image, got %d dimension(s)" % pixels.ndim
          )
      # One vectorised reduction replaces the per-pixel Python loops; the
      # projection picture the old code painted was never used or returned.
      return np.count_nonzero(pixels == 255, axis=0).tolist()
  def scan(vProjection, iog, pos=0):
      """Split a projection profile into runs of values above a threshold.

      A run opens at the first index whose value is greater than ``iog`` and
      closes at the first later index whose value is ``<= iog`` and that lies
      at least ``pos`` positions after the run's start. Low values closer
      than ``pos`` to the start are ignored, so the run keeps going.

      Args:
          vProjection: sequence of per-row or per-column pixel counts.
          iog: values must exceed this threshold to belong to a run.
          pos: minimum distance between a run's start and its end index.

      Returns:
          tuple[list[int], list[int]]: ``(starts, ends)``. Each end index is
          exclusive. When the profile finishes inside a run, ``starts`` has
          one more element than ``ends`` and the caller supplies the last end.
      """
      V_start = []
      V_end = []
      inside = False
      for i, value in enumerate(vProjection):
          if value > iog:
              if not inside:
                  V_start.append(i)
                  inside = True
          elif inside and value <= iog and i - V_start[-1] >= pos:
              V_end.append(i)
              inside = False
      return V_start, V_end
  def checkSingle(image):
      """Unfinished placeholder: compute the row projection and do nothing.

      The original stub iterated ``range(h)`` where ``h`` is the list
      returned by ``getHProjection``, which raises ``TypeError``. The stub
      now iterates the list itself, so calling it no longer crashes. It
      still has no effect and returns ``None``.

      Args:
          image: two-dimensional binary image, forwarded to
              ``getHProjection``.
      """
      row_counts = getHProjection(image)
      for _count in row_counts:
          # TODO: the single-character check was never implemented.
          pass
  # Crop one box out of an image and save it
  def CropImage(image, dest, boxMin, boxMax):
      """Write the region between two corner points of ``image`` to ``dest``.

      Args:
          image: image array indexed as ``[row, column, ...]``.
          dest: output file path handed to ``cv2.imwrite``.
          boxMin: ``(x, y)`` of the top-left corner.
          boxMax: ``(x, y)`` of the bottom-right corner (exclusive).

      Returns:
          bool: the result of ``cv2.imwrite``, or ``False`` when the clamped
          box is empty and nothing was written.
      """
      import os

      img_h, img_w = image.shape[:2]
      # Clamp to the image: a negative coordinate would otherwise be read by
      # NumPy as "count from the far edge" and select the wrong region.
      left = min(max(boxMin[0], 0), img_w)
      top = min(max(boxMin[1], 0), img_h)
      right = min(max(boxMax[0], 0), img_w)
      bottom = min(max(boxMax[1], 0), img_h)

      cropImg = image[top:bottom, left:right]
      if cropImg.size == 0:
          # cv2.imwrite rejects empty images, so skip the write.
          return False

      folder = os.path.dirname(dest)
      if folder:
          # cv2.imwrite does not create missing directories.
          os.makedirs(folder, exist_ok=True)
      return cv2.imwrite(dest, cropImg)
  # Run the whole segmentation pipeline on one picture
  def DOIT(rawPic):
      """Locate every character in an image and save one crop per character.

      Steps: read the image, convert it to grayscale, apply an inverted
      binary threshold, split it into column strips using the vertical
      projection, split each strip into rows using the horizontal projection,
      then split each cell again by its own vertical projection. The boxes
      are stored in the module-level ``Position`` list as
      ``[x1, y1, x2, y2]``. Each box, padded by 5 px, is saved as
      ``result/<n>.jpg``. The annotated overview is saved as
      ``result/ResultImage.jpg`` and shown in a window until a key is
      pressed.

      Args:
          rawPic: path of the image to process.

      Raises:
          OSError: if the image cannot be read.
      """
      import os

      origineImage = cv2.imread(rawPic)
      if origineImage is None:
          # cv2.imread signals failure by returning None instead of raising.
          raise OSError("cannot read image: %r" % (rawPic,))
      # Crops come from this untouched copy so the red rectangles drawn on
      # origineImage below do not end up inside the saved character images.
      cleanImage = origineImage.copy()

      # Grayscale, then inverted binary threshold (dark pixels become 255).
      image = cv2.cvtColor(origineImage, cv2.COLOR_BGR2GRAY)
      _, img = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)
      h, w = img.shape

      # Start from an empty result list so a second call does not reuse
      # boxes found in a previous image.
      Position.clear()

      # Split the page into column strips with the vertical projection.
      V = getVProjection(img)
      V_start, V_end = scan(V, HIOG)
      if len(V_start) > len(V_end):
          # The last run reached the right border; close it just inside.
          V_end.append(w - 5)

      for col_start, col_end in zip(V_start, V_end):
          # Strips narrower than 30 px are skipped.
          if col_end - col_start < 30:
              continue
          cropImg = img[0:h, col_start:col_end]

          # Split the strip into rows with the horizontal projection.
          H = getHProjection(cropImg)
          H_start, H_end = scan(H, VIOG, 40)
          if len(H_start) > len(H_end):
              # The last run reached the bottom border; close it just inside.
              H_end.append(h - 5)

          for row_start, row_end in zip(H_start, H_end):
              # Scan the cell column-wise once more.
              DcropImg = cropImg[row_start:row_end, 0:w]
              d_w = DcropImg.shape[1]
              sec_V = getVProjection(DcropImg)
              c1, c2 = scan(sec_V, 0)
              if len(c1) > len(c2):
                  c2.append(d_w)

              # Merge neighbouring segments separated by fewer than 12 px
              # (presumably parts of the same character).
              x = 1
              while x < len(c1):
                  if c1[x] - c2[x - 1] < 12:
                      c2.pop(x - 1)
                      c1.pop(x)
                      x -= 1
                  x += 1

              if len(c1) == 1:
                  # A single segment: keep the full strip width.
                  Position.append([col_start, row_start, col_end, row_end])
              else:
                  for seg_start, seg_end in zip(c1, c2):
                      Position.append(
                          [col_start + seg_start, row_start,
                           col_start + seg_end, row_end]
                      )

      # cv2.imwrite does not create missing directories.
      os.makedirs('result', exist_ok=True)

      # Draw every box on the overview and save the matching crop.
      for number, box in enumerate(Position, start=1):
          rectMin = (box[0] - 5, box[1] - 5)
          rectMax = (box[2] + 5, box[3] + 5)
          cv2.rectangle(origineImage, rectMin, rectMax, (0, 0, 255), 2)
          CropImage(cleanImage, 'result/' + '%d.jpg' % number, rectMin, rectMax)

      cv2.imshow('image', origineImage)
      cv2.imwrite('result/' + 'ResultImage.jpg', origineImage)
      cv2.waitKey(0)
  #############################
  if __name__ == "__main__":
      # Sample path from the article; point it at your own picture.
      rawPicPath = r"H:\TEMP\TEXT_PROCCESS\TEST05.jpg"
      DOIT(rawPicPath)
  #############################

原图片:

分割后文件夹

重命名

可见此时文件都还是以数字作为文件名,接下来要利用 OCR 自动给每个文字图片文件命名。

我们使用 Umi-OCR。Umi-OCR 的安装方法建议到 GitHub 上查阅,在 Windows 上部署还是很方便的。

这里使用本机安装好的UMI-OCR 的 API地址  http://127.0.0.1:1224/api/ocr

先定义API调用方法

  1. ############################################
  2. import base64
  3. import requests
  4. import json
  # Client helper for the Umi-OCR HTTP API
  def UMI_OCR_OPT(url, img_path, timeout=30):
      """Send one image to the OCR endpoint and return the first text block.

      The image file is base64-encoded and posted as JSON
      (``{"base64": ...}``).

      Args:
          url: OCR endpoint, e.g. ``"http://127.0.0.1:1224/api/ocr"``.
          img_path: path of the image file to recognise.
          timeout: seconds to wait for the server before ``requests`` raises
              a timeout error. Without it, a stalled server blocks forever.

      Returns:
          str: ``data[0]["text"]`` from the response. Returns ``''`` when the
          HTTP status is not 200, when the response contains
          ``'No text found in image'``, or when ``data`` is not a non-empty
          list of dicts.
      """
      with open(img_path, 'rb') as f:
          image_base64 = base64.b64encode(f.read()).decode('utf-8')

      # An optional "options" dict may be added next to "base64". The article
      # listed these example keys:
      #   Paddle engine: "ocr.language": "models/config_chinese.txt",
      #                  "ocr.cls": False, "ocr.limit_side_len": 960,
      #                  "tbpu.parser": "MergeLine"
      #   Rapid engine:  "ocr.language": "简体中文", "ocr.angle": False,
      #                  "ocr.maxSideLen": 1024, "tbpu.parser": "MergeLine"
      data = {"base64": image_base64}
      headers = {"Content-Type": "application/json"}
      response = requests.post(
          url, data=json.dumps(data), headers=headers, timeout=timeout
      )
      if response.status_code != 200:
          return ''

      res_dict = json.loads(response.text)
      # The server reports "nothing recognised" with this message.
      if 'No text found in image' in str(res_dict):
          return ''
      print("返回值字典\n", res_dict)

      # Guard the indexing: on other failures "data" may not be a list of
      # text blocks, and blind indexing would raise.
      blocks = res_dict.get('data') if isinstance(res_dict, dict) else None
      if not isinstance(blocks, list) or not blocks:
          return ''
      first = blocks[0]
      if not isinstance(first, dict):
          return ''
      return first.get('text', '')

开始批量调用检测

  1. import easyocr
  2. import shutil
  import os

  # Paths of the images that could not be recognised or copied.
  failed = []
  # API endpoint
  uni_orc_url = "http://127.0.0.1:1224/api/ocr"
  # shutil.copyfile does not create the destination folder.
  os.makedirs("./resultX", exist_ok=True)
  # Walk the numbered crops ./result/1.jpg ... ./result/284.jpg
  for i in range(1, 285):
      filePath = "./result/" + str(i) + ".jpg"
      if not os.path.isfile(filePath):
          # A missing crop is recorded instead of aborting the whole batch.
          failed.append(filePath)
          continue
      # result = reader.readtext(filePath, detail = 0)
      result = UMI_OCR_OPT(uni_orc_url, filePath)
      name = result.strip() if result else ''
      if not name:
          failed.append(filePath)
          continue
      print(result)
      # The first recognised character becomes the file name. An image that
      # yields an already-used character overwrites the earlier copy.
      destPath = "./resultX/" + name[0] + ".jpg"
      print('sour :: ' + filePath)
      print('dest :: ' + destPath)
      try:
          shutil.copyfile(filePath, destPath)
      except OSError:
          # e.g. the character is not allowed in a file name on this system.
          failed.append(filePath)
  error = ''.join(path + '\n' for path in failed)
  print('error:\n' + error)

然后将能够识别出来的所有文字图片都复制并重命名为该字

效果如下:

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Monodyee/article/detail/91209
推荐阅读
相关标签
  

闽ICP备14008679号