当前位置:   article > 正文

OpenCV表格图片寻找有效的x、y坐标并删除异常点

OpenCV表格图片寻找有效的x、y坐标并删除异常点

需求描述:

表格图片,识别出表格里的横、纵坐标列表,并剔除异常点

解决方法:

  1. 通过opencv的getStructuringElement识别出横、竖线
  2. 通过bitwise_and取得交点并去除表格线
  3. 获取x和y的所有可能点,按照相邻点不超过阈值来筛选每一行、列最大的y和x
  4. 对于个别异常点,通过图片边缘临近点阈值、面积过滤、自定义异常筛选进行剔除
  5. 自定义异常筛选主要是通过对相邻坐标数据进行分组统计,数量少于指定阈值的分组认为是异常点

  1. import cv2
  2. import pandas as pd
  3. import numpy as np
  4. def outset(df):
  5. df['diff'] = df.diff(periods=-1)
  6. df.fillna(0, inplace=True)
  7. df['flag'] = df['diff'].apply(lambda x: 1 if abs(x) > 10 else 0)
  8. df.at[len(df) - 1, 'flag'] = 1
  9. group0 = 1
  10. for row_index, row_data in df.iterrows():
  11. df.at[row_index, 'group0'] = group0
  12. if row_data['flag'] == 1:
  13. group0 += 1 df = df.astype(int)
  14. grouped_df = df.groupby('group0').count()
  15. df.to_csv(r"D:/df.csv")
  16. filter_df = grouped_df[grouped_df['flag'] <= 40] #60
  17. filter_df = filter_df.reset_index()
  18. finadf=df[df['group0'].isin(list(filter_df['group0']))]['point']
  19. finslist=list(finadf)
  20. print("异常X坐标********")
  21. print(sorted(list(set(finslist))))
  22. print("异常X坐标********")
  23. return list(set(finslist))
  24. def seg_pic(img):
  25. image = cv2.imread(img, 1)
  26. w,h = image.shape[0:2]
  27. print(w,h)
  28. gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  29. binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, -5)
  30. rows, cols = binary.shape
  31. scale = 40
  32. kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (cols // scale, 1))
  33. eroded = cv2.erode(binary, kernel, iterations=1)
  34. dilatedcol = cv2.dilate(eroded, kernel, iterations=1)
  35. scale = 20
  36. kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, rows // scale))
  37. eroded = cv2.erode(binary, kernel, iterations=1)
  38. dilatedrow = cv2.dilate(eroded, kernel, iterations=1)
  39. bitwiseAnd = cv2.bitwise_and(dilatedcol, dilatedrow)
  40. # 标识表格
  41. merge = cv2.add(dilatedcol, dilatedrow)
  42. merge2 = cv2.subtract(binary, bitwiseAnd)
  43. cv2.imwrite(('D:/bitwiseAnd/'+img.split("/")[-1] ), bitwiseAnd)
  44. ys, xs = np.where(bitwiseAnd > 0)
  45. mylisty = []
  46. mylistx = []
  47. i = 0
  48. myxs = np.sort(xs)
  49. myxs = np.delete(myxs, np.where(myxs <=10))
  50. myxs = np.delete(myxs, np.where(myxs >= h-10))
  51. #pd.DataFrame(myxs).to_csv(r"myxs.csv")
  52. for i in range(len(myxs) - 1):
  53. if (myxs[i + 1] - myxs[i] > 20 and abs(myxs[i]-h)>10): #>30
  54. mylistx.append(myxs[i])
  55. i = i + 1
  56. mylistx.append(myxs[i])
  57. myys = np.sort(ys)
  58. #pd.DataFrame(myys).to_csv(r"myys.csv")
  59. tuple1 = np.where(bitwiseAnd > 0)
  60. for i in zip(*tuple1[::-1]):
  61. if i[1] <=20:
  62. myys = np.delete(myys, np.where(myys <= 20))
  63. if i[0] in mylistx:
  64. mylistx.remove(i[0])
  65. elif i[1] >= w-20:
  66. myys = np.delete(myys, np.where(myys >= w - 20))
  67. if i[0] in mylistx:
  68. mylistx.remove(i[0])
  69. i = 0
  70. for i in range(len(myys) - 1):
  71. if (myys[i + 1] - myys[i] >= 23 and abs(myys[i]-w)>10): #阈值
  72. mylisty.append(myys[i])
  73. i = i + 1
  74. mylisty.append(myys[i])
  75. pointx = {'point': myxs}
  76. dfx = pd.DataFrame(pointx)
  77. del_x = []
  78. del_y = []
  79. for x in outset(dfx):
  80. del_x.append(x)
  81. if x in mylistx:
  82. mylistx.remove(x)
  83. for i in zip(*tuple1[::-1]):
  84. for j in del_x:
  85. if i[0] == j:
  86. del_y.append(i[1])
  87. for j in mylisty:
  88. if j in del_y:
  89. mylisty.remove(j)
  90. # 面积法修正异常点
  91. contours, hierarchy = cv2.findContours(np.uint8(bitwiseAnd), cv2.RETR_TREE,
  92. cv2.CHAIN_APPROX_SIMPLE)
  93. cnts = sort_contours(contours, method="top-to-bottom")
  94. x, y, w, h, t = 0,0,0,0,90
  95. for j in cnts:
  96. area = cv2.contourArea(j)
  97. if 100 > area >= 57:
  98. rect = cv2.minAreaRect(j)
  99. (x, y), (w, h), t = rect
  100. points_rect = cv2.boxPoints(rect)
  101. abnormalx=int(points_rect[1][0])
  102. if abnormalx in mylistx:
  103. print("异常区域:\t",abnormalx,area)
  104. mylistx.remove(abnormalx)
  105. return image, mylistx, mylisty
  106. if __name__ == '__main__':
  107. img_path = r"D:/testslope_corr/_21.png"
  108. print(seg_pic(img_path))

结果示例:

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/IT小白/article/detail/459741
推荐阅读
相关标签
  

闽ICP备14008679号