赞
踩
对表格图片,识别出表格里的横、纵坐标列表,并剔除异常点
- import cv2
- import pandas as pd
- import numpy as np
-
def outset(df, gap=10, max_run=40, dump_path=r"D:/df.csv"):
    """Return the coordinates that fall in short runs of nearby values.

    The ``point`` column is read in row order.  A run ends at every row
    whose difference to the following row exceeds ``gap`` in absolute
    value, and at the last row.  Every run that contains at most
    ``max_run`` rows is treated as abnormal and all of its points are
    reported.

    Args:
        df: DataFrame with a numeric ``point`` column.  It is not modified.
        gap: Absolute difference between consecutive points above which a
            new run starts.
        max_run: Largest run length (in rows) that is still reported as
            abnormal.
        dump_path: Where the intermediate table (point, diff, flag, group0)
            is written as CSV.  Pass ``None`` to skip the dump.

    Returns:
        A list of the distinct abnormal points as Python ints, in no
        particular order.  An empty frame yields an empty list.
    """
    if len(df) == 0:
        return []

    # Work on positional copies so the caller's frame and index are untouched.
    raw = df['point'].reset_index(drop=True)
    diff = raw.diff(periods=-1).fillna(0)
    points = raw.fillna(0).astype(int)

    flag = (diff.abs() > gap).astype(int)
    flag.iloc[-1] = 1  # the last row always closes its run

    # A row's run number is 1 + the number of run ends strictly before it.
    group = flag.cumsum() - flag + 1

    if dump_path is not None:
        pd.DataFrame({
            'point': points,
            'diff': diff.astype(int),
            'flag': flag,
            'group0': group,
        }).to_csv(dump_path)

    run_sizes = group.map(group.value_counts())
    abnormal = list(set(points[run_sizes <= max_run].tolist()))

    print("异常X坐标********")
    print(sorted(abnormal))
    print("异常X坐标********")
    return abnormal
-
def _run_ends(sorted_vals, min_gap, limit):
    """Return the last value of each run in a sorted coordinate array.

    A value ends a run when the next value is at least ``min_gap`` larger
    and the value itself is more than 10 away from ``limit``.  The final
    value is always included.  An empty array yields an empty list.
    """
    if len(sorted_vals) == 0:
        return []
    heads = sorted_vals[:-1]
    keep = (np.diff(sorted_vals) >= min_gap) & (np.abs(heads - limit) > 10)
    return list(heads[keep]) + [sorted_vals[-1]]


def _contour_top(contour):
    """Return the y coordinate of the top edge of a contour's bounding box."""
    return cv2.boundingRect(contour)[1]


def seg_pic(img, debug_dir='D:/bitwiseAnd/'):
    """Locate the grid-line intersections of a table image.

    The image is binarised, horizontal and vertical line masks are obtained
    by morphological opening, and their intersection gives the candidate
    crossing pixels.  The x and y coordinates of those pixels are collapsed
    to one value per run of nearby coordinates, and several filters then
    drop values considered abnormal (image margins, short x runs reported
    by ``outset``, and small contours).

    Args:
        img: Path of the image file; "/" is used as the path separator when
            deriving the debug file name.
        debug_dir: Prefix under which the intersection mask is written with
            ``cv2.imwrite``.  Pass ``None`` to skip writing it.

    Returns:
        ``(image, xs, ys)``: the loaded BGR image, the list of retained x
        coordinates and the list of retained y coordinates.  Both lists are
        empty when no intersection is found.

    Raises:
        OSError: if ``cv2.imread`` cannot load ``img``.
    """
    image = cv2.imread(img, 1)
    if image is None:
        raise OSError("cv2.imread could not load image: %s" % img)
    rows, cols = image.shape[0:2]
    print(rows, cols)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 35, -5)

    # Opening with a wide, 1-pixel-high kernel keeps horizontal lines only.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(1, cols // 40), 1))
    horizontal = cv2.dilate(cv2.erode(binary, kernel, iterations=1),
                            kernel, iterations=1)

    # Opening with a tall, 1-pixel-wide kernel keeps vertical lines only.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(1, rows // 20)))
    vertical = cv2.dilate(cv2.erode(binary, kernel, iterations=1),
                          kernel, iterations=1)

    # Pixels present in both masks are the line crossings.
    bitwiseAnd = cv2.bitwise_and(horizontal, vertical)
    if debug_dir is not None:
        cv2.imwrite(debug_dir + img.split("/")[-1], bitwiseAnd)
    ys, xs = np.where(bitwiseAnd > 0)

    # X coordinates: ignore a 10-pixel band at the left and right borders.
    sorted_xs = np.sort(xs)
    sorted_xs = sorted_xs[(sorted_xs > 10) & (sorted_xs < cols - 10)]
    # Integer coordinates, so ">= 21" is the original "> 20" gap test.
    mylistx = _run_ends(sorted_xs, 21, cols)

    # Crossings within 20 pixels of the top or bottom border: drop their y
    # values from the y pool and drop their x values from the x result.
    near_top = ys <= 20
    near_bottom = ~near_top & (ys >= rows - 20)
    sorted_ys = np.sort(ys)
    if near_top.any():
        sorted_ys = sorted_ys[sorted_ys > 20]
    if near_bottom.any():
        sorted_ys = sorted_ys[sorted_ys < rows - 20]
    margin_xs = set(xs[near_top | near_bottom].tolist())
    mylistx = [x for x in mylistx if int(x) not in margin_xs]

    mylisty = _run_ends(sorted_ys, 23, rows)

    # X values that outset() reports as abnormal are removed, together with
    # the y values of every crossing located at one of those x values.
    if sorted_xs.size:
        del_x = set(outset(pd.DataFrame({'point': sorted_xs})))
    else:
        del_x = set()
    if del_x:
        mylistx = [x for x in mylistx if int(x) not in del_x]
        del_y = set(ys[np.isin(xs, list(del_x))].tolist())
        # Build a new list instead of removing while iterating, which
        # skipped the element following each removal.
        mylisty = [y for y in mylisty if int(y) not in del_y]

    # Area-based correction of abnormal points.
    # [-2] selects the contour list for both the 2-tuple (OpenCV 4) and the
    # 3-tuple (OpenCV 3) return forms of findContours.
    contours = cv2.findContours(np.uint8(bitwiseAnd), cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    for contour in sorted(contours, key=_contour_top):
        area = cv2.contourArea(contour)
        if not 57 <= area < 100:
            continue
        points_rect = cv2.boxPoints(cv2.minAreaRect(contour))
        abnormalx = int(points_rect[1][0])
        if abnormalx in mylistx:
            print("异常区域:\t", abnormalx, area)
            mylistx.remove(abnormalx)

    return image, mylistx, mylisty
-
if __name__ == "__main__":
    # Script entry point: run the detector on the sample image and print
    # the returned (image, x list, y list) tuple.
    img_path = r"D:/testslope_corr/_21.png"
    result = seg_pic(img_path)
    print(result)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。