赞
踩
我们只想识别图像中对我们有用的评论,因此需要设定一个阈值,只保留对我们有用的信息。
-
import json

import easyocr

# Create the OCR reader for English text and run it on the review screenshot.
# Each entry of result_list is indexed below as entry[0] = bounding box and
# entry[1] = recognised text.
reader = easyocr.Reader(['en'])
result_list = reader.readtext('review2.png')
def use_result_list_to_recentangle(result_list, min_x=560):
    """Keep only the OCR detections located to the right of a horizontal cut-off.

    Args:
        result_list: Iterable of detections. For each item, ``item[0]`` is the
            bounding box (a sequence of ``[x, y]`` points) and ``item[1]`` is
            the recognised text. Presumably the output of
            ``easyocr.Reader.readtext`` -- confirm against the caller.
        min_x: A detection is kept only when the x coordinate of the second
            bounding-box point (``box[1][0]``, truncated to ``int``) is
            strictly greater than this value. Defaults to 560, the cut-off
            that was previously hard-coded.

    Returns:
        A ``(pos_list, text_list)`` tuple of parallel lists: the bounding
        boxes and the texts of the detections that passed the filter, in
        their original order.
    """
    pos_list = []
    text_list = []
    for sample in result_list:
        box, text = sample[0], sample[1]
        # int() also normalises non-builtin numeric coordinates before comparing.
        if int(box[1][0]) > min_x:
            pos_list.append(box)
            text_list.append(text)
    return pos_list, text_list
-
def get_important_information(pos_list, text_list):
    """Group a flat list of OCR text fragments into per-review records.

    The fragments are scanned in order and recognised by marker strings:

    * a fragment containing ``'Reviewed in the'`` is stored as ``'place'`` and
      the fragment just before it as ``'title'``;
    * a fragment containing ``'Color'`` is stored as ``'color'``;
    * a fragment containing ``'Purchase'`` is stored as ``'Verify_purchase'``
      and switches on content collection for the fragments that follow;
    * a fragment equal to ``'Helpful'`` ends the current review: the collected
      fragments are concatenated (no separator) into ``'content'`` and the
      record is appended to the result.

    Args:
        pos_list: Bounding boxes parallel to ``text_list``. Not used; accepted
            only so existing callers keep working.
        text_list: Recognised text fragments in reading order.

    Returns:
        A list of dicts, one per review terminated by ``'Helpful'``. Fragments
        after the last ``'Helpful'`` are not emitted.
    """
    all_samples = []
    tmp_sample = {}
    content_parts = []
    collecting = False
    for index, text in enumerate(text_list):
        if text == 'Helpful':
            # Close the review before the content step so the 'Helpful'
            # button label itself never ends up in the review body.
            tmp_sample['content'] = ''.join(content_parts)
            all_samples.append(tmp_sample)
            tmp_sample = {}
            content_parts = []
            collecting = False
            continue

        if 'Reviewed in the' in text:
            # Guard index 0: text_list[-1] would silently pick the LAST
            # fragment of the page as the title.
            tmp_sample['title'] = text_list[index - 1] if index > 0 else ''
            tmp_sample['place'] = text

        if 'Color' in text:
            tmp_sample['color'] = text

        if collecting:
            content_parts.append(text)

        if 'Purchase' in text:
            tmp_sample['Verify_purchase'] = text
            # Checked after the content step so this marker line is excluded
            # from the review body; collection starts with the next fragment.
            collecting = True
    return all_samples
-
-
-
# Filter the OCR detections, group them into review records and save as JSON.
pos_list, text_list = use_result_list_to_recentangle(result_list)
all_samples = get_important_information(pos_list, text_list)
json_path = '/cloud/cloud_disk/users/huh/nlp/vision-reptile/vision_reptile/data/result.json'
# The context manager closes (and therefore flushes) the file even if
# json.dump raises; the bare open() used before never closed the handle.
with open(json_path, "w", encoding="utf-8") as out_file:
    json.dump(all_samples, out_file, indent=6)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。