赞
踩
最近在做标准地址匹配,比如输入“金丽花园1#2-102”,
解析成如下格式,并匹配到标准地址:
{ "status": 20000, "message": "查询成功", "data": { "output": [ { "type": "poi", "start": 0, "end": 4, "prob": 4, "span": "金丽花园" }, { "type": "houseno", "start": 4, "end": 7, "prob": 7, "span": "1#2" } ], "full_bzdz_name": "山东省淄博市张店区科苑街道丽景苑社区北西五路21号金丽花园1号楼2单元102室", "houseid": "", "table_name": "bzdz_household", "id": "1726" } }
API 采用了 ModelScope 的「MGeo门址地址结构化要素解析-中文-地址领域-base」模型,并使用 FastAPI 封装成接口:
#!/usr/bin/env python3
"""FastAPI route that tags a free-form Chinese address with the MGeo model
and looks up the matching standard address in ClickHouse."""
import logging
import re
from datetime import timedelta
from typing import List, Optional

import pandas as pd
import requests
from clickhouse_driver import Client
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.responses import JSONResponse
from fastapi.security import OAuth2PasswordRequestForm
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

logger = logging.getLogger(__name__)

task = Tasks.token_classification
model = 'damo/mgeo_geographic_elements_tagging_chinese_base'
pipeline_ins = pipeline(task=task, model=model)
router = APIRouter()
client = Client(host='0.0.0.0', port='000', database='xxx', password='xxx')

# Chinese digits understood inside building / unit spans.
_CN_DIGITS = {
    "一": 1, "二": 2, "三": 3, "四": 4, "五": 5,
    "六": 6, "七": 7, "八": 8, "九": 9,
}
_CN_NUMERAL_RE = re.compile("[一二三四五六七八九十]+")
_NUMBER_RE = re.compile(r"\d+")


def read_ck(sql, client, params=None):
    """Run *sql* on a ClickHouse client and return the rows as a list of dicts.

    Args:
        sql: Query text. When *params* is given, values are referenced with
            ``%(name)s`` placeholders and escaped by the driver.
        client: A ``clickhouse_driver.Client``.
        params: Optional mapping of placeholder names to values.

    Returns:
        One dict per row; non-word characters in column names are replaced
        with ``_``. An empty result yields an empty list.
    """
    data, columns = client.execute(
        sql, params, columnar=True, with_column_types=True
    )
    df = pd.DataFrame(
        {re.sub(r'\W', '_', col[0]): d for d, col in zip(data, columns)}
    )
    return df.to_dict("records")


host = '0.0.0.0'
port = 0000
user = "xxxxx"
pwd = 'xxxx'
es = Elasticsearch(host + ':' + str(port), http_auth=(user, pwd), maxsize=15)


def _cn_run_to_arabic(match):
    """Convert one run of Chinese numerals (re.Match) to Arabic digits.

    Handles plain digits ("三" -> "3", "一二" -> "12") and the 10-99 forms
    ("十" -> "10", "十一" -> "11", "二十" -> "20", "二十一" -> "21").
    """
    token = match.group(0)
    tens, sep, units = token.partition("十")
    if not sep:
        return "".join(str(_CN_DIGITS[ch]) for ch in token)
    if len(tens) <= 1 and len(units) <= 1 and units != "十":
        value = (_CN_DIGITS[tens] if tens else 1) * 10
        value += _CN_DIGITS[units] if units else 0
        return str(value)
    # Unusual shape (e.g. "一二十三"): fall back to per-character substitution.
    return "".join("10" if ch == "十" else str(_CN_DIGITS[ch]) for ch in token)


def _pick_table(last_item, address):
    """Choose the lookup table from the last tagged element.

    Returns ``(table_name, house_str)``. ``house_str`` is the untagged text
    after the last element (where the room number is read from) and is only
    set for household lookups. ``table_name`` is ``''`` when the last
    element's type is not handled.
    """
    kind = last_item['type']
    tail = address[last_item['end']:]
    if kind == 'cellno':
        return 'bzdz_household', tail
    if kind == 'houseno':
        # A span holding more than one number (e.g. "1#2") is treated as
        # building + unit, so the lookup goes to the household table.
        if len(_NUMBER_RE.findall(last_item['span'])) > 1:
            return 'bzdz_household', tail
        return 'bzdz_build', ''
    if kind in ('poi', 'subpoi'):
        return 'bzdz_village_info', ''
    return '', ''


def _build_pattern(items, house_str):
    """Build the regex matched against ``full_bzdz_name``.

    Shape: ``.*(poi1|poi2)\\D*<n1>\\D+<n2>...\\D+<room>室``. POI text is
    regex-escaped because it comes straight from user input. Returns ``''``
    when nothing usable was extracted.
    """
    villages = []
    numbers = []
    for item in items:
        kind = item['type']
        span = item['span']
        if kind in ('houseno', 'cellno'):
            normalized = _CN_NUMERAL_RE.sub(_cn_run_to_arabic, span)
            numbers.extend(_NUMBER_RE.findall(normalized))
        elif kind in ('poi', 'subpoi') and len(span) > 2:
            # NOTE(review): the original listed 'community' here as well, but
            # it appeared to sit under an outer filter that excluded it, so it
            # is treated as unreachable - confirm against the real source.
            villages.append(re.escape(span))
    if house_str:
        room = _NUMBER_RE.search(house_str)
        if room:
            numbers.append(room.group(0) + "室")
    pattern = ""
    if villages:
        pattern = ".*(" + "|".join(villages) + r")\D*"
    pattern += r"\D+".join(numbers)
    return pattern


@router.get("/addressdeal/", summary='解析地址')
def addressdeal(address: str):
    """Tag *address* and look up its standard address.

    Args:
        address: Free-form address text, e.g. ``金丽花园1#2-102``.

    Returns:
        A 200 ``JSONResponse`` with keys ``output`` (tagged elements),
        ``full_bzdz_name``, ``houseid``, ``table_name`` and ``id``. The last
        four are empty strings when nothing matched; ``houseid`` is never
        populated here.
    """
    data = pipeline_ins(input=address)
    items = data['output']
    resp = {
        'output': items,
        'full_bzdz_name': '',
        'houseid': '',
        'table_name': '',
        'id': '',
    }
    if not items:
        return JSONResponse(content=resp, status_code=status.HTTP_200_OK)

    # Cast to builtin numbers so the payload is JSON-serializable.
    for item in items:
        item['start'] = int(item['start'])
        item['end'] = int(item['end'])
        if 'prob' in item:
            # The original copied `end` into `prob` by mistake.
            item['prob'] = float(item['prob'])

    table_name, house_str = _pick_table(items[-1], address)
    if not table_name:
        return JSONResponse(content=resp, status_code=status.HTTP_200_OK)

    pattern = _build_pattern(items, house_str)
    if not pattern:
        # Nothing to search for; avoid scanning the table with an empty regex.
        return JSONResponse(content=resp, status_code=status.HTTP_200_OK)

    # table_name comes from the fixed set in _pick_table; the pattern is
    # passed as a driver-escaped parameter because it contains user input.
    # Only the first row is used, hence the limit.
    sql = (
        f"select * from {table_name} where 1"
        " and (extractAll(full_bzdz_name, %(pattern)s)[1]) != ''"
        " limit 1"
    )
    logger.info("address lookup sql=%s pattern=%s", sql, pattern)

    # One client per request, closed afterwards so connections do not leak.
    ck_client = Client(
        host='0.0.0.0', port='000', database='xxxx', password='xxxxxx'
    )
    try:
        address_lt = read_ck(sql, ck_client, {'pattern': pattern})
    finally:
        ck_client.disconnect()

    if address_lt:
        resp['full_bzdz_name'] = address_lt[0]['full_bzdz_name']
        resp['id'] = address_lt[0]['id']
        resp['table_name'] = table_name
    return JSONResponse(content=resp, status_code=status.HTTP_200_OK)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。