当前位置:   article > 正文

中文地址结构化要素解析

地址结构化

最近在做标准地址匹配:例如输入“金丽花园1#2-102”,
先解析成如下结构化要素,再匹配到标准地址库中对应的记录:

{
    "status": 20000,
    "message": "查询成功",
    "data": {
        "output": [
            {
                "type": "poi",
                "start": 0,
                "end": 4,
                "prob": 4,
                "span": "金丽花园"
            },
            {
                "type": "houseno",
                "start": 4,
                "end": 7,
                "prob": 7,
                "span": "1#2"
            }
        ],
        "full_bzdz_name": "山东省淄博市张店区科苑街道丽景苑社区北西五路21号金丽花园1号楼2单元102室",
        "houseid": "",
        "table_name": "bzdz_household",
        "id": "1726"
    }
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26

API 采用了 ModelScope 上的 MGeo“门址地址结构化要素解析-中文-地址领域-base”模型,并使用 FastAPI 封装成接口。


#!/usr/bin/env python3
from typing import List, Optional
from datetime import timedelta
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
from fastapi.responses import JSONResponse
import requests,re
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import pandas as pd
from clickhouse_driver import Client
from elasticsearch import Elasticsearch
from elasticsearch import helpers



# Address-element tagging pipeline. It is built once at import time, so the
# model identified below is loaded when this module is first imported.
task = Tasks.token_classification
model = 'damo/mgeo_geographic_elements_tagging_chinese_base'
pipeline_ins = pipeline(task=task, model=model)


# Router that the address endpoint in this module is registered on.
router = APIRouter()
# Module-level ClickHouse client. The connection values look like redacted
# placeholders -- TODO confirm and load the real ones from configuration.
# NOTE(review): `addressdeal` creates its own local `client`, which shadows
# this one inside that function.
client = Client(host='0.0.0.0', port='000', database='xxx',password='xxx')
def read_ck(sql, client):
    """Run *sql* through *client* and return the rows as a list of dicts.

    The query is executed in columnar mode with column types requested, so
    ``client.execute`` yields one value sequence per column plus the column
    metadata. Every non-word character in a column name is replaced by ``_``
    before it is used as a dictionary key.

    Args:
        sql: Query text handed to ``client.execute`` unchanged.
        client: Object exposing ``execute(sql, columnar=..., with_column_types=...)``.

    Returns:
        One ``{column_name: value}`` dict per result row.
    """
    column_values, column_meta = client.execute(sql, columnar=True, with_column_types=True)
    frame_source = {}
    for values, meta in zip(column_values, column_meta):
        # meta[0] is the column name as reported by the server.
        safe_name = re.sub(r'\W', '_', meta[0])
        frame_source[safe_name] = values
    frame = pd.DataFrame(frame_source)
    return frame.to_dict("records")


# Elasticsearch connection settings. The values look like redacted
# placeholders -- TODO confirm and load the real ones from configuration.
host='0.0.0.0'
port=0000
user="xxxxx"
pwd='xxxx'
# Module-level Elasticsearch client addressed as "host:port" with basic auth.
# NOTE(review): `es` is not referenced by any code visible in this file.
es = Elasticsearch(host+':'+str(port), http_auth=(user, pwd), maxsize=15) 


# Value of each single Chinese digit, used to normalise building/unit numbers.
_CN_DIGITS = {
    "一": 1,
    "二": 2,
    "三": 3,
    "四": 4,
    "五": 5,
    "六": 6,
    "七": 7,
    "八": 8,
    "九": 9,
}
# A Chinese number from 1 to 99: optional tens digit + "十" + optional unit
# digit (十, 十一, 二十, 二十一 ...), or a lone digit.
_CN_NUMBER_RE = re.compile(r'[一二三四五六七八九]?十[一二三四五六七八九]?|[一二三四五六七八九]')
# Characters that have a special meaning inside a regular expression.
_REGEX_META_RE = re.compile(r'[\\.^$|?*+()\[\]{}]')


def _cn_numerals_to_digits(text):
    """Replace every Chinese number (1-99) in *text* with its Arabic digits."""
    def _convert(match):
        token = match.group(0)
        if '十' not in token:
            return str(_CN_DIGITS[token])
        tens, _, units = token.partition('十')
        # A bare "十" means 10, so a missing tens digit counts as 1.
        return str(_CN_DIGITS.get(tens, 1) * 10 + _CN_DIGITS.get(units, 0))
    return _CN_NUMBER_RE.sub(_convert, text)


def _escape_regex_literal(text):
    """Backslash-escape regex metacharacters so *text* matches literally."""
    return _REGEX_META_RE.sub(lambda m: '\\' + m.group(0), text)


def _quote_sql_string(text):
    """Return *text* as a single-quoted SQL string literal with escapes."""
    return "'" + text.replace('\\', '\\\\').replace("'", "\\'") + "'"


@router.get("/addressdeal/", summary='解析地址')
def addressdeal(address: str):
    """Tag the elements of *address* and look up the matching standard address.

    The address is run through the tagging pipeline. The type of the last
    tagged element selects the table to search:

    * ``cellno`` -> ``bzdz_household``
    * ``houseno`` -> ``bzdz_build``, or ``bzdz_household`` when the span
      contains more than one number (building plus unit)
    * ``poi`` / ``subpoi`` -> ``bzdz_village_info``

    Community names and building/unit/room numbers are then combined into one
    regular expression that is matched against ``full_bzdz_name``.

    Args:
        address: Free-text address supplied by the caller (untrusted input).

    Returns:
        A ``JSONResponse`` (HTTP 200) whose body contains ``output`` (the
        tagged elements), ``full_bzdz_name``, ``houseid`` and ``table_name``;
        ``id`` is added when a row was matched. The lookup fields stay empty
        when nothing could be matched.
    """
    data = pipeline_ins(input=address)
    resp = {
        'output': data['output'],
        'full_bzdz_name': '',
        'houseid': '',
        'table_name': '',
    }

    if not data['output']:
        return JSONResponse(content=resp, status_code=status.HTTP_200_OK)

    # Cast to plain Python numbers so the elements can be JSON-serialised.
    for ou in data['output']:
        ou['start'] = int(ou['start'])
        ou['end'] = int(ou['end'])
        # Fixed: this used to copy `end` into `prob`, discarding the score.
        ou['prob'] = float(ou['prob'])

    end_item = data['output'][-1]
    end_type = end_item['type']
    table_name = ''
    house_str = ''  # text after the last element; may hold the room number
    if end_type == 'cellno':
        table_name = 'bzdz_household'
        house_str = address[end_item['end']:]
    elif end_type == 'houseno':
        # Building level, unless the span also carries a unit number.
        table_name = 'bzdz_build'
        if len(re.findall(r'\d+', end_item['span'])) > 1:
            table_name = 'bzdz_household'
            house_str = address[end_item['end']:]
    elif end_type in ('poi', 'subpoi'):
        # Residential community level.
        table_name = 'bzdz_village_info'

    if not table_name:
        return JSONResponse(content=resp, status_code=status.HTTP_200_OK)

    search_terms = []
    village_names = []
    for item in data['output']:
        kw = item['span']
        if item['type'] in ('houseno', 'cellno'):
            # Normalise e.g. "十一号楼" to "11号楼" before extracting numbers.
            search_terms.extend(re.findall(r'\d+', _cn_numerals_to_digits(kw)))
        elif item['type'] in ('poi', 'subpoi') and len(kw) > 2:
            village_names.append(kw)

    if house_str:
        house_numbers = re.findall(r'\d+', house_str)
        if house_numbers:
            search_terms.append(house_numbers[0] + "室")

    # NOTE(review): as before, no query is issued when there are no numeric
    # terms, so a community-only address never reaches the database.
    if not search_terms:
        return JSONResponse(content=resp, status_code=status.HTTP_200_OK)

    # Target shape: .*(莲池生活)\D*13\D+1\D+101室
    pattern = ''
    if village_names:
        # Names come from user input: match them literally, never as regex.
        alternatives = '|'.join(_escape_regex_literal(name) for name in village_names)
        pattern = '.*(' + alternatives + r')\D*'
    pattern += r'\D+'.join(search_terms)

    # table_name is one of the fixed values chosen above; the pattern is
    # quoted/escaped so user text cannot break out of the SQL string literal.
    # Only the first row is used, hence the LIMIT.
    sql = (
        f"select * from {table_name} where 1 "
        f" and   (extractAll(full_bzdz_name, {_quote_sql_string(pattern)})[1]) != ''"
        " limit 1"
    )
    print(sql)

    # One client per request, released again once the lookup is done.
    client = Client(host='0.0.0.0', port='000', database='xxxx',password='xxxxxx')
    try:
        address_lt = read_ck(sql, client)
    finally:
        client.disconnect()

    if address_lt:
        resp['full_bzdz_name'] = address_lt[0]['full_bzdz_name']
        resp['id'] = address_lt[0]['id']
        resp['table_name'] = table_name

    return JSONResponse(content=resp, status_code=status.HTTP_200_OK)


  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/526466
推荐阅读
相关标签
  

闽ICP备14008679号