赞
踩
需求:
IK分词:
拼音:
https://github.com/medcl/elasticsearch-analysis-pinyin
简繁体:
https://github.com/medcl/elasticsearch-analysis-stconvert
分析(analysis)是 Elasticsearch 在把文档写入倒排索引(inverted index)之前,对文档正文执行的处理过程。在将文档添加到索引之前,Elasticsearch 会对每个需要分析的字段依次执行以下步骤:字符过滤(char_filter)、分词(tokenizer)、词元过滤(token filter)。下面的索引模板就是按这三个步骤来组合自定义分析器的:
# Legacy index template for indices matching "test-*"; each matching index also gets the "test_read" alias.
# The analysis section relies on three plugin-provided components: the "ik_max_word" tokenizer,
# the "stconvert" char filter (convert_type "t2s") and the "pinyin" token filter.
# The pinyin token filter is registered as "pinyin_filter" because that is the name every custom
# analyzer below references; a custom analyzer that names an undefined filter is rejected.
# Sub-field naming used in the mappings: STPA = stand_t2s_pinyin_analyzer, ITPA = ik_t2s_pinyin_analyzer.
PUT /_template/test_template
{
  "index_patterns": [
    "test-*"
  ],
  "aliases": {
    "test_read": {}
  },
  "settings": {
    "index": {
      "max_result_window": "100000",
      "refresh_interval": "5s",
      "number_of_shards": "5",
      "translog": {
        "flush_threshold_size": "1024mb",
        "sync_interval": "30s",
        "durability": "async"
      },
      "number_of_replicas": "1"
    },
    "analysis": {
      "char_filter": {
        "tsconvert": {
          "type": "stconvert",
          "convert_type": "t2s"
        }
      },
      "analyzer": {
        "ik_t2s_pinyin_analyzer": {
          "type": "custom",
          "char_filter": [
            "tsconvert"
          ],
          "tokenizer": "ik_max_word",
          "filter": [
            "pinyin_filter",
            "lowercase"
          ]
        },
        "stand_t2s_pinyin_analyzer": {
          "type": "custom",
          "char_filter": [
            "tsconvert"
          ],
          "tokenizer": "standard",
          "filter": [
            "pinyin_filter",
            "lowercase"
          ]
        },
        "ik_t2s_analyzer": {
          "type": "custom",
          "char_filter": [
            "tsconvert"
          ],
          "tokenizer": "ik_max_word",
          "filter": [
            "lowercase"
          ]
        },
        "stand_t2s_analyzer": {
          "type": "custom",
          "char_filter": [
            "tsconvert"
          ],
          "tokenizer": "standard",
          "filter": [
            "lowercase"
          ]
        },
        "ik_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
            "pinyin_filter",
            "lowercase"
          ]
        },
        "stand_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "pinyin_filter",
            "lowercase"
          ]
        },
        "keyword_t2s_pinyin_analyzer": {
          "type": "custom",
          "char_filter": [
            "tsconvert"
          ],
          "tokenizer": "keyword",
          "filter": [
            "pinyin_filter",
            "lowercase"
          ]
        },
        "keyword_pinyin_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "pinyin_filter",
            "lowercase"
          ]
        }
      },
      "filter": {
        "pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_none_chinese": true,
          "none_chinese_pinyin_tokenize": false,
          "keep_none_chinese_in_joined_full_pinyin": true,
          "keep_original": false,
          "limit_first_letter_length": 1000,
          "lowercase": true,
          "trim_whitespace": true,
          "remove_duplicated_term": true
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "index_phrases": true,
        "analyzer": "ik_max_word",
        "index": true,
        "type": "text",
        "fields": {
          "keyword": {
            "ignore_above": 256,
            "type": "keyword"
          },
          "stand": {
            "analyzer": "standard",
            "type": "text"
          },
          "STPA": {
            "type": "text",
            "analyzer": "stand_t2s_pinyin_analyzer"
          },
          "ITPA": {
            "type": "text",
            "analyzer": "ik_t2s_pinyin_analyzer"
          }
        }
      },
      "desc": {
        "index_phrases": true,
        "analyzer": "ik_max_word",
        "index": true,
        "type": "text",
        "fields": {
          "keyword": {
            "ignore_above": 256,
            "type": "keyword"
          },
          "stand": {
            "analyzer": "standard",
            "type": "text"
          },
          "STPA": {
            "type": "text",
            "analyzer": "stand_t2s_pinyin_analyzer"
          },
          "ITPA": {
            "type": "text",
            "analyzer": "ik_t2s_pinyin_analyzer"
          }
        }
      },
      "abstr": {
        "index_phrases": true,
        "analyzer": "ik_max_word",
        "index": true,
        "type": "text",
        "fields": {
          "keyword": {
            "ignore_above": 256,
            "type": "keyword"
          },
          "stand": {
            "analyzer": "standard",
            "type": "text"
          },
          "STPA": {
            "type": "text",
            "analyzer": "stand_t2s_pinyin_analyzer"
          },
          "ITPA": {
            "type": "text",
            "analyzer": "ik_t2s_pinyin_analyzer"
          }
        }
      }
    }
  }
}
# Search through the "test_read" alias.
# "query" runs a phrase-type query_string against name.ITPA; "post_filter" then narrows the hits
# with a match on "name".
# The highlight field list contains only sub-fields that the test_template mapping defines
# (stand, ITPA); the former *.IPA entries pointed at sub-fields that are not mapped.
GET /test_read/_search
{
  "from": 0,
  "size": 10,
  "terminate_after": 100000,
  "query": {
    "bool": {
      "must": [
        {
          "query_string": {
            "query": "bj天安门 OR 测试",
            "fields": [
              "name.ITPA"
            ],
            "type": "phrase",
            "default_operator": "and"
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "post_filter": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "天安门"
          }
        }
      ]
    }
  },
  "highlight": {
    "fragment_size": 1000,
    "pre_tags": [
      "<span style=\"color:red;background:yellow;\">"
    ],
    "post_tags": [
      "</span>"
    ],
    "fields": {
      "name.stand": {},
      "desc.stand": {},
      "abstr.stand": {},
      "name.ITPA": {},
      "desc.ITPA": {},
      "abstr.ITPA": {}
    }
  }
}
post_filter:后过滤器 | Elasticsearch: 权威指南 | Elastic
PS:post_filter 用于实现二次搜索(在结果中再次筛选)功能。注意:ES 的高亮默认只针对 query 部分,post_filter 中的条件不会参与高亮,因此需要在代码中手动标记高亮(或通过 highlight_query 单独指定高亮所用的查询)。根据上面的 DSL 语句,即可写出对应的代码。
拼音插件配置:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。