赞
踩
在SQL中,用dinstinct语句进行去重:
- from elasticsearch import Elasticsearch
-
- # 连接es
- es = Elasticsearch(hosts=["192.168.124.49:9200"], sniffer_timeout=60, timeout=30)
-
- body = {
- "mappings": {
- "properties": {
- "id": {
- "type": "integer"
- },
- "name": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
- }
- }
- },
- "age": {
- "type": "integer"
- },
- "gender": {
- "type": "keyword"
- },
- "email": {
- "type": "text"
- },
- "province": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
- }
- }
- },
- "address": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
- }
- }
- },
- "state": {
- "type": "text",
- "fields": {
- "keyword": {
- "type": "keyword",
- "ignore_above": 256
- }
- }
- }
- }
- }
- }
-
- # 创建 index
- es.indices.create(index="person_info", body=body)
- POST person_info/_bulk
- {"index": {"_index": "person_info"}}
- {"id": 1, "name": "刘一", "age": 25, "gender": "男", "email": "111@qq.com", "provience": "北京", "address": "北京市朝阳区", "status": "正常"}
- {"index": {"_index": "person_info"}}
- {"id": 1, "name": "陈二", "age": 26, "gender": "女", "email": "111@qq.com", "provience": "山东", "address": "山东省青岛市", "status": "正常"}
- {"index": {"_index": "person_info"}}
- {"id": 1, "name": "张三", "age": 27, "gender": "男", "email": "111@qq.com", "provience": "北京", "address": "北京市朝阳区", "status": "正常"}
- {"index": {"_index": "person_info"}}
- {"id": 1, "name": "李四", "age": 28, "gender": "男", "email": "111@qq.com", "provience": "山东", "address": "山东省济南市", "status": "正常"}
- {"index": {"_index": "person_info"}}
- {"id": 1, "name": "王五", "age": 25, "gender": "男", "email": "111@qq.com", "provience": "北京", "address": "北京市朝阳区", "status": "正常"}
- {"index": {"_index": "person_info"}}
- {"id": 1, "name": "刘一", "age": 26, "gender": "男", "email": "111@qq.com", "provience": "山东", "address": "山东省青岛市", "status": "正常"}
- {"index": {"_index": "person_info"}}
- {"id": 1, "name": "陈二", "age": 26, "gender": "女", "email": "111@qq.com", "provience": "北京", "address": "北京市朝阳区", "status": "正常"}
注意:去重的字段不能是text类型。如果xxxfield的mapping要有keyword,且通过xxxfield.keyword去重。
注意:如果去重字段是其他可以直接去重的类型,比如:数字类型、keyword、日期等,则直接用字段名就可以。即:如果本处xxxfield是keyword,则xxxfield.keyword处写成xxxfield就行。
查询province为北京的信息:
- GET person_info/_search
- {
- "query": {
- "match": {
- "provience.keyword": "北京"
- }
- }
- }
运行结果:
- {
- "took" : 15,
- "timed_out" : false,
- "_shards" : {
- "total" : 1,
- "successful" : 1,
- "skipped" : 0,
- "failed" : 0
- },
- "hits" : {
- "total" : {
- "value" : 4,
- "relation" : "eq"
- },
- "max_score" : 0.5753642,
- "hits" : [
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "hFHKl4YBPv2uoOpTcHMg",
- "_score" : 0.5753642,
- "_source" : {
- "id" : 1,
- "name" : "刘一",
- "age" : 25,
- "gender" : "男",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- }
- },
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "hlHKl4YBPv2uoOpTcHMi",
- "_score" : 0.5753642,
- "_source" : {
- "id" : 1,
- "name" : "张三",
- "age" : 27,
- "gender" : "男",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- }
- },
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "iFHKl4YBPv2uoOpTcHMi",
- "_score" : 0.5753642,
- "_source" : {
- "id" : 1,
- "name" : "王五",
- "age" : 25,
- "gender" : "男",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- }
- },
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "ilHKl4YBPv2uoOpTcHMi",
- "_score" : 0.5753642,
- "_source" : {
- "id" : 1,
- "name" : "陈二",
- "age" : 26,
- "gender" : "女",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- }
- }
- ]
- }
- }
查询province为北京,且根据年龄去重的信息:
- # collapse获取去重结果
- GET person_info/_search
- {
- "query": {
- "match": {
- "provience.keyword": "北京"
- }
- },
- "collapse": {
- "field": "age"
- }
- }
运行结果:
- {
- "took" : 14,
- "timed_out" : false,
- "_shards" : {
- "total" : 1,
- "successful" : 1,
- "skipped" : 0,
- "failed" : 0
- },
- "hits" : {
- "total" : {
- "value" : 4,
- "relation" : "eq"
- },
- "max_score" : null,
- "hits" : [
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "hFHKl4YBPv2uoOpTcHMg",
- "_score" : 0.5753642,
- "_source" : {
- "id" : 1,
- "name" : "刘一",
- "age" : 25,
- "gender" : "男",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- },
- "fields" : {
- "age" : [
- 25
- ]
- }
- },
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "hlHKl4YBPv2uoOpTcHMi",
- "_score" : 0.5753642,
- "_source" : {
- "id" : 1,
- "name" : "张三",
- "age" : 27,
- "gender" : "男",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- },
- "fields" : {
- "age" : [
- 27
- ]
- }
- },
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "ilHKl4YBPv2uoOpTcHMi",
- "_score" : 0.5753642,
- "_source" : {
- "id" : 1,
- "name" : "陈二",
- "age" : 26,
- "gender" : "女",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- },
- "fields" : {
- "age" : [
- 26
- ]
- }
- }
- ]
- }
- }
查询province为北京,且根据年龄去重的信息:
- # 聚合获取去重结果
- GET person_info/_search
- {
- "query": {
- "match": {
- "provience.keyword": "北京"
- }
- },
- "size": 0,
- "aggs": {
- "age_aggs": {
- "terms": {
- "field": "age",
- "size": 10
- },
- "aggs": {
- "age_top": {
- "top_hits": {
- "sort": [{
- "age": {
- "order": "desc"
- }
- }],
- "size": 1
- }
- }
- }
- }
- }
- }
运行结果:
- {
- "took" : 230,
- "timed_out" : false,
- "_shards" : {
- "total" : 1,
- "successful" : 1,
- "skipped" : 0,
- "failed" : 0
- },
- "hits" : {
- "total" : {
- "value" : 4,
- "relation" : "eq"
- },
- "max_score" : null,
- "hits" : [ ]
- },
- "aggregations" : {
- "age_aggs" : {
- "doc_count_error_upper_bound" : 0,
- "sum_other_doc_count" : 0,
- "buckets" : [
- {
- "key" : 25,
- "doc_count" : 2,
- "age_top" : {
- "hits" : {
- "total" : {
- "value" : 2,
- "relation" : "eq"
- },
- "max_score" : null,
- "hits" : [
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "hFHKl4YBPv2uoOpTcHMg",
- "_score" : null,
- "_source" : {
- "id" : 1,
- "name" : "刘一",
- "age" : 25,
- "gender" : "男",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- },
- "sort" : [
- 25
- ]
- }
- ]
- }
- }
- },
- {
- "key" : 26,
- "doc_count" : 1,
- "age_top" : {
- "hits" : {
- "total" : {
- "value" : 1,
- "relation" : "eq"
- },
- "max_score" : null,
- "hits" : [
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "ilHKl4YBPv2uoOpTcHMi",
- "_score" : null,
- "_source" : {
- "id" : 1,
- "name" : "陈二",
- "age" : 26,
- "gender" : "女",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- },
- "sort" : [
- 26
- ]
- }
- ]
- }
- }
- },
- {
- "key" : 27,
- "doc_count" : 1,
- "age_top" : {
- "hits" : {
- "total" : {
- "value" : 1,
- "relation" : "eq"
- },
- "max_score" : null,
- "hits" : [
- {
- "_index" : "person_info",
- "_type" : "_doc",
- "_id" : "hlHKl4YBPv2uoOpTcHMi",
- "_score" : null,
- "_source" : {
- "id" : 1,
- "name" : "张三",
- "age" : 27,
- "gender" : "男",
- "email" : "111@qq.com",
- "provience" : "北京",
- "address" : "北京市朝阳区",
- "status" : "正常"
- },
- "sort" : [
- 27
- ]
- }
- ]
- }
- }
- }
- ]
- }
- }
- }
查询province为北京,且根据年龄去重的数量:
- # 聚合获取去重数量
- GET person_info/_search
- {
- "query": {
- "match": {
- "provience.keyword": "北京"
- }
- },
- "size": 0,
- "aggs": {
- "age_aggs": {
- "cardinality": {
- "field": "age"
- }
- }
- }
- }
运行结果:
- {
- "took" : 68,
- "timed_out" : false,
- "_shards" : {
- "total" : 1,
- "successful" : 1,
- "skipped" : 0,
- "failed" : 0
- },
- "hits" : {
- "total" : {
- "value" : 4,
- "relation" : "eq"
- },
- "max_score" : null,
- "hits" : [ ]
- },
- "aggregations" : {
- "age_aggs" : {
- "value" : 3
- }
- }
- }
参考博文:
ElasticSearch--去重查询/根据字段去重--方法/实例_IT利刃出鞘的博客-CSDN博客_elasticsearch统计去重后的数量准确值
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。