当前位置:   article > 正文

Elasticsearch 去重后求和

Elasticsearch 去重后求和

标题的要求可以用如下 SQL 表示

select sum(column2) from (select distinct(column1),column2 from table)t

要如何用 DSL 实现呢,先准备下索引和数据

  1. PUT test_index
  2. {
  3. "mappings": {
  4. "properties": {
  5. "column1": {
  6. "type": "keyword"
  7. },
  8. "column2": {
  9. "type": "long"
  10. }
  11. }
  12. }
  13. }
  1. PUT test_index/_doc/1
  2. {
  3. "column1": "1",
  4. "column2": 2
  5. }
  6. PUT test_index/_doc/2
  7. {
  8. "column1": "1",
  9. "column2": 2
  10. }
  11. PUT test_index/_doc/3
  12. {
  13. "column1": "2",
  14. "column2": 1
  15. }
  16. PUT test_index/_doc/4
  17. {
  18. "column1": "2",
  19. "column2": 1
  20. }

我首先想到的是 collapse 搭配 cardinality,再sum,那效果如何呢

  1. GET test_index/_search
  2. {
  3. "collapse": {
  4. "field": "column1"
  5. },
  6. "aggs": {
  7. "distinct_column": {
  8. "cardinality": {
  9. "field": "column1"
  10. }
  11. },
  12. "distinct_sum":{
  13. "sum": {
  14. "field": "column2"
  15. }
  16. },
  17. "all_sum":{
  18. "sum": {
  19. "field": "column2"
  20. }
  21. }
  22. }
  23. }

结果,count是去重了的,但sum没有

  1. {
  2. "took" : 5,
  3. "timed_out" : false,
  4. "_shards" : {
  5. "total" : 1,
  6. "successful" : 1,
  7. "skipped" : 0,
  8. "failed" : 0
  9. },
  10. "hits" : {
  11. "total" : {
  12. "value" : 4,
  13. "relation" : "eq"
  14. },
  15. "max_score" : null,
  16. "hits" : [
  17. {
  18. "_index" : "test_index",
  19. "_type" : "_doc",
  20. "_id" : "1",
  21. "_score" : 1.0,
  22. "_source" : {
  23. "column1" : "1",
  24. "column2" : 2
  25. },
  26. "fields" : {
  27. "column1" : [
  28. "1"
  29. ]
  30. }
  31. },
  32. {
  33. "_index" : "test_index",
  34. "_type" : "_doc",
  35. "_id" : "3",
  36. "_score" : 1.0,
  37. "_source" : {
  38. "column1" : "2",
  39. "column2" : 1
  40. },
  41. "fields" : {
  42. "column1" : [
  43. "2"
  44. ]
  45. }
  46. }
  47. ]
  48. },
  49. "aggregations" : {
  50. "distinct_sum" : {
  51. "value" : 6.0
  52. },
  53. "all_sum" : {
  54. "value" : 6.0
  55. },
  56. "distinct_column" : {
  57. "value" : 2
  58. }
  59. }
  60. }

尝试多次未果后,找到了这个

Sum aggregation on Unique Data in ElasticSearch - Stack Overflow

那试下呗

  1. GET test_index/_search
  2. {
  3. "size": 0,
  4. "aggs": {
  5. "column1_count": {
  6. "terms": {
  7. "field": "column1",
  8. "size": 100
  9. },
  10. "aggs": {
  11. "column2_avg": {
  12. "avg": {
  13. "field": "column2"
  14. }
  15. }
  16. }
  17. },
  18. "unique_count": {
  19. "cardinality": {
  20. "field": "column1"
  21. }
  22. },
  23. "unique_sum_column2":{
  24. "sum_bucket": {
  25. "buckets_path": "column1_count>column2_avg"
  26. }
  27. },
  28. "sum_column2":{
  29. "sum": {
  30. "field": "column2"
  31. }
  32. }
  33. }
  34. }

非常好,达到期望,这个是先求平均值,再求和,为这思路叹服

  1. {
  2. "took" : 5,
  3. "timed_out" : false,
  4. "_shards" : {
  5. "total" : 1,
  6. "successful" : 1,
  7. "skipped" : 0,
  8. "failed" : 0
  9. },
  10. "hits" : {
  11. "total" : {
  12. "value" : 4,
  13. "relation" : "eq"
  14. },
  15. "max_score" : null,
  16. "hits" : [ ]
  17. },
  18. "aggregations" : {
  19. "sum_column2" : {
  20. "value" : 6.0
  21. },
  22. "column1_count" : {
  23. "doc_count_error_upper_bound" : 0,
  24. "sum_other_doc_count" : 0,
  25. "buckets" : [
  26. {
  27. "key" : "1",
  28. "doc_count" : 2,
  29. "column2_avg" : {
  30. "value" : 2.0
  31. }
  32. },
  33. {
  34. "key" : "2",
  35. "doc_count" : 2,
  36. "column2_avg" : {
  37. "value" : 1.0
  38. }
  39. }
  40. ]
  41. },
  42. "unique_count" : {
  43. "value" : 2
  44. },
  45. "unique_sum_column2" : {
  46. "value" : 3.0
  47. }
  48. }
  49. }
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/799078
推荐阅读
相关标签
  

闽ICP备14008679号