  1. PUT /hotel
  2. {
  3. "mappings": {
  4. "properties": {
  5. "title":{"type": "text"},
  6. "city":{"type": "keyword"},
  7. "price":{"type": "double"},
  8. "create_time":{"type": "date","format": "yyyy-MM-dd HH:mm:ss"},
  9. "attachment":{"type": "text"},
  10. "full_room":{"type": "boolean"},
  11. "location":{"type": "geo_point"},
  12. "praise":{"type": "integer"}
  13. }
  14. }
  15. }


  1. POST /_bulk
  2. {"index":{"_index":"hotel","_id":"001"}}
  3. {"title":"java旅馆","city":"深圳","price":50.00,"create_time":"2022-08-05 00:00:00","location":{"lat":40.012312,"lon":116.497122},"praise":10}
  4. {"index":{"_index":"hotel","_id":"002"}}
  5. {"title":"python旅馆","city":"北京","price":50.00,"create_time":"2022-08-05 00:00:00","location":{"lat":40.012312,"lon":116.497122},"praise":10}
  6. {"index":{"_index":"hotel","_id":"003"}}
  7. {"title":"go旅馆","city":"上海","price":50.00,"create_time":"2022-08-05 00:00:00","location":{"lat":40.012312,"lon":116.497122},"praise":10}
  8. {"index":{"_index":"hotel","_id":"004"}}
  9. {"title":"C++旅馆","city":"广州","price":50.00,"create_time":"2022-08-05 00:00:00","location":{"lat":40.012312,"lon":116.497122},"praise":10}


  1. POST /hotel/_search
  2. {
  3. "_source": ["title","city"],
  4. "query": {
  5. "term": {
  6. "city": {
  7. "value": "深圳"
  8. }
  9. }
  10. }
  11. }


  1. {
  2. "took" : 361,
  3. "timed_out" : false,
  4. "_shards" : {
  5. "total" : 1,
  6. "successful" : 1,
  7. "skipped" : 0,
  8. "failed" : 0
  9. },
  10. "hits" : {
  11. "total" : {
  12. "value" : 1,
  13. "relation" : "eq"
  14. },
  15. "max_score" : 1.2039728,
  16. "hits" : [
  17. {
  18. "_index" : "hotel",
  19. "_type" : "_doc",
  20. "_id" : "001",
  21. "_score" : 1.2039728,
  22. "_source" : {
  23. "city" : "深圳",
  24. "title" : "java旅馆"
  25. }
  26. }
  27. ]
  28. }
  29. }



  1. @Test
  2. public void testQueryNeedFields() throws IOException {
  3. RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(Arrays.stream("".split(","))
  4. .map(host->{
  5. String[] split = host.split(":");
  6. String hostName = split[0];
  7. int port = Integer.parseInt(split[1]);
  8. return new HttpHost(hostName,port,HttpHost.DEFAULT_SCHEME_NAME);
  9. }).filter(Objects::nonNull).toArray(HttpHost[]::new)));
  10. SearchRequest request = new SearchRequest("hotel");
  11. SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
  12. sourceBuilder.query(new TermQueryBuilder("city","深圳"));
  13. sourceBuilder.fetchSource(new String[]{"title","city"},null);
  14. request.source(sourceBuilder);
  15. SearchResponse search = client.search(request, RequestOptions.DEFAULT);
  16. System.out.println(search.getHits());
  17. }


为提升搜索体验,需要给前端传递搜索匹配结果的文档条数,即需要对搜索结果进行计数。针对这个要求,ES提供了_count API功能,在该API中,用户提供query子句用于结果匹配,ES会返回匹配的文档条数。下面的DSL将返回城市为“北京”的旅馆个数:

  1. POST /hotel/_count
  2. {
  3. "query": {
  4. "term": {
  5. "city": {
  6. "value": "北京"
  7. }
  8. }
  9. }
  10. }


  1. {
  2. "count" : 1,
  3. "_shards" : {
  4. "total" : 1,
  5. "successful" : 1,
  6. "skipped" : 0,
  7. "failed" : 0
  8. }
  9. }


        在Java客户端中,通过CountRequest构建_count请求,调用CountRequest对象的source()方法设置查询逻辑,然后调用client.count()方法执行该请求。client.count()方法返回CountResponse对象,通过countResponse.getCount()方法可以得到匹配的文档条数。以下代码将和上面的DSL呈现相同的效果:

  1. @Test
  2. public void testCount() throws IOException {
  3. RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(Arrays.stream("".split(","))
  4. .map(host->{
  5. String[] split = host.split(":");
  6. String hostName = split[0];
  7. int port = Integer.parseInt(split[1]);
  8. return new HttpHost(hostName,port,HttpHost.DEFAULT_SCHEME_NAME);
  9. }).filter(Objects::nonNull).toArray(HttpHost[]::new)));
  10. CountRequest countRequest = new CountRequest("hotel");
  11. SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
  12. sourceBuilder.query(new TermQueryBuilder("city","深圳"));
  13. countRequest.source(sourceBuilder);
  14. CountResponse response = client.count(countRequest,RequestOptions.DEFAULT);
  15. System.out.println(response.getCount());
  16. }



  1. GET /hotel/_search
  2. {
  3. "from":0, //设置搜索起始位置
  4. "size": 2,//设置搜索返回的文档个数
  5. "query": {
  6. "term": {
  7. "city": {
  8. "value": "深圳"
  9. }
  10. }
  11. }
  12. }

在默认情况下,用户最多可以取得10 000个文档,即from与size之和不能超过10 000(例如from为0时,size参数最大为10 000)。如果请求超过该值,ES返回如下报错信息:

  1. {
  2. "error" : {
  3. "root_cause" : [
  4. {
  5. "type" : "illegal_argument_exception",
  6. "reason" : "Result window is too large, from + size must be less than or equal to: [10000] but was [10001]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
  7. }
  8. ],
  9. "type" : "search_phase_execution_exception",
  10. "reason" : "all shards failed",
  11. "phase" : "query",
  12. "grouped" : true,
  13. "failed_shards" : [
  14. {
  15. "shard" : 0,
  16. "index" : "hotel",
  17. "node" : "tiANekxXS_GtirH4DamrFA",
  18. "reason" : {
  19. "type" : "illegal_argument_exception",
  20. "reason" : "Result window is too large, from + size must be less than or equal to: [10000] but was [10001]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
  21. }
  22. }
  23. ],
  24. "caused_by" : {
  25. "type" : "illegal_argument_exception",
  26. "reason" : "Result window is too large, from + size must be less than or equal to: [10000] but was [10001]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting.",
  27. "caused_by" : {
  28. "type" : "illegal_argument_exception",
  29. "reason" : "Result window is too large, from + size must be less than or equal to: [10000] but was [10001]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
  30. }
  31. }
  32. },
  33. "status" : 400
  34. }

对于普通的搜索应用来说,size设为10 000已经足够用了。如果确实需要返回多于10 000条的数据,可以适当修改max_result_window的值。以下示例将hotel索引的最大窗口值修改为了20 000。

  1. PUT /hotel/_settings
  2. {
  3. "index":{
  4. "max_result_window":20000
  5. }
  6. }





        作为搜索引擎,ES更适合的场景是对数据进行搜索,而不是进行大规模的数据遍历。一般情况下,只需要返回前1000条数据即可,没有必要取到10 000条数据。如果确实有大规模数据遍历的需求,可以参考使用scroll模式或者考虑使用其他的存储引擎。


  1. @Test
  2. public void testQueryByPage() throws IOException {
  3. RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(Arrays.stream("".split(","))
  4. .map(host->{
  5. String[] split = host.split(":");
  6. String hostName = split[0];
  7. int port = Integer.parseInt(split[1]);
  8. return new HttpHost(hostName,port,HttpHost.DEFAULT_SCHEME_NAME);
  9. }).filter(Objects::nonNull).toArray(HttpHost[]::new)));
  10. SearchRequest request = new SearchRequest("hotel");
  11. SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
  12. searchSourceBuilder.query(new TermQueryBuilder("city","深圳"));
  13. searchSourceBuilder.from(20);
  14. searchSourceBuilder.size(10);
  15. request.source(searchSourceBuilder);
  16. client.search(request,RequestOptions.DEFAULT);
  17. }



  1. GET /hotel/_search
  2. {
  3. "profile": true,
  4. "query": {
  5. "match": {
  6. "title": "北京"
  7. }
  8. }
  9. }


  1. {
  2. "took" : 60,
  3. "timed_out" : false,
  4. "_shards" : {
  5. "total" : 1,
  6. "successful" : 1,
  7. "skipped" : 0,
  8. "failed" : 0
  9. },
  10. "hits" : {
  11. "total" : {
  12. "value" : 0,
  13. "relation" : "eq"
  14. },
  15. "max_score" : null,
  16. "hits" : [ ]
  17. },
  18. "profile" : {
  19. "shards" : [
  20. {
  21. "id" : "[tiANekxXS_GtirH4DamrFA][hotel][0]",
  22. "searches" : [
  23. {
  24. "query" : [
  25. {
  26. "type" : "BooleanQuery",
  27. "description" : "title:北 title:京",
  28. "time_in_nanos" : 1032417,
  29. "breakdown" : {
  30. "set_min_competitive_score_count" : 0,
  31. "match_count" : 0,
  32. "shallow_advance_count" : 0,
  33. "set_min_competitive_score" : 0,
  34. "next_doc" : 0,
  35. "match" : 0,
  36. "next_doc_count" : 0,
  37. "score_count" : 0,
  38. "compute_max_score_count" : 0,
  39. "compute_max_score" : 0,
  40. "advance" : 0,
  41. "advance_count" : 0,
  42. "score" : 0,
  43. "build_scorer_count" : 1,
  44. "create_weight" : 1023459,
  45. "shallow_advance" : 0,
  46. "create_weight_count" : 1,
  47. "build_scorer" : 8958
  48. },
  49. "children" : [
  50. {
  51. "type" : "TermQuery",
  52. "description" : "title:北",
  53. "time_in_nanos" : 182334,
  54. "breakdown" : {
  55. "set_min_competitive_score_count" : 0,
  56. "match_count" : 0,
  57. "shallow_advance_count" : 0,
  58. "set_min_competitive_score" : 0,
  59. "next_doc" : 0,
  60. "match" : 0,
  61. "next_doc_count" : 0,
  62. "score_count" : 0,
  63. "compute_max_score_count" : 0,
  64. "compute_max_score" : 0,
  65. "advance" : 0,
  66. "advance_count" : 0,
  67. "score" : 0,
  68. "build_scorer_count" : 1,
  69. "create_weight" : 179167,
  70. "shallow_advance" : 0,
  71. "create_weight_count" : 1,
  72. "build_scorer" : 3167
  73. }
  74. },
  75. {
  76. "type" : "TermQuery",
  77. "description" : "title:京",
  78. "time_in_nanos" : 15167,
  79. "breakdown" : {
  80. "set_min_competitive_score_count" : 0,
  81. "match_count" : 0,
  82. "shallow_advance_count" : 0,
  83. "set_min_competitive_score" : 0,
  84. "next_doc" : 0,
  85. "match" : 0,
  86. "next_doc_count" : 0,
  87. "score_count" : 0,
  88. "compute_max_score_count" : 0,
  89. "compute_max_score" : 0,
  90. "advance" : 0,
  91. "advance_count" : 0,
  92. "score" : 0,
  93. "build_scorer_count" : 1,
  94. "create_weight" : 14792,
  95. "shallow_advance" : 0,
  96. "create_weight_count" : 1,
  97. "build_scorer" : 375
  98. }
  99. }
  100. ]
  101. }
  102. ],
  103. "rewrite_time" : 183625,
  104. "collector" : [
  105. {
  106. "name" : "SimpleTopScoreDocCollector",
  107. "reason" : "search_top_hits",
  108. "time_in_nanos" : 32000
  109. }
  110. ]
  111. }
  112. ],
  113. "aggregations" : [ ]
  114. }
  115. ]
  116. }
  117. }



  1. id表示分片的唯一标识,它的组成形式为[nodeID][indexName][shardID]。
  2. searches以数组的形式存在,因为有的搜索请求会跨多个索引进行搜索。每一个search子元素即为在同一个索引中的子查询,此处不仅返回了该search子元素耗时的信息,而且还返回了搜索“北京”的详细策略,即被拆分成“title:北”和“title:京”两个子查询。同理,children子元素给出了“title:北”“title:京”的耗时和详细搜索步骤的耗时,此处不再赘述。
  3. aggregations只有在进行聚合运算时才有内容。

        上面只是一个很简单的例子,如果查询比较复杂或者命中的分片比较多,profile返回的信息将特别冗长。在这种情况下,用户进行性能剖析的效率将非常低。为此,Kibana提供了可视化的profile功能,该功能建立在ES的profile功能基础上。在Kibana的Dev Tools界面中单击Search Profiler链接,就可以使用可视化的profile了,其区域布局如下图所示:



  1. GET /${index_name}/_explain/${doc_id}
  2. {
  3. "query":{
  4. ...
  5. }
  6. }


  1. GET /hotel/_explain/002
  2. {
  3. "query":{
  4. "match": {
  5. "title": "python"
  6. }
  7. }
  8. }


  1. {
  2. "_index" : "hotel",
  3. "_type" : "_doc",
  4. "_id" : "002",
  5. "matched" : true,
  6. "explanation" : {
  7. "value" : 1.2039728,
  8. "description" : "weight(title:python in 1) [PerFieldSimilarity], result of:",
  9. "details" : [
  10. {
  11. "value" : 1.2039728,
  12. "description" : "score(freq=1.0), computed as boost * idf * tf from:",
  13. "details" : [
  14. {
  15. "value" : 2.2,
  16. "description" : "boost",
  17. "details" : [ ]
  18. },
  19. {
  20. "value" : 1.2039728,
  21. "description" : "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
  22. "details" : [
  23. {
  24. "value" : 1,
  25. "description" : "n, number of documents containing term",
  26. "details" : [ ]
  27. },
  28. {
  29. "value" : 4,
  30. "description" : "N, total number of documents with field",
  31. "details" : [ ]
  32. }
  33. ]
  34. },
  35. {
  36. "value" : 0.45454544,
  37. "description" : "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
  38. "details" : [
  39. {
  40. "value" : 1.0,
  41. "description" : "freq, occurrences of term within document",
  42. "details" : [ ]
  43. },
  44. {
  45. "value" : 1.2,
  46. "description" : "k1, term saturation parameter",
  47. "details" : [ ]
  48. },
  49. {
  50. "value" : 0.75,
  51. "description" : "b, length normalization parameter",
  52. "details" : [ ]
  53. },
  54. {
  55. "value" : 3.0,
  56. "description" : "dl, length of field",
  57. "details" : [ ]
  58. },
  59. {
  60. "value" : 3.0,
  61. "description" : "avgdl, average length of field",
  62. "details" : [ ]
  63. }
  64. ]
  65. }
  66. ]
  67. }
  68. ]
  69. }
  70. }



  1. {
  2. "_index" : "hotel",
  3. "_type" : "_doc",
  4. "_id" : "001",
  5. "matched" : false,
  6. "explanation" : {
  7. "value" : 0.0,
  8. "description" : "no matching term",
  9. "details" : [ ]
  10. }
  11. }

