赞
踩
使用query关键字进行检索,倾向于相关度搜索,故需要计算评分。搜索是Elasticsearch最关键和重要的部分。
概念:相关度评分用于对搜索结果排序,评分越高则认为其结果和搜索的预期值相关度越高,即越符合搜索预期值。在5.0之前相关度评分默认使用TF/IDF算法计算而来,5.0及之后(包括7.x)默认为BM25。在核心知识篇不必关心相关评分的具体原理,只需知晓其概念即可。
排序:相关度评分为搜索结果的排序依据,默认情况下评分越高,则结果越靠前。
禁用_source:
好处:节省存储开销
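坏处:禁用_source后,update、update_by_query、reindex等依赖原始文档的API将不可用;无法进行高亮显示;无法在不重新导入数据的情况下修改mapping或重建索引;也无法通过查看原始文档来调试查询和聚合。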
总结:如果只是为了节省磁盘空间,提高索引的压缩级别(例如将 index.codec 设置为 best_compression)比禁用_source更好。
数据源过滤器:
Including:结果中返回哪些field
Excluding:结果中不要返回哪些field,不返回的field不代表不能通过该字段进行检索,因为元数据不存在不代表索引不存在
PUT product
{
"mappings": {
"_source": {
"includes": [
"name",
"price"
],
"excludes": [
"desc",
"tags"
]
}
}
}
"_source": false,
"_source": "obj.*",
"_source": [ "obj1.*", "obj2.*" ],
"_source": {
"includes": [ "obj1.*", "obj2.*" ],
"excludes": [ "*.description" ]
}
GET /product/_search
GET /product/_search?q=name:xiaomi
GET /product/_search?from=0&size=2&sort=price:asc
GET /product/_search?q=date:2021-06-01
GET /product/_search?q=2021-06-01
GET index/_search
{
"query": {
***
}
}
term和match_phrase区别:
match_phrase 会将检索关键词分词, match_phrase的分词结果必须在被检索字段的分词中都包含,而且顺序必须相同,而且默认必须都是连续的
term搜索不会将搜索词分词
term和keyword区别
term是对于搜索词不分词,
keyword是字段类型,是对于source data中的字段值不分词
GET _search
{
"query": {
"constant_score": {
"filter": {
"term": {
"status": "active"
}
}
}
}
}
filter:query和filter的主要区别在: filter是结果导向的而query是过程导向。query倾向于“当前文档和查询的语句的相关度”而filter倾向于“当前文档和查询的条件是不是相符”。即在查询过程中,query是要对查询的每个结果计算相关性得分的,而filter不会。另外filter有相应的缓存机制,可以提高查询效率。
bool:可以组合多个查询条件,bool查询也是采用more_matches_is_better的机制,因此满足must和should子句的文档将会合并起来计算分值
minimum_should_match:参数指定should返回的文档必须匹配的子句的数量或百分比。如果bool查询包含至少一个should子句,而没有must或 filter子句,则默认值为1。否则,默认值为0
脚本:
测试用的5号文档
POST /person1/_doc/5
{
"name":"张三5号",
"age":18,
"address":"北京海淀区"
}
批量操作文本
#批量操作
#1.删除5号
#新增8号
#更新2号 name为2号
POST _bulk
{"delete":{"_index":"person1","_id":"5"}}
{"create":{"_index":"person1","_id":"8"}}
{"name":"八号","age":18,"address":"北京"}
{"update":{"_index":"person1","_id":"2"}}
{"doc":{"name":"2号"}}
结果(三个操作全部成功,因此 errors 为 false)
{ "took" : 51, "errors" : false, "items" : [ { "delete" : { "_index" : "person1", "_type" : "_doc", "_id" : "5", "_version" : 2, "result" : "deleted", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 6, "_primary_term" : 2, "status" : 200 } }, { "create" : { "_index" : "person1", "_type" : "_doc", "_id" : "8", "_version" : 1, "result" : "created", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 7, "_primary_term" : 2, "status" : 201 } }, { "update" : { "_index" : "person1", "_type" : "_doc", "_id" : "2", "_version" : 2, "result" : "updated", "_shards" : { "total" : 2, "successful" : 1, "failed" : 0 }, "_seq_no" : 10, "_primary_term" : 2, "status" : 200 } } ] }
/** * Bulk 批量操作 */ @Test public void test2() throws IOException { //创建bulkrequest对象,整合所有操作 BulkRequest bulkRequest =new BulkRequest(); /* # 1. 删除5号记录 # 2. 添加6号记录 # 3. 修改3号记录 名称为 “三号” */ //添加对应操作 //1. 删除5号记录 DeleteRequest deleteRequest=new DeleteRequest("person1","5"); bulkRequest.add(deleteRequest); //2. 添加6号记录 Map<String, Object> map=new HashMap<>(); map.put("name","六号"); IndexRequest indexRequest=new IndexRequest("person1").id("6").source(map); bulkRequest.add(indexRequest); //3. 修改3号记录 名称为 “三号” Map<String, Object> mapUpdate=new HashMap<>(); mapUpdate.put("name","三号"); UpdateRequest updateRequest=new UpdateRequest("person1","3").doc(mapUpdate); bulkRequest.add(updateRequest); //执行批量操作 BulkResponse response = client.bulk(bulkRequest, RequestOptions.DEFAULT); System.out.println(response.status()); }
PUT goods { "mappings": { "properties": { "title": { "type": "text", "analyzer": "ik_smart" }, "price": { "type": "double" }, "createTime": { "type": "date" }, "categoryName": { "type": "keyword" }, "brandName": { "type": "keyword" }, "spec": { "type": "object" }, "saleNum": { "type": "integer" }, "stock": { "type": "integer" } } } }
需要连接数据库,并建立与表字段对应的实体类。因为从数据库查询出来的spec是String,需要先解析成Map对象,然后把整个Good对象转成JSON放入批量请求中
good
package com.ybb.domain;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.annotation.JSONField;
import org.springframework.beans.factory.annotation.Value;

import java.io.Serializable;
import java.util.Date;
import java.util.Map;

/**
 * One product row / one document of the {@code goods} index.
 *
 * <p>The product specification exists in two forms: {@code specStr} holds the raw JSON text
 * and {@code spec} holds the same data as a map. {@code specStr} is excluded from JSON
 * serialization, so only {@code spec} is written when the object is converted to JSON.
 */
public class Good implements Serializable {

    private Integer id;
    private String title;
    private Integer price;
    private String stock;
    private Integer saleNum;
    private Date createTime;
    private String categoryName;
    private String brandName;

    /** Specification as a map; used when no raw JSON text is available. */
    private Map spec;

    /** Specification as raw JSON text; never serialized. */
    @JSONField(serialize = false)
    private String specStr;

    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public Integer getPrice() {
        return price;
    }

    public void setPrice(Integer price) {
        this.price = price;
    }

    public String getStock() {
        return stock;
    }

    public void setStock(String stock) {
        this.stock = stock;
    }

    public Integer getSaleNum() {
        return saleNum;
    }

    public void setSaleNum(Integer saleNum) {
        this.saleNum = saleNum;
    }

    public Date getCreateTime() {
        return createTime;
    }

    public void setCreateTime(Date createTime) {
        this.createTime = createTime;
    }

    public String getCategoryName() {
        return categoryName;
    }

    public void setCategoryName(String categoryName) {
        this.categoryName = categoryName;
    }

    public String getBrandName() {
        return brandName;
    }

    public void setBrandName(String brandName) {
        this.brandName = brandName;
    }

    /**
     * Returns the specification as a map.
     *
     * <p>When raw JSON text is present it is parsed and returned. Otherwise the map stored
     * through {@link #setSpec(Map)} is returned, so a value assigned by the setter is not lost.
     *
     * @return the specification map, or {@code null} when neither form has been set
     */
    public Map getSpec() {
        if (specStr != null) {
            return JSON.parseObject(specStr, Map.class);
        }
        return spec;
    }

    public void setSpec(Map spec) {
        this.spec = spec;
    }

    public String getSpecStr() {
        return specStr;
    }

    public void setSpecStr(String specStr) {
        this.specStr = specStr;
    }

    @Override
    public String toString() {
        return "Good{"
                + "id=" + id
                + ", title='" + title + '\''
                + ", price=" + price
                + ", stock='" + stock + '\''
                + ", saleNum=" + saleNum
                + ", createTime=" + createTime
                + ", categoryName='" + categoryName + '\''
                + ", brandName='" + brandName + '\''
                + ", spec=" + spec
                + ", specStr='" + specStr + '\''
                + '}';
    }
}
重点在于getSpec方法:查询结果中的spec列被映射到specStr(JSON字符串)上,getSpec直接把specStr解析成Map返回
/**
 * MyBatis mapper that reads {@link Good} rows.
 */
@Mapper
public interface GoodsMapper {

    /**
     * Runs the {@code findAll} statement.
     *
     * @return the rows mapped to {@link Good} objects
     */
    List<Good> findAll();
}
<?xml version="1.0" encoding="UTF-8" ?> <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd"> <mapper namespace="com.ybb.mapper.GoodsMapper"> <select id="findAll" resultType="com.ybb.domain.Good"> SELECT id, title, price, saleNum, createTime, categoryName, brandName, spec as specStr FROM goods </select> </mapper>
正式封装
/**
 * Copies every row returned by {@code goodsMapper.findAll()} into the {@code goods} index
 * using a single bulk request.
 *
 * @throws IOException if the bulk call to the cluster fails
 */
@Test
public void test3() throws IOException {
    List<Good> goodList = goodsMapper.findAll();
    if (goodList == null || goodList.isEmpty()) {
        // A bulk request without any action fails client-side validation, so stop early.
        System.out.println("no goods to import");
        return;
    }

    BulkRequest bulkRequest = new BulkRequest();
    for (Good good : goodList) {
        String json = JSON.toJSONString(good);
        IndexRequest indexRequest = new IndexRequest("goods").source(json, XContentType.JSON);
        // Reuse the database key as the document id so that running the import again
        // overwrites existing documents instead of creating duplicates.
        if (good.getId() != null) {
            indexRequest.id(String.valueOf(good.getId()));
        }
        bulkRequest.add(indexRequest);
    }

    BulkResponse response = client.bulk(bulkRequest, RequestOptions.DEFAULT);
    System.out.println(response.status());

    // Individual documents can be rejected (e.g. mapping conflicts) even when the call succeeds.
    if (response.hasFailures()) {
        System.out.println(response.buildFailureMessage());
    }
}
自测数据 练习使用
###################################### Bulk批量操作 # 创建索引 PUT person { "mappings": { "properties": { "name": { "type": "keyword" }, "age": { "type": "integer" }, "address": { "type": "text", "analyzer": "ik_smart" } } } } # 添加2条测试文档 POST /person/_doc/1 { "name":"ID为 1号 的数据", "age":18, "address":"北京海淀区" } POST /person/_doc/2 { "name":"ID为 2号 的数据", "age":18, "address":"北京海淀区" } # 查询所有文档 GET person/_search # 批量操作需求 # 1.删除1号 # 2.新增3号 # 3.更新2号 name为222号 POST _bulk {"delete":{"_index":"person","_id":"1"}} {"create":{"_index":"person","_id":"3"}} {"name":"ID为 3号 的数据","age":18,"address":"北京"} {"update":{"_index":"person","_id":"2"}} {"doc":{"name":"222号"}} ############################################### ES高级查询 # 创建索引 # title:商品标题 # price:商品价格 # createTime:创建时间 # categoryName:分类名称。如:家电,手机 # brandName:品牌名称。如:华为,小米 # spec: 商品规格。如: spec:{"屏幕尺寸","5寸","内存大小","128G"} # saleNum:销量 # stock:库存量 PUT goods { "mappings": { "properties": { "title": { "type": "text", "analyzer": "ik_smart" }, "price": { "type": "double" }, "createTime": { "type": "date" }, "categoryName": { "type": "keyword" }, "brandName": { "type": "keyword" }, "spec": { "type": "object" }, "saleNum": { "type": "integer" }, "stock": { "type": "integer" } } } } # 添加一条测试文档 POST goods/_doc/1 { "title": "小米手机", "price": 1000, "createTime": "2019-12-01", "categoryName": "手机", "brandName": "小米", "saleNum": 3000, "stock": 10000, "spec": { "网络制式": "移动4G", "屏幕尺寸": "4.5" } } # 查询所有文档 GET /goods/_search # 删除索引 DELETE /goods # 根据ID进行查询 GET /goods/_doc/536563 # 查询所有 GET /goods/_search # 条件查询 # 根据title查询 GET goods/_search { "query": { "term": { "price": { "value": "1699" } } } } # 根据品牌名称查询 GET goods/_search { "query": { "term": { "brandName": { "value": "三星" } } } } # 模糊查询 GET goods/_search { "query": { "match": { "title": "钛金灰" } } } # 分页查询 # 默认情况下,es一次展示10条数据,通过from和size来控制分页 # 分页是在查询结果的基础上进行分页展示 所以只要有查询结果的都可以添加分页 GET goods/_search { "query": { "match_all": {} }, "from": 0, "size": 100 } GET goods/_search { "from": 0, "size": 3 } # 
text:会分词,不支持聚合 # keyword:不会分词,将全部内容作为一个词条,支持聚合 # term查询:不会对查询条件进行分词 GET goods/_search { "query": { "term": { "title": { "value": "华为" } } } } # categoryName没有分词存储 使用term查询的时候必须要完全匹配 GET goods/_search { "query": { "term": { "categoryName": { "value": "华为" } } } } # match查询: # 会对查询条件进行分词 # 然后将分词后的查询条件和词条进行等值匹配 # 默认取并集(OR) # match查询 GET goods/_search { "query": { "match": { "title": { "query": "华为手机", "operator": "and" } } }, "size": 500 } # wildcard查询: # 会对查询条件进行分词 # 可以使用通配符 ?(任意单个字符) 和 * (0个或多个字符) # wildcard 查询。查询条件分词,模糊查询 GET goods/_search { "query": { "wildcard": { "title": { "value": "华*" } } },"size": 500 } GET /_analyze { "text": ["华为手机"], "analyzer": "ik_max_word" } # 正则查询 GET goods/_search { "query": { "regexp": { "title": "\\w+(.)*" } } } # 前缀查询 GET goods/_search { "query": { "prefix": { "brandName": { "value": "中国" } } } } # 范围查询 GET goods/_search { "query": { "range": { "price": { "gte": 2000, "lte": 3000 } } }, "sort": [ { "price": { "order": "desc" } } ] } # queryString 多条件查询 # 会对查询条件进行分词 # 然后将分词后的查询条件和词条进行等值匹配 # 默认取并集(OR) # 可以指定多个查询字段 GET goods/_search { "query": { "query_string": { "fields": [ "title","categoryName","brandName" ], "query": "华为 AND 手机" } } } # query_string:不识别query中的连接符(or 、and) GET goods/_search { "query": { "simple_query_string": { "fields": [ "title", "categoryName", "brandName" ], "query": "华为 AND 手机" } } } # boolQuery:对多个查询条件连接。连接方式: # must和filter配合使用时,max_score(得分)是显示的 # must 默认数组形式 # must(and):条件必须成立 # must_not(not):条件必须不成立 # should(or):条件可以成立 # filter:条件必须成立,性能比must高。不会计算得分# # 得分:即条件匹配度,匹配度越高,得分越高 GET goods/_search { "query": { "bool": { "must": [ { "term": { "brandName": { "value": "华为" } } } ], "filter": [ { "term": { "title": "手机" } }, { "range": { "price": { "gte": 2000, "lte": 3000 } } } ] } } } GET goods/_search { "query": { "bool": { "filter": [ { "term": { "brandName": { "value": "华为" } } } ] } } } # 聚合查询 # 指标聚合:相当于MySQL的聚合函数。max、min、avg、sum等 # 桶聚合:相当于MySQL的 group by 操作。不要对text类型的数据进行分组,会失败。 GET goods/_search { "query": 
{ "match": { "title": "手机" } }, "aggs": { "max_price": { "max": { "field": "price" } } } } # 桶聚合 分组 GET goods/_search { "query": { "match": { "title": "手机" } }, "aggs": { "goods_brands": { "terms": { "field": "brandName", "size": 100 } } } } # 高亮三要素: # 高亮字段 # 前缀 # 后缀 # # 默认前后缀 :<em></em> GET goods/_search { "query": { "match": { "title": "电视" } }, "highlight": { "fields": { "title": { "pre_tags": "<font color='red'>", "post_tags": "</font>" } } } } ############################################ 重建索引 # 查询别名 默认别名无法查看,默认别名同索引名 GET goods/_alias/ # 新建student_index_v1。索引名称必须全部小写 PUT student_index_v1 { "mappings": { "properties": { "birthday": { "type": "date" } } } } GET student_index_v1 PUT student_index_v1/_doc/1 { "birthday": "1999-11-11" } GET student_index_v1/_search PUT student_index_v1/_doc/1 { "birthday": "1999年11月11日" } # 业务变更了,需要改变birthday字段的类型为text # 1. 创建新的索引 student_index_v2 # 2. 将student_index_v1 数据拷贝到 student_index_v2 # 创建新的索引 student_index_v2 PUT student_index_v2 { "mappings": { "properties": { "birthday": { "type": "text" } } } } # 将student_index_v1 数据拷贝到 student_index_v2 # _reindex 拷贝数据 POST _reindex { "source": { "index": "student_index_v1" }, "dest": { "index": "student_index_v2" } } GET student_index_v2/_search PUT student_index_v2/_doc/2 { "birthday": "1999年11月11日" } # 思考: 现在java代码中操作es,还是使用的实student_index_v1老的索引名称。 # 1. 改代码(不推荐) # 2. 索引别名(推荐) # 步骤: # 先删除student_index_v1 DELETE student_index_v1 # 给student_index_v2起个别名 student_index_v1 POST student_index_v2/_alias/student_index_v1 GET student_index_v1/_search GET student_index_v2/_search
# 默认情况下,es一次展示10条数据,通过from和size来控制分页
# 查询结果详解
GET goods/_search
{
"query": {
"match_all": {}
},
"from": 0,
"size": 100
}
GET goods
/** * 查询所有 * 1. matchAll * 2. 将查询结果封装为Goods对象,装载到List中 * 3. 分页。默认显示10条 */ @Test public void matchAll() throws IOException { //2. 构建查询请求对象,指定查询的索引名称 SearchRequest searchRequest=new SearchRequest("goods"); //4. 创建查询条件构建器SearchSourceBuilder SearchSourceBuilder sourceBuilder=new SearchSourceBuilder(); //6. 查询条件 QueryBuilder queryBuilder= QueryBuilders.matchAllQuery(); //5. 指定查询条件 sourceBuilder.query(queryBuilder); //3. 添加查询条件构建器 SearchSourceBuilder searchRequest.source(sourceBuilder); // 8 . 添加分页信息 不设置 默认10条 // sourceBuilder.from(0); // sourceBuilder.size(100); //1. 查询,获取查询结果 SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); //7. 获取命中对象 SearchHits SearchHits hits = searchResponse.getHits(); //7.1 获取总记录数 Long total= hits.getTotalHits().value; System.out.println("总数:"+total); //7.2 获取Hits数据 数组 SearchHit[] hits1 = hits.getHits(); //获取json字符串格式的数据 List<Goods> goodsList = new ArrayList<>(); for (SearchHit searchHit : hits1) { String sourceAsString = searchHit.getSourceAsString(); //转为java对象 Goods goods = JSON.parseObject(sourceAsString, Goods.class); goodsList.add(goods); } for (Goods goods : goodsList) { System.out.println(goods); } }
term查询和字段类型有关系,首先回顾一下ElasticSearch两个数据类型
ElasticSearch两个数据类型
text:会分词,不支持聚合
keyword:不会分词,将全部内容作为一个词条,支持聚合
term查询:不会对查询条件进行分词。
GET goods/_search
{
"query": {
"term": {
"title": {
"value": "华为"
}
}
}
}
term查询text类型字段时,由于text字段的内容会被分词,只要分词后的某个词条与查询词完全相同,就会被查到
例如:查询title 为“华为”的,title type 为text
查询categoryName 字段时,categoryName字段为keyword ,keyword:不会分词,将全部内容作为一个词条,
即完全匹配,才能查询出结果
GET goods/_search
{
"query": {
"term": {
"categoryName": {
"value": "华为手机"
}
}
}
}
/** * termQuery:词条查询 */ @Test public void testTermQuery() throws IOException { SearchRequest searchRequest = new SearchRequest("goods"); SearchSourceBuilder sourceBulider = new SearchSourceBuilder(); QueryBuilder query = QueryBuilders.termQuery("title","华为");//term词条查询 sourceBulider.query(query); searchRequest.source(sourceBulider); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); SearchHits searchHits = searchResponse.getHits(); //获取记录数 long value = searchHits.getTotalHits().value; System.out.println("总记录数:"+value); List<Goods> goodsList = new ArrayList<>(); SearchHit[] hits = searchHits.getHits(); for (SearchHit hit : hits) { String sourceAsString = hit.getSourceAsString(); //转为java Goods goods = JSON.parseObject(sourceAsString, Goods.class); goodsList.add(goods); } for (Goods goods : goodsList) { System.out.println(goods); } }
match查询:
•会对查询条件进行分词。
•然后将分词后的查询条件和词条进行等值匹配
•默认取并集(OR)
# match查询
GET goods/_search
{
"query": {
"match": {
"title": "华为手机"
}
},
"size": 500
}
match 的默认搜索(or 并集)
例如:华为手机,会分词为 “华为”,“手机” 只要出现其中一个词条都会搜索到
match的 and(交集) 搜索
例如:华为手机,会分词为 “华为”,“手机” 但要求“华为”和“手机”同时出现在词条中
/** * matchQuery:词条分词查询 */ @Test public void testMatchQuery() throws IOException { SearchRequest searchRequest = new SearchRequest("goods"); SearchSourceBuilder sourceBulider = new SearchSourceBuilder(); MatchQueryBuilder query = QueryBuilders.matchQuery("title", "华为手机"); query.operator(Operator.AND);//求并集 sourceBulider.query(query); searchRequest.source(sourceBulider); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); SearchHits searchHits = searchResponse.getHits(); //获取记录数 long value = searchHits.getTotalHits().value; System.out.println("总记录数:"+value); List<Goods> goodsList = new ArrayList<>(); SearchHit[] hits = searchHits.getHits(); for (SearchHit hit : hits) { String sourceAsString = hit.getSourceAsString(); //转为java Goods goods = JSON.parseObject(sourceAsString, Goods.class); goodsList.add(goods); } for (Goods goods : goodsList) { System.out.println(goods); } }
总结:
wildcard查询:不会对查询条件进行分词(属于term级别的查询),而是用带通配符的模式直接匹配字段中的词条。可以使用通配符 ?(任意单个字符) 和 * (0个或多个字符)
"*华*" 包含华字的
"华*" 华字后边零个或多个字符
"华?" 华字后边单个字符
"*华"或"?华" 会引发全表(全索引)扫描 注意效率问题
# wildcard 查询。查询条件不分词,模糊查询
GET goods/_search
{
"query": {
"wildcard": {
"title": {
"value": "华*"
}
}
}
}
/** * 模糊查询:WildcardQuery */ @Test public void testWildcardQuery() throws IOException { SearchRequest searchRequest = new SearchRequest("goods"); SearchSourceBuilder sourceBulider = new SearchSourceBuilder(); WildcardQueryBuilder query = QueryBuilders.wildcardQuery("title", "华*"); sourceBulider.query(query); searchRequest.source(sourceBulider); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); SearchHits searchHits = searchResponse.getHits(); //获取记录数 long value = searchHits.getTotalHits().value; System.out.println("总记录数:"+value); List<Goods> goodsList = new ArrayList<>(); SearchHit[] hits = searchHits.getHits(); for (SearchHit hit : hits) { String sourceAsString = hit.getSourceAsString(); //转为java Goods goods = JSON.parseObject(sourceAsString, Goods.class); goodsList.add(goods); } for (Goods goods : goodsList) { System.out.println(goods); } }
\w:匹配包括下划线的任何单词字符,等价于 [A-Za-z0-9_];在JSON和Java字符串中要写成 \\w,第一个反斜杠是转义符
+:表示前面的字符出现一次或多次
(.)*:表示任意字符出现零次或多次
正则查询取决于正则表达式的效率
GET goods/_search
{
"query": {
"regexp": {
"title": "\\w+(.)*"
}
}
}
RegexpQueryBuilder query = QueryBuilders.regexpQuery("title", "\\w+(.)*");
对keyword类型支持比较好
# 前缀查询 对keyword类型支持比较好
GET goods/_search
{
"query": {
"prefix": {
"brandName": {
"value": "三"
}
}
}
}
PrefixQueryBuilder query = QueryBuilders.prefixQuery("brandName", "三");
//模糊查询
WildcardQueryBuilder query = QueryBuilders.wildcardQuery("title", "华*");//华后多个字符
//正则查询
RegexpQueryBuilder query = QueryBuilders.regexpQuery("title", "\\w+(.)*");
//前缀查询
PrefixQueryBuilder query = QueryBuilders.prefixQuery("brandName", "三");
# 范围查询 GET goods/_search { "query": { "range": { "price": { "gte": 2000, "lte": 3000 } } }, "sort": [ { "price": { "order": "desc" } } ] }
/** * 1. 范围查询:rangeQuery * 2. 排序 */ @Test public void testRangeQuery() throws IOException { SearchRequest searchRequest = new SearchRequest("goods"); SearchSourceBuilder sourceBulider = new SearchSourceBuilder(); //范围查询 RangeQueryBuilder query = QueryBuilders.rangeQuery("price"); //指定下限 query.gte(2000); //指定上限 query.lte(3000); sourceBulider.query(query); //排序 sourceBulider.sort("price", SortOrder.DESC); searchRequest.source(sourceBulider); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); SearchHits searchHits = searchResponse.getHits(); //获取记录数 long value = searchHits.getTotalHits().value; System.out.println("总记录数:"+value); List<Goods> goodsList = new ArrayList<>(); SearchHit[] hits = searchHits.getHits(); for (SearchHit hit : hits) { String sourceAsString = hit.getSourceAsString(); //转为java Goods goods = JSON.parseObject(sourceAsString, Goods.class); goodsList.add(goods); } for (Goods goods : goodsList) { System.out.println(goods); } }
queryString 多条件查询
•会对查询条件进行分词。
•然后将分词后的查询条件和词条进行等值匹配
•默认取并集(OR)
•可以指定多个查询字段
query_string:识别query中的连接符(or 、and)
# queryString
GET goods/_search
{
"query": {
"query_string": {
"fields": ["title","categoryName","brandName"],
"query": "华为 AND 手机"
}
}
}
simple_query_string:不识别query中的连接符(or 、and),查询时会将 “华为”、“and”、“手机”分别进行查询
GET goods/_search
{
"query": {
"simple_query_string": {
"fields": ["title","categoryName","brandName"],
"query": "华为 AND 手机"
}
}
}
query_string:有default_operator连接符的脚本
GET goods/_search
{
"query": {
"query_string": {
"fields": ["title","brandName","categoryName"],
"query": "华为手机 "
, "default_operator": "AND"
}
}
}
java代码
QueryStringQueryBuilder query = QueryBuilders.queryStringQuery("华为手机").field("title").field("categoryName")
.field("brandName").defaultOperator(Operator.AND);
simple_query_string:有default_operator连接符的脚本
GET goods/_search
{
"query": {
"simple_query_string": {
"fields": ["title","brandName","categoryName"],
"query": "华为手机 "
, "default_operator": "OR"
}
}
}
注意:query中的or and 是查询时 匹配条件是否同时出现----or 出现一个即可,and 两个条件同时出现
default_operator的or and 是对结果进行 并集(or)、交集(and)
boolQuery:对多个查询条件连接。连接方式:
•must(and):条件必须成立
•must_not(not):条件必须不成立
•should(or):条件可以成立
•filter:条件必须成立,性能比must高。不会计算得分
**得分:**即条件匹配度,匹配度越高,得分越高
# boolquery #must和filter配合使用时,max_score(得分)是显示的 #must 默认数组形式 GET goods/_search { "query": { "bool": { "must": [ { "term": { "brandName": { "value": "华为" } } } ], "filter":[ { "term": { "title": "手机" } }, { "range":{ "price": { "gte": 2000, "lte": 3000 } } } ] } } } #filter 单独使用 filter可以是单个条件,也可多个条件(数组形式) GET goods/_search { "query": { "bool": { "filter": [ { "term": { "brandName": { "value": "华为" } } } ] } } }
布尔查询:boolQuery
must 、filter为连接方式
term、match为不同的查询方式
//1.构建boolQuery BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); //2.构建各个查询条件 //2.1 查询品牌名称为:华为 TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("brandName", "华为"); boolQuery.must(termQueryBuilder); //2.2. 查询标题包含:手机 MatchQueryBuilder matchQuery = QueryBuilders.matchQuery("title", "手机"); boolQuery.filter(matchQuery); //2.3 查询价格在:2000-3000 RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("price"); rangeQuery.gte(2000); rangeQuery.lte(3000); boolQuery.filter(rangeQuery); sourceBuilder.query(boolQuery);
•指标聚合:相当于MySQL的聚合函数。max、min、avg、sum等
•桶聚合:相当于MySQL的 group by 操作。不要对text类型的数据进行分组,会失败。
# 聚合查询 # 指标聚合 聚合函数 GET goods/_search { "query": { "match": { "title": "手机" } }, "aggs": { "max_price": { "max": { "field": "price" } } } } # 桶聚合 分组 GET goods/_search { "query": { "match": { "title": "手机" } }, "aggs": { "goods_brands": { "terms": { "field": "brandName", "size": 100 } } } }
聚合查询:桶聚合,分组查询
/** * 聚合查询:桶聚合,分组查询 * 1. 查询title包含手机的数据 * 2. 查询品牌列表 */ @Test public void testAggQuery() throws IOException { SearchRequest searchRequest=new SearchRequest("goods"); SearchSourceBuilder sourceBuilder=new SearchSourceBuilder(); //1. 查询title包含手机的数据 MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("title", "手机"); sourceBuilder.query(queryBuilder); //2. 查询品牌列表 只展示前100条 AggregationBuilder aggregation=AggregationBuilders.terms("goods_brands").field("brandName").size(100); sourceBuilder.aggregation(aggregation); searchRequest.source(sourceBuilder); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); //7. 获取命中对象 SearchHits SearchHits hits = searchResponse.getHits(); //7.1 获取总记录数 Long total= hits.getTotalHits().value; System.out.println("总数:"+total); // aggregations 对象 Aggregations aggregations = searchResponse.getAggregations(); //将aggregations 转化为map Map<String, Aggregation> aggregationMap = aggregations.asMap(); //通过key获取goods_brands 对象 使用Aggregation的子类接收 buckets属性在Terms接口中体现 // Aggregation goods_brands1 = aggregationMap.get("goods_brands"); Terms goods_brands =(Terms) aggregationMap.get("goods_brands"); //获取buckets 数组集合 List<? extends Terms.Bucket> buckets = goods_brands.getBuckets(); Map<String,Object>map=new HashMap<>(); //遍历buckets key 属性名,doc_count 统计聚合数 for (Terms.Bucket bucket : buckets) { System.out.println(bucket.getKey()); map.put(bucket.getKeyAsString(),bucket.getDocCount()); } System.out.println(map); }
高亮三要素:
•高亮字段
•前缀
•后缀
默认前后缀 :em
<em>手机</em>
GET goods/_search { "query": { "match": { "title": "电视" } }, "highlight": { "fields": { "title": { "pre_tags": "<font color='red'>", "post_tags": "</font>" } } } }
实施步骤:
高亮查询:
1. 设置高亮
高亮字段
前缀
后缀
2. 将高亮了的字段数据,替换原有数据
/** * * 高亮查询: * 1. 设置高亮 * * 高亮字段 * * 前缀 * * 后缀 * 2. 将高亮了的字段数据,替换原有数据 */ @Test public void testHighLightQuery() throws IOException { SearchRequest searchRequest = new SearchRequest("goods"); SearchSourceBuilder sourceBulider = new SearchSourceBuilder(); // 1. 查询title包含手机的数据 MatchQueryBuilder query = QueryBuilders.matchQuery("title", "手机"); sourceBulider.query(query); //设置高亮 HighlightBuilder highlighter = new HighlightBuilder(); //设置三要素 highlighter.field("title"); //设置前后缀标签 highlighter.preTags("<font color='red'>"); highlighter.postTags("</font>"); //加载已经设置好的高亮配置 sourceBulider.highlighter(highlighter); searchRequest.source(sourceBulider); SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); SearchHits searchHits = searchResponse.getHits(); //获取记录数 long value = searchHits.getTotalHits().value; System.out.println("总记录数:"+value); List<Goods> goodsList = new ArrayList<>(); SearchHit[] hits = searchHits.getHits(); for (SearchHit hit : hits) { String sourceAsString = hit.getSourceAsString(); //转为java Goods goods = JSON.parseObject(sourceAsString, Goods.class); // 获取高亮结果,替换goods中的title Map<String, HighlightField> highlightFields = hit.getHighlightFields(); HighlightField HighlightField = highlightFields.get("title"); Text[] fragments = HighlightField.fragments(); //highlight title替换 替换goods中的title goods.setTitle(fragments[0].toString()); goodsList.add(goods); } for (Goods goods : goodsList) { System.out.println(goods); } }
#查询别名 默认别名无法查看,默认别名同索引名
GET goods/_alias/
#结果
{
"goods" : {
"aliases" : { }
}
}
1.新建student_index_v1索引
# -------重建索引----------- # 新建student_index_v1。索引名称必须全部小写 PUT student_index_v1 { "mappings": { "properties": { "birthday":{ "type": "date" } } } } #查看 student_index_v1 结构 GET student_index_v1 #添加数据 PUT student_index_v1/_doc/1 { "birthday":"1999-11-11" } #查看数据 GET student_index_v1/_search #添加数据 PUT student_index_v1/_doc/1 { "birthday":"1999年11月11日" }
2.重建索引:将student_index_v1 数据拷贝到 student_index_v2
# 业务变更了,需要改变birthday字段的类型为text # 1. 创建新的索引 student_index_v2 # 2. 将student_index_v1 数据拷贝到 student_index_v2 # 创建新的索引 student_index_v2 PUT student_index_v2 { "mappings": { "properties": { "birthday":{ "type": "text" } } } } # 将student_index_v1 数据拷贝到 student_index_v2 # _reindex 拷贝数据 POST _reindex { "source": { "index": "student_index_v1" }, "dest": { "index": "student_index_v2" } } GET student_index_v2/_search PUT student_index_v2/_doc/2 { "birthday":"1999年11月11日" }
3.创建索引库别名:
注意:DELETE student_index_v1 这一操作将删除student_index_v1索引库,并不是删除别名
# Problem: the Java code still addresses the old index name student_index_v1.
#   Option 1: change the code (not recommended)
#   Option 2: give the new index an alias carrying the old name (recommended)

# ---- Practice: add and remove an alias ----

# Give student_index_v2 the alias student_index_v11
POST student_index_v2/_alias/student_index_v11

# Remove that alias again; "index" must name the index the alias is attached to
POST /_aliases
{
  "actions": [
    { "remove": { "index": "student_index_v2", "alias": "student_index_v11" } }
  ]
}

# ---- Switch over to the new index ----

# Step 0: delete the old index. This deletes the index itself together with its data,
# not an alias, so run it only after _reindex has copied the data to student_index_v2.
# It has to happen first: an alias cannot use the name of an existing index.
DELETE student_index_v1

# Step 1: give student_index_v2 the alias student_index_v1
POST student_index_v2/_alias/student_index_v1

# List the aliases of the new index
GET student_index_v2/_alias

# Both names now return the same documents
GET student_index_v1/_search
GET student_index_v2/_search
1. 提升字段查询得分:
将name字段查询比重提升10倍:
GET hotel/_search
{
"explain": true,
"query":{
"multi_match":{
"query": "北京市酒店",
"fields": ["name^10", "address"]
}
}
}
得分系数提升了10倍:
2. 综合提升字段查询得分:
使用tie_breaker将其他query的分数也考虑进去
GET hotel/_search
{
"explain": true,
"query":{
"multi_match":{
"query": "北京市酒店",
"fields": ["name", "address"],
"tie_breaker": 0.3
}
}
}
使用 tie_breaker 和不使用tie_breaker ,查询出来的某一条数据的 _score 分数,会有相应的提高,例
如:
name中包含关键词matched query 的得分,假设是 0.1984226
address中包含关键词matched query的得分,假设是 12.07466
添加了 tie_breaker = 0.3,那么就是这样的了, 0.1984226 * 0.3 + 12.07466 = 12.13418678;
大于最高一条的得分12.07466,这样搜索的关联性就提升上去了, 更为合理。
3. 自定义评分:
通过function_score实现自定义评分:
query中的内容为主查询条件,functions中为判断要为哪些数据加权。weight为加权值。
#为品牌为万豪的酒店,权重值增加50倍 GET hotel/_search { "query": { "function_score": { "query": { "query_string": { "fields": ["name","area","address"], "query": "北京市spa三星" } }, "functions": [ { "filter": { "term": { "brand": "万豪" } }, "weight": 50 } ] } } }
POST hotel/_search { "query": { "range": { "createTime": { "gte": "2015-01-01", "lte": "2021-01-01", "format": "yyyy-MM-dd" } } }, "aggs": { "hotel-brand": { "terms": { "field": "brand", "size": 100 }, "aggs": { "countSale": { "sum": { "field": "salesVolume" } } } } } }
2. 增加排序处理:
GET hotel/_search { "query": { "range": { "createTime": { "gte": "2015-01-01", "lte": "2021-01-01", "format": "yyyy-MM-dd" } } }, "aggs": { "hotel-brand": { "terms": { "field": "brand", "size": 100 }, "aggs": { "countSale": { "sum": { "field": "salesVolume" } } } } }, "sort": [ { "price": { "order": "desc" } } ] }
根据price进行倒序排列。
1. 查全率:
索引内符合条件的结果有N个,查询出来的符合条件的结果有X个, 则查全率为: X/N
比如: 用户的关键词为笔记本(笔记本包含写字的笔记本以及电脑笔记本, 在索引中, 这些记录为
1000条,即N),查询出来的结果如果是100条,即X(包含写字的笔记本以及电脑笔记本), 则查全率为10%。
2. 查准率:
查询出来的X个文档中, 有M个是正确的, 则查准率为:M/X
比如: 用户的关键词为笔记本, 这些记录为1000条,查询出来的结果如果是100条, 而在这100条 (X)当中只有20条(M)为用户期望的电脑笔记本, 则查准率为20%。
bin/logstash-plugin install logstash-input-jdbc
[root@localhost bin]# mkdir mysql
[root@localhost bin]# cp mysql-connector-java-5.1.34.jar /usr/local/logstash-7.10.2/bin/mysql/
input {
  stdin { }
  jdbc {
    # MySQL connection string; "esdb" is the database name
    jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/esdb"
    # User name and password
    jdbc_user => "root"
    jdbc_password => "123456"
    # Path of the JDBC driver jar
    jdbc_driver_library => "/usr/local/logstash-7.10.2/bin/mysql/mysql-connector-java-5.1.34.jar"
    # Driver class name
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "50000"
    # Path and name of the file containing the SQL statement to run
    statement_filepath => "/usr/local/logstash-7.10.2/bin/mysql/jdbc.sql"
    # Polling schedule in cron syntax. Fields from left to right:
    # minute, hour, day of month, month, day of week. All "*" means run every minute.
    schedule => "* * * * *"
  }
}

output {
  elasticsearch {
    # Elasticsearch connection
    hosts => ["192.168.116.140:9200"]
    # Target index name
    index => "hotel"
    document_type => "_doc"
    # Use the database id column as the document id
    document_id => "%{id}"
  }
  stdout {
    # Print each event as one JSON line
    codec => json_lines
  }
}
jdbc.sql文件:
SELECT id, NAME, address, brand, type, price, specs, salesVolume, synopsis, area, imageUrl, createTime, isAd FROM t_hotel
./logstash -f mysql/jdbc.conf
检查结果:
GET hotel/_search
input {
jdbc{
#设置timezone
jdbc_default_timezone => "Asia/Shanghai"
...
# 增量同步属性标识
last_run_metadata_path => "/usr/local/logstash-7.10.2/bin/mysql/last_value"
}
}
SELECT id, NAME, address, brand, type, price, specs, salesVolume, synopsis, area, imageUrl, createTime, isAd FROM t_hotel WHERE createTime >= :sql_last_value
vi /usr/local/logstash-7.10.2/bin/mysql/last_value
给定一个初始的时间:
2021-12-30 00:00:00
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。