赞
踩
简单理解:standard会把一句话的每个字都拆开,ik_smart会把一句话按中文词语拆开,ik_max_word则比ik_smart拆解的粒度更细。可以用下面的请求(把analyzer依次换成standard、ik_smart、ik_max_word)感受这三种模式的拆解粒度
GET /books/_analyze
{
"analyzer": "ik_smart",
"text": "深入java虚拟机之一学就废"
}
PUT /books { "settings": { "number_of_shards": 3, "index":{ "analysis.analyzer.default.type": "ik_max_word" } }, "mappings": { "doc":{ "properties": { "title":{ "type": "text", "analyzer":"standard", "search_analyzer":"standard" }, "content":{ "type": "text", "analyzer":"ik_max_word" } } } } }
可以通过 GET /books/_mapping 查看映射
2. 批量创建文档
PUT _bulk
{"index":{"_index":"books","_type":"doc","_id":1}}
{"id":1,"title":"深入java虚拟机","content":"深入java虚拟机之一学就废"}
{"index":{"_index":"books","_type":"doc","_id":2}}
{"id":2,"title":"深入SpringBoot虚拟","content":"深入SpringBoot之虚拟世界一学就废"}
{"index":{"_index":"books","_type":"doc","_id":3}}
{"id":3,"title":"疯狂java讲义","content":"疯狂java讲义之一学就废"}
{"index":{"_index":"books","_type":"doc","_id":4}}
{"id":4,"title":"项目管理知识体系指南","content":"项目管理知识体系指南之一学就废"}
{"index":{"_index":"books","_type":"doc","_id":5}}
{"id":5,"title":"短线炒股实战","content":"短线炒股实战之一学就废"}
GET /books/doc/_search { "query":{ "match": { "title": { "query": "深虚机", "operator": "and" } } }, "highlight": { "fields": { "title": {}, "content": {} } } }
"operator": "and" 表示对输入条件分词后,文档必须匹配所有词条才会返回;"or" 表示对输入条件分词后,文档只需匹配其中任意一个词条就能返回
GET /books/doc/_search { "query":{ "match": { "content": { "query": "深虚机", "operator": "and" } } }, "highlight": { "fields": { "title": {}, "content": {} } } }
可以看到,第一种(搜title)可以搜到结果,而第二种(搜content)搜不到结果
这是因为title是用standard分词的,“深虚机”分词后是“深”、“虚”、“机”,能跟“深入java虚拟机”按单字分词后的结果全部匹配上;而content是用ik_max_word分词的,“深入java虚拟机之一学就废”分词之后如下所示,其中只有“机”这个词条,没有“深”和“虚”,在operator为and的情况下无法全部匹配,所以搜不到
{ "tokens": [ { "token": "深入", "start_offset": 0, "end_offset": 2, "type": "CN_WORD", "position": 0 }, { "token": "java", "start_offset": 2, "end_offset": 6, "type": "ENGLISH", "position": 1 }, { "token": "虚拟机", "start_offset": 6, "end_offset": 9, "type": "CN_WORD", "position": 2 }, { "token": "虚拟", "start_offset": 6, "end_offset": 8, "type": "CN_WORD", "position": 3 }, { "token": "机", "start_offset": 8, "end_offset": 9, "type": "CN_CHAR", "position": 4 }, { "token": "之一", "start_offset": 9, "end_offset": 11, "type": "CN_WORD", "position": 5 }, { "token": "一", "start_offset": 10, "end_offset": 11, "type": "TYPE_CNUM", "position": 6 }, { "token": "学", "start_offset": 11, "end_offset": 12, "type": "CN_CHAR", "position": 7 }, { "token": "就", "start_offset": 12, "end_offset": 13, "type": "CN_CHAR", "position": 8 }, { "token": "废", "start_offset": 13, "end_offset": 14, "type": "CN_CHAR", "position": 9 } ] }
GET /books/doc/_search { "query": { "multi_match": { "query": "深虚拟", "operator": "and", "fields": ["title", "content"] } }, "highlight": { "fields": { "title": {}, "content": {} } } }
高亮结果默认使用 <em> 标签包裹命中的词,也可以通过pre_tags和post_tags自定义:
"highlight": {
"pre_tags": ["<span color='red'>"],
"post_tags": ["</span>"],
"fields": {
"title": {},
"content" : {"fragment_size" : 150, "number_of_fragments" : 3 }
}
}
fragment_size:设置要显示出来的fragment文本片段的长度(字符数),默认是100
number_of_fragments:指定显示高亮的fragment文本片段数量,默认是5
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>${es.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.78</version>
</dependency>
/**
 * Demo REST endpoint that fires a fixed Elasticsearch query through {@link EsTools}.
 */
@RestController
@RequestMapping("/test")
public class TestController {

    /**
     * Runs a {@code match} query for "深虚机" against the {@code title} field of the
     * {@code books} index, requiring every analyzed term to match (operator AND).
     * The hits are printed by {@code EsTools.searchQuery}; they are not returned to the caller.
     *
     * @return the constant string {@code "success"}
     * @throws UnsupportedEncodingException declared by the original signature; nothing in this
     *                                      method body throws it
     */
    @GetMapping("/hello.do")
    public String queryFromEs() throws UnsupportedEncodingException {
        final EsTools esTools = EsTools.getInstance();
        esTools.searchQuery(
                "books",
                QueryBuilders.matchQuery("title", "深虚机").operator(Operator.AND));
        return "success";
    }
}
package com.tyd.utils; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.delete.DeleteResponse; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.action.update.UpdateResponse; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.transport.client.PreBuiltTransportClient; import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.concurrent.ExecutionException; /** * 访问ES的工具类 * * @see 命令行参考https://github.com/elastic/elasticsearch */ public class EsTools { /** * 查看ES路径:elasticsearch-7.5.1\config\elasticsearch.yml cluster.name */ private static final String CLUSTER_NAME = 
"my-application"; private static EsTools esTool; private static TransportClient transportClient; public static EsTools getInstance() { if (null == esTool) { esTool = new EsTools(); } return esTool; } /** * 减少频繁获取连接,定义一个变量存放连接 */ private TransportClient getClient() { if (null == transportClient) { transportClient = getNewClient(); } return transportClient; } public static void main(String[] args) throws IOException { String operate = "search"; if ("addIndicesMapping".equals(operate)) { // 添加索引 String jsonString = "{\"properties\":{\"author\":{\"type\":\"keyword\"},\"title\":{\"type\":\"text\"},\"content\":{\"type\":\"text\"},\"price\":{\"type\":\"integer\"},\"date\":{\"type\":\"date\",\"format\":\"yyyy-MM-dd HH:mm:ss\"}}}"; // 执行成功后显示:----------Add mapping success----------true getInstance().createIndexAndMapping("indices_test", jsonString); } else if ("addDocument".equals(operate)) { // Add id success,version is :1 getInstance().addIndexDocument("indices_test", "_doc"); } else if ("addOrUpdateDocument".equals(operate)) { // bulk success getInstance().bulkIndexDocument("indices_test", "_doc"); } else if ("deleteById".equals(operate)) { getInstance().deleteById("indices_test", "_doc", "id_003"); } else if ("batchDeleteByIds".equals(operate)) { List<String> ids = new ArrayList<String>(); ids.add("id_001"); ids.add("id_002"); getInstance().batchDeleteByIds("indices_test", "_doc", ids); } else if ("updateDocument".equals(operate)) { // result is OK == id指_id getInstance().updateDocument("indices_test", "_doc", "TNZDUHEB_rQdj7R3LnO4", null); } else if ("updateDocumentPrepare".equals(operate)) { // result is UPDATED == id指_id getInstance().updateDocumentPrepare("indices_test", "_doc", "TNZDUHEB_rQdj7R3LnO4", null); } else if ("searchByIndex".equals(operate)) { // id指_id getInstance().searchByIndex("indices_test", "_doc", "TNZDUHEB_rQdj7R3LnO4"); } else if ("queryAll".equals(operate)) { // ..."totalHits":{"value":3,"relation":"EQUAL_TO"},"maxScore":1.0} 
getInstance().queryAll("indices_test"); } else if ("search".equals(operate)) { // 查询全部 // QueryBuilder queryBuilder = QueryBuilders.matchAllQuery(); // getInstance().searchQuery("indices_test", queryBuilder); //以下内容仅仅为查询条件格式 // Span First /*QueryBuilder queryBuilder = QueryBuilders.spanFirstQuery( QueryBuilders.spanTermQuery("title", "title"), 1);*/ /*QueryBuilder queryBuilder =QueryBuilders.spanNearQuery(QueryBuilders.spanTermQuery("title", "title"),1000) .addClause(QueryBuilders.spanTermQuery("title", "title_001")) .addClause(QueryBuilders.spanTermQuery("title", "title_002")) .addClause(QueryBuilders.spanTermQuery("title", "title_003"));*/ // ... } } /** * 根据不同的条件查询 * * @throws Exception */ public void searchQuery(String index, QueryBuilder queryBuilder) { SearchResponse response = getClient().prepareSearch(index).setQuery(queryBuilder).get(); for (SearchHit searchHit : response.getHits()) { System.out.println(JSON.toJSONString(searchHit)); } } /** * 根据索引、类型、id获取记录 * * @param index * @param type * @param id */ public void searchByIndex(String index, String type, String id) { GetResponse response = getClient().prepareGet(index, type, id).execute().actionGet(); String json = response.getSourceAsString(); if (null != json) { System.out.println(json); } else { System.out.println("no result"); } } /** * 修改内容 * * @throws Exception */ public boolean updateDocumentPrepare(String index, String type, String id, XContentBuilder source) { XContentBuilder endObject; try { // 修改后的内容 endObject = XContentFactory.jsonBuilder().startObject().field("author", "test_prepare_001").endObject(); UpdateResponse response = getClient().prepareUpdate(index, type, id).setDoc(endObject).get(); System.out.println("result is " + response.getResult().name()); return "UPDATED".equals(response.getResult().name()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return false; } /** * 修改内容 * * @return */ @SuppressWarnings("deprecation") public boolean 
updateDocument(String index, String type, String id, XContentBuilder source) { Date time = new Date(); // 创建修改请求 UpdateRequest updateRequest = new UpdateRequest(); updateRequest.index(index); updateRequest.type(type); updateRequest.id(id); try { // 根据实际需要调整方法参数source里的值 updateRequest.doc(XContentFactory.jsonBuilder().startObject().field("author", "author001").field("title", "title001") .field("content", "content001") .field("date", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(time)).endObject()); UpdateResponse response = getClient().update(updateRequest).get(); System.out.println("result is " + response.status().name()); return "OK".equals(response.status().name()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ExecutionException e) { // TODO Auto-generated catch block e.printStackTrace(); } return false; } /** * 根据id批量删除 * * @param index * @param type * @param ids * @return */ public boolean batchDeleteByIds(String index, String type, List<String> ids) { if (null == ids || ids.isEmpty()) { System.out.println("ids is require"); return true; } BulkRequestBuilder builder = getClient().prepareBulk(); for (String id : ids) { builder.add(getClient().prepareDelete(index, type, id).request()); } BulkResponse bulkResponse = builder.get(); System.out.println(bulkResponse.status()); if (bulkResponse.hasFailures()) { System.out.println("has failed, " + bulkResponse.status().name()); return false; } return true; } /** * 根据索引名称、类型和id删除记录 * * @param indexName * @param type * @param id */ public void deleteById(String indexName, String type, String id) { DeleteResponse dResponse = getClient().prepareDelete(indexName, type, id).execute().actionGet(); if ("OK".equals(dResponse.status().name())) { System.out.println("delete id success"); } else { System.out.println("delete id failed : " + dResponse.getResult().toString()); } } /** * 
删除某个索引下所有数据 * * @param indexName * @return * @see 删除不存在的索引时,记录实际情况,默认返回成功 */ public boolean deleteAllIndex(String indexName) { if (null == indexName || "".equals(indexName.trim())) { System.out.println("Error: index name is require."); return false; } //如果传人的indexName不存在会出现异常.可以先判断索引是否存在: IndicesExistsRequest inExistsRequest = new IndicesExistsRequest(indexName); IndicesExistsResponse inExistsResponse = getClient().admin().indices() .exists(inExistsRequest).actionGet(); if (inExistsResponse.isExists()) { AcknowledgedResponse response = getClient().admin().indices().prepareDelete(indexName) .execute().actionGet(); System.out.println("delete index date, result is " + response.isAcknowledged()); return response.isAcknowledged(); } else { System.out.println("index is not existed"); } return true; } /** * 查询索引下的全部数据 * * @param index * @param type */ public String queryAll(String index) { QueryBuilder queryBuilder = QueryBuilders.matchAllQuery(); SearchResponse response = getClient().prepareSearch(index).setQuery(queryBuilder).get(); SearchHits resultHits = response.getHits(); return JSON.toJSONString(resultHits); } /** * 添加或者修改ES里的数据 * * @param index * @param type */ public void bulkIndexDocument(String index, String type) { BulkRequestBuilder bulkRequest = getClient().prepareBulk(); Date time = new Date(); try { bulkRequest.add(getClient().prepareIndex(index, type, "id_002") .setSource(XContentFactory.jsonBuilder() .startObject() .field("id", "id_002") .field("author", "author_002") .field("title", "titile_002") .field("content", "content_002") .field("price", "20") .field("date", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(time)) .endObject() ) ); bulkRequest.add(getClient().prepareIndex(index, type, "id_003") .setSource(XContentFactory.jsonBuilder() .startObject() .field("id", "id_003") .field("author", "author_003") .field("title", "title_003") .field("content", "content_003") .field("price", "30") .field("date", new SimpleDateFormat("yyyy-MM-dd 
HH:mm:ss").format(time)) .endObject() ) ); BulkResponse bulkResponse = bulkRequest.get(); if (bulkResponse.hasFailures()) { // process failures by iterating through each bulk response item System.out.println("bulk has failed and token " + bulkResponse.getTook()); } else { System.out.println("bulk success"); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 根据索引添加数据 * * @param index * @param type */ public void addIndexDocument(String index, String type) { Date time = new Date(); IndexResponse response = null; try { response = getInstance().getClient().prepareIndex(index, type) .setSource(XContentFactory.jsonBuilder() // 以下内容可以封装成一个对象,然后重新解析成如下格式(方法多加一个参数,建议使用反射方式改成通用方法) .startObject() .field("id", "id_001") .field("author", "author_001") .field("title", "title_001") .field("content", "content_001") .field("price", "10") .field("date", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(time)) .endObject()) .get(); System.out.println("Add id success,version is :" + response.getVersion()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 创建索引和mapping * * @param indiceName * @throws Exception */ public boolean createIndexAndMapping(String indiceName, String json) { if (null == indiceName || "".equals(indiceName.trim())) { System.out.println("indice is required"); return false; } String content = "content"; CreateIndexRequestBuilder cib = getClient().admin().indices().prepareCreate(indiceName); XContentBuilder builderMapping = generateMappingBuilder(json); cib.addMapping(content, builderMapping); CreateIndexResponse res = cib.execute().actionGet(); if (res.isAcknowledged()) { System.out.println("----------Add mapping success----------" + res.isAcknowledged()); } else { System.out.println("----------Add mapping failed-----------" + res.isAcknowledged()); } return res.isAcknowledged(); } /** * 根据json动态构造mapping索引对应的XContentBuilder * * @param objJson * @param builder * @param isBegin 
是否是开始位置 */ private XContentBuilder generateMappingBuilder(Object object) { XContentBuilder builder = null; try { builder = XContentFactory.jsonBuilder(); JSONObject jsonObj = null; if (object instanceof String) { jsonObj = JSON.parseObject((String) object); } // json对象 generateMappingBuilder(jsonObj, builder, true); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println("get json builder error"); } return builder; } /** * 根据json对象动态构造mapping索引对应的XContentBuilder * * @param objJson * @param builder * @param isBegin 是否是开始位置 */ private void generateMappingBuilder(Object objJson, XContentBuilder builder, boolean isBegin) { try { // #builder构造,需要添加一个开始"{" if (isBegin) { builder.startObject(); } // json数组 if (objJson instanceof JSONArray) { JSONArray objArray = (JSONArray) objJson; for (int i = 0; i < objArray.size(); i++) { generateMappingBuilder(objArray.get(i), builder, false); } } // json对象 else if (objJson instanceof JSONObject) { JSONObject jsonObject = (JSONObject) objJson; Iterator<String> it = jsonObject.keySet().iterator(); while (it.hasNext()) { String key = it.next().toString(); Object object = jsonObject.get(key); // builder:key;这里区分object和普通的属性(冒号前认为为对象,冒号后为属性) if (!key.equals("type") && !key.equals("format")) { builder.startObject(key); // System.out.println("==" + key); } // json数组 if (object instanceof JSONArray) { JSONArray objArray = (JSONArray) object; generateMappingBuilder(objArray, builder, false); } // json对象 else if (object instanceof JSONObject) { generateMappingBuilder((JSONObject) object, builder, false); } // 其他 else { builder.field(key, object.toString()); // System.out.println("====" + key + "," + object.toString()); } } // #builder构造,需要添加一个结束"}" builder.endObject(); // System.out.println("=="); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println("generate mapping builder failed"); } } /** * @return * @throws IOException */ private 
XContentBuilder generateMapping() throws IOException { XContentBuilder builder = XContentFactory.jsonBuilder(); builder = builder // #builer开始"{" .startObject() .startObject("properties") //设置之定义字段 .startObject("author") .field("type", "keyword") //设置数据类型 .endObject() .startObject("title") .field("type", "text") .endObject() .startObject("content") .field("type", "text") .endObject() .startObject("price") .field("type", "integer") .endObject() .startObject("date") .field("type", "date") //设置Date类型 .field("format", "yyyy-MM-dd HH:mm:ss") //设置Date的格式 .endObject() .endObject() // #builer结束"}" .endObject(); return builder; } /** * 获取访问ES的连接 */ private TransportClient getNewClient() { TransportClient client = null; try { Settings settings = Settings.builder().put("cluster.name", CLUSTER_NAME) // 开启嗅探功能(即自动检测集群内其他的节点和新加入的节点);或者全部用addTransportAddress添加,如下: .put("client.transport.sniff", true).build(); client = new PreBuiltTransportClient(settings) .addTransportAddress(new TransportAddress(InetAddress.getByName("127.0.0.1"), 9300)); } catch (UnknownHostException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println("get host error"); } return client; } }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。