当前位置:   article > 正文

springboot整合elasticsearch及热更新字典及同义词_springboot快速更新文章热词

springboot快速更新文章热词

整合

dao层ItemRepository

  1. package com.futhead.es.dao;
  2. import com.futhead.es.model.Item;
  3. import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
  4. import java.util.List;
  5. public interface ItemRepository extends ElasticsearchRepository<Item, Long> {
  6. List<Item> findByPriceBetween(double price1, double price2);
  7. }

model Item

  1. package com.futhead.es.model;
  2. import lombok.AllArgsConstructor;
  3. import lombok.Data;
  4. import lombok.NoArgsConstructor;
  5. import org.springframework.data.annotation.Id;
  6. import org.springframework.data.elasticsearch.annotations.Document;
  7. import org.springframework.data.elasticsearch.annotations.Field;
  8. import org.springframework.data.elasticsearch.annotations.FieldType;
  9. @Data
  10. @AllArgsConstructor
  11. @NoArgsConstructor
  12. @Document(indexName = "item",type = "docs", shards = 1, replicas = 0)
  13. public class Item {
  14. @Id
  15. private Long id;
  16. @Field(type = FieldType.Text, analyzer = "ik_max_word")
  17. private String title; //标题
  18. @Field(type = FieldType.Keyword)
  19. private String category;// 分类
  20. @Field(type = FieldType.Keyword)
  21. private String brand; // 品牌
  22. @Field(type = FieldType.Double)
  23. private Double price; // 价格
  24. @Field(index = false, type = FieldType.Keyword)
  25. private String images; // 图片地址
  26. }

pom.xml

  1. <dependencies>
  2. <dependency>
  3. <groupId>org.springframework.boot</groupId>
  4. <artifactId>spring-boot-starter</artifactId>
  5. </dependency>
  6. <dependency>
  7. <groupId>org.springframework.boot</groupId>
  8. <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
  9. </dependency>
  10. <dependency>
  11. <groupId>org.springframework.boot</groupId>
  12. <artifactId>spring-boot-starter-test</artifactId>
  13. <scope>test</scope>
  14. </dependency>
  15. <dependency>
  16. <groupId>org.projectlombok</groupId>
  17. <artifactId>lombok</artifactId>
  18. <version>1.18.8</version>
  19. <scope>provided</scope>
  20. </dependency>
  21. </dependencies>

es配置

  1. spring.data.elasticsearch.cluster-name=my-application
  2. spring.data.elasticsearch.cluster-nodes=localhost:9300

测试用例

  1. import com.futhead.es.Application;
  2. import com.futhead.es.dao.ItemRepository;
  3. import com.futhead.es.model.Item;
  4. import org.elasticsearch.index.query.QueryBuilders;
  5. import org.elasticsearch.search.aggregations.AggregationBuilders;
  6. import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
  7. import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg;
  8. import org.elasticsearch.search.sort.SortBuilders;
  9. import org.elasticsearch.search.sort.SortOrder;
  10. import org.junit.Test;
  11. import org.junit.runner.RunWith;
  12. import org.springframework.beans.factory.annotation.Autowired;
  13. import org.springframework.boot.test.context.SpringBootTest;
  14. import org.springframework.data.domain.Page;
  15. import org.springframework.data.domain.PageRequest;
  16. import org.springframework.data.domain.Sort;
  17. import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
  18. import org.springframework.data.elasticsearch.core.aggregation.AggregatedPage;
  19. import org.springframework.data.elasticsearch.core.query.FetchSourceFilter;
  20. import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
  21. import org.springframework.data.elasticsearch.core.query.SearchQuery;
  22. import org.springframework.test.context.junit4.SpringRunner;
  23. import java.util.ArrayList;
  24. import java.util.List;
  25. @RunWith(SpringRunner.class)
  26. @SpringBootTest(classes = Application.class)
  27. public class EsDemoApplicationTests {
  28. @Autowired
  29. private ElasticsearchTemplate elasticsearchTemplate;
  30. @Autowired
  31. private ItemRepository itemRepository;
  32. @Test
  33. public void testCreateIndex() {
  34. elasticsearchTemplate.createIndex(Item.class);
  35. }
  36. /**
  37. * @Description:定义批量新增方法
  38. * @Author: https://blog.csdn.net/chen_2890
  39. */
  40. @Test
  41. public void insertList() {
  42. List<Item> list = new ArrayList<>();
  43. list.add(new Item(1L, "坚果手机R1", " 手机", "锤子", 3699.00, "http://image.baidu.com/13123.jpg"));
  44. list.add(new Item(2L, "华为META10", " 手机", "华为", 4499.00, "http://image.baidu.com/13123.jpg"));
  45. list.add(new Item(3L, "小米手机7", "手机", "小米", 3299.00, "http://image.baidu.com/13123.jpg"));
  46. list.add(new Item(4L, "坚果手机R1", "手机", "锤子", 3699.00, "http://image.baidu.com/13123.jpg"));
  47. list.add(new Item(5L, "华为META10", "手机", "华为", 4499.00, "http://image.baidu.com/13123.jpg"));
  48. list.add(new Item(6L, "小米Mix2S", "手机", "小米", 4299.00, "http://image.baidu.com/13123.jpg"));
  49. list.add(new Item(7L, "荣耀V10", "手机", "华为", 2799.00, "http://image.baidu.com/13123.jpg"));
  50. // 接收对象集合,实现批量新增
  51. itemRepository.saveAll(list);
  52. }
  53. /**
  54. * @Description:按照价格区间查询
  55. * @Author: https://blog.csdn.net/chen_2890
  56. */
  57. @Test
  58. public void queryByPriceBetween(){
  59. List<Item> list = this.itemRepository.findByPriceBetween(2000.00, 3500.00);
  60. for (Item item : list) {
  61. System.out.println("item = " + item);
  62. }
  63. }
  64. @Test
  65. public void testTermQuery(){
  66. NativeSearchQueryBuilder builder = new NativeSearchQueryBuilder();
  67. builder.withQuery(QueryBuilders.termQuery("price",998.0));
  68. // 查找
  69. Page<Item> page = this.itemRepository.search(builder.build());
  70. for(Item item:page){
  71. System.out.println(item);
  72. }
  73. }
  74. /**
  75. * @Description:布尔查询
  76. * @Author: https://blog.csdn.net/chen_2890
  77. */
  78. @Test
  79. public void testBooleanQuery(){
  80. NativeSearchQueryBuilder builder = new NativeSearchQueryBuilder();
  81. // builder.withQuery(
  82. // QueryBuilders.boolQuery().must(QueryBuilders.matchQuery("title","华为"))
  83. // .must(QueryBuilders.matchQuery("brand","华为"))
  84. // );
  85. builder.withQuery(
  86. QueryBuilders.boolQuery().should(QueryBuilders.matchQuery("title","荣耀")).boost(2.0f)
  87. .should(QueryBuilders.matchQuery("brand","锤子"))
  88. );
  89. // 查找
  90. Page<Item> page = this.itemRepository.search(builder.build());
  91. for(Item item:page){
  92. System.out.println(item);
  93. }
  94. }
  95. /**
  96. * @Description:嵌套聚合,求平均值
  97. * @Author: https://blog.csdn.net/chen_2890
  98. */
  99. @Test
  100. public void testSubAgg(){
  101. NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
  102. // 不查询任何结果
  103. queryBuilder.withSourceFilter(new FetchSourceFilter(new String[]{""}, null));
  104. // 1、添加一个新的聚合,聚合类型为terms,聚合名称为brands,聚合字段为brand
  105. queryBuilder.addAggregation(
  106. AggregationBuilders.terms("brands").field("brand")
  107. .subAggregation(AggregationBuilders.avg("priceAvg").field("price")) // 在品牌聚合桶内进行嵌套聚合,求平均值
  108. );
  109. // 2、查询,需要把结果强转为AggregatedPage类型
  110. AggregatedPage<Item> aggPage = (AggregatedPage<Item>) this.itemRepository.search(queryBuilder.build());
  111. // 3、解析
  112. // 3.1、从结果中取出名为brands的那个聚合,
  113. // 因为是利用String类型字段来进行的term聚合,所以结果要强转为StringTerm类型
  114. StringTerms agg = (StringTerms) aggPage.getAggregation("brands");
  115. // 3.2、获取桶
  116. List<StringTerms.Bucket> buckets = agg.getBuckets();
  117. // 3.3、遍历
  118. for (StringTerms.Bucket bucket : buckets) {
  119. // 3.4、获取桶中的key,即品牌名称 3.5、获取桶中的文档数量
  120. System.out.println(bucket.getKeyAsString() + ",共" + bucket.getDocCount() + "台");
  121. // 3.6.获取子聚合结果:
  122. InternalAvg avg = (InternalAvg) bucket.getAggregations().asMap().get("priceAvg");
  123. System.out.println("平均售价:" + avg.getValue());
  124. }
  125. }
  126. }

打印es搜索语句(DSL)

  1. @Test
  2. public void testMathQuery(){
  3. // 创建对象
  4. NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
  5. // 在queryBuilder对象中自定义查询
  6. //matchQuery:底层就是使用的termQuery
  7. queryBuilder.withQuery(QueryBuilders.matchQuery("title","坚果"));
  8. //查询,search 默认就是分页查找
  9. SearchQuery searchQuery = queryBuilder.build();
  10. //打印查询语句;
  11. System.out.println("拼接的查询请求======");
  12. System.out.println(searchQuery.getQuery().toString());
  13. Page<Item> page = this.itemRepository.search(searchQuery);
  14. //获取数据
  15. long totalElements = page.getTotalElements();
  16. System.out.println("获取的总条数:"+totalElements);
  17. }

简单的增加权重(boost)

  1. @Test
  2. public void testBooleanQuery(){
  3.     NativeSearchQueryBuilder builder = new NativeSearchQueryBuilder();
  4.     builder.withQuery(
  5.             QueryBuilders.boolQuery().should(QueryBuilders.matchQuery("title","荣耀")).boost(2.0f)
  6.                     .should(QueryBuilders.matchQuery("brand","锤子"))
  7.     );
  8.     Page<Item> page = this.itemRepository.search(builder.build());
  9.     for(Item item:page){
  10.         System.out.println(item);
  11.     }
  12. }

ik分词器,支持热更新

github地址:https://github.com/medcl/elasticsearch-analysis-ik

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
  3. <properties>
  4. <comment>IK Analyzer 扩展配置</comment>
  5. <!--用户可以在这里配置自己的扩展字典 -->
  6. <entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
  7. <!--用户可以在这里配置自己的扩展停止词字典-->
  8. <entry key="ext_stopwords">custom/ext_stopword.dic</entry>
  9. <!--用户可以在这里配置远程扩展字典 -->
  10. <entry key="remote_ext_dict">location</entry>
  11. <!--用户可以在这里配置远程扩展停止词字典-->
  12. <entry key="remote_ext_stopwords">http://xxx.com/xxx.dic</entry>
  13. </properties>

Ps. 该 http 请求需要返回两个头部(header),一个是 Last-Modified,一个是 ETag,这两者都是字符串类型,只要有一个发生变化,该插件就会去抓取新的分词进而更新词库。

同义词、近义词热更新

获取插件

github地址:https://github.com/bells/elasticsearch-analysis-dynamic-synonym

使用方法同ik分词器

Ps. release版本比较少,一般需要根据自己的elasticsearch版本编译

e.g. 我们使用的是 es 6.5.0

git clone https://github.com/bells/elasticsearch-analysis-dynamic-synonym.git

在提交记录中找到与自己使用的 elasticsearch 版本最接近的提交,比如,我们用的是 6.5.0,而提交记录中最高只支持到 6.3.1,因此检出该提交:

git checkout dbe8ebeb6b92d9b403acb76020ea7d64b39abcc8

修改pom.xml文件中的version为6.5.0

编译:mvn clean package

在target/release目录下就能看到最终输出的插件压缩包。

ps. 6.5.0中日志升级了,直接编译不能通过,需要修改

  1. //import org.elasticsearch.common.logging.ESLoggerFactory;
  2. import org.elasticsearch.common.logging.Loggers;
  3. //private static Logger logger = ESLoggerFactory.getLogger("dynamic-synonym");
  4. private static Logger logger = Loggers.getLogger(String.class,"dynamic-synonym");

使用

近义词的两种形式:

# synonyms.txt
# 将 => 左边的词在分词的时候划归为右边的词,检索时只能检索右边的词(左边的多个词用逗号分隔)
西红柿, 圣女果, 番茄 => 洋柿子
# 三个词同义,检索任何一个也能搜索到另外两个
西红柿, 圣女果, 番茄
# 注意:注释必须独占一行并以 # 开头,不能写在同义词规则的行尾
参考资料

https://blog.csdn.net/chen_2890/article/details/83895646

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/盐析白兔/article/detail/68772
推荐阅读
相关标签
  

闽ICP备14008679号