赞
踩
应用背景:
对存储在MinIO服务器的文件实现全文检索。也可以是其他服务器或本地文件,本文仅详细介绍MinIO文件的读取及转换。通过Elasticsearch的Ingest-Attachment插件抽取文件内容,支持Word、Excel、PDF、TXT等格式文件,无需手动解析文件内容。
上代码,详细解释可以阅读代码中的注释。
1.引入依赖
Spring Boot已经管理好了依赖版本,只需引入spring-boot-starter-data-elasticsearch即可:
- <dependency>
- <groupId>org.springframework.boot</groupId>
- <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
- </dependency>
2.配置文件
- elasticsearch:
- host: 192.168.2.154
- port: 9200
3.配置类
- @Setter
- @ConfigurationProperties(prefix = "elasticsearch")
- @Configuration
- public class ElasticSearchConfig {
-
- private String host;
-
- private Integer port;
-
- @Bean
- public RestHighLevelClient restHighLevelClient(){
- RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(new HttpHost(this.host, this.port)));
- return client;
- }
- }
4.实现类
- package com.dmp.document.service.impl;
-
- import com.alibaba.fastjson2.JSONObject;
- import com.dmp.common.constant.HttpStatus;
- import com.dmp.common.core.page.PageDomain;
- import com.dmp.common.core.page.TableDataInfo;
- import com.dmp.common.core.page.TableSupport;
- import com.dmp.document.domain.dto.DocElasticsearchDto;
- import com.dmp.document.domain.entity.DocDocument;
- import com.dmp.document.service.DocDocumentService;
- import com.dmp.document.service.ElasticsearchService;
- import com.dmp.document.service.MinioClientService;
- import com.dmp.system.service.ISysConfigService;
- import lombok.extern.slf4j.Slf4j;
- import org.apache.commons.compress.utils.IOUtils;
- import org.elasticsearch.action.delete.DeleteRequest;
- import org.elasticsearch.action.index.IndexRequest;
- import org.elasticsearch.action.index.IndexResponse;
- import org.elasticsearch.action.search.SearchRequest;
- import org.elasticsearch.action.search.SearchResponse;
- import org.elasticsearch.client.RequestOptions;
- import org.elasticsearch.client.RestHighLevelClient;
- import org.elasticsearch.common.xcontent.XContentType;
- import org.elasticsearch.index.query.BoolQueryBuilder;
- import org.elasticsearch.index.query.MultiMatchQueryBuilder;
- import org.elasticsearch.index.query.QueryBuilders;
- import org.elasticsearch.search.SearchHit;
- import org.elasticsearch.search.builder.SearchSourceBuilder;
- import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
- import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
- import org.elasticsearch.common.text.Text;
- import org.springframework.beans.factory.annotation.Autowired;
- import org.springframework.stereotype.Service;
-
- import java.io.*;
- import java.net.HttpURLConnection;
- import java.net.URL;
- import java.util.*;
-
- import static com.sun.webkit.network.URLs.newURL;
- import static org.elasticsearch.client.RequestOptions.DEFAULT;
-
- /**
- * @author daixin
- * @version 1.0
- * @description: TODO
- * @date 2022/11/23 17:38
- */
- @Slf4j
- @Service
- public class ElasticsearchServiceImpl implements ElasticsearchService {
- @Autowired
- private RestHighLevelClient esClient;
-
- @Autowired
- private MinioClientService minioClientService;
-
- @Autowired
- private DocDocumentService docDocumentService;
-
- @Autowired
- private ISysConfigService sysConfigService;
-
-
- public String createFileIndex(String id, String projectId) throws Exception {
- String result = null;
- InputStream is = null;
- try{
- Date date1 = new Date();
- //查询系统内存储的文件key
- DocDocument docDocument = docDocumentService.getById(id);
- String path = docDocument.getPath();
- //获取minio下载签名
- String url = minioClientService.getDownloadLink("file-bucket",path);
- //请求minio获取文件流
- URL url2= newURL(url);
- HttpURLConnection conn=(HttpURLConnection) url2.openConnection();
- conn.setDoInput(true);
- conn.connect();
- is = conn.getInputStream();
- //转码base64
- byte[] fileByteStream = IOUtils.toByteArray(is);
- String base64String = new String(Base64.getEncoder().encodeToString(fileByteStream).getBytes(), "UTF-8");
- //封装ES请求
- IndexRequest request;
- Map attachmentMap = new HashMap();
- attachmentMap.put("data", base64String);
- attachmentMap.put("fileName", docDocument.getName());
- attachmentMap.put("projectId",projectId);
- //查询系统参数
- String esIndex = sysConfigService.selectConfigByKey("es_index");
- String esPipe = sysConfigService.selectConfigByKey("es_pipe");
- //配置查询请求参数
- request = new IndexRequest(esIndex);
- request.id(String.valueOf(docDocument.getId()));
- request.setPipeline(esPipe);//文件抽取管道,需提前创建
- request.source(JSONObject.toJSONString(attachmentMap), XContentType.JSON);
- IndexResponse response = esClient.index(request, RequestOptions.DEFAULT);
- response.status().toString();
- Date date2 = new Date();
- log.info("创建索引-----耗时:{}ms" , (date2.getTime() - date1.getTime()));
- }catch(Exception e){
- throw e;
- }finally {
- is.close();
- }
- return result;
- }
- @Override
- public TableDataInfo matchContent(String content, String projectId) {
- //此处为若依框架提供的分页,可改为你自己的分页
- PageDomain pageDomain = TableSupport.buildPageRequest();
- Integer pageNum = pageDomain.getPageNum();
- Integer pageSize = pageDomain.getPageSize();
- TableDataInfo rspData = new TableDataInfo();
- rspData.setCode(HttpStatus.SUCCESS);
- rspData.setMsg("查询成功");
- //查询系统参数
- String esIndex = sysConfigService.selectConfigByKey("es_index");
- SearchRequest searchRequest = new SearchRequest(esIndex);
- //布尔查询,检索标题和内容,过滤项目id
- SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
- MultiMatchQueryBuilder matchQueryBuilder = QueryBuilders.multiMatchQuery(content, "attachment.content","fileName");
- BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
- boolQueryBuilder.must(matchQueryBuilder);
- if(projectId != null){
- boolQueryBuilder.filter(QueryBuilders.termQuery("projectId", projectId));
- }
- sourceBuilder.query(boolQueryBuilder);
- //配置高亮
- HighlightBuilder highlightBuilder = new HighlightBuilder();
- highlightBuilder.field("attachment.content"); //content字段高亮
- highlightBuilder.field("fileName");//fileName字段高亮
- highlightBuilder.preTags("<span style='color:red'>"); //高亮前缀
- highlightBuilder.postTags("</span>"); //高亮后缀
- sourceBuilder.highlighter(highlightBuilder);
- //分页查询
- sourceBuilder.from((pageNum-1)*pageSize).size(pageSize);
- searchRequest.source(sourceBuilder);
- SearchResponse searchResponse = null;
- try {
- searchResponse = esClient.search(searchRequest, DEFAULT);
- } catch (Throwable e) {
- //捕捉最高级别异常,确保打印详细信息
- e.printStackTrace();
- }
- if(searchResponse.getHits() == null){
- rspData.setTotal(0);
- rspData.setRows(null);
- return rspData;
- }
- List<DocElasticsearchDto> docElasticsearchList = new ArrayList<>();
- Long totalHits = searchResponse.getHits().getTotalHits().value;//匹配总条数,用于分页显示
- for (SearchHit hit : searchResponse.getHits()){
- //查询结果
- String source = hit.getSourceAsString();
- DocElasticsearchDto docElasticsearchDto = JSONObject.parseObject(source, DocElasticsearchDto.class);
- docElasticsearchDto.setId(hit.getId());
- //处理高亮字段
- Map<String, HighlightField> map = hit.getHighlightFields();
- if(map.containsKey("attachment.content")) {
- StringBuilder matchContent = new StringBuilder();
- for(Text t : map.get("attachment.content").fragments()){
- matchContent.append(t.toString());
- }
- docElasticsearchDto.getAttachment().put("content",matchContent.toString());
- }
- if(map.containsKey("fileName")) {
- StringBuilder matchFileName = new StringBuilder();
- for(Text t : map.get("fileName").fragments()){
- matchFileName.append(t.toString());
- }
- docElasticsearchDto.setFileName(matchFileName.toString());
- }
- docElasticsearchList.add(docElasticsearchDto);
- }
- rspData.setTotal(totalHits);
- rspData.setRows(docElasticsearchList);
- return rspData;
- }
- @Override
- public void deleteFileIndex(String id) throws IOException {
- //查询系统参数,ES索引名称
- String esIndex = sysConfigService.selectConfigByKey("es_index");
- //删除索引
- DeleteRequest deleteRequest = new DeleteRequest(esIndex,id);
- esClient.delete(deleteRequest, RequestOptions.DEFAULT);
- }
-
- }
示例的实现是先从数据库查询到保存的文件信息,然后从MinIO文件存储服务器获取文件流。由于MinIO支持通过签名链接下载文件,这里就直接使用了;你也可以换成其他服务器,或者直接读取本地文件对象。创建索引时,将文件内容以Base64编码后发送给Elasticsearch,Ingest-Attachment插件会自动完成文件内容的抽取与转换。Ingest-Attachment的安装可参考《Docker安装Elasticsearch及相关插件详细步骤,全程亲测避坑》(冰糖码奇朵的博客,CSDN博客)。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。