当前位置:   article > 正文

《ElasticSearch/Kibana系列》ElasticSearch/Kibana实操记录_es分词查询模糊匹配

es分词查询模糊匹配

一、ES分词模糊查询

1.ES分词模糊查询

1.1 什么是分词

​ 在es中我们通过查询某个关键字,从而查询到关键字相关的数据。那么他是怎么去找的?
答案是分词:ES会先把文本拆分成一个个词条(term),再根据词条建立倒排索引并进行匹配。
ES默认支持对英文进行分词,因为英文单词之间以空格分隔;而默认分词器对中文的分词效果并不好(通常会把中文拆成单个汉字)。
对一句话进行分词的组件叫做分词器(analyzer)。

1.2 ik分词器

1.2.1 安装ik分词器
##1. 将下载好的zip的压缩包拷贝到es的plugins目录下
##2. 在此目录下创建一个ik的目录
##3. 在ik目录下将刚才zip压缩包解压
# Create the plugin directory, move the downloaded archive into it, then unpack it there.
[root@hadoop plugins]# mkdir ik
[root@hadoop plugins]# yum -y install unzip
[root@hadoop plugins]# mv elasticsearch-analysis-ik-6.5.3.zip ik/
# The archive must be unpacked inside ik/, so change into it first.
[root@hadoop plugins]# cd ik
[root@hadoop ik]# unzip elasticsearch-analysis-ik-6.5.3.zip && rm -f elasticsearch-analysis-ik-6.5.3.zip

##4. 如果是全分布式的话,所有的节点都得拷贝
##5. 重启es
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
1.2.2 模糊查询
  • 创建索引
# Create the "chinese" index with 3 primary shards and 1 replica.
# The "test" mapping analyzes the "content" field with ik_max_word at both index and search time.
# Note: the top-level key must be spelled "settings"; with the misspelled key the
# shard/replica/analysis configuration is not applied.
curl -HContent-Type:application/json -XPUT 'http://hadoop:9200/chinese?pretty' -d \
'
{
    "settings":{
        "number_of_shards":3,
        "number_of_replicas":1,
        "analysis":{
            "analyzer":{
                "ik":{
                    "tokenizer":"ik_max_word"
                }
            }
        }
    },
    "mappings":{
        "test":{
            "properties":{
                "content":{
                    "type":"text",
                    "analyzer":"ik_max_word",
                    "search_analyzer":"ik_max_word"
                }
            }
        }
    }
}
'
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 导入数据
# Index three sample documents (ids 7, 8 and 10) into chinese/test.
curl -HContent-Type:application/json -XPUT 'http://hadoop:9200/chinese/test/7?pretty' -d \
'
{
    "content":"SSM框架简要介绍_Mr.zhou_Zxy-CSDN博客_简要介绍ssm框架"
}
'
curl -HContent-Type:application/json -XPUT 'http://hadoop:9200/chinese/test/8?pretty' -d \
'
{
    "content":"Mr.zhou_Zxy-CSDN博客"
}
'
# This request carries a document body, so it is an import (PUT), not a DELETE.
curl -HContent-Type:application/json -XPUT 'http://hadoop:9200/chinese/test/10?pretty' -d \
'
{
    "content":"大数据之布隆过滤器学习_Mr.zhou_Zxy-CSDN博客"
}
'
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 测试
# Full-text search: run a match query for "Zxy" against the "content" field of the
# "chinese" index and pretty-print the JSON response.
curl -HContent-Type:application/json -XGET 'http://hadoop:9200/chinese/_search?pretty' -d \
'
{
    "query":{
        "match":{
            "content":"Zxy"
        }
    }
}
'
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10

2.ES的API

2.1 导入依赖

<!-- Elasticsearch transport client; 6.5.3 matches the ik plugin version used in this article -->
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>transport</artifactId>
    <version>6.5.3</version>
</dependency>

<!-- fastjson: 1.2.83 is the 1.x release that fixes the known autoType deserialization vulnerabilities -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.83</version>
</dependency>

<!-- Lombok is an annotation processor: needed at compile time only, so it is not packaged -->
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <version>1.18.8</version>
    <scope>provided</scope>
</dependency>
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19

2.2 连接ES并实现增删改查

package com.bigdata.es;

import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;
import java.net.UnknownHostException;

/**
 * Quick-start demo for the Elasticsearch transport client.
 *
 * <p>Connects to the {@code zxy} cluster, reads document {@code 1}, indexes document {@code 5}
 * and deletes it again, all in index {@code zxy} / type {@code doc}.
 */
public class Demo1_QuickStart {
    /**
     * Runs the get / index / delete round trip and prints the results to stdout.
     *
     * @param args unused
     * @throws UnknownHostException if the configured node address cannot be resolved
     */
    public static void main(String[] args) throws UnknownHostException {
        // 1. Build the client settings; cluster.name is the name of the cluster to join.
        Settings settings = Settings.builder()
                .put("cluster.name", "zxy")
                .build();

        // try-with-resources guarantees the client is closed even when a request fails.
        try (TransportClient client = new PreBuiltTransportClient(settings)) {
            // 2. Register the node to connect to (9300 is the transport port used here).
            client.addTransportAddresses(
                    new TransportAddress(InetAddress.getByName("***.***.***.**"), 9300)
            );

            // 3. Get a document by id and print its source JSON.
            GetResponse getResponse = client.prepareGet("zxy", "doc", "1").get();
            String sourceAsString = getResponse.getSourceAsString();
            System.out.println(sourceAsString);

            // 4. Index a document from a JSON string.
            String json = "{\"username\":\"Mr.zhou\"}";
            IndexResponse indexResponse = client.prepareIndex("zxy", "doc", "5")
                    .setSource(json, XContentType.JSON)
                    .get();
            System.out.println(indexResponse.getIndex());
            System.out.println(indexResponse.getId());
            System.out.println(indexResponse.getType());

            // 5. Delete the document again and report the outcome instead of discarding it.
            DeleteResponse deleteResponse = client.prepareDelete("zxy", "doc", "5").get();
            System.out.println(deleteResponse.getResult());
        }
    }
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44

3. Java代码实现模糊查询

package com.bigdata.es;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;
import java.net.UnknownHostException;

/**
 * Runs a full-text {@code match} query for "Zxy" against the {@code content} field of the
 * {@code chinese} index and prints every hit.
 */
public class Demo2_Search {
    /**
     * Executes the search and prints the hit count, the maximum score and each hit to stdout.
     *
     * @param args unused
     * @throws UnknownHostException if the configured node address cannot be resolved
     */
    public static void main(String[] args) throws UnknownHostException {
        // 1. Build the client settings; cluster.name is the name of the cluster to join.
        Settings settings = Settings.builder()
                .put("cluster.name", "zxy")
                .build();

        // try-with-resources guarantees the client is closed even when the search fails.
        try (TransportClient client = new PreBuiltTransportClient(settings)) {
            // 2. Register the node to connect to.
            client.addTransportAddresses(
                    new TransportAddress(InetAddress.getByName("***.***.***.**"), 9300)
            );

            // 3. Full-text search.
            //
            // SearchType:
            //   QUERY_THEN_FETCH     - the default: the query runs on every relevant shard, which
            //                          scores with its own local term statistics; the matching
            //                          documents are fetched afterwards.
            //   DFS_QUERY_THEN_FETCH - adds a preliminary round that collects term statistics from
            //                          all shards, giving more accurate scores at extra cost.
            //   QUERY_AND_FETCH      - deprecated.
            //
            // QueryBuilders:
            //   matchAllQuery    - matches every document (like "select * from xxx").
            //   matchQuery       - analyzes the text and matches the resulting terms (full-text
            //                      search; closer to a keyword search than to SQL LIKE).
            //   commonTermsQuery - full-text query that gives less weight to very frequent terms;
            //                      for an exact, un-analyzed comparison use termQuery instead.
            SearchResponse searchResponse = client.prepareSearch("chinese")
                    .setSearchType(SearchType.QUERY_THEN_FETCH)
                    .setQuery(QueryBuilders.matchQuery("content", "Zxy"))
                    .get();

            // 4. Print the result.
            SearchHits hits = searchResponse.getHits(); // all hits of this response
            long totalHits = hits.totalHits;            // total number of matching documents
            float maxScore = hits.getMaxScore();        // highest relevance score
            System.out.println("totalHits :" + totalHits);
            System.out.println("maxScore :" + maxScore);
            SearchHit[] searchHits = hits.getHits();    // the hits actually returned
            for (SearchHit hit : searchHits) {
                System.out.println("index :" + hit.getIndex());
                System.out.println("type :" + hit.getType());
                System.out.println("docId :" + hit.getId());
                System.out.println("content :" + hit.getSourceAsString());
            }
        }
    }
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60

二、使用ElasticSearch开发JavaWeb实现查询

使用JavaWeb提供一个页面和搜索框,在搜索框中输入关键词,通过ES检索,找出匹配到的文章内容

使用ES+JavaWeb实现关键词索引相关文章内容

1.JavaCode

1.1.ES2Web

package com.zxy.web.servlet;

import com.zxy.pojo.Suitable;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;

/**
 * Bridge between the web layer and Elasticsearch: looks up documents of the {@code chinese}
 * index whose {@code content} field matches a keyword.
 */
public class ES2Web {
    /**
     * Searches the {@code content} field of the {@code chinese} index with a match query.
     *
     * <p>The {@code content} value of every hit is read from the parsed source map rather than
     * by splitting the raw JSON text, so values containing {@code ':'} or escaped quotes are
     * returned intact.
     *
     * @param indexWord the keyword to search for
     * @return the value held by {@link Suitable#getSuitableWord()}: the list of matching
     *         {@code content} values when at least one was found; otherwise whatever a fresh
     *         {@code Suitable} returns (callers in this project treat {@code null} as "no match")
     * @throws UnknownHostException if the configured node address cannot be resolved
     */
    public static List<String> getIndexWord(String indexWord) throws UnknownHostException {
        System.out.println("ES2Web.getIndexWord():" + indexWord);
        List<String> ulist = new ArrayList<String>();
        Suitable suitable = new Suitable();
        Settings settings = Settings.builder()
                .put("cluster.name", "hzbigdata2101")
                .build();

        // A client is created per call, so it must be closed per call as well; otherwise
        // every search leaks the client's resources.
        try (PreBuiltTransportClient client = new PreBuiltTransportClient(settings)) {
            client.addTransportAddresses(
                    new TransportAddress(InetAddress.getByName("192.168.130.110"), 9300)
            );
            SearchResponse searchResponse = client.prepareSearch("chinese")
                    .setSearchType(SearchType.QUERY_THEN_FETCH)
                    .setQuery(QueryBuilders.matchQuery("content", indexWord))
                    .get();

            SearchHits hits = searchResponse.getHits();
            System.out.println("totalHits:" + hits.totalHits);
            System.out.println("maxScore:" + hits.getMaxScore());
            for (SearchHit hit : hits.getHits()) {
                System.out.println("index:" + hit.getIndex());
                System.out.println("type:" + hit.getType());
                System.out.println("docID:" + hit.getId());
                System.out.println("content:" + hit.getSourceAsString());

                // Skip hits that carry no source or no "content" field instead of failing.
                if (!hit.hasSource()) {
                    continue;
                }
                Object content = hit.getSourceAsMap().get("content");
                if (content != null) {
                    ulist.add(content.toString());
                }
            }
        }

        // Only publish the list when something matched, as the original loop did.
        if (!ulist.isEmpty()) {
            suitable.setSuitableWord(ulist);
        }
        return suitable.getSuitableWord();
    }
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60

1.2.LoginServlet

package com.zxy.web.servlet;

import com.zxy.service.UserService;
import com.zxy.service.impl.UserServiceImpl;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.List;

/**
 * Handles the search form: reads the {@code indexWord} parameter, queries Elasticsearch through
 * {@link ES2Web} and shows either the result page or the error page.
 */
public class LoginServlet extends HttpServlet {

    private UserService userService = new UserServiceImpl();

    /**
     * Runs the keyword search for a submitted form.
     *
     * <p>On a non-empty result the keyword and the matches are stored in the session under
     * {@code indexWord} and {@code suitable} and the browser is redirected to
     * {@code success.jsp}; otherwise the request is forwarded to {@code error.jsp}.
     *
     * @param request  the form submission; expects a UTF-8 encoded {@code indexWord} parameter
     * @param response used for the redirect or forward
     * @throws ServletException if forwarding fails
     * @throws IOException      if the redirect fails or the Elasticsearch host cannot be resolved
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // Must be set before the first getParameter() call. This replaces re-decoding the
        // value from ISO-8859-1, which corrupts the text when the container already decodes
        // the body as UTF-8.
        request.setCharacterEncoding("UTF-8");

        String indexWord = request.getParameter("indexWord");
        // A missing or blank keyword cannot match anything; avoid the NPE and the ES round trip.
        if (indexWord == null || indexWord.trim().isEmpty()) {
            request.getRequestDispatcher("/error.jsp").forward(request, response);
            return;
        }
        System.out.println("loginservlet->indexword:" + indexWord);

        List<String> suitable = ES2Web.getIndexWord(indexWord);
        System.out.println("匹配到的数据:" + suitable);

        if (suitable != null && !suitable.isEmpty()) {
            request.getSession().setAttribute("indexWord", indexWord);
            request.getSession().setAttribute("suitable", suitable);
            // Prefix the context path so the redirect also works when the application is not
            // deployed at the server root.
            response.sendRedirect(request.getContextPath() + "/success.jsp");
        } else {
            request.getRequestDispatcher("/error.jsp").forward(request, response);
        }
    }
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33

2.JspCode

2.1. index.jsp

<%@ page contentType="text/html; charset=UTF-8" pageEncoding="UTF-8" %>
<html>
    <head>
        <meta charset="UTF-8">
        <title>登陆</title>
    </head>
    <body>
        <%-- The form is submitted with POST. --%>
        <%-- The action is built from the context path so that it resolves to the servlet
             mapped at /loginServlet no matter where the application is deployed. --%>
        <form action="<%=request.getContextPath()%>/loginServlet" method="post" accept-charset="UTF-8">
            关键字:<input type="text" name="indexWord" ><br>
<%-- Disabled alternative input that strips everything except word characters and CJK characters while typing: --%>
<%--            关键字:<input name="indexWord" onkeyup="value=value.replace(/[^\w\u4E00-\u9FA5]/g, '')"/><br>--%>
            <input type="submit" value="搜索">
        </form>
    </body>
</html>
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18

2.2.success.jsp

<%@ page import="com.zxy.pojo.User" %>
<%@ page import="java.util.List" %>
<%@ page import="com.zxy.pojo.Suitable" %>
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%!
    // Escapes the HTML-significant characters so that the keyword typed by the user and the
    // indexed document text are rendered as text instead of being interpreted as markup.
    private static String escapeHtml(Object value) {
        if (value == null) {
            return "";
        }
        String text = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
%>
<html>
<head>
    <title>successful</title>
</head>
<%--
JSP provides nine implicit objects:
page
pageContext
request
response
session
application
config
exception (error pages only)
out
--%>
<body>
<h2>搜索引擎</h2>
    <%
        // Keyword stored in the session by the servlet.
        Object indexWord = session.getAttribute("indexWord");
        System.out.println("success.jsp->indexword:" + indexWord);
    %>

    <h3>关键词:<%=escapeHtml(indexWord)%></h3>
    <h3>匹配项:</h3>
    <%
        // The attribute is absent when the page is opened directly, so check before iterating.
        Object suitables = session.getAttribute("suitable");
        if (suitables instanceof List) {
            for (Object suitable : (List<?>) suitables) {
    %>
        <h4><%=escapeHtml(suitable)%></h4>
    <%
            }
        }
    %>
</body>
</html>

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39

2.3.error.jsp

<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<html>
<head>
    <title>error</title>
</head>
<body>
    <%-- Shown when the search produced no result; the page has nothing to do with login. --%>
    <h1 style="color: red">没有找到匹配的内容,请更换关键词后重试!</h1>
</body>
</html>

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10

3.ES导入数据

# Index sample documents for the web demo into chinese/test.
# Documents 3 and 4 are Chinese text about athletes; document 7 repeats the SSM article
# title used in the earlier import step.
curl -HContent-Type:application/json -XPUT 'http://hadoop:9200/chinese/test/3?pretty' -d \
'
{
    "content":"运动员还在国际舞台当中取得冠军"
}
'
curl -HContent-Type:application/json -XPUT 'http://hadoop:9200/chinese/test/4?pretty' -d \
'
{
    "content":"运动员,指从事体育运动的人员,词语起源于古希腊文。运动员分为运动健将、一级运动员、二级运动员、三级运动员、少年级运动员五个技术等级。"
}
'
curl -HContent-Type:application/json -XPUT 'http://hadoop:9200/chinese/test/7?pretty' -d \
'
{
    "content":"SSM框架简要介绍_Mr.zhou_Zxy-CSDN博客_简要介绍ssm框架"
}
'
等数据....
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19

三、Kibana -> Kibana server is not ready yet

前提:防火墙已开放5601端口,
并且ES和Kibana的版本一致,但访问Kibana时仍然提示 "Kibana server is not ready yet"。

1.问题

在这里插入图片描述

2.解决

# Deletes every index whose name starts with ".kibana".
# NOTE(review): Kibana keeps its own data (presumably saved searches, visualizations and
# dashboards) in these indices, so back them up first if anything needs to be kept.
[hadoop@hadoop_zxy bin]$ curl -XDELETE http://101.35.83.222:9200/.kibana*
{"acknowledged":true}
  • 1
  • 2

3.刷新进入

在这里插入图片描述

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家自动化/article/detail/800806
推荐阅读
相关标签
  

闽ICP备14008679号