赞
踩
之前写过一篇博客介绍Elasticsearch和Kibana的搭建,在本篇里我将会总结一下创建索引以及使用当前最新版本Kibana(7.9.3)管理Elasticsearch索引的方法,话不多说直接进入正题。
如果你要处理的内容是中文,那么最好还是先安装一个中文分词器(ik,见其官方下载地址),选择和你的es匹配的版本下载,将压缩包解压到es安装目录的/plugins/ik文件夹下即可。
开启Kibana服务之后,点击左上角的菜单按钮选择Management下的Dev Tools:
之后在Console中输入新建索引的配置信息,字段设置在mappings/properties下面,可以参考下面的写法:
PUT job_engine
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "crawl_time_first": { "type": "date" },
      "crawl_time_latest": { "type": "date" },
      "url": { "type": "text" },
      "url_md5": { "type": "text" },
      "key_word": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_max_word"
      },
      "company": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_max_word"
      },
      "salary": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_max_word"
      },
      "detail_address": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_max_word"
      },
      "city": { "type": "text" },
      "basic_requirement": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_max_word"
      },
      "detail_description": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_max_word"
      },
      "source": { "type": "text" }
    }
  }
}
之后点击左边编辑框右上角的三角形按钮提交,如果在右边的返回结果如下图所示则建立索引成功!
之后你可以点击左上角的菜单按钮选择Kibana/Discover进入到下面的页面中,选择Index Management:
之后就能看到刚刚创建的索引了:
妥善使用这个管理工具,一些基本的功能如下图所示:
我们已经建立好了一个索引,接下来尝试往其中插入数据,我们借助python来操作Elasticsearch。首先安装相关工具包:
pip install elasticsearch
然后建立如下的python文件,定义数据结构以及使用python管理Elasticsearch的结构代码:
from elasticsearch import Elasticsearch
from elasticsearch import helpers


class DataStructureJobEngine(object):
    """Plain data holder mirroring the fields of the ``job_engine`` index."""

    def __init__(self, crawl_time, url, url_md5, key_word, company, salary,
                 detail_address, city, basic_requirement, detail_description,
                 source):
        self.crawl_time = crawl_time
        self.url = url
        self.url_md5 = url_md5
        self.key_word = key_word
        self.company = company
        self.salary = salary
        self.detail_address = detail_address
        self.city = city
        self.basic_requirement = basic_requirement
        self.detail_description = detail_description
        self.source = source


class ElasticObj(object):
    """Thin wrapper around an Elasticsearch client bound to a single index."""

    def __init__(self, index_name, ip="127.0.0.1", port="9200", password=""):
        self.index_name = index_name
        self.ip_address = ip
        self.port = port
        # NOTE(review): password is stored but never used -- enabling auth
        # would also need a username (http_auth=); confirm the auth scheme.
        self.password = password
        # Use a "host:port" string so the configured port is honoured; the
        # original passed only the ip and always hit the client's default port.
        self.es = Elasticsearch(["%s:%s" % (self.ip_address, self.port)])

    def _to_body(self, data):
        # Single source of truth for the field mapping; this dict used to be
        # duplicated between insertData and insertBatchData.
        return {
            "crawl_time": data.crawl_time,
            "url": data.url,
            "url_md5": data.url_md5,
            "key_word": data.key_word,
            "company": data.company,
            "salary": data.salary,
            "detail_address": data.detail_address,
            "city": data.city,
            "basic_requirement": data.basic_requirement,
            "detail_description": data.detail_description,
            "source": data.source,
        }

    def insertData(self, data, doc_id=1):
        """Index a single document.

        Bug fix: the document id was hard-coded to 1, so successive calls
        silently overwrote the same document.  ``doc_id`` defaults to 1 for
        backward compatibility but lets callers store distinct documents.
        """
        self.es.index(index=self.index_name, body=self._to_body(data), id=doc_id)

    def insertBatchData(self, data_list, id_start):
        """Bulk-index ``data_list``; ids are ``id_start``, ``id_start + 1``, ...

        Uses ``elasticsearch.helpers.bulk`` for a single round-trip.
        """
        actions = []
        for idx, data in enumerate(data_list):
            action = {"_index": self.index_name, "_id": id_start + idx}
            action.update(self._to_body(data))
            actions.append(action)
        helpers.bulk(self.es, actions)

    def deleteData(self, id):
        """Delete the document with the given id.

        The parameter name ``id`` shadows the builtin but is kept so existing
        keyword callers are not broken.
        """
        self.es.delete(index=self.index_name, id=id)

    def getDataSize(self):
        """Return the current number of documents in the index."""
        return self.es.count(index=self.index_name)["count"]


if __name__ == "__main__":
    e = ElasticObj("job_engine")
    # e.deleteData(0)
    # test_data = DataStructureJobEngine(
    #     "2020-12-28",
    #     "http://www.baidu.com",
    #     "adeadae232",
    #     "test", "test", "test", "test",
    #     "test", "test", "test", "test",
    # )
    # e.insertData(test_data)
    print(e.getDataSize())
上面的代码能够通过测试,并且检查es中对应的索引数据发生变化即说明该模块的代码运转正常(因为是测试代码,所以安全性暂时不考虑)。
当然,介绍最为详细的还是官方文档,es和kibana的相关内容的官方文档传送门:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。