赞
踩
Lucene的常用检索类
1、IndexSearcher:检索操作的核心组件,用于对IndexWriter创建的索引执行只读的检索操作,工作模式为接受Query对象并返回TopDocs对象(其中的scoreDocs字段即ScoreDoc数组)。
2、Term:检索的基本单元,表示检索的字段名称和检索对象的值,如new Term("title", "lucene"),即表示在title字段中搜索关键词lucene。
3、Query:表示查询的抽象类,由相应的Term来标识。
4、TermQuery:最基本的查询类型,用于匹配含有指定值字段的文档。
5、TopDocs:保存查询结果的类。
6、ScoreDoc(Hits):用来装载搜索结果文档队列指针的数组容器。
我们先新建一个索引类:
- package com.qianyan.luceneIndex;
-
- import java.io.IOException;
-
-
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
-
- public class IndexTest {
-
- public static void main(String[] args) throws IOException{
-
- String[] ids = {"1", "2", "3", "4"};
- String[] names = {"zhangsan", "lisi", "wangwu", "zhaoliu"};
- String[] addresses = {"shanghai", "beijing", "guangzhou", "nanjing"};
- String[] birthdays = {"19820720", "19840203", "19770409", "19830130"};
- Analyzer analyzer = new StandardAnalyzer();
- String indexDir = "E:/luceneindex";
- Directory dir = FSDirectory.getDirectory(indexDir);
- //true 表示创建或覆盖当前索引;false 表示对当前索引进行追加
- //Default value is 128
- IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
- for(int i = 0; i < ids.length; i++){
- Document document = new Document();
- document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.ANALYZED));
- document.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED));
- document.add(new Field("address", addresses[i], Field.Store.YES, Field.Index.ANALYZED));
- document.add(new Field("birthday", birthdays[i], Field.Store.YES, Field.Index.ANALYZED));
- writer.addDocument(document);
- }
- writer.optimize();
- writer.close();
- }
-
- }
- package com.qianyan.lucene;
-
- import java.io.IOException;
-
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.BooleanClause;
- import org.apache.lucene.search.BooleanQuery;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TermQuery;
- import org.apache.lucene.search.FuzzyQuery;
- import org.apache.lucene.search.RangeQuery;
- import org.apache.lucene.search.PrefixQuery;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.search.WildcardQuery;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
-
- public class TestSeacher {
-
- public static void main(String[] args) throws IOException {
- String indexDir = "E:/luceneindex";
- Directory dir = FSDirectory.getDirectory(indexDir);
- IndexSearcher searcher = new IndexSearcher(dir);
- ScoreDoc[] hits = null;
-
- Term term = new Term("id", "2");
- TermQuery query = new TermQuery(term);
- TopDocs topDocs = searcher.search(query, 5);
-
- /* 范围检索: 19820720 - 19830130 。 true表示包含首尾
- Term beginTerm = new Term("bithday", "19820720");
- Term endTerm = new Term("bithday", "19830130");
- RangeQuery rangeQuery = new RangeQuery(beginTerm, endTerm, true);
- TopDocs topDocs = searcher.search(rangeQuery, 5);
- */
-
- /* 前缀检索:
- Term term = new Term("name", "z");
- PrefixQuery preQuery = new PrefixQuery(term);
- TopDocs topDocs = searcher.search(preQuery, 5);
- */
-
- /* 模糊查询:例如查找name为zhangsan的数据,那么name为zhangsun、zhangsin也会被查出来
- Term term = new Term("name", "zhangsan");
- FuzzyQuery fuzzyQuery = new FuzzyQuery(term);
- TopDocs topDocs = searcher.search(fuzzyQuery, 5);
- */
-
-
- /* 匹配通配符: * 任何条件 ?占位符
- Term term = new Term("name", "*g??");
- WildcardQuery wildcardQuery = new WildcardQuery(term);
- TopDocs topDocs = searcher.search(wildcardQuery, 5);
- */
-
- /* 多条件联合查询
- Term nterm = new Term("name", "*g??");
- WildcardQuery wildcardQuery = new WildcardQuery(nterm);
-
- Term aterm = new Term("address", "nanjing");
- TermQuery termQuery = new TermQuery(aterm);
-
- BooleanQuery query = new BooleanQuery();
- query.add(wildcardQuery, BooleanClause.Occur.MUST); //should表示"或" must表示"必须"
- query.add(termQuery, BooleanClause.Occur.MUST);
-
- TopDocs topDocs = searcher.search(query, 10);
- */
-
- hits = topDocs.scoreDocs;
-
- for(int i = 0; i < hits.length; i++){
- Document doc = searcher.doc(hits[i].doc);
- //System.out.println(hits[i].score);
- System.out.print(doc.get("id") + " ");
- System.out.print(doc.get("name") + " ");
- System.out.print(doc.get("address") + " ");
- System.out.println(doc.get("birthday") + " ");
- }
-
- searcher.close();
- dir.close();
- }
- }
下面我们来看一个全文索引的案例,data.txt 见文章最下面。首先我们建立对文章的索引:
- package com.qianyan.lucene;
-
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
-
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
-
- public class TestFileReaderForIndex{
-
- public static void main(String[] args) throws IOException{
- File file = new File("E:/data.txt");
- FileReader fRead = new FileReader(file);
- char[] chs = new char[60000];
- fRead.read(chs);
-
- String strtemp = new String(chs);
- String[] strs = strtemp.split("Database: Compendex");
-
- System.out.println(strs.length);
- for(int i = 0; i < strs.length; i++)
- strs[i] = strs[i].trim();
-
- Analyzer analyzer = new StandardAnalyzer();
- String indexDir = "E:/luceneindex";
- Directory dir = FSDirectory.getDirectory(indexDir);
-
- IndexWriter writer = new IndexWriter(dir, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
-
- for(int i = 0; i < strs.length; i++){
- Document document = new Document();
- document.add(new Field("contents", strs[i], Field.Store.YES, Field.Index.ANALYZED));
- writer.addDocument(document);
- }
-
- writer.optimize();
- writer.close();
- dir.close();
- System.out.println("index ok!");
- }
- }
- package com.qianyan.lucene;
-
- import java.io.IOException;
-
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TermQuery;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
-
- public class TestSeacher2 {
-
- public static void main(String[] args) throws IOException {
- String indexDir = "E:/luceneindex";
- Directory dir = FSDirectory.getDirectory(indexDir);
- IndexSearcher searcher = new IndexSearcher(dir);
- ScoreDoc[] hits = null;
-
- Term term = new Term("contents", "ontology");
- TermQuery query = new TermQuery(term);
- TopDocs topDocs = searcher.search(query, 126);
-
- hits = topDocs.scoreDocs;
-
- for(int i = 0; i < hits.length; i++){
- Document doc = searcher.doc(hits[i].doc);
- System.out.print(hits[i].score);
- System.out.println(doc.get("contents"));
- }
-
- searcher.close();
- dir.close();
- }
- }
data.txt 内容如下 :
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。