本代码涉及到的关键类有:
IndexWriter
Directory
Analyzer
Document
Field
IndexSearcher
Term
Query
TermQuery
TopDocs
代码示例(以下依次为 Indexer.java 与 Searcher.java 两个独立的源文件):
package com.yale.lucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index from the files found directly inside one directory.
 *
 * <p>Each accepted file becomes a single document with three stored fields:
 * {@code contents} (analyzed file text), {@code filename} and {@code fullpath}
 * (both indexed verbatim, not analyzed).
 *
 * @author yale
 */
public class Indexer
{
    /** Index location used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_INDEX_DIR = "F://新建文件夹//luceneTest//indexFile";

    /** Data location used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_DATA_DIR = "F://新建文件夹//luceneTest//dataSource";

    /** Directory backing the index; kept so it can be released in {@link #close()}. */
    private final Directory directory;

    /** Writer through which every document is added. */
    private final IndexWriter writer;

    /**
     * Indexes all {@code .txt} files of a data directory and prints how long it took.
     *
     * @param args optional: {@code args[0]} is the index directory, {@code args[1]}
     *             the data directory; missing entries fall back to the built-in defaults
     * @throws Exception if the index cannot be created or a file cannot be read
     */
    public static void main(String[] args) throws Exception
    {
        // Directory the index is written to.
        String indexDir = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_DIR;
        // Directory holding the files to be indexed.
        String dataDir = (args != null && args.length > 1) ? args[1] : DEFAULT_DATA_DIR;

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try
        {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        }
        finally
        {
            // Always release the writer, even when indexing fails half way.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");
    }

    /**
     * Opens the index directory and creates the {@link IndexWriter} on top of it.
     *
     * <p>The writer is constructed with its {@code create} argument set to
     * {@code true} and a {@code StandardAnalyzer}.
     *
     * @param indexDir file-system path of the index directory
     * @throws Exception if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception
    {
        Directory dir = FSDirectory.open(new File(indexDir));
        IndexWriter created;
        try
        {
            created = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                    true, IndexWriter.MaxFieldLength.LIMITED);
        }
        catch (Exception e)
        {
            // Do not leak the directory when the writer could not be created.
            dir.close();
            throw e;
        }
        directory = dir;
        writer = created;
    }

    /**
     * Closes the {@link IndexWriter} and then the underlying {@link Directory}.
     *
     * @throws Exception if closing either resource fails
     */
    public void close() throws Exception
    {
        try
        {
            writer.close();
        }
        finally
        {
            directory.close();
        }
    }

    /**
     * Indexes every regular, visible, readable file directly inside {@code dir}
     * that the filter accepts. Sub-directories are skipped, not descended into.
     *
     * @param dir    path of the directory whose files are indexed
     * @param filter decides which files are indexed; {@code null} accepts all
     * @return the value of {@code writer.numDocs()} after the pass
     * @throws FileNotFoundException if {@code dir} cannot be listed (it does not
     *                               exist, is not a directory, or cannot be read)
     * @throws Exception             if reading or indexing a file fails
     */
    public int index(String dir, FileFilter filter) throws Exception
    {
        File[] files = new File(dir).listFiles();
        if (files == null)
        {
            // listFiles() signals "not a directory / I/O error" with null.
            throw new FileNotFoundException("Not a readable directory: " + dir);
        }
        for (File f : files)
        {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && (filter == null || filter.accept(f)))
            {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Logs the file's canonical path and adds its document to the index.
     *
     * @param f file to index
     * @throws Exception if the file cannot be read or added
     */
    private void indexFile(File f) throws Exception
    {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Builds the Lucene document for one file.
     *
     * @param f source file
     * @return document carrying the {@code contents}, {@code filename} and
     *         {@code fullpath} fields
     * @throws Exception if the file cannot be read
     */
    private Document getDocument(File f) throws Exception
    {
        Document doc = new Document();
        doc.add(new Field("contents", File2Reader(f), Field.Store.YES,
                Field.Index.ANALYZED));
        doc.add(new Field("filename", f.getName(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Reads a whole text file into a string, terminating every line with
     * {@code "\n"} regardless of the line separator used in the file.
     *
     * <p>NOTE(review): the bytes are decoded with the platform default charset
     * because no charset is passed to {@link InputStreamReader}; confirm this
     * matches the encoding of the data files.
     *
     * @param f file to read
     * @return the file's text, one {@code "\n"} after each line
     * @throws Exception if the file cannot be opened or read
     */
    public static String File2Reader(File f) throws Exception
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(f)));
        try
        {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
            {
                text.append(line).append("\n");
            }
            return text.toString();
        }
        finally
        {
            // Release the file handle even when reading fails.
            reader.close();
        }
    }

    /** File filter that accepts names ending in {@code .txt}, ignoring case. */
    private static class TextFilesFilter implements FileFilter
    {
        @Override
        public boolean accept(File pathname)
        {
            return pathname.getName().toLowerCase().endsWith(".txt");
        }
    }
}
package com.yale.lucene;
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a query against the {@code contents} field of an existing Lucene index
 * and prints the stored fields of the best matches.
 */
public class Searcher
{
    /** Index location used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_INDEX_DIR = "F://新建文件夹//luceneTest//indexFile";

    /** Query used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_QUERY = "Apache";

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    /**
     * Searches the index and prints the matches.
     *
     * @param args optional: {@code args[0]} is the index directory, {@code args[1]}
     *             the query string; missing entries fall back to the built-in defaults
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void main(String[] args) throws Exception
    {
        // Directory the index lives in.
        String indexDir = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_DIR;
        // String to search for.
        String queryString = (args != null && args.length > 1) ? args[1] : DEFAULT_QUERY;
        search(indexDir, queryString);
    }

    /**
     * Parses {@code queryString} against the {@code contents} field, runs the
     * search, writes a timing summary to {@code System.err} and prints the
     * {@code fullpath}, {@code filename} and {@code contents} fields of each of
     * the top {@value #MAX_HITS} hits to {@code System.out}.
     *
     * @param indexDir    file-system path of the index directory
     * @param queryString query in {@link QueryParser} syntax
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void search(String indexDir, String queryString)
            throws Exception
    {
        Directory dir = FSDirectory.open(new File(indexDir));
        try
        {
            IndexSearcher is = new IndexSearcher(dir);
            try
            {
                QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
                        new StandardAnalyzer(Version.LUCENE_30));
                Query query = parser.parse(queryString);

                // Only the search call itself is timed, not parsing or printing.
                long start = System.currentTimeMillis();
                TopDocs hits = is.search(query, MAX_HITS);
                long end = System.currentTimeMillis();
                System.err.println("找到 " + hits.totalHits + "个文件 在" + (end - start)
                        + "毫秒匹配 要查询的字符串 '" + queryString + "'");

                for (ScoreDoc scoreDoc : hits.scoreDocs)
                {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fullpath"));
                    System.out.println(doc.get("filename"));
                    System.out.println(doc.get("contents"));
                }
            }
            finally
            {
                // Release the searcher even when parsing or searching throws.
                is.close();
            }
        }
        finally
        {
            dir.close();
        }
    }
}