1.入门代码
class="java" name="code">import java.io.File; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.junit.Test; import com.test.utils.File2DocumentUtil; public class HelloWorld { String filePath = "F:\\eclipse\\LuceneTest\\luceneDatasource\\小笑话_总统的房间 Room .txt"; String indexPath = "F:\\eclipse\\LuceneTest\\luceneIndex"; // 分析器 Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_4_9); /** * 多个Field组成一个Document,多个Document组成一个索引。 * @throws Exception */ @SuppressWarnings("resource") @Test public void createIndex() throws Exception { //索引库对象 IndexWriter writer = null; // 索引文件的保存位置 Directory dir = FSDirectory.open(new File(indexPath)); // 配置类 // APPEND:总是追加,可能会导致错误,索引还会重复,导致返回多次结果 // CREATE:清空重建(推荐) // CREATE_OR_APPEND【默认】:创建或追加 IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE);// 创建模式 OpenMode.CREATE_OR_APPEND // writer = new IndexWriter(dir, iwc); Document doc = File2DocumentUtil.file2Document(filePath); writer.addDocument(doc); writer.close(); } /** 1、创建IndexReader 
2、使用IndexReader创建IndexSearcher 3、根据搜索关键字,使用QueryParser生成Query对象 4、以Query作为参数调用IndexSearcher.search(),执行搜索 5、以TopDocs以及ScoreDocs遍历结果并处理 * @throws IOException * @throws ParseException */ @Test public void search() throws IOException, ParseException { String queryString = "document"; String[] fields = { "name", "content" }; QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_4_9, fields, analyzer); Query query = parser.parse(queryString); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); IndexSearcher indexSearcher = new IndexSearcher(reader); TopDocs topDocs = indexSearcher.search(query, 10000); System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果"); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { int docSn = scoreDoc.doc; Document doc = indexSearcher.doc(docSn); File2DocumentUtil.printDocumnetInfo(doc); } } }
2.File2Document代码
import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; public class File2DocumentUtil { /** * StringField即为NOT_ANALYZED的(即不对域的内容进行分割分析),而TextField是ANALYZED的 因此,创建Field对象时,无需再指定分析类型了 * @param filePath * @return * @throws Exception */ public static Document file2Document(String filePath) throws Exception{ File file = new File(filePath); Document doc = new Document(); FileInputStream input = new FileInputStream(file); Reader reader = new BufferedReader(new InputStreamReader(input)); //StringField索引但不分词 Field nameField = new StringField("name", file.getName(), Field.Store.YES); // LongField:索引但是不分词 Field sizeField = new LongField("size", file.length(), Field.Store.YES); Field pathField = new StringField("path", file.getAbsolutePath(), Field.Store.YES); // TextField:索引并分词 //Field contentField = new TextField("content", reader); Field contentField = new TextField("content", readFile(file), Field.Store.YES); doc.add(nameField); doc.add(sizeField); doc.add(pathField); doc.add(contentField); return doc; } public static String readFile(File file) throws IOException{ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file))); StringBuilder content = new StringBuilder(); for(String line = null; (line = reader.readLine()) != null ;){ content.append(line).append("\n"); } return content.toString(); } public static void printDocumnetInfo(Document doc) { Field field = (Field) doc.getField("name"); System.out.println(field.stringValue()); System.out.println(doc.get("path")); System.out.println(doc.get("size")); System.out.println(doc.get("content")); } }
?