Lucene自定义搜索结果的排序代码分享_JAVA_编程开发_程序员俱乐部

中国优秀的程序员网站程序员频道CXYCLUB技术地图
热搜:
更多>>
 
您所在的位置: 程序员俱乐部 > 编程开发 > JAVA > Lucene自定义搜索结果的排序代码分享

Lucene自定义搜索结果的排序代码分享

 2011/11/23 8:17:20  ForgiDaved  http://forgidaved.iteye.com  我要评论(0)
  • 摘要:最近在做Lucene的应用,趁着一个节点的间歇,总结了一下Lucene中有关自定义搜索结果排序的相关代码,一来和大家共同探讨,二来也便于备忘。众所周知,Lucene默认按Score从高到低对结果排序,当Score相等时,则按建立索引时生成的docID由小到大排序。通过自定义搜索结果的排序,则可以完全按照真实业务的需要来决定结果的顺序。下面以一个按距离查询餐馆的例子配合代码进行讲解(该例在很多地方都能找到,但是我参考的时候发现很多地方提供的例子都不能直接运行)
  • 标签:代码 结果 搜索结果 自定义
    最近在做Lucene的应用,趁着一个节点的间歇,总结了一下Lucene中有关自定义搜索结果排序的相关代码,一来和大家共同探讨,二来也便于备忘。
    众所周知,Lucene默认按Score从高到低对结果排序,当Score相等时,则按建立索引时生成的docID由小到大排序。通过自定义搜索结果的排序,则可以完全按照真实业务的需要来决定结果的顺序。
    下面以一个按距离查询餐馆的例子配合代码进行讲解(该例在很多地方都能找到,但是我参考的时候发现很多地方提供的例子都不能直接运行)。可以直接运行的示例代码如下:
    DistanceComparatorSource.java
package com.xxx.demo;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache.IntParser;

/**
 * Comparator source that orders search hits by their straight-line distance
 * from a fixed origin point, nearest first.
 *
 * <p>The sorted field must hold text of the form {@code "x,y"} where both
 * parts are decimal integers (for example {@code "3,8"}).
 */
public class DistanceComparatorSource extends FieldComparatorSource{
	/*
	 * The parsers are shared singletons on purpose: FieldCache looks entries up
	 * by parser object as well as by field name, so handing it a brand-new
	 * parser on every search would never find the previously cached arrays and
	 * would keep adding new ones.
	 */
	/** Reads the part before the first comma of an {@code "x,y"} value. */
	private static final IntParser X_PARSER = new CoordinateParser(0);
	/** Reads the part after the first comma of an {@code "x,y"} value. */
	private static final IntParser Y_PARSER = new CoordinateParser(1);

	/** X coordinate of the origin that distances are measured from. */
	private final int x;
	/** Y coordinate of the origin that distances are measured from. */
	private final int y;
	
	/**
	 * @param x x coordinate of the origin
	 * @param y y coordinate of the origin
	 */
	public DistanceComparatorSource(int x,int y){
		this.x = x;
		this.y = y;
	}
	
	/**
	 * Creates a distance comparator for the given field.
	 *
	 * @param fieldname name of the field holding the {@code "x,y"} text
	 * @param numHits   number of slots the comparator has to provide
	 * @param sortPos   not used by this implementation
	 * @param reversed  not used by this implementation
	 * @return a new comparator bound to this source's origin
	 */
	@Override
	public FieldComparator newComparator(String fieldname,int numHits,
			int sortPos,boolean reversed) throws IOException{
		return new DistanceScoreDocLookupComparator(fieldname,numHits);
	}
	
	/**
	 * Extracts one comma-separated integer component from a field value.
	 */
	private static final class CoordinateParser implements IntParser{
		/** Zero-based position of the component within the comma-separated text. */
		private final int index;

		CoordinateParser(int index){
			this.index = index;
		}

		@Override
		public int parseInt(String string){
			return Integer.parseInt(string.split(",")[index]);
		}
	}
	
	/**
	 * Comparator that keeps one distance per slot and compares slots, or the
	 * current bottom slot against a document, by that distance.
	 */
	private class DistanceScoreDocLookupComparator extends FieldComparator{
		/** Per-document coordinates loaded for the reader set in {@link #setNextReader}. */
		private int[] xDoc,yDoc;
		/** Distance stored for each slot. */
		private float[]  values;
		/** Distance of the slot most recently passed to {@link #setBottom}. */
		private float bottom;
		String fieldName;
	
		public DistanceScoreDocLookupComparator(String fieldName,int numHits){
			values = new float[numHits];
			this.fieldName = fieldName;
		}

		/** Orders two slots by ascending stored distance. */
		@Override
		public int compare(int slot1,int slot2){
			if(values[slot1]<values[slot2]) return -1;
			if(values[slot1]>values[slot2]) return 1;
			return 0;
		}

		/**
		 * Compares the bottom distance with the distance of {@code doc}:
		 * negative when the bottom is nearer, positive when it is farther.
		 */
		@Override
		public int compareBottom(int doc) throws IOException{
			float docDistance = getDistance(doc); 
			if(bottom<docDistance) return -1;
			if(bottom>docDistance) return 1;
			return 0;
		}

		/** Stores the distance of {@code doc} into {@code slot}. */
		@Override
		public void copy(int slot,int doc) throws IOException{
			values[slot] = getDistance(doc);
		}

		/** Remembers the distance held in {@code slot} as the bottom value. */
		@Override
		public void setBottom(int slot){
			bottom = values[slot];
		}

		/**
		 * Loads the x and y coordinate arrays of {@code reader} from the field cache.
		 * {@code docBase} is not used.
		 */
		@Override
		public void setNextReader(IndexReader reader,int docBase)
				throws IOException{
			xDoc = FieldCache.DEFAULT.getInts(reader,this.fieldName,X_PARSER);
			yDoc = FieldCache.DEFAULT.getInts(reader,this.fieldName,Y_PARSER);
		}
		
		/** Returns the distance stored in {@code slot}, boxed as a {@link Float}. */
		@Override
		public Float value(int slot){
			return Float.valueOf(values[slot]);
		}
		
		/**
		 * Euclidean distance between the origin and the coordinates of {@code doc}
		 * in the currently loaded arrays.
		 */
		private float getDistance(int doc){
			// Work in double: int subtraction and squaring can overflow for large
			// coordinates, which would hand Math.sqrt a negative number.
			double deltax = (double)xDoc[doc] - x;
			double deltay = (double)yDoc[doc] - y;
			return (float)Math.sqrt(deltax*deltax + deltay*deltay);
		}
		
		/** Returns {@link SortField#CUSTOM}. */
		public int sortType(){
			return SortField.CUSTOM;
		}
	}
	
	@Override
	public String toString(){
		return "Distance from ("+x+","+y+")";
	}

}

     DistanceSortingTest.java
package com.xxx.demo;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;


public class DistanceSortingTest extends TestCase{
	private RAMDirectory directory;
	private IndexSearcher searcher ;
	private Query query;
	
	protected void setUp() throws Exception{
		directory = new RAMDirectory();
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33,new StandardAnalyzer(Version.LUCENE_33));
		config.setOpenMode(OpenMode.CREATE);
		IndexWriter writer = new IndexWriter(directory,config);
		addPoint(writer,"El Charro","restaurant restaurant restaurant",1,2);//5
		addPoint(writer,"Cafe Poca Cosa","restaurant",5,9);//25+81=106
		addPoint(writer,"Los Betos","restaurant",9,6);//81+36=117
		addPoint(writer,"Nico's Taco Shop","restaurant restaurant",3,8);//9+64=73
		
		writer.close();
		
		searcher = new IndexSearcher(directory);
		QueryParser parser = new QueryParser(Version.LUCENE_33, "type", new StandardAnalyzer(Version.LUCENE_33));
		query = parser.parse("type:restaurant");
	}
	
	private void addPoint(IndexWriter writer,String name,String type,int x,int y) 
	throws CorruptIndexException, IOException{
		Document doc = new Document();
		doc.add(new Field("name",name,Field.Store.YES,Field.Index.ANALYZED));
		doc.add(new Field("type",type,Field.Store.YES,Field.Index.ANALYZED));
		doc.add(new Field("location",x+","+y,Field.Store.YES,Field.Index.NOT_ANALYZED));
		writer.addDocument(doc);
	}
	
	public void testNormRestaurant() throws IOException{
		TopDocs hits = searcher.search(query,10);
		System.out.println("--------testNormRestaurant---------- ");
		for(ScoreDoc doc : hits.scoreDocs){
			System.out.println("docId:"+doc.doc+"score:"+doc.score+", name:"+searcher.doc(doc.doc).get("name"));
		}
		assertEquals("first","Cafe Poca Cosa",searcher.doc(hits.scoreDocs[0].doc).get("name"));
		assertEquals("second","Los Betos",searcher.doc(hits.scoreDocs[1].doc).get("name"));
		assertEquals("third","Nico's Taco Shop",searcher.doc(hits.scoreDocs[2].doc).get("name"));
		assertEquals("forth","El Charro",searcher.doc(hits.scoreDocs[3].doc).get("name"));
	}
	
	public void testNearestRestaurantToHome() throws IOException{
		Sort sort = new Sort(new SortField("location",new DistanceComparatorSource(0,0)));
		TopDocs hits = searcher.search(query,null,10,sort);
		System.out.println("--------testNearestRestaurantToHome---------- ");
		for(ScoreDoc doc : hits.scoreDocs){
			System.out.println("docId:"+doc.doc+"name:"+searcher.doc(doc.doc).get("name"));
		}
		assertEquals("cloest","El Charro",searcher.doc(hits.scoreDocs[0].doc).get("name"));
		assertEquals("second","Nico's Taco Shop",searcher.doc(hits.scoreDocs[1].doc).get("name"));
		assertEquals("third","Cafe Poca Cosa",searcher.doc(hits.scoreDocs[2].doc).get("name"));
		assertEquals("furthest","Los Betos",searcher.doc(hits.scoreDocs[3].doc).get("name"));
	}
	
	public void testNearestRestaurantToWork() throws IOException{
		Sort sort = new Sort(new SortField("location",new DistanceComparatorSource(10,10)));
		TopFieldDocs docs = searcher.search(query,null,3,sort);
		assertEquals(4,docs.totalHits);
		assertEquals(3,docs.scoreDocs.length);
		
		FieldDoc fieldDoc = (FieldDoc)docs.scoreDocs[0];
		assertEquals("(10,10) -> (9,6) = sqrt(17)",new Float(Math.sqrt(17)),fieldDoc.fields[0]);
		
		Document document = searcher.doc(fieldDoc.doc);
		assertEquals("Los Betos", document.get("name"));
	}
}
发表评论
用户名: 匿名