部门给我找了点事做:帮忙筛选简历,估计是觉得我加班少了。为了不浪费时间,我写了个简单的简历内容打分排序工具,以后直接按得分排序后转发。
代码如下:
? ? ?
package com.lu;

import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Static helpers shared by the index creator and the scorer: tokenizing text,
 * ordering a score map, unwrapping base64-encoded HTML parts and opening an
 * on-disk Lucene directory.
 */
public class LuceneUtils {

    /**
     * Matches a document that contains a {@code text/html} part with a quoted
     * charset (group 2) followed by a base64 body (group 3) terminated by a
     * {@code ----boundary_} marker. Compiled once instead of on every call.
     */
    private static final Pattern BASE64_HTML_PART = Pattern.compile(
            "(^[\\s|\\S]*?)Content-Type:text/html;charset=\"([\\s|\\S]*?)\"[\\s|\\S]*?Content-Transfer-Encoding:base64([\\S|\\s]*?)----boundary_([\\S|\\s]*?$)");

    /**
     * Tokenizes a string with the given analyzer.
     *
     * @param str      text to tokenize; {@code null} yields an empty list
     * @param analyzer analyzer used to produce the token stream
     * @return the terms in stream order; on an {@link IOException} the stack
     *         trace is printed and the terms collected so far are returned
     */
    public static List<String> getWords(String str, Analyzer analyzer) {
        List<String> result = new ArrayList<>();
        if (str == null) {
            return result;
        }
        // try-with-resources closes the stream on every path.
        try (TokenStream stream = analyzer.tokenStream("content", new StringReader(str))) {
            CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                result.add(attr.toString());
            }
            // The TokenStream consumer workflow requires end() before close().
            stream.end();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Returns the entries of a map ordered by ascending value.
     *
     * @param scoreMap map to sort; may be {@code null}
     * @return a new insertion-ordered map, lowest value first; an empty map
     *         (never {@code null}) when the input is {@code null} or empty, so
     *         callers can iterate the result unconditionally
     */
    public static Map<String, Integer> sortMapByValue(Map<String, Integer> scoreMap) {
        Map<String, Integer> sortedMap = new LinkedHashMap<>();
        if (scoreMap == null || scoreMap.isEmpty()) {
            return sortedMap;
        }
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(scoreMap.entrySet());
        // List.sort is stable, so entries with equal values keep their relative order.
        entries.sort(Entry.comparingByValue());
        for (Map.Entry<String, Integer> entry : entries) {
            sortedMap.put(entry.getKey(), entry.getValue());
        }
        return sortedMap;
    }

    /**
     * Extracts the readable text from a file's raw content.
     *
     * @param content raw file content
     * @return the decoded base64 HTML part when the whole content matches the
     *         expected layout, otherwise the content unchanged; empty only
     *         when {@code content} is {@code null}
     */
    public static Optional<String> checkGetContent(String content) {
        if (content == null) {
            return Optional.empty();
        }
        Matcher matcher = BASE64_HTML_PART.matcher(content);
        if (matcher.matches()) {
            String matchCharset = matcher.group(2);
            String matchContent = matcher.group(3);
            return Optional.of(decodeStr(matchContent, matchCharset));
        }
        return Optional.of(content);
    }

    /**
     * Base64-decodes a string and interprets the bytes in the given charset.
     *
     * @param encodeStr base64 text
     * @param charset   name of the charset of the decoded bytes
     * @return the decoded text; when the charset name is not supported the
     *         bytes are decoded with the platform default charset instead
     */
    public static String decodeStr(String encodeStr, String charset) {
        // Base64 text is pure ASCII, so do not depend on the platform charset here.
        byte[] decoded = new Base64().decode(encodeStr.getBytes(StandardCharsets.US_ASCII));
        try {
            return new String(decoded, charset);
        } catch (UnsupportedEncodingException e) {
            // Deliberate best-effort fallback kept from the original behavior.
            return new String(decoded);
        }
    }

    /**
     * Opens a file-system backed Lucene directory.
     *
     * @param indexPath path of the index directory
     * @return the opened directory, or empty when opening fails (the stack
     *         trace is printed)
     */
    public static Optional<Directory> openFSDirectory(String indexPath) {
        Path path = Paths.get(indexPath);
        try {
            FSDirectory fsDirectory = FSDirectory.open(path);
            return Optional.of(fsDirectory);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return Optional.empty();
    }
}
?
package com.lu;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

/**
 * Scores indexed files by running a fixed list of keyword queries against the
 * {@code content} field and adding each query's weight to every file it hits.
 */
public class ContentScoror {

    /** Indexed field that holds the file text. */
    private static final String CONTENT_FIELD = "content";

    /** Maximum number of hits requested per query. */
    private static final int MAX_HITS = 1000;

    /**
     * Weight applied to files matching the phrase query built in
     * {@link #checkIndexAndScore}.
     * NOTE(review): the value in the original source was unreadable (a bare
     * {@code *}, which does not compile). 1 matches the weight used by most
     * other queries; confirm the intended value - it may have been meant as a
     * penalty (negative weight).
     */
    private static final int PHRASE_QUERY_WEIGHT = 1;

    /** A query string for the classic query parser and the weight of a hit. */
    private static final class WeightedQuery {
        final String text;
        final int weight;

        WeightedQuery(String text, int weight) {
            this.text = text;
            this.weight = weight;
        }
    }

    /** Keyword queries, evaluated in this order. */
    private static final WeightedQuery[] KEYWORD_QUERIES = {
        new WeightedQuery("统招本科", 1),
        new WeightedQuery("计算机数学信息管理", 1),
        new WeightedQuery("Java Web App", 1),
        new WeightedQuery("struts", 1),
        new WeightedQuery("mybatis", 1),
        new WeightedQuery("ibatis", 1),
        new WeightedQuery("hibernate", 1),
        new WeightedQuery("spring", 1),
        new WeightedQuery("调优", 2),
        new WeightedQuery("webservice", 1),
        new WeightedQuery("axis", 2),
        new WeightedQuery("xfire", 1),
        new WeightedQuery("cxf", 1),
        new WeightedQuery("jax-ws jws", 1),
        new WeightedQuery("xml json", 1),
        new WeightedQuery("oracle mysql sqlserver db2", 1),
        new WeightedQuery("redis memcached", 1),
        new WeightedQuery("组长管理设计架构分析", 1),
    };

    String indexPath = "lucene\\Index\\";
    Map<String, Integer> scoreMap = new HashMap<>();
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

    /**
     * Runs one query and adds {@code weight} to the score of every hit file.
     * A file's final score is therefore the sum of the weights of all queries
     * that matched it. Each hit is also printed to standard output.
     *
     * @param searcher searcher over the file index
     * @param query    query to run
     * @param weight   amount added to the score of each matching file
     * @throws IOException if searching or loading a hit document fails
     */
    public void eval(IndexSearcher searcher, Query query, Integer weight) throws IOException {
        TopDocs topDocs = searcher.search(query, MAX_HITS);
        for (ScoreDoc hit : topDocs.scoreDocs) {
            Document hitDoc = searcher.doc(hit.doc);
            String filename = hitDoc.get("name");
            System.out.println("(" + hit.doc + "-" + hit.score + ")" + " name:" + filename);
            scoreMap.merge(filename, weight, Integer::sum);
        }
    }

    /**
     * Opens the index in {@code directory} and evaluates every keyword query
     * plus one phrase query, accumulating the results in {@code scoreMap}.
     * I/O and query-syntax errors are printed and abort the remaining queries.
     *
     * @param directory directory containing the index
     * @param analyzer  analyzer used to parse the keyword queries
     */
    public void checkIndexAndScore(Directory directory, Analyzer analyzer) {
        // try-with-resources closes the reader even when a query fails.
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser(CONTENT_FIELD, analyzer);
            for (WeightedQuery keyword : KEYWORD_QUERIES) {
                eval(searcher, parser.parse(keyword.text), keyword.weight);
            }
            Query phrase = new PhraseQuery(CONTENT_FIELD, "培训", "机构");
            eval(searcher, phrase, PHRASE_QUERY_WEIGHT);
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }

    /** Opens the index at {@code indexPath} and scores its files; does nothing if it cannot be opened. */
    public void doScore() {
        Optional<Directory> dir = LuceneUtils.openFSDirectory(indexPath);
        if (!dir.isPresent()) {
            return;
        }
        try (Directory directory = dir.get()) {
            checkIndexAndScore(directory, analyzer);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints every scored file as {@code name---->score}, in the order returned by the sort helper. */
    public void showResult() {
        Map<String, Integer> sorted = LuceneUtils.sortMapByValue(scoreMap);
        if (sorted == null) {
            // The helper reports "nothing to sort" as null; there is nothing to print.
            return;
        }
        sorted.forEach((k, v) -> System.out.println(k + "---->" + v));
    }

    public static void main(String[] args) {
        ContentScoror fie = new ContentScoror();
        fie.doScore();
        fie.showResult();
    }
}
?
?
package com.lu;

import java.io.File;
import java.io.IOException;
import java.util.Optional;
import java.util.stream.Stream;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;

/**
 * Builds the Lucene index: one document per file in the content directory,
 * with the file name in {@code name} and its text in {@code content}.
 */
public class FileIndexCreator {
    String indexPath = "lucene\\Index\\";
    String contentFilePath = "content";
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

    /**
     * Indexes one file, replacing any document already stored under the same
     * file name so that re-running the indexer does not create duplicates
     * (duplicates would be counted once each when hits are scored per name).
     *
     * @param iw writer to add the document to
     * @param f  file to index
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    public void addDoc(IndexWriter iw, File f) throws IOException {
        // NOTE(review): reads with the platform default charset - confirm the files' encoding.
        String raw = FileUtils.readFileToString(f);
        String text = LuceneUtils.checkGetContent(raw).orElse(raw);
        Document doc = new Document();
        doc.add(new StringField("name", f.getName(), Field.Store.YES));
        doc.add(new TextField("content", text, Field.Store.YES));
        iw.updateDocument(new Term("name", f.getName()), doc);
    }

    /**
     * Indexes every regular file directly inside the content directory.
     * A file that fails is reported and skipped; the others are still indexed.
     *
     * @param iw writer to add the documents to
     */
    public void content(IndexWriter iw) {
        // listFiles returns null when the path is missing or is not a directory.
        File[] files = new File(contentFilePath).listFiles(File::isFile);
        if (files == null) {
            System.err.println("Content directory not found or not readable: " + contentFilePath);
            return;
        }
        for (File f : files) {
            try {
                addDoc(iw, f);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Opens (or creates) the index at {@code indexPath} and indexes the content directory into it. */
    public void createIndex() {
        Optional<Directory> dir = LuceneUtils.openFSDirectory(indexPath);
        if (!dir.isPresent()) {
            return;
        }
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // The writer and the directory are closed on every path, so a failure
        // does not leave the writer open. An in-memory directory could be used
        // here instead of the on-disk one.
        try (Directory directory = dir.get(); IndexWriter iw = new IndexWriter(directory, iwc)) {
            content(iw);
            iw.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        FileIndexCreator fie = new FileIndexCreator();
        fie.createIndex();
    }
}
?
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>l.l.h</groupId>
  <artifactId>domjj</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <properties>
    <!-- The sources use lambdas, streams and Optional, so compile for Java 8
         explicitly instead of relying on the compiler plugin's default level. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!--
    <dependency>
      <groupId>pull-parser</groupId>
      <artifactId>pull-parser</artifactId>
      <version>2</version>
    </dependency>
    -->
    <dependency>
      <groupId>xml-resolver</groupId>
      <artifactId>xml-resolver</artifactId>
      <version>1.2</version>
    </dependency>
    <dependency>
      <groupId>pull-parser</groupId>
      <artifactId>pull-parser</artifactId>
      <version>2.1.10</version>
    </dependency>
    <dependency>
      <groupId>org.dom4j</groupId>
      <artifactId>dom4j</artifactId>
      <version>2.0.0-RC1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>5.3.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>5.3.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>5.3.1</version>
    </dependency>
    <!-- Highlighting -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>5.3.1</version>
    </dependency>
    <!-- Chinese analyzer: SmartChineseAnalyzer -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-smartcn</artifactId>
      <version>5.3.1</version>
    </dependency>
    <!-- File handling utilities -->
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.4</version>
    </dependency>
    <dependency>
      <groupId>commons-codec</groupId>
      <artifactId>commons-codec</artifactId>
      <version>1.9</version>
    </dependency>
  </dependencies>
</project>
?