最近在研究敏感词的过滤,网上看到有不少算法,我觉得下面这种基于字典树(用嵌套 HashMap 实现)的算法还是不错的,在这里分享出来,与大家共勉。不说了,先上代码:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Sensitive-word filter backed by a character trie made of nested hash maps.
 *
 * <p>Trie layout (kept unchanged so existing consumers of the map still work):
 * every node is a {@link HashMap}; a {@link Character} key maps to the child
 * node for that character, and every non-root node additionally carries the
 * {@code "isEnd"} key whose value is {@code "1"} when a keyword ends at that
 * node and {@code "0"} otherwise. The root node has no {@code "isEnd"} entry.
 *
 * <p>All methods are static; the trie is passed explicitly to each query.
 */
public class T {

    /** Key under which each non-root node stores its end-of-keyword flag. */
    private static final String END_KEY = "isEnd";
    /** Flag value: a keyword ends at this node. */
    private static final String END_YES = "1";
    /** Flag value: no keyword ends at this node. */
    private static final String END_NO = "0";

    /** Match mode: stop at the first (shortest) keyword found. */
    private static final int MATCH_SHORTEST = 1;
    /** Match mode: keep going and report the longest keyword found. */
    private static final int MATCH_LONGEST = 2;

    /** Match mode used by {@link #getTxtKeyWords}; 1 = shortest match, 2 = longest match. */
    private static int matchType = MATCH_SHORTEST;

    /**
     * Builds a keyword trie from the given list.
     *
     * <p>Each keyword is trimmed before insertion. {@code null} entries and
     * entries that are empty after trimming are ignored.
     *
     * @param keywords keywords to index; {@code null} is treated as an empty list
     * @return the root node of the trie (a {@link HashMap}); never {@code null}
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static Map addKeywords(List<String> keywords) {
        // The trie is heterogeneous (Character -> child map, "isEnd" -> String),
        // so Object/Object is the only honest parameterization.
        Map<Object, Object> root = new HashMap<Object, Object>();
        if (keywords == null) {
            return root;
        }
        for (String raw : keywords) {
            if (raw == null) {
                continue;
            }
            String key = raw.trim();
            Map<Object, Object> node = root;
            for (int j = 0; j < key.length(); j++) {
                Character ch = Character.valueOf(key.charAt(j));
                // Safe: only this method ever stores values under Character keys.
                Map<Object, Object> child = (Map<Object, Object>) node.get(ch);
                if (child == null) {
                    child = new HashMap<Object, Object>();
                    child.put(END_KEY, END_NO);
                    node.put(ch, child);
                }
                node = child;
            }
            // An empty keyword never leaves the root, which must not be flagged.
            if (node != root) {
                node.put(END_KEY, END_YES);
            }
        }
        return root;
    }

    /**
     * Removes every keyword from the given trie.
     *
     * @param keysMap trie root to empty; {@code null} is a no-op
     */
    public static void clearKeywords(Map<?, ?> keysMap) {
        if (keysMap != null) {
            keysMap.clear();
        }
    }

    /**
     * Checks whether a keyword starts exactly at position {@code begin} of {@code txt}.
     *
     * @param keysMap trie root, non-null
     * @param txt     text to inspect, non-null
     * @param begin   index in {@code txt} at which the match must start
     * @param flag    1 returns as soon as the shortest keyword is matched; any
     *                other value (conventionally 2) returns the longest match
     * @return length of the matched keyword, or 0 when none starts at {@code begin}
     */
    private static int checkKeyWords(Map<?, ?> keysMap, String txt, int begin, int flag) {
        Map<?, ?> node = keysMap;
        int matched = 0;
        int longest = 0;
        for (int i = begin; i < txt.length(); i++) {
            Object child = node.get(Character.valueOf(txt.charAt(i)));
            if (!(child instanceof Map)) {
                // Walked off the trie: report the longest complete keyword seen so far.
                break;
            }
            node = (Map<?, ?>) child;
            matched++;
            // Constant-first equals tolerates a node without the flag.
            if (END_YES.equals(node.get(END_KEY))) {
                if (flag == MATCH_SHORTEST) {
                    return matched;
                }
                longest = matched;
            }
        }
        return longest;
    }

    /**
     * Returns the distinct keywords that occur in {@code txt}.
     *
     * <p>The text is scanned left to right using the class-wide match mode
     * (shortest match by default); after a hit the scan resumes right after the
     * matched keyword, so reported occurrences do not overlap.
     *
     * @param keysMap trie root produced by {@link #addKeywords}
     * @param txt     text to scan
     * @return the keywords found; empty when either argument is {@code null}
     */
    public static Set<String> getTxtKeyWords(Map<?, ?> keysMap, String txt) {
        Set<String> found = new HashSet<String>();
        if (keysMap == null || txt == null) {
            return found;
        }
        int i = 0;
        while (i < txt.length()) {
            int len = checkKeyWords(keysMap, txt, i, matchType);
            if (len > 0) {
                found.add(txt.substring(i, i + len));
                i += len;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Tells whether {@code txt} contains at least one keyword.
     *
     * @param keysMap trie root produced by {@link #addKeywords}
     * @param txt     text to scan
     * @return {@code true} if any keyword occurs in {@code txt}; {@code false}
     *         otherwise or when either argument is {@code null}
     */
    public static boolean isContentKeyWords(Map<?, ?> keysMap, String txt) {
        if (keysMap == null || txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            // Shortest match is enough: only existence matters here.
            if (checkKeyWords(keysMap, txt, i, MATCH_SHORTEST) > 0) {
                return true;
            }
        }
        return false;
    }

    /** Small demo: builds a trie from two keywords and scans a sample sentence. */
    public static void main(String[] args) {
        List<String> keywords = new ArrayList<String>();
        keywords.add("你妹");
        keywords.add("页面加载");
        Map<?, ?> keysMap = addKeywords(keywords);
        String txt = "不允许说脏话,尤其是你妹这个词。庞大的页面加载的过程中";
        System.out.println(keysMap);
        boolean boo = isContentKeyWords(keysMap, txt);
        System.out.println(boo);
        Set<String> set = getTxtKeyWords(keysMap, txt);
        Iterator<String> it = set.iterator();
        while (it.hasNext()) {
            String str = it.next();
            System.out.println(str);
        }
        System.out.println(set);
    }
}
可以直接运行,大家可以看看结果,再依照自己的思路进行修改。如果有什么疑问,随时可以发表意见。
?