package com.qxueyou.scc.Sensitive; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; //屏蔽敏感词初始化 @SuppressWarnings({"rawtypes", "unchecked"}) public class SensitiveWordInit { // 字符编码 private String ENCODING = "UTF-8"; // 初始化敏感字库 public Map initKeyWord() { // 读取敏感词库 ,存入Set中 Set wordSet = readSensitiveWordFile(); // 将敏感词库加入到HashMap中//确定有穷自动机DFA return addSensitiveWordToHashMap(wordSet); } /** * 读取敏感词库,将敏感词放入HashSet中,构建一个DFA算法模型:
* 中 = { * isEnd = 0 * 国 = { * isEnd = 1 * 人 = {isEnd = 0 * 民 = {isEnd = 1} * } * 男 = { * isEnd = 0 * 人 = { * isEnd = 1 * } * } * } * } * 五 = { * isEnd = 0 * 星 = { * isEnd = 0 * 红 = { * isEnd = 0 * 旗 = { * isEnd = 1 * } * } * } * } */ // 读取敏感词库 ,存入HashMap中 private Set readSensitiveWordFile() { Set wordSet = null; // app为项目地址 /* * String app = System.getProperty("user.dir"); System.out.println(app); * URL resource = Thread.currentThread().getContextClassLoader() * .getResource("/"); String path = resource.getPath().substring(1); * System.out.println(path); File file = new File(path + * "censorwords.txt"); */ //敏感词库 File file = new File( "src/main/resources/SensitiveWordList.txt"); try { // 读取文件输入流 InputStreamReader read = new InputStreamReader(new FileInputStream( file), ENCODING); // 文件是否是文件 和 是否存在 if (file.isFile() && file.exists()) { wordSet = new HashSet(); // StringBuffer sb = new StringBuffer(); // BufferedReader是包装类,先把字符读到缓存里,到缓存满了,再读入内存,提高了读的效率。 BufferedReader br = new BufferedReader(read); String txt = null; // 读取文件,将文件内容放入到set中 while ((txt = br.readLine()) != null) { wordSet.add(txt); } br.close(); /* * String str = sb.toString(); String[] ss = str.split(","); for * (String s : ss) { wordSet.add(s); } */ } // 关闭文件流 read.close(); } catch (Exception e) { e.printStackTrace(); } return wordSet; } // 将HashSet中的敏感词,存入HashMap中 private Map addSensitiveWordToHashMap(Set wordSet) { // 初始化敏感词容器,减少扩容操作 Map wordMap = new HashMap(wordSet.size()); for (String word : wordSet) { Map nowMap = wordMap; for (int i = 0; i < word.length(); i++) { // 转换成char型 char keyChar = word.charAt(i); // 获取 Object tempMap = nowMap.get(keyChar); // 如果存在该key,直接赋值 if (tempMap != null) { nowMap = (Map) tempMap; } // 不存在则,则构建一个map,同时将isEnd设置为0,因为他不是最后一个 else { // 设置标志位 Map newMap = new HashMap(); newMap.put("isEnd", "0"); // 添加到集合 nowMap.put(keyChar, newMap); nowMap = newMap; } // 最后一个 if (i == word.length() - 1) { nowMap.put("isEnd", "1"); } } } return wordMap; } }