From 068fc7f2e81178e55fa191a13709af64b1a163f6 Mon Sep 17 00:00:00 2001 From: EricsHu <hrr145632> Date: 星期一, 05 十二月 2022 14:27:43 +0800 Subject: [PATCH] 处理编码 --- src/main/java/com/qxueyou/scc/exercise/service/impl/Handler.java | 128 +++++++++++++++++++++--------------------- 1 files changed, 64 insertions(+), 64 deletions(-) diff --git a/src/main/java/com/qxueyou/scc/exercise/service/impl/Handler.java b/src/main/java/com/qxueyou/scc/exercise/service/impl/Handler.java index 185d785..fb58b2e 100644 --- a/src/main/java/com/qxueyou/scc/exercise/service/impl/Handler.java +++ b/src/main/java/com/qxueyou/scc/exercise/service/impl/Handler.java @@ -15,67 +15,67 @@ import com.qxueyou.scc.exercise.service.impl.parser.OptionParser; public class Handler { - + private Doc doc; - + private Node currentNode; - - /** 失败 */ + + /** 失败 */ public static final Integer HANDLER_RESULT_FAIL = 1 ; - - /** 上一类型 */ + + /** 上一类型 */ public static final Integer HANDLER_RESULT_CONTINUE = 2; - - /** 成功*/ + + /** 成功*/ public static final Integer HANDLER_RESULT_SUCCESS = 3; - - /** 校验是否有中文 */ - private static String regEx = "[\u4e00-\u9fa5]"; - private static Pattern pat = Pattern.compile(regEx); - + + /** 校验是否有中文 */ + private static String regEx = "[\u4e00-\u9fa5]"; + private static Pattern pat = Pattern.compile(regEx); + public Handler(){ doc = new Doc(); currentNode = doc; } - + /** - * 解析文本,每次读取一个段落 - * @param content 段落文本内容 + * 解析文本,每次读取一个段落 + * @param content 段落文本内容 */ public ExerciseParse parse(String content){ - + ParseResult result = null; - + content = content.replace((char)12288, (char)32); content = content.replace((char)160, (char)32); - - //将得到的文本全角转半角 + + //将得到的文本全角转半角 //content = formatFullToHalf(content); - content = content.replace('（', '('); - content = content.replace('）', ')'); - - // 非全部英文题 非全部空格, 需要将空格替换掉 + content = content.replace('（', '('); + content = content.replace('）', ')'); + + // 非全部英文题 非全部空格, 需要将空格替换掉 if(isContentContainsChinese(content) 
|| content.trim().isEmpty()){ content = content.replace(" ", ""); } - + content = content.replace("\t", ""); - + if(StringUtils.isEmpty(content)||content.equals("\r")){ return new ExerciseParse(HANDLER_RESULT_SUCCESS,content); } - - //是否需要人为去掉回车 + + //是否需要人为去掉回车 content = content.replace("\r", ""); int count = 0; - + do{ result = currentNode.getParser().parse(currentNode, content); - + if(result.isSuccess()){ if(ParseResult.STEP_CUR.equals(result.getNextStep())){ - //成功后,取得当前parser + //成功后,取得当前parser generateLastSuccessType(doc,currentNode,result.getNextNode()); return new ExerciseParse(HANDLER_RESULT_SUCCESS,content); } @@ -89,7 +89,7 @@ } }else{ if(ParseResult.STEP_CUR.equals(result.getNextStep())){ - //20150907 修改;抛到最上层doc解析器仍然无法定位,直接将文本内容添加到上一个解析成功的位置 + //20150907 修改;抛到最上层doc解析器仍然无法定位,直接将文本内容添加到上一个解析成功的位置 appendTextToLastPosition(doc,content); return new ExerciseParse(HANDLER_RESULT_CONTINUE,doc.getLastSuccessType(),content); } @@ -106,24 +106,24 @@ currentNode = result.getNextNode(); } } - + }while( count++ < 15); - + return new ExerciseParse(HANDLER_RESULT_FAIL,content); - + } - + public Doc result(){ return doc; } - - + + /** - * 得到最新成功的解析类型 - * + * 得到最新成功的解析类型 + * * @param doc doc - * @param currNode 当前节点 - * @param analysisNode 如果是解析 部分的解析器,返回非空数据,用于获取解析器类型 + * @param currNode 当前节点 + * @param analysisNode 如果是解析 部分的解析器,返回非空数据,用于获取解析器类型 */ private void generateLastSuccessType(Doc doc,Node currNode,Node analysisNode){ Parser currParser = currNode.getParser(); @@ -140,39 +140,39 @@ return ; } } - + /** - * 得到当前解析成功的最新位置:如果是题目的:题干、选项或者解析,将他们组装对应位置 - * + * 得到当前解析成功的最新位置:如果是题目的:题干、选项或者解析,将他们组装对应位置 + * * @param doc * @param content */ private void appendTextToLastPosition(Doc doc,String content){ String currSuccessType = doc.getLastSuccessType(); - - //第一层 ItemType + + //第一层 ItemType List<Node> lstItemType = doc.getChildren(); if(null == lstItemType || lstItemType.isEmpty()){ return ; } - - //第二层Item + 
+ //第二层Item ItemType objItemType = (ItemType)lstItemType.get(lstItemType.size() - 1); List<Node> lstItem = objItemType.getChildren(); if(null == lstItem || lstItem.isEmpty()){ return ; } - - //取到最新位置的Item + + //取到最新位置的Item Item objItem = (Item)lstItem.get(lstItem.size() - 1); if(null != objItem){ appendTextToCurrPosition(objItem,currSuccessType,content); } } - + /** - * 将当前没有归档的内容加入到对应的位置 - * + * 将当前没有归档的内容加入到对应的位置 + * * @param objItem * @param currSuccessType * @param content @@ -181,25 +181,25 @@ if(Doc.CURR_SUCCESS_CONTENT == currSuccessType){ objItem.setTitle(objItem.getTitle()+ content); currentNode = objItem; - + }else if(Doc.CURR_SUCCESS_ANALYSIS == currSuccessType){ objItem.setAnalysis(objItem.getAnalysis() + content); currentNode = objItem; - + }else if(Doc.CURR_SUCCESS_OPTION == currSuccessType){ - + List<Option> lstOptions = objItem.getOptions(); if(null == lstOptions || lstOptions.isEmpty()){ return ; } - + Option opt = null ; for(int i = lstOptions.size() -1 ; i>=0 ; i --){ opt = lstOptions.get(i); if(null == opt){ return ; } - //将选项添加到最新位置 + //将选项添加到最新位置 if(StringUtils.isNotBlank(opt.getNo())){ opt.setContent(opt.getContent() + content); currentNode = objItem; @@ -208,20 +208,20 @@ } } } - + /** - * 字符串是否包含中文 + * 字符串是否包含中文 * @param content * @return */ private boolean isContentContainsChinese( String content ){ - + Matcher matcher = pat.matcher(content); boolean flg = false; if (matcher.find()) { flg = true; } - return flg; + return flg; } - + } -- Gitblit v1.8.0