Java tutorial
/*
 * Copyright 2011-2016 ZuoBian.com All right reserved. This software is the confidential and proprietary information of
 * ZuoBian.com ("Confidential Information"). You shall not disclose such Confidential Information and shall use it only
 * in accordance with the terms of the license agreement you entered into with ZuoBian.com.
 */
package com.mmj.app.lucene.analyzer;

import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mmj.app.lucene.analyzer.cons.SegMode;
import com.zb.jcseg.util.WordUnionUtils;

/**
 * Skeleton {@link WordAnalyzer}: cuts the input at the characters listed in {@link #TITLE_DELIMETER_CHARS}, keeps very
 * short chunks as they are and hands every other chunk to the segmenter supplied by the subclass through
 * {@link #splitWords(Reader, SegMode)}.
 *
 * @author zxc Sep 2, 2014 4:37:00 PM
 */
public abstract class AbstractWordAnalyzer implements WordAnalyzer {

    protected static Logger logger = LoggerFactory.getLogger(WordAnalyzer.class);

    /**
     * Characters at which an input string is cut into independently segmented chunks.
     * <p>
     * NOTE(review): in the reviewed copy of this file every non-ASCII entry had been stripped down to an empty (and
     * therefore uncompilable) character literal. The Unicode-escaped entries below are a best-guess reconstruction
     * that uses the full-width counterparts of the neighbouring ASCII delimiters -- confirm them against the original
     * source before relying on them.
     */
    public static final char[] TITLE_DELIMETER_CHARS = {
            '\u3000', // ideographic space (reconstructed -- confirm)
            ' ',
            '\\',
            '/',
            '\uFF0C', // full-width comma (reconstructed -- confirm)
            ',',
            ';',
            '\uFF1B', // full-width semicolon (reconstructed -- confirm)
            '|',
            '(',
            ')',
            '[',
            ']',
            '\uFF08', // full-width left parenthesis (reconstructed -- confirm)
            '\uFF09', // full-width right parenthesis (reconstructed -- confirm)
            '+',
            '\uFF0B', // full-width plus sign (reconstructed -- confirm)
            '?',
            '\uFF1F' // full-width question mark (reconstructed -- confirm)
            /* , ':', and its (lost) non-ASCII counterpart were already disabled in the original */
    };

    /** Same expression {@link #replaceBlank(String)} has always used, compiled once instead of on every call. */
    private static final Pattern BLANK_PATTERN = Pattern.compile("\\s*|\t|\r|\n");

    /**
     * Segments {@code input} in {@link SegMode#COMPLEX} mode without combining single words.
     *
     * @param input text to segment; blank input yields an empty list
     * @return the segmented words
     */
    @Override
    public List<String> segWords(String input) {
        return segWords(input, Boolean.FALSE);
    }

    /**
     * Segments {@code input} with the given mode, without combining single words.
     *
     * @param input text to segment; blank input yields an empty list
     * @param segMode segmentation mode handed to the subclass segmenter
     * @return the segmented words
     */
    public List<String> segWords(String input, SegMode segMode) {
        if (StringUtils.isBlank(input)) {
            return new ArrayList<String>(0);
        }
        return _segWords(input, segMode, Boolean.FALSE);
    }

    /**
     * Segments {@code input} and joins the resulting words with {@code wordSpilt}.
     *
     * @param input text to segment
     * @param wordSpilt separator placed between the words
     * @param segMode segmentation mode handed to the subclass segmenter
     * @return the joined words
     */
    @Override
    public String segWords(String input, String wordSpilt, SegMode segMode) {
        List<String> segWords = segWords(input, segMode);
        return StringUtils.join(segWords, wordSpilt);
    }

    /**
     * Segments {@code input} in {@link SegMode#COMPLEX} mode.
     *
     * @param input text to segment; blank input yields an empty list
     * @param wiselyCombineSingleWord whether {@link #wiselyCombineSingleWord(List)} is applied to each chunk's result;
     *            {@code null} is treated as {@code false}
     * @return the segmented words
     */
    @Override
    public List<String> segWords(String input, Boolean wiselyCombineSingleWord) {
        if (StringUtils.isBlank(input)) {
            return new ArrayList<String>(0);
        }
        return _segWords(input, SegMode.COMPLEX, wiselyCombineSingleWord);
    }

    /**
     * Common entry point of the String based overloads.
     *
     * @param input text to segment; blank input yields an empty list
     * @param segMode segmentation mode handed to the subclass segmenter
     * @param wiselyCombineSingleWord whether {@link #wiselyCombineSingleWord(List)} is applied; {@code null} is
     *            treated as {@code false} (it used to fail with a NullPointerException when unboxed)
     * @return the segmented words
     */
    public List<String> _segWords(String input, SegMode segMode, Boolean wiselyCombineSingleWord) {
        if (StringUtils.isBlank(input)) {
            return new ArrayList<String>();
        }
        return wiselySplit(input, segMode, Boolean.TRUE.equals(wiselyCombineSingleWord));
    }

    /**
     * Segments everything readable from {@code input}; no delimiter pre-splitting is done here.
     *
     * @param input source of the text
     * @param segMode requested mode; replaced by {@link #getDefaultSegMode()} when the subclass does not support it
     * @param wiselyCombineSingleWord whether the raw result is passed through {@link #wiselyCombineSingleWord(List)}
     * @return the segmented words
     */
    public List<String> segWords(Reader input, SegMode segMode, boolean wiselyCombineSingleWord) {
        List<String> result = _splitWords(input, segMode);
        if (wiselyCombineSingleWord) {
            result = wiselyCombineSingleWord(result);
        }
        return result;
    }

    /**
     * Cuts {@code str} at every delimiter character and processes each non-empty chunk, in order, with
     * {@link #_wiselySplit(List, SegMode, boolean, String)}.
     */
    private List<String> wiselySplit(String str, SegMode segMode, boolean wiselyCombineSingleWord) {
        List<String> result = new ArrayList<String>();
        int len = str.length();
        int chunkStart = 0;
        for (int i = 0; i < len; i++) {
            if (isDelimeter(str.charAt(i))) {
                if (chunkStart < i) {
                    _wiselySplit(result, segMode, wiselyCombineSingleWord, str.substring(chunkStart, i));
                }
                chunkStart = i + 1;
            }
        }
        // Trailing chunk after the last delimiter (or the whole string when it contains no delimiter).
        if (chunkStart < len) {
            _wiselySplit(result, segMode, wiselyCombineSingleWord, str.substring(chunkStart));
        }
        return result;
    }

    /**
     * Appends the words of one delimiter-free chunk to {@code result}. Blank chunks are ignored. Chunks of at most two
     * characters, and three-character chunks for which {@code WordUnionUtils.isContainSingleWord} returns
     * {@code false}, are added unchanged; every other chunk is run through the segmenter.
     */
    private void _wiselySplit(List<String> result, SegMode segMode, boolean wiselyCombineSingleWord, String input) {
        if (StringUtils.isBlank(input)) {
            return;
        }
        int len = StringUtils.length(input);
        boolean keepWhole = len <= 2 || (len == 3 && !WordUnionUtils.isContainSingleWord(input));
        if (keepWhole) {
            result.add(input);
            return;
        }
        List<String> segWords = segWords(new StringReader(input), segMode, wiselyCombineSingleWord);
        // A subclass segmenter may hand back null; treat that like an empty result.
        if (segWords != null && !segWords.isEmpty()) {
            result.addAll(segWords);
        }
    }

    /**
     * Calls {@link #splitWords(Reader, SegMode)}, first swapping an unsupported {@code segMode} for
     * {@link #getDefaultSegMode()} and logging that substitution.
     */
    private List<String> _splitWords(Reader input, SegMode segMode) {
        if (!isSupportSegMode(segMode)) {
            // NOTE(review): the text of this message was lost to an encoding problem; the literals are kept as found.
            logger.error("???" + segMode + "??,?" + getDefaultSegMode());
            segMode = getDefaultSegMode();
        }
        return splitWords(input, segMode);
    }

    /**
     * Post-processes a raw segmentation result. Presumably merges adjacent single-character words (inferred from the
     * name only -- confirm against the implementations).
     *
     * @param result raw segmentation result
     * @return the post-processed list
     */
    public abstract List<String> wiselyCombineSingleWord(List<String> result);

    /**
     * Performs the actual segmentation of the text supplied by {@code input}.
     *
     * @param input source of the text
     * @param segMode a mode for which {@link #isSupportSegMode(SegMode)} returned {@code true}, or the default mode
     * @return the segmented words
     */
    public abstract List<String> splitWords(Reader input, SegMode segMode);

    /**
     * @param segMode mode to check
     * @return whether this analyzer can segment in {@code segMode}
     */
    public abstract boolean isSupportSegMode(SegMode segMode);

    /**
     * @return the mode used in place of an unsupported one
     */
    public abstract SegMode getDefaultSegMode();

    /**
     * Reflectively calls the public method {@code methodName} of this object.
     * <p>
     * The method is first looked up by the exact runtime classes of {@code args}. When that lookup fails, or an
     * argument is {@code null}, the single public overload whose parameters can accept the arguments (wrapper objects
     * match primitive parameters, subclasses match supertypes, {@code null} matches any reference type) is used.
     *
     * @param methodName name of the public method to call; {@code "invoke"} is refused and yields {@code null}, so
     *            this dispatcher cannot be asked to call itself
     * @param args arguments to pass; {@code null} is treated as "no arguments"
     * @return whatever the target method returns
     * @throws NoSuchMethodException if no compatible method exists or several different overloads are compatible
     * @throws Exception anything raised by the reflective call itself
     */
    @Override
    public Object invoke(String methodName, Object[] args) throws Exception {
        if (StringUtils.equals(methodName, "invoke")) {
            return null;
        }
        Object[] actualArgs = (args == null) ? new Object[0] : args;
        Method method = resolveMethod(methodName, actualArgs);
        return method.invoke(this, actualArgs);
    }

    /** Finds the public method to call for {@code args}: exact runtime types first, then a unique compatible overload. */
    private Method resolveMethod(String methodName, Object[] args) throws NoSuchMethodException {
        Class<?>[] runtimeTypes = new Class<?>[args.length];
        boolean hasNullArg = false;
        for (int i = 0; i < args.length; i++) {
            if (args[i] == null) {
                hasNullArg = true;
            } else {
                runtimeTypes[i] = args[i].getClass();
            }
        }
        if (!hasNullArg) {
            try {
                return getClass().getMethod(methodName, runtimeTypes);
            } catch (NoSuchMethodException ignored) {
                // No exact signature; fall through to the assignment-compatible search below.
            }
        }

        Method match = null;
        for (Method candidate : getClass().getMethods()) {
            if (candidate.isBridge() || !candidate.getName().equals(methodName)
                || !acceptsArguments(candidate.getParameterTypes(), args)) {
                continue;
            }
            if (match == null) {
                match = candidate;
            } else if (!Arrays.equals(match.getParameterTypes(), candidate.getParameterTypes())) {
                throw new NoSuchMethodException("Ambiguous call: " + getClass().getName() + "." + methodName
                                                + Arrays.toString(runtimeTypes));
            }
        }
        if (match == null) {
            throw new NoSuchMethodException(getClass().getName() + "." + methodName + Arrays.toString(runtimeTypes));
        }
        return match;
    }

    /** Tells whether a method declaring {@code parameterTypes} can be called with {@code args}. */
    private static boolean acceptsArguments(Class<?>[] parameterTypes, Object[] args) {
        if (parameterTypes.length != args.length) {
            return false;
        }
        for (int i = 0; i < args.length; i++) {
            Class<?> expected = parameterTypes[i];
            if (args[i] == null) {
                if (expected.isPrimitive()) {
                    return false;
                }
            } else if (!boxed(expected).isInstance(args[i])) {
                return false;
            }
        }
        return true;
    }

    /** Maps a primitive type to its wrapper class; every other type is returned unchanged. */
    private static Class<?> boxed(Class<?> type) {
        if (!type.isPrimitive()) {
            return type;
        }
        if (type == boolean.class) {
            return Boolean.class;
        }
        if (type == int.class) {
            return Integer.class;
        }
        if (type == long.class) {
            return Long.class;
        }
        if (type == double.class) {
            return Double.class;
        }
        if (type == float.class) {
            return Float.class;
        }
        if (type == char.class) {
            return Character.class;
        }
        if (type == short.class) {
            return Short.class;
        }
        if (type == byte.class) {
            return Byte.class;
        }
        return Void.class;
    }

    // ////////////////////////////////////////////////////////////////////////////
    //
    // Helpers
    //
    // ////////////////////////////////////////////////////////////////////////////

    /**
     * Removes every whitespace character from {@code str}.
     *
     * @param str text to clean, may be {@code null}
     * @return {@code str} without whitespace, or {@code null} when {@code str} is {@code null}
     */
    public static String replaceBlank(String str) {
        if (str == null) {
            return null;
        }
        return BLANK_PATTERN.matcher(str).replaceAll(StringUtils.EMPTY).trim();
    }

    /**
     * Tells whether {@code c} is one of {@link #TITLE_DELIMETER_CHARS}.
     *
     * @param c character to test
     * @return {@code true} when {@code c} is a delimiter
     */
    public boolean isDelimeter(char c) {
        for (char delimiter : TITLE_DELIMETER_CHARS) {
            if (delimiter == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Deletes every match of {@code regex} from {@code str} and trims the result.
     *
     * @param str text to clean; {@code null} is returned unchanged (it used to fail with a NullPointerException)
     * @param regex regular expression whose matches are removed
     * @return the cleaned, trimmed text
     */
    public static String matcherRegex(String str, String regex) {
        if (str == null) {
            return null;
        }
        Pattern p = Pattern.compile(regex);
        Matcher m = p.matcher(str);
        return m.replaceAll(StringUtils.EMPTY).trim();
    }
}