com.mmj.app.lucene.analyzer.AbstractWordAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for com.mmj.app.lucene.analyzer.AbstractWordAnalyzer.java

Source

/*
 * Copyright 2011-2016 ZuoBian.com All right reserved. This software is the confidential and proprietary information of
 * ZuoBian.com ("Confidential Information"). You shall not disclose such Confidential Information and shall use it only
 * in accordance with the terms of the license agreement you entered into with ZuoBian.com.
 */
package com.mmj.app.lucene.analyzer;

import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.mmj.app.lucene.analyzer.cons.SegMode;
import com.zb.jcseg.util.WordUnionUtils;

/**
 * @author zxc Sep 2, 2014 4:37:00 PM
 */
public abstract class AbstractWordAnalyzer implements WordAnalyzer {

    // Logger shared with subclasses (protected static). Note that it is created for WordAnalyzer.class, so
    // messages are logged under the interface's name rather than under this class's name.
    protected static Logger logger = LoggerFactory.getLogger(WordAnalyzer.class);

/**
 * Characters that {@code isDelimeter(char)} compares against to decide whether a character separates words.
 * NOTE(review): several entries below are empty character literals, which is not valid Java. They look like
 * non-ASCII punctuation that was lost to an encoding problem -- TODO restore them from the original source.
 */
public static final char[] TITLE_DELIMETER_CHARS = { '', ' ', '\\', '/', '', ',', ';', '', '|', '(', ')', '[',
        ']', '', '', '+', '', '?', ''       /* , ':', '' */};

/**
 * Segments the given text into words with single-word combining switched off.
 * 
 * @param input the text to segment
 * @return whatever {@code segWords(String, Boolean)} returns for {@code input} and {@code Boolean.FALSE}
 */
    @Override
    public List<String> segWords(String input) {
        final Boolean combineSingleWords = Boolean.FALSE;
        return segWords(input, combineSingleWords);
    }

    /**
     * Segments the given text using the requested segmentation mode, without combining single words.
     *
     * @param input the text to segment; blank or {@code null} input yields an empty list
     * @param segMode the segmentation mode to use
     * @return the segmented words, or a new empty list when {@code input} is blank
     */
    public List<String> segWords(String input, SegMode segMode) {
        return StringUtils.isBlank(input)
            ? new ArrayList<String>(0)
            : _segWords(input, segMode, Boolean.FALSE);
    }

    /**
     * Segments the given text and joins the resulting words into a single string.
     *
     * @param input the text to segment
     * @param wordSpilt the separator placed between adjacent words
     * @param segMode the segmentation mode to use
     * @return the words from {@code segWords(String, SegMode)} joined with {@code wordSpilt}
     */
    @Override
    public String segWords(String input, String wordSpilt, SegMode segMode) {
        return StringUtils.join(segWords(input, segMode), wordSpilt);
    }

    /**
     * Segments the given text in {@code SegMode.COMPLEX} mode.
     *
     * @param input the text to segment; blank or {@code null} input yields an empty list
     * @param wiselyCombineSingleWord whether the single-word combining step should run on the split result
     * @return the segmented words, or a new empty list when {@code input} is blank
     */
    @Override
    public List<String> segWords(String input, Boolean wiselyCombineSingleWord) {
        if (!StringUtils.isBlank(input)) {
            return _segWords(input, SegMode.COMPLEX, wiselyCombineSingleWord);
        }
        return new ArrayList<String>(0);
    }

    /**
     * Common entry point behind the String-based {@code segWords} overloads: returns an empty list for blank
     * input and otherwise delegates to {@code wiselySplit}.
     *
     * @param input the text to segment
     * @param segMode the segmentation mode to use
     * @param wiselyCombineSingleWord whether the single-word combining step should run
     * @return the segmented words, or a new empty list when {@code input} is blank
     */
    public List<String> _segWords(String input, SegMode segMode, Boolean wiselyCombineSingleWord) {
        // Guard clause: nothing to segment.
        if (StringUtils.isBlank(input)) {
            return new ArrayList<String>();
        }
        return wiselySplit(input, segMode, wiselyCombineSingleWord);
    }

    /**
     * Segments the text supplied by a reader and optionally runs the single-word combining step on the result.
     *
     * @param input the reader providing the text to segment
     * @param segMode the requested segmentation mode
     * @param wiselyCombineSingleWord when {@code true}, the split result is passed through
     *        {@code wiselyCombineSingleWord(List)} before being returned
     * @return the segmented (and possibly combined) words
     */
    public List<String> segWords(Reader input, SegMode segMode, boolean wiselyCombineSingleWord) {
        // Step 1: split the text into words.
        List<String> words = _splitWords(input, segMode);
        // Step 2: only when requested, let the implementation combine single words.
        return wiselyCombineSingleWord ? wiselyCombineSingleWord(words) : words;
    }

    /**
     * Cuts {@code str} at every delimiter character (as decided by {@code isDelimeter(char)}) and hands each
     * non-empty piece to {@code _wiselySplit}, which appends the resulting words to the returned list in order.
     *
     * @param str the text to split; must not be {@code null}
     * @param segMode the segmentation mode forwarded for every piece
     * @param wiselyCombineSingleWord whether single-word combining should run; {@code null} is treated as
     *        {@code false}
     * @return the words collected from all pieces, in their original order
     */
    private List<String> wiselySplit(String str, SegMode segMode, Boolean wiselyCombineSingleWord) {
        // Unbox once and null-safely: handing a null Boolean to the boolean parameter of _wiselySplit would
        // otherwise fail with a NullPointerException during auto-unboxing.
        boolean combine = Boolean.TRUE.equals(wiselyCombineSingleWord);
        List<String> result = new ArrayList<String>();
        int length = str.length();
        int start = 0; // index of the first character of the piece currently being scanned
        for (int i = 0; i < length; i++) {
            if (!isDelimeter(str.charAt(i))) {
                continue;
            }
            // Consecutive delimiters produce empty pieces, which are skipped.
            if (start < i) {
                _wiselySplit(result, segMode, combine, StringUtils.substring(str, start, i));
            }
            start = i + 1;
        }
        // Whatever follows the last delimiter (or the whole string when there was none).
        if (start < length) {
            _wiselySplit(result, segMode, combine, StringUtils.substring(str, start));
        }
        return result;
    }

    /**
     * Processes one delimiter-free piece of text and appends the resulting words to {@code result}.
     * Blank pieces are ignored. Pieces of at most two characters, and three-character pieces for which
     * {@code WordUnionUtils.isContainSingleWord} returns {@code false}, are added unchanged; everything else is
     * run through {@code segWords(Reader, SegMode, boolean)}.
     *
     * @param result the list that receives the words
     * @param segMode the segmentation mode to use for longer pieces
     * @param wiselyCombineSingleWord whether the single-word combining step should run for longer pieces
     * @param input the piece of text to process
     */
    private void _wiselySplit(List<String> result, SegMode segMode, boolean wiselyCombineSingleWord, String input) {
        if (!StringUtils.isBlank(input)) {
            int length = StringUtils.length(input);
            // Short pieces are kept as a single word instead of being segmented.
            boolean keepWhole = length <= 2 || (length == 3 && !WordUnionUtils.isContainSingleWord(input));
            if (keepWhole) {
                result.add(input);
                return;
            }
            List<String> pieces = segWords(new StringReader(input), segMode, wiselyCombineSingleWord);
            if (!pieces.isEmpty()) {
                result.addAll(pieces);
            }
        }
    }

    /**
     * Splits the reader's text with {@code splitWords}, first replacing an unsupported segmentation mode with
     * the implementation's default mode (an error is logged when that happens).
     *
     * @param input the reader providing the text to split
     * @param segMode the requested segmentation mode
     * @return the words returned by {@code splitWords}
     */
    private List<String> _splitWords(Reader input, SegMode segMode) {
        SegMode effectiveMode = segMode;
        if (!isSupportSegMode(segMode)) {
            logger.error("???" + segMode + "??,?" + getDefaultSegMode());
            effectiveMode = getDefaultSegMode();
        }
        return splitWords(input, effectiveMode);
    }

    /**
     * Post-processing hook for a split result. It is called by {@code segWords(Reader, SegMode, boolean)} only
     * when that method's {@code wiselyCombineSingleWord} flag is {@code true}, and its return value replaces the
     * split result.
     */
    public abstract List<String> wiselyCombineSingleWord(List<String> result);

    /**
     * Splits the text supplied by {@code input} into words using {@code segMode}. Within this class it is only
     * called with a mode for which {@link #isSupportSegMode(SegMode)} returned {@code true}, or with the value of
     * {@link #getDefaultSegMode()}.
     */
    public abstract List<String> splitWords(Reader input, SegMode segMode);

    /** Tells whether this analyzer can split text in the given mode; if not, the default mode is used instead. */
    public abstract boolean isSupportSegMode(SegMode segMode);

    /** Returns the segmentation mode used when a requested mode is not supported. */
    public abstract SegMode getDefaultSegMode();

    /**
     * Invokes a public method of this analyzer by name through reflection.
     * <p>
     * The method is first looked up by the exact runtime classes of the arguments. If that finds nothing, or an
     * argument is {@code null}, the public methods are searched for the single one that has the same name and
     * argument count and whose parameter types accept the arguments. Primitive parameters accept their wrapper
     * type; {@code null} is accepted by any non-primitive parameter. This fallback makes methods reachable that
     * declare primitive or supertype parameters, which an exact-class lookup can never match.
     *
     * @param methodName the name of the public method to call; {@code "invoke"} itself is never dispatched
     * @param args the arguments to pass; {@code null} is treated as an empty argument list
     * @return the value returned by the invoked method, or {@code null} when {@code methodName} is
     *         {@code "invoke"}
     * @throws NoSuchMethodException if no matching method exists or the fallback search is ambiguous
     * @throws Exception any other failure raised by the reflective call
     */
    @Override
    public Object invoke(String methodName, Object[] args) throws Exception {
        // Never dispatch to this method itself.
        if (StringUtils.equals(methodName, "invoke")) {
            return null;
        }
        Object[] actualArgs = (args == null) ? new Object[0] : args;

        // An exact lookup is only possible when every argument has a runtime class, i.e. none is null.
        boolean hasNullArgument = false;
        Class<?>[] runtimeTypes = new Class<?>[actualArgs.length];
        for (int i = 0; i < actualArgs.length; i++) {
            if (actualArgs[i] == null) {
                hasNullArgument = true;
                break;
            }
            runtimeTypes[i] = actualArgs[i].getClass();
        }

        Method method = null;
        NoSuchMethodException exactLookupFailure = null;
        if (!hasNullArgument) {
            try {
                method = this.getClass().getMethod(methodName, runtimeTypes);
            } catch (NoSuchMethodException e) {
                // Remembered so that it can be rethrown if the fallback finds nothing either.
                exactLookupFailure = e;
            }
        }
        if (method == null) {
            method = findCompatibleMethod(methodName, actualArgs);
        }
        if (method == null) {
            throw exactLookupFailure != null
                ? exactLookupFailure
                : new NoSuchMethodException(this.getClass().getName() + "." + methodName
                                            + " with " + actualArgs.length + " argument(s)");
        }
        return method.invoke(this, actualArgs);
    }

    // Finds the only public, non-bridge method with the given name whose parameters accept the arguments.
    private Method findCompatibleMethod(String methodName, Object[] args) throws NoSuchMethodException {
        Method match = null;
        for (Method candidate : this.getClass().getMethods()) {
            if (candidate.isBridge() || !candidate.getName().equals(methodName)
                || !acceptsArguments(candidate.getParameterTypes(), args)) {
                continue;
            }
            if (match != null) {
                // Picking one of several candidates would depend on reflection ordering, so refuse instead.
                throw new NoSuchMethodException("Ambiguous call to " + this.getClass().getName() + "."
                                                + methodName + ": more than one method accepts the arguments");
            }
            match = candidate;
        }
        return match;
    }

    // Tells whether the declared parameter types accept the given arguments position by position.
    private static boolean acceptsArguments(Class<?>[] declaredTypes, Object[] args) {
        if (declaredTypes.length != args.length) {
            return false;
        }
        for (int i = 0; i < args.length; i++) {
            Class<?> declared = declaredTypes[i];
            if (args[i] == null) {
                if (declared.isPrimitive()) {
                    return false;
                }
            } else if (!boxedType(declared).isInstance(args[i])) {
                return false;
            }
        }
        return true;
    }

    // Maps a primitive type to its wrapper class; any other type is returned unchanged.
    private static Class<?> boxedType(Class<?> type) {
        if (!type.isPrimitive()) {
            return type;
        }
        if (type == boolean.class) {
            return Boolean.class;
        }
        if (type == int.class) {
            return Integer.class;
        }
        if (type == long.class) {
            return Long.class;
        }
        if (type == double.class) {
            return Double.class;
        }
        if (type == float.class) {
            return Float.class;
        }
        if (type == char.class) {
            return Character.class;
        }
        if (type == short.class) {
            return Short.class;
        }
        if (type == byte.class) {
            return Byte.class;
        }
        return Void.class;
    }

    // ////////////////////////////////////////////////////////////////////////////
    //
    // 
    //
    // ////////////////////////////////////////////////////////////////////////////

    // Matches one or more whitespace characters; compiled once because Pattern instances are reusable.
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Removes every whitespace character (spaces, tabs, line breaks, ...) from the given string and then trims
     * the result.
     *
     * @param str the string to clean; may be {@code null}
     * @return the string without whitespace, or {@code null} when {@code str} is {@code null}
     */
    public static String replaceBlank(String str) {
        if (str == null) {
            return null;
        }
        // trim() is kept because it also strips leading/trailing control characters that \s does not cover.
        return WHITESPACE_RUN.matcher(str).replaceAll("").trim();
    }

    /**
     * Tells whether the given character is one of the entries of {@code TITLE_DELIMETER_CHARS}.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} equals any delimiter character, {@code false} otherwise
     */
    public boolean isDelimeter(char c) {
        boolean found = false;
        // Linear scan that stops as soon as a match is seen.
        for (int i = 0; i < TITLE_DELIMETER_CHARS.length && !found; i++) {
            found = TITLE_DELIMETER_CHARS[i] == c;
        }
        return found;
    }

    /**
     * Deletes every match of {@code regex} from {@code str} and trims the result.
     *
     * @param str the string to process
     * @param regex the regular expression whose matches are removed
     * @return {@code str} with all matches replaced by the empty string, trimmed
     */
    public static String matcherRegex(String str, String regex) {
        return Pattern.compile(regex).matcher(str).replaceAll(StringUtils.EMPTY).trim();
    }
}