jef.tools.string.RegexpUtils.java Source code

Java tutorial

Introduction

Here is the source code for jef.tools.string.RegexpUtils.java

Source

/*
 * JEF - Copyright 2009-2010 Jiyi (mr.jiyi@gmail.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package jef.tools.string;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;

/**
 * Static helpers for building, escaping and evaluating regular expressions,
 * together with a set of ready-made patterns and regex fragments.
 * <p>
 * Patterns compiled through {@link #matches(String, String, boolean)} and
 * {@link #getMatcherResult(String, String, boolean)} are kept in a small
 * internal cache keyed by the pattern text.
 *
 * @author jiyi
 */
public abstract class RegexpUtils {
    /** Reluctant "any run of characters" fragment. */
    public static final String MATCH_ANY_STRING = ".*?";

    // Upper bound on the number of compiled patterns kept in the cache.
    private static final int PATTERN_CACHE_SIZE = 64;
    private static final Map<String, Pattern> cache = new ConcurrentHashMap<String, Pattern>(PATTERN_CACHE_SIZE);

    /** Wildcard characters understood by the "simple match" syntax. */
    public static final char[] STAR_QUESTION = new char[] { '*', '?', '+' };

    // Single-character matchers and anchors.
    public static final String MATCH_ANY = ".";
    public static final String MATCH_NUMBER = "\\d";
    public static final String MATCH_NON_NUMBER = "\\D";
    public static final String MATCH_BLANK = "\\s";
    public static final String MATCH_NON_BLANK = "\\S";
    public static final String MATCH_WORD_CHAR = "\\w";
    public static final String MATCH_NON_WORD_CHAR = "\\W";
    public static final String MATCH_START = "^";
    public static final String MATCH_END = "$";

    // Quantifiers (greedy, then reluctant).
    public static final String COUNT_0_N = "*";
    public static final String COUNT_1_N = "+";
    public static final String COUNT_0_1 = "?";
    public static final String COUNT_0_N_RELUCTANT = "*?";
    public static final String COUNT_1_N_RELUCTANT = "+?";
    public static final String COUNT_0_1_RELUCTANT = "??";

    public static final Pattern E_MAIL = Pattern.compile("\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*");
    // Scheme is letters only; the former range "A-z" also admitted [ \ ] ^ _ and the backtick.
    public static final Pattern URL = Pattern.compile("[a-zA-Z]+://[^\\s]*");
    public static final Pattern USER_ACCOUNT = Pattern.compile("^[a-zA-Z][a-zA-Z0-9_]{4,15}$");
    public static final Pattern TELEPHONE_NUMBER = Pattern.compile("\\d{3}-\\d{8}|\\d{4}-\\d{7}");
    public static final Pattern CHINA_POSTCODE = Pattern.compile("[1-9]\\d{5}(?!\\d)");
    public static final Pattern PERSONAL_ID_CARD = Pattern.compile("\\d{15}|\\d{18}");
    public static final Pattern IP_ADDR = Pattern.compile("\\d+\\.\\d+\\.\\d+\\.\\d+");
    // One character in U+4E00..U+9FA5. The range must sit inside a character class;
    // without the brackets the expression only matched the literal text U+4E00 '-' U+9FA5.
    public static final Pattern CHINESE_CHAR = Pattern.compile("[\\u4e00-\\u9fa5]");
    public static final Pattern GET_HREF_LINK = Pattern
            .compile("(h|H)(r|R)(e|E)(f|F)  *=  *('|\")?(\\w|\\\\|\\/|\\.)+('|\"|  *|>)?");
    public static final Pattern GET_NUM = Pattern.compile("(-?\\d*)(\\.\\d+)?");

    /**
     * The 14 characters that {@link #escapeRegChars(String, char[])} treats as
     * regular-expression metacharacters.
     */
    public static final char[] REGEXP_KEY_CHARS = new char[] { '\\', // escape character
            '(', ')', // grouping
            '{', '}', // counted quantifier
            '[', ']', // character class
            '*', // quantifier: 0..n
            '+', // quantifier: 1..n
            '?', // quantifier: 0..1 (also the reluctant modifier)
            '.', // any character
            '$', // end anchor
            '^', // start anchor (negation inside a character class)
            '|' // alternation
            // Not listed: '-' and '&', which are only special inside a character class,
            // and '<' / '>', which are not metacharacters on their own.
    };

    /**
     * Converts a simple wildcard expression into a regular expression.
     * Every regex metacharacter in {@code key} is escaped, then {@code *}
     * becomes {@code .*}, {@code ?} becomes {@code .?} and {@code +} becomes
     * {@code .+}.
     *
     * @param key the wildcard expression; must not be {@code null}
     * @return the equivalent regular expression text
     */
    public static String simpleMatchToRegexp(String key) {
        return expandWildcards(escapeRegChars(key, STAR_QUESTION), false);
    }

    /**
     * Compiles a simple wildcard expression (see
     * {@link #simpleMatchToRegexp(String)}) into a {@link Pattern}.
     *
     * @param key           the wildcard expression
     * @param IgnoreCase    when {@code true} the key is upper-cased before conversion.
     *                      NOTE(review): no case-insensitive flag is set on the pattern,
     *                      so the caller has to upper-case the text being matched as well.
     * @param matchStart    when {@code false} the pattern is prefixed with
     *                      {@link #MATCH_ANY_STRING} so the key may start anywhere
     * @param matchEnd      when {@code false} the pattern is suffixed with
     *                      {@link #MATCH_ANY_STRING} so the key may end anywhere
     * @param wildcardSpace when {@code true} each space in the key matches one or
     *                      more whitespace characters
     * @return the compiled pattern
     */
    public static Pattern simplePattern(String key, boolean IgnoreCase, boolean matchStart, boolean matchEnd,
            boolean wildcardSpace) {
        if (IgnoreCase) {
            key = key.toUpperCase();
        }
        String body = simpleMatchToRegexp(key);
        if (wildcardSpace) {
            // Operate on the converted expression, not on the raw key, so escaping
            // and wildcard expansion are preserved.
            body = body.replace(" ", "\\s+");
        }
        StringBuilder regStr = new StringBuilder();
        if (!matchStart) {
            regStr.append(MATCH_ANY_STRING);
        }
        regStr.append(body);
        if (!matchEnd) {
            regStr.append(MATCH_ANY_STRING);
        }
        return Pattern.compile(regStr.toString());
    }

    /**
     * Backslash-escapes every character of {@code key} that is listed in
     * {@link #REGEXP_KEY_CHARS}, except those listed in {@code keeps}.
     *
     * @param key   the text to escape; must not be {@code null}
     * @param keeps metacharacters to leave untouched; {@code null} means none
     * @return the escaped text
     */
    public static String escapeRegChars(String key, char[] keeps) {
        StringBuilder escaped = new StringBuilder(key.length() + 8);
        for (int i = 0; i < key.length(); i++) {
            char c = key.charAt(i);
            if (containsChar(REGEXP_KEY_CHARS, c) && !containsChar(keeps, c)) {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }

    /**
     * Tests whether {@code regexp} matches some part of {@code str}.
     * Equivalent to {@code matches(str, regexp, false)}.
     *
     * @param str    the text to inspect
     * @param regexp the regular expression
     * @return {@code true} if a part of {@code str} matches
     */
    public static boolean contains(String str, String regexp) {
        return matches(str, regexp, false);
    }

    /**
     * Tests whether {@code regexp} matches the whole of {@code str}, like
     * {@link String#matches(String)} but with the compiled pattern cached.
     *
     * @param str    the text to inspect
     * @param regexp the regular expression
     * @return {@code true} if the entire text matches
     */
    public static boolean matches(String str, String regexp) {
        return matches(str, regexp, true);
    }

    /**
     * Tests {@code str} against {@code regexp}.
     *
     * @param str    the text to inspect
     * @param regexp the regular expression
     * @param strict {@code true} to require the whole text to match;
     *               {@code false} to accept a match surrounded by other text
     * @return {@code true} on a match
     */
    public static boolean matches(String str, String regexp, boolean strict) {
        return getMatcher(str, regexp, strict).matches();
    }

    /**
     * Matches {@code str} against a simple wildcard expression and returns the
     * text consumed by each wildcard. The key is converted as in
     * {@link #simpleMatchToRegexp(String)}, except that every wildcard becomes
     * a capturing group.
     *
     * @param str    the text to inspect
     * @param key    the wildcard expression
     * @param strict see {@link #getMatcherResult(String, String, boolean)}
     * @return one entry per wildcard, or {@code null} if there is no match
     */
    public static String[] getSimpleMatchResult(String str, String key, boolean strict) {
        String regexp = expandWildcards(escapeRegChars(key, STAR_QUESTION), true);
        return getMatcherResult(str, regexp, strict);
    }

    /**
     * Matches {@code str} against {@code regexp} and returns the captured groups.
     * <p>
     * In non-strict mode an expression that declares no group of its own is
     * wrapped in one, so the result holds the matched portion rather than the
     * surrounding text. Group detection is textual: it looks for an unescaped
     * {@code (} and {@code )} in the expression.
     *
     * @param str    the text to inspect
     * @param regexp the regular expression
     * @param strict {@code true} to require the whole text to match;
     *               {@code false} to accept a match surrounded by other text
     * @return the groups 1..n; the whole match if the pattern has no group;
     *         {@code null} if there is no match
     */
    public static String[] getMatcherResult(String str, String regexp, boolean strict) {
        if (!strict) {
            // Inspect the expression (not the input text) with escaped parentheses removed.
            String unescaped = regexp.replace("\\(", "").replace("\\)", "");
            boolean hasGroup = unescaped.indexOf('(') > -1 && unescaped.indexOf(')') > -1;
            if (!hasGroup) {
                regexp = "(" + regexp + ")";
            }
        }
        Matcher m = getMatcher(str, regexp, strict);
        if (!m.matches()) {
            return null;
        }
        int n = m.groupCount();
        if (n == 0) {
            return new String[] { m.group() };
        }
        String[] result = new String[n];
        for (int i = 1; i <= n; i++) {
            result[i - 1] = m.group(i);
        }
        return result;
    }

    // Builds a matcher for str, compiling the pattern on a cache miss.
    private static Matcher getMatcher(String str, String regexp, boolean strict) {
        if (!strict) {
            // The non-capturing group keeps a top-level alternation such as "a|b" from
            // absorbing the surrounding fragments, and leaves group numbering unchanged.
            regexp = MATCH_ANY_STRING + "(?:" + regexp + ")" + MATCH_ANY_STRING;
        }
        Pattern p = cache.get(regexp);
        if (p == null) {
            p = Pattern.compile(regexp);
            addToCache(p);
        }
        return p.matcher(str);
    }

    // Stores a compiled pattern, emptying the cache first once it has reached its limit.
    private static void addToCache(Pattern p) {
        // ">=" rather than "==": size check and insert are not atomic, so concurrent
        // inserts may step past the limit and an equality test would never fire again.
        if (cache.size() >= PATTERN_CACHE_SIZE) {
            cache.clear();
        }
        cache.put(p.pattern(), p);
    }

    // Replaces the wildcards * ? + in an already-escaped key by .* .? .+,
    // each wrapped in a capturing group when capture is true.
    private static String expandWildcards(String escapedKey, boolean capture) {
        String open = capture ? "(" : "";
        String close = capture ? ")" : "";
        // Sequential replacement is safe: no replacement text introduces a
        // wildcard character that a later step searches for.
        return escapedKey.replace("*", open + ".*" + close)
                .replace("?", open + ".?" + close)
                .replace("+", open + ".+" + close);
    }

    // Null-safe membership test on a char array.
    private static boolean containsChar(char[] chars, char c) {
        if (chars == null) {
            return false;
        }
        for (char each : chars) {
            if (each == c) {
                return true;
            }
        }
        return false;
    }
}