Java tutorial
/* * JEF - Copyright 2009-2010 Jiyi (mr.jiyi@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jef.tools.string; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; /** * ?? * @author jiyi */ public abstract class RegexpUtils { //???? public static final String MATCH_ANY_STRING = ".*?"; //??????64?(pattern,Matcher?) private static final int PATTERN_CACHE_SIZE = 64; private static final Map<String, Pattern> cache = new ConcurrentHashMap<String, Pattern>(PATTERN_CACHE_SIZE); public static final char[] STAR_QUESTION = new char[] { '*', '?', '+' }; //?? public static final String MATCH_ANY = "."; public static final String MATCH_NUMBER = "\\d"; public static final String MATCH_NON_NUMBER = "\\D"; public static final String MATCH_BLANK = "\\s"; public static final String MATCH_NON_BLANK = "\\S"; public static final String MATCH_WORD_CHAR = "\\w"; public static final String MATCH_NON_WORD_CHAR = "\\W"; public static final String MATCH_START = "^"; public static final String MATCH_END = "$"; //??? public static final String COUNT_0_N = "*"; public static final String COUNT_1_N = "+"; public static final String COUNT_0_1 = "?"; public static final String COUNT_0_N_RELUCTANT = "*?"; public static final String COUNT_1_N_RELUCTANT = "+?"; public static final String COUNT_0_1_RELUCTANT = "??"; public static final Pattern E_MAIL = Pattern.compile("\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*"); public static final Pattern URL = Pattern.compile("[a-zA-z]+://[^\\s]*"); public static final Pattern USER_ACCOUNT = Pattern.compile("^[a-zA-Z][a-zA-Z0-9_]{4,15}$"); public static final Pattern TELEPHONE_NUMBER = Pattern.compile("\\d{3}-\\d{8}|\\d{4}-\\d{7}"); public static final Pattern CHINA_POSTCODE = Pattern.compile("[1-9]\\d{5}(?!\\d)"); public static final Pattern PERSONAL_ID_CARD = Pattern.compile("\\d{15}|\\d{18}"); public static final Pattern IP_ADDR = Pattern.compile("\\d+\\.\\d+\\.\\d+\\.\\d+"); public static final Pattern CHINESE_CHAR = Pattern.compile("\\u4e00-\\u9fa5"); public static final Pattern GET_HREF_LINK = Pattern .compile("(h|H)(r|R)(e|E)(f|F) *= *('|\")?(\\w|\\\\|\\/|\\.)+('|\"| *|>)?"); public static final Pattern GET_NUM = Pattern.compile("(-?\\d*)(\\.\\d+)?"); /** * ?? * ?14 */ public static final char[] REGEXP_KEY_CHARS = new char[] { '\\', //????? //????\\??. '(', ')', //? '{', '}', //? '[', ']', //??? '*', //??0..n '+', //??: 1..n,???? '?', //??: 0..1???????????) '.', //?? '$', //? '^', //???[]?? '|' //? //1?< >?????Java??<,>?JavaJava\b??? //2'-' ????? //??Java????'&' //3??????+ * ????? //4??reluctant??greedy()?reluctant? possessive(?)??? // possessive????? }; /** * ?? * *?????0~1?? Windows?? * @param key * @return */ public static String simpleMatchToRegexp(String key) { //* ?? key = escapeRegChars(key, STAR_QUESTION); key = StringUtils.replaceEach(key, new String[] { "*", "?", "+" }, new String[] { ".*", ".?", ".+" }); return key; } /** * ?pattern * @param key * @param IgnoreCase * @param matchStart * @param matchEnd * @param wildcardSpace * @return */ public static Pattern simplePattern(String key, boolean IgnoreCase, boolean matchStart, boolean matchEnd, boolean wildcardSpace) { if (IgnoreCase) { key = key.toUpperCase(); } // ?? String regStr = simpleMatchToRegexp(key); regStr = ((matchStart) ? "" : RegexpUtils.MATCH_ANY_STRING) + ((wildcardSpace) ? key.replace(" ", "\\s+") : regStr) + ((matchEnd) ? "" : RegexpUtils.MATCH_ANY_STRING); Pattern p = Pattern.compile(regStr); return p; } /** * ?? * @param key * @param sTARQUESTION * @return */ public static String escapeRegChars(String key, char[] keeps) { for (char c : REGEXP_KEY_CHARS) { if (!ArrayUtils.contains(keeps, c)) { key = key.replace(String.valueOf(c), "\\" + c); } } return key; } /** * ????true * @param str * @param regexp * @return */ public static boolean contains(String str, String regexp) { return matches(str, regexp, false); } /** * ?String.matches. ?? * @param str * @param regexp * @return */ public static boolean matches(String str, String regexp) { return matches(str, regexp, true); } /** * ??? * @param key * @param regexp * @param strict * @return */ public static boolean matches(String str, String regexp, boolean strict) { Matcher m = getMatcher(str, regexp, strict); return m.matches(); } public static String[] getSimpleMatchResult(String str, String key, boolean strict) { key = escapeRegChars(key, STAR_QUESTION); key = StringUtils.replaceEach(key, new String[] { "*", "?", "+" }, new String[] { "(.*)", "(.?)", "(.+)" }); return getMatcherResult(str, key, strict); } /** * ? * @param pattern ? * @param str * @param strict ?????? * @return ?null */ public static String[] getMatcherResult(String str, String regexp, boolean strict) { if (!strict) { String tmp = StringUtils.remove(str, "\\("); tmp = StringUtils.remove(str, "\\)"); if (tmp.indexOf('(') > -1 && tmp.indexOf(')') > -1) { //?? } else { regexp = "(" + regexp + ")";// } } Matcher m = getMatcher(str, regexp, strict); if (!m.matches()) return null; int n = m.groupCount(); if (n == 0) return new String[] { m.group() }; String[] result = new String[n]; for (int i = 1; i <= n; i++) { result[i - 1] = m.group(i); } return result; } private static Matcher getMatcher(String str, String regexp, boolean strict) { if (!strict) regexp = MATCH_ANY_STRING + regexp + MATCH_ANY_STRING; Pattern p = cache.get(regexp); if (p == null) { p = Pattern.compile(regexp); addToCache(p); } return p.matcher(str); } //Pattern private static void addToCache(Pattern p) { if (cache.size() == PATTERN_CACHE_SIZE) { cache.clear(); } cache.put(p.pattern(), p); } }