List of usage examples for java.util.regex Pattern UNICODE_CASE
int UNICODE_CASE
To view the source code for java.util.regex Pattern UNICODE_CASE, click Source Link.
From source file:org.etudes.component.app.jforum.util.html.SafeHtml.java
/**
 * Matches an opening anchor tag: the tag name {@code a}, at least one whitespace
 * character, then the attribute text up to the closing {@code >}. Requiring the
 * whitespace keeps tags such as {@code <abbr>} or {@code <area>} from matching.
 */
private static final Pattern ANCHOR_TAG = Pattern.compile("<(a)(\\s[^>]*)>",
        Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

/**
 * Matches a quoted {@code target} attribute plus any trailing whitespace. The
 * look-behind rejects attributes that merely end in "target" (e.g. {@code data-target}).
 */
private static final Pattern TARGET_ATTRIBUTE = Pattern.compile(
        "(?<![\\w-])target\\s*=\\s*[\"'][^\"']*[\"']\\s*", Pattern.CASE_INSENSITIVE);

/**
 * Removes any existing {@code target} attribute from anchor tags and adds
 * {@code target="_blank"} as the first attribute.
 *
 * <p>Only opening anchor tags that carry attributes are rewritten; a bare
 * {@code <a>} and every non-anchor tag are left untouched.
 *
 * @param contents
 *            post content, may be {@code null}
 *
 * @return the content with {@code target="_blank"} in its anchor tags, or
 *         {@code null} if {@code contents} is {@code null}
 */
public static String addAnchorTarget(String contents) {
    if (contents == null) {
        return null;
    }

    StringBuffer sb = new StringBuffer();
    Matcher m = ANCHOR_TAG.matcher(contents);
    while (m.find()) {
        String tagName = m.group(1);
        String attributes = TARGET_ATTRIBUTE.matcher(m.group(2)).replaceAll("");
        String rewritten = "<" + tagName + " target=\"_blank\" " + attributes + ">";
        // quoteReplacement: attribute values may contain '$' or '\', which
        // appendReplacement would otherwise interpret as group references.
        m.appendReplacement(sb, Matcher.quoteReplacement(rewritten));
    }
    m.appendTail(sb);

    return sb.toString();
}
From source file:org.yes.cart.bulkexport.csv.impl.CsvExportColumnImpl.java
/**
 * Returns the compiled form of {@code valueRegEx}, compiling and caching it in
 * {@code pattern} on first use.
 *
 * @return the cached pattern, or whatever {@code pattern} currently holds
 *         (possibly {@code null}) when {@code valueRegEx} is blank
 */
private Pattern getPattern() {
    final boolean needsCompile = pattern == null && StringUtils.isNotBlank(valueRegEx);
    if (needsCompile) {
        final int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE | Pattern.DOTALL;
        pattern = Pattern.compile(valueRegEx, flags);
    }
    return pattern;
}
From source file:com.norconex.importer.handler.tagger.impl.TextBetweenTagger.java
@Override protected void tagStringContent(String reference, StringBuilder content, ImporterMetadata metadata, boolean parsed, boolean partialContent) { int flags = Pattern.DOTALL | Pattern.UNICODE_CASE; if (!caseSensitive) { flags = flags | Pattern.CASE_INSENSITIVE; }//from ww w. j av a2 s.c om for (TextBetween between : betweens) { List<Pair<Integer, Integer>> matches = new ArrayList<Pair<Integer, Integer>>(); Pattern leftPattern = Pattern.compile(between.start, flags); Matcher leftMatch = leftPattern.matcher(content); while (leftMatch.find()) { Pattern rightPattern = Pattern.compile(between.end, flags); Matcher rightMatch = rightPattern.matcher(content); if (rightMatch.find(leftMatch.end())) { if (inclusive) { matches.add(new ImmutablePair<Integer, Integer>(leftMatch.start(), rightMatch.end())); } else { matches.add(new ImmutablePair<Integer, Integer>(leftMatch.end(), rightMatch.start())); } } else { break; } } for (int i = matches.size() - 1; i >= 0; i--) { Pair<Integer, Integer> matchPair = matches.get(i); String value = content.substring(matchPair.getLeft(), matchPair.getRight()); if (value != null) { metadata.addString(between.name, value); } } } }
From source file:Normalization.TextNormalization.java
/**
 * Matches URL-like tokens: one of the prefixes {@code http}, {@code https},
 * {@code ftp}, {@code file}, {@code pic} or {@code www}, followed by
 * {@code :}, {@code |} or {@code .}, then a run of URL characters that must not
 * end in punctuation such as {@code .} or {@code ,}.
 */
private static final Pattern URL_PATTERN = Pattern.compile(
        "(https?|ftp|file|pic|www)[:|.][-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%=~_|]",
        Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

/**
 * Removes every URL-like token from the given text.
 *
 * @param content the text to clean; must not be {@code null}
 * @return the text with all matches of {@link #URL_PATTERN} deleted
 * @throws NullPointerException if {@code content} is {@code null}
 */
public String removeUrlsFromString(String content) {
    // The UTF-8 round trip of the original implementation is kept so that text
    // that cannot be encoded (e.g. unpaired surrogates) is still replaced by the
    // encoder's substitution character. Using a Charset constant instead of the
    // charset name removes the checked exception that used to be swallowed.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    final String normalized = new String(content.getBytes(utf8), utf8);
    return URL_PATTERN.matcher(normalized).replaceAll("");
}
From source file:com.norconex.importer.handler.transformer.impl.StripBetweenTransformer.java
/**
 * For every configured pair of start/end regular expressions, deletes from
 * {@code content} the text found between a start match and the next end match.
 *
 * <p>Matching uses {@code DOTALL | UNICODE_CASE}, plus {@code CASE_INSENSITIVE}
 * unless {@code caseSensitive} is set. When {@code inclusive} is set the
 * delimiters are deleted as well. All ranges for a pair are collected first and
 * then deleted from last to first so earlier offsets stay valid.
 *
 * @param reference      not used by this method
 * @param content        the text to strip; modified in place
 * @param metadata       not used by this method
 * @param parsed         not used by this method
 * @param partialContent not used by this method
 */
@Override
protected void transformStringContent(String reference, StringBuilder content, ImporterMetadata metadata,
        boolean parsed, boolean partialContent) {
    int flags = Pattern.DOTALL | Pattern.UNICODE_CASE;
    if (!caseSensitive) {
        flags = flags | Pattern.CASE_INSENSITIVE;
    }
    for (Pair<String, String> pair : stripPairs) {
        List<Pair<Integer, Integer>> matches = new ArrayList<Pair<Integer, Integer>>();
        Matcher leftMatch = Pattern.compile(pair.getLeft(), flags).matcher(content);
        // The end-delimiter matcher is loop-invariant: build it once, and only
        // when a start delimiter is actually found, instead of recompiling the
        // pattern for every left match.
        Matcher rightMatch = null;
        while (leftMatch.find()) {
            if (rightMatch == null) {
                rightMatch = Pattern.compile(pair.getRight(), flags).matcher(content);
            }
            // find(int) resets the matcher before searching, so reuse is safe.
            if (!rightMatch.find(leftMatch.end())) {
                // No closing delimiter after this point: later starts cannot close either.
                break;
            }
            if (inclusive) {
                matches.add(new ImmutablePair<Integer, Integer>(leftMatch.start(), rightMatch.end()));
            } else {
                matches.add(new ImmutablePair<Integer, Integer>(leftMatch.end(), rightMatch.start()));
            }
        }
        // NOTE(review): two start matches before the same end match yield
        // overlapping ranges; confirm that deleting them one after another is
        // the intended result for such input.
        for (int i = matches.size() - 1; i >= 0; i--) {
            Pair<Integer, Integer> matchPair = matches.get(i);
            content.delete(matchPair.getLeft(), matchPair.getRight());
        }
    }
}
From source file:com.microsoft.tfs.core.clients.versioncontrol.internal.fileattributes.FileAttributesEntry.java
/**
 * Turns a filename expression into a {@link Pattern}, applying the
 * case-insensitive, Unicode-aware flags used for file attributes entries.
 *
 * @param filenameExpression
 *        the regular expression to compile.
 * @return the resulting pattern.
 * @throws PatternSyntaxException
 *         if the expression is not a valid regular expression.
 */
private Pattern compilePattern(final String filenameExpression) {
    final int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
    return Pattern.compile(filenameExpression, flags);
}
From source file:com.norconex.collector.core.filter.impl.RegexMetadataFilter.java
public final void setRegex(String regex) { this.regex = regex; if (regex != null) { int flags = Pattern.DOTALL; if (!caseSensitive) { flags = flags | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; }//from w ww .j a v a 2 s.c o m this.pattern = Pattern.compile(regex, flags); } else { this.pattern = Pattern.compile(".*"); } }
From source file:org.tightblog.rendering.comment.BlacklistCommentValidator.java
/**
 * Creates a list of regex {@link Pattern} elements from a line-delimited list.
 *
 * <p>Each line is trimmed. Lines starting with {@code #} are treated as comments
 * and skipped, as are lines that are empty after trimming (for example the
 * {@code \r} left over from CRLF line endings): an empty rule would compile to
 * a pattern that matches every input. Rules are compiled case-insensitively.
 *
 * @param blacklist String of regex rules, one per line delimited by \n; may be {@code null}
 * @return the compiled rules in input order; empty if {@code blacklist} is
 *         {@code null} or contains no rules
 * @throws java.util.regex.PatternSyntaxException if a rule is not a valid regular expression
 **/
public static List<Pattern> populateSpamRules(String blacklist) {
    List<Pattern> regexRules = new ArrayList<>();
    if (blacklist == null) {
        return regexRules;
    }

    StringTokenizer tokenizer = new StringTokenizer(blacklist, "\n");
    while (tokenizer.hasMoreTokens()) {
        String token = tokenizer.nextToken().trim();
        if (token.isEmpty() || token.startsWith("#")) {
            continue;
        }
        regexRules.add(Pattern.compile(token, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE));
    }
    return regexRules;
}
From source file:com.google.code.configprocessor.processing.ModifyAction.java
protected int parseFlags() { int flagsToUse = 0; String flagsToTest = getFlags() == null ? DEFAULT_PATTERN_FLAGS : getFlags(); String[] flagArray = StringUtils.split(flagsToTest, PATTERN_FLAG_SEPARATOR); for (String flag : flagArray) { if ("UNIX_LINES".equals(flag)) { flagsToUse |= Pattern.UNIX_LINES; } else if ("CASE_INSENSITIVE".equals(flag)) { flagsToUse |= Pattern.CASE_INSENSITIVE; } else if ("COMMENTS".equals(flag)) { flagsToUse |= Pattern.COMMENTS; } else if ("MULTILINE".equals(flag)) { flagsToUse |= Pattern.MULTILINE; } else if ("LITERAL".equals(flag)) { flagsToUse |= Pattern.LITERAL; } else if ("DOTALL".equals(flag)) { flagsToUse |= Pattern.DOTALL; } else if ("UNICODE_CASE".equals(flag)) { flagsToUse |= Pattern.UNICODE_CASE; } else if ("CANON_EQ".equals(flag)) { flagsToUse |= Pattern.CANON_EQ; } else {//from w w w.j a va2 s. c om throw new IllegalArgumentException("Unknown flag: " + flag); } } return flagsToUse; }
From source file:org.apache.manifoldcf.crawler.connectors.webcrawler.CredentialsDescription.java
/**
 * Constructor. Build the description from the ConfigParams.
 *
 * <p>Walks the children of {@code configData}; for every access-credential node
 * it compiles the node's URL regular expression, builds a
 * {@code CredentialsItem} holding a basic, NTLM or session credential according
 * to the node's type attribute, and stores the item in {@code patternHash}
 * keyed by the URL regular expression string. Children of other types are
 * ignored.
 *
 * @param configData the configuration whose child nodes describe the credentials
 * @throws ManifoldCFException if a regular expression cannot be compiled, or a
 *         credential type or authentication page type is not recognized
 */
public CredentialsDescription(ConfigParams configData) throws ManifoldCFException {
    // Scan, looking for bin description nodes
    int i = 0;
    while (i < configData.getChildCount()) {
        ConfigNode node = configData.getChild(i++);
        if (node.getType().equals(WebcrawlerConfig.NODE_ACCESSCREDENTIAL)) {
            // Get the url regexp
            String urlDescription = node.getAttributeValue(WebcrawlerConfig.ATTR_URLREGEXP);
            try {
                Pattern p;
                try {
                    p = Pattern.compile(urlDescription, Pattern.UNICODE_CASE);
                } catch (java.util.regex.PatternSyntaxException e) {
                    throw new ManifoldCFException("Access credential regular expression '"
                            + urlDescription + "' is illegal: " + e.getMessage(), e);
                }
                CredentialsItem ti = new CredentialsItem(p);
                // NOTE(review): type is dereferenced below without a null check,
                // unlike password - confirm the attribute is always present.
                String type = node.getAttributeValue(WebcrawlerConfig.ATTR_TYPE);
                // These get used in two of the three types; no harm in fetching them up front.
                String userName = node.getAttributeValue(WebcrawlerConfig.ATTR_USERNAME);
                String password = node.getAttributeValue(WebcrawlerConfig.ATTR_PASSWORD);
                // The configured password is passed through ManifoldCF.deobfuscate before use.
                if (password != null)
                    password = ManifoldCF.deobfuscate(password);
                if (type.equals(WebcrawlerConfig.ATTRVALUE_BASIC))
                    ti.setCredential(new BasicCredential(userName, password));
                else if (type.equals(WebcrawlerConfig.ATTRVALUE_NTLM)) {
                    String domain = node.getAttributeValue(WebcrawlerConfig.ATTR_DOMAIN);
                    ti.setCredential(new NTLMCredential(domain, userName, password));
                } else if (type.equals(WebcrawlerConfig.ATTRVALUE_SESSION)) {
                    // This is a complex credential type that cannot be easily set up with just a constructor.
                    // Use the url regexp as the sequence key; this works as well as anything, although I haven't thought through all the implications if it gets changed.
                    SessionCredential sc = new SessionCredential(urlDescription);
                    // Loop through child nodes; they describe the pages that belong to the login sequence.
                    int j = 0;
                    while (j < node.getChildCount()) {
                        ConfigNode child = node.getChild(j++);
                        if (child.getType().equals(WebcrawlerConfig.NODE_AUTHPAGE)) {
                            String authPageRegexp = child.getAttributeValue(WebcrawlerConfig.ATTR_URLREGEXP);
                            String pageType = child.getAttributeValue(WebcrawlerConfig.ATTR_TYPE);
                            String matchRegexp = child.getAttributeValue(WebcrawlerConfig.ATTR_MATCHREGEXP);
                            String overrideTargetURL = child
                                    .getAttributeValue(WebcrawlerConfig.ATTR_OVERRIDETARGETURL);
                            // An empty override URL is treated the same as no override.
                            if (overrideTargetURL != null && overrideTargetURL.length() == 0)
                                overrideTargetURL = null;
                            Pattern authPattern;
                            try {
                                authPattern = Pattern.compile(authPageRegexp, Pattern.UNICODE_CASE);
                            } catch (java.util.regex.PatternSyntaxException e) {
                                throw new ManifoldCFException("Authentication page regular expression '"
                                        + authPageRegexp + "' is illegal: " + e.getMessage(), e);
                            }
                            Pattern matchPattern;
                            try {
                                matchPattern = Pattern.compile(matchRegexp, Pattern.UNICODE_CASE);
                            } catch (java.util.regex.PatternSyntaxException e) {
                                throw new ManifoldCFException("Match regular expression '" + matchRegexp
                                        + "' is illegal: " + e.getMessage(), e);
                            }
                            // addAuthPage takes four (regexp, pattern) pairs after the override URL;
                            // the page type decides which pair receives the match expression, the
                            // other three pairs are passed as null.
                            if (pageType.equals(WebcrawlerConfig.ATTRVALUE_FORM)) {
                                sc.addAuthPage(authPageRegexp, authPattern, overrideTargetURL, null, null,
                                        matchRegexp, matchPattern, null, null, null, null);
                            } else if (pageType.equals(WebcrawlerConfig.ATTRVALUE_LINK)) {
                                sc.addAuthPage(authPageRegexp, authPattern, overrideTargetURL, matchRegexp,
                                        matchPattern, null, null, null, null, null, null);
                            } else if (pageType.equals(WebcrawlerConfig.ATTRVALUE_REDIRECTION)) {
                                sc.addAuthPage(authPageRegexp, authPattern, overrideTargetURL, null, null,
                                        null, null, matchRegexp, matchPattern, null, null);
                            } else if (pageType.equals(WebcrawlerConfig.ATTRVALUE_CONTENT)) {
                                sc.addAuthPage(authPageRegexp, authPattern, overrideTargetURL, null, null,
                                        null, null, null, null, matchRegexp, matchPattern);
                            } else
                                throw new ManifoldCFException("Invalid page type: " + pageType);
                            // Finally, walk through any specified parameters
                            int k = 0;
                            while (k < child.getChildCount()) {
                                ConfigNode paramNode = child.getChild(k++);
                                if (paramNode.getType().equals(WebcrawlerConfig.NODE_AUTHPARAMETER)) {
                                    String paramName = paramNode
                                            .getAttributeValue(WebcrawlerConfig.ATTR_NAMEREGEXP);
                                    Pattern paramNamePattern;
                                    try {
                                        paramNamePattern = Pattern.compile(paramName, Pattern.UNICODE_CASE);
                                    } catch (java.util.regex.PatternSyntaxException e) {
                                        throw new ManifoldCFException("Parameter name regular expression '"
                                                + paramName + "' is illegal: " + e.getMessage(), e);
                                    }
                                    String passwordValue = paramNode
                                            .getAttributeValue(WebcrawlerConfig.ATTR_PASSWORD);
                                    String paramValue = paramNode
                                            .getAttributeValue(WebcrawlerConfig.ATTR_VALUE);
                                    // A password attribute, when present, takes precedence over the
                                    // plain value attribute (after deobfuscation).
                                    if (passwordValue != null)
                                        paramValue = ManifoldCF.deobfuscate(passwordValue);
                                    sc.addPageParameter(authPageRegexp, paramName, paramNamePattern, paramValue);
                                }
                            }
                        }
                    }
                    ti.setCredential(sc);
                } else
                    throw new ManifoldCFException("Illegal credential type: " + type);
                patternHash.put(urlDescription, ti);
            } catch (PatternSyntaxException e) {
                // NOTE(review): every Pattern.compile call above already translates this
                // exception itself; presumably this is a fallback for the calls that are
                // not individually wrapped - confirm whether it is still reachable.
                throw new ManifoldCFException("Bad pattern syntax in '" + urlDescription + "'", e);
            }
        }
    }
}