List of usage examples for java.util.regex Matcher replaceAll
public String replaceAll(Function<MatchResult, String> replacer)
From source file:com.nhncorp.lucy.security.xss.XssPreventer.java
/** * ? XssPreventer ? ? ? . <br/> * /*w w w .ja va 2 s .c om*/ * @param clean * XssPreventer ?. * @return XssPreventer ? ?. */ public static String unescape(String clean) { String str = StringEscapeUtils.unescapeHtml4(clean); if (str == null) { return null; } Matcher matcher = unescapePttern.matcher(str); if (matcher.find()) { return matcher.replaceAll("'"); } return str; }
From source file:com.nhncorp.lucy.security.xss.XssPreventer.java
/**
 * Escapes a string as a defence against XSS ({@code Cross Site Scripting}).
 *
 * <p>The input is first passed through {@code StringEscapeUtils.escapeHtml4}.
 * If {@code escapePattern} matches anywhere in that result, every match is
 * replaced with the literal used below.
 *
 * @param dirty the string that may contain XSS ({@code Cross Site Scripting}) code
 * @return the escaped string, or {@code null} when {@code escapeHtml4}
 *         returns {@code null}
 */
public static String escape(String dirty) {
    final String escaped = StringEscapeUtils.escapeHtml4(dirty);
    if (escaped == null) {
        return null;
    }

    final Matcher quoteMatcher = escapePattern.matcher(escaped);
    // NOTE(review): the replacement is a plain apostrophe. escapePattern is
    // declared outside this block, so confirm that this literal is the intended
    // escaped form and was not altered when the snippet was copied.
    return quoteMatcher.find() ? quoteMatcher.replaceAll("'") : escaped;
}
From source file:org.lanes.utility.string.TextNormaliser.java
public static List<String> cleanLightHTML(String html) { html = html.replaceAll(" ", " "); html = html.replaceAll("[\\{\\}\\[\\]]", ""); html = html.replaceAll("&", "&"); html = html.replaceAll("(?i)<div.*?>(.*?)<\\/div>", "$1\n"); html = html.replaceAll("(?i)<strong.*?>(.*?)<\\/strong>", "[$1] "); html = html.replaceAll("(?i)<br\\/?>", "\n");//MUST COME BEFORE <b> html = html.replaceAll("(?i)<b.*?>(.*?)<\\/b>", "[$1] "); html = html.replaceAll("(?i)<em>(.*?)<\\/em>", "[$1] "); html = html.replaceAll("(?i)<i>(.*?)<\\/i>", "[$1] "); html = html.replaceAll("(?i)<u>(.*?)<\\/u>", "[$1] "); html = html.replaceAll("[\\s\\n]+\\]", "]"); html = html.replaceAll("\\[[\\s\\n]+", "["); html = html.replaceAll("[\\s]*:\\]", "]"); html = html.replaceAll("(?i)<[\\/]?[uo]l.*?>", ""); html = html.replaceAll("(?i)<li.*?>(.+?)(?=<li>)", "{$1}\n"); html = html.replaceAll("(?i)<li.*?>(.+?)\\n", "{$1}\n"); html = html.replaceAll("(?i)<\\/li>", " "); html = html.replaceAll("(?i)<[\\/]?div.*?>", " "); html = html.replaceAll("(?i)<\\/?center>", " "); html = html.replaceAll("(?i)<\\/?p.*?>", " "); html = html.replaceAll("(?i)<\\/?li>", " "); html = html.replaceAll("(?i)<\\/?font.*?>", " "); html = html.replaceAll("(?i)<\\/?hr.*?>", " "); html = html.replaceAll("\\[\\]", ""); Pattern pattern = Pattern.compile("[\u00B7\u2022]\\s*(.+?)\n", (Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE)); Matcher matcher = pattern.matcher(html); html = matcher.replaceAll("{$1}\n"); html = html.replaceAll("\\s\\}", "}"); html = html.replaceAll("(?i)(?:[\\w\\.]+)@(?:[\\w]+\\.)+(?:[\\w]+)", "<EMAIL>"); html = html.replaceAll("(?i)(?:http:\\/\\/)?(?:[\\w]+\\.)+(?:[\\w]+)", "<URL>"); html = html.replaceAll("\\s*\\/\\s*", ", "); //html = html.replaceAll("\\s+", " "); html = Normalizer.normalize(html, Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); List<String> lineobj = new ArrayList<String>(); String[] lines = html.split("\\n"); for (String line : lines) { line = line.trim();//from w w w. 
j a v a2s.c om if (!line.equals("")) { lineobj.add(line); } } return lineobj; }
From source file:com.mmj.app.lucene.analyzer.AbstractWordAnalyzer.java
/**
 * Removes every match of a regular expression from a string and trims the result.
 *
 * @param str   the text to process
 * @param regex the regular expression whose matches are replaced with
 *              {@code StringUtils.EMPTY}
 * @return {@code str} with all matches replaced, with leading and trailing
 *         whitespace trimmed
 */
public static String matcherRegex(String str, String regex) {
    final Matcher stripper = Pattern.compile(regex).matcher(str);
    final String stripped = stripper.replaceAll(StringUtils.EMPTY);
    return stripped.trim();
}
From source file:RegexUtil.java
/** * Replaces all backslashes "\" with forward slashes "/". Convenience method to * convert path Strings to URI format./* w ww . j av a2s .com*/ */ static String substBackslashes(String string) { if (string == null) { return null; } Matcher matcher = BACKSLASH.matcher(string); return matcher.find() ? matcher.replaceAll("\\/") : string; }
From source file:apps.Source2XML.java
/**
 * Replaces every match of {@code mSomePunctPattern} in the given string with a
 * single space.
 *
 * @param s the text to process
 * @return the text with each match replaced by one space
 */
public static String replaceSomePunct(String s) {
    return mSomePunctPattern.matcher(s).replaceAll(" ");
}
From source file:org.carrot2.source.SearchEngineBase.java
/**
 * Cleans the named <code>fields</code> of every document in the given
 * <code>response</code>: optionally strips query-term highlights, then
 * unescapes HTML. Blank field values are left untouched.
 *
 * @param response the search engine response whose documents are cleaned in place
 * @param keepHighlights <code>true</code> to keep query-term highlights;
 *        <code>false</code> to remove every match of <code>HIGHLIGHTS_PATTERN</code>
 * @param fields names of the fields to clean
 */
protected static void clean(SearchEngineResponse response, boolean keepHighlights, String... fields) {
    for (final Document doc : response.results) {
        for (final String fieldName : fields) {
            final String rawValue = doc.getField(fieldName);
            if (!StringUtils.isNotBlank(rawValue)) {
                continue;
            }

            final String withoutHighlights = keepHighlights
                    ? rawValue
                    : HIGHLIGHTS_PATTERN.matcher(rawValue).replaceAll("");
            doc.setField(fieldName, StringEscapeUtils.unescapeHtml(withoutHighlights));
        }
    }
}
From source file:RegexUtil.java
/** * Returns package name for the Java class as a path separated with forward slash * ("/"). Method is used to lookup resources that are located in package * subdirectories. For example, a String "a/b/c" will be returned for class name * "a.b.c.ClassName"./*from w w w. ja va 2 s . c om*/ */ static String getPackagePath(String className) { if (className == null) { return ""; } Matcher matcher = DOT.matcher(className); if (matcher.find()) { String path = matcher.replaceAll("\\/"); return path.substring(0, path.lastIndexOf("/")); } else { return ""; } }
From source file:net.sf.zekr.engine.search.tanzil.RegexUtils.java
License:asdf
/** * @param replacePatternMap a {@link Map} from {@link Pattern}s to replace {@link String}s * @param src original string to apply replace all on. * @return the resulting string after replacing patterns *///from ww w. ja va 2 s.co m public static String replaceAll(Map<Pattern, String> replacePatternMap, String src) { for (Entry<Pattern, String> entry : replacePatternMap.entrySet()) { Matcher m = ((Pattern) entry.getKey()).matcher(src); src = m.replaceAll((String) entry.getValue()); } return src; }
From source file:com.datumbox.framework.utilities.text.cleaners.HTMLCleaner.java
public static String replaceImgWithAlt(String text) { Matcher m = IMG_ALT_TITLE_PATTERN.matcher(text); if (m.find()) { return m.replaceAll(" $1 "); }//from w w w . ja v a 2 s . c o m return text; }