Example usage for java.util.regex Matcher replaceAll

List of usage examples for java.util.regex Matcher replaceAll

Introduction

On this page you can find example usages of java.util.regex.Matcher.replaceAll.

Prototype

public String replaceAll(Function<MatchResult, String> replacer) 

Source Link

Document

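Replaces every subsequence of the input sequence that matches the pattern with the result of applying the given replacer function to the match result of this matcher corresponding to that subsequence. Note that the usage examples below call the replaceAll(String replacement) overload rather than this function-based one.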

Usage

From source file:com.nhncorp.lucy.security.xss.XssPreventer.java

/**
 * ?  XssPreventer  ?  ? ? . <br/>   
 * /*w w  w .ja  va  2 s  .c om*/
 * @param clean
 *            XssPreventer   ?.            
 * @return XssPreventer  ? ?.
 */
public static String unescape(String clean) {

    String str = StringEscapeUtils.unescapeHtml4(clean);

    if (str == null) {
        return null;
    }

    Matcher matcher = unescapePttern.matcher(str);

    if (matcher.find()) {
        return matcher.replaceAll("'");
    }

    return str;
}

From source file:com.nhncorp.lucy.security.xss.XssPreventer.java

/**
 * Escapes the given string with {@code StringEscapeUtils.escapeHtml4} and then
 * replaces every match of {@code escapePattern} with the character reference
 * {@code &#39;}. Intended as protection against XSS
 * ({@code Cross Site Scripting}).
 * <p>
 * NOTE(review): the original comment also mentioned XssFilter and
 * XssSaxFilter, but the surrounding text was lost to an encoding problem;
 * {@code escapePattern} is defined outside this snippet - confirm what it
 * matches.
 *
 * @param dirty
 *            the string to escape; may be {@code null}
 * @return {@code null} when {@code escapeHtml4} yields {@code null};
 *         otherwise the escaped string with all pattern matches replaced by
 *         {@code &#39;}
 */
public static String escape(String dirty) {
    final String escaped = StringEscapeUtils.escapeHtml4(dirty);
    if (escaped == null) {
        return null;
    }

    final Matcher quoteMatcher = escapePattern.matcher(escaped);
    // Hand back the escaped string untouched when there is nothing to replace.
    return quoteMatcher.find() ? quoteMatcher.replaceAll("&#39;") : escaped;
}

From source file:org.lanes.utility.string.TextNormaliser.java

/**
 * Converts lightly formatted HTML into a list of trimmed, non-empty text lines.
 * <p>
 * The input is rewritten by an ordered sequence of regular-expression
 * substitutions: emphasis tags ({@code strong}, {@code b}, {@code em},
 * {@code i}, {@code u}) become {@code [text] }, list items and bullet lines
 * become <code>{text}</code>, e-mail addresses become {@code <EMAIL>}, host-like
 * tokens become {@code <URL>}, slashes become {@code ", "}, and combining
 * diacritical marks are removed after NFD normalization. The result is split
 * on newlines; each line is trimmed and empty lines are dropped.
 *
 * @param html the HTML fragment to clean
 * @return the cleaned lines, in order of appearance
 */
public static List<String> cleanLightHTML(String html) {

    // Ordered {regex, replacement} pairs. The order is significant: later
    // rules rely on the output of earlier ones.
    final String[][] markupRules = {
            { "&nbsp;", " " },
            { "[\\{\\}\\[\\]]", "" },
            { "&amp;", "&" },
            { "(?i)<div.*?>(.*?)<\\/div>", "$1\n" },
            { "(?i)<strong.*?>(.*?)<\\/strong>", "[$1] " },
            { "(?i)<br\\/?>", "\n" }, // must run before the <b> rule
            { "(?i)<b.*?>(.*?)<\\/b>", "[$1] " },
            { "(?i)<em>(.*?)<\\/em>", "[$1] " },
            { "(?i)<i>(.*?)<\\/i>", "[$1] " },
            { "(?i)<u>(.*?)<\\/u>", "[$1] " },

            // Tidy whitespace and colons around the bracket markers.
            { "[\\s\\n]+\\]", "]" },
            { "\\[[\\s\\n]+", "[" },
            { "[\\s]*:\\]", "]" },
            { "(?i)<[\\/]?[uo]l.*?>", "" },

            // List items become {...}; remaining structural tags become spaces.
            { "(?i)<li.*?>(.+?)(?=<li>)", "{$1}\n" },
            { "(?i)<li.*?>(.+?)\\n", "{$1}\n" },
            { "(?i)<\\/li>", " " },
            { "(?i)<[\\/]?div.*?>", " " },
            { "(?i)<\\/?center>", " " },
            { "(?i)<\\/?p.*?>", " " },
            { "(?i)<\\/?li>", " " },
            { "(?i)<\\/?font.*?>", " " },
            { "(?i)<\\/?hr.*?>", " " },
            { "\\[\\]", "" },
    };

    String text = html;
    for (String[] rule : markupRules) {
        text = text.replaceAll(rule[0], rule[1]);
    }

    // Lines starting with a middle dot or bullet character become {...} items.
    final Pattern bulletLine = Pattern.compile("[\u00B7\u2022]\\s*(.+?)\n",
            (Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE));
    text = bulletLine.matcher(text).replaceAll("{$1}\n");

    // Applied after the bullet conversion, again in this exact order.
    final String[][] tokenRules = {
            { "\\s\\}", "}" },
            { "(?i)(?:[\\w\\.]+)@(?:[\\w]+\\.)+(?:[\\w]+)", "<EMAIL>" },
            { "(?i)(?:http:\\/\\/)?(?:[\\w]+\\.)+(?:[\\w]+)", "<URL>" },
            { "\\s*\\/\\s*", ", " },
    };
    for (String[] rule : tokenRules) {
        text = text.replaceAll(rule[0], rule[1]);
    }

    // Decompose accented characters, then drop the combining marks.
    final String decomposed = Normalizer.normalize(text, Form.NFD);
    text = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

    final List<String> cleanedLines = new ArrayList<String>();
    for (String rawLine : text.split("\\n")) {
        final String trimmed = rawLine.trim();
        if (trimmed.length() > 0) {
            cleanedLines.add(trimmed);
        }
    }

    return cleanedLines;
}

From source file:com.mmj.app.lucene.analyzer.AbstractWordAnalyzer.java

/**
 * Replaces every match of {@code regex} in {@code str} with
 * {@code StringUtils.EMPTY} and trims the result.
 *
 * @param str   the text to process
 * @param regex the regular expression whose matches are replaced
 * @return the text after replacement, with leading and trailing whitespace
 *         trimmed
 */
public static String matcherRegex(String str, String regex) {
    final String stripped = Pattern.compile(regex).matcher(str).replaceAll(StringUtils.EMPTY);
    return stripped.trim();
}

From source file:RegexUtil.java

/**
 * Replaces all backslashes "\" with forward slashes "/". Convenience method to
 * convert path Strings to URI format./*  w ww  . j av  a2s .com*/
 */
static String substBackslashes(String string) {
    if (string == null) {
        return null;
    }

    Matcher matcher = BACKSLASH.matcher(string);
    return matcher.find() ? matcher.replaceAll("\\/") : string;
}

From source file:apps.Source2XML.java

/**
 * Replaces every match of {@code mSomePunctPattern} in {@code s} with a single
 * space.
 *
 * @param s the text to process
 * @return the text with each match replaced by " "
 */
public static String replaceSomePunct(String s) {
    return mSomePunctPattern.matcher(s).replaceAll(" ");
}

From source file:org.carrot2.source.SearchEngineBase.java

/**
 * Cleans the given <code>fields</code> of every document in the
 * <code>response</code>: optionally strips matches of
 * <code>HIGHLIGHTS_PATTERN</code>, then unescapes HTML entities and stores the
 * result back into the document. Blank fields are left untouched.
 *
 * @param response the search engine response to clean
 * @param keepHighlights set to <code>true</code> to keep query terms highlights
 * @param fields names of fields to clean
 */
protected static void clean(SearchEngineResponse response, boolean keepHighlights, String... fields) {
    for (Document document : response.results) {
        for (String field : fields) {
            final String rawValue = document.getField(field);
            if (!StringUtils.isNotBlank(rawValue)) {
                // Nothing to clean for a missing or blank field.
                continue;
            }

            final String withoutHighlights = keepHighlights
                    ? rawValue
                    : HIGHLIGHTS_PATTERN.matcher(rawValue).replaceAll("");

            document.setField(field, StringEscapeUtils.unescapeHtml(withoutHighlights));
        }
    }
}

From source file:RegexUtil.java

/**
 * Returns package name for the Java class as a path separated with forward slash
 * ("/"). Method is used to lookup resources that are located in package
 * subdirectories. For example, a String "a/b/c" will be returned for class name
 * "a.b.c.ClassName"./*from   w  w w. ja va  2 s  .  c om*/
 */
static String getPackagePath(String className) {
    if (className == null) {
        return "";
    }

    Matcher matcher = DOT.matcher(className);
    if (matcher.find()) {
        String path = matcher.replaceAll("\\/");
        return path.substring(0, path.lastIndexOf("/"));
    } else {
        return "";
    }
}

From source file:net.sf.zekr.engine.search.tanzil.RegexUtils.java

License:asdf

/**
 * Applies every pattern/replacement pair of the map to the source string, one
 * after another, in the iteration order of the map's entry set. Each
 * replacement operates on the output of the previous one.
 *
 * @param replacePatternMap a {@link Map} from {@link Pattern}s to replace {@link String}s
 * @param src original string to apply replace all on.
 * @return the resulting string after replacing patterns
 */
public static String replaceAll(Map<Pattern, String> replacePatternMap, String src) {
    String result = src;
    for (Map.Entry<Pattern, String> rule : replacePatternMap.entrySet()) {
        result = rule.getKey().matcher(result).replaceAll(rule.getValue());
    }
    return result;
}

From source file:com.datumbox.framework.utilities.text.cleaners.HTMLCleaner.java

/**
 * Replaces every match of {@code IMG_ALT_TITLE_PATTERN} in the text with the
 * content of its first capturing group, padded with one space on each side.
 *
 * @param text the text to process
 * @return the text with matches replaced, or the input itself when the pattern
 *         does not occur
 */
public static String replaceImgWithAlt(String text) {
    final Matcher imgMatcher = IMG_ALT_TITLE_PATTERN.matcher(text);
    return imgMatcher.find() ? imgMatcher.replaceAll(" $1 ") : text;
}