List of usage examples for java.util.regex.Matcher.replaceAll
public String replaceAll(Function<MatchResult, String> replacer)
From source file:opennlp.tools.similarity.apps.utils.Utils.java
/** Matches a tag-like span from {@code <} to the nearest {@code >}; DOTALL lets it cross line breaks. */
private static final Pattern HTML_TAG_PATTERN = Pattern.compile("\\<.*?>", Pattern.DOTALL);

/**
 * Removes every tag-like span ({@code <...>}) from the given text, including
 * tags whose attributes are spread over several lines.
 *
 * @param text the text to clean; must not be {@code null}
 * @return the text with every {@code <...>} span removed
 * @throws NullPointerException if {@code text} is {@code null}
 */
public static String stripHTMLMultiLine(String text) {
    // The pattern is compiled once (see the constant above) instead of on every call.
    return HTML_TAG_PATTERN.matcher(text).replaceAll("");
}
From source file:opennlp.tools.similarity.apps.utils.Utils.java
public static String stripHTMLCommentsMultiLine(String text) { Pattern p = java.util.regex.Pattern.compile("\\<!--.*?-->", Pattern.DOTALL); Matcher matcher = p.matcher(text); String tmp = matcher.replaceAll(""); return tmp;//from ww w. j a v a 2s . co m }
From source file:opennlp.tools.similarity.apps.utils.Utils.java
/** Matches a {@code <script ...>...</script>} element, in any letter case and across line breaks. */
private static final Pattern SCRIPT_ELEMENT_PATTERN =
        Pattern.compile("\\<SCRIPT.*?</SCRIPT>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

/**
 * Removes every {@code <script>} element, together with its content, from the
 * given text. Matching is case-insensitive and spans line breaks.
 *
 * @param text the text to clean; must not be {@code null}
 * @return the text with all script elements removed
 * @throws NullPointerException if {@code text} is {@code null}
 */
public static String stripScriptTags(String text) {
    // The pattern is compiled once (see the constant above) instead of on every call.
    return SCRIPT_ELEMENT_PATTERN.matcher(text).replaceAll("");
}
From source file:opennlp.tools.similarity.apps.utils.Utils.java
public static String stripNoScriptTags(String text) { Pattern p = java.util.regex.Pattern.compile("\\<NOSCRIPT.*?</NOSCRIPT>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); Matcher matcher = p.matcher(text); String tmp = matcher.replaceAll(""); return tmp;//from w ww .j av a2 s .c o m }
From source file:opennlp.tools.similarity.apps.utils.Utils.java
public static String stripStyleTags(String text) { Pattern p = java.util.regex.Pattern.compile("\\<STYLE.*?</STYLE>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); Matcher matcher = p.matcher(text); String tmp = matcher.replaceAll(""); return tmp;// www . ja v a2 s .c om }
From source file:com.bizosys.dataservice.util.StringUtils.java
/**
 * Replaces every match of the class-level {@code pattern} in the input with a
 * single space character.
 *
 * <p>NOTE(review): {@code pattern} is declared outside this snippet; presumably
 * it matches runs of whitespace - confirm against its definition.
 *
 * @param input the text to normalise
 * @return the input with each pattern match replaced by one space
 */
public static String stripExtraSpace(String input) {
    final String singleSpace = " ";
    return pattern.matcher(input).replaceAll(singleSpace);
}
From source file:com.sfs.Formatter.java
/** * Gets the numeric phone./* w w w.ja va 2 s .c o m*/ * * @param countryCode the country code * @param areaCode the area code * @param phoneNumber the phone number * * @return the numeric phone */ public static String getNumericPhone(final int countryCode, final int areaCode, final String phoneNumber) { StringBuffer phone = new StringBuffer(); int intPhoneNumber = 0; try { intPhoneNumber = Integer.parseInt(phoneNumber); } catch (NumberFormatException nfe) { intPhoneNumber = 0; } if (intPhoneNumber == 0 && StringUtils.isNotBlank(phoneNumber)) { // An integer has not been parsed, but a phone number // has been supplied. This means it probably contains // some non-numeric characters. Pattern pattern = Pattern.compile("[^\\d]"); Matcher match = pattern.matcher(phoneNumber); String onlyDigits = match.replaceAll(""); try { intPhoneNumber = Integer.parseInt(onlyDigits); } catch (NumberFormatException nfe) { intPhoneNumber = 0; } } // Build the phone numeric string phone.append("+"); phone.append(countryCode); phone.append(" "); phone.append(areaCode); phone.append(" "); phone.append(intPhoneNumber); return phone.toString(); }
From source file:org.opencron.common.utils.StringUtils.java
/** * script//w w w.ja v a 2 s .c om * * @param htmlStr * @return writer:<a href="mailto:benjobs@qq.com">benjobs</a> 2012.2.1 */ public static String replaceScript(String htmlStr) { if (htmlStr == null || "".equals(htmlStr)) { return ""; } String regEx_script = "<script[^>]*?>[\\s\\S]*?<\\/script>"; // script? Pattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE); Matcher m_script = p_script.matcher(htmlStr); htmlStr = m_script.replaceAll(""); // script return htmlStr.trim(); // }
From source file:org.geopublishing.atlasViewer.GpCoreUtil.java
/** * This method converts backslashes to forwardslashes in any html document. * Backslashes dont work in linux environments and when using the atlas out * of a .jar geopublisher expects forwardslashes * /*w w w . ja va 2 s .com*/ * @param url * @return */ public static URL convertWindowsToLinuxSlashesInHtmlSrcTags(URL url) { String htmlString = IOUtil.readURLasString(url); File tempFile = null; Pattern p = Pattern.compile("(<[^>]+(?:src|href)=\"[^\"]*)\\\\([^\"]*\"[^>]*>)", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(htmlString); while (m.find()) { htmlString = m.replaceAll("$1/$2"); m = p.matcher(htmlString); } try { tempFile = IOUtil.createTemporaryFile("tmp", ".html", true); InputStream bais = new ByteArrayInputStream(htmlString.getBytes()); IOUtil.writeStreamToFile(bais, tempFile); IOUtil.copyFileNoException(null, tempFile, IOUtil.urlToFile(url), false); } catch (IOException e) { } return url; }
From source file:org.opencron.common.utils.StringUtils.java
/** * html//from w w w. j a v a2 s.com * * @param htmlStr * @return writer:<a href="mailto:benjobs@qq.com">benjobs</a> 2012.2.1 */ public static String replaceHtml(String htmlStr) { if (htmlStr == null || "".equals(htmlStr)) { return ""; } String regEx_script = "<script[^>]*?>[\\s\\S]*?</script>"; // script? String regEx_style = "<style[^>]*?>[\\s\\S]*?</style>"; // style? String regEx_html = "<[^>]+>"; // HTML? Pattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE); Matcher m_script = p_script.matcher(htmlStr); htmlStr = m_script.replaceAll(""); // script Pattern p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE); Matcher m_style = p_style.matcher(htmlStr); htmlStr = m_style.replaceAll(""); // style Pattern p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE); Matcher m_html = p_html.matcher(htmlStr); htmlStr = m_html.replaceAll(""); // html return htmlStr.trim(); // }