List of usage examples for java.util.regex Matcher replaceAll
public String replaceAll(Function<MatchResult, String> replacer)
From source file:immf.StringConverter.java
/**
 * Applies every configured replacement rule to the given string.
 *
 * Each key of {@code replaceMap} is compiled as a regular expression and its
 * mapped value is used as the replacement. Rules are applied one after the
 * other, each on the output of the previous one, and a log line is written
 * for every rule that matches.
 *
 * @param str the text to convert
 * @return {@code str} itself when no rules are configured, otherwise the
 *         text after all matching rules have been applied
 */
public String convert(String str) {
    if (this.replaceMap.isEmpty()) {
        return str;
    }

    String converted = str;
    // Iterate over a snapshot of the keys, as the original code does.
    final Object[] patternKeys = this.replaceMap.keySet().toArray();
    for (Object patternKey : patternKeys) {
        final String regex = (String) patternKey;
        final String substitute = this.replaceMap.get(regex);
        final Matcher matcher = Pattern.compile(regex).matcher(converted);
        if (!matcher.find()) {
            continue;
        }
        log.info("????? [" + matcher.group() + "]");
        converted = matcher.replaceAll(substitute);
    }
    return converted;
}
From source file:no.dusken.momus.service.drive.GoogleDocsTextConverter.java
/** * Comments inserted should be removed as they don't belong to the text * A comment adds a <sup>-reference to the text, and then the comment * itself at the bottom/* w w w . j a v a 2 s . co m*/ */ private String removeComments(String in) { Matcher m = inlineComments.matcher(in); String out = m.replaceAll(""); // Spaces inside a marked text are written as m = spaces.matcher(out); out = m.replaceAll(" "); m = comments.matcher(out); out = m.replaceAll(""); return out; }
From source file:com.ririjin.adminmobile.fragment.BasicFragment.java
/**
 * Removes every character in the range U+4E00 to U+9FA5 from the input,
 * prints the remaining text to standard output and returns it.
 *
 * @param str the text to filter
 * @return {@code str} without the characters of that range
 */
public String FilterChinese(String str) {
    final Pattern cjkRange = Pattern.compile("[\u4e00-\u9fa5]");
    final String remaining = cjkRange.matcher(str).replaceAll("");
    System.out.println("?:" + remaining);
    return remaining;
}
From source file:org.executequery.search.TextAreaSearch.java
/**
 * Replaces every occurrence of the current search text in the text
 * component with the current replacement text.
 *
 * When {@code wrapSearch} is set the whole text is processed; otherwise only
 * the part before the caret ({@code SEARCH_UP}) or from the caret to the end
 * of the text is processed and spliced back into the full text. A search
 * term that is not a regular expression is first converted with
 * {@code formatRegularExpression}. Matching is case-insensitive unless
 * {@code matchCase} is set. A {@code null} replacement text is treated as
 * the empty string.
 *
 * @return {@code 0} when the text was replaced, {@code -1} when there is no
 *         text component, no search text, no text, no match, or the
 *         search pattern is invalid
 */
public static int replaceAll() {
    if (textComponent == null) {
        GUIUtilities.displayWarningMessage("Search text not found.");
        return -1;
    }

    if (findText == null || findText.length() == 0) {
        return -1;
    }

    String _text = null;
    String text = textComponent.getText();

    if (text == null || text.length() == 0) {
        GUIUtilities.displayWarningMessage("Search text not found.");
        return -1;
    }

    int caretPosition = textComponent.getCaretPosition();

    if (replacementText == null) {
        replacementText = "";
    }

    // NOTE(review): the replacement is passed to Matcher.replaceAll as-is,
    // so '$' and '\' in it are interpreted even for non-regex searches.
    String regexPattern = null;
    if (!useRegex) {
        regexPattern = formatRegularExpression(findText, wholeWords);
    } else {
        regexPattern = findText;
    }

    Pattern pattern = null;
    Matcher matcher = null;
    StringBuilder resultText = null;

    try {
        if (matchCase) {
            pattern = Pattern.compile(regexPattern);
        } else {
            pattern = Pattern.compile(regexPattern, Pattern.CASE_INSENSITIVE);
        }

        // Select the region of the text that is searched.
        if (wrapSearch) {
            matcher = pattern.matcher(text);
        } else if (searchDirection == SEARCH_UP) {
            matcher = pattern.matcher(text.substring(0, caretPosition));
        } else {
            matcher = pattern.matcher(text.substring(caretPosition));
        }

        if (matcher.find()) {
            _text = matcher.replaceAll(replacementText);
        } else {
            GUIUtilities.displayWarningMessage("Search text not found.");
            return -1;
        }

        if (wrapSearch) {
            resultText = new StringBuilder(_text);
        } else {
            resultText = new StringBuilder(text);
            if (searchDirection == SEARCH_UP) {
                resultText.replace(0, caretPosition, _text);
            } else {
                // The searched region runs to the very end of the text and
                // the end index of StringBuilder.replace is exclusive, so it
                // must be text.length(); length() - 1 left the original last
                // character behind the replaced text.
                resultText.replace(caretPosition, text.length(), _text);
            }
        }

        textComponent.setText(resultText.toString());
        return 0;

    } catch (PatternSyntaxException pExc) {
        // Only a user-supplied regular expression is reported as invalid.
        if (useRegex) {
            GUIUtilities.displayErrorMessage("The regular expression search pattern is invalid.");
        }
        return -1;
    } finally {
        if (resultText != null) {
            // Restore the caret, clamped to the length of the new text.
            int length = resultText.length();
            textComponent.setCaretPosition(length < caretPosition ? length : caretPosition);
        }
        GUIUtilities.scheduleGC();
    }
}
From source file:net.yacy.search.query.QueryParams.java
/** * Generate an URL filter from the query modifier and eventual tld, usable as a * first approximation for filtering, and compatible with the yacy/search * API.<br/>/*from w w w . j a va2 s. com*/ * For truly accurate filtering, checking constraints against parsed URLs in * MultiprotocolURL instances is easier and more reliable than building a complex regular * expression that must be both compatible with the JDK {@link Pattern} and with Lucene {@link RegExp}. * * @param modifier * query modifier with eventual protocol, sitehost and filetype * constraints. The modifier parameter itselft must not be null. * @param tld * an eventual Top Level Domain name * @return an URL filter regular expression from the provided modifier and tld * constraints, matching anything when there are no constraints at all. */ protected static String buildApproximateURLFilter(final QueryModifier modifier, final String tld) { final String protocolfilter = modifier.protocol == null ? ".*" : modifier.protocol; final String defaulthostprefix = "www"; final String hostfilter; if (modifier.sitehost == null && tld == null) { hostfilter = ".*"; } else if (modifier.sitehost == null) { hostfilter = ".*\\." + tld; } else if (modifier.sitehost.startsWith(defaulthostprefix + ".")) { hostfilter = "(" + defaulthostprefix + "\\.)?" + modifier.sitehost.substring(4); } else { hostfilter = "(" + defaulthostprefix + "\\.)?" + modifier.sitehost; } final String filefilter = modifier.filetype == null ? ".*" : ".*" + modifier.filetype + ".*"; // TODO: should be ".ext" but while/comment above suggests not -> add filetype contrain pullOneFilteredFromRWI() String filter = protocolfilter + "..." + hostfilter + "." 
+ filefilter; if (!filter.equals(".*....*..*")) { /* Remove redundant sequences of catch all expressions */ Pattern r = Pattern.compile("(\\.|(\\.\\*))\\.\\*"); Matcher m; while ((m = r.matcher(filter)).find()) { filter = m.replaceAll(".*"); } } else { filter = QueryParams.catchall_pattern.toString(); } return filter; }
From source file:no.dusken.momus.service.drive.GoogleDocsTextConverter.java
/** * Converts HTML entities to "normal characters", for instance * it converts å to /* w w w . j av a 2 s . c o m*/ * * But < (<) and > (>) are ignored, to avoid < and > in the written * text to affect our HTML. */ private String unescapeHtml(String in) { // replace all > and < Matcher m = ltToUnicode.matcher(in); String out = m.replaceAll(ltUnicode); m = gtToUnicode.matcher(out); out = m.replaceAll(gtUnicode); //Convert quotes to "guillemets" out = out.replaceAll("“", ""); out = out.replaceAll("”", ""); // convert stuff out = StringEscapeUtils.unescapeHtml4(out); // add the > and <s back m = unicodeToLt.matcher(out); out = m.replaceAll("<"); m = unicodeToGt.matcher(out); out = m.replaceAll(">"); return out; }
From source file:org.apache.nutch.indexer.replace.FieldReplacer.java
/**
 * Computes the replacement value for a field, if one applies.
 *
 * The method is meant to fail fast and only produce a replacement when it
 * is needed. A {@code null} result means that the field name does not
 * match, the value is null or empty, this replacer is not valid, or the
 * value does not match the pattern. Only a non-null result requires the
 * caller to replace the field value.
 *
 * @param fieldName
 *          the name of the field you are checking
 * @param value
 *          the value of the field you are checking
 * @return the replacement value, or {@code null} when nothing has to be
 *         replaced
 */
public String checkAndReplace(String fieldName, String value) {
    final boolean applicable = this.fieldName.equals(fieldName)
            && value != null
            && value.length() > 0
            && this.isValid;
    if (!applicable) {
        return null;
    }

    final Matcher candidate = this.pattern.matcher(value);
    return candidate.find() ? candidate.replaceAll(this.replacement) : null;
}
From source file:com.hichinaschool.flashcards.libanki.Utils.java
/** * Strips a text from <style>...</style>, <script>...</script> and <_any_tag_> HTML tags. * @param The HTML text to be cleaned./*from w ww . j av a 2 s . co m*/ * @return The text without the aforementioned tags. */ public static String stripHTML(String s) { Matcher htmlMatcher = stylePattern.matcher(s); s = htmlMatcher.replaceAll(""); htmlMatcher = scriptPattern.matcher(s); s = htmlMatcher.replaceAll(""); htmlMatcher = tagPattern.matcher(s); s = htmlMatcher.replaceAll(""); return entsToTxt(s); }
From source file:com.digitalpebble.storm.crawler.filtering.regex.RegexURLNormalizer.java
/**
 * Normalizes a URL string by running it through every regex rule in turn:
 * each rule's pattern is matched against the current string and all
 * matches are replaced by the rule's substitution.
 *
 * @param sourceUrl      not used by this implementation
 * @param sourceMetadata not used by this implementation
 * @param urlString      the URL string to normalize
 * @return the altered string, or {@code null} if the normalized URL is an
 *         empty string
 */
@Override
public String filter(URL sourceUrl, Metadata sourceMetadata, String urlString) {
    String normalized = urlString;
    for (Iterator<Rule> ruleIt = rules.iterator(); ruleIt.hasNext();) {
        final Rule rule = ruleIt.next();
        normalized = rule.pattern.matcher(normalized).replaceAll(rule.substitution);
    }
    return normalized.isEmpty() ? null : normalized;
}
From source file:org.lockss.rewriter.RegexpCssLinkRewriterFactory.java
/**
 * Removes backslashes that are used as escape characters in a CSS URL by
 * replacing every match of {@code CSS_BACKSLASH_PAT} with its first
 * capture group.
 * Should probably also process hex URL encodings.
 *
 * @param url the CSS URL to process
 * @return the URL with the escape backslashes removed
 */
String processUrlEscapes(String url) {
    return CSS_BACKSLASH_PAT.matcher(url).replaceAll("$1");
}