List of usage examples for java.util.regex Matcher.replaceAll
public String replaceAll(String replacement)
From source file:elh.eus.absa.MicroTextNormalizer.java
/** * Normalize input String (@user -> USRID) * /*from ww w.j av a2 s . co m*/ * @param input : input string to normalize * @returns String */ private String normalizeUSR(String input, boolean anonimize) { String result = input; Matcher m = user.matcher(result); if (anonimize) { result = m.replaceAll("USRID"); } else { result = m.replaceAll("$1"); } return result; }
From source file:org.amnesty.aidoc.search.AidocSearch.java
private StringBuffer buildStandardQuery(StringBuffer query, Map<String, List<String>> queryMap, String language) {/*from w w w . java2s .c om*/ // Our query template. Prioritize new content. String tpl = "(" + "(" + "(@cm\\:title:(terms)^8 OR @cm\\:description:(terms)^4 OR TEXT:(terms))" + " AND @cm\\:from:[primRange]" + ") OR " // + "(" // + "(@cm\\:title:(terms)^8 OR @cm\\:description:(terms)^4 OR TEXT:(terms))" // + " AND @cm\\:from:[secRange]" // + ")^2 OR " + "(@cm\\:title:(terms)^8 OR @cm\\:description:(terms)^4 OR TEXT:(terms))" + ")" + ")"; Date from = new Date(); from.setYear(from.getYear() - 2); String primRange = ISO8601DateFormat.format(from) + " TO " + ISO8601DateFormat.format(new Date()); // from.setYear(from.getYear() - 4); // String secRange = ISO8601DateFormat.format(from) + " TO " // + ISO8601DateFormat.format(new Date()); // Compile regular expression Pattern pattern = Pattern.compile("primRange"); Matcher matcher = pattern.matcher(tpl); tpl = matcher.replaceAll(primRange); // pattern = Pattern.compile("secRange"); // matcher = pattern.matcher(tpl); // tpl = matcher.replaceAll(secRange); /* Process categories */ if (queryMap.containsKey("category")) { List<String> catValues = (List<String>) queryMap.get("category"); for (String category : catValues) { logger.debug("Search category: " + category); category = category.replace(" ", "_x0020_"); category = category.replace(",", "_x002c_"); query.append(" PATH:\"/cm:generalclassifiable//cm:" + category + "//*\" AND "); } } if (queryMap.containsKey("cat")) { List<String> catValues = (List<String>) queryMap.get("cat"); for (String category : catValues) { if (CategoryToClassMap.hm.containsKey(category.toUpperCase())) { String mappedClass = (String) CategoryToClassMap.hm.get(category.toUpperCase()); logger.debug("Mapping " + category + " to class: " + mappedClass); query.append(" @aicore\\:aiIndex:\""); query.append(mappedClass + "*"); query.append("\" AND"); } else logger.debug("No AiClass found for: " + 
category); } } /* Process keywords */ if (queryMap.containsKey("keywords")) { List<String> keywords = (List<String>) queryMap.get("keywords"); pattern = Pattern.compile("terms"); matcher = pattern.matcher(tpl); if (keywords.size() == 1) tpl = matcher.replaceAll(keywords.get(0)); else if (keywords.size() == 0) tpl = matcher.replaceAll("?"); else tpl = matcher.replaceAll("bad query"); query.append(tpl); } return query; }
From source file:io.wcm.caconfig.extensions.contextpath.impl.AbsoluteParentContextPathStrategy.java
/**
 * Derives the configuration reference path for a context path.
 * <p>
 * The original (pre-mapped) path must match {@code contextPathRegex} and the
 * raw context path must not match {@code contextPathBlacklistRegex} (when one
 * is configured); the config ref is then produced by expanding
 * {@code configPathPattern} against the match.
 *
 * @param contextPath       context path of the resource
 * @param configPathPattern replacement pattern (may reference capture groups)
 * @param resourceResolver  resolver used to obtain the original path
 * @return the derived config ref, or null when the path does not qualify
 */
private String deriveConfigRef(String contextPath, String configPathPattern, ResourceResolver resourceResolver) {
    String originalPath = Path.getOriginalPath(contextPath, resourceResolver);
    Matcher pathMatcher = contextPathRegex.matcher(originalPath);
    if (!pathMatcher.matches()) {
        return null;
    }
    // A configured blacklist that matches the raw context path vetoes the mapping.
    if (contextPathBlacklistRegex != null && contextPathBlacklistRegex.matcher(contextPath).matches()) {
        return null;
    }
    return pathMatcher.replaceAll(configPathPattern);
}
From source file:com.amani.action.LoginAction.java
public String replaceBlank(String str) { String dest = ""; if (str != null) { Pattern p = Pattern.compile("\\s*|\t|\r|\n"); /*/* ww w . j a v a 2 s. c o m*/ * \n (\u000a) \t (\u0009) \s (\u0008)\r ?(\u000d) */ Matcher m = p.matcher(str); dest = m.replaceAll(""); } return dest; }
From source file:de.ingrid.iplug.csw.dsc.cache.impl.AbstractUpdateStrategy.java
/** * Create a filter Document from a filter string. Replace any filter * variables. TODO: if there should be more variables, this could be done * more generic/* ww w . ja v a2s . co m*/ * * @param filterStr * @return Document * @throws Exception */ protected Document createFilterDocument(String filterStr) throws Exception { ExecutionContext context = this.getExecutionContext(); if (this.docBuilder == null) { DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); docBuilder = docBuilderFactory.newDocumentBuilder(); } // replace last update date variable Pattern lastUpdateDatePattern = Pattern.compile("\\{LAST_UPDATE_DATE\\}", Pattern.MULTILINE); Matcher matcher = lastUpdateDatePattern.matcher(filterStr); if (matcher.find()) { Date lastUpdateDate = context.getLastExecutionDate(); SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd"); filterStr = matcher.replaceAll(df.format(lastUpdateDate)); } return docBuilder.parse(new InputSource(new StringReader(filterStr))); }
From source file:org.infoscoop.request.filter.rss.RssHandler.java
/**
 * Normalizes timezone offsets written as "+HH:00" / "-HH:00" into the
 * "+HH00" / "-HH00" form expected by {@code format2}, then parses the date.
 *
 * @param dateStr candidate date string
 * @return the parsed Date, or null when the string does not end in a
 *         two-digit offset followed by ":00"
 * @throws ParseException if {@code format2} cannot parse the rewritten string
 */
Date checkDateFormat(String dateStr) throws ParseException {
    // FIX: the original class "[+|-]" also (wrongly) matched a literal '|';
    // inside a character class '|' is not an alternation operator.
    Pattern pattern = Pattern.compile("(.*[+-][0-9]{2}):00");
    Matcher m = pattern.matcher(dateStr);
    if (m.matches()) {
        // "$1" keeps everything through the offset hours; appending "00"
        // turns e.g. "+09:00" into "+0900".
        dateStr = m.replaceAll("$100");
        return format2.parse(dateStr);
    }
    return null;
}
From source file:com.hdfs.concat.crush.CrushReducer.java
/** * Converts the name of a directory to a path to the crush output file using the specs at the given index. The path will the * directory and file name separated by a slash /. Performs placeholder substitution on the corresponding replacement string in * {@link #outputReplacementList}. The final replacement string is then used to form the final path. *///from ww w. ja v a 2 s .c o m String calculateOutputFile(int idx, String srcDir) { StringBuffer sb = new StringBuffer(srcDir); sb.append("/"); String replacement = outputReplacementList.get(idx); placeHolderToValue.put("crush.file.num", Integer.toString(fileNum++)); placeholderMatcher.reset(replacement); while (placeholderMatcher.find()) { String key = placeholderMatcher.group(1); String value = placeHolderToValue.get(key); if (null == value) { throw new IllegalArgumentException("No value for key: " + key); } placeholderMatcher.appendReplacement(sb, value); } placeholderMatcher.appendTail(sb); Matcher matcher = inputRegexList.get(idx); matcher.reset(srcDir); String finalOutputName = matcher.replaceAll(sb.toString()); return finalOutputName; }
From source file:com.akamai.edgegrid.auth.EdgeGridV1Signer.java
/** * Get the canonicalized data for the request headers. * // w w w . jav a2 s .com * <p> * The canonicalization is done as the following: * </p> * * <p> * For each entry in the {@link #headersToInclude}, * </p> * * <ul> * <li> * get the first header value for the name; * </li> * <li> * trim the leading and trailing white spaces; * </li> * <li> * replace all repeated white spaces with a single space; * <p> * Note: the canonicalized data is used for signature only, as this step might alter the header value. * </p> * </li> * <li> * concatenate the name:value pairs with a tab '\t' separator. The name field is all in lower cases. * </li> * <li> * terminate the headers with another tab ('\t') separator. * </li> * </ul> * * @param request the request. * @return the canonicalized data for the request headers. */ protected String canonicalizeHeaders(HttpRequest request) { StringBuilder sb = new StringBuilder(); for (String headerName : headersToInclude) { // only use the first entry if more than one headers with the same name String headerValue = request.getHeaders().getFirstHeaderStringValue(headerName); if (headerValue != null) { // trim the header value headerValue = headerValue.trim(); if (!headerValue.isEmpty()) { Pattern p = Pattern.compile("\\s+"); Matcher matcher = p.matcher(headerValue); headerValue = matcher.replaceAll(" "); sb.append(headerName.toLowerCase()); sb.append(':'); sb.append(headerValue); sb.append('\t'); } } } return sb.toString(); }
From source file:org.codelabor.system.web.taglib.PaginationTag.java
/**
 * Produces a query string whose page-number parameter is set to {@code pageNo}.
 * <p>
 * If the query string already carries the page-number parameter, its numeric
 * value is replaced in place. Otherwise the page-number and max-rows-per-page
 * parameters are prepended. A blank query string yields just those two
 * parameters.
 *
 * @param queryString   original query string, may be blank or null
 * @param pageNo        page number to set
 * @param maxRowPerPage rows-per-page value used when the parameter is absent
 * @return the rewritten query string
 */
protected String replacePageNoInQueryString(String queryString, int pageNo, int maxRowPerPage) {
    String pageNoParam = pageNoParamName + "=" + pageNo;
    if (StringUtils.isBlank(queryString)) {
        // No query string at all: emit only the paging parameters.
        return pageNoParam + "&" + maxRowPerPageParamName + "=" + maxRowPerPage;
    }
    if (StringUtils.contains(queryString, pageNoParamName)) {
        // Swap the existing numeric page value for the new one.
        Pattern existingPageNo = Pattern.compile(pageNoParamName + "=[0-9]*");
        Matcher matcher = existingPageNo.matcher(queryString);
        return matcher.replaceAll(pageNoParam);
    }
    // Parameter absent: prepend paging parameters to the untouched query string.
    return pageNoParam + "&" + maxRowPerPageParamName + "=" + maxRowPerPage + "&" + queryString;
}
From source file:org.lanes.text.mining.EntityRecogniser.java
/**
 * Classifies an entity string as PERSON, LOCATION or ORGANISATION by querying
 * a Solr index of titles and averaging per-title type evidence.
 * <p>
 * Iterations 1-2 use an exact quoted titleText query; later iterations fall
 * back to an AND-of-tokens full-text query. When no class is determined and
 * iteration allows, the method recurses on the longest n-gram of the entity
 * that has non-zero corpus frequency.
 *
 * @param entity              the candidate entity string (trimmed internally)
 * @param acceptancethreshold minimum average evidence needed to accept a class
 * @param iteration           recursion depth (1-based); capped at 2 for the
 *                            exact-match strategy and for n-gram fallback
 * @return "ISA:confidence:deteriorate" — class name (may be empty), average
 *         evidence, and the iteration depth at which the answer was found
 */
public String determineEntityClass(String entity, double acceptancethreshold, int iteration) {
    long timestart = System.currentTimeMillis(); // NOTE(review): unused — presumably leftover timing code
    String isa = "";
    String confidence = "0.0";
    String deteriorate = "0.0";
    NGramAnalyser nga = new NGramAnalyser();
    entity = entity.trim();
    //Matcher matchercap1 = Pattern.compile("^[A-Z]").matcher(entity);
    //Matcher matchercap2 = Pattern.compile("\\s[A-Z]").matcher(entity);
    //if(matchercap1.find() || matchercap2.find()){
    System.err.println("CANDIDATE(" + entity + ")");
    boolean exacttitlematchfound = false;
    try {
        // Accumulated per-class evidence over all sufficiently similar titles.
        double isPersonPt = 0.0;
        double isLocationPt = 0.0;
        double isOrganisationPt = 0.0;
        double totalTitles = 0.0;
        ModifiableSolrParams param = null;
        if (iteration <= 2) {
            // Early iterations: exact phrase match against the title field.
            String exactintextquery = "\"" + entity + "\"";
            param = simobj.formulateQuery("titleText:" + exactintextquery + "", 10);
        } else {
            // Later iterations: conjunction of all tokens over the full text.
            String intextquery = "";
            String[] toks = entity.split(" ");
            for (String tok : toks) {
                intextquery = intextquery + tok + " AND ";
            }
            // Strip the trailing " AND " left by the loop.
            Matcher replace1 = Pattern.compile(" AND $").matcher(intextquery);
            intextquery = replace1.replaceAll("");
            param = simobj.formulateQuery("text:" + intextquery + "", 10);
        }
        QueryResponse response = solrserver.query(param);
        for (SolrDocument doc : response.getResults()) {
            Collection<String> fnames = doc.getFieldNames();
            for (String fname : fnames) {
                if (fname.equals("titleText")) {
                    String title = (String) doc.getFieldValue(fname);
                    String orititle = title;
                    // Drop a trailing parenthesized qualifier, e.g. "Paris (city)".
                    Matcher replace1 = Pattern.compile("\\s\\([^\\(\\)]+\\)$").matcher(title);
                    title = replace1.replaceAll("");
                    // Drop a trailing comma qualifier, e.g. "Paris, France".
                    Matcher replace2 = Pattern.compile(",[^,]+$").matcher(title);
                    title = replace2.replaceAll("");
                    double strsim = FuzzyMatcher.stringSim(title, entity);
                    if (strsim > 0.5) {
                        // Title is similar enough: gather its type evidence
                        // (computed on the ORIGINAL, unstripped title).
                        Map<String, Double> istypeornot = compareEntityType(orititle);
                        System.err.println("\tOK(" + entity + "," + title + ") = " + strsim + "");
                        double sum = istypeornot.get("PERSON") + istypeornot.get("LOCATION")
                                + istypeornot.get("ORGANISATION");
                        if (sum > 0) {
                            // Normalize each class's evidence to a percentage of the total.
                            double personPercentage = CommonData.roundDecimal(istypeornot.get("PERSON") / sum,
                                    "#.####");
                            double locationPercentage = CommonData
                                    .roundDecimal(istypeornot.get("LOCATION") / sum, "#.####");
                            double organisationPercentage = CommonData
                                    .roundDecimal(istypeornot.get("ORGANISATION") / sum, "#.####");
                            totalTitles++;
                            System.err.println("\t\tPERSON(" + personPercentage + " = "
                                    + istypeornot.get("PERSON") + "/" + sum + ")");
                            System.err.println("\t\tLOCATION(" + locationPercentage + " = "
                                    + istypeornot.get("LOCATION") + "/" + sum + ")");
                            System.err.println("\t\tORGANISATION(" + organisationPercentage + " = "
                                    + istypeornot.get("ORGANISATION") + "/" + sum + ")");
                            isPersonPt = isPersonPt + personPercentage;
                            isLocationPt = isLocationPt + locationPercentage;
                            isOrganisationPt = isOrganisationPt + organisationPercentage;
                        } else if (strsim > 0.999) {
                            //IF EXACT TITLE MATCH, BUT NOT LOCATION|PERSON|ORGANISATION
                            exacttitlematchfound = true;
                        }
                    } else {
                        System.err.println("\tKO(" + entity + "," + title + ") = " + strsim + "");
                    }
                }
            }
        }
        if (totalTitles > 0) {
            // Average the evidence over all contributing titles, then pick the
            // class that both exceeds the threshold and strictly beats the others.
            double personAvePt = isPersonPt / totalTitles;
            double locationAvePt = isLocationPt / totalTitles;
            double organisationAvePt = isOrganisationPt / totalTitles;
            System.err.println("(PERSON): " + personAvePt + " = " + isPersonPt + "/" + totalTitles);
            System.err.println("(LOCATION): " + locationAvePt + " = " + isLocationPt + "/" + totalTitles);
            System.err.println(
                    "(ORGANISATION): " + organisationAvePt + " = " + isOrganisationPt + "/" + totalTitles);
            if (personAvePt > acceptancethreshold && personAvePt > locationAvePt
                    && personAvePt > organisationAvePt) {
                isa = "PERSON";
                confidence = String.valueOf(personAvePt);
            } else if (locationAvePt > acceptancethreshold && locationAvePt > personAvePt
                    && locationAvePt > organisationAvePt) {
                isa = "LOCATION";
                confidence = String.valueOf(locationAvePt);
            } else if (organisationAvePt > acceptancethreshold && organisationAvePt > personAvePt
                    && organisationAvePt > locationAvePt) {
                isa = "ORGANISATION";
                confidence = String.valueOf(organisationAvePt);
            }
        }
    } catch (Exception e) {
        // NOTE(review): exceptions (including Solr failures) are silently
        // swallowed, falling through to the n-gram fallback — confirm intended.
    }
    if (isa.equals("") && iteration <= 2 && !exacttitlematchfound) {
        // Fallback: recurse on the longest n-gram of the entity that has
        // non-zero relative frequency (mapped to the most specific sub-phrase).
        Map<String, Double> ngrams = nga.getNGrams(entity);
        Map<String, Double> ngramsrelfreq = nga.getNGramsRelFreq(ngrams, simobj);
        String longestnonzerofreqngram = "";
        Map<String, Double> sortedngrams = MapSorter.sortMap(ngrams, "DESC");//MAPPED TO THE LONGEST ONE, MORE SPECIFIC
        Iterator iterator4 = sortedngrams.keySet().iterator();
        while (iterator4.hasNext() && longestnonzerofreqngram.equals("")) {
            String ngram = (String) iterator4.next();
            double righttoleft = ngrams.get(ngram);
            double relfreq = ngramsrelfreq.get(ngram);
            System.err.println("\t" + ngram + " (" + righttoleft + ")(" + relfreq + ")");
            if (relfreq > 0 && !ngram.equals(entity)) {
                longestnonzerofreqngram = ngram;
            }
        }
        if (!longestnonzerofreqngram.equals("")) {
            iteration++;
            // Recurse on the chosen sub-phrase and adopt its full result triple.
            String results = determineEntityClass(longestnonzerofreqngram, acceptancethreshold, iteration);
            String[] splitresults = results.split(":");
            isa = splitresults[0];
            confidence = splitresults[1];
            deteriorate = splitresults[2];
        }
    } else {
        // Answer found (or recursion exhausted): record the depth it was found at.
        deteriorate = String.valueOf(iteration);
    }
    System.err.println("(ISA): " + isa);
    //}
    //else{
    // System.err.println("NOT-ENTITY(" + entity + ")");
    //}
    System.err.println("==================================================");
    System.err.println("==================================================");
    return isa + ":" + confidence + ":" + deteriorate;
}