List of usage examples for java.util.regex.Matcher.end()
public int end()
From source file:com.joliciel.talismane.filters.RegexMarkerFilter.java
@Override public Set<TextMarker> apply(String prevText, String text, String nextText) { if (LOG.isTraceEnabled()) { LOG.trace("Matching " + regex + ""); }// w w w. j a va 2 s . co m String context = prevText + text + nextText; int textStartPos = prevText.length(); int textEndPos = prevText.length() + text.length(); Matcher matcher = pattern.matcher(context); Set<TextMarker> textMarkers = new TreeSet<TextMarker>(); while (matcher.find()) { int matcherStart = 0; int matcherEnd = 0; if (groupIndex == 0) { matcherStart = matcher.start(); matcherEnd = matcher.end(); } else { matcherStart = matcher.start(groupIndex); matcherEnd = matcher.end(groupIndex); } String matchText = context.substring(matcher.start(), matcher.end()); if (LOG.isTraceEnabled()) { LOG.trace("Next match: " + matchText); if (matcher.start() != matcherStart || matcher.end() != matcherEnd) { LOG.trace("But matching group: " + context.substring(matcherStart, matcherEnd)); } LOG.trace("matcher.start()=" + matcher.start() + ", matcher.end()=" + matcher.end() + ", matcherStart=" + matcherStart + ", matcherEnd=" + matcherEnd + ", textStartPos=" + textStartPos + ", textEndPos=" + textEndPos); } if (matcherEnd - matcherStart > blockSize) { String errorString = "Match size (" + (matcherEnd - matcherStart) + ") bigger than block size (" + blockSize + "). " + "Increase blockSize or change filter. " + "Maybe you need to change a greedy quantifier (e.g. .*) to a reluctant quantifier (e.g. .*?)? " + "Regex: " + regex + ". 
Text: " + matchText; throw new TalismaneException(errorString); } if (matcherStart >= textStartPos && matcherStart < textEndPos) { if (LOG.isTraceEnabled()) { LOG.trace("Start in range: textStartPos " + textStartPos + ">= matcherStart [[" + matcherStart + "]] < textEndPos " + textEndPos); } for (MarkerFilterType filterType : filterTypes) { switch (filterType) { case SKIP: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.PUSH_SKIP, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } case SENTENCE_BREAK: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.SENTENCE_BREAK, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } case SPACE: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.SPACE, matcherStart - prevText.length(), this, matchText); textMarker.setInsertionText(" "); textMarkers.add(textMarker); TextMarker textMarker2 = this.getFilterService().getTextMarker(TextMarkerType.PUSH_SKIP, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker2); break; } case REPLACE: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.INSERT, matcherStart - prevText.length(), this, matchText); String newText = RegexUtils.getReplacement(replacement, context, matcher); if (LOG.isTraceEnabled()) { LOG.trace("Setting replacement to: " + newText); } textMarker.setInsertionText(newText); textMarkers.add(textMarker); TextMarker textMarker2 = this.getFilterService().getTextMarker(TextMarkerType.PUSH_SKIP, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker2); break; } case OUTPUT: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.PUSH_OUTPUT, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker); TextMarker textMarker2 = this.getFilterService().getTextMarker(TextMarkerType.PUSH_SKIP, matcherStart - 
prevText.length(), this, matchText); textMarkers.add(textMarker2); break; } case INCLUDE: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.PUSH_INCLUDE, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } case OUTPUT_START: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.START_OUTPUT, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } case STOP: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.STOP, matcherStart - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } } } } if (matcherEnd >= textStartPos && matcherEnd < textEndPos) { if (LOG.isTraceEnabled()) { LOG.trace("End in range: textStartPos " + textStartPos + ">= matcherEnd [[" + matcherEnd + "]] < textEndPos " + textEndPos); } for (MarkerFilterType filterType : filterTypes) { switch (filterType) { case SKIP: case SPACE: case REPLACE: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.POP_SKIP, matcherEnd - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } case OUTPUT: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.POP_OUTPUT, matcherEnd - prevText.length(), this, matchText); textMarkers.add(textMarker); TextMarker textMarker2 = this.getFilterService().getTextMarker(TextMarkerType.POP_SKIP, matcherEnd - prevText.length(), this, matchText); textMarkers.add(textMarker2); break; } case INCLUDE: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.POP_INCLUDE, matcherEnd - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } case START: { TextMarker textMarker = this.getFilterService().getTextMarker(TextMarkerType.START, matcherEnd - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } case OUTPUT_STOP: { TextMarker textMarker = 
this.getFilterService().getTextMarker(TextMarkerType.STOP_OUTPUT, matcherEnd - prevText.length(), this, matchText); textMarkers.add(textMarker); break; } } } } } // next match if (textMarkers.size() > 0) LOG.debug("Added markers: " + textMarkers); return textMarkers; }
From source file:android.databinding.compilationTest.BaseCompilationTest.java
protected void copyResourceTo(String name, File targetFile, Map<String, String> replacements) throws IOException { if (replacements.isEmpty()) { copyResourceTo(name, targetFile); }//from w w w . ja va 2s .c o m InputStream inputStream = getClass().getResourceAsStream(name); final String contents = IOUtils.toString(inputStream); IOUtils.closeQuietly(inputStream); StringBuilder out = new StringBuilder(contents.length()); final Matcher matcher = VARIABLES.matcher(contents); int location = 0; while (matcher.find()) { int start = matcher.start(); if (start > location) { out.append(contents, location, start); } final String key = matcher.group(1); final String replacement = replacements.get(key); if (replacement != null) { out.append(replacement); } location = matcher.end(); } if (location < contents.length()) { out.append(contents, location, contents.length()); } FileUtils.writeStringToFile(targetFile, out.toString()); }
From source file:com.haulmont.yarg.loaders.impl.AbstractDbDataLoader.java
protected QueryPack prepareQuery(String query, BandData parentBand, Map<String, Object> reportParams) { Map<String, Object> currentParams = new HashMap<String, Object>(); if (reportParams != null) { currentParams.putAll(reportParams); }/* w w w .j av a 2 s. c o m*/ //adds parameters from parent bands hierarchy while (parentBand != null) { addParentBandDataToParameters(parentBand, currentParams); parentBand = parentBand.getParentBand(); } List<QueryParameter> queryParameters = new ArrayList<QueryParameter>(); HashSet<String> paramNames = findParameterNames(query); Map<String, String> paramsToRemoveFromQuery = new LinkedHashMap<String, String>(); for (String paramName : paramNames) { Object paramValue = currentParams.get(paramName); String alias = "${" + paramName + "}"; String paramNameRegexp = "\\$\\{" + paramName + "\\}"; String valueRegexp = "([\\w|\\d|\\.|\\_]+|\'.+?\'|\".+?\"|\\(.+?\\))";//fieldName|literal|list or sub-query String andRegexp = "\\s+and\\s+"; String orRegexp = "\\s+or\\s+"; String operatorRegexp = "(=|>=|<=|\\slike\\s|>|<|\\sin\\s)"; String expression1Rgxp = "\\s*" + valueRegexp + "\\s*" + operatorRegexp + "\\s*" + paramNameRegexp + "\\s*"; String expression2Rgxp = "\\s*" + paramNameRegexp + "\\s*" + operatorRegexp + "\\s*" + valueRegexp + "\\s*"; String expressionRgxp = "(" + expression1Rgxp + "|" + expression2Rgxp + ")"; String andFirstRgxp = andRegexp + expressionRgxp; String orFirstRgxp = orRegexp + expressionRgxp; String andLastRgxp = expressionRgxp + andRegexp; String orLastRgxp = expressionRgxp + orRegexp; String isNullRgxp = paramNameRegexp + "\\s+is\\s+null"; String isNotNullRgxp = paramNameRegexp + "\\s+is\\s+not\\s+null"; String boundsRegexp = "\\[\\[.+?" 
+ paramNameRegexp + ".+?\\]\\]"; if (paramValue == null && reportParams != null && reportParams.containsKey(paramName)) {//if value == null && this is user parameter - remove condition from query paramsToRemoveFromQuery.put("(?i)" + andFirstRgxp, " and 1=1 "); paramsToRemoveFromQuery.put("(?i)" + andLastRgxp, " 1=1 and "); paramsToRemoveFromQuery.put("(?i)" + orFirstRgxp, " or 1=0 "); paramsToRemoveFromQuery.put("(?i)" + orLastRgxp, " 1=0 or "); paramsToRemoveFromQuery.put("(?i)" + expressionRgxp, " 1=1 "); paramsToRemoveFromQuery.put("(?i)" + isNullRgxp, " 1=1 "); paramsToRemoveFromQuery.put("(?i)" + isNotNullRgxp, " 1=0 "); paramsToRemoveFromQuery.put("(?i)" + boundsRegexp, " "); } else if (query.contains(alias)) {//otherwise - create parameter and save each entry's position Pattern pattern = Pattern.compile(paramNameRegexp); Matcher replaceMatcher = pattern.matcher(query); int subPosition = 0; while (replaceMatcher.find(subPosition)) { subPosition = replaceMatcher.start(); queryParameters .add(new QueryParameter(paramNameRegexp, subPosition, convertParameter(paramValue))); subPosition = replaceMatcher.end(); } } } for (Map.Entry<String, String> entry : paramsToRemoveFromQuery.entrySet()) { query = query.replaceAll(entry.getKey(), entry.getValue()); } query = query.replaceAll("\\[\\[", ""); query = query.replaceAll("\\]\\]", ""); // Sort params by position Collections.sort(queryParameters, new Comparator<QueryParameter>() { @Override public int compare(QueryParameter o1, QueryParameter o2) { return o1.getPosition().compareTo(o2.getPosition()); } }); //normalize params position to 1..n for (int i = 1; i <= queryParameters.size(); i++) { QueryParameter queryParameter = queryParameters.get(i - 1); queryParameter.setPosition(i); } for (QueryParameter parameter : queryParameters) { query = insertParameterToQuery(query, parameter); } return new QueryPack(query.trim().replaceAll(" +", " "), queryParameters.toArray(new QueryParameter[queryParameters.size()])); }
From source file:qhindex.controller.SearchAuthorWorksController.java
private String resolvePublisher(String urlCitationWork, String publisherNameIncomplete) throws IOException { String publisher = publisherNameIncomplete; if (urlCitationWork.contains(".pdf") == false) { // Get the header and determine if the resource is in text format (html or plain) // to be able to extract the publisher name final RequestConfig requestConfig = RequestConfig.custom() .setConnectTimeout(AppHelper.connectionTimeOut) .setConnectionRequestTimeout(AppHelper.connectionTimeOut) .setSocketTimeout(AppHelper.connectionTimeOut).setStaleConnectionCheckEnabled(true).build(); final CloseableHttpClient httpclient = HttpClients.custom().setDefaultRequestConfig(requestConfig) .build();//w ww . j av a 2s . com HttpHead httpHead = new HttpHead(urlCitationWork); try { CloseableHttpResponse responseHead = httpclient.execute(httpHead); StatusLine statusLineHead = responseHead.getStatusLine(); responseHead.close(); String contentType = responseHead.getFirstHeader("Content-Type").toString().toLowerCase(); if (statusLineHead.getStatusCode() < 300 && contentType.contains("text/html") || contentType.contains("text/plain")) { HttpGet httpGet = new HttpGet(urlCitationWork); CloseableHttpResponse responsePost = httpclient.execute(httpGet); StatusLine statusLine = responsePost.getStatusLine(); if (statusLine.getStatusCode() < 300) { //AppHelper.waitBeforeNewRequest(); BufferedReader br = new BufferedReader( new InputStreamReader((responsePost.getEntity().getContent()))); String content = new String(); String line; while ((line = br.readLine()) != null) { content += line; } int bodyStartIndex = content.indexOf("<body"); if (bodyStartIndex < 0) bodyStartIndex = 0; try { publisherNameIncomplete = formatRegExSpecialCharsInString(publisherNameIncomplete); Pattern pattern = Pattern.compile(publisherNameIncomplete + "(\\w|\\d|-|\\s)+"); Matcher matcher = pattern.matcher(content); if (matcher.find(bodyStartIndex)) { publisher = content.substring(matcher.start(), matcher.end()); } else { 
publisher = publisherNameIncomplete; } } catch (Exception ex) { Debug.print( "Exception while resolving publisher for citing work - extrating pattern from citation web resource: " + ex.toString()); resultsMsg += "Exception while resolving publisher for citing work - extrating pattern from citation web resource.\n"; } } responsePost.close(); } } catch (IOException ioEx) { Debug.print("Exception while resolving publisher for citing work: " + ioEx.toString()); resultsMsg += "Exception while resolving publisher for citing work.\n"; } } publisher = publisher.trim(); return publisher; }
From source file:de.csw.linkgenerator.CSWLinksetRenderer.java
public String renderLinks(String text) { Matcher linksetMatcher = linksetPattern.matcher(text); StringBuilder newText = new StringBuilder(); int oldEnd = 0; while (linksetMatcher.find()) { String linkset = linksetMatcher.group(); // extract the content of the text nodes StringBuilder textContent = new StringBuilder(); Matcher textContentMatcher = textContentPattern.matcher(linkset); while (textContentMatcher.find()) { textContent.append(textContentMatcher.group(1)); }/* w w w . j a va 2 s . co m*/ int start = linksetMatcher.start(); int end = linksetMatcher.end(); newText.append(text.substring(oldEnd, start)); newText.append("<a href=\"#\" onclick=\"showPopup(this, new Array("); Matcher linkMatcher = linkPattern.matcher(linkset); while (linkMatcher.find()) { String page = linkMatcher.group(1); newText.append('\''); newText.append(page); newText.append("',"); } newText.setCharAt(newText.length() - 1, ')'); newText.append("); return false;\">"); newText.append(textContent); newText.append("</a>"); oldEnd = end; } // append rest of text newText.append(text.substring(oldEnd, text.length())); // end == 0 means that there are no csw:linkset elements, thus we do not need to include the popup html and javascript if (oldEnd != 0) { newText.append('\n'); newText.append(cswLinkPopupHTML); } return newText.toString(); }
From source file:com.cenrise.test.azkaban.PropsUtils.java
private static String resolveVariableReplacement(final String value, final Props props, final LinkedHashSet<String> visitedVariables) { final StringBuffer buffer = new StringBuffer(); int startIndex = 0; final Matcher matcher = VARIABLE_REPLACEMENT_PATTERN.matcher(value); while (matcher.find(startIndex)) { if (startIndex < matcher.start()) { // Copy everything up front to the buffer buffer.append(value.substring(startIndex, matcher.start())); }//from ww w. j a v a 2s.c o m final String subVariable = matcher.group(1); // Detected a cycle if (visitedVariables.contains(subVariable)) { throw new IllegalArgumentException( String.format("Circular variable substitution found: [%s] -> [%s]", StringUtils.join(visitedVariables, "->"), subVariable)); } else { // Add substitute variable and recurse. final String replacement = props.get(subVariable); visitedVariables.add(subVariable); if (replacement == null) { throw new UndefinedPropertyException( String.format("Could not find variable substitution for variable(s) [%s]", StringUtils.join(visitedVariables, "->"))); } buffer.append(resolveVariableReplacement(replacement, props, visitedVariables)); visitedVariables.remove(subVariable); } startIndex = matcher.end(); } if (startIndex < value.length()) { buffer.append(value.substring(startIndex)); } return buffer.toString(); }
From source file:gate.creole.kea.CorpusImporter.java
protected boolean annotateKeyPhrases(Document document, String annSetName, String keyphraseAnnotationType, List<String> phrases) throws Exception { if (phrases == null || phrases.isEmpty()) return false; //create a pattern String patternStr = ""; Iterator<String> phraseIter = phrases.iterator(); while (phraseIter.hasNext()) { String phrase = phraseIter.next(); patternStr += patternStr.length() == 0 ? "\\Q" + phrase + "\\E" : "|\\Q" + phrase + "\\E"; }//from ww w . j a v a 2 s . co m Pattern pattern = Pattern.compile(patternStr, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); Matcher matcher = pattern.matcher(document.getContent().toString()); AnnotationSet outputSet = annSetName == null || annSetName.length() == 0 ? document.getAnnotations() : document.getAnnotations(annSetName); boolean result = false; while (matcher.find()) { int start = matcher.start(); int end = matcher.end(); outputSet.add(new Long(start), new Long(end), keyphraseAnnotationType, Factory.newFeatureMap()); result = true; } document.getFeatures().put("Author assigned keyphrases", phrases); return result; }
From source file:cn.dockerfoundry.ide.eclipse.dockerfile.validator.DockerfileDelegatingValidator.java
@SuppressWarnings("unchecked") public Map<DockerfileValidationLevel, List<DockerfileValidationResult>> validate() { Map<DockerfileValidationLevel, List<DockerfileValidationResult>> result = new HashMap<DockerfileValidationLevel, List<DockerfileValidationResult>>(); if (this.dockerfileInputStream == null) return result; ValidatorUtils validatorUtils = new ValidatorUtils(); boolean fromCheck = false; int currentLine = 0; List<DockerfileValidationResult> errors = new ArrayList<DockerfileValidationResult>(); List<DockerfileValidationResult> warnings = new ArrayList<DockerfileValidationResult>(); List<DockerfileValidationResult> infos = new ArrayList<DockerfileValidationResult>(); Map<String, Object> ruleObject = validatorUtils .getRules(DockerfileDelegatingValidator.class.getResourceAsStream("default.yaml")); List<Map<String, Object>> requiredInstructions = validatorUtils.createReqInstructionHash(ruleObject); Map<String, Object> general = (Map<String, Object>) ruleObject.get("general"); List<String> valid_instructions = (List<String>) general.get("valid_instructions"); Pattern validInstructionsRegex = validatorUtils.createValidCommandRegex(valid_instructions); Pattern continuationRegex = null; // Pattern ignoreRegex = null; Object multiline_regex = general.get("multiline_regex"); if (multiline_regex != null && multiline_regex.toString().length() > 2) { String _multiline_regex = multiline_regex.toString().substring(1, multiline_regex.toString().length() - 1); continuationRegex = Pattern.compile(_multiline_regex, Pattern.CASE_INSENSITIVE); }//from www.j ava 2s . 
c o m Object ignore_regex = general.get("ignore_regex"); if (ignore_regex != null && ignore_regex.toString().length() > 2) { String _ignore_regex = ignore_regex.toString().substring(1, ignore_regex.toString().length() - 1); Pattern ignoreRegex = Pattern.compile(_ignore_regex, Pattern.CASE_INSENSITIVE); System.out.println("ignore_regex is not used for now: " + ignoreRegex.pattern()); } try { String dockerfile = IOUtils.toString(dockerfileInputStream); String[] linesArr = dockerfile.split("(\\r|\\n)"); if (linesArr != null && linesArr.length > 0) { for (int i = 0; i < linesArr.length; i++) { currentLine++; String line = linesArr[i]; int lineOffSet = 0; if (line == null || line.length() == 0 || line.charAt(0) == '#') { continue; } while (validatorUtils.isPartialLine(line, continuationRegex)) { line = continuationRegex.matcher(line).replaceAll(" "); if (linesArr[currentLine + lineOffSet].charAt(0) == '#') { linesArr[currentLine + lineOffSet] = null; line = line + "\\"; } else { line = line + linesArr[currentLine + lineOffSet]; linesArr[currentLine + lineOffSet] = null; } lineOffSet++; } // First instruction must be FROM if (!fromCheck) { fromCheck = true; if (line.toUpperCase().indexOf("FROM") != 0) { DockerfileValidationResult error = new DockerfileValidationResult(); error.setLine(currentLine); error.setLevel(DockerfileValidationLevel.ERROR); error.setMessage("Missing or misplaced FROM"); error.setLineContent(line); errors.add(error); } } // end for FROM Matcher matcher = validInstructionsRegex.matcher(line); if (!matcher.find()) { DockerfileValidationResult error = new DockerfileValidationResult(); error.setLine(currentLine); error.setLevel(DockerfileValidationLevel.ERROR); error.setMessage("Invalid instruction"); error.setLineContent(line); errors.add(error); } else { String instruction = line.substring(matcher.start(), matcher.end()).trim(); String params = matcher.replaceAll(""); validatorUtils.checkLineRules(ruleObject, instruction, params, line, currentLine, 
errors, warnings, infos); requiredInstructions.remove(instruction); } // end for valid instructions checking } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } validatorUtils.checkRequiredInstructions(requiredInstructions, errors, warnings, infos); result.put(DockerfileValidationLevel.ERROR, errors); result.put(DockerfileValidationLevel.WARNING, warnings); result.put(DockerfileValidationLevel.INFO, infos); return result; }
From source file:jp.go.nict.langrid.wrapper.ws_1_2.translation.AbstractTranslationService.java
private String[] divideSource(String source, int maxSourceLength) { ArrayList<String> resultArray = new ArrayList<String>(); Pattern p = Pattern.compile("(\\Q*$%*\\E|\\Q*%$*\\E)"); Matcher m = p.matcher(source); int index = 0; int end = 0;// ww w . j av a 2 s . c o m while (index + maxSourceLength < source.length()) { Matcher region = m.region(index, index + maxSourceLength); while (region.find()) { end = m.end(); } if (end > index) { resultArray.add(source.substring(index, end + 1)); index = end + 2; end = index; } else { break; } } if (index < source.length()) { resultArray.add(source.substring(index)); } return resultArray.toArray(new String[] {}); }