Example usage of java.util.regex.MatchResult.end()

Introduction

This page collects example usages of the java.util.regex.MatchResult.end() method.

Prototype

public int end();

Source Link

Document

Returns the offset after the last character matched.

Usage

From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java

/**
 * Tallies hits for the text hidden behind each mask run in a mutant.
 *
 * <p>Every maximal run of {@code X} characters in the mutant's de-identified
 * string is located first. For each run, the same character range of the
 * mutant's final text is split into whitespace-delimited tokens, and those
 * tokens are handed to {@code getHitCount} together with the running total.
 *
 * @param mutant the mutant supplying the de-identified string and the final text
 * @return the running total after the last mask run has been processed
 */
private int getTruePositiveTokenCount(Mutant mutant) {
    // Phase 1: snapshot every run of 'X' in the de-identified string.
    final Matcher maskMatcher = Pattern.compile("X+").matcher(mutant.getDeidentifiedString());
    final List<MatchResult> maskedSpans = new ArrayList<>();
    while (maskMatcher.find()) {
        maskedSpans.add(maskMatcher.toMatchResult());
    }

    // Phase 2: tokenize the final text under each run and accumulate the hits.
    int total = 0;
    for (MatchResult span : maskedSpans) {
        String covered = mutant.getFinalText().substring(span.start(), span.end());
        ArrayList<String> tokens = new ArrayList<>();
        for (StringTokenizer st = new StringTokenizer(covered); st.hasMoreTokens();) {
            tokens.add(st.nextToken());
        }
        total = getHitCount(mutant, total, tokens);
    }
    return total;
}

From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java

/**
 * Counts masked tokens that are not among the mutant's output tokens.
 *
 * <p>Every maximal run of {@code X} characters in the mutant's de-identified
 * string is located. For each run, the same character range of the mutant's
 * final text is split into whitespace-delimited tokens. A token is counted
 * when it is not empty, is not a lone {@code "-"}, and is not equal to any
 * of the mutant's output tokens.
 *
 * @param mutant the mutant supplying the de-identified string, the final text
 *        and the output tokens
 * @return the number of masked tokens that match no output token
 */
private int getFalsePositiveTokenCount(Mutant mutant) {
    int count = 0;
    Pattern mask = Pattern.compile("X+");
    List<MatchResult> results = new ArrayList<>();
    Matcher matcher = mask.matcher(mutant.getDeidentifiedString());
    while (matcher.find()) {
        results.add(matcher.toMatchResult());
    }
    for (MatchResult result : results) {
        StringTokenizer tokenizer = new StringTokenizer(
                mutant.getFinalText().substring(result.start(), result.end()));
        while (tokenizer.hasMoreTokens()) {
            String hit = tokenizer.nextToken();
            boolean isAnIdentifier = false;
            for (String token : mutant.getOutputTokens()) {
                // A whole-string match against a quoted pattern is plain equality;
                // comparing directly avoids compiling a regex per (hit, token) pair.
                if (hit.equals(token)) {
                    isAnIdentifier = true;
                    break;
                }
            }
            if (!isAnIdentifier && !hit.isEmpty() && !hit.equals("-")) {
                count++;
            }
        }
    }
    return count;
}

From source file:org.springframework.social.twitter.api.impl.TweetDeserializer.java

/**
 * Scans the text for ticker symbols (a {@code $} followed by ASCII letters)
 * and appends one {@code TickerSymbolEntity} per occurrence to the ticker
 * symbol list of the given entities.
 *
 * <p>Each entity carries the symbol without the leading {@code $}, a search
 * URL built from that symbol, and the start/end offsets of the match
 * (including the {@code $}) within the text.
 *
 * @param text the text to scan
 * @param entities the entities whose ticker symbol list receives the results
 */
private void extractTickerSymbolEntitiesFromText(String text, Entities entities) {
    final Matcher tickerMatcher = Pattern.compile("\\$[A-Za-z]+").matcher(text);
    while (tickerMatcher.find()) {
        final MatchResult hit = tickerMatcher.toMatchResult();
        // Drop the leading '$' to obtain the bare symbol.
        final String symbol = hit.group().substring(1);
        final String searchUrl = "https://twitter.com/search?q=%24" + symbol + "&src=ctag";
        final int[] indices = { hit.start(), hit.end() };
        entities.getTickerSymbols().add(new TickerSymbolEntity(symbol, searchUrl, indices));
    }
}

From source file:net.osten.watermap.convert.AZTReport.java

/**
 * Parses one data line of the report into a {@code WaterReport}.
 *
 * <p>The line is sliced using the positions of several regex matches: the
 * mileages, the water source type, the historic reliability, and the dates.
 * The name, description, last-report date, state, source and URL of the
 * result are filled in from those slices.
 *
 * @param line the raw data line
 * @return the populated report; {@code null} when the mileages, the type or
 *         the historic reliability cannot be matched. If the post date
 *         cannot be parsed, the {@code ParseException} is logged and the
 *         report is returned without the fields set after that point.
 */
private WaterReport parseDataLine(String line) {
    WaterReport result = new WaterReport();

    try {
        // Example line:
        // 8.3 8.3 Tub Spring (aka Bathtub Spring) spring 3 full tub; good trickle3/28/15 3/28/15 Bird Food 4/5/15

        // Mileages = first two decimals
        MatchResult decimalsMatch = RegexUtils.matchFirstOccurance(line, decimalPattern);
        if (decimalsMatch == null) {
            log.fine("Mileages not found");
            return null;
        }
        int decimalsEnd = decimalsMatch.end();

        // Type = spring | creek | spring fed | windmill | store | dirt tank | pipe | Town | etc..
        MatchResult typeMatch = RegexUtils.matchFirstOccurance(line, typesPattern);
        if (typeMatch == null) {
            log.fine("Type not found");
            return null;
        }
        log.finer("type=" + typeMatch.group());
        int typeEnd = typeMatch.end();

        // Name = text from second decimal number to type (spring,creek,etc.)
        // NOTE(review): the slice ends at typeEnd, i.e. after the matched type
        // text, so the type word is part of the name - confirm this is intended.
        // NOTE(review): substring throws if typeEnd < decimalsEnd; this assumes
        // the first type match lies after the mileages - confirm.
        log.finer("decimalsEnd=" + decimalsEnd + " typeEnd=" + typeEnd);
        String name = line.substring(decimalsEnd, typeEnd);
        result.setName(name.trim());

        // Historic Reliability = int after Type (can be "1 to 2" or "0-2")
        // The patterns are tried in the order 3, 2, 1, searching from typeEnd;
        // the first one that matches wins.
        MatchResult histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern3, typeEnd);
        if (histRelMatch == null) {
            histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern2, typeEnd);
            if (histRelMatch == null) {
                histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern1, typeEnd);
                if (histRelMatch == null) {
                    log.fine("Historical Reliability not found");
                    return null;
                }
            }
        }
        log.finer("histRel=" + histRelMatch.group());
        String historicReliability = mapHistoricReliability(histRelMatch.group().trim());
        int histRelEnd = histRelMatch.end();

        // Report Date = second date from right
        int reportDateEnd = -1;
        int reportDateStart = -1;
        List<MatchResult> dates = RegexUtils.matchOccurences(line, datePattern);
        if (dates.size() >= 2) {
            reportDateEnd = dates.get(dates.size() - 2).end();
            reportDateStart = dates.get(dates.size() - 2).start();
        } else {
            // Fewer than two dates: the description slice below then runs from
            // histRelEnd to whichever is larger, the last character index or
            // histRelEnd itself. reportDateEnd stays -1.
            log.fine("Only found " + dates.size() + " dates");
            reportDateStart = Math.max(line.length() - 1, histRelEnd);
        }

        // Report = Historic Reliability to Report Date
        log.finer("histRelEnd=" + histRelEnd + " reportDateStart=" + reportDateStart);
        if (histRelEnd >= 0 && reportDateStart >= 0 && reportDateStart >= histRelEnd) {
            String report = line.substring(histRelEnd, reportDateStart);
            result.setDescription(report.trim() + "<br />Historical Reliability:" + historicReliability);
        } else {
            log.fine("cannot find historic reliability");
        }

        // Post Date = first date from right
        int postDateStart = -1;
        MatchResult postDate = RegexUtils.matchLastOccurence(line, datePattern);
        if (postDate == null) {
            log.fine("Post Date not found");
        } else {
            result.setLastReport(dateFormatter.parse(postDate.group()));
            postDateStart = postDate.start();
            log.finer("postDate=" + postDate.group());
        }

        // Reported By = text between Report Date and Post Date
        // The extracted text is only logged; it is not stored on the result.
        if (postDateStart >= 0 && reportDateEnd >= 0 && postDateStart > reportDateEnd) {
            String reportedBy = line.substring(reportDateEnd, postDateStart);
            log.finer("reportedBy=" + reportedBy);
        } else {
            log.finer("cannot find reportedBy");
        }

        // NOTE(review): the description is only set in the branch above, so
        // getDescription() may still hold its initial value here - confirm that
        // WaterStateParser.parseState accepts that.
        result.setState(WaterStateParser.parseState(result.getDescription()));
        result.setSource(SOURCE_TITLE);
        result.setUrl(SOURCE_URL);
    } catch (

    ParseException e)

    {
        // Raised by dateFormatter.parse; state, source and URL are not set
        // when this happens, but the partially filled result is still returned.
        log.fine("ParseException:" + e.getLocalizedMessage());
    }

    return result;

}

From source file:jp.go.nict.langrid.wrapper.ws_1_2.translation.AbstractTranslationService.java

/**
 * /*  w  ww.  j  ava  2  s .  com*/
 * 
 */
public final String multistatementTranslate(String sourceLang, String targetLang, String source,
        String delimiterRegx)
        throws AccessLimitExceededException, InvalidParameterException, LanguagePairNotUniquelyDecidedException,
        NoAccessPermissionException, NoValidEndpointsException, ProcessFailedException, ServerBusyException,
        ServiceNotActiveException, ServiceNotFoundException, UnsupportedLanguagePairException {
    checkStartupException();
    if (StringUtils.isBlank(delimiterRegx)) {
        throw new InvalidParameterException("delimiterRegx", "is Blank.");
    }
    StringBuilder sb = new StringBuilder();
    Scanner s = new Scanner(source).useDelimiter(delimiterRegx);
    int i = 0;
    while (s.hasNext()) {
        String text = s.next();
        MatchResult m = s.match();
        if (i != m.start()) {
            String tag = source.substring(i, m.start());
            sb.append(tag);
        }
        i = m.end();
        sb.append(invokeDoTranslation(sourceLang, targetLang, text));
    }
    if (source.length() != i) {
        String tag = source.substring(i);
        sb.append(tag);
    }

    return sb.toString();
}

From source file:gate.creole.splitter.RegexSentenceSplitter.java

/**
 * Checks whether a possible match is being vetoed by a non split match. A
 * possible match is vetoed if it any nay overlap with a veto region.
 *
 * @param split the match result representing the split to be tested
 * @param vetoRegions regions where matches are not allowed. For efficiency
 * reasons, this method assumes these regions to be non overlapping and sorted
 * in ascending order./*from   w  ww  .j a va 2s.  c om*/
 * All veto regions that end before the proposed match are also discarded
 * (again for efficiency reasons). This requires the proposed matches to be
 * sent to this method in ascending order, so as to avoid malfunctions.
 * @return <tt>true</tt> iff the proposed split should be ignored
 */
private boolean veto(MatchResult split, List<int[]> vetoRegions) {
    //if no more non splits available, accept everything
    for (Iterator<int[]> vetoRegIter = vetoRegions.iterator(); vetoRegIter.hasNext();) {
        int[] aVetoRegion = vetoRegIter.next();
        if (aVetoRegion[1] - 1 < split.start()) {
            //current veto region ends before the proposed split starts
            //--> discard the veto region
            vetoRegIter.remove();
        } else if (split.end() - 1 < aVetoRegion[0]) {
            //veto region starts after the split ends
            //-> we can return false
            return false;
        } else {
            //we have overlap
            return true;
        }
    }
    //if we got this far, all veto regions are before the split
    return false;
}

From source file:gate.creole.splitter.RegexSentenceSplitter.java

/**
 * Splits the document text into sentences using the internal, external and
 * non-split patterns.
 *
 * <p>All non-split matches are collected first as veto regions. The internal
 * and external split matches are then consumed in the order defined by
 * {@code MatchResultComparator}. Every split that is not vetoed produces a
 * "Split" annotation whose "kind" feature is "internal" or "external", plus
 * a sentence annotation when there is text between the end of the previous
 * sentence and the split. For an internal split the sentence extends to the
 * end of the split match. For an external split it ends before the split
 * starts, with trailing space characters excluded.
 *
 * <p>If the document text is empty or only whitespace, the method returns
 * right after the initial progress event without firing the finished event.
 *
 * @throws ExecutionException if an annotation cannot be added because of an
 *         invalid offset, or if a match can be attributed to neither matcher
 */
@Override
public void execute() throws ExecutionException {
    interrupted = false;
    int lastProgress = 0;
    fireProgressChanged(lastProgress);
    //get pointers to the annotation sets
    AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0)
            ? document.getAnnotations()
            : document.getAnnotations(outputASName);

    String docText = document.getContent().toString();

    /* If the document's content is empty or contains only whitespace,
     * we drop out right here, since there's nothing to sentence-split.     */
    if (docText.trim().length() < 1) {
        return;
    }

    Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText);
    Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText);

    Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText);
    //store all non split locations in a list of pairs
    List<int[]> nonSplits = new LinkedList<int[]>();
    while (nonSplitMatcher.find()) {
        nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() });
    }
    //this lists holds the next matches at each step
    //(at most one pending match per matcher)
    List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>();
    //initialise matching process
    MatchResult internalMatchResult = null;
    if (internalSplitMatcher.find()) {
        internalMatchResult = internalSplitMatcher.toMatchResult();
        nextSplitMatches.add(internalMatchResult);
    }
    MatchResult externalMatchResult = null;
    if (externalSplitMatcher.find()) {
        externalMatchResult = externalSplitMatcher.toMatchResult();
        nextSplitMatches.add(externalMatchResult);
    }
    MatchResultComparator comparator = new MatchResultComparator();
    int lastSentenceEnd = 0;

    while (!nextSplitMatches.isEmpty()) {
        //see which one matches first
        Collections.sort(nextSplitMatches, comparator);
        MatchResult nextMatch = nextSplitMatches.remove(0);
        //identity comparison tells which matcher produced the match
        if (nextMatch == internalMatchResult) {
            //we have a new internal split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //add the split annotation
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "internal");
                    outputAS.add(Long.valueOf(nextMatch.start()), Long.valueOf(nextMatch.end()), "Split",
                            features);
                    //generate the sentence annotation
                    int endOffset = nextMatch.end();
                    //find the first non whitespace character starting from where the
                    //last sentence ended
                    //NOTE(review): this branch tests Character.isWhitespace while the
                    //external branch tests Character.isSpaceChar - confirm the
                    //difference is intended
                    while (lastSentenceEnd < endOffset
                            && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < nextMatch.start()) {
                        outputAS.add(Long.valueOf(lastSentenceEnd), Long.valueOf(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end
                    lastSentenceEnd = endOffset;
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step
            if (internalSplitMatcher.find()) {
                internalMatchResult = internalSplitMatcher.toMatchResult();
                nextSplitMatches.add(internalMatchResult);
            } else {
                internalMatchResult = null;
            }
        } else if (nextMatch == externalMatchResult) {
            //we have a new external split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //generate the split
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "external");
                    outputAS.add(Long.valueOf(nextMatch.start()), Long.valueOf(nextMatch.end()), "Split",
                            features);
                    //generate the sentence annotation
                    //find the last non whitespace character, going backward from
                    //where the external skip starts
                    int endOffset = nextMatch.start();
                    while (endOffset > lastSentenceEnd
                            && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) {
                        endOffset--;
                    }
                    //find the first non whitespace character starting from where the
                    //last sentence ended
                    while (lastSentenceEnd < endOffset
                            && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < endOffset) {
                        outputAS.add(Long.valueOf(lastSentenceEnd), Long.valueOf(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end
                    lastSentenceEnd = nextMatch.end();
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step
            if (externalSplitMatcher.find()) {
                externalMatchResult = externalSplitMatcher.toMatchResult();
                nextSplitMatches.add(externalMatchResult);
            } else {
                externalMatchResult = null;
            }
        } else {
            //malfunction
            throw new ExecutionException("Invalid state - cannot identify match!");
        }
        //report progress, but only when it advanced by more than 20 points
        int newProgress = 100 * lastSentenceEnd / docText.length();
        if (newProgress - lastProgress > 20) {
            lastProgress = newProgress;
            fireProgressChanged(lastProgress);
        }
    } //while(!nextMatches.isEmpty()){
    fireProcessFinished();
}

From source file:magicware.scm.redmine.tools.IssueSyncApp.java

/**
 * Creates new issues from the rows of a spreadsheet, using a JSON template.
 *
 * <p>The template text is read and every placeholder matching
 * {@code Constants.ISSUE_FIELD_VALUE_EXP} is recorded. The sheet is then
 * walked from its last row upward. For each row whose key is not yet known
 * to {@code redmineClient}, the placeholders are substituted with cell
 * values to build one issue text. Scanning stops at the first blank key or
 * at the first key for which the query does not return 0. The collected
 * issues are finally submitted in reverse collection order.
 *
 * @param syncItem supplies the template path, the workbook path, the sheet
 *        name, the key row/column indexes and the project/key-field ids
 * @throws IOException declared by the file and workbook operations used here
 * @throws InvalidFormatException declared by the workbook factory used here
 */
public void execute(SyncItem syncItem) throws IOException, InvalidFormatException {

    FileInputStream in = null;

    try {

        // Load the JSON issue template text.
        String issueTemplate = FileUtils.readFileAsString(syncItem.getJsonTemplate());

        // Find every field placeholder in the template.
        Matcher m = Pattern.compile(Constants.ISSUE_FIELD_VALUE_EXP).matcher(issueTemplate);

        List<MatchResult> mrList = new ArrayList<MatchResult>();

        while (m.find()) {
            MatchResult mr = m.toMatchResult();
            mrList.add(mr);
        }

        // Open the workbook file.
        in = new FileInputStream(syncItem.getFilePath());
        Workbook wb = WorkbookFactory.create(in);

        FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();

        Sheet sheet = wb.getSheet(syncItem.getSheetName());
        Row row = null;
        Cell cell = null;

        List<String> issues = new ArrayList<String>();

        // Walk the rows from the last one up to the key begin row
        // (getKeyRowBeginIdx() is treated as 1-based; values <= 0 mean row 0).
        for (int i = sheet.getLastRowNum(); i >= (syncItem.getKeyRowBeginIdx() > 0
                ? (syncItem.getKeyRowBeginIdx() - 1)
                : 0); i--) {
            // Fetch the row; rows that do not exist are skipped.
            row = sheet.getRow(i);

            if (row != null) {

                String keyNo = ExcelUtils.getCellContent(row.getCell(syncItem.getKeyColumnIdx() - 1),
                        evaluator);

                // Stop scanning at the first row whose key cell is blank.
                if (StringUtils.isBlank(keyNo)) {
                    break;
                }

                // Build an issue only when the query returns 0 for this key
                // (presumably meaning no matching issue exists - verify against redmineClient).
                if (redmineClient.queryIssue(syncItem.getProjectId(), syncItem.getKeyFiledId(), keyNo) == 0) {
                    StringBuilder newIssue = new StringBuilder();
                    // End offset of the previous placeholder within the template.
                    int eolIdx = 0;
                    for (MatchResult matchResult : mrList) {

                        // Copy the template text between the previous placeholder and this one.
                        newIssue.append(issueTemplate.substring(eolIdx, matchResult.start()));

                        // Group 1 holds the 1-based column number of the cell to insert.
                        int cellIndex = Integer.valueOf(matchResult.group(1)) - 1;
                        cell = row.getCell(cellIndex);
                        String cellvalue = ExcelUtils.getCellContent(cell, evaluator);

                        // Group 3, when present, is a JSON map used to translate the cell
                        // value; unmapped or empty values fall back to the "default" entry.
                        String valueMapStr = matchResult.group(3);
                        Map<String, String> valueMap = null;
                        if (valueMapStr != null) {
                            valueMap = JSON.decode(valueMapStr);
                            if (StringUtils.isNotEmpty(cellvalue) && valueMap.containsKey(cellvalue)) {
                                cellvalue = valueMap.get(cellvalue);
                            } else {
                                cellvalue = valueMap.get("default");
                            }
                        }

                        // Empty values contribute nothing; others are escaped before insertion.
                        if (StringUtils.isNotEmpty(cellvalue)) {
                            cellvalue = StringEscapeUtils.escapeJavaScript(cellvalue);
                            newIssue.append(cellvalue);
                        }
                        eolIdx = matchResult.end();
                    }
                    // Append the template text after the last placeholder.
                    newIssue.append(issueTemplate.substring(eolIdx));
                    issues.add(newIssue.toString());
                } else {
                    // The query returned a non-zero value for this key: stop scanning.
                    break;
                }
            }
        }

        // Rows were collected bottom-up, so iterate backwards to submit them top-down.
        for (int i = issues.size() - 1; i >= 0; i--) {
            Map<String, Issue> issueMap = JSON.decode(issues.get(i));
            log.debug("create new issue >>>");
            log.debug(JSON.encode(issueMap, true));
            redmineClient.createNewIssue(issues.get(i));
        }

    } finally {
        if (in != null) {
            in.close();
            in = null;
        }
    }
}

From source file:au.org.ala.biocache.dao.SearchDAOImpl.java

/**
 * Substitute with i18n properties/* w  w w  .j  ava  2 s  .  c  om*/
 *
 * @param displayText
 * @return
 */
public String formatDisplayStringWithI18n(String displayText) {

    if (StringUtils.trimToNull(displayText) == null)
        return displayText;
    try {
        String formatted = displayText;

        Matcher m = indexFieldPatternMatcher.matcher(displayText);
        int currentPos = 0;
        while (m.find(currentPos)) {
            String matchedIndexTerm = m.group(0).replaceAll(":", "");
            MatchResult mr = m.toMatchResult();
            //if the matched term represents a layer lookup the title in the layers service
            Matcher lm = layersPattern.matcher(matchedIndexTerm);
            String i18n = "";
            if (lm.matches()) {
                i18n = layersService.getName(matchedIndexTerm);
                if (i18n == null) {
                    i18n = matchedIndexTerm;
                }
            } else {
                i18n = messageSource.getMessage("facet." + matchedIndexTerm, null, matchedIndexTerm, null);
            }
            //System.out.println("i18n for " + matchedIndexTerm + " = " + i18n);
            if (!matchedIndexTerm.equals(i18n)) {

                int nextWhitespace = displayText.substring(mr.end()).indexOf(" ");
                String extractedValue = null;
                if (nextWhitespace > 0) {
                    extractedValue = displayText.substring(mr.end(), mr.end() + nextWhitespace);
                } else {
                    //reached the end of the query
                    extractedValue = displayText.substring(mr.end());
                }

                String formattedExtractedValue = SearchUtils.stripEscapedQuotes(extractedValue);

                String i18nForValue = messageSource.getMessage(matchedIndexTerm + "." + formattedExtractedValue,
                        null, "", null);
                if (i18nForValue.length() == 0)
                    i18nForValue = messageSource.getMessage(formattedExtractedValue, null, "", null);

                if (i18nForValue.length() > 0) {
                    formatted = formatted.replaceAll(matchedIndexTerm + ":" + extractedValue,
                            i18n + ":" + i18nForValue);
                } else {
                    //just replace the matched index term
                    formatted = formatted.replaceAll(matchedIndexTerm, i18n);
                }
            }
            currentPos = mr.end();
        }
        return formatted;

    } catch (Exception e) {
        logger.debug(e.getMessage(), e);
        return displayText;
    }
}