List of usage examples for java.util.regex MatchResult start
public int start();
From source file:net.solarnetwork.util.StringMerger.java
/** * Merge from a String source into a StringBuilder. * /*from w w w. j a v a 2 s.c o m*/ * @param src * the source String to substitute into * @param data * the data object to substitute with * @param nullValue * the value to substitute for null data * @param buf * the StringBuilder to append the output to */ public static void mergeString(String src, Object data, String nullValue, StringBuilder buf) { Matcher matcher = MERGE_VAR_PAT.matcher(src); //MatchResult[] matches = MERGE_VAR_PAT.matcher(src); //REMatch[] matches = MERGE_VAR_RE.getAllMatches(src); if (!matcher.find()) { buf.append(src); } else { int endLastMatchIdx = 0; do { MatchResult matchResult = matcher.toMatchResult(); // append everything from the end of the last // match to the start of this match buf.append(src.substring(endLastMatchIdx, matchResult.start())); // perform substitution here... if (data != null) { int s = matchResult.start(1); int e = matchResult.end(1); if ((s > -1) && (e > -1)) { String varName = src.substring(s, e); if (data instanceof java.util.Map<?, ?>) { Object o = null; int sepIdx = varName.indexOf('.'); if (sepIdx > 0) { String varName2 = varName.substring(sepIdx + 1); varName = varName.substring(0, sepIdx); o = ((Map<?, ?>) data).get(varName); if (o != null) { try { o = PropertyUtils.getProperty(o, varName2); } catch (Exception e2) { LOG.warn("Exception getting property '" + varName2 + "' out of " + o.getClass() + ": " + e2); } } } else { // simply check for key o = ((Map<?, ?>) data).get(varName); } if (o == null || (String.class.isAssignableFrom(o.getClass()) && !StringUtils.hasText(o.toString()))) { buf.append(nullValue); } else { buf.append(o); } } else { // use reflection to get a bean property try { Object o = PropertyUtils.getProperty(data, varName); if (o == null || (String.class.isAssignableFrom(o.getClass()) && !StringUtils.hasText(o.toString()))) { buf.append(nullValue); } else { buf.append(o); } } catch (Exception ex) { LOG.warn("Exception getting property '" + 
varName + "' out of " + data.getClass() + ": " + ex); buf.append(nullValue); } } } endLastMatchIdx = matchResult.end(); } } while (matcher.find()); if (endLastMatchIdx < src.length()) { buf.append(src.substring(endLastMatchIdx)); } } }
From source file:magicware.scm.redmine.tools.IssueSyncApp.java
/**
 * Synchronizes one configured item: reads a JSON issue template, fills its
 * placeholder fields from rows of an Excel sheet, and creates a Redmine issue
 * for every row whose key is not already registered.
 *
 * NOTE(review): original comments were mojibake (likely Japanese); they have
 * been replaced with English descriptions derived from the code itself.
 */
public void execute(SyncItem syncItem) throws IOException, InvalidFormatException {
    FileInputStream in = null;
    try {
        // Load the JSON issue template.
        String issueTemplate = FileUtils.readFileAsString(syncItem.getJsonTemplate());
        // Collect every placeholder occurrence in the template up front, so the
        // template can be rebuilt segment by segment for each row.
        Matcher m = Pattern.compile(Constants.ISSUE_FIELD_VALUE_EXP).matcher(issueTemplate);
        List<MatchResult> mrList = new ArrayList<MatchResult>();
        while (m.find()) {
            MatchResult mr = m.toMatchResult();
            mrList.add(mr);
        }
        // Open the workbook that provides the field values.
        in = new FileInputStream(syncItem.getFilePath());
        Workbook wb = WorkbookFactory.create(in);
        FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();
        Sheet sheet = wb.getSheet(syncItem.getSheetName());
        Row row = null;
        Cell cell = null;
        List<String> issues = new ArrayList<String>();
        // Walk rows bottom-up, stopping at the configured first key row.
        for (int i = sheet.getLastRowNum(); i >= (syncItem.getKeyRowBeginIdx() > 0
                ? (syncItem.getKeyRowBeginIdx() - 1)
                : 0); i--) {
            row = sheet.getRow(i);
            if (row != null) {
                String keyNo = ExcelUtils.getCellContent(row.getCell(syncItem.getKeyColumnIdx() - 1), evaluator);
                // A blank key ends the data region — stop scanning.
                if (StringUtils.isBlank(keyNo)) {
                    break;
                }
                // Only build a new issue if this key is not already in Redmine.
                if (redmineClient.queryIssue(syncItem.getProjectId(), syncItem.getKeyFiledId(), keyNo) == 0) {
                    StringBuilder newIssue = new StringBuilder();
                    int eolIdx = 0;
                    for (MatchResult matchResult : mrList) {
                        // Copy the literal template text before this placeholder.
                        newIssue.append(issueTemplate.substring(eolIdx, matchResult.start()));
                        // group(1) is a 1-based cell index for the value.
                        int cellIndex = Integer.valueOf(matchResult.group(1)) - 1;
                        cell = row.getCell(cellIndex);
                        String cellvalue = ExcelUtils.getCellContent(cell, evaluator);
                        // group(3), when present, is an inline JSON map translating
                        // cell values; unmapped values fall back to "default".
                        String valueMapStr = matchResult.group(3);
                        Map<String, String> valueMap = null;
                        if (valueMapStr != null) {
                            valueMap = JSON.decode(valueMapStr);
                            if (StringUtils.isNotEmpty(cellvalue) && valueMap.containsKey(cellvalue)) {
                                cellvalue = valueMap.get(cellvalue);
                            } else {
                                cellvalue = valueMap.get("default");
                            }
                        }
                        if (StringUtils.isNotEmpty(cellvalue)) {
                            // Escape so the value is safe inside the JSON string.
                            cellvalue = StringEscapeUtils.escapeJavaScript(cellvalue);
                            newIssue.append(cellvalue);
                        }
                        eolIdx = matchResult.end();
                    }
                    // Copy the template tail after the last placeholder.
                    newIssue.append(issueTemplate.substring(eolIdx));
                    issues.add(newIssue.toString());
                } else {
                    // Key already registered in Redmine — assume everything below
                    // (scanning bottom-up) is registered too, and stop.
                    break;
                }
            }
        }
        // Create the collected issues, oldest (lowest row) first.
        for (int i = issues.size() - 1; i >= 0; i--) {
            Map<String, Issue> issueMap = JSON.decode(issues.get(i));
            log.debug("create new issue >>>");
            log.debug(JSON.encode(issueMap, true));
            redmineClient.createNewIssue(issues.get(i));
        }
    } finally {
        if (in != null) {
            in.close();
            in = null;
        }
    }
}
From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java
private int getTruePositiveTokenCount(Mutant mutant) { int count = 0; Pattern mask = Pattern.compile("X+"); List<MatchResult> results = new ArrayList<>(); Matcher matcher = mask.matcher(mutant.getDeidentifiedString()); while (matcher.find()) { results.add(matcher.toMatchResult()); }/*from w w w. j a va 2 s . c o m*/ for (MatchResult result : results) { StringTokenizer tokenizer = new StringTokenizer( mutant.getFinalText().substring(result.start(), result.end())); ArrayList<String> arHits = new ArrayList<>(); while (tokenizer.hasMoreTokens()) { arHits.add(tokenizer.nextToken()); } count = getHitCount(mutant, count, arHits); } return count; }
From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java
private int getFalsePositiveTokenCount(Mutant mutant) { int count = 0; Pattern mask = Pattern.compile("X+"); List<MatchResult> results = new ArrayList<>(); Matcher matcher = mask.matcher(mutant.getDeidentifiedString()); while (matcher.find()) { results.add(matcher.toMatchResult()); }//from w ww .ja v a2s. c o m for (MatchResult result : results) { StringTokenizer tokenizer = new StringTokenizer( mutant.getFinalText().substring(result.start(), result.end())); ArrayList<String> arHits = new ArrayList<>(); while (tokenizer.hasMoreTokens()) { arHits.add(tokenizer.nextToken()); } for (String hit : arHits) { boolean isAnIdentifier = false; for (String token : mutant.getOutputTokens()) { if (hit.matches(Pattern.quote(token))) { isAnIdentifier = true; } } if (!isAnIdentifier && !hit.equalsIgnoreCase("") && !hit.equalsIgnoreCase("-")) { count++; } } } return count; }
From source file:org.springframework.social.twitter.api.impl.TweetDeserializer.java
private void extractTickerSymbolEntitiesFromText(String text, Entities entities) { Pattern pattern = Pattern.compile("\\$[A-Za-z]+"); Matcher matcher = pattern.matcher(text); while (matcher.find()) { MatchResult matchResult = matcher.toMatchResult(); String tickerSymbol = matchResult.group().substring(1); String url = "https://twitter.com/search?q=%24" + tickerSymbol + "&src=ctag"; entities.getTickerSymbols().add(new TickerSymbolEntity(tickerSymbol, url, new int[] { matchResult.start(), matchResult.end() })); }//from w w w. j ava 2 s .c om }
From source file:jp.go.nict.langrid.wrapper.ws_1_2.translation.AbstractTranslationService.java
/** * //from w w w. ja va 2 s . co m * */ public final String multistatementTranslate(String sourceLang, String targetLang, String source, String delimiterRegx) throws AccessLimitExceededException, InvalidParameterException, LanguagePairNotUniquelyDecidedException, NoAccessPermissionException, NoValidEndpointsException, ProcessFailedException, ServerBusyException, ServiceNotActiveException, ServiceNotFoundException, UnsupportedLanguagePairException { checkStartupException(); if (StringUtils.isBlank(delimiterRegx)) { throw new InvalidParameterException("delimiterRegx", "is Blank."); } StringBuilder sb = new StringBuilder(); Scanner s = new Scanner(source).useDelimiter(delimiterRegx); int i = 0; while (s.hasNext()) { String text = s.next(); MatchResult m = s.match(); if (i != m.start()) { String tag = source.substring(i, m.start()); sb.append(tag); } i = m.end(); sb.append(invokeDoTranslation(sourceLang, targetLang, text)); } if (source.length() != i) { String tag = source.substring(i); sb.append(tag); } return sb.toString(); }
From source file:net.osten.watermap.convert.AZTReport.java
/**
 * Parses one data line of the AZT water report into a WaterReport, locating
 * fields by regex offsets. Returns null when a mandatory field (mileages,
 * type, historic reliability) cannot be found.
 */
private WaterReport parseDataLine(String line) {
    WaterReport result = new WaterReport();
    try {
        // Example line:
        // 8.3 8.3 Tub Spring (aka Bathtub Spring) spring 3 full tub; good trickle3/28/15 3/28/15 Bird Food 4/5/15
        // Mileages = first two decimals.
        MatchResult decimalsMatch = RegexUtils.matchFirstOccurance(line, decimalPattern);
        if (decimalsMatch == null) {
            log.fine("Mileages not found");
            return null;
        }
        int decimalsEnd = decimalsMatch.end();
        // Type = spring | creek | spring fed | windmill | store | dirt tank | pipe | Town | etc.
        MatchResult typeMatch = RegexUtils.matchFirstOccurance(line, typesPattern);
        if (typeMatch == null) {
            log.fine("Type not found");
            return null;
        }
        log.finer("type=" + typeMatch.group());
        int typeEnd = typeMatch.end();
        // Name = text from the second decimal number up to the type word.
        log.finer("decimalsEnd=" + decimalsEnd + " typeEnd=" + typeEnd);
        String name = line.substring(decimalsEnd, typeEnd);
        result.setName(name.trim());
        // Historic Reliability = value after Type; try the most specific
        // pattern first (can be "1 to 2" or "0-2" or a single digit).
        MatchResult histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern3, typeEnd);
        if (histRelMatch == null) {
            histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern2, typeEnd);
            if (histRelMatch == null) {
                histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern1, typeEnd);
                if (histRelMatch == null) {
                    log.fine("Historical Reliability not found");
                    return null;
                }
            }
        }
        log.finer("histRel=" + histRelMatch.group());
        String historicReliability = mapHistoricReliability(histRelMatch.group().trim());
        int histRelEnd = histRelMatch.end();
        // Report Date = second date from the right.
        int reportDateEnd = -1;
        int reportDateStart = -1;
        List<MatchResult> dates = RegexUtils.matchOccurences(line, datePattern);
        if (dates.size() >= 2) {
            reportDateEnd = dates.get(dates.size() - 2).end();
            reportDateStart = dates.get(dates.size() - 2).start();
        } else {
            log.fine("Only found " + dates.size() + " dates");
            // NOTE(review): Math.max(line.length() - 1, histRelEnd) looks
            // suspicious — it drops the line's last character and max() would
            // pick histRelEnd only if it lay past the end of the line.
            // Possibly Math.min or line.length() was intended; confirm against
            // upstream before changing.
            reportDateStart = Math.max(line.length() - 1, histRelEnd);
        }
        // Report = text from Historic Reliability to Report Date.
        log.finer("histRelEnd=" + histRelEnd + " reportDateStart=" + reportDateStart);
        if (histRelEnd >= 0 && reportDateStart >= 0 && reportDateStart >= histRelEnd) {
            String report = line.substring(histRelEnd, reportDateStart);
            result.setDescription(report.trim() + "<br />Historical Reliability:" + historicReliability);
        } else {
            log.fine("cannot find historic reliability");
        }
        // Post Date = first date from the right.
        int postDateStart = -1;
        MatchResult postDate = RegexUtils.matchLastOccurence(line, datePattern);
        if (postDate == null) {
            log.fine("Post Date not found");
        } else {
            result.setLastReport(dateFormatter.parse(postDate.group()));
            postDateStart = postDate.start();
            log.finer("postDate=" + postDate.group());
        }
        // Reported By = text between Report Date and Post Date (logged only,
        // not stored on the result).
        if (postDateStart >= 0 && reportDateEnd >= 0 && postDateStart > reportDateEnd) {
            String reportedBy = line.substring(reportDateEnd, postDateStart);
            log.finer("reportedBy=" + reportedBy);
        } else {
            log.finer("cannot find reportedBy");
        }
        result.setState(WaterStateParser.parseState(result.getDescription()));
        result.setSource(SOURCE_TITLE);
        result.setUrl(SOURCE_URL);
    } catch (ParseException e) {
        // Date parsing failed; the partially-filled result is still returned.
        log.fine("ParseException:" + e.getLocalizedMessage());
    }
    return result;
}
From source file:gate.creole.splitter.RegexSentenceSplitter.java
/** * Checks whether a possible match is being vetoed by a non split match. A * possible match is vetoed if it any nay overlap with a veto region. * * @param split the match result representing the split to be tested * @param vetoRegions regions where matches are not allowed. For efficiency * reasons, this method assumes these regions to be non overlapping and sorted * in ascending order.//from w w w .j a v a 2 s . c om * All veto regions that end before the proposed match are also discarded * (again for efficiency reasons). This requires the proposed matches to be * sent to this method in ascending order, so as to avoid malfunctions. * @return <tt>true</tt> iff the proposed split should be ignored */ private boolean veto(MatchResult split, List<int[]> vetoRegions) { //if no more non splits available, accept everything for (Iterator<int[]> vetoRegIter = vetoRegions.iterator(); vetoRegIter.hasNext();) { int[] aVetoRegion = vetoRegIter.next(); if (aVetoRegion[1] - 1 < split.start()) { //current veto region ends before the proposed split starts //--> discard the veto region vetoRegIter.remove(); } else if (split.end() - 1 < aVetoRegion[0]) { //veto region starts after the split ends //-> we can return false return false; } else { //we have overlap return true; } } //if we got this far, all veto regions are before the split return false; }
From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java
/**
 * Parses the first "{{Infobox ...}}" template of a Wikipedia page into Tika
 * metadata. Attribute/value pairs are separated by '|'; line breaks inside a
 * value (as '<br />') count as an enumeration of several values. Attribute
 * names containing a number (e.g. "Single1", "Datum1") are grouped by that
 * number into sub-documents, which are emitted as child documents linked to
 * the parent page via parentId/childId metadata.
 *
 * (Original German comments translated to English; mojibake removed.)
 */
protected void parseInfoBox(String strText, Metadata metadata, ContentHandler handler) throws SAXException {
    // First cut out the infobox: find "{{Infobox" and its matching closing
    // bracket.
    int iStartInfoBox = -1;
    int iEndInfoBox = -1;
    MatchResult infoMatch = StringUtils.findFirst("\\{\\{\\s*Infobox", strText);
    if (infoMatch != null) {
        iStartInfoBox = infoMatch.start();
        iEndInfoBox = StringUtils.findMatchingBracket(iStartInfoBox, strText) + 1;
    } else
        return;
    // Sanity checks on the computed bounds.
    if (strText.length() < 3 || strText.length() < iEndInfoBox || iEndInfoBox <= 0
            || (iStartInfoBox + 2) > iEndInfoBox)
        return;
    String strInfoBox = "";
    strInfoBox = strText.substring(iStartInfoBox + 2, iEndInfoBox);
    if (strInfoBox.length() < 5)
        return;
    // NOTE(review): replaceAll("<br />", "<br />") replaces the string with
    // itself — this looks like a scrape artifact of an original normalization
    // (e.g. "<br/>" -> "<br />"); confirm against the upstream source.
    String strCleanedInfoBox = m_wikiModel.render(new PlainTextConverter(),
            strInfoBox.replaceAll("<br />", "<br />"));
    // The first line names the infobox type; it ends at the first '|' (or,
    // failing that, the first newline).
    int iIndex = strCleanedInfoBox.indexOf("|");
    if (iIndex == -1)
        iIndex = strCleanedInfoBox.indexOf("\n");
    if (iIndex == -1)
        return;
    // Offset 7 skips the leading "Infobox" token — presumably; TODO confirm.
    String strInfoBoxName = strCleanedInfoBox.substring(7, iIndex).trim();
    metadata.add(infobox, strInfoBoxName);
    // Split the box into attribute/value pairs at '|'.
    String[] straCleanedInfoBoxSplit = strCleanedInfoBox.split("\\s*\\|\\s*");
    // Sub-document id (e.g. "Single1") -> its attribute/value pairs.
    HashMap<String, MultiValueHashMap<String, String>> hsSubDocId2AttValuePairsOfSubDoc = new HashMap<String, MultiValueHashMap<String, String>>();
    for (String strAttValuePair : straCleanedInfoBoxSplit) {
        // Pairs are separated by '='; skip malformed entries.
        String[] straAtt2Value = strAttValuePair.split("=");
        if (straAtt2Value.length == 0 || straAtt2Value[0] == null)
            continue;
        if (straAtt2Value.length < 2 || straAtt2Value[1] == null)
            continue;
        String strAttName = straAtt2Value[0].trim();
        String strAttValues = straAtt2Value[1];
        if (StringUtils.nullOrWhitespace(strAttValues))
            continue;
        // '<br />' inside a value marks an enumeration of several values.
        String[] straAttValues = strAttValues.split(Pattern.quote("<br />"));
        // A number in the attribute name means the pair belongs to a grouped
        // sub-document: all pairs sharing that number form one record.
        Matcher numberMatcher = Pattern.compile("([\\D]*)(\\d+)([\\D]*)").matcher(strAttName);
        if (!numberMatcher.find()) {
            // No number in the attribute name — record the values directly in
            // the page metadata.
            for (String strAttValue : straAttValues) {
                String strCleanedAttValue = cleanAttValue(strAttName, strAttValue);
                if (strCleanedAttValue != null)
                    metadata.add(strAttName, strCleanedAttValue);
            }
        } else {
            // Number in the name — the value goes into the sub-document with
            // id <prefix><number>; the attribute keeps <prefix><suffix>.
            String strPrefix = numberMatcher.group(1);
            String strNumber = numberMatcher.group(2);
            String strSuffix = numberMatcher.group(3);
            String strDataSetId = strPrefix + strNumber;
            String strFinalAttName = strPrefix + strSuffix;
            // More than one number means the grouping heuristic lost — fall
            // back to recording the values directly.
            if (numberMatcher.find()) {
                for (String strAttValue : straAttValues) {
                    String strCleanedAttValue = cleanAttValue(strFinalAttName, strAttValue);
                    if (strCleanedAttValue != null)
                        metadata.add(strFinalAttName, strCleanedAttValue);
                }
            }
            MultiValueHashMap<String, String> hsAttname2ValueOfSubDoc = hsSubDocId2AttValuePairsOfSubDoc
                    .get(strDataSetId);
            if (hsAttname2ValueOfSubDoc == null) {
                hsAttname2ValueOfSubDoc = new MultiValueHashMap<String, String>();
                hsSubDocId2AttValuePairsOfSubDoc.put(strDataSetId, hsAttname2ValueOfSubDoc);
            }
            // Parenthesized additions are stripped from sub-document values.
            for (String strAttValue : straAttValues)
                hsAttname2ValueOfSubDoc.add(strFinalAttName, strAttValue.replaceAll("\\(.*?\\)", "").trim());
        }
    }
    String strPageId = new UID().toString();
    metadata.add(LeechMetadata.id, strPageId);
    // We have to use the same Metadata object, so back up the parent's state.
    Metadata metadataBackup4ParentPage = TikaUtils.copyMetadata(metadata);
    for (MultiValueHashMap<String, String> hsAttValuePairsOfSubDoc : hsSubDocId2AttValuePairsOfSubDoc
            .values()) {
        TikaUtils.clearMetadata(metadata);
        // Reference back to the parent page.
        metadata.add(LeechMetadata.parentId, strPageId);
        metadata.add(infobox, strInfoBoxName);
        String strChildId = new UID().toString();
        metadata.add(LeechMetadata.id, strChildId);
        // The parent also records its children's ids for back-referencing.
        metadataBackup4ParentPage.add(LeechMetadata.childId, strChildId);
        for (Entry<String, String> attName2Value4SubDoc : hsAttValuePairsOfSubDoc.entryList()) {
            String strAttName = attName2Value4SubDoc.getKey();
            String strAttValue = attName2Value4SubDoc.getValue();
            String strCleanedAttValue = cleanAttValue(strAttName, strAttValue);
            if (strCleanedAttValue != null)
                metadata.add(strAttName, strCleanedAttValue);
        }
        metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia-meta+xml");
        // An empty start/end document pair makes the enclosing ContentHandler
        // receive each sub-document as a separate document.
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }
    // Restore the parent page's metadata (now including the child ids).
    TikaUtils.clearMetadata(metadata);
    TikaUtils.copyMetadataFromTo(metadataBackup4ParentPage, metadata);
}
From source file:gate.creole.splitter.RegexSentenceSplitter.java
/**
 * Runs the regex sentence splitter over the document: merges internal and
 * external split matches in document order, skips splits vetoed by non-split
 * regions, and emits "Split" and Sentence annotations.
 */
@Override
public void execute() throws ExecutionException {
    interrupted = false;
    int lastProgress = 0;
    fireProgressChanged(lastProgress);
    // Get pointers to the annotation sets.
    AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0)
            ? document.getAnnotations()
            : document.getAnnotations(outputASName);
    String docText = document.getContent().toString();
    /* If the document's content is empty or contains only whitespace,
     * we drop out right here, since there's nothing to sentence-split. */
    if (docText.trim().length() < 1) {
        return;
    }
    Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText);
    Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText);
    Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText);
    // Store all non-split locations in a list of [start, end) pairs.
    List<int[]> nonSplits = new LinkedList<int[]>();
    while (nonSplitMatcher.find()) {
        nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() });
    }
    // This list holds the next candidate match from each matcher; the two
    // streams are merged in ascending order.
    List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>();
    // Initialise the matching process with the first match of each kind.
    MatchResult internalMatchResult = null;
    if (internalSplitMatcher.find()) {
        internalMatchResult = internalSplitMatcher.toMatchResult();
        nextSplitMatches.add(internalMatchResult);
    }
    MatchResult externalMatchResult = null;
    if (externalSplitMatcher.find()) {
        externalMatchResult = externalSplitMatcher.toMatchResult();
        nextSplitMatches.add(externalMatchResult);
    }
    MatchResultComparator comparator = new MatchResultComparator();
    int lastSentenceEnd = 0;
    while (!nextSplitMatches.isEmpty()) {
        // See which candidate matches first in the document.
        Collections.sort(nextSplitMatches, comparator);
        MatchResult nextMatch = nextSplitMatches.remove(0);
        if (nextMatch == internalMatchResult) {
            // We have a new internal split; see if it's vetoed or not.
            if (!veto(nextMatch, nonSplits)) {
                try {
                    // Add the split annotation (the sentence includes it).
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "internal");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    // Generate the sentence annotation.
                    int endOffset = nextMatch.end();
                    // Find the first non-whitespace character starting from
                    // where the last sentence ended.
                    while (lastSentenceEnd < endOffset
                            && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    // If there is any useful text between the two offsets,
                    // generate a new sentence.
                    if (lastSentenceEnd < nextMatch.start()) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    // Store the new sentence end.
                    lastSentenceEnd = endOffset;
                } catch (InvalidOffsetException e) {
                    // This should never happen.
                    throw new ExecutionException(e);
                }
            }
            // Prepare for the next step: advance the internal matcher.
            if (internalSplitMatcher.find()) {
                internalMatchResult = internalSplitMatcher.toMatchResult();
                nextSplitMatches.add(internalMatchResult);
            } else {
                internalMatchResult = null;
            }
        } else if (nextMatch == externalMatchResult) {
            // We have a new external split; see if it's vetoed or not.
            if (!veto(nextMatch, nonSplits)) {
                try {
                    // Generate the split annotation.
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "external");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    // Generate the sentence annotation (it EXCLUDES the split):
                    // find the last non-space character, going backward from
                    // where the external split starts.
                    int endOffset = nextMatch.start();
                    while (endOffset > lastSentenceEnd
                            && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) {
                        endOffset--;
                    }
                    // Find the first non-space character starting from where
                    // the last sentence ended.
                    while (lastSentenceEnd < endOffset
                            && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    // If there is any useful text between the two offsets,
                    // generate a new sentence.
                    if (lastSentenceEnd < endOffset) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    // Store the new sentence end (after the split itself).
                    lastSentenceEnd = nextMatch.end();
                } catch (InvalidOffsetException e) {
                    // This should never happen.
                    throw new ExecutionException(e);
                }
            }
            // Prepare for the next step: advance the external matcher.
            if (externalSplitMatcher.find()) {
                externalMatchResult = externalSplitMatcher.toMatchResult();
                nextSplitMatches.add(externalMatchResult);
            } else {
                externalMatchResult = null;
            }
        } else {
            // Malfunction: the candidate came from neither matcher.
            throw new ExecutionException("Invalid state - cannot identify match!");
        }
        // Report progress in >20% increments.
        int newProgress = 100 * lastSentenceEnd / docText.length();
        if (newProgress - lastProgress > 20) {
            lastProgress = newProgress;
            fireProgressChanged(lastProgress);
        }
    } // while (!nextSplitMatches.isEmpty())
    fireProcessFinished();
}