List of usage examples for java.util.regex MatchResult start
public int start();
From source file:net.solarnetwork.util.StringMerger.java
/** * Merge from a String source into a StringBuilder. * /*from w w w. j a v a 2 s.c o m*/ * @param src * the source String to substitute into * @param data * the data object to substitute with * @param nullValue * the value to substitute for null data * @param buf * the StringBuilder to append the output to */ public static void mergeString(String src, Object data, String nullValue, StringBuilder buf) { Matcher matcher = MERGE_VAR_PAT.matcher(src); //MatchResult[] matches = MERGE_VAR_PAT.matcher(src); //REMatch[] matches = MERGE_VAR_RE.getAllMatches(src); if (!matcher.find()) { buf.append(src); } else { int endLastMatchIdx = 0; do { MatchResult matchResult = matcher.toMatchResult(); // append everything from the end of the last // match to the start of this match buf.append(src.substring(endLastMatchIdx, matchResult.start())); // perform substitution here... if (data != null) { int s = matchResult.start(1); int e = matchResult.end(1); if ((s > -1) && (e > -1)) { String varName = src.substring(s, e); if (data instanceof java.util.Map<?, ?>) { Object o = null; int sepIdx = varName.indexOf('.'); if (sepIdx > 0) { String varName2 = varName.substring(sepIdx + 1); varName = varName.substring(0, sepIdx); o = ((Map<?, ?>) data).get(varName); if (o != null) { try { o = PropertyUtils.getProperty(o, varName2); } catch (Exception e2) { LOG.warn("Exception getting property '" + varName2 + "' out of " + o.getClass() + ": " + e2); } } } else { // simply check for key o = ((Map<?, ?>) data).get(varName); } if (o == null || (String.class.isAssignableFrom(o.getClass()) && !StringUtils.hasText(o.toString()))) { buf.append(nullValue); } else { buf.append(o); } } else { // use reflection to get a bean property try { Object o = PropertyUtils.getProperty(data, varName); if (o == null || (String.class.isAssignableFrom(o.getClass()) && !StringUtils.hasText(o.toString()))) { buf.append(nullValue); } else { buf.append(o); } } catch (Exception ex) { LOG.warn("Exception getting property '" + 
varName + "' out of " + data.getClass() + ": " + ex); buf.append(nullValue); } } } endLastMatchIdx = matchResult.end(); } } while (matcher.find()); if (endLastMatchIdx < src.length()) { buf.append(src.substring(endLastMatchIdx)); } } }
From source file:magicware.scm.redmine.tools.IssueSyncApp.java
/**
 * Synchronizes one configured item: reads a JSON issue template, fills its
 * placeholder fields from rows of an Excel sheet, and creates a Redmine issue
 * for every row whose key is not already registered.
 *
 * NOTE(review): original comments were mojibake (likely Japanese); they have
 * been replaced with English descriptions derived from the code itself.
 */
public void execute(SyncItem syncItem) throws IOException, InvalidFormatException {
    FileInputStream in = null;
    try {
        // Load the JSON issue template.
        String issueTemplate = FileUtils.readFileAsString(syncItem.getJsonTemplate());
        // Collect every placeholder occurrence in the template up front, so the
        // template can be rebuilt segment by segment for each row.
        Matcher m = Pattern.compile(Constants.ISSUE_FIELD_VALUE_EXP).matcher(issueTemplate);
        List<MatchResult> mrList = new ArrayList<MatchResult>();
        while (m.find()) {
            MatchResult mr = m.toMatchResult();
            mrList.add(mr);
        }
        // Open the workbook that provides the field values.
        in = new FileInputStream(syncItem.getFilePath());
        Workbook wb = WorkbookFactory.create(in);
        FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();
        Sheet sheet = wb.getSheet(syncItem.getSheetName());
        Row row = null;
        Cell cell = null;
        List<String> issues = new ArrayList<String>();
        // Walk rows bottom-up, stopping at the configured first key row.
        for (int i = sheet.getLastRowNum(); i >= (syncItem.getKeyRowBeginIdx() > 0
                ? (syncItem.getKeyRowBeginIdx() - 1)
                : 0); i--) {
            row = sheet.getRow(i);
            if (row != null) {
                String keyNo = ExcelUtils.getCellContent(row.getCell(syncItem.getKeyColumnIdx() - 1), evaluator);
                // A blank key ends the data region — stop scanning.
                if (StringUtils.isBlank(keyNo)) {
                    break;
                }
                // Only build a new issue if this key is not already in Redmine.
                if (redmineClient.queryIssue(syncItem.getProjectId(), syncItem.getKeyFiledId(), keyNo) == 0) {
                    StringBuilder newIssue = new StringBuilder();
                    int eolIdx = 0;
                    for (MatchResult matchResult : mrList) {
                        // Copy the literal template text before this placeholder.
                        newIssue.append(issueTemplate.substring(eolIdx, matchResult.start()));
                        // group(1) is a 1-based cell index for the value.
                        int cellIndex = Integer.valueOf(matchResult.group(1)) - 1;
                        cell = row.getCell(cellIndex);
                        String cellvalue = ExcelUtils.getCellContent(cell, evaluator);
                        // group(3), when present, is an inline JSON map translating
                        // cell values; unmapped values fall back to "default".
                        String valueMapStr = matchResult.group(3);
                        Map<String, String> valueMap = null;
                        if (valueMapStr != null) {
                            valueMap = JSON.decode(valueMapStr);
                            if (StringUtils.isNotEmpty(cellvalue) && valueMap.containsKey(cellvalue)) {
                                cellvalue = valueMap.get(cellvalue);
                            } else {
                                cellvalue = valueMap.get("default");
                            }
                        }
                        if (StringUtils.isNotEmpty(cellvalue)) {
                            // Escape so the value is safe inside the JSON string.
                            cellvalue = StringEscapeUtils.escapeJavaScript(cellvalue);
                            newIssue.append(cellvalue);
                        }
                        eolIdx = matchResult.end();
                    }
                    // Copy the template tail after the last placeholder.
                    newIssue.append(issueTemplate.substring(eolIdx));
                    issues.add(newIssue.toString());
                } else {
                    // Key already registered in Redmine — assume everything below
                    // (scanning bottom-up) is registered too, and stop.
                    break;
                }
            }
        }
        // Create the collected issues, oldest (lowest row) first.
        for (int i = issues.size() - 1; i >= 0; i--) {
            Map<String, Issue> issueMap = JSON.decode(issues.get(i));
            log.debug("create new issue >>>");
            log.debug(JSON.encode(issueMap, true));
            redmineClient.createNewIssue(issues.get(i));
        }
    } finally {
        if (in != null) {
            in.close();
            in = null;
        }
    }
}
From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java
private int getTruePositiveTokenCount(Mutant mutant) { int count = 0; Pattern mask = Pattern.compile("X+"); List<MatchResult> results = new ArrayList<>(); Matcher matcher = mask.matcher(mutant.getDeidentifiedString()); while (matcher.find()) { results.add(matcher.toMatchResult()); }/*from w w w. j a va 2 s . c o m*/ for (MatchResult result : results) { StringTokenizer tokenizer = new StringTokenizer( mutant.getFinalText().substring(result.start(), result.end())); ArrayList<String> arHits = new ArrayList<>(); while (tokenizer.hasMoreTokens()) { arHits.add(tokenizer.nextToken()); } count = getHitCount(mutant, count, arHits); } return count; }
From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java
private int getFalsePositiveTokenCount(Mutant mutant) { int count = 0; Pattern mask = Pattern.compile("X+"); List<MatchResult> results = new ArrayList<>(); Matcher matcher = mask.matcher(mutant.getDeidentifiedString()); while (matcher.find()) { results.add(matcher.toMatchResult()); }//from w ww .ja v a2s. c o m for (MatchResult result : results) { StringTokenizer tokenizer = new StringTokenizer( mutant.getFinalText().substring(result.start(), result.end())); ArrayList<String> arHits = new ArrayList<>(); while (tokenizer.hasMoreTokens()) { arHits.add(tokenizer.nextToken()); } for (String hit : arHits) { boolean isAnIdentifier = false; for (String token : mutant.getOutputTokens()) { if (hit.matches(Pattern.quote(token))) { isAnIdentifier = true; } } if (!isAnIdentifier && !hit.equalsIgnoreCase("") && !hit.equalsIgnoreCase("-")) { count++; } } } return count; }
From source file:org.springframework.social.twitter.api.impl.TweetDeserializer.java
private void extractTickerSymbolEntitiesFromText(String text, Entities entities) { Pattern pattern = Pattern.compile("\\$[A-Za-z]+"); Matcher matcher = pattern.matcher(text); while (matcher.find()) { MatchResult matchResult = matcher.toMatchResult(); String tickerSymbol = matchResult.group().substring(1); String url = "https://twitter.com/search?q=%24" + tickerSymbol + "&src=ctag"; entities.getTickerSymbols().add(new TickerSymbolEntity(tickerSymbol, url, new int[] { matchResult.start(), matchResult.end() })); }//from w w w. j ava 2 s .c om }
From source file:jp.go.nict.langrid.wrapper.ws_1_2.translation.AbstractTranslationService.java
/** * //from w w w. ja va 2 s . co m * */ public final String multistatementTranslate(String sourceLang, String targetLang, String source, String delimiterRegx) throws AccessLimitExceededException, InvalidParameterException, LanguagePairNotUniquelyDecidedException, NoAccessPermissionException, NoValidEndpointsException, ProcessFailedException, ServerBusyException, ServiceNotActiveException, ServiceNotFoundException, UnsupportedLanguagePairException { checkStartupException(); if (StringUtils.isBlank(delimiterRegx)) { throw new InvalidParameterException("delimiterRegx", "is Blank."); } StringBuilder sb = new StringBuilder(); Scanner s = new Scanner(source).useDelimiter(delimiterRegx); int i = 0; while (s.hasNext()) { String text = s.next(); MatchResult m = s.match(); if (i != m.start()) { String tag = source.substring(i, m.start()); sb.append(tag); } i = m.end(); sb.append(invokeDoTranslation(sourceLang, targetLang, text)); } if (source.length() != i) { String tag = source.substring(i); sb.append(tag); } return sb.toString(); }
From source file:net.osten.watermap.convert.AZTReport.java
/**
 * Parses one data line of the AZT water report into a WaterReport, locating
 * fields by regex offsets. Returns null when a mandatory field (mileages,
 * type, historic reliability) cannot be found.
 */
private WaterReport parseDataLine(String line) {
    WaterReport result = new WaterReport();
    try {
        // Example line:
        // 8.3 8.3 Tub Spring (aka Bathtub Spring) spring 3 full tub; good trickle3/28/15 3/28/15 Bird Food 4/5/15
        // Mileages = first two decimals.
        MatchResult decimalsMatch = RegexUtils.matchFirstOccurance(line, decimalPattern);
        if (decimalsMatch == null) {
            log.fine("Mileages not found");
            return null;
        }
        int decimalsEnd = decimalsMatch.end();
        // Type = spring | creek | spring fed | windmill | store | dirt tank | pipe | Town | etc.
        MatchResult typeMatch = RegexUtils.matchFirstOccurance(line, typesPattern);
        if (typeMatch == null) {
            log.fine("Type not found");
            return null;
        }
        log.finer("type=" + typeMatch.group());
        int typeEnd = typeMatch.end();
        // Name = text from the second decimal number up to the type word.
        log.finer("decimalsEnd=" + decimalsEnd + " typeEnd=" + typeEnd);
        String name = line.substring(decimalsEnd, typeEnd);
        result.setName(name.trim());
        // Historic Reliability = value after Type; try the most specific
        // pattern first (can be "1 to 2" or "0-2" or a single digit).
        MatchResult histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern3, typeEnd);
        if (histRelMatch == null) {
            histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern2, typeEnd);
            if (histRelMatch == null) {
                histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern1, typeEnd);
                if (histRelMatch == null) {
                    log.fine("Historical Reliability not found");
                    return null;
                }
            }
        }
        log.finer("histRel=" + histRelMatch.group());
        String historicReliability = mapHistoricReliability(histRelMatch.group().trim());
        int histRelEnd = histRelMatch.end();
        // Report Date = second date from the right.
        int reportDateEnd = -1;
        int reportDateStart = -1;
        List<MatchResult> dates = RegexUtils.matchOccurences(line, datePattern);
        if (dates.size() >= 2) {
            reportDateEnd = dates.get(dates.size() - 2).end();
            reportDateStart = dates.get(dates.size() - 2).start();
        } else {
            log.fine("Only found " + dates.size() + " dates");
            // NOTE(review): Math.max(line.length() - 1, histRelEnd) looks
            // suspicious — it drops the line's last character and max() would
            // pick histRelEnd only if it lay past the end of the line.
            // Possibly Math.min or line.length() was intended; confirm against
            // upstream before changing.
            reportDateStart = Math.max(line.length() - 1, histRelEnd);
        }
        // Report = text from Historic Reliability to Report Date.
        log.finer("histRelEnd=" + histRelEnd + " reportDateStart=" + reportDateStart);
        if (histRelEnd >= 0 && reportDateStart >= 0 && reportDateStart >= histRelEnd) {
            String report = line.substring(histRelEnd, reportDateStart);
            result.setDescription(report.trim() + "<br />Historical Reliability:" + historicReliability);
        } else {
            log.fine("cannot find historic reliability");
        }
        // Post Date = first date from the right.
        int postDateStart = -1;
        MatchResult postDate = RegexUtils.matchLastOccurence(line, datePattern);
        if (postDate == null) {
            log.fine("Post Date not found");
        } else {
            result.setLastReport(dateFormatter.parse(postDate.group()));
            postDateStart = postDate.start();
            log.finer("postDate=" + postDate.group());
        }
        // Reported By = text between Report Date and Post Date (logged only,
        // not stored on the result).
        if (postDateStart >= 0 && reportDateEnd >= 0 && postDateStart > reportDateEnd) {
            String reportedBy = line.substring(reportDateEnd, postDateStart);
            log.finer("reportedBy=" + reportedBy);
        } else {
            log.finer("cannot find reportedBy");
        }
        result.setState(WaterStateParser.parseState(result.getDescription()));
        result.setSource(SOURCE_TITLE);
        result.setUrl(SOURCE_URL);
    } catch (ParseException e) {
        // Date parsing failed; the partially-filled result is still returned.
        log.fine("ParseException:" + e.getLocalizedMessage());
    }
    return result;
}
From source file:gate.creole.splitter.RegexSentenceSplitter.java
/** * Checks whether a possible match is being vetoed by a non split match. A * possible match is vetoed if it any nay overlap with a veto region. * * @param split the match result representing the split to be tested * @param vetoRegions regions where matches are not allowed. For efficiency * reasons, this method assumes these regions to be non overlapping and sorted * in ascending order.//from w w w .j a v a 2 s . c om * All veto regions that end before the proposed match are also discarded * (again for efficiency reasons). This requires the proposed matches to be * sent to this method in ascending order, so as to avoid malfunctions. * @return <tt>true</tt> iff the proposed split should be ignored */ private boolean veto(MatchResult split, List<int[]> vetoRegions) { //if no more non splits available, accept everything for (Iterator<int[]> vetoRegIter = vetoRegions.iterator(); vetoRegIter.hasNext();) { int[] aVetoRegion = vetoRegIter.next(); if (aVetoRegion[1] - 1 < split.start()) { //current veto region ends before the proposed split starts //--> discard the veto region vetoRegIter.remove(); } else if (split.end() - 1 < aVetoRegion[0]) { //veto region starts after the split ends //-> we can return false return false; } else { //we have overlap return true; } } //if we got this far, all veto regions are before the split return false; }
From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java
/**
 * Parses the first "{{Infobox ...}}" template of a Wikipedia page into Tika
 * metadata. Attribute/value pairs are separated by '|'; line breaks inside a
 * value (as '<br />') count as an enumeration of several values. Attribute
 * names containing a number (e.g. "Single1", "Datum1") are grouped by that
 * number into sub-documents, which are emitted as child documents linked to
 * the parent page via parentId/childId metadata.
 *
 * (Original German comments translated to English; mojibake removed.)
 */
protected void parseInfoBox(String strText, Metadata metadata, ContentHandler handler) throws SAXException {
    // First cut out the infobox: find "{{Infobox" and its matching closing
    // bracket.
    int iStartInfoBox = -1;
    int iEndInfoBox = -1;
    MatchResult infoMatch = StringUtils.findFirst("\\{\\{\\s*Infobox", strText);
    if (infoMatch != null) {
        iStartInfoBox = infoMatch.start();
        iEndInfoBox = StringUtils.findMatchingBracket(iStartInfoBox, strText) + 1;
    } else
        return;
    // Sanity checks on the computed bounds.
    if (strText.length() < 3 || strText.length() < iEndInfoBox || iEndInfoBox <= 0
            || (iStartInfoBox + 2) > iEndInfoBox)
        return;
    String strInfoBox = "";
    strInfoBox = strText.substring(iStartInfoBox + 2, iEndInfoBox);
    if (strInfoBox.length() < 5)
        return;
    // NOTE(review): replaceAll("<br />", "<br />") replaces the string with
    // itself — this looks like a scrape artifact of an original normalization
    // (e.g. "<br/>" -> "<br />"); confirm against the upstream source.
    String strCleanedInfoBox = m_wikiModel.render(new PlainTextConverter(),
            strInfoBox.replaceAll("<br />", "<br />"));
    // The first line names the infobox type; it ends at the first '|' (or,
    // failing that, the first newline).
    int iIndex = strCleanedInfoBox.indexOf("|");
    if (iIndex == -1)
        iIndex = strCleanedInfoBox.indexOf("\n");
    if (iIndex == -1)
        return;
    // Offset 7 skips the leading "Infobox" token — presumably; TODO confirm.
    String strInfoBoxName = strCleanedInfoBox.substring(7, iIndex).trim();
    metadata.add(infobox, strInfoBoxName);
    // Split the box into attribute/value pairs at '|'.
    String[] straCleanedInfoBoxSplit = strCleanedInfoBox.split("\\s*\\|\\s*");
    // Sub-document id (e.g. "Single1") -> its attribute/value pairs.
    HashMap<String, MultiValueHashMap<String, String>> hsSubDocId2AttValuePairsOfSubDoc = new HashMap<String, MultiValueHashMap<String, String>>();
    for (String strAttValuePair : straCleanedInfoBoxSplit) {
        // Pairs are separated by '='; skip malformed entries.
        String[] straAtt2Value = strAttValuePair.split("=");
        if (straAtt2Value.length == 0 || straAtt2Value[0] == null)
            continue;
        if (straAtt2Value.length < 2 || straAtt2Value[1] == null)
            continue;
        String strAttName = straAtt2Value[0].trim();
        String strAttValues = straAtt2Value[1];
        if (StringUtils.nullOrWhitespace(strAttValues))
            continue;
        // '<br />' inside a value marks an enumeration of several values.
        String[] straAttValues = strAttValues.split(Pattern.quote("<br />"));
        // A number in the attribute name means the pair belongs to a grouped
        // sub-document: all pairs sharing that number form one record.
        Matcher numberMatcher = Pattern.compile("([\\D]*)(\\d+)([\\D]*)").matcher(strAttName);
        if (!numberMatcher.find()) {
            // No number in the attribute name — record the values directly in
            // the page metadata.
            for (String strAttValue : straAttValues) {
                String strCleanedAttValue = cleanAttValue(strAttName, strAttValue);
                if (strCleanedAttValue != null)
                    metadata.add(strAttName, strCleanedAttValue);
            }
        } else {
            // Number in the name — the value goes into the sub-document with
            // id <prefix><number>; the attribute keeps <prefix><suffix>.
            String strPrefix = numberMatcher.group(1);
            String strNumber = numberMatcher.group(2);
            String strSuffix = numberMatcher.group(3);
            String strDataSetId = strPrefix + strNumber;
            String strFinalAttName = strPrefix + strSuffix;
            // More than one number means the grouping heuristic lost — fall
            // back to recording the values directly.
            if (numberMatcher.find()) {
                for (String strAttValue : straAttValues) {
                    String strCleanedAttValue = cleanAttValue(strFinalAttName, strAttValue);
                    if (strCleanedAttValue != null)
                        metadata.add(strFinalAttName, strCleanedAttValue);
                }
            }
            MultiValueHashMap<String, String> hsAttname2ValueOfSubDoc = hsSubDocId2AttValuePairsOfSubDoc
                    .get(strDataSetId);
            if (hsAttname2ValueOfSubDoc == null) {
                hsAttname2ValueOfSubDoc = new MultiValueHashMap<String, String>();
                hsSubDocId2AttValuePairsOfSubDoc.put(strDataSetId, hsAttname2ValueOfSubDoc);
            }
            // Parenthesized additions are stripped from sub-document values.
            for (String strAttValue : straAttValues)
                hsAttname2ValueOfSubDoc.add(strFinalAttName, strAttValue.replaceAll("\\(.*?\\)", "").trim());
        }
    }
    String strPageId = new UID().toString();
    metadata.add(LeechMetadata.id, strPageId);
    // We have to use the same Metadata object, so back up the parent's state.
    Metadata metadataBackup4ParentPage = TikaUtils.copyMetadata(metadata);
    for (MultiValueHashMap<String, String> hsAttValuePairsOfSubDoc : hsSubDocId2AttValuePairsOfSubDoc
            .values()) {
        TikaUtils.clearMetadata(metadata);
        // Reference back to the parent page.
        metadata.add(LeechMetadata.parentId, strPageId);
        metadata.add(infobox, strInfoBoxName);
        String strChildId = new UID().toString();
        metadata.add(LeechMetadata.id, strChildId);
        // The parent also records its children's ids for back-referencing.
        metadataBackup4ParentPage.add(LeechMetadata.childId, strChildId);
        for (Entry<String, String> attName2Value4SubDoc : hsAttValuePairsOfSubDoc.entryList()) {
            String strAttName = attName2Value4SubDoc.getKey();
            String strAttValue = attName2Value4SubDoc.getValue();
            String strCleanedAttValue = cleanAttValue(strAttName, strAttValue);
            if (strCleanedAttValue != null)
                metadata.add(strAttName, strCleanedAttValue);
        }
        metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia-meta+xml");
        // An empty start/end document pair makes the enclosing ContentHandler
        // receive each sub-document as a separate document.
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        xhtml.endDocument();
    }
    // Restore the parent page's metadata (now including the child ids).
    TikaUtils.clearMetadata(metadata);
    TikaUtils.copyMetadataFromTo(metadataBackup4ParentPage, metadata);
}
From source file:gate.creole.splitter.RegexSentenceSplitter.java
/**
 * Runs the regex sentence splitter over the document: merges internal and
 * external split matches in document order, skips splits vetoed by non-split
 * regions, and emits "Split" and Sentence annotations.
 */
@Override
public void execute() throws ExecutionException {
    interrupted = false;
    int lastProgress = 0;
    fireProgressChanged(lastProgress);
    // Get pointers to the annotation sets.
    AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0)
            ? document.getAnnotations()
            : document.getAnnotations(outputASName);
    String docText = document.getContent().toString();
    /* If the document's content is empty or contains only whitespace,
     * we drop out right here, since there's nothing to sentence-split. */
    if (docText.trim().length() < 1) {
        return;
    }
    Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText);
    Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText);
    Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText);
    // Store all non-split locations in a list of [start, end) pairs.
    List<int[]> nonSplits = new LinkedList<int[]>();
    while (nonSplitMatcher.find()) {
        nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() });
    }
    // This list holds the next candidate match from each matcher; the two
    // streams are merged in ascending order.
    List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>();
    // Initialise the matching process with the first match of each kind.
    MatchResult internalMatchResult = null;
    if (internalSplitMatcher.find()) {
        internalMatchResult = internalSplitMatcher.toMatchResult();
        nextSplitMatches.add(internalMatchResult);
    }
    MatchResult externalMatchResult = null;
    if (externalSplitMatcher.find()) {
        externalMatchResult = externalSplitMatcher.toMatchResult();
        nextSplitMatches.add(externalMatchResult);
    }
    MatchResultComparator comparator = new MatchResultComparator();
    int lastSentenceEnd = 0;
    while (!nextSplitMatches.isEmpty()) {
        // See which candidate matches first in the document.
        Collections.sort(nextSplitMatches, comparator);
        MatchResult nextMatch = nextSplitMatches.remove(0);
        if (nextMatch == internalMatchResult) {
            // We have a new internal split; see if it's vetoed or not.
            if (!veto(nextMatch, nonSplits)) {
                try {
                    // Add the split annotation (the sentence includes it).
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "internal");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    // Generate the sentence annotation.
                    int endOffset = nextMatch.end();
                    // Find the first non-whitespace character starting from
                    // where the last sentence ended.
                    while (lastSentenceEnd < endOffset
                            && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    // If there is any useful text between the two offsets,
                    // generate a new sentence.
                    if (lastSentenceEnd < nextMatch.start()) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    // Store the new sentence end.
                    lastSentenceEnd = endOffset;
                } catch (InvalidOffsetException e) {
                    // This should never happen.
                    throw new ExecutionException(e);
                }
            }
            // Prepare for the next step: advance the internal matcher.
            if (internalSplitMatcher.find()) {
                internalMatchResult = internalSplitMatcher.toMatchResult();
                nextSplitMatches.add(internalMatchResult);
            } else {
                internalMatchResult = null;
            }
        } else if (nextMatch == externalMatchResult) {
            // We have a new external split; see if it's vetoed or not.
            if (!veto(nextMatch, nonSplits)) {
                try {
                    // Generate the split annotation.
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "external");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    // Generate the sentence annotation (it EXCLUDES the split):
                    // find the last non-space character, going backward from
                    // where the external split starts.
                    int endOffset = nextMatch.start();
                    while (endOffset > lastSentenceEnd
                            && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) {
                        endOffset--;
                    }
                    // Find the first non-space character starting from where
                    // the last sentence ended.
                    while (lastSentenceEnd < endOffset
                            && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    // If there is any useful text between the two offsets,
                    // generate a new sentence.
                    if (lastSentenceEnd < endOffset) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    // Store the new sentence end (after the split itself).
                    lastSentenceEnd = nextMatch.end();
                } catch (InvalidOffsetException e) {
                    // This should never happen.
                    throw new ExecutionException(e);
                }
            }
            // Prepare for the next step: advance the external matcher.
            if (externalSplitMatcher.find()) {
                externalMatchResult = externalSplitMatcher.toMatchResult();
                nextSplitMatches.add(externalMatchResult);
            } else {
                externalMatchResult = null;
            }
        } else {
            // Malfunction: the candidate came from neither matcher.
            throw new ExecutionException("Invalid state - cannot identify match!");
        }
        // Report progress in >20% increments.
        int newProgress = 100 * lastSentenceEnd / docText.length();
        if (newProgress - lastProgress > 20) {
            lastProgress = newProgress;
            fireProgressChanged(lastProgress);
        }
    } // while (!nextSplitMatches.isEmpty())
    fireProcessFinished();
}