Usage examples for java.util.regex.MatchResult.end()
public int end();
From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java
/**
 * Feeds the tokens found under every masked span to {@code getHitCount} and returns the
 * accumulated count.
 *
 * <p>A masked span is a run of one or more {@code X} characters in
 * {@code mutant.getDeidentifiedString()}. For each span, the same character range is cut out
 * of {@code mutant.getFinalText()}, split with {@link StringTokenizer}'s default delimiters,
 * and the resulting tokens are passed to {@code getHitCount} together with the running count.
 *
 * @param mutant the mutant supplying the de-identified string and the final text
 * @return the value returned by the last {@code getHitCount} call, or 0 if nothing is masked
 */
private int getTruePositiveTokenCount(Mutant mutant) {
    // Phase 1: record every masked span before touching the final text.
    final Matcher maskRuns = Pattern.compile("X+").matcher(mutant.getDeidentifiedString());
    final List<MatchResult> maskedSpans = new ArrayList<>();
    while (maskRuns.find()) {
        maskedSpans.add(maskRuns.toMatchResult());
    }

    // Phase 2: tokenize the text under each span and let getHitCount update the total.
    int total = 0;
    for (MatchResult span : maskedSpans) {
        final String covered = mutant.getFinalText().substring(span.start(), span.end());
        final ArrayList<String> hits = new ArrayList<>();
        for (StringTokenizer words = new StringTokenizer(covered); words.hasMoreTokens();) {
            hits.add(words.nextToken());
        }
        total = getHitCount(mutant, total, hits);
    }
    return total;
}
From source file:uk.ac.kcl.at.ElasticGazetteerAcceptanceTest.java
/**
 * Counts the tokens that were masked although they are not listed as identifiers.
 *
 * <p>A masked span is a run of one or more {@code X} characters in
 * {@code mutant.getDeidentifiedString()}. For each span, the same character range is cut out
 * of {@code mutant.getFinalText()} and split with {@link StringTokenizer}'s default
 * delimiters. A token is counted when it is not a lone {@code "-"} and is not equal to any
 * entry of {@code mutant.getOutputTokens()}.
 *
 * @param mutant the mutant supplying the de-identified string, the final text and the
 *               identifier tokens
 * @return the number of masked tokens that match no identifier token
 */
private int getFalsePositiveTokenCount(Mutant mutant) {
    int count = 0;
    Pattern mask = Pattern.compile("X+");
    List<MatchResult> results = new ArrayList<>();
    Matcher matcher = mask.matcher(mutant.getDeidentifiedString());
    while (matcher.find()) {
        results.add(matcher.toMatchResult());
    }
    for (MatchResult result : results) {
        StringTokenizer tokenizer = new StringTokenizer(
                mutant.getFinalText().substring(result.start(), result.end()));
        while (tokenizer.hasMoreTokens()) {
            // StringTokenizer never yields an empty token, so only "-" needs filtering.
            String hit = tokenizer.nextToken();
            if ("-".equals(hit)) {
                continue;
            }
            boolean isAnIdentifier = false;
            for (String token : mutant.getOutputTokens()) {
                // Literal comparison: equivalent to matching against Pattern.quote(token),
                // without compiling a regex for every hit/token pair.
                if (hit.equals(token)) {
                    isAnIdentifier = true;
                    break;
                }
            }
            if (!isAnIdentifier) {
                count++;
            }
        }
    }
    return count;
}
From source file:org.springframework.social.twitter.api.impl.TweetDeserializer.java
private void extractTickerSymbolEntitiesFromText(String text, Entities entities) { Pattern pattern = Pattern.compile("\\$[A-Za-z]+"); Matcher matcher = pattern.matcher(text); while (matcher.find()) { MatchResult matchResult = matcher.toMatchResult(); String tickerSymbol = matchResult.group().substring(1); String url = "https://twitter.com/search?q=%24" + tickerSymbol + "&src=ctag"; entities.getTickerSymbols().add(new TickerSymbolEntity(tickerSymbol, url, new int[] { matchResult.start(), matchResult.end() })); }//from ww w . j a v a 2s . c o m }
From source file:net.osten.watermap.convert.AZTReport.java
private WaterReport parseDataLine(String line) { WaterReport result = new WaterReport(); try {//from ww w . j a v a2s .co m // Example line: // 8.3 8.3 Tub Spring (aka Bathtub Spring) spring 3 full tub; good trickle3/28/15 3/28/15 Bird Food 4/5/15 // Mileages = first two decimals MatchResult decimalsMatch = RegexUtils.matchFirstOccurance(line, decimalPattern); if (decimalsMatch == null) { log.fine("Mileages not found"); return null; } int decimalsEnd = decimalsMatch.end(); // Type = spring | creek | spring fed | windmill | store | dirt tank | pipe | Town | etc.. MatchResult typeMatch = RegexUtils.matchFirstOccurance(line, typesPattern); if (typeMatch == null) { log.fine("Type not found"); return null; } log.finer("type=" + typeMatch.group()); int typeEnd = typeMatch.end(); // Name = text from second decimal number to type (spring,creek,etc.) log.finer("decimalsEnd=" + decimalsEnd + " typeEnd=" + typeEnd); String name = line.substring(decimalsEnd, typeEnd); result.setName(name.trim()); // Historic Reliability = int after Type (can be "1 to 2" or "0-2") MatchResult histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern3, typeEnd); if (histRelMatch == null) { histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern2, typeEnd); if (histRelMatch == null) { histRelMatch = RegexUtils.matchFirstOccurance(line, histRelPattern1, typeEnd); if (histRelMatch == null) { log.fine("Historical Reliability not found"); return null; } } } log.finer("histRel=" + histRelMatch.group()); String historicReliability = mapHistoricReliability(histRelMatch.group().trim()); int histRelEnd = histRelMatch.end(); // Report Date = second date from right int reportDateEnd = -1; int reportDateStart = -1; List<MatchResult> dates = RegexUtils.matchOccurences(line, datePattern); if (dates.size() >= 2) { reportDateEnd = dates.get(dates.size() - 2).end(); reportDateStart = dates.get(dates.size() - 2).start(); } else { log.fine("Only found " + dates.size() + " dates"); 
reportDateStart = Math.max(line.length() - 1, histRelEnd); } // Report = Historic Reliability to Report Date log.finer("histRelEnd=" + histRelEnd + " reportDateStart=" + reportDateStart); if (histRelEnd >= 0 && reportDateStart >= 0 && reportDateStart >= histRelEnd) { String report = line.substring(histRelEnd, reportDateStart); result.setDescription(report.trim() + "<br />Historical Reliability:" + historicReliability); } else { log.fine("cannot find historic reliability"); } // Post Date = first date from right int postDateStart = -1; MatchResult postDate = RegexUtils.matchLastOccurence(line, datePattern); if (postDate == null) { log.fine("Post Date not found"); } else { result.setLastReport(dateFormatter.parse(postDate.group())); postDateStart = postDate.start(); log.finer("postDate=" + postDate.group()); } // Reported By = text between Report Date and Post Date if (postDateStart >= 0 && reportDateEnd >= 0 && postDateStart > reportDateEnd) { String reportedBy = line.substring(reportDateEnd, postDateStart); log.finer("reportedBy=" + reportedBy); } else { log.finer("cannot find reportedBy"); } result.setState(WaterStateParser.parseState(result.getDescription())); result.setSource(SOURCE_TITLE); result.setUrl(SOURCE_URL); } catch ( ParseException e) { log.fine("ParseException:" + e.getLocalizedMessage()); } return result; }
From source file:jp.go.nict.langrid.wrapper.ws_1_2.translation.AbstractTranslationService.java
/** * /* w ww. j ava 2 s . com*/ * */ public final String multistatementTranslate(String sourceLang, String targetLang, String source, String delimiterRegx) throws AccessLimitExceededException, InvalidParameterException, LanguagePairNotUniquelyDecidedException, NoAccessPermissionException, NoValidEndpointsException, ProcessFailedException, ServerBusyException, ServiceNotActiveException, ServiceNotFoundException, UnsupportedLanguagePairException { checkStartupException(); if (StringUtils.isBlank(delimiterRegx)) { throw new InvalidParameterException("delimiterRegx", "is Blank."); } StringBuilder sb = new StringBuilder(); Scanner s = new Scanner(source).useDelimiter(delimiterRegx); int i = 0; while (s.hasNext()) { String text = s.next(); MatchResult m = s.match(); if (i != m.start()) { String tag = source.substring(i, m.start()); sb.append(tag); } i = m.end(); sb.append(invokeDoTranslation(sourceLang, targetLang, text)); } if (source.length() != i) { String tag = source.substring(i); sb.append(tag); } return sb.toString(); }
From source file:gate.creole.splitter.RegexSentenceSplitter.java
/** * Checks whether a possible match is being vetoed by a non split match. A * possible match is vetoed if it any nay overlap with a veto region. * * @param split the match result representing the split to be tested * @param vetoRegions regions where matches are not allowed. For efficiency * reasons, this method assumes these regions to be non overlapping and sorted * in ascending order./*from w ww .j a va 2s. c om*/ * All veto regions that end before the proposed match are also discarded * (again for efficiency reasons). This requires the proposed matches to be * sent to this method in ascending order, so as to avoid malfunctions. * @return <tt>true</tt> iff the proposed split should be ignored */ private boolean veto(MatchResult split, List<int[]> vetoRegions) { //if no more non splits available, accept everything for (Iterator<int[]> vetoRegIter = vetoRegions.iterator(); vetoRegIter.hasNext();) { int[] aVetoRegion = vetoRegIter.next(); if (aVetoRegion[1] - 1 < split.start()) { //current veto region ends before the proposed split starts //--> discard the veto region vetoRegIter.remove(); } else if (split.end() - 1 < aVetoRegion[0]) { //veto region starts after the split ends //-> we can return false return false; } else { //we have overlap return true; } } //if we got this far, all veto regions are before the split return false; }
From source file:gate.creole.splitter.RegexSentenceSplitter.java
@Override public void execute() throws ExecutionException { interrupted = false;//from w ww .j a v a 2 s. co m int lastProgress = 0; fireProgressChanged(lastProgress); //get pointers to the annotation sets AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0) ? document.getAnnotations() : document.getAnnotations(outputASName); String docText = document.getContent().toString(); /* If the document's content is empty or contains only whitespace, * we drop out right here, since there's nothing to sentence-split. */ if (docText.trim().length() < 1) { return; } Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText); Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText); Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText); //store all non split locations in a list of pairs List<int[]> nonSplits = new LinkedList<int[]>(); while (nonSplitMatcher.find()) { nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() }); } //this lists holds the next matches at each step List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>(); //initialise matching process MatchResult internalMatchResult = null; if (internalSplitMatcher.find()) { internalMatchResult = internalSplitMatcher.toMatchResult(); nextSplitMatches.add(internalMatchResult); } MatchResult externalMatchResult = null; if (externalSplitMatcher.find()) { externalMatchResult = externalSplitMatcher.toMatchResult(); nextSplitMatches.add(externalMatchResult); } MatchResultComparator comparator = new MatchResultComparator(); int lastSentenceEnd = 0; while (!nextSplitMatches.isEmpty()) { //see which one matches first Collections.sort(nextSplitMatches, comparator); MatchResult nextMatch = nextSplitMatches.remove(0); if (nextMatch == internalMatchResult) { //we have a new internal split; see if it's vetoed or not if (!veto(nextMatch, nonSplits)) { //split is not vetoed try { //add the split annotation FeatureMap features = 
Factory.newFeatureMap(); features.put("kind", "internal"); outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features); //generate the sentence annotation int endOffset = nextMatch.end(); //find the first non whitespace character starting from where the //last sentence ended while (lastSentenceEnd < endOffset && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) { lastSentenceEnd++; } //if there is any useful text between the two offsets, generate //a new sentence if (lastSentenceEnd < nextMatch.start()) { outputAS.add(new Long(lastSentenceEnd), new Long(endOffset), ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap()); } //store the new sentence end lastSentenceEnd = endOffset; } catch (InvalidOffsetException e) { // this should never happen throw new ExecutionException(e); } } //prepare for next step if (internalSplitMatcher.find()) { internalMatchResult = internalSplitMatcher.toMatchResult(); nextSplitMatches.add(internalMatchResult); } else { internalMatchResult = null; } } else if (nextMatch == externalMatchResult) { //we have a new external split; see if it's vetoed or not if (!veto(nextMatch, nonSplits)) { //split is not vetoed try { //generate the split FeatureMap features = Factory.newFeatureMap(); features.put("kind", "external"); outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features); //generate the sentence annotation //find the last non whitespace character, going backward from //where the external skip starts int endOffset = nextMatch.start(); while (endOffset > lastSentenceEnd && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) { endOffset--; } //find the first non whitespace character starting from where the //last sentence ended while (lastSentenceEnd < endOffset && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) { lastSentenceEnd++; } //if there is any useful text between the two offsets, generate //a new 
sentence if (lastSentenceEnd < endOffset) { outputAS.add(new Long(lastSentenceEnd), new Long(endOffset), ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap()); } //store the new sentence end lastSentenceEnd = nextMatch.end(); } catch (InvalidOffsetException e) { // this should never happen throw new ExecutionException(e); } } //prepare for next step if (externalSplitMatcher.find()) { externalMatchResult = externalSplitMatcher.toMatchResult(); nextSplitMatches.add(externalMatchResult); } else { externalMatchResult = null; } } else { //malfunction throw new ExecutionException("Invalid state - cannot identify match!"); } //report progress int newProgress = 100 * lastSentenceEnd / docText.length(); if (newProgress - lastProgress > 20) { lastProgress = newProgress; fireProgressChanged(lastProgress); } } //while(!nextMatches.isEmpty()){ fireProcessFinished(); }
From source file:magicware.scm.redmine.tools.IssueSyncApp.java
public void execute(SyncItem syncItem) throws IOException, InvalidFormatException { FileInputStream in = null;//w w w . j a v a 2 s .c om try { // ?JSON?? String issueTemplate = FileUtils.readFileAsString(syncItem.getJsonTemplate()); // ??? Matcher m = Pattern.compile(Constants.ISSUE_FIELD_VALUE_EXP).matcher(issueTemplate); List<MatchResult> mrList = new ArrayList<MatchResult>(); while (m.find()) { MatchResult mr = m.toMatchResult(); mrList.add(mr); } // ???? in = new FileInputStream(syncItem.getFilePath()); Workbook wb = WorkbookFactory.create(in); FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator(); Sheet sheet = wb.getSheet(syncItem.getSheetName()); Row row = null; Cell cell = null; List<String> issues = new ArrayList<String>(); // ????? for (int i = sheet.getLastRowNum(); i >= (syncItem.getKeyRowBeginIdx() > 0 ? (syncItem.getKeyRowBeginIdx() - 1) : 0); i--) { // ???? row = sheet.getRow(i); if (row != null) { String keyNo = ExcelUtils.getCellContent(row.getCell(syncItem.getKeyColumnIdx() - 1), evaluator); // ?????????? if (StringUtils.isBlank(keyNo)) { break; } // ???? if (redmineClient.queryIssue(syncItem.getProjectId(), syncItem.getKeyFiledId(), keyNo) == 0) { StringBuilder newIssue = new StringBuilder(); int eolIdx = 0; for (MatchResult matchResult : mrList) { newIssue.append(issueTemplate.substring(eolIdx, matchResult.start())); int cellIndex = Integer.valueOf(matchResult.group(1)) - 1; cell = row.getCell(cellIndex); String cellvalue = ExcelUtils.getCellContent(cell, evaluator); // ? 
String valueMapStr = matchResult.group(3); Map<String, String> valueMap = null; if (valueMapStr != null) { valueMap = JSON.decode(valueMapStr); if (StringUtils.isNotEmpty(cellvalue) && valueMap.containsKey(cellvalue)) { cellvalue = valueMap.get(cellvalue); } else { cellvalue = valueMap.get("default"); } } if (StringUtils.isNotEmpty(cellvalue)) { cellvalue = StringEscapeUtils.escapeJavaScript(cellvalue); newIssue.append(cellvalue); } eolIdx = matchResult.end(); } newIssue.append(issueTemplate.substring(eolIdx)); issues.add(newIssue.toString()); } else { // ??? break; } } } for (int i = issues.size() - 1; i >= 0; i--) { Map<String, Issue> issueMap = JSON.decode(issues.get(i)); log.debug("create new issue >>>"); log.debug(JSON.encode(issueMap, true)); redmineClient.createNewIssue(issues.get(i)); } } finally { if (in != null) { in.close(); in = null; } } }
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
/** * Substitute with i18n properties/* w w w .j ava 2 s . c om*/ * * @param displayText * @return */ public String formatDisplayStringWithI18n(String displayText) { if (StringUtils.trimToNull(displayText) == null) return displayText; try { String formatted = displayText; Matcher m = indexFieldPatternMatcher.matcher(displayText); int currentPos = 0; while (m.find(currentPos)) { String matchedIndexTerm = m.group(0).replaceAll(":", ""); MatchResult mr = m.toMatchResult(); //if the matched term represents a layer lookup the title in the layers service Matcher lm = layersPattern.matcher(matchedIndexTerm); String i18n = ""; if (lm.matches()) { i18n = layersService.getName(matchedIndexTerm); if (i18n == null) { i18n = matchedIndexTerm; } } else { i18n = messageSource.getMessage("facet." + matchedIndexTerm, null, matchedIndexTerm, null); } //System.out.println("i18n for " + matchedIndexTerm + " = " + i18n); if (!matchedIndexTerm.equals(i18n)) { int nextWhitespace = displayText.substring(mr.end()).indexOf(" "); String extractedValue = null; if (nextWhitespace > 0) { extractedValue = displayText.substring(mr.end(), mr.end() + nextWhitespace); } else { //reached the end of the query extractedValue = displayText.substring(mr.end()); } String formattedExtractedValue = SearchUtils.stripEscapedQuotes(extractedValue); String i18nForValue = messageSource.getMessage(matchedIndexTerm + "." + formattedExtractedValue, null, "", null); if (i18nForValue.length() == 0) i18nForValue = messageSource.getMessage(formattedExtractedValue, null, "", null); if (i18nForValue.length() > 0) { formatted = formatted.replaceAll(matchedIndexTerm + ":" + extractedValue, i18n + ":" + i18nForValue); } else { //just replace the matched index term formatted = formatted.replaceAll(matchedIndexTerm, i18n); } } currentPos = mr.end(); } return formatted; } catch (Exception e) { logger.debug(e.getMessage(), e); return displayText; } }