List of usage examples for java.util.regex.Matcher.end()
public int end()
From source file:com.g3net.tool.StringUtils.java
/** * src?regex??????? ?(handler)/*from w w w. j av a 2s. c o m*/ * ????? * * @param src * @param regex * ??:&(\\d+;)([a-z)+) * @param handleGroupIndex * ??? * @param hander * ? * @param reservesGroups * ???,?hander?handleGroupIndex * ?reservesGroups?hander?regex? * @return */ public static String replaceAll(String src, String regex, int handleGroupIndex, GroupHandler hander, int[] reservesGroups) { if (src == null || src.trim().length() == 0) { return ""; } Matcher m = Pattern.compile(regex).matcher(src); StringBuffer sbuf = new StringBuffer(); String replacementFirst = ""; String replacementTail = ""; if (reservesGroups != null && reservesGroups.length > 0) { Arrays.sort(reservesGroups); for (int i = 0; i < reservesGroups.length; i++) { if (reservesGroups[i] < handleGroupIndex) { replacementFirst = replacementFirst + "$" + reservesGroups[i]; } else { replacementTail = replacementTail + "$" + reservesGroups[i]; } } } // perform the replacements: while (m.find()) { String value = m.group(handleGroupIndex); String group = m.group(); String handledStr = hander.handler(value); String replacement = ""; if (reservesGroups == null) { int start0 = m.start(); int end0 = m.end(); int start = m.start(handleGroupIndex); int end = m.end(handleGroupIndex); int relativeStart = start - start0; int relativeEnd = end - start0; StringBuilder sbgroup = new StringBuilder(group); sbgroup = sbgroup.replace(relativeStart, relativeEnd, handledStr); replacement = sbgroup.toString(); } else { replacement = replacementFirst + handledStr + replacementTail; } m.appendReplacement(sbuf, replacement); } // Put in the remainder of the text: m.appendTail(sbuf); return sbuf.toString(); // return null; }
From source file:it.drwolf.ridire.session.CrawlerManager.java
private long getURICount(Job job, String whichCount, User currentUser) throws IOException, HeritrixException, DocumentException, XPathExpressionException, SAXException { // this.updateJobsList(currentUser); Pattern pURICount = Pattern.compile( "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)", Pattern.MULTILINE); String jobName = job.getName(); Job j = this.getPersistedJob(jobName); if (j == null) { return 0L; }/*from w ww .j ava 2s . com*/ if (job.getChildJobName() != null && job.getChildJobName().length() > 0) { jobName = job.getChildJobName(); } String dir = this.entityManager.find(Parameter.class, Parameter.JOBS_DIR.getKey()).getValue(); long uriCountFromCrawlReport = 0L; long queuedURICount = 0L; long discoveredURICount = 0L; HttpMethod method = null; String jobStatus = this.getJobStatus(jobName); // jobName = jobName.replaceAll(" ", "\\\\ "); try { while (true) { if (jobStatus.equals(CrawlStatus.RUNNING.toString())) { RandomAccessFile progressStatistics = null; try { progressStatistics = new RandomAccessFile(this.jobsDir + CrawlerManager.FILE_SEPARATOR + jobName + CrawlerManager.FILE_SEPARATOR + "logs" + CrawlerManager.FILE_SEPARATOR + "progress-statistics.log", "r"); if (progressStatistics != null) { progressStatistics.seek(Math.max(0, progressStatistics.length() - 3000)); String line = progressStatistics.readLine(); StringBuffer buffer = new StringBuffer(); while (line != null) { buffer.append(line + "\n"); line = progressStatistics.readLine(); } String progressStatisticsContent = buffer.toString(); Matcher m = pURICount.matcher(progressStatisticsContent); int start = 0; long queuedURICountTemp = 0L; long discoveredURICountTemp = 0L; long uriCountFromCrawlReportTemp = 0L; while (m.find(start)) { start = m.end(); queuedURICountTemp = Long.parseLong(m.group(2)); discoveredURICountTemp = Long.parseLong(m.group(1)); uriCountFromCrawlReportTemp = Long.parseLong(m.group(3)); } queuedURICount += queuedURICountTemp; discoveredURICount = 
discoveredURICountTemp; uriCountFromCrawlReport = uriCountFromCrawlReportTemp; } } catch (FileNotFoundException e) { // TODO: handle exception } finally { if (progressStatistics != null) { progressStatistics.close(); } } break; } else if (whichCount.equalsIgnoreCase("finishedURICount")) { File reportFile = new File( dir + CrawlerManager.FILE_SEPARATOR + jobName + CrawlerManager.FILE_SEPARATOR + "reports" + CrawlerManager.FILE_SEPARATOR + "crawl-report.txt"); if (reportFile.exists() && reportFile.canRead()) { String content = FileUtils.readFileToString(reportFile); Matcher m = CrawlerManager.pFinishedURICount.matcher(content); if (m.find()) { String bytes = m.group(1); uriCountFromCrawlReport += Long.parseLong(bytes); } } Matcher m = CrawlerManager.childJobPattern.matcher(jobName); if (m.matches()) { Integer count = Integer.parseInt(m.group(1)); if (count > 1) { count--; jobName = jobName.substring(0, jobName.indexOf("__")) + "__" + count; } else if (count == 1) { jobName = jobName.substring(0, jobName.indexOf("__")); } else { break; } } else { break; } } else { return 0L; } } } finally { if (method != null) { method.releaseConnection(); } } if (whichCount.equals("discoveredUriCount")) { return discoveredURICount; } if (whichCount.equals("queuedUriCount")) { return queuedURICount; } return uriCountFromCrawlReport; }
From source file:com.timrae.rikaidroid.MainActivity.java
/** * Add the reading to the kanji as Ruby furigana, ensuring that there is only furigana above * the kanji, not above any hiragana included in the word. * @param kanji a word in kanji// www .j av a 2 s . c o m * @param reading the hiragana reading for the word * @return a String with the reading correctly added to the kanji as Ruby */ private String makeFurigana(String kanji, String reading) { Matcher kanaMatcher = KANA_REGEXP.matcher(kanji); // All characeters are kanji; simple replacement will work if (!kanaMatcher.find()) { return String.format(RUBY, kanji, reading); } // Strip off any kana from the beginning of the word StringBuilder output = new StringBuilder(); if (kanaMatcher.start() == 0) { String prefix = kanaMatcher.group(); kanji = kanji.substring(prefix.length()); reading = reading.substring(prefix.length()); output.append(prefix); kanaMatcher = KANA_REGEXP.matcher(kanji); } else { kanaMatcher.reset(); } // Keep track of number of kana added to output to see if the algorithm was successful int numKana = output.length(); // Now step through each kanji int lastKanaEnd = 0; int lastReadingKanaEnd = 0; while (kanaMatcher.find()) { // Find the next kana in the kanji string int kanaStart = kanaMatcher.start(); String currentKana = kanaMatcher.group(); // Extract the kanji in-between the current kana and the previous kana String currentKanji = kanji.substring(lastKanaEnd, kanaStart); // Set the end index of current kana in kanji string for next loop iteration lastKanaEnd = kanaMatcher.end(); // Find the current kana in the reading string // Not perfect. 
Here we take the first occurrence at least number of kanji after the last kana int readingKanaStart = reading.indexOf(currentKana, lastReadingKanaEnd + currentKanji.length()); // Extract the reading in-between the kana found in the kanji this time and last time String currentReading = reading.substring(lastReadingKanaEnd, readingKanaStart); // Set the end index of current kana in reading string for next loop iteration lastReadingKanaEnd = readingKanaStart + currentKana.length(); // Append current kanji and reading to the StringBuilder as furigana output.append(String.format(RUBY, currentKanji, currentReading)); // Append the current kana to the StringBuilder (outside the furigana) output.append(currentKana); // Keep track of number of kana addded to see if the algorithm was successful numKana += currentReading.length() + currentKana.length(); } // Add any kanji / reading at the end of the string to the builder if (lastKanaEnd < kanji.length()) { String currentKanji = kanji.substring(lastKanaEnd + 1); String currentReading = reading.substring(lastReadingKanaEnd + 1); output.append(String.format(RUBY, currentKanji, currentReading)); numKana += currentReading.length(); } // Do sanity check, returning naiive substitution if it failed if (numKana < reading.length()) { return String.format(RUBY, kanji, reading); } return output.toString().trim(); }
From source file:com.dwdesign.tweetings.activity.ComposeActivity.java
private final void gatherLinks(ArrayList<Hyperlink> links, Spannable s, Pattern pattern) { // Matcher matching the pattern Matcher m = pattern.matcher(s); while (m.find()) { int start = m.start(); int end = m.end(); /*/* w w w .j a v a 2 s. c o m*/ * Hyperlink is basically used like a structure for storing the information about * where the link was found. */ Hyperlink spec = new Hyperlink(); spec.textSpan = s.subSequence(start, end); spec.span = new InternalURLSpan(spec.textSpan.toString()); spec.start = start; spec.end = end; links.add(spec); } }
From source file:net.sf.jabref.wizard.auximport.AuxSubGenerator.java
/** * parseAuxFile read the Aux file and fill up some intern data structures. Nested aux files (latex \\include) * supported!//w w w. j a v a 2 s .c om * * @param filename String : Path to LatexAuxFile * @return boolean, true = no error occurs */ // found at comp.text.tex // > Can anyone tell be the information held within a .aux file? Is there a // > specific format to this file? // // I don't think there is a particular format. Every package, class // or document can write to the aux file. The aux file consists of LaTeX macros // and is read at the \begin{document} and again at the \end{document}. // // It usually contains information about existing labels // \\newlabel{sec:Intro}{{1}{1}} // and citations // \citation{hiri:conv:1993} // and macros to write information to other files (like toc, lof or lot files) // \@writefile{toc}{\contentsline {section}{\numberline // {1}Intro}{1}} // but as I said, there can be a lot more // aux file : // // \\citation{x} x = used reference of bibtex library entry // // \\@input{x} x = nested aux file // // the \\bibdata{x} directive contains information about the // bibtex library file -> x = name of bib file // // \\bibcite{x}{y} // x is a label for an item and y is the index in bibliography private boolean parseAuxFile(String filename) { // regular expressions Matcher matcher; // while condition boolean cont; // return value -> default: no error boolean back = true; // file list, used for nested aux files List<String> fileList = new ArrayList<>(5); fileList.add(filename); // get the file path File dummy = new File(filename); String path = dummy.getParent(); if (path == null) { path = ""; } else { path = path + File.separator; } nestedAuxCounter = -1; // count only the nested reads // index of current file in list int fileIndex = 0; while (fileIndex < fileList.size()) { String fName = fileList.get(fileIndex); try (BufferedReader br = new BufferedReader(new FileReader(fName))) { cont = true; while (cont) { Optional<String> 
maybeLine; try { maybeLine = Optional.ofNullable(br.readLine()); } catch (IOException ioe) { maybeLine = Optional.empty(); } if (maybeLine.isPresent()) { String line = maybeLine.get(); matcher = TAG_PATTERN.matcher(line); while (matcher.find()) { // extract the bibtex-key(s) XXX from \citation{XXX} string int len = matcher.end() - matcher.start(); if (len > 11) { String str = matcher.group(2); // could be an comma separated list of keys String[] keys = str.split(","); if (keys != null) { for (String dummyStr : keys) { if (dummyStr != null) { // delete all unnecessary blanks and save key into an set mySet.add(dummyStr.trim()); } } } } } // try to find a nested aux file int index = line.indexOf("\\@input{"); if (index >= 0) { int start = index + 8; int end = line.indexOf('}', start); if (end > start) { String str = path + line.substring(index + 8, end); // if filename already in file list if (!fileList.contains(str)) { fileList.add(str); // insert file into file list } } } } else { cont = false; } } nestedAuxCounter++; } catch (FileNotFoundException e) { LOGGER.info("Cannot locate input file!", e); } catch (IOException e) { LOGGER.warn("Problem opening file!", e); } fileIndex++; // load next file } return back; }
From source file:com.manydesigns.portofino.pageactions.text.TextAction.java
protected String processLocalUrls(String content) { List<String> hosts = new ArrayList<String>(); hosts.add(context.getRequest().getLocalAddr()); hosts.add(context.getRequest().getLocalName()); hosts.addAll(portofinoConfiguration.getList(PortofinoProperties.HOSTNAMES)); String patternString = BASE_USER_URL_PATTERN.replace("HOSTS", "(" + StringUtils.join(hosts, ")|(") + ")"); Pattern pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); Matcher matcher = pattern.matcher(content); int lastEnd = 0; StringBuilder sb = new StringBuilder(); String contextPath = context.getRequest().getContextPath(); while (matcher.find()) { String attribute = matcher.group(1); String path = matcher.group(8 + hosts.size()); assert path.startsWith("/"); String queryString = matcher.group(10 + hosts.size()); String hostAndPort = matcher.group(5); if (!StringUtils.isBlank(hostAndPort) && !path.startsWith(contextPath)) { logger.debug("Path refers to another web application on the same host, skipping: {}", path); continue; }/*ww w .j av a2 s . c o m*/ sb.append(content.substring(lastEnd, matcher.start())); sb.append("portofino:hrefAttribute=\"").append(attribute).append("\""); if (path.startsWith(contextPath)) { path = path.substring(contextPath.length()); } //path = convertPathToInternalLink(path); sb.append(" portofino:link=\"").append(path).append("\""); if (!StringUtils.isBlank(queryString)) { sb.append(" portofino:queryString=\"").append(queryString).append("\""); } lastEnd = matcher.end(); } sb.append(content.substring(lastEnd)); return sb.toString(); }
From source file:au.org.ala.names.search.ALANameSearcher.java
/** * Update the rank for the name based on it containing rank strings. * Provides a bit of a sanity check on the name matching. If we expect a * species we don't want to match on a genus * * @param name/* w w w. ja va 2 s . c o m*/ * @param rank */ private RankType getUpdatedRank(String name, RankType rank) { Matcher matcher = RANK_MARKER.matcher(name); if (matcher.find()) { String value = name.substring(matcher.start(), matcher.end()); log.debug("Changing rank to : " + value); if (value.endsWith(".")) rank = RankType.getForCBRank(Rank.RANK_MARKER_MAP.get(value.substring(1, value.length() - 1))); log.debug("Using the new rank " + rank); } return rank; }
From source file:fr.gouv.culture.thesaurus.service.impl.SesameThesaurus.java
/** * Abrge le libell en ne renvoyant que la premire occurrence du texte * trouv, avec le contexte et en surlignant les termes trouvs. Si aucune * occurrence n'a t trouve, renvoie la premire partie du libell. * /*www. j av a 2s. co m*/ * @param matchingLabel * Libell correspondant la requte * @param queryPattern * Requte d'origine sous forme d'expression rgulire * @return Premire occurrence du texte trouv avec le contexte et le * surlignage en HTML */ private String abbreviateAndHighlightMatchingLabel(String matchingLabel, Pattern queryPattern) { final Matcher matcher = queryPattern.matcher(matchingLabel); final int maxDescriptionLength = configuration.getMatchingLabelFirstOccurrenceWidth(); String abbreviatedVersion; if (matcher.find()) { final int contextMaxLength = configuration.getMatchingLabelContextLength(); final int highlightMaxLength = maxDescriptionLength - 2 * contextMaxLength; if (highlightMaxLength < 1) { throw new IllegalArgumentException( "Invalid configuration: the occurrence width is not long enough to hold the highlighted part and the context."); } abbreviatedVersion = TextUtils.htmlHighlightOccurrence(matchingLabel, matcher.start(), matcher.end(), highlightMaxLength, contextMaxLength, "<em>", "</em>"); } else { /* * Pour une certaine raison, les termes trouvs par la recherche ne * sont pas localisables dans le texte trait avec Java. On renvoie * alors le dbut du libell correspondant. */ abbreviatedVersion = StringEscapeUtils .escapeHtml4(TextUtils.leftAbbreviateOnWords(matchingLabel, maxDescriptionLength)); } return abbreviatedVersion; }