Example usage for java.util.regex Matcher end

List of usage examples for java.util.regex Matcher end

Introduction

On this page you can find example usages of java.util.regex.Matcher.end().

Prototype

public int end() 

Source Link

Document

Returns the offset after the last character matched.

Usage

From source file:com.g3net.tool.StringUtils.java

/**
 * src?regex??????? ?(handler)/*from  w w  w. j av a  2s. c o m*/
 * ?????
 * 
 * @param src
 * @param regex
 *            ??:&(\\d+;)([a-z)+)
 * @param handleGroupIndex
 *            ???
 * @param hander
 *            ?
 * @param reservesGroups
 *            ???,?hander?handleGroupIndex
 *            ?reservesGroups?hander?regex? 
 * @return
 */
public static String replaceAll(String src, String regex, int handleGroupIndex, GroupHandler hander,
        int[] reservesGroups) {

    if (src == null || src.trim().length() == 0) {
        return "";
    }
    Matcher m = Pattern.compile(regex).matcher(src);

    StringBuffer sbuf = new StringBuffer();
    String replacementFirst = "";
    String replacementTail = "";
    if (reservesGroups != null && reservesGroups.length > 0) {
        Arrays.sort(reservesGroups);
        for (int i = 0; i < reservesGroups.length; i++) {
            if (reservesGroups[i] < handleGroupIndex) {
                replacementFirst = replacementFirst + "$" + reservesGroups[i];
            } else {
                replacementTail = replacementTail + "$" + reservesGroups[i];
            }
        }
    }

    // perform the replacements:
    while (m.find()) {
        String value = m.group(handleGroupIndex);

        String group = m.group();

        String handledStr = hander.handler(value);
        String replacement = "";
        if (reservesGroups == null) {
            int start0 = m.start();
            int end0 = m.end();
            int start = m.start(handleGroupIndex);
            int end = m.end(handleGroupIndex);
            int relativeStart = start - start0;
            int relativeEnd = end - start0;
            StringBuilder sbgroup = new StringBuilder(group);
            sbgroup = sbgroup.replace(relativeStart, relativeEnd, handledStr);
            replacement = sbgroup.toString();
        } else {
            replacement = replacementFirst + handledStr + replacementTail;
        }

        m.appendReplacement(sbuf, replacement);

    }
    // Put in the remainder of the text:
    m.appendTail(sbuf);
    return sbuf.toString();
    // return null;
}

From source file:it.drwolf.ridire.session.CrawlerManager.java

/**
 * Returns one of the URI counters of a crawl job.
 *
 * <p>If the job has no persisted counterpart the result is 0. If the job has a non-empty child
 * job name, that name is used for all lookups.
 *
 * <p>While the job status equals {@code CrawlStatus.RUNNING}, the counters come from the last
 * entry in the final 3000 bytes of {@code logs/progress-statistics.log} that consists of a
 * timestamp followed by three numbers: the first is used as the discovered count, the second
 * as the queued count and the third as the finished count. A missing log leaves all three at 0.
 *
 * <p>For any other status only {@code "finishedURICount"} (case-insensitive) is served; every
 * other {@code whichCount} yields 0. The finished count is the sum of the values that
 * {@code pFinishedURICount} extracts from {@code reports/crawl-report.txt} for this job and for
 * each predecessor reached by counting down a {@code name__N} job name to the bare name.
 *
 * @param job the job whose counter is requested
 * @param whichCount {@code "discoveredUriCount"} or {@code "queuedUriCount"} (case-sensitive);
 *            any other value selects the finished count
 * @param currentUser not used by this method
 * @return the requested counter, or 0 when it is not available
 * @throws IOException if a log or report file cannot be read or closed
 */
private long getURICount(Job job, String whichCount, User currentUser)
        throws IOException, HeritrixException, DocumentException, XPathExpressionException, SAXException {
    // this.updateJobsList(currentUser);
    Pattern pURICount = Pattern.compile(
            "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)", Pattern.MULTILINE);
    String jobName = job.getName();
    Job j = this.getPersistedJob(jobName);
    if (j == null) {
        return 0L;
    }
    if (job.getChildJobName() != null && job.getChildJobName().length() > 0) {
        jobName = job.getChildJobName();
    }
    String dir = this.entityManager.find(Parameter.class, Parameter.JOBS_DIR.getKey()).getValue();
    long uriCountFromCrawlReport = 0L;
    long queuedURICount = 0L;
    long discoveredURICount = 0L;
    // The status is read once; the loop below only walks job names, it never re-queries it.
    String jobStatus = this.getJobStatus(jobName);
    // jobName = jobName.replaceAll(" ", "\\\\ ");
    while (true) {
        if (jobStatus.equals(CrawlStatus.RUNNING.toString())) {
            // NOTE(review): the log is located via this.jobsDir while the report below uses the
            // JOBS_DIR parameter - confirm both point to the same directory.
            try (RandomAccessFile progressStatistics = new RandomAccessFile(
                    this.jobsDir + CrawlerManager.FILE_SEPARATOR + jobName + CrawlerManager.FILE_SEPARATOR
                            + "logs" + CrawlerManager.FILE_SEPARATOR + "progress-statistics.log",
                    "r")) {
                // Only the tail of the log is needed: the newest entry is the last one.
                progressStatistics.seek(Math.max(0, progressStatistics.length() - 3000));
                StringBuilder buffer = new StringBuilder();
                for (String line = progressStatistics.readLine(); line != null; line = progressStatistics
                        .readLine()) {
                    buffer.append(line).append('\n');
                }
                Matcher statisticsMatcher = pURICount.matcher(buffer);
                // Each match overwrites the previous one, so the values of the last entry win.
                while (statisticsMatcher.find()) {
                    discoveredURICount = Long.parseLong(statisticsMatcher.group(1));
                    queuedURICount = Long.parseLong(statisticsMatcher.group(2));
                    uriCountFromCrawlReport = Long.parseLong(statisticsMatcher.group(3));
                }
            } catch (FileNotFoundException ignored) {
                // No progress log (yet) for this job: keep the counters at 0.
            }
            break;
        } else if (whichCount.equalsIgnoreCase("finishedURICount")) {
            File reportFile = new File(
                    dir + CrawlerManager.FILE_SEPARATOR + jobName + CrawlerManager.FILE_SEPARATOR
                            + "reports" + CrawlerManager.FILE_SEPARATOR + "crawl-report.txt");
            if (reportFile.exists() && reportFile.canRead()) {
                String content = FileUtils.readFileToString(reportFile);
                Matcher reportMatcher = CrawlerManager.pFinishedURICount.matcher(content);
                if (reportMatcher.find()) {
                    uriCountFromCrawlReport += Long.parseLong(reportMatcher.group(1));
                }
            }
            // Step to the previous job in the chain: name__N -> name__(N-1) -> ... -> name.
            Matcher childMatcher = CrawlerManager.childJobPattern.matcher(jobName);
            if (!childMatcher.matches()) {
                break;
            }
            int count = Integer.parseInt(childMatcher.group(1));
            if (count > 1) {
                jobName = jobName.substring(0, jobName.indexOf("__")) + "__" + (count - 1);
            } else if (count == 1) {
                jobName = jobName.substring(0, jobName.indexOf("__"));
            } else {
                break;
            }
        } else {
            return 0L;
        }
    }
    if (whichCount.equals("discoveredUriCount")) {
        return discoveredURICount;
    }
    if (whichCount.equals("queuedUriCount")) {
        return queuedURICount;
    }
    return uriCountFromCrawlReport;
}

From source file:com.timrae.rikaidroid.MainActivity.java

/**
 * Add the reading to the kanji as Ruby furigana, ensuring that there is only furigana above
 * the kanji, not above any kana included in the word.
 *
 * <p>The word is split at every run matched by {@code KANA_REGEXP}; each run is then located
 * in the reading so that the reading in between can be attached to the kanji in between. If
 * this alignment fails (a kana run cannot be found in the reading, the reading is shorter
 * than the leading kana, or part of the reading is left unassigned), the whole reading is
 * placed above the whole word instead.
 *
 * @param kanji a word in kanji, possibly mixed with kana
 * @param reading the kana reading for the whole word
 * @return a String with the reading added to the kanji as Ruby
 */
private String makeFurigana(String kanji, String reading) {
    // Keep the untouched inputs: the fallback must render the complete word and reading
    final String fullKanji = kanji;
    final String fullReading = reading;
    Matcher kanaMatcher = KANA_REGEXP.matcher(kanji);
    // All characters are kanji; simple replacement will work
    if (!kanaMatcher.find()) {
        return String.format(RUBY, fullKanji, fullReading);
    }
    // Strip off any kana from the beginning of the word
    StringBuilder output = new StringBuilder();
    if (kanaMatcher.start() == 0) {
        String prefix = kanaMatcher.group();
        if (prefix.length() > reading.length()) {
            // The reading cannot cover the leading kana; alignment is impossible
            return String.format(RUBY, fullKanji, fullReading);
        }
        // The prefix is removed by length only, so the scripts need not match here
        kanji = kanji.substring(prefix.length());
        reading = reading.substring(prefix.length());
        output.append(prefix);
        kanaMatcher = KANA_REGEXP.matcher(kanji);
    } else {
        kanaMatcher.reset();
    }
    // Count the characters of the full reading that are accounted for in the output
    int numKana = output.length();
    // Now step through each kanji
    int lastKanaEnd = 0;
    int lastReadingKanaEnd = 0;
    while (kanaMatcher.find()) {
        // Find the next kana in the kanji string
        int kanaStart = kanaMatcher.start();
        String currentKana = kanaMatcher.group();
        // Extract the kanji in-between the current kana and the previous kana
        String currentKanji = kanji.substring(lastKanaEnd, kanaStart);
        // Set the end index of current kana in kanji string for next loop iteration
        lastKanaEnd = kanaMatcher.end();
        // Find the current kana in the reading string
        // Not perfect. Each kanji needs at least one kana of reading, so start searching that
        // many characters after the previous kana
        int readingKanaStart = reading.indexOf(currentKana, lastReadingKanaEnd + currentKanji.length());
        if (readingKanaStart < 0) {
            // The kana of the word does not occur in the reading; give up on alignment
            return String.format(RUBY, fullKanji, fullReading);
        }
        // Extract the reading in-between the kana found in the kanji this time and last time
        String currentReading = reading.substring(lastReadingKanaEnd, readingKanaStart);
        // Set the end index of current kana in reading string for next loop iteration
        lastReadingKanaEnd = readingKanaStart + currentKana.length();
        // Append current kanji and reading to the StringBuilder as furigana
        output.append(String.format(RUBY, currentKanji, currentReading));
        // Append the current kana to the StringBuilder (outside the furigana)
        output.append(currentKana);
        numKana += currentReading.length() + currentKana.length();
    }
    // Add any kanji / reading at the end of the string to the builder.
    // Both end indexes are exclusive, so the remainder starts exactly at them.
    if (lastKanaEnd < kanji.length()) {
        String currentKanji = kanji.substring(lastKanaEnd);
        String currentReading = reading.substring(lastReadingKanaEnd);
        output.append(String.format(RUBY, currentKanji, currentReading));
        numKana += currentReading.length();
    }
    // Do sanity check, returning naive substitution if part of the reading was left out
    if (numKana < fullReading.length()) {
        return String.format(RUBY, fullKanji, fullReading);
    }
    return output.toString().trim();
}

From source file:com.dwdesign.tweetings.activity.ComposeActivity.java

private final void gatherLinks(ArrayList<Hyperlink> links, Spannable s, Pattern pattern) {
    // Matcher matching the pattern
    Matcher m = pattern.matcher(s);

    while (m.find()) {
        int start = m.start();
        int end = m.end();

        /*/* w  w  w .j a v a  2  s.  c o m*/
        *  Hyperlink is basically used like a structure for storing the information about
        *  where the link was found.
        */
        Hyperlink spec = new Hyperlink();

        spec.textSpan = s.subSequence(start, end);
        spec.span = new InternalURLSpan(spec.textSpan.toString());
        spec.start = start;
        spec.end = end;

        links.add(spec);
    }
}

From source file:net.sf.jabref.wizard.auximport.AuxSubGenerator.java

/**
 * Reads the aux file and fills some internal data structures. Nested aux files (LaTeX \\include)
 * are supported.
 *
 * @param filename String : Path to LatexAuxFile
 * @return boolean, true = no error occurs
 */

// found at comp.text.tex
//  > Can anyone tell be the information held within a .aux file?  Is there a
//  > specific format to this file?
//
// I don't think there is a particular format. Every package, class
// or document can write to the aux file. The aux file consists of LaTeX macros
// and is read at the \begin{document} and again at the \end{document}.
//
// It usually contains information about existing labels
//  \\newlabel{sec:Intro}{{1}{1}}
// and citations
//  \citation{hiri:conv:1993}
// and macros to write information to other files (like toc, lof or lot files)
//  \@writefile{toc}{\contentsline {section}{\numberline
// {1}Intro}{1}}
// but as I said, there can be a lot more

// aux file :
//
// \\citation{x}  x = used reference of bibtex library entry
//
// \\@input{x}  x = nested aux file
//
// the \\bibdata{x} directive contains information about the
// bibtex library file -> x = name of bib file
//
// \\bibcite{x}{y}
//   x is a label for an item and y is the index in bibliography
private boolean parseAuxFile(String filename) {
    // return value -> stays true only while every file could be opened and read
    boolean back = true;

    // file list, used for nested aux files
    List<String> fileList = new ArrayList<>(5);
    fileList.add(filename);

    // directory prefix of the top-level file; nested file names are resolved against it
    String parent = new File(filename).getParent();
    String path = (parent == null) ? "" : parent + File.separator;

    nestedAuxCounter = -1; // count only the nested reads

    // fileList grows while it is processed, so walk it by index
    for (int fileIndex = 0; fileIndex < fileList.size(); fileIndex++) {
        String fName = fileList.get(fileIndex);
        try (BufferedReader br = new BufferedReader(new FileReader(fName))) {
            while (true) {
                String line;
                try {
                    line = br.readLine();
                } catch (IOException ioe) {
                    // Keep what was parsed so far, but report the failure instead of hiding it
                    LOGGER.warn("Problem reading file!", ioe);
                    back = false;
                    break;
                }
                if (line == null) {
                    break; // end of file
                }

                Matcher matcher = TAG_PATTERN.matcher(line);
                while (matcher.find()) {
                    // extract the bibtex-key(s) XXX from \citation{XXX} string
                    // presumably 11 is the length of an empty "\citation{}" - verify against TAG_PATTERN
                    int len = matcher.end() - matcher.start();
                    if (len > 11) {
                        // could be a comma separated list of keys
                        for (String key : matcher.group(2).split(",")) {
                            // delete all unnecessary blanks and save key into a set
                            mySet.add(key.trim());
                        }
                    }
                }

                // try to find a nested aux file
                int index = line.indexOf("\\@input{");
                if (index >= 0) {
                    int start = index + 8; // first character after "\@input{"
                    int end = line.indexOf('}', start);
                    if (end > start) {
                        String nested = path + line.substring(start, end);
                        // queue the file only once
                        if (!fileList.contains(nested)) {
                            fileList.add(nested);
                        }
                    }
                }
            }
            nestedAuxCounter++;
        } catch (FileNotFoundException e) {
            LOGGER.info("Cannot locate input file!", e);
            back = false;
        } catch (IOException e) {
            LOGGER.warn("Problem opening file!", e);
            back = false;
        }
    }

    return back;
}

From source file:com.manydesigns.portofino.pageactions.text.TextAction.java

/**
 * Rewrites URLs in {@code content} that point to this host into
 * {@code portofino:hrefAttribute}, {@code portofino:link} and (when a query string is
 * present) {@code portofino:queryString} attributes.
 *
 * <p>The host alternatives are the request's local address and local name plus the configured
 * host names. Each one is wrapped in its own capturing group when substituted for
 * {@code HOSTS} in {@code BASE_USER_URL_PATTERN}, which is why the path and query string group
 * indexes are offset by the number of hosts. Matches that name a host but whose path lies
 * outside the context path are left untouched.
 *
 * @param content the text to process
 * @return the content with matching URLs replaced
 */
protected String processLocalUrls(String content) {
    List<String> hosts = new ArrayList<String>();
    hosts.add(context.getRequest().getLocalAddr());
    hosts.add(context.getRequest().getLocalName());
    hosts.addAll(portofinoConfiguration.getList(PortofinoProperties.HOSTNAMES));
    final int hostCount = hosts.size();

    // NOTE(review): host names are inserted into the regex unquoted, so e.g. '.' matches any
    // character - confirm whether configured host names are meant to be regex fragments.
    String hostAlternatives = "(" + StringUtils.join(hosts, ")|(") + ")";
    Pattern pattern = Pattern.compile(BASE_USER_URL_PATTERN.replace("HOSTS", hostAlternatives),
            Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(content);

    StringBuilder result = new StringBuilder();
    String contextPath = context.getRequest().getContextPath();
    // Index in content up to which text has already been copied to result
    int copiedUpTo = 0;
    while (matcher.find()) {
        String attribute = matcher.group(1);
        String path = matcher.group(8 + hostCount);
        assert path.startsWith("/");
        String queryString = matcher.group(10 + hostCount);
        String hostAndPort = matcher.group(5);
        boolean insideContext = path.startsWith(contextPath);
        if (!StringUtils.isBlank(hostAndPort) && !insideContext) {
            logger.debug("Path refers to another web application on the same host, skipping: {}", path);
            // copiedUpTo is not advanced, so the skipped URL is copied verbatim later on
            continue;
        }

        result.append(content, copiedUpTo, matcher.start());
        result.append("portofino:hrefAttribute=\"").append(attribute).append("\"");

        // Links are stored relative to the context path
        String link = insideContext ? path.substring(contextPath.length()) : path;

        //link = convertPathToInternalLink(link);
        result.append(" portofino:link=\"").append(link).append("\"");
        if (!StringUtils.isBlank(queryString)) {
            result.append(" portofino:queryString=\"").append(queryString).append("\"");
        }

        copiedUpTo = matcher.end();
    }

    result.append(content.substring(copiedUpTo));

    return result.toString();
}

From source file:au.org.ala.names.search.ALANameSearcher.java

/**
 * Update the rank for the name based on it containing rank strings.
 * Provides a bit of a sanity check on the name matching.  If we expect a
 * species we don't want to match on a genus
 *
 * @param name/* w  w w. ja va 2  s  . c  o  m*/
 * @param rank
 */
private RankType getUpdatedRank(String name, RankType rank) {
    Matcher matcher = RANK_MARKER.matcher(name);

    if (matcher.find()) {
        String value = name.substring(matcher.start(), matcher.end());
        log.debug("Changing rank to : " + value);
        if (value.endsWith("."))
            rank = RankType.getForCBRank(Rank.RANK_MARKER_MAP.get(value.substring(1, value.length() - 1)));
        log.debug("Using the new rank " + rank);
    }
    return rank;
}

From source file:fr.gouv.culture.thesaurus.service.impl.SesameThesaurus.java

/**
 * Abrge le libell en ne renvoyant que la premire occurrence du texte
 * trouv, avec le contexte et en surlignant les termes trouvs. Si aucune
 * occurrence n'a t trouve, renvoie la premire partie du libell.
 * /*www. j  av a  2s.  co  m*/
 * @param matchingLabel
 *            Libell correspondant  la requte
 * @param queryPattern
 *            Requte d'origine sous forme d'expression rgulire
 * @return Premire occurrence du texte trouv avec le contexte et le
 *         surlignage en HTML
 */
private String abbreviateAndHighlightMatchingLabel(String matchingLabel, Pattern queryPattern) {
    final Matcher matcher = queryPattern.matcher(matchingLabel);
    final int maxDescriptionLength = configuration.getMatchingLabelFirstOccurrenceWidth();
    String abbreviatedVersion;

    if (matcher.find()) {
        final int contextMaxLength = configuration.getMatchingLabelContextLength();
        final int highlightMaxLength = maxDescriptionLength - 2 * contextMaxLength;
        if (highlightMaxLength < 1) {
            throw new IllegalArgumentException(
                    "Invalid configuration: the occurrence width is not long enough to hold the highlighted part and the context.");
        }

        abbreviatedVersion = TextUtils.htmlHighlightOccurrence(matchingLabel, matcher.start(), matcher.end(),
                highlightMaxLength, contextMaxLength, "<em>", "</em>");
    } else {
        /*
         * Pour une certaine raison, les termes trouvs par la recherche ne
         * sont pas localisables dans le texte trait avec Java. On renvoie
         * alors le dbut du libell correspondant.
         */
        abbreviatedVersion = StringEscapeUtils
                .escapeHtml4(TextUtils.leftAbbreviateOnWords(matchingLabel, maxDescriptionLength));
    }

    return abbreviatedVersion;
}