Example usage for java.util.regex Matcher start

Introduction

This page collects example usages of java.util.regex.Matcher.start() taken from open-source projects.

Prototype

public int start()

Source Link

Document

Returns the start index of the previous match.

Usage

From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java

/**
 * processHeader/*from  www .java2  s .co  m*/
 * 
 * @param headerPattern
 * @param f
 * @param meta
 */
@SuppressWarnings("deprecation")
private void processHeader(Pattern headerPattern, DocumentPojo f, List<metaField> meta, SourcePojo source,
        UnstructuredAnalysisConfigPojo uap) {
    if (headerPattern != null) {
        Matcher headerMatcher = headerPattern.matcher(f.getFullText());
        String headerText = null;
        while (headerMatcher.find()) {
            if (headerMatcher.start() == 0) {
                headerText = headerMatcher.group(0);
                f.setHeaderEndIndex(headerText.length());
                for (int i = 1; i < headerMatcher.groupCount() + 1; i++) {
                    f.addToHeader(headerMatcher.group(i).trim());
                }
                break;
            }
        }
        if (null != headerText && null != meta) {
            for (metaField m : meta) {
                if (m.context == Context.Header || m.context == Context.All) {
                    this.processMeta(f, m, headerText, source, uap);
                }
            }
        }
    }
}

From source file:com.dwdesign.tweetings.activity.ComposeActivity.java

private final void gatherLinks(ArrayList<Hyperlink> links, Spannable s, Pattern pattern) {
    // Matcher matching the pattern
    Matcher m = pattern.matcher(s);

    while (m.find()) {
        int start = m.start();
        int end = m.end();

        /*/*from   ww w  .  j  a v  a  2s .c  o m*/
        *  Hyperlink is basically used like a structure for storing the information about
        *  where the link was found.
        */
        Hyperlink spec = new Hyperlink();

        spec.textSpan = s.subSequence(start, end);
        spec.span = new InternalURLSpan(spec.textSpan.toString());
        spec.start = start;
        spec.end = end;

        links.add(spec);
    }
}

From source file:net.yacy.cora.document.id.MultiProtocolURL.java

/**
 * Resolve '..' segments in the path.
 * For standard pseudo algorithms, see :
 * <ul>
 * <li>https://tools.ietf.org/html/rfc3986#section-5.2.4</li>
 * <li>https://url.spec.whatwg.org/#path-state</li>
 * <li>https://www.w3.org/TR/url/#relative-path-state</li>
 * </ul>
 * A leading '/' is added when the path does not already start with one, and an empty result is
 * returned as "/".
 *
 * @param path URL path part : must not be null
 * @return the path with '..' segments resolved
 */
private static final String resolveBackpath(final String path) {
    // always work on a path that starts with a slash
    String resolved = (path.isEmpty() || path.charAt(0) != '/') ? "/" + path : path;

    // do not resolve backpaths in the post values: remember where the first query marker is.
    // The limit is computed once, before any segment is removed.
    final Matcher queryMatcher = CommonPattern.QUESTION.matcher(resolved);
    final int pathEnd;
    if (queryMatcher.find()) {
        pathEnd = queryMatcher.start();
    } else {
        pathEnd = resolved.length();
    }

    // Repeat until the first remaining match lies behind the limit (or nothing matches any more).
    // NOTE(review): replaceAll removes every match in the string, including any located after
    // the limit - confirm this is intended.
    final Matcher backPathMatcher = backPathPattern.matcher(resolved);
    while (backPathMatcher.find() && backPathMatcher.start() <= pathEnd) {
        resolved = backPathMatcher.replaceAll("");
        backPathMatcher.reset(resolved);
    }

    /* Let's remove any eventual remaining but inappropriate '..' segments at the beginning.
     * See https://tools.ietf.org/html/rfc3986#section-5.2.4 -> parts 2.C and 2.D */
    while (resolved.startsWith("/../")) {
        resolved = resolved.substring(3);
    }
    if (resolved.equals("/..")) {
        return "/";
    }
    return resolved.isEmpty() ? "/" : resolved;
}

From source file:net.sf.jabref.wizard.auximport.AuxSubGenerator.java

/**
 * parseAuxFile read the Aux file and fill up some intern data structures. Nested aux files (latex \\include)
 * supported!/*  www .j a va 2  s  . c  om*/
 *
 * @param filename String : Path to LatexAuxFile
 * @return boolean, true = no error occurs
 */

// found at comp.text.tex
//  > Can anyone tell be the information held within a .aux file?  Is there a
//  > specific format to this file?
//
// I don't think there is a particular format. Every package, class
// or document can write to the aux file. The aux file consists of LaTeX macros
// and is read at the \begin{document} and again at the \end{document}.
//
// It usually contains information about existing labels
//  \\newlabel{sec:Intro}{{1}{1}}
// and citations
//  \citation{hiri:conv:1993}
// and macros to write information to other files (like toc, lof or lot files)
//  \@writefile{toc}{\contentsline {section}{\numberline
// {1}Intro}{1}}
// but as I said, there can be a lot more

// aux file :
//
// \\citation{x}  x = used reference of bibtex library entry
//
// \\@input{x}  x = nested aux file
//
// the \\bibdata{x} directive contains information about the
// bibtex library file -> x = name of bib file
//
// \\bibcite{x}{y}
//   x is a label for an item and y is the index in bibliography
private boolean parseAuxFile(String filename) {
    // regular expressions
    Matcher matcher;

    // while condition
    boolean cont;

    // return value -> default: no error
    boolean back = true;

    // file list, used for nested aux files
    List<String> fileList = new ArrayList<>(5);
    fileList.add(filename);

    // get the file path
    File dummy = new File(filename);
    String path = dummy.getParent();
    if (path == null) {
        path = "";
    } else {
        path = path + File.separator;
    }

    nestedAuxCounter = -1; // count only the nested reads

    // index of current file in list
    int fileIndex = 0;

    while (fileIndex < fileList.size()) {
        String fName = fileList.get(fileIndex);
        try (BufferedReader br = new BufferedReader(new FileReader(fName))) {
            cont = true;

            while (cont) {
                Optional<String> maybeLine;
                try {
                    maybeLine = Optional.ofNullable(br.readLine());
                } catch (IOException ioe) {
                    maybeLine = Optional.empty();
                }

                if (maybeLine.isPresent()) {
                    String line = maybeLine.get();
                    matcher = TAG_PATTERN.matcher(line);

                    while (matcher.find()) {
                        // extract the bibtex-key(s) XXX from \citation{XXX} string
                        int len = matcher.end() - matcher.start();
                        if (len > 11) {
                            String str = matcher.group(2);
                            // could be an comma separated list of keys
                            String[] keys = str.split(",");
                            if (keys != null) {
                                for (String dummyStr : keys) {
                                    if (dummyStr != null) {
                                        // delete all unnecessary blanks and save key into an set
                                        mySet.add(dummyStr.trim());
                                    }
                                }
                            }
                        }
                    }
                    // try to find a nested aux file
                    int index = line.indexOf("\\@input{");
                    if (index >= 0) {
                        int start = index + 8;
                        int end = line.indexOf('}', start);
                        if (end > start) {
                            String str = path + line.substring(index + 8, end);

                            // if filename already in file list
                            if (!fileList.contains(str)) {
                                fileList.add(str); // insert file into file list
                            }
                        }
                    }
                } else {
                    cont = false;
                }
            }
            nestedAuxCounter++;
        } catch (FileNotFoundException e) {
            LOGGER.info("Cannot locate input file!", e);
        } catch (IOException e) {
            LOGGER.warn("Problem opening file!", e);
        }

        fileIndex++; // load next file
    }

    return back;
}

From source file:au.org.ala.names.search.ALANameSearcher.java

/**
 * Update the rank for the name based on it containing rank strings.
 * Provides a bit of a sanity check on the name matching.  If we expect a
 * species we don't want to match on a genus
 *
 * @param name/*from  w w  w  .  j  av a 2s  . co m*/
 * @param rank
 */
private RankType getUpdatedRank(String name, RankType rank) {
    Matcher matcher = RANK_MARKER.matcher(name);

    if (matcher.find()) {
        String value = name.substring(matcher.start(), matcher.end());
        log.debug("Changing rank to : " + value);
        if (value.endsWith("."))
            rank = RankType.getForCBRank(Rank.RANK_MARKER_MAP.get(value.substring(1, value.length() - 1)));
        log.debug("Using the new rank " + rank);
    }
    return rank;
}

From source file:fr.gouv.culture.thesaurus.service.impl.SesameThesaurus.java

/**
 * Abbreviates the label by returning only the first occurrence of the matched text, together
 * with its context and with the matched terms highlighted. If no occurrence is found, the first
 * part of the label is returned instead.
 *
 * @param matchingLabel
 *            Label matching the query
 * @param queryPattern
 *            Original query as a regular expression
 * @return First occurrence of the matched text with its context and the highlighting, as HTML
 * @throws IllegalArgumentException
 *             if the configured occurrence width leaves less than one character for the
 *             highlighted part once the context on both sides is subtracted
 */
private String abbreviateAndHighlightMatchingLabel(String matchingLabel, Pattern queryPattern) {
    final Matcher occurrence = queryPattern.matcher(matchingLabel);
    final int maxWidth = configuration.getMatchingLabelFirstOccurrenceWidth();

    if (!occurrence.find()) {
        /*
         * For some reason, the terms found by the search cannot be located in the text
         * processed with Java. The beginning of the matching label is returned instead.
         */
        final String labelStart = TextUtils.leftAbbreviateOnWords(matchingLabel, maxWidth);
        return StringEscapeUtils.escapeHtml4(labelStart);
    }

    // the context is shown on both sides of the highlighted part
    final int contextWidth = configuration.getMatchingLabelContextLength();
    final int highlightWidth = maxWidth - 2 * contextWidth;
    if (highlightWidth < 1) {
        throw new IllegalArgumentException(
                "Invalid configuration: the occurrence width is not long enough to hold the highlighted part and the context.");
    }

    return TextUtils.htmlHighlightOccurrence(matchingLabel, occurrence.start(), occurrence.end(),
            highlightWidth, contextWidth, "<em>", "</em>");
}

From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java

/**
 * Retrieces the category of the article
 * from its content, and more particularly
 * from its first sentence, which generally
 * takes the following form in biographies:
 * "Firstname Lastname (19xx-19xx) was/is a politician/artist/etc."
 * /*  ww w  . ja va2  s .  c  o  m*/
 * @param article
 *       Article to be processed.
 * @return
 *       The identified categories, possibly an empty list if none
 *       could be identified.
 */
public List<ArticleCategory> getArticleCategoriesFromContent(Article article) {
    logger.log("Using the article content to retrieve its categories");
    Set<ArticleCategory> categories = new TreeSet<ArticleCategory>();
    logger.increaseOffset();

    // get first sentence
    String text = article.getRawText();
    String firstSentence = null;
    Pattern pattern = Pattern.compile("[a-zA-Z0-9]{3,}\\. ");
    Matcher matcher = pattern.matcher(text);
    if (!matcher.find())
        logger.log("Could not find the first sentence of the article");
    else {
        int i = matcher.end();
        firstSentence = text.substring(0, i);
        logger.log("First sentence of the article: \"" + firstSentence + "\"");

        // identify state verb (to be)
        int index = firstSentence.length();
        String verb = null;
        for (String v : STATE_VERBS) {
            pattern = Pattern.compile("[^a-zA-Z0-9]" + v + "[^a-zA-Z0-9]");
            matcher = pattern.matcher(firstSentence);
            if (matcher.find()) {
                i = matcher.start();
                if (i > -1 && i < index) {
                    index = i;
                    verb = v;
                }
            }
        }
        if (verb == null)
            logger.log("WARNING: could not find any state verb in the first sentence");
        else {
            logger.log("State verb detected in the sentence: '" + verb + "'");

            // look for key words located in the second part of the sentence (after the verb)
            firstSentence = firstSentence.substring(index + verb.length());
            logger.log("Focusing on the end of the sentence: \"" + firstSentence + "\"");
            logger.increaseOffset();
            String temp[] = firstSentence.split("[^a-zA-Z0-9]");
            for (String key : temp) {
                if (!key.isEmpty()) {
                    ArticleCategory cat = CONTENT_CONVERSION_MAP.get(key);
                    if (cat == null) {
                        logger.log(key + ": no associated category");
                    } else {
                        categories.add(cat);
                        logger.log(key + ": category " + cat);
                    }
                }
            }
            logger.decreaseOffset();
        }
    }

    List<ArticleCategory> result = new ArrayList<ArticleCategory>(categories);
    Collections.sort(result);
    logger.decreaseOffset();
    logger.log("detected categories: " + result.toString());
    return result;
}

From source file:com.aliyun.odps.conf.Configuration.java

/**
 * Replaces variable references of the form ${name} in {@code expr}, one reference per round.
 * <p>
 * Each round resolves the first reference found by {@code varPat}: the system property of that
 * name is tried first, then {@code getRaw}. The result is scanned again from the start, so
 * substituted values may themselves contain references.
 *
 * @param expr expression to expand; may be {@code null}
 * @return {@code null} for a {@code null} input; otherwise the expression with references
 *         substituted, up to the first reference that is bound nowhere (which is left as is)
 * @throws IllegalStateException if references remain after {@code MAX_SUBST} substitutions
 */
private String substituteVars(String expr) {
    if (expr == null) {
        return null;
    }

    final Matcher matcher = varPat.matcher("");
    String current = expr;
    int round = 0;
    while (round < MAX_SUBST) {
        matcher.reset(current);
        if (!matcher.find()) {
            return current;
        }

        final String reference = matcher.group();
        final String name = reference.substring(2, reference.length() - 1); // remove ${ .. }

        // system properties take precedence over the configuration's own values
        String value = null;
        try {
            value = System.getProperty(name);
        } catch (SecurityException se) {
            LOG.warn("No permission to get system property: " + name);
        }
        if (value == null) {
            value = getRaw(name);
        }
        if (value == null) {
            return current; // return literal ${var}: var is unbound
        }

        // substitute
        final String head = current.substring(0, matcher.start());
        final String tail = current.substring(matcher.end());
        current = head + value + tail;
        round++;
    }
    throw new IllegalStateException("Variable substitution depth too large: " + MAX_SUBST + " " + expr);
}

From source file:cn.suishen.email.activity.MessageViewFragmentBase.java

/**
 * Reload the body from the provider cursor.  This must only be called from the UI thread.
 * <p>
 * When there is no HTML part, the plain text is escaped, every URL found in it is turned into a
 * link, and the result is wrapped in a minimal HTML document. Otherwise the HTML part is shown
 * as is and checked for image tags. The attachments are requested once the body has been set.
 *
 * @param bodyText text part
 * @param bodyHtml html part
 * @param autoShowPictures whether network loads are unblocked right away when the HTML part
 *        contains images
 *
 * TODO deal with html vs text and many other issues <- WHAT DOES IT MEAN??
 */
private void reloadUiFromBody(String bodyText, String bodyHtml, boolean autoShowPictures) {
    String text = null;
    mHtmlTextRaw = null;
    boolean hasImages = false;

    if (bodyHtml == null) {
        text = bodyText;
        /*
         * Convert the plain text to HTML
         */
        StringBuffer sb = new StringBuffer("<html><body>");
        if (text != null) {
            // Escape any inadvertent HTML in the text message
            text = EmailHtmlUtil.escapeCharacterToDisplay(text);
            // Find any embedded URL's and linkify
            Matcher m = Patterns.WEB_URL.matcher(text);
            while (m.find()) {
                int start = m.start();
                /*
                 * WEB_URL_PATTERN may match domain part of email address. To detect
                 * this false match, the character just before the matched string
                 * should not be '@'.
                 */
                if (start == 0 || text.charAt(start - 1) != '@') {
                    String url = m.group();
                    Matcher proto = WEB_URL_PROTOCOL.matcher(url);
                    String link;
                    if (proto.find()) {
                        // This is work around to force URL protocol part be lower case,
                        // because WebView could follow only lower case protocol link.
                        link = proto.group().toLowerCase() + url.substring(proto.end());
                    } else {
                        // Patterns.WEB_URL matches URL without protocol part,
                        // so added default protocol to link.
                        link = "http://" + url;
                    }
                    String href = String.format("<a href=\"%s\">%s</a>", link, url);
                    // appendReplacement treats '$' and '\' in its argument as group references
                    // and escapes. The anchor contains the matched URL, so it has to be quoted
                    // to be inserted literally.
                    m.appendReplacement(sb, Matcher.quoteReplacement(href));
                } else {
                    // false match: copy the matched text unchanged
                    m.appendReplacement(sb, "$0");
                }
            }
            m.appendTail(sb);
        }
        sb.append("</body></html>");
        text = sb.toString();
    } else {
        text = bodyHtml;
        mHtmlTextRaw = bodyHtml;
        hasImages = IMG_TAG_START_REGEX.matcher(text).find();
    }

    // TODO this is not really accurate.
    // - Images aren't the only network resources.  (e.g. CSS)
    // - If images are attached to the email and small enough, we download them at once,
    //   and won't need network access when they're shown.
    if (hasImages) {
        if (mRestoredPictureLoaded || autoShowPictures) {
            blockNetworkLoads(false);
            addTabFlags(TAB_FLAGS_PICTURE_LOADED); // Set for next onSaveInstanceState

            // Make sure to reset the flag -- otherwise this will keep taking effect even after
            // moving to another message.
            mRestoredPictureLoaded = false;
        } else {
            addTabFlags(TAB_FLAGS_HAS_PICTURES);
        }
    }
    setMessageHtml(text);

    // Ask for attachments after body
    new LoadAttachmentsTask().executeParallel(mMessage.mId);

    mIsMessageLoadedForTest = true;
}

From source file:de.dfki.iui.opentok.cordova.plugin.OpenTokPlugin.java

/**
 * Reads the argument at {@code index} as a whole number.
 * <p>
 * The value is first read with {@code getInt}. If that fails, the argument is read as a string
 * and the first decimal number found in it is used, truncated toward zero; both '.' and ','
 * are accepted as decimal separator.
 *
 * @param args argument array to read from
 * @param index position of the argument
 * @return the extracted value, or 0 when the string contains no number
 * @throws JSONException if the argument cannot be read as a string either
 */
private int extractDp(JSONArray args, int index) throws JSONException {
    try {
        return args.getInt(index);
    } catch (JSONException ignored) {
        // not directly readable as an int: fall back to scanning the string form below
    }

    String temp = args.getString(index);

    //simple decimal number pattern
    Pattern p = Pattern.compile("([+-]?\\d+([,.]\\d+)?)");
    Matcher m = p.matcher(temp);
    if (!m.find()) {
        return 0;
    }

    // The pattern also accepts a fractional part, which Integer.parseInt rejects with a
    // NumberFormatException. Parse the match as a decimal number and drop the fraction instead.
    String number = temp.substring(m.start(), m.end()).replace(',', '.');
    return (int) Double.parseDouble(number);
}