Example usage for java.util.regex Matcher start

Introduction

On this page you can find example usages of java.util.regex.Matcher.start.

Prototype

public int start(String name)

Source Link

Document

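Returns the start index of the subsequence captured by the given named-capturing group during the previous match operation. Note that the examples below call the related overload start(int group), which takes a group index instead of a group name and returns -1 when that group did not take part in the match.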

Usage

From source file:com.nttec.everychan.chans.cirno.MikubaReader.java

/**
 * Rewrites post references found in the buffered comment text.
 *
 * Every capture of group 1 of {@code POST_REFERENCE} that differs from the
 * number of the first entry in {@code postsBuf} and is contained in
 * {@code postsNumBuf} is replaced by that first entry's number. The buffer is
 * used as scratch space and is always left empty on return.
 *
 * @param commentBuffer buffer holding the comment text; emptied by this call
 * @return the rewritten comment, or the original text when nothing was replaced
 */
private String fixPostRefs(StringBuilder commentBuffer) {
    final String original = commentBuffer.toString();
    commentBuffer.setLength(0);
    if (postsBuf == null || postsBuf.size() == 0) {
        return original;
    }

    final Matcher refs = POST_REFERENCE.matcher(original);
    boolean found = refs.find();
    if (!found) {
        return original;
    }

    final String threadNum = postsBuf.get(0).number;
    int copiedUpTo = 0;
    boolean changed = false;
    while (found) {
        final String refNum = refs.group(1);
        // Copy everything up to the captured number, then resume after the whole match.
        commentBuffer.append(original, copiedUpTo, refs.start(1));
        copiedUpTo = refs.end();

        final boolean redirect = !refNum.equals(threadNum) && postsNumBuf.contains(refNum);
        if (redirect) {
            changed = true;
        }
        commentBuffer.append(redirect ? threadNum : refNum);

        found = refs.find();
    }
    commentBuffer.append(original, copiedUpTo, original.length());

    // Only materialise a new string when at least one reference was rewritten.
    final String result = changed ? commentBuffer.toString() : original;
    commentBuffer.setLength(0);
    return result;
}

From source file:Repackage.java

/**
 * Copies one Java source file from the source tree to the target tree,
 * rewriting its package declaration (and therefore its target path) when the
 * declared package starts with one of the configured "from" packages.
 *
 * The file is read from {@code _sourceBase}; the result of
 * {@code _repackager.repackage(sb)} is written under {@code _targetBase}.
 * If the target file is newer than the source, nothing is written and
 * {@code _skippedFiles} is incremented instead.
 *
 * @param name path of the source file, relative to {@code _sourceBase}
 * @throws IOException declared by this method; presumably raised by
 *         readFile/writeFile (their bodies are not visible here)
 * @throws RuntimeException if the file contains two package declarations or
 *         its declared package does not match its directory path
 */
public void repackageJavaFile(String name) throws IOException {
    File sourceFile = new File(_sourceBase, name);
    StringBuffer sb = readFile(sourceFile);

    Matcher packageMatcher = _packagePattern.matcher(sb);

    if (packageMatcher.find()) {
        // Group 1 is the package name; remember its span so it can be replaced in place later.
        String pkg = packageMatcher.group(1);
        int pkgStart = packageMatcher.start(1);
        int pkgEnd = packageMatcher.end(1);

        if (packageMatcher.find())
            throw new RuntimeException("Two package specifications found: " + name);

        List filePath = Repackager.splitPath(name, File.separatorChar);
        String srcDir = Repackager.dirForPath(name);

        // Sort the repackage spec so that longer from's are first to match
        // longest package first
        // NOTE(review): this bubble sort reorders filePath (the components of the
        // file's own path) by the position of ':' in each component, not a
        // repackage spec. For components without ':' both indexOf calls return -1
        // and nothing is swapped -- confirm whether this loop is still intended.

        for (;;) {
            boolean swapped = false;

            for (int i = 1; i < filePath.size(); i++) {
                String spec1 = (String) filePath.get(i - 1);
                String spec2 = (String) filePath.get(i);

                if (spec1.indexOf(':') < spec2.indexOf(':')) {
                    filePath.set(i - 1, spec2);
                    filePath.set(i, spec1);

                    swapped = true;
                }
            }

            if (!swapped)
                break;
        }

        List pkgPath = Repackager.splitPath(pkg, '.');

        // f indexes the last directory component (the final element is the file name).
        int f = filePath.size() - 2;

        if (f < 0 || (filePath.size() - 1) < pkgPath.size())
            throw new RuntimeException("Package spec differs from file path: " + name);

        // Walk package segments and directory components backwards; they must agree.
        for (int i = pkgPath.size() - 1; i >= 0; i--) {
            if (!pkgPath.get(i).equals(filePath.get(f)))
                throw new RuntimeException("Package spec differs from file path: " + name);
            f--;
        }

        List changeTo = null;
        List changeFrom = null;

        // Pick the first configured "from" package that is a prefix of this file's package.
        from: for (int i = 0; i < _fromPackages.size(); i++) {
            List from = (List) _fromPackages.get(i);

            if (from.size() <= pkgPath.size()) {
                for (int j = 0; j < from.size(); j++)
                    if (!from.get(j).equals(pkgPath.get(j)))
                        continue from;

                changeFrom = from;
                changeTo = (List) _toPackages.get(i);

                break;
            }
        }

        if (changeTo != null) {
            String newPkg = "";
            String newName = "";

            // Build the replacement prefix both as a dotted package and as a file path.
            for (int i = 0; i < changeTo.size(); i++) {
                if (i > 0) {
                    newPkg += ".";
                    newName += File.separatorChar;
                }

                newPkg += changeTo.get(i);
                newName += changeTo.get(i);
            }

            // Prepend the leading directories that precede the package part of the path.
            for (int i = filePath.size() - pkgPath.size() - 2; i >= 0; i--)
                newName = (String) filePath.get(i) + File.separatorChar + newName;

            // Append the package segments that follow the matched "from" prefix.
            for (int i = changeFrom.size(); i < pkgPath.size(); i++) {
                newName += File.separatorChar + (String) pkgPath.get(i);
                newPkg += '.' + (String) pkgPath.get(i);
            }

            // Finally the file name itself.
            newName += File.separatorChar + (String) filePath.get(filePath.size() - 1);

            sb.replace(pkgStart, pkgEnd, newPkg);

            name = newName;
            String newDir = Repackager.dirForPath(name);

            // Record directory moves so they can be looked up later via _movedDirs.
            if (!srcDir.equals(newDir)) {
                _movedDirs.put(srcDir, newDir);
            }
        }
    }

    File targetFile = new File(_targetBase, name); // new name

    // Skip the write when the existing target is newer than the source.
    if (sourceFile.lastModified() < targetFile.lastModified()) {
        _skippedFiles += 1;
        return;
    }

    writeFile(new File(_targetBase, name), _repackager.repackage(sb));
}

From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java

/**
 * Scans the attributes of one HTML tag and hands each interesting attribute
 * value to the matching process* method (link, embed or script code).
 * CODEBASE / CLASSID / DATA / ARCHIVE / CODE values are collected first and
 * processed as embeds after the attribute loop, resolved against the codebase
 * when one was present.
 *
 * Which attribute matched is determined by which capturing group of
 * {@code EACH_ATTRIBUTE_EXTRACTOR} participated; the pattern itself is not
 * visible here, so the group meanings below follow the inline comments.
 *
 * @param element the tag name
 * @param cs the text of the tag to scan for attributes
 * @return {@code (tally - next.size()) > 0}, where tally is the size of
 *         {@code next} on entry
 */
protected boolean processGeneralTag(CharSequence element, CharSequence cs) {

    Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs);

    // Just in case it's an OBJECT or APPLET tag
    String codebase = null;
    ArrayList<String> resources = null;
    // Size of next before processing; compared against its size afterwards for the return value.
    long tally = next.size();

    while (attr.find()) {
        // The value is captured by group 12, 13 or 14; use whichever participated
        // (start(n) is -1 for a group that did not match).
        int valueGroup = (attr.start(12) > -1) ? 12 : (attr.start(13) > -1) ? 13 : 14;
        int start = attr.start(valueGroup);
        int end = attr.end(valueGroup);
        CharSequence value = cs.subSequence(start, end);
        if (attr.start(2) > -1) {
            // HREF
            CharSequence context = Link.elementContext(element, attr.group(2));
            if (element.toString().equalsIgnoreCase(LINK)) {
                // <LINK> elements treated as embeds (css, ico, etc)
                processEmbed(value, context);
            } else {
                if (element.toString().equalsIgnoreCase(BASE)) {
                    // A BASE href replaces the base used for later resolution.
                    try {
                        base = UURIFactory.getInstance(value.toString());
                    } catch (URIException e) {
                        extractErrorListener.noteExtractError(e, source, value);
                    }
                }
                // other HREFs treated as links
                processLink(value, context);
            }
        } else if (attr.start(3) > -1) {
            // ACTION
            CharSequence context = Link.elementContext(element, attr.group(3));
            processLink(value, context);
        } else if (attr.start(4) > -1) {
            // ON____
            processScriptCode(value); // TODO: context?
        } else if (attr.start(5) > -1) {
            // SRC etc.
            CharSequence context = Link.elementContext(element, attr.group(5));
            processEmbed(value, context);
        } else if (attr.start(6) > -1) {
            // CODEBASE
            // TODO: more HTML deescaping?
            codebase = TextUtils.replaceAll(ESCAPED_AMP, value, AMP);
            CharSequence context = Link.elementContext(element, attr.group(6));
            processEmbed(codebase, context);
        } else if (attr.start(7) > -1) {
            // CLASSID, DATA
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            resources.add(value.toString());
        } else if (attr.start(8) > -1) {
            // ARCHIVE
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            // The value is split on WHITESPACE; each piece is a separate resource.
            String[] multi = TextUtils.split(WHITESPACE, value);
            for (int i = 0; i < multi.length; i++) {
                resources.add(multi[i]);
            }
        } else if (attr.start(9) > -1) {
            // CODE
            if (resources == null) {
                resources = new ArrayList<String>();
            }
            // If element is applet and code value does not end with
            // '.class' then append '.class' to the code value.
            if (element.toString().toLowerCase().equals(APPLET)
                    && !value.toString().toLowerCase().endsWith(CLASSEXT)) {
                resources.add(value.toString() + CLASSEXT);
            } else {
                resources.add(value.toString());
            }

        } else if (attr.start(10) > -1) {
            // VALUE
            // Only treated as a link when it matches LIKELY_URI_PATH.
            if (TextUtils.matches(LIKELY_URI_PATH, value)) {
                CharSequence context = Link.elementContext(element, attr.group(10));
                processLink(value, context);
            }

        } else if (attr.start(11) > -1) {
            // any other attribute
            // ignore for now
            // could probe for path- or script-looking strings, but
            // those should be vanishingly rare in other attributes,
            // and/or symptomatic of page bugs
        }
    }
    TextUtils.recycleMatcher(attr);

    // handle codebase/resources
    // NOTE(review): tally - next.size() is positive only if next shrank during
    // this call; if the process* methods add to next, this looks inverted -- confirm.
    if (resources == null) {
        return (tally - next.size()) > 0;
    }
    Iterator iter = resources.iterator();
    UURI codebaseURI = null;
    String res = null;
    try {
        if (codebase != null) {
            // TODO: Pass in the charset.
            codebaseURI = UURIFactory.getInstance(base, codebase);
        }
        while (iter.hasNext()) {
            res = iter.next().toString();
            // TODO: more HTML deescaping?
            res = TextUtils.replaceAll(ESCAPED_AMP, res, AMP);
            // Resolve against the codebase when the tag supplied one.
            if (codebaseURI != null) {
                res = codebaseURI.resolve(res).toString();
            }
            processEmbed(res, element); // TODO: include attribute too
        }
    } catch (URIException e) {
        extractErrorListener.noteExtractError(e, source, codebase);
    } catch (IllegalArgumentException e) {
        DevUtils.logger.log(Level.WARNING,
                "processGeneralTag()\n" + "codebase=" + codebase + " res=" + res + "\n" + DevUtils.extraInfo(),
                e);
    }
    return (tally - next.size()) > 0;
}

From source file:com.cyberway.issue.crawler.extractor.ExtractorHTML.java

/**
 * Run extractor./*from  w  ww.j a  va 2  s .  com*/
 * This method is package visible to ease testing.
 * @param curi CrawlURI we're processing.
 * @param cs Sequence from underlying ReplayCharSequence. This
 * is TRANSIENT data. Make a copy if you want the data to live outside
 * of this extractors' lifetime.
 */
void extract(CrawlURI curi, CharSequence cs) {
    Matcher tags = TextUtils.getMatcher(RELEVANT_TAG_EXTRACTOR, cs);
    while (tags.find()) {
        if (Thread.interrupted()) {
            break;
        }
        if (tags.start(8) > 0) {
            // comment match
            // for now do nothing
        } else if (tags.start(7) > 0) {
            // <meta> match
            int start = tags.start(5);
            int end = tags.end(5);
            assert start >= 0 : "Start is: " + start + ", " + curi;
            assert end >= 0 : "End is :" + end + ", " + curi;
            if (processMeta(curi, cs.subSequence(start, end))) {

                // meta tag included NOFOLLOW; abort processing
                break;
            }
        } else if (tags.start(5) > 0) {
            // generic <whatever> match
            int start5 = tags.start(5);
            int end5 = tags.end(5);
            assert start5 >= 0 : "Start is: " + start5 + ", " + curi;
            assert end5 >= 0 : "End is :" + end5 + ", " + curi;
            int start6 = tags.start(6);
            int end6 = tags.end(6);
            assert start6 >= 0 : "Start is: " + start6 + ", " + curi;
            assert end6 >= 0 : "End is :" + end6 + ", " + curi;
            processGeneralTag(curi, cs.subSequence(start6, end6), cs.subSequence(start5, end5));

        } else if (tags.start(1) > 0) {
            // <script> match
            int start = tags.start(1);
            int end = tags.end(1);
            assert start >= 0 : "Start is: " + start + ", " + curi;
            assert end >= 0 : "End is :" + end + ", " + curi;
            assert tags.end(2) >= 0 : "Tags.end(2) illegal " + tags.end(2) + ", " + curi;
            processScript(curi, cs.subSequence(start, end), tags.end(2) - start);

        } else if (tags.start(3) > 0) {
            // <style... match
            int start = tags.start(3);
            int end = tags.end(3);
            assert start >= 0 : "Start is: " + start + ", " + curi;
            assert end >= 0 : "End is :" + end + ", " + curi;
            assert tags.end(4) >= 0 : "Tags.end(4) illegal " + tags.end(4) + ", " + curi;
            processStyle(curi, cs.subSequence(start, end), tags.end(4) - start);
        }
    }
    TextUtils.recycleMatcher(tags);
}

From source file:com.haulmont.cuba.gui.config.WindowConfig.java

/**
 * Get screen information by screen ID./*from w  w  w. j  a v  a 2s  . c o  m*/
 *
 * @param id         screen ID as set up in <code>screens.xml</code>
 * @param deviceInfo target device info
 * @return screen's registration information or null if not found
 */
@Nullable
public WindowInfo findWindowInfo(String id, @Nullable DeviceInfo deviceInfo) {
    lock.readLock().lock();
    try {
        checkInitialized();

        List<WindowInfo> infos = screens.get(id);

        if (infos == null) {
            Matcher matcher = ENTITY_SCREEN_PATTERN.matcher(id);
            if (matcher.matches()) {
                MetaClass metaClass = metadata.getClass(matcher.group(1));
                if (metaClass == null) {
                    return null;
                }

                MetaClass originalMetaClass = metadata.getExtendedEntities().getOriginalMetaClass(metaClass);
                if (originalMetaClass != null) {
                    String originalId = new StringBuilder(id)
                            .replace(matcher.start(1), matcher.end(1), originalMetaClass.getName()).toString();
                    infos = screens.get(originalId);
                }
            }
        }

        List<WindowInfo> foundWindowInfos = infos;

        if (foundWindowInfos != null) {
            // do not perform stream processing in a simple case
            if (foundWindowInfos.size() == 1 && foundWindowInfos.get(0).getScreenAgent() == null) {
                return foundWindowInfos.get(0);
            }

            if (deviceInfo == null) {
                // find default screen
                return foundWindowInfos.stream().filter(windowInfo -> windowInfo.getScreenAgent() == null)
                        .findFirst().orElse(null);
            } else {
                return infos.stream().filter(
                        wi -> wi.getScreenAgent() != null && wi.getScreenAgent().isSupported(deviceInfo))
                        .findFirst()
                        .orElseGet(() -> foundWindowInfos.stream()
                                .filter(windowInfo -> windowInfo.getScreenAgent() == null).findFirst()
                                .orElse(null));
            }
        }

        return null;
    } finally {
        lock.readLock().unlock();
    }
}

From source file:biz.astute.test.simulator.rest.RequestContext.java

/**
 * Return path portion of URL. The url may be modified to extract variables.
 *
 * @param globalProperties global properties
 * @return path portion of url/*from  w  w  w  .j  av  a 2s .co m*/
 * @throws UnsupportedEncodingException exception
 */
public final String getResourcePath(final Properties globalProperties) throws UnsupportedEncodingException {

    uriProperties.clear();
    String requestURI = URLDecoder.decode(request.getRequestURI(), "utf-8");
    Pattern[] currentPatterns = getPatterns(globalProperties);
    if (currentPatterns.length < 1) {
        return requestURI;
    }

    StringBuilder resourceName = new StringBuilder(requestURI);
    resourceName.append('/'); // Remove this later - need for matcher

    for (Pattern pattern : currentPatterns) {
        Matcher matcher = pattern.matcher(resourceName);
        if (matcher.matches() && (matcher.groupCount() > 0)) {
            for (int index = 1; index <= matcher.groupCount(); index++) {
                String matched = matcher.group(index);
                uriProperties.add(matched);
            }
            // Do so in reverse order so as to not affect offset
            for (int index = matcher.groupCount(); index > 0; index--) {
                resourceName.replace(matcher.start(index), matcher.end(index), StringUtils.EMPTY);
            }
            break;
        }
    }

    // remove '/' appended earlier
    resourceName.setLength(resourceName.length() - 1);
    // Remove any // that result from pattern replacement
    return resourceName.toString().replaceAll("//", "/");
}

From source file:com.cyberway.issue.crawler.extractor.ExtractorHTML.java

/**
 * Process metadata tags./*from  ww  w  . j ava 2  s.  c  om*/
 * @param curi CrawlURI we're processing.
 * @param cs Sequence from underlying ReplayCharSequence. This
 * is TRANSIENT data. Make a copy if you want the data to live outside
 * of this extractors' lifetime.
 * @return True robots exclusion metatag.
 */
protected boolean processMeta(CrawlURI curi, CharSequence cs) {
    Matcher attr = TextUtils.getMatcher(EACH_ATTRIBUTE_EXTRACTOR, cs);
    String name = null;
    String httpEquiv = null;
    String content = null;
    while (attr.find()) {
        int valueGroup = (attr.start(14) > -1) ? 14 : (attr.start(15) > -1) ? 15 : 16;
        CharSequence value = cs.subSequence(attr.start(valueGroup), attr.end(valueGroup));
        if (attr.group(1).equalsIgnoreCase("name")) {
            name = value.toString();
        } else if (attr.group(1).equalsIgnoreCase("http-equiv")) {
            httpEquiv = value.toString();
        } else if (attr.group(1).equalsIgnoreCase("content")) {
            content = value.toString();
        }
        // TODO: handle other stuff
    }
    TextUtils.recycleMatcher(attr);

    // Look for the 'robots' meta-tag
    if ("robots".equalsIgnoreCase(name) && content != null) {
        curi.putString(A_META_ROBOTS, content);
        RobotsHonoringPolicy policy = getSettingsHandler().getOrder().getRobotsHonoringPolicy();
        String contentLower = content.toLowerCase();
        if ((policy == null || (!policy.isType(curi, RobotsHonoringPolicy.IGNORE)
                && !policy.isType(curi, RobotsHonoringPolicy.CUSTOM)))
                && (contentLower.indexOf("nofollow") >= 0 || contentLower.indexOf("none") >= 0)) {
            // if 'nofollow' or 'none' is specified and the
            // honoring policy is not IGNORE or CUSTOM, end html extraction
            logger.fine("HTML extraction skipped due to robots meta-tag for: " + curi.toString());
            return true;
        }
    } else if ("refresh".equalsIgnoreCase(httpEquiv) && content != null) {
        int urlIndex = content.indexOf("=") + 1;
        if (urlIndex > 0) {
            String refreshUri = content.substring(urlIndex);
            try {
                curi.createAndAddLinkRelativeToBase(refreshUri, "meta", Link.REFER_HOP);
            } catch (URIException e) {
                if (getController() != null) {
                    getController().logUriError(e, curi.getUURI(), refreshUri);
                } else {
                    logger.info("Failed createAndAddLinkRelativeToBase " + curi + ", " + cs + ", " + refreshUri
                            + ": " + e);
                }
            }
        }
    }
    return false;
}

From source file:com.hichinaschool.flashcards.libanki.Media.java

/**
 * Percent-escape UTF-8 characters in local image filenames.
 *
 * Matches of {@code fMediaRegexps[1]} whose file name (group 2) matches
 * {@code fRemoteFilePattern} are copied unchanged; for all others the file
 * name inside the tag (group 1) is replaced by {@code Uri.encode(fname)}.
 *
 * @param string The string to search for image references and escape the filenames.
 * @return The string with the filenames of any local images percent-escaped as UTF-8.
 */
public String escapeImages(String string) {
    Matcher m = fMediaRegexps[1].matcher(string);
    StringBuffer sb = new StringBuffer();
    while (m.find()) {
        String fname = m.group(2);
        String replacement;
        if (fRemoteFilePattern.matcher(fname).find()) {
            replacement = m.group();
        } else {
            String tag = m.group(1);
            // start(2)/end(2) are offsets into the whole input, while tag starts
            // at start(1); shift them so they index into tag correctly even when
            // the tag is not at the very beginning of the input.
            int tagOffset = m.start(1);
            String tagBegin = tag.substring(0, m.start(2) - tagOffset);
            String tagEnd = tag.substring(m.end(2) - tagOffset);
            replacement = tagBegin + Uri.encode(fname) + tagEnd;
        }
        // Quote the text so '$' and '\' in a tag are inserted literally instead
        // of being interpreted as group references by appendReplacement.
        m.appendReplacement(sb, Matcher.quoteReplacement(replacement));
    }
    m.appendTail(sb);
    return sb.toString();
}

From source file:com.app.util.browser.BrowserSniffer.java

/**
 * Collects the captured groups of every match of {@code pat} in {@code str}.
 *
 * For each match, groups {@code 0 .. countGroups - 1} are read; a group that
 * did not participate or captured an empty string is stored as {@code null}.
 * A match contributes a {@code String[]} to the result only if at least one
 * of those groups is non-empty. Any exception is logged and the matches
 * gathered so far are returned.
 *
 * @param pat pattern to apply
 * @param str text to search
 * @param countGroups number of groups to read per match, starting at group 0
 * @return list of {@code String[]}, one per contributing match
 */
private ArrayList getMatches(Pattern pat, String str, int countGroups) {
    Matcher m = pat.matcher(str);
    ArrayList results = new ArrayList();
    try {
        while (m.find()) {
            ArrayList<String> captured = new ArrayList<String>();
            boolean anyCaptured = false;
            for (int g = 0; g < countGroups; g++) {
                int from = m.start(g);
                int to = m.end(g);
                String text = null;
                if (from >= 0 && to >= 0) {
                    String candidate = str.substring(from, to);
                    if (StringUtils.isNotEmpty(candidate)) {
                        text = candidate;
                        anyCaptured = true;
                    }
                }
                captured.add(text);
            }
            // Rows consisting only of nulls (or no groups at all) are dropped.
            if (anyCaptured) {
                results.add(captured.toArray(new String[captured.size()]));
            }
        }
    } catch (Exception e) {
        log.error(e);
    }

    return results;
}

From source file:com.edgenius.wiki.render.filter.MacroFilter.java

/**
 * Finds paired macros (an opening and a closing occurrence of the same macro
 * name) in {@code input} and appends one Region per pair to {@code list}.
 * When the macro reports {@code isProcessEmbedded()}, the text between the
 * pair is scanned recursively.
 *
 * @param initPos offset added to every position found in {@code input}, so
 *        regions are expressed relative to the outermost text
 * @param input text to scan
 * @param list receives the regions that were found
 */
private void resetRegion(final int initPos, final CharSequence input, final List<Region> list) {

    // Pass 1: collect every occurrence of a known, paired macro.
    final List<Region> pairRegions = new ArrayList<Region>();

    singleMacroProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() {
        public void handleMatch(StringBuffer buffer, Matcher result) {
            String macroName = result.group(1);
            // Names starting with "$" are skipped here.
            if (macroName != null && !macroName.startsWith("$")) {
                Macro macro = macroMgr.getMacro(macroName);
                if (macro != null && macro.isPaired()) {
                    String body = result.group(0);
                    int start = result.start(0);
                    int end = result.end(0);
                    Region pair = new Region(start, end);
                    //no parameter, then mark as unknown, otherwise, must be a start macro
                    if (StringUtils.isBlank(result.group(2))) {
                        pair.setKey(MACRO_REGION_KEY_UNKNOWN);
                    } else {
                        pair.setKey(MACRO_REGION_KEY_START);
                    }

                    //just for temporary to remember the macro name...
                    pair.setContent(macroName);
                    pair.setBody(body);
                    // add to the list of candidates
                    pairRegions.add(pair);
                }
            }
        }
    });

    // Pass 2: match each candidate with a later candidate of the same name.
    int size = pairRegions.size();
    if (size > 0) {
        StringBuffer inputBuf = new StringBuffer(input);
        for (int idx = 0; idx < size; idx++) {
            Region reg = pairRegions.get(idx);
            // Nesting depth of same-name START macros seen since reg.
            int deep = 0;
            Region pair = null;
            //looking for pairs...
            for (int chIdx = idx + 1; chIdx < size; chIdx++) {
                Region next = pairRegions.get(chIdx);
                if (StringUtils.equalsIgnoreCase(reg.getContent(), next.getContent())) {
                    //start is unknown (no attribute), then end must be unknown
                    if (MACRO_REGION_KEY_UNKNOWN.equals(reg.getKey())
                            && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) {
                        //matched
                        pair = next;
                        //skip all internal node - which is handle by embedded recursive
                        idx = chIdx;
                        break;
                    }

                    if (MACRO_REGION_KEY_START.equals(reg.getKey())
                            && MACRO_REGION_KEY_UNKNOWN.equals(next.getKey())) {
                        if (deep == 0) {
                            //matched;
                            pair = next;
                            //skip all internal node - which is handle by embedded recursive
                            idx = chIdx;
                            break;
                        } else {
                            //just another inner same name macro matched, deep minus
                            deep--;
                        }
                    }
                    if (MACRO_REGION_KEY_START.equals(next.getKey())) {
                        //ok, it gets another start, in 4th scenarios - then add deep
                        deep++;
                    }
                }
            }
            //ok, success find paired
            if (pair != null) {
                // Outer span covers both markers; content span is the text between them.
                int start = initPos + reg.getStart();
                int end = initPos + pair.getEnd();
                int contentStart = initPos + reg.getEnd();
                int contentEnd = initPos + pair.getStart();

                String macroName = reg.getContent();
                Macro macro = macroMgr.getMacro(macroName);
                boolean immutable = macro instanceof ImmutableContentMacro;

                list.add(new Region(MacroFilter.this, immutable, start, end, contentStart, contentEnd));
                if (macro.isProcessEmbedded() && (end > start)) {
                    // Recurse into the content; subtract initPos to index into this
                    // call's input, and pass contentStart as the new base offset.
                    resetRegion(contentStart,
                            inputBuf.subSequence(contentStart - initPos, contentEnd - initPos), list);
                }
            }
        }
    }

}