Example usage for java.util.regex Matcher end

Introduction

This page collects example usages of java.util.regex.Matcher.end.

Prototype

public int end(String name)

Source Link

Document

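Returns the offset after the last character of the subsequence captured by the given named-capturing group during the previous match operation. Note that the examples below call the overload end(int group), which identifies the capturing group by its index rather than by its name.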

Usage

From source file:de.micromata.genome.gwiki.utils.StringUtils.java

/**
 * Replaces the text captured by one group in every match of a pattern.
 *
 * <p>For each match of {@code p} in {@code text}, the substring captured by {@code group} is
 * handed to {@code replacer} and the returned value is written in its place. Everything outside
 * the captured group (including the rest of each match) is copied unchanged.
 *
 * @param text the input; returned unchanged when {@code isEmpty(text)} is true
 * @param p the pattern to search for
 * @param group index of the capturing group whose text is replaced
 * @param replacer callback that receives the captured text and returns its replacement
 * @return the text with every captured occurrence of the group replaced
 */
public static String replace(String text, Pattern p, int group,
        CallableX1<String, String, RuntimeException> replacer) {
    if (isEmpty(text)) {
        return text;
    }
    StringBuilder sb = new StringBuilder();
    Matcher m = p.matcher(text);
    int lastIdx = 0;
    while (m.find()) {
        int idx = m.start(group);
        if (idx < 0) {
            // The group did not take part in this match (for example an unused alternative).
            // start/end are -1 in that case and must not be fed to substring.
            continue;
        }
        int eidx = m.end(group);
        if (idx > lastIdx) {
            sb.append(text, lastIdx, idx);
        }
        sb.append(replacer.call(text.substring(idx, eidx)));
        lastIdx = eidx;
    }
    if (lastIdx < text.length()) {
        sb.append(text, lastIdx, text.length());
    }
    return sb.toString();
}

From source file:dk.netarkivet.harvester.harvesting.extractor.ExtractorJS.java

/**
 * Scans a character sequence for JavaScript string literals and registers the ones that look
 * like URIs as speculative links on the given crawl URI.
 *
 * <p>Literals that do not look like a URI are searched recursively for nested literals.
 *
 * @param curi the crawl URI that receives the discovered links
 * @param cs the text to scan
 * @param controller used to log URI errors; may be {@code null}, in which case errors go to the
 *        logger instead
 * @param handlingJSFile when true links are added relative to the via URI, otherwise relative
 *        to the base URI
 * @return the number of likely-URI literals found, including those found by recursion
 */
public static long considerStrings(CrawlURI curi, CharSequence cs, CrawlController controller,
        boolean handlingJSFile) {
    long linkCount = 0;
    Matcher literals = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs);
    while (literals.find()) {
        CharSequence candidate = cs.subSequence(literals.start(2), literals.end(2));

        if (!UriUtils.isLikelyUriJavascriptContextLegacy(candidate)) {
            // Not a URI itself: the literal may still contain further quoted strings.
            linkCount += considerStrings(curi, candidate, controller, handlingJSFile);
            continue;
        }

        String link = UriUtils.speculativeFixup(
                StringEscapeUtils.unescapeJavaScript(candidate.toString()), curi.getUURI());
        linkCount++;
        try {
            if (handlingJSFile) {
                curi.createAndAddLinkRelativeToVia(link, Link.JS_MISC, Link.SPECULATIVE_HOP);
            } else {
                curi.createAndAddLinkRelativeToBase(link, Link.JS_MISC, Link.SPECULATIVE_HOP);
            }
        } catch (URIException e) {
            // There may not be a controller (e.g. if we're being run by the extractor tool).
            if (controller == null) {
                LOGGER.info(curi + ", " + link + ": " + e.getMessage());
            } else {
                controller.logUriError(e, curi.getUURI(), link);
            }
        }
    }
    TextUtils.recycleMatcher(literals);
    return linkCount;
}

From source file:com.smartitengineering.cms.api.common.MediaType.java

/**
 * Parses a media type string such as {@code type/subtype;name=value} into a {@link MediaType}.
 *
 * <p>Results are remembered in {@code parseResult}, keyed by the exact input string. Everything
 * following the subtype is split on {@code ';'}; each piece containing {@code '='} becomes a
 * parameter. Parameter names and values are trimmed, so {@code "text/html; charset=utf-8"}
 * yields the name {@code "charset"} rather than {@code " charset"}.
 *
 * @param mediaType the string to parse
 * @return the parsed media type, or {@link MediaType#WILDCARD} when the input is blank or does
 *         not match {@code MEDIA_TYPE_REGEX}
 */
public static MediaType fromString(String mediaType) {
    if (StringUtils.isBlank(mediaType)) {
        return MediaType.WILDCARD;
    }
    MediaType cacheResult = parseResult.get(mediaType);
    if (cacheResult != null) {
        return cacheResult;
    }
    Matcher matcher = MEDIA_TYPE_REGEX.matcher(mediaType);
    if (!matcher.matches()) {
        return MediaType.WILDCARD;
    }
    String type = matcher.group(1);
    String subtype = matcher.group(2);
    final int endOfSubtype = matcher.end(2);
    Map<String, String> params = new HashMap<String, String>();
    if (endOfSubtype < mediaType.length()) {
        // A blank or empty remainder simply produces pieces without '=' and adds nothing.
        for (String parameter : mediaType.substring(endOfSubtype).split(";")) {
            final int indexOfEquals = parameter.indexOf('=');
            if (indexOfEquals >= 0) {
                // Whitespace around ';' is common in real headers; keep it out of names/values.
                // substring(indexOfEquals + 1) is already "" when '=' is the last character.
                params.put(parameter.substring(0, indexOfEquals).trim(),
                        parameter.substring(indexOfEquals + 1).trim());
            }
        }
    }
    final MediaType result = new MediaType(type, subtype, params);
    parseResult.put(mediaType, result);
    return result;
}

From source file:com.cyberway.issue.crawler.extractor.ExtractorJS.java

/**
 * Scans a character sequence for JavaScript string literals and registers link candidates on
 * the given crawl URI.
 *
 * <p>Each literal is handled in one of three ways: if it matches {@code STRING_URI_DETECTOR}
 * it is added as a speculative link and counted; otherwise, if it starts with {@code "/"} it is
 * added as a navigation link relative to the base (this case is not counted); otherwise it is
 * searched recursively for nested literals.
 *
 * @param curi the crawl URI that receives the discovered links
 * @param cs the text to scan
 * @param controller used to log URI errors; may be {@code null}, in which case errors go to the
 *        logger instead
 * @param handlingJSFile when true speculative links are added relative to the via URI,
 *        otherwise relative to the base URI
 * @return the number of literals matched by the URI detector, including those found by recursion
 */
public static long considerStrings(CrawlURI curi, CharSequence cs, CrawlController controller,
        boolean handlingJSFile) {
    long total = 0;
    Matcher literals = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs);
    while (literals.find()) {
        CharSequence candidate = cs.subSequence(literals.start(2), literals.end(2));
        Matcher uriMatcher = TextUtils.getMatcher(STRING_URI_DETECTOR, candidate);
        if (uriMatcher.matches()) {
            String link = speculativeFixup(uriMatcher.group(), curi);
            total++;
            try {
                if (handlingJSFile) {
                    curi.createAndAddLinkRelativeToVia(link, Link.JS_MISC, Link.SPECULATIVE_HOP);
                } else {
                    curi.createAndAddLinkRelativeToBase(link, Link.JS_MISC, Link.SPECULATIVE_HOP);
                }
            } catch (URIException e) {
                // There may not be a controller (e.g. if we're being run by the extractor tool).
                if (controller == null) {
                    LOGGER.info(curi + ", " + link + ": " + e.getMessage());
                } else {
                    controller.logUriError(e, curi.getUURI(), link);
                }
            }
        } else {
            String raw = candidate.toString();
            if (raw.startsWith("/")) {
                try {
                    curi.createAndAddLinkRelativeToBase(raw, Link.JS_MISC, Link.NAVLINK_HOP);
                } catch (URIException e) {
                    if (controller == null) {
                        LOGGER.info(curi + ", " + candidate + ": " + e.getMessage());
                    } else {
                        controller.logUriError(e, curi.getUURI(), candidate);
                    }
                }
            } else {
                total += considerStrings(curi, candidate, controller, handlingJSFile);
            }
        }
        // The per-literal matcher is handed back on every iteration, whichever branch ran.
        TextUtils.recycleMatcher(uriMatcher);
    }
    TextUtils.recycleMatcher(literals);
    return total;
}

From source file:com.github.feribg.audiogetter.helpers.Utils.java

/**
 * Extract a list of urls from a string/*from   ww  w.jav  a  2 s.c om*/
 * Retrieved from: http://www.java-tutorial.ch/core-java-tutorial/extract-urls-using-java-regular-expressions
 *
 * @param value
 * @return
 */
public static List<String> extractUrls(String value) {
    List<String> result = new ArrayList<String>();
    String urlPattern = "((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?\\+-=\\\\\\.&]*)";
    Pattern p = Pattern.compile(urlPattern, Pattern.CASE_INSENSITIVE);
    Matcher m = p.matcher(value);
    while (m.find()) {
        result.add(value.substring(m.start(0), m.end(0)));
    }
    return result;
}

From source file:de.pixida.logtest.logreaders.PatternMatchingsStripper.java

/**
 * Removes from {@code value} the text captured by the matcher's capturing groups.
 *
 * <p>Groups that did not participate in the match are ignored, as are groups whose end lies at
 * or before the end of an already accepted group (i.e. nested groups). The offsets reported by
 * the matcher are applied to {@code value}, so the matcher is presumably expected to hold a
 * successful match against that same string - verify against callers.
 *
 * @param matcher matcher whose group offsets describe the spans to remove
 * @param value the string to strip
 * @return {@code value} without the captured spans, or {@code value} itself when the pattern
 *         has no groups or none of them matched
 */
public static String strip(final Matcher matcher, final String value) {
    final int groupCount = matcher.groupCount();
    if (groupCount == 0) {
        return value;
    }

    // Collect the outermost, participating group spans in group order.
    final List<Pair<Integer, Integer>> spans = new ArrayList<>();
    int outerEnd = -1;
    for (int g = 1; g <= groupCount; g++) {
        final int from = matcher.start(g);
        final int to = matcher.end(g);
        if (from == -1 || to <= outerEnd) {
            continue;
        }
        outerEnd = to;
        spans.add(Pair.of(from, to));
    }
    if (spans.isEmpty()) {
        return value;
    }

    // Copy the gaps between spans: text before each span, then whatever follows the last one.
    final StringBuilder kept = new StringBuilder();
    int cursor = 0;
    for (final Pair<Integer, Integer> span : spans) {
        kept.append(value.substring(cursor, span.getLeft()));
        cursor = span.getRight();
    }
    kept.append(value.substring(cursor));
    return kept.toString();
}

From source file:de.micromata.genome.gwiki.page.search.expr.SearchUtils.java

/**
 * Emphasizes marked keywords in a text sample.
 *
 * <p>For every word, the text enclosed between the marker comments
 * {@code <!--KW:normalizedWord-->} and {@code <!--KW-->} is wrapped in
 * {@code <b><strong><big>...</big></strong></b>}. The marker comments themselves are kept.
 *
 * @param text the sample containing keyword markers
 * @param words the words to emphasize; each is normalized before building its marker
 * @return the text with all marked occurrences emphasized
 */
@Deprecated
public static String sampleToHtmlNew(String text, List<String> words) {
    String ap = Pattern.quote("<!--KW:XXX-->");
    String ep = Pattern.quote("<!--KW-->");
    // TODO gwiki does not work with umlauts, because the words are normalized.
    for (String w : words) {
        String nw = NormalizeUtils.normalize(w);
        String app = StringUtils.replace(ap, "XXX", nw);
        String reg = app + "(.+?)" + ep;
        Matcher m = Pattern.compile(reg).matcher(text);
        // Build the result separately: the matcher's offsets refer to the string it was
        // created for, so editing that string in place would shift every later match.
        StringBuilder sb = new StringBuilder();
        int last = 0;
        while (m.find()) {
            sb.append(text, last, m.start(1));
            sb.append("<b><strong><big>").append(m.group(1)).append("</big></strong></b>");
            last = m.end(1);
        }
        sb.append(text, last, text.length());
        text = sb.toString();
    }
    return text;
}

From source file:de.unidue.inf.is.ezdl.gframedl.utils.HighlightingUtils.java

/**
 * Wraps every group-1 match that lies outside an HTML tag in a coloured {@code <font>} element.
 *
 * <p>A match counts as outside a tag when the last {@code '>'} before it comes after the last
 * {@code '<'}, or when neither character occurs before it. Matches inside a tag are left
 * untouched. The matcher's offsets are applied to {@code htmlString}, so it is presumably
 * created over that same string - verify against callers.
 *
 * @param htmlString the text to process; {@code null} yields an empty string
 * @param escape when true, every copied piece of text is passed through {@code escape(...)}
 * @param highlightStrings the search terms; when empty, or consisting of one empty string, the
 *        input is returned unchanged
 * @param matcher matcher whose group 1 marks the spans to highlight
 * @param foregroundColor value of the {@code color} attribute
 * @param backgroundColor value of the {@code bgcolor} attribute
 * @return the highlighted text
 */
private static String highlight(String htmlString, boolean escape, List<String> highlightStrings,
        Matcher matcher, String foregroundColor, String backgroundColor) {
    if (htmlString == null) {
        return "";
    }
    final boolean nothingToHighlight = highlightStrings.isEmpty()
            || (highlightStrings.size() == 1 && highlightStrings.get(0).isEmpty());
    if (nothingToHighlight) {
        return htmlString;
    }

    final String openTag = "<font bgcolor=\"" + backgroundColor + "\" color=\"" + foregroundColor + "\">";
    final StringBuilder out = new StringBuilder();
    int copiedUpTo = 0;
    while (matcher.find()) {
        final int from = matcher.start(1);
        final int to = matcher.end(1);

        final String head = htmlString.substring(0, from);
        final int lastClose = head.lastIndexOf(">");
        final int lastOpen = head.lastIndexOf("<");
        final boolean outsideTag = lastClose > lastOpen || (lastClose == -1 && lastOpen == -1);
        if (!outsideTag) {
            // Inside a tag: skip it; the text is copied later as part of a following piece.
            continue;
        }

        final String before = htmlString.substring(copiedUpTo, from);
        out.append(escape ? escape(before) : before);
        out.append(openTag);
        final String hit = htmlString.substring(from, to);
        out.append(escape ? escape(hit) : hit);
        out.append("</font>");
        copiedUpTo = to;
    }
    final String tail = htmlString.substring(copiedUpTo, htmlString.length());
    out.append(escape ? escape(tail) : tail);

    return out.toString();
}

From source file:com.puppycrawl.tools.checkstyle.checks.javadoc.JavadocUtils.java

/**
 * Gets validTags from a given piece of Javadoc.
 *
 * <p>Each line is first checked for a block tag (when block tags are requested); only if none
 * is found is the line searched for inline tags (when inline tags are requested). Tags whose
 * name is rejected by {@code JavadocTagInfo.isValidName} are collected as invalid tags.
 *
 * @param cmt
 *        the Javadoc comment to process.
 * @param tagType
 *        the type of validTags we're interested in
 * @return all standalone validTags from the given javadoc.
 */
public static JavadocTags getJavadocTags(TextBlock cmt, JavadocTagType tagType) {
    final String[] text = cmt.getText();
    final List<JavadocTag> tags = Lists.newArrayList();
    final List<InvalidJavadocTag> invalidTags = Lists.newArrayList();
    // All expressions are constant, so they are compiled once instead of once per line.
    // The first line must contain the comment opener; later lines may start with asterisks.
    final Pattern firstLineBlockTag = Pattern.compile("/\\*{2,}\\s*@(\\p{Alpha}+)\\s");
    final Pattern laterLineBlockTag = Pattern.compile("^\\s*\\**\\s*@(\\p{Alpha}+)\\s");
    // Match Javadoc text after comment characters
    final Pattern commentPattern = Pattern.compile("^\\s*(?:/\\*{2,}|\\*+)\\s*(.*)");
    final Pattern tagPattern = Pattern.compile(".*?\\{@(\\p{Alpha}+)\\s+(.*?)\\}");
    for (int i = 0; i < text.length; i++) {
        final String s = text[i];
        final Pattern blockTagPattern = i == 0 ? firstLineBlockTag : laterLineBlockTag;
        final Matcher blockTagMatcher = blockTagPattern.matcher(s);
        if ((tagType == JavadocTagType.ALL || tagType == JavadocTagType.BLOCK) && blockTagMatcher.find()) {
            final String tagName = blockTagMatcher.group(1);
            String content = s.substring(blockTagMatcher.end(1));
            if (content.endsWith("*/")) {
                content = content.substring(0, content.length() - 2);
            }
            final int line = cmt.getStartLineNo() + i;
            int col = blockTagMatcher.start(1) - 1;
            if (i == 0) {
                // Only the first line of the block is offset by the comment's start column.
                col += cmt.getStartColNo();
            }
            if (JavadocTagInfo.isValidName(tagName)) {
                tags.add(new JavadocTag(line, col, tagName, content.trim()));
            } else {
                invalidTags.add(new InvalidJavadocTag(line, col, tagName));
            }
        }
        // No block tag, so look for inline validTags
        else if (tagType == JavadocTagType.ALL || tagType == JavadocTagType.INLINE) {
            final Matcher commentMatcher = commentPattern.matcher(s);
            final String commentContents;
            final int commentOffset; // offset including comment characters
            if (commentMatcher.find()) {
                commentContents = commentMatcher.group(1);
                commentOffset = commentMatcher.start(1) - 1;
            } else {
                commentContents = s; // No leading asterisks, still valid
                commentOffset = 0;
            }
            final Matcher tagMatcher = tagPattern.matcher(commentContents);
            while (tagMatcher.find()) {
                final String tagName = tagMatcher.group(1);
                final String tagValue = tagMatcher.group(2).trim();
                final int line = cmt.getStartLineNo() + i;
                int col = commentOffset + tagMatcher.start(1) - 1;
                if (i == 0) {
                    col += cmt.getStartColNo();
                }
                if (JavadocTagInfo.isValidName(tagName)) {
                    tags.add(new JavadocTag(line, col, tagName, tagValue));
                } else {
                    invalidTags.add(new InvalidJavadocTag(line, col, tagName));
                }
            }
        }
    }
    return new JavadocTags(tags, invalidTags);
}

From source file:com.puppycrawl.tools.checkstyle.utils.JavadocUtils.java

/**
 * Gets validTags from a given piece of Javadoc.
 * @param textBlock/*from w w  w.j a  v a2  s. c om*/
 *        the Javadoc comment to process.
 * @param tagType
 *        the type of validTags we're interested in
 * @return all standalone validTags from the given javadoc.
 */
public static JavadocTags getJavadocTags(TextBlock textBlock, JavadocTagType tagType) {
    final String[] text = textBlock.getText();
    final List<JavadocTag> tags = Lists.newArrayList();
    final List<InvalidJavadocTag> invalidTags = Lists.newArrayList();
    Pattern blockTagPattern = Pattern.compile("/\\*{2,}\\s*@(\\p{Alpha}+)\\s");
    for (int i = 0; i < text.length; i++) {
        final String textValue = text[i];
        final Matcher blockTagMatcher = blockTagPattern.matcher(textValue);
        if ((tagType == JavadocTagType.ALL || tagType == JavadocTagType.BLOCK) && blockTagMatcher.find()) {
            final String tagName = blockTagMatcher.group(1);
            String content = textValue.substring(blockTagMatcher.end(1));
            if (content.endsWith("*/")) {
                content = content.substring(0, content.length() - 2);
            }
            final int line = textBlock.getStartLineNo() + i;
            int col = blockTagMatcher.start(1) - 1;
            if (i == 0) {
                col += textBlock.getStartColNo();
            }
            if (JavadocTagInfo.isValidName(tagName)) {
                tags.add(new JavadocTag(line, col, tagName, content.trim()));
            } else {
                invalidTags.add(new InvalidJavadocTag(line, col, tagName));
            }
        }
        // No block tag, so look for inline validTags
        else if (tagType == JavadocTagType.ALL || tagType == JavadocTagType.INLINE) {
            lookForInlineTags(textBlock, i, tags, invalidTags);
        }
        blockTagPattern = Pattern.compile("^\\s*\\**\\s*@(\\p{Alpha}+)\\s");
    }
    return new JavadocTags(tags, invalidTags);
}