Example usage for java.util.regex Matcher start

Introduction

This page collects example usages of the java.util.regex.Matcher start method.

Prototype

public int start(String name)

Source Link

Document

Returns the start index of the subsequence captured by the given named-capturing group during the previous match operation.

Usage

From source file:com.cyberway.issue.crawler.extractor.ExtractorJS.java

/**
 * Scans {@code cs} with {@code JAVASCRIPT_STRING_EXTRACTOR} and, for every hit, inspects the
 * text delimited by capturing group 2:
 * <ul>
 * <li>if it fully matches {@code STRING_URI_DETECTOR}, it is passed through
 * {@code speculativeFixup} and added to {@code curi} as a speculative link (relative to the
 * via when {@code handlingJSFile} is set, otherwise relative to the base);</li>
 * <li>otherwise, if it starts with {@code "/"}, it is added as a navlink relative to the
 * base;</li>
 * <li>otherwise it is scanned recursively.</li>
 * </ul>
 *
 * @param curi the URI that receives the discovered links
 * @param cs the text to scan
 * @param controller used to log URI errors; may be {@code null}, in which case errors go to
 *        {@code LOGGER}
 * @param handlingJSFile selects via-relative instead of base-relative resolution for
 *        detected URIs
 * @return the number of strings that matched {@code STRING_URI_DETECTOR}, including those
 *         found by recursive scans; the count is incremented before the link is created, and
 *         strings taken by the {@code "/"} branch are not counted
 */
public static long considerStrings(CrawlURI curi, CharSequence cs, CrawlController controller,
        boolean handlingJSFile) {
    long linkCount = 0;
    Matcher literalMatcher = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs);
    while (literalMatcher.find()) {
        CharSequence literal = cs.subSequence(literalMatcher.start(2), literalMatcher.end(2));
        Matcher uriMatcher = TextUtils.getMatcher(STRING_URI_DETECTOR, literal);
        if (uriMatcher.matches()) {
            String candidate = speculativeFixup(uriMatcher.group(), curi);
            linkCount++;
            try {
                if (handlingJSFile) {
                    curi.createAndAddLinkRelativeToVia(candidate, Link.JS_MISC, Link.SPECULATIVE_HOP);
                } else {
                    curi.createAndAddLinkRelativeToBase(candidate, Link.JS_MISC, Link.SPECULATIVE_HOP);
                }
            } catch (URIException e) {
                // A controller is not always available (e.g. when run by the extractor tool),
                // so fall back to the plain logger in that case.
                if (controller == null) {
                    LOGGER.info(curi + ", " + candidate + ": " + e.getMessage());
                } else {
                    controller.logUriError(e, curi.getUURI(), candidate);
                }
            }
        } else if (literal.toString().startsWith("/")) {
            try {
                curi.createAndAddLinkRelativeToBase(literal.toString(), Link.JS_MISC, Link.NAVLINK_HOP);
            } catch (URIException e) {
                if (controller == null) {
                    LOGGER.info(curi + ", " + literal + ": " + e.getMessage());
                } else {
                    controller.logUriError(e, curi.getUURI(), literal);
                }
            }
        } else {
            // Neither a URI nor a root-relative path: look for strings nested inside it.
            linkCount += considerStrings(curi, literal, controller, handlingJSFile);
        }
        TextUtils.recycleMatcher(uriMatcher);
    }
    TextUtils.recycleMatcher(literalMatcher);
    return linkCount;
}

From source file:de.pixida.logtest.logreaders.PatternMatchingsStripper.java

/**
 * Removes from {@code value} every character that lies inside a capturing group of the
 * matcher's current match.
 *
 * <p>Groups that did not participate in the match (start index {@code -1}) and groups that
 * matched the empty string remove nothing. Nested, overlapping and out-of-order groups are
 * handled uniformly: a character is dropped if at least one group covers it. This avoids the
 * {@code StringIndexOutOfBoundsException} that interval-based stitching runs into when a later
 * group starts before the end of an earlier one (for example with captures inside a
 * lookahead).
 *
 * @param matcher a matcher holding a successful match whose group indices refer to
 *        {@code value}
 * @param value the string the matcher was applied to
 * @return {@code value} itself if the pattern has no groups or no group captured any
 *         character, otherwise a new string without the captured characters
 * @throws IllegalStateException if the matcher holds no match (thrown by
 *         {@link Matcher#start(int)})
 */
public static String strip(final Matcher matcher, final String value) {
    final int groupCount = matcher.groupCount();
    if (groupCount == 0) {
        return value;
    }

    // Mark every character position that is covered by at least one capturing group.
    final boolean[] captured = new boolean[value.length()];
    boolean anythingCaptured = false;
    for (int group = 1; group <= groupCount; group++) {
        final int start = matcher.start(group);
        if (start == -1) {
            // Optional group that did not take part in the match.
            continue;
        }
        final int end = matcher.end(group);
        for (int pos = start; pos < end; pos++) {
            captured[pos] = true;
            anythingCaptured = true;
        }
    }
    if (!anythingCaptured) {
        return value;
    }

    // Keep only the characters outside all groups.
    final StringBuilder sb = new StringBuilder(value.length());
    for (int pos = 0; pos < value.length(); pos++) {
        if (!captured[pos]) {
            sb.append(value.charAt(pos));
        }
    }
    return sb.toString();
}

From source file:com.github.feribg.audiogetter.helpers.Utils.java

/**
 * Matches {@code http}/{@code https} URLs (case-insensitively). Compiled once because
 * {@link Pattern} instances are immutable and reusable.
 *
 * <p>Note: inside the character class, {@code \+-=} is a range from {@code '+'} to
 * {@code '='}, so it also admits {@code , - . /}, digits, {@code : ; <}. The expression is
 * kept unchanged so that the set of extracted URLs stays the same.
 */
private static final Pattern URL_PATTERN = Pattern.compile(
        "((https?):((//)|(\\\\))+[\\w\\d:#@%/;$()~_?\\+-=\\\\\\.&]*)", Pattern.CASE_INSENSITIVE);

/**
 * Extracts all URLs from a string, in order of appearance.
 * Retrieved from: http://www.java-tutorial.ch/core-java-tutorial/extract-urls-using-java-regular-expressions
 *
 * @param value the text to search; {@code null} is treated like an empty string
 * @return a new, mutable list with every substring that matches the URL pattern; empty if
 *         there is none
 */
public static List<String> extractUrls(String value) {
    List<String> result = new ArrayList<String>();
    if (value == null) {
        return result;
    }
    Matcher m = URL_PATTERN.matcher(value);
    while (m.find()) {
        // group() is the whole match, i.e. value.substring(m.start(0), m.end(0)).
        result.add(m.group());
    }
    return result;
}

From source file:de.knowwe.visualization.util.Utils.java

/**
 * Breaks a label into several lines by inserting {@code LINE_BREAK} at suitable positions.
 *
 * <p>Strings shorter than 13 characters, and strings without any candidate break position,
 * are only passed through {@code clean}. Otherwise candidate positions are collected with two
 * regular expressions, a list of "desired" positions is derived from the string length, and
 * for each desired position one candidate is chosen. The string is then split at the chosen
 * positions; every part is trimmed, cleaned and followed by the line break, so the result
 * also ends with a line break (see the TODO below).
 *
 * @param string the label text; must not be {@code null}
 * @return the cleaned label with line breaks inserted
 */
public static String prepareLabel(String string) {
    String lb = LINE_BREAK;

    int length = string.length();
    // short labels are never broken
    if (length < 13)
        return clean(string, lb);

    // find possible line break positions (TreeSet: sorted ascending, no duplicates)
    Set<Integer> possibleLBs = new TreeSet<>();

    // possible line breaks are before the following chars:
    // _ >= <= = . ( [ and white spaces
    // ( and [ only count when followed by a character other than the matching closing bracket
    Matcher m = Pattern.compile("_|>=|<=|=|\\.|\\([^\\)]{1}|\\[[^\\]]{1}").matcher(string);
    while (m.find()) {
        possibleLBs.add(m.start(0));
    }
    // line breaks at whitespace only if they are not in range of = or > or
    // <
    // NOTE(review): the {3} quantifier is applied to the lookbehind group itself, not to the
    // character class inside it - confirm that this is intended
    m = Pattern.compile("(?<=[^=<>]){3}( )(?=[^=<>]{3})").matcher(string);
    while (m.find()) {
        // group 1 is the space character itself
        possibleLBs.add(m.start(1));
    }

    if (possibleLBs.isEmpty())
        return clean(string, lb);

    // add the line breaks where it makes sense
    List<Integer> desiredLBs = new LinkedList<>();
    Set<Integer> addedLBs = new TreeSet<>();

    // optimal length is determined by the length of the given String
    // note: length / 5 is an integer division, its result is truncated before the sqrt
    double optimalLength = (double) length / Math.sqrt(length / 5);

    for (int i = 1; i < string.length() / optimalLength; i++) {
        // having the line breaks on these position would be optimal
        desiredLBs.add((int) Math.round(i * optimalLength));
    }

    // TODO: remove creation of trailing linebreaks

    // try to find those possible line breaks that are closest to the optimal
    // line breaks
    // d is the index of the desired line break currently processed
    int d = 0;
    for (Integer desLB : desiredLBs) {
        // 0 means "no candidate chosen"; it also takes part in the distance comparison below
        int bestCandiadate = 0;
        // to avoid breaks for only a few chars at the end, we make
        // extra efforts for the last line break
        // we get the line break that produces the smallest variance
        // we should actually calculate the best break via variance for
        // all line breaks, but that seems rather complex and not yet
        // justified right now, since the current simple algorithm
        // already produces nice results
        if (d == desiredLBs.size() - 1) {
            double bestVar = Double.MAX_VALUE;
            for (Integer posLB : possibleLBs) {
                // breaks chosen so far plus this candidate plus the end of the string
                Set<Integer> temp = new TreeSet<>(addedLBs);
                temp.add(posLB);
                TreeSet<Integer> varianceCheck = new TreeSet<>(temp);
                varianceCheck.add(length);
                double variance = getVariance(varianceCheck);
                // <= : on equal variance the later (larger) candidate wins
                if (variance <= bestVar) {
                    bestVar = variance;
                    bestCandiadate = posLB;
                }
            }
        }
        // for all other breakpoints, just get the one closest to the
        // desired position
        else {
            for (Integer posLB : possibleLBs) {
                // <= : on equal distance the later (larger) candidate wins
                if (Math.abs(desLB - posLB) <= Math.abs(desLB - bestCandiadate)) {
                    bestCandiadate = posLB;
                }
            }
        }
        // a break at the very start or the very end of the string is never added
        if (bestCandiadate != 0 && bestCandiadate != length) {
            addedLBs.add(bestCandiadate);
        }
        d++;
    }

    // put in the line breaks
    StringBuilder labelBuilder = new StringBuilder();
    List<String> split = new ArrayList<>(addedLBs.size() + 1);
    int last = 0;
    for (Integer addedLB : addedLBs) {
        split.add(string.substring(last, addedLB));
        last = addedLB;
    }
    split.add(string.substring(last, string.length()));
    for (String s : split) {
        // clean the substrings; every part, including the last one, is followed by a line break
        labelBuilder.append(clean(s.trim(), lb)).append(lb);
    }

    String label = labelBuilder.toString();
    return label;
}

From source file:de.micromata.genome.gwiki.page.search.expr.SearchUtils.java

/**
 * Emphasizes keyword hits in a text sample.
 *
 * <p>For every word, the text between a {@code <!--KW:word-->} marker (with the normalized
 * word) and the next {@code <!--KW-->} marker is wrapped in
 * {@code <b><strong><big>...</big></strong></b>}. The markers themselves are kept.
 *
 * <p>The output for each word is assembled in a separate buffer. Replacing inside
 * {@code text} while the matcher is still iterating over the old string would apply the
 * offsets of the second and later hits to an already modified text.
 *
 * @param text the sample containing the keyword markers
 * @param words the words to emphasize; each is normalized with
 *        {@code NormalizeUtils.normalize} before it is looked up
 * @return the text with all marked hits emphasized
 */
@Deprecated
public static String sampleToHtmlNew(String text, List<String> words) {
    String ap = Pattern.quote("<!--KW:XXX-->");
    String ep = Pattern.quote("<!--KW-->");
    // TODO gwiki does not work with umlauts because the words are normalized.
    for (String w : words) {
        String nw = NormalizeUtils.normalize(w);
        String app = StringUtils.replace(ap, "XXX", nw);
        Pattern p = Pattern.compile(app + "(.+?)" + ep);
        Matcher m = p.matcher(text);

        StringBuilder highlighted = new StringBuilder();
        int copiedUpTo = 0;
        while (m.find()) {
            // Copy everything up to the hit unchanged, then the wrapped hit (group 1).
            highlighted.append(text, copiedUpTo, m.start(1));
            highlighted.append("<b><strong><big>").append(m.group(1)).append("</big></strong></b>");
            copiedUpTo = m.end(1);
        }
        highlighted.append(text, copiedUpTo, text.length());
        text = highlighted.toString();
    }
    return text;
}

From source file:com.github.gekoh.yagen.util.FieldInfo.java

/**
 * Inserts {@code prefix} into {@code annotation} at the start of capturing group 1 of the
 * first {@code ATTR_OVERR_NAME} match.
 *
 * @param annotation the annotation source text
 * @param prefix the text to insert
 * @return the annotation text with the prefix inserted
 * @throws IllegalArgumentException if {@code ATTR_OVERR_NAME} does not match anywhere in
 *         {@code annotation}
 */
private static String addNamePrefixToAttributeOverride(String annotation, String prefix) {
    Matcher nameMatcher = ATTR_OVERR_NAME.matcher(annotation);
    if (!nameMatcher.find()) {
        throw new IllegalArgumentException("no AttributeOverride found in '" + annotation + "'");
    }
    int insertAt = nameMatcher.start(1);
    String head = annotation.substring(0, insertAt);
    String tail = annotation.substring(insertAt);
    return head + prefix + tail;
}

From source file:com.ultrapower.eoms.common.plugin.ecside.tag.TagUtils.java

    /**
     * Applies every regular expression registered in {@code escape_REX} for
     * {@code escapeType} to {@code attribute}, one after the other. Each match is replaced
     * as a whole by the result of {@code escapeString} for its capturing group 2; text
     * between matches is copied unchanged.
     *
     * @param attribute the text to process
     * @param escapeType key into {@code escape_REX}; also passed on to {@code escapeString}
     * @return the text after all expressions have been applied
     */
    public final static String escapeExpression(String attribute,String escapeType){
        String[] regexes = (String[]) escape_REX.get(escapeType);
        for (String regex : regexes) {
            Matcher matcher = Pattern.compile(regex).matcher(attribute);
            StringBuilder rewritten = new StringBuilder();
            int copiedUpTo = 0;
            while (matcher.find()) {
                String escaped = escapeString(matcher.group(2), escapeType);
                rewritten.append(attribute, copiedUpTo, matcher.start());
                rewritten.append(escaped);
                // end() equals start(0) + group(0).length()
                copiedUpTo = matcher.end();
            }
            rewritten.append(attribute, copiedUpTo, attribute.length());
            // the next expression works on the output of this one
            attribute = rewritten.toString();
        }
        return attribute;
    }

From source file:com.github.gekoh.yagen.util.FieldInfo.java

/**
 * Converts the {@code toString()} form of an annotation into text usable in Java source.
 *
 * <p>Steps, in this order:
 * <ol>
 * <li>wrap capturing group 2 of every {@code STRING_ATTR_PATTERN} match in double quotes,
 * after passing it through {@code escapeAttributeValue};</li>
 * <li>remove empty attributes like {@code (columnDefinition=)};</li>
 * <li>replace {@code "false"} by {@code "true"} in group 1 of every
 * {@code NULLABLE_PATTERN} match;</li>
 * <li>replace {@code "true"} by {@code "false"} in group 1 of every {@code UNIQUE_PATTERN}
 * match;</li>
 * <li>turn {@code =[...]} into <code>={...}</code>.</li>
 * </ol>
 *
 * @param annotation the annotation to format
 * @return the formatted annotation text
 */
private static String formatAnnotation(Annotation annotation) {
    final String raw = annotation.toString();

    // wrap string value of attribute "name" into double quotes as needed for java code
    final StringBuilder quoted = new StringBuilder();
    Matcher matcher = STRING_ATTR_PATTERN.matcher(raw);
    int cursor = 0;
    while (matcher.find(cursor)) {
        quoted.append(raw.substring(cursor, matcher.start(2)));
        quoted.append('"').append(escapeAttributeValue(matcher.group(2))).append('"');
        quoted.append(raw.substring(matcher.end(2), matcher.end()));
        cursor = matcher.end();
    }
    quoted.append(raw.substring(cursor));

    // remove empty attributes like (columnDefinition=)
    final String withQuotes = quoted.toString();
    final StringBuilder result = new StringBuilder();
    matcher = Pattern.compile("\\(?(,?\\s*[A-Za-z]*=)[,|\\)]").matcher(withQuotes);
    cursor = 0;
    while (matcher.find(cursor)) {
        // only group 1 is dropped; the delimiter after it is kept
        result.append(withQuotes.substring(cursor, matcher.start(1)));
        cursor = matcher.end(1);
    }
    result.append(withQuotes.substring(cursor));

    // set nullable=true
    forceAttributeValue(result, NULLABLE_PATTERN, "false", "true");

    // set unique=false
    forceAttributeValue(result, UNIQUE_PATTERN, "true", "false");

    return result.toString().replaceAll("=\\[([^\\]]*)\\]", "={$1}");
}

/**
 * Replaces, in place, group 1 of every match of {@code pattern} in {@code text} by
 * {@code replacement} whenever it equals {@code unwanted}.
 */
private static void forceAttributeValue(StringBuilder text, Pattern pattern, String unwanted,
        String replacement) {
    Matcher matcher = pattern.matcher(text);
    int from = 0;
    while (matcher.find(from)) {
        if (matcher.group(1).equals(unwanted)) {
            text.replace(matcher.start(1), matcher.end(1), replacement);
        }
        from = matcher.start(1) + 1;
        // the buffer may have changed, so continue with a fresh matcher
        matcher = pattern.matcher(text);
    }
}

From source file:com.googlecode.jtiger.modules.ecside.tag.TagUtils.java

/**
 * Runs {@code attribute} through all regular expressions that {@code escape_REX} holds for
 * {@code escapeType}. Every match is replaced completely by {@code escapeString} applied to
 * its capturing group 2; the text around the matches is kept as is. The output of one
 * expression is the input of the next.
 *
 * @param attribute the text to process
 * @param escapeType key into {@code escape_REX}; also handed to {@code escapeString}
 * @return the processed text
 */
public final static String escapeExpression(String attribute, String escapeType) {
    String[] expressions = (String[]) escape_REX.get(escapeType);
    for (String expression : expressions) {
        Matcher hit = Pattern.compile(expression).matcher(attribute);
        StringBuilder out = new StringBuilder();
        int tailStart = 0;
        while (hit.find()) {
            String replacement = escapeString(hit.group(2), escapeType);
            out.append(attribute, tailStart, hit.start()).append(replacement);
            // continue right behind the whole match
            tailStart = hit.end();
        }
        out.append(attribute, tailStart, attribute.length());
        attribute = out.toString();
    }
    return attribute;
}

From source file:de.unidue.inf.is.ezdl.gframedl.utils.HighlightingUtils.java

/**
 * Wraps every hit of {@code matcher} (capturing group 1) in a
 * {@code <font bgcolor=... color=...>} element, unless the hit lies inside an HTML tag.
 *
 * <p>A hit counts as being inside a tag when the last {@code "<"} before it comes after the
 * last {@code ">"} before it. Such hits are left untouched.
 *
 * @param htmlString the text to process; {@code null} yields an empty string
 * @param escape whether all copied text (hits and the text between them) is passed through
 *        {@code escape}
 * @param highlightStrings the search terms; if the list is empty or consists of a single
 *        empty string, {@code htmlString} is returned unchanged
 * @param matcher supplies the hits; the indices of its group 1 are applied to
 *        {@code htmlString}
 * @param foregroundColor value of the {@code color} attribute
 * @param backgroundColor value of the {@code bgcolor} attribute
 * @return the text with highlighting markup
 */
private static String highlight(String htmlString, boolean escape, List<String> highlightStrings,
        Matcher matcher, String foregroundColor, String backgroundColor) {
    if (htmlString == null) {
        return "";
    }
    boolean nothingToHighlight = highlightStrings.isEmpty()
            || (highlightStrings.size() == 1 && highlightStrings.get(0).isEmpty());
    if (nothingToHighlight) {
        return htmlString;
    }

    StringBuilder out = new StringBuilder();
    int copiedUpTo = 0;
    while (matcher.find()) {
        int start = matcher.start(1);
        int end = matcher.end(1);

        String prefix = htmlString.substring(0, start);
        int lastClose = prefix.lastIndexOf(">");
        int lastOpen = prefix.lastIndexOf("<");
        // The two indices can only be equal when both are -1, so "open after close" is the
        // exact negation of: close > open || (close == -1 && open == -1).
        if (lastOpen > lastClose) {
            continue;
        }

        String before = htmlString.substring(copiedUpTo, start);
        out.append(escape ? escape(before) : before);
        out.append("<font bgcolor=\"").append(backgroundColor)
                .append("\" color=\"").append(foregroundColor).append("\">");
        String hit = htmlString.substring(start, end);
        out.append(escape ? escape(hit) : hit);
        out.append("</font>");
        copiedUpTo = end;
    }

    String rest = htmlString.substring(copiedUpTo);
    out.append(escape ? escape(rest) : rest);
    return out.toString();
}