Example usage for java.util.regex.Matcher.end()

Introduction

On this page you can find example usages of java.util.regex.Matcher.end().

Prototype

public int end()

Source Link

Document

Returns the offset after the last character matched.

Usage

From source file:com.streamsets.pipeline.lib.parser.log.ExtendedFormatParser.java

/**
 * Parses one extended-format log line into a map of header fields plus an
 * {@code "extensions"} entry holding the key/value extension pairs.
 *
 * @param logLine the raw log line; it is read but not modified here
 * @return a map containing the format version (keyed by {@code formatType.label + "Version"}),
 *         the remaining header fields, and the extensions map under {@code "extensions"}
 * @throws DataParserException with {@code Errors.LOG_PARSER_12} when fewer header fields
 *         are found than the format version requires
 */
@Override
public Map<String, Field> parseLogLine(StringBuilder logLine) throws DataParserException {
    Map<String, Field> map = new HashMap<>();

    // Parse headers
    Matcher m = HEADER_PATTERN.matcher(logLine);
    int counter = 0;
    int index = 0;
    // Start by expecting a single header (the version); the real count is only
    // known once that first field has been parsed.
    int headerCount = 1;
    while (counter < headerCount && m.find()) {
        // Each header value is the text between the previous match end and this match start.
        String val = logLine.substring(index, m.start());

        if (counter == 0) {
            Field formatVersion = getExtFormatVersion(val);
            map.put(formatType.label + "Version", formatVersion);
            headerCount = getNumHeaderFields(formatType, formatVersion);
        } else {
            map.put(getHeaderFieldName(counter), Field.create(val));
        }

        index = m.end();
        counter++;
    }

    // The matcher ran out of header delimiters before all expected headers were read.
    if (counter < headerCount) {
        throw new DataParserException(Errors.LOG_PARSER_12, formatName, headerCount, counter);
    }

    // For LEEF 2.0, there is an optional field in the header, so we check for it, and
    // advance the index, if necessary, to get to the start of the extensions
    char attrSeparator = getExtensionAttrSeparator(m, index, logLine);
    // NOTE(review): presumably getExtensionAttrSeparator may advance the matcher `m`;
    // that is the only way m.end() could differ from `index` here — confirm.
    if (!m.hitEnd()) {
        index = m.end();
    }

    // Calls to trim() will strip off whitespace, but if format is LEEF 2.0 and a custom
    // delimiter is being used, we need to offset the start index of extension keys
    int offset = 0;
    if (!Character.isWhitespace(attrSeparator)) {
        offset = 1;
    }

    // Process extensions
    Map<String, Field> extMap = new HashMap<>();
    Map<String, String> labelMap = new HashMap<>();
    String ext = logLine.substring(index);
    m = EXT_PATTERN.matcher(ext);
    index = 0;
    String key = null;
    String value;

    while (m.find()) {
        if (key == null) {
            // First match: everything before it is the first key. A second find() is
            // needed to locate the end of that key's value.
            key = ext.substring(index, m.start());
            index = m.end();
            if (!m.find()) {
                break;
            }
        }
        // Regex will search for unescaped '=' character to find the split between keys
        // and values. We'll need to figure out where the separator is to determine the
        // end of the value, and then go back for the next KV pair
        value = ext.substring(index, m.start());
        index = m.end();
        int lastSepIndex = value.lastIndexOf(attrSeparator);
        // When the separator is missing or sits at position 0, nothing is stored and
        // the current key is kept for the next iteration.
        if (lastSepIndex > 0) {
            String temp = value.substring(0, lastSepIndex).trim();
            putLabelIntoAppropriateMap(labelMap, extMap, key, temp);
            key = value.substring(lastSepIndex + offset).trim();
        }
    }
    // Whatever follows the last match is the value of the final key.
    value = ext.substring(index);

    // Build a map of Label extensions to apply later
    putLabelIntoAppropriateMap(labelMap, extMap, key, value);

    // Apply the labels to custom fields
    for (Map.Entry<String, String> label : labelMap.entrySet()) {
        if (extMap.containsKey(label.getKey())) {
            // Re-key the field: remove it under the label's key and store it under the label's value.
            Field field = extMap.remove(label.getKey());
            extMap.put(label.getValue(), field);
        }
    }

    map.put("extensions", Field.create(extMap));

    return map;
}

From source file:com.nttec.everychan.chans.cirno.MikubaReader.java

/**
 * Rewrites post references found in the buffered comment: a referenced number that
 * differs from the first buffered post's number and is contained in {@code postsNumBuf}
 * is replaced by that first post's number.
 *
 * <p>The buffer is always left empty on return. The rewritten text is returned only
 * when at least one reference was actually replaced; otherwise the original text is
 * returned.
 *
 * @param commentBuffer buffer holding the comment; used as scratch space and cleared
 * @return the comment text, with references replaced where applicable
 */
private String fixPostRefs(StringBuilder commentBuffer) {
    final String original = commentBuffer.toString();
    commentBuffer.setLength(0);

    if (postsBuf == null || postsBuf.size() == 0) {
        return original;
    }

    final Matcher refs = POST_REFERENCE.matcher(original);
    boolean more = refs.find();
    if (!more) {
        return original;
    }

    final String threadNumber = postsBuf.get(0).number;
    int copiedUpTo = 0;
    boolean changed = false;
    while (more) {
        final String referenced = refs.group(1);
        // Copy everything up to the captured number, then continue after the whole match.
        commentBuffer.append(original, copiedUpTo, refs.start(1));
        copiedUpTo = refs.end();

        final boolean redirect = !referenced.equals(threadNumber) && postsNumBuf.contains(referenced);
        changed |= redirect;
        commentBuffer.append(redirect ? threadNumber : referenced);

        more = refs.find();
    }
    commentBuffer.append(original, copiedUpTo, original.length());

    final String result = changed ? commentBuffer.toString() : original;
    commentBuffer.setLength(0);
    return result;
}

From source file:de.tudarmstadt.lt.lm.service.UimaStringProvider.java

/**
 * Splits {@code text} into consecutive chunks and returns them lazily.
 *
 * <p>When {@code aggressiveness} is at or beyond the number of configured patterns, the
 * text is cut into fixed-size chunks of at most 1000 characters. Otherwise the pattern
 * at index {@code aggressiveness} is used: each chunk ends one character before the end
 * of the next match (so the last matched character begins the following chunk), and the
 * remainder of the text after the final match forms the last chunk.
 *
 * <p>Both iterators honour the {@link Iterator} contract: {@code hasNext()} may be called
 * any number of times without consuming input, and {@code next()} throws
 * {@link java.util.NoSuchElementException} once the text is exhausted.
 *
 * @param text the text to split
 * @param aggressiveness index into {@code patterns}; values {@code >= patterns.length}
 *        select the fixed-size fallback
 * @return an iterator over the chunks; {@code remove()} is unsupported
 */
public Iterator<String> applySplitHeuristic(final String text, int aggressiveness) {
    if (aggressiveness >= patterns.length) {
        return new Iterator<String>() {
            private int i = 0;

            @Override
            public boolean hasNext() {
                return i < text.length();
            }

            @Override
            public String next() {
                if (!hasNext()) {
                    throw new java.util.NoSuchElementException();
                }
                int next_i = Math.min(i + 1000, text.length());
                String result = text.substring(i, next_i);
                i = next_i;
                return result;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("remove() is not supported");
            }
        };
    }

    final Matcher m = patterns[aggressiveness].matcher(text);
    return new Iterator<String>() {
        private int last = 0;
        private boolean found = false;
        // True while the result of m.find() for the upcoming element is already cached,
        // so that repeated hasNext() calls do not advance the matcher again.
        private boolean lookedAhead = false;

        @Override
        public boolean hasNext() {
            if (!lookedAhead) {
                found = m.find();
                lookedAhead = true;
            }
            return found || last < text.length();
        }

        @Override
        public String next() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            // Split just before the last matched character; with no further match,
            // hand out the remaining tail.
            final int current = found ? m.end() - 1 : text.length();
            String res = text.substring(last, current);
            last = current;
            lookedAhead = false;
            return res;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove() is not supported");
        }
    };
}

From source file:hudson.Util.java

/**
 * Replaces each occurrence of '$key' by {@code resolver.resolve("key")}.
 *
 * <p>
 * A doubled dollar sign is replaced by a single '$', and a key wrapped in braces is
 * resolved without the braces. Unlike shell, undefined variables (those the resolver
 * returns {@code null} for) are left as-is (this behavior is the same as Ant.)
 *
 * @param s the text to expand; may be {@code null}
 * @param resolver supplies the value for each key
 * @return the expanded text, or {@code null} when {@code s} is {@code null}
 */
public static String replaceMacro(String s, VariableResolver<String> resolver) {
    if (s == null) {
        return null;
    }

    int searchFrom = 0;
    Matcher m = VARIABLE.matcher(s);
    while (m.find(searchFrom)) {
        // Drop the leading '$' of the matched token.
        final String token = m.group().substring(1);
        final char head = token.charAt(0);

        final String replacement;
        if (head == '$') {
            // escaped dollar sign
            replacement = "$";
        } else {
            final String name = (head == '{') ? token.substring(1, token.length() - 1) : token;
            replacement = resolver.resolve(name);
        }

        if (replacement != null) {
            final int at = m.start();
            s = s.substring(0, at) + replacement + s.substring(m.end());
            // Resume after the inserted text so it is not expanded again.
            searchFrom = at + replacement.length();
        } else {
            // unresolved: keep the token and move past it
            searchFrom = m.end();
        }

        m = VARIABLE.matcher(s);
    }
    return s;
}

From source file:gate.creole.splitter.RegexSentenceSplitter.java

/**
 * Runs the regex-based sentence splitter over the current document.
 *
 * <p>Matches of the internal and external split patterns are processed in the order
 * given by {@code MatchResultComparator}. For every split that is not vetoed against
 * the non-split locations, a "Split" annotation (feature {@code kind} = "internal" or
 * "external") is added to the output annotation set, followed by a sentence annotation
 * when there is text between the previous sentence end and the split. Progress is
 * reported whenever it has grown by more than 20 percentage points.
 *
 * @throws ExecutionException if an annotation offset is rejected or a match cannot be
 *         attributed to either matcher
 */
@Override
public void execute() throws ExecutionException {
    interrupted = false;
    int lastProgress = 0;
    fireProgressChanged(lastProgress);
    //get pointers to the annotation sets
    AnnotationSet outputAS = (outputASName == null || outputASName.trim().length() == 0)
            ? document.getAnnotations()
            : document.getAnnotations(outputASName);

    String docText = document.getContent().toString();

    /* If the document's content is empty or contains only whitespace,
     * we drop out right here, since there's nothing to sentence-split. */
    if (docText.trim().length() < 1) {
        return;
    }

    Matcher internalSplitMatcher = internalSplitsPattern.matcher(docText);
    Matcher externalSplitMatcher = externalSplitsPattern.matcher(docText);

    Matcher nonSplitMatcher = nonSplitsPattern.matcher(docText);
    //store all non split locations in a list of [start, end] pairs
    List<int[]> nonSplits = new LinkedList<int[]>();
    while (nonSplitMatcher.find()) {
        nonSplits.add(new int[] { nonSplitMatcher.start(), nonSplitMatcher.end() });
    }
    //this list holds the next pending match of each matcher (at most two entries)
    List<MatchResult> nextSplitMatches = new ArrayList<MatchResult>();
    //initialise matching process
    MatchResult internalMatchResult = null;
    if (internalSplitMatcher.find()) {
        internalMatchResult = internalSplitMatcher.toMatchResult();
        nextSplitMatches.add(internalMatchResult);
    }
    MatchResult externalMatchResult = null;
    if (externalSplitMatcher.find()) {
        externalMatchResult = externalSplitMatcher.toMatchResult();
        nextSplitMatches.add(externalMatchResult);
    }
    MatchResultComparator comparator = new MatchResultComparator();
    int lastSentenceEnd = 0;

    while (!nextSplitMatches.isEmpty()) {
        //see which one matches first
        Collections.sort(nextSplitMatches, comparator);
        MatchResult nextMatch = nextSplitMatches.remove(0);
        //the origin of the match is determined by reference identity with the
        //snapshot taken from each matcher
        if (nextMatch == internalMatchResult) {
            //we have a new internal split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //add the split annotation
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "internal");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    //generate the sentence annotation; for an internal split the
                    //sentence extends to the end of the split match
                    int endOffset = nextMatch.end();
                    //find the first non whitespace character starting from where the
                    //last sentence ended
                    //NOTE(review): this branch tests Character.isWhitespace while the
                    //external branch tests Character.isSpaceChar - confirm the
                    //difference is intentional
                    while (lastSentenceEnd < endOffset
                            && Character.isWhitespace(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < nextMatch.start()) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end
                    lastSentenceEnd = endOffset;
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step: fetch the following internal match, if any
            if (internalSplitMatcher.find()) {
                internalMatchResult = internalSplitMatcher.toMatchResult();
                nextSplitMatches.add(internalMatchResult);
            } else {
                internalMatchResult = null;
            }
        } else if (nextMatch == externalMatchResult) {
            //we have a new external split; see if it's vetoed or not
            if (!veto(nextMatch, nonSplits)) {
                //split is not vetoed
                try {
                    //generate the split
                    FeatureMap features = Factory.newFeatureMap();
                    features.put("kind", "external");
                    outputAS.add(new Long(nextMatch.start()), new Long(nextMatch.end()), "Split", features);
                    //generate the sentence annotation; for an external split the
                    //sentence ends before the split match begins
                    //find the last non whitespace character, going backward from
                    //where the external skip starts
                    int endOffset = nextMatch.start();
                    while (endOffset > lastSentenceEnd
                            && Character.isSpaceChar(Character.codePointAt(docText, endOffset - 1))) {
                        endOffset--;
                    }
                    //find the first non whitespace character starting from where the
                    //last sentence ended
                    while (lastSentenceEnd < endOffset
                            && Character.isSpaceChar(Character.codePointAt(docText, lastSentenceEnd))) {
                        lastSentenceEnd++;
                    }
                    //if there is any useful text between the two offsets, generate
                    //a new sentence
                    if (lastSentenceEnd < endOffset) {
                        outputAS.add(new Long(lastSentenceEnd), new Long(endOffset),
                                ANNIEConstants.SENTENCE_ANNOTATION_TYPE, Factory.newFeatureMap());
                    }
                    //store the new sentence end (after the split itself)
                    lastSentenceEnd = nextMatch.end();
                } catch (InvalidOffsetException e) {
                    // this should never happen
                    throw new ExecutionException(e);
                }
            }
            //prepare for next step: fetch the following external match, if any
            if (externalSplitMatcher.find()) {
                externalMatchResult = externalSplitMatcher.toMatchResult();
                nextSplitMatches.add(externalMatchResult);
            } else {
                externalMatchResult = null;
            }
        } else {
            //malfunction
            throw new ExecutionException("Invalid state - cannot identify match!");
        }
        //report progress, but only in steps larger than 20 percentage points
        int newProgress = 100 * lastSentenceEnd / docText.length();
        if (newProgress - lastProgress > 20) {
            lastProgress = newProgress;
            fireProgressChanged(lastProgress);
        }
    } //while(!nextMatches.isEmpty()){
    fireProcessFinished();
}

From source file:ninja.javafx.smartcsv.fx.validation.ValidationEditorController.java

/**
 * Builds the style spans for {@code text}: every match of {@code PATTERN} gets the
 * style class of the first named group that participated in the match, and the text
 * between matches (and after the last one) gets an empty style.
 *
 * @param text the text to highlight
 * @return style spans covering the whole text
 */
private static StyleSpans<Collection<String>> computeHighlighting(String text) {
    // Named groups paired with the style class they map to, checked in this order.
    final String[][] groupToStyle = {
            { "KEYWORD", "keyword" },
            { "PAREN", "paren" },
            { "BRACE", "brace" },
            { "BRACKET", "bracket" },
            { "SEMICOLON", "semicolon" },
            { "STRING", "string" },
            { "STRING2", "string" },
            { "COMMENT", "comment" },
    };

    Matcher tokens = PATTERN.matcher(text);
    StyleSpansBuilder<Collection<String>> builder = new StyleSpansBuilder<>();
    int cursor = 0;
    while (tokens.find()) {
        String styleClass = null;
        for (String[] entry : groupToStyle) {
            if (tokens.group(entry[0]) != null) {
                styleClass = entry[1];
                break;
            }
        }
        /* never happens */ assert styleClass != null;

        final int from = tokens.start();
        final int to = tokens.end();
        // unstyled gap before the token, then the token itself
        builder.add(Collections.emptyList(), from - cursor);
        builder.add(Collections.singleton(styleClass), to - from);
        cursor = to;
    }
    // unstyled remainder after the last token
    builder.add(Collections.emptyList(), text.length() - cursor);
    return builder.create();
}

From source file:com.g3net.tool.StringUtils.java

/**
 * ?????/*from w  ww  .  jav  a 2 s  . com*/
 * @param src ??
 * @param regex ???
 * @param ignoreCase ??
 * @param endPos src??(regex)??1
 * @return
 */
public static boolean startsWith(String src, String regex, boolean ignoreCase, TInteger endPos) {

    Pattern p = null;
    if (ignoreCase) {
        p = Pattern.compile("^" + regex, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
    } else {
        p = Pattern.compile("^" + regex, Pattern.MULTILINE);
    }
    Matcher m = p.matcher(src);
    while (m.find()) {
        // log.info(m.group()+":"+m.start()+":"+m.end());
        endPos.setValue(m.end());
        return true;
    }
    return false;
}

From source file:sapience.injectors.stax.inject.ModelBasedStaxStreamInjector.java

/**
 * Handles a reference that is expressed as attributes (e.g. sawsdl:modelreference):
 * the attributes found in the reference target are turned into attribute events and
 * merged with the attributes already present on the start element.
 *
 * <p>Namespace declarations ({@code xmlns:prefix="uri"}) found in the reference are
 * added to a copy of the element's namespace context rather than to the attribute list.
 *
 * @param w the event writer (not used by this method)
 * @param ref the reference whose target string is scanned for attributes
 * @param se the start element to extend
 * @return a new start element carrying the merged attributes and namespaces
 * @throws XMLStreamException declared for callers; see the XML event factory used
 */
private StartElement handleAttribute(XMLEventWriter w, Reference ref, StartElement se)
        throws XMLStreamException {
    /* we are having attributes which are in both, the reference and the current element. We only add 
     * a new Attribute event, if it is not already contained in the Start Element
     * 
     * Example: 
     *    reference <element ns:attr1="value" reference="http://example.com">
     *  element   <element ns:attr1="value">
     */
    StringBuilder referenceString = new StringBuilder(ref.getTarget().toString());
    Matcher matcher = findAttributeInReference.matcher(referenceString);
    List<Attribute> attributeList = new ArrayList<Attribute>();

    // copy namespaces
    LocalNamespaceContext lnc = new LocalNamespaceContext((BaseNsContext) se.getNamespaceContext());

    while (matcher.find()) {
        int start = matcher.start();
        int end = matcher.end();

        String key = null;
        String prefix = null;
        String value = null;

        // [ns:attr1, "value"]
        // Split on the first '=' only, so that values which themselves contain '='
        // (e.g. URLs with query strings) are kept intact.
        String[] l = referenceString.substring(start, end).split("=", 2);
        if (l.length > 0) {
            // [ns, attr1]
            String[] n = l[0].split(":");
            if (n.length == 2) {
                key = n[1];
                prefix = n[0];
            } else {
                key = n[0];
            }
            // Only strip the surrounding quotes when there is room for both of them.
            if (l.length == 2 && l[1].length() >= 2) {
                value = l[1].substring(1, l[1].length() - 1); // remove ""
            }
        }

        // check if this is a namespace definition
        if ((prefix != null) && ("xmlns".contentEquals(prefix))) {
            lnc.put(key, value);
        } else {
            // create QName
            QName name;
            if (prefix != null) {
                name = new QName(null, key, prefix);
            } else {
                String namespaceURI = se.getNamespaceContext().getNamespaceURI(XMLConstants.DEFAULT_NS_PREFIX);
                name = new QName(namespaceURI, key);
            }
            Attribute created = getXMLEventFactory().createAttribute(name, value);
            attributeList.add(created);
        }
    }

    // remove redundant attribute from reference list
    Iterator<?> it = se.getAttributes();
    while (it.hasNext()) {
        Attribute ae = (Attribute) it.next();
        for (Attribute ar : attributeList) {
            if ((ar.getName().getLocalPart().contentEquals(ae.getName().getLocalPart()))
                    && (ar.getValue().contentEquals(ae.getValue()))) {
                // Removing inside the for-each is safe only because we leave the loop
                // immediately afterwards.
                attributeList.remove(ar);
                break;
            }
        }
    }

    // merge everything again
    Iterator<? extends Attribute> it2 = se.getAttributes();
    while (it2.hasNext()) {
        attributeList.add(it2.next());
    }

    // create a new element with the attribute set and return it
    return StartElementEventImpl.construct(se.getLocation(), se.getName(), attributeList.iterator(),
            lnc.getNamespaces().iterator(), lnc);

}

From source file:com.jsmartframework.web.manager.ExpressionHandler.java

/**
 * Evaluates the expression(s) embedded in {@code expr}.
 *
 * <p>If the text form of {@code expr} contains no match of {@code EL_PATTERN}, the
 * argument is returned unchanged. If it contains exactly one match whose evaluated
 * value is not a {@code String}, that value is returned as-is. In every other case
 * each match is replaced by the text of its evaluated value ({@code "null"} for a
 * null value) and the resulting string is returned.
 *
 * @param expr the expression holder; may be {@code null}
 * @return the evaluated value or substituted string, or {@code null} for a null argument
 */
public Object getExpressionValue(Object expr) {
    if (expr == null) {
        return null;
    }

    final String source = expr.toString();
    final Matcher el = EL_PATTERN.matcher(source);
    if (!el.find()) {
        return expr;
    }

    final StringBuffer rendered = new StringBuffer();
    Object firstValue = null;
    int matches = 0;
    do {
        // Strip the two-character opener and the one-character closer of the match.
        final Object value = evaluateExpression(source.substring(el.start() + 2, el.end() - 1));
        if (matches == 0) {
            firstValue = value;
        }
        matches++;

        final String replacement = (value == null) ? "null" : Matcher.quoteReplacement(value.toString());
        el.appendReplacement(rendered, replacement);
    } while (el.find());

    // A lone non-String result is handed back as the object itself.
    if (matches == 1 && !(firstValue instanceof String)) {
        return firstValue;
    }
    return el.appendTail(rendered).toString();
}

From source file:de.mpg.escidoc.services.cone.util.CCCrawler.java

/**
 * Extracts the alternate-language links from {@code page} and builds one tree fragment
 * per link, holding its identifier URL, title and locale.
 *
 * <p>Each linked page is fetched; when it contains an {@code <h2 property="dc:title">}
 * heading, that heading replaces the title taken from the link itself.
 *
 * @param page the HTML of the page to scan
 * @param baseURL prefix prepended to each relative link target
 * @return one entry per language link, in document order
 * @throws Exception if creating an identifier or fetching a linked page fails
 */
private static List<LocalizedTripleObject> extractLanguages(String page, String baseURL) throws Exception {
    HttpClient httpClient = new HttpClient();
    List<LocalizedTripleObject> result = new ArrayList<LocalizedTripleObject>();

    Pattern pattern = Pattern.compile(
            "<a\\s+href=\"./([^\"]+)\"\\s+title=\"([^\"]+)\"\\s+hreflang=\"([^\"]+)\"\\s+rel=\"alternate nofollow\"\\s+(xml:)?lang=\"([^\"]+)\">");
    // Compiled once up front instead of once per link.
    Pattern namePattern = Pattern.compile("<h2 property=\"dc:title\">([^<]+)</h2>");
    Matcher matcher = pattern.matcher(page);
    int start = 0;
    while (matcher.find(start)) {
        String genid = querier.createUniqueIdentifier(null);
        TreeFragment treeFragment = new TreeFragment(genid);

        String locale = matcher.group(3);

        treeFragment.setLanguage(locale.split("_")[0]);

        String url = baseURL + matcher.group(1);
        List<LocalizedTripleObject> list = new ArrayList<LocalizedTripleObject>();
        list.add(new LocalizedString(url));
        treeFragment.put("http://purl.org/dc/elements/1.1/identifier", list);

        list = new ArrayList<LocalizedTripleObject>();
        list.add(new LocalizedString(matcher.group(2)));
        treeFragment.put("http://purl.org/dc/elements/1.1/title", list);

        list = new ArrayList<LocalizedTripleObject>();
        list.add(new LocalizedString(locale));
        treeFragment.put("urn:cone:locale", list);

        GetMethod method = new GetMethod(url);
        String translation;
        try {
            ProxyHelper.executeMethod(httpClient, method);
            translation = method.getResponseBodyAsString();
        } finally {
            // Always hand the connection back, otherwise one is leaked per link.
            method.releaseConnection();
        }

        Matcher nameMatcher = namePattern.matcher(translation);
        if (nameMatcher.find()) {
            // Prefer the title shown on the translated page over the link's title attribute.
            list = new ArrayList<LocalizedTripleObject>();
            list.add(new LocalizedString(nameMatcher.group(1)));
            treeFragment.put("http://purl.org/dc/elements/1.1/title", list);
        }

        result.add(treeFragment);

        start = matcher.end();
    }
    return result;
}