Example usage for java.util.regex Matcher start

List of usage examples for java.util.regex Matcher start

Introduction

On this page you can find example usages of the java.util.regex.Matcher.start() method.

Prototype

public int start() 

Source Link

Document

Returns the start index of the previous match.

Usage

From source file:sapience.injectors.stax.inject.StringBasedStaxStreamInjector.java

/**
 * Collects the namespace declarations found in an XML fragment.
 * <p>
 * Every region matched by {@code nsPattern} is split into two pieces: the prefix, which is
 * the text between the first {@code ':'} and the last {@code '='}, and the URI, which is the
 * text between the first and the last double quote. Each pair is turned into
 * {@code new QName(uri, prefix)}.
 *
 * @param xml the XML text to scan
 * @return one QName per match, in match order; an empty list when nothing matches
 */
private List<QName> extractNamespaces(String xml) {
    List<QName> namespaces = new ArrayList<QName>();

    for (Matcher matcher = nsPattern.matcher(xml); matcher.find();) {
        // The matched declaration, cut out of the input by its start/end offsets.
        String declaration = xml.substring(matcher.start(), matcher.end());

        int prefixFrom = declaration.indexOf(":") + 1;
        int prefixTo = declaration.lastIndexOf("=");
        String prefix = declaration.substring(prefixFrom, prefixTo);

        int uriFrom = declaration.indexOf("\"") + 1;
        int uriTo = declaration.lastIndexOf("\"");
        String uri = declaration.substring(uriFrom, uriTo);

        namespaces.add(new QName(uri, prefix));
    }
    return namespaces;
}

From source file:gate.tagger.tagme.TaggerTagMeWS.java

/**
 * Sends the document text between two offsets to the TagMe service and adds one annotation
 * to the output set for every returned result whose rho is at least {@code minrho}.
 * <p>
 * In tweet mode the text is cleaned first: some patterns are replaced by fixed strings and
 * URL / user matches are overwritten in place, see the comments in the body.
 *
 * @param doc      the document the text is read from
 * @param outputAS the annotation set that receives the new annotations
 * @param from     start offset of the text span in the document
 * @param to       end offset of the text span in the document
 * @throws GateRuntimeException if the offsets are rejected by the document content, or if
 *                              the service returns a result without a title
 */
protected void annotateText(Document doc, AnnotationSet outputAS, long from, long to) {
    String text = "";
    try {
        text = doc.getContent().getContent(from, to).toString();
    } catch (InvalidOffsetException ex) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw new GateRuntimeException("Unexpected offset exception, offsets are " + from + "/" + to, ex);
    }
    // send the text to the service and get back the response

    // NOTE: there is a bug in the TagMe service which causes offset errors
    // if we use the tweet mode and there are certain patterns in the tweet.
    // The approach recommended by Francesco Piccinno is to replace those
    // patterns by spaces.
    if (getIsTweet()) {
        logger.debug("Text before cleaning: >>" + text + "<<");
        text = text.replaceAll(patternStringRT3, "    ");
        text = text.replaceAll(patternStringRT2, "   ");
        text = text.replaceAll(patternHashTag, " $1");
        // now replace the remaining patterns by spaces
        // Both matchers run over the same snapshot of the text while the builder is edited;
        // this presumably relies on nSpaces(n) returning exactly n characters so that the
        // match offsets stay valid in the builder -- TODO confirm.
        StringBuilder sb = new StringBuilder(text);
        Matcher m = patternUrl.matcher(text);
        while (m.find()) {
            int start = m.start();
            int end = m.end();
            sb.replace(start, end, nSpaces(end - start));
        }
        m = patternUser.matcher(text);
        while (m.find()) {
            int start = m.start();
            int end = m.end();
            sb.replace(start, end, nSpaces(end - start));
        }
        text = sb.toString();
        logger.debug("Text after cleaning:  >>" + text + "<<");
    }
    TagMeAnnotation[] tagmeAnnotations = getTagMeAnnotations(text);
    for (TagMeAnnotation tagmeAnn : tagmeAnnotations) {
        if (tagmeAnn.rho >= minrho) {
            FeatureMap fm = Factory.newFeatureMap();
            fm.put("tagMeId", tagmeAnn.id);
            fm.put("title", tagmeAnn.title);
            fm.put("rho", tagmeAnn.rho);
            fm.put("spot", tagmeAnn.spot);
            fm.put("link_probability", tagmeAnn.link_probability);
            if (tagmeAnn.title == null) {
                throw new GateRuntimeException("Odd: got a null title from the TagMe service" + tagmeAnn);
            } else {
                fm.put("inst", "http://dbpedia.org/resource/" + recodeForDbp38(tagmeAnn.title));
            }
            try {
                // Result offsets are relative to the text that was sent, so shift them by "from".
                gate.Utils.addAnn(outputAS, from + tagmeAnn.start, from + tagmeAnn.end,
                        getOutputAnnotationType(), fm);
            } catch (Exception ex) {
                // Best effort: report the failing annotation and continue with the next one.
                System.err.println(
                        "Got an exception in document " + doc.getName() + ": " + ex.getLocalizedMessage());
                ex.printStackTrace(System.err);
                System.err.println("from=" + from + ", to=" + to + " TagMeAnn=" + tagmeAnn);
            }
        }
    }
}

From source file:gtu.youtube.JavaYoutubeVideoUrlHandler.java

/**
 * Returns the part of {@code baseUrlString} that begins at {@code start} and ends just before
 * the first match of {@code endPattern} in that tail.
 *
 * @param baseUrlString the full input string
 * @param start         index at which the tail begins
 * @param endPattern    regular expression marking the end of the wanted part
 * @return the tail up to the first match, or the tail cut at the length of the full input
 *         when the pattern does not match
 */
private String getParamStr(String baseUrlString, int start, String endPattern) {
    String tail = StringUtils.substring(baseUrlString, start);
    Matcher endMatcher = Pattern.compile(endPattern).matcher(tail);
    // Without a terminator the end index is the length of the whole input, which may lie
    // past the end of the tail; StringUtils.substring is relied on to cope with that.
    int cutAt = endMatcher.find() ? endMatcher.start() : baseUrlString.length();
    return StringUtils.substring(tail, 0, cutAt);
}

From source file:com.haulmont.cuba.core.global.QueryTransformerRegex.java

/**
 * Appends the given condition, wrapped in parentheses, to the query held in {@code buffer}.
 * <p>
 * The condition is joined with {@code " and "} when {@code WHERE_PATTERN} matches after the
 * entity clause and with {@code " where "} otherwise. It is inserted one character before the
 * first {@code LAST_CLAUSE_PATTERN} match that follows the entity clause, or at the end of
 * the buffer when there is no such match. The first capture group of every
 * {@code PARAM_PATTERN} match in the condition is added to {@code addedParams}.
 *
 * @param where the condition text, inserted without modification
 */
@Override
public void addWhereAsIs(String where) {
    Matcher entityMatcher = FROM_ENTITY_PATTERN.matcher(buffer);
    findAlias(entityMatcher);
    final int afterEntity = entityMatcher.end();

    Matcher trailingClause = LAST_CLAUSE_PATTERN.matcher(buffer);
    final int insertAt = trailingClause.find(afterEntity)
            ? trailingClause.start() - 1
            : buffer.length();

    final boolean hasWhere = WHERE_PATTERN.matcher(buffer).find(afterEntity);
    StringBuilder clause = new StringBuilder()
            .append(hasWhere ? " and " : " where ")
            .append("(").append(where).append(")");
    buffer.insert(insertAt, clause);

    for (Matcher params = PARAM_PATTERN.matcher(where); params.find();) {
        addedParams.add(params.group(1));
    }
}

From source file:com.jkoolcloud.tnt4j.streams.custom.parsers.ApacheAccessLogParser.java

/**
 * Builds the log entry parsing RegEx from an Apache access log configuration pattern string.
 * <p>
 * Every match of {@code APACHE_LOG_CONFIG_TOKEN_REPLACEMENT_REGEX} is replaced by the result
 * of {@code mapConfigTokenToRegex}; the text between matches is copied unchanged. The result
 * is trimmed and prefixed with {@code '^'}.
 *
 * @param apacheLogPattern
 *            Apache access log configuration pattern string
 *
 * @return regular expression string, or {@code null} if the trimmed result is empty
 */
private String makeRegexPattern(String apacheLogPattern) {
    Matcher tokenMatcher = Pattern.compile(APACHE_LOG_CONFIG_TOKEN_REPLACEMENT_REGEX)
            .matcher(apacheLogPattern);
    StringBuilder regex = new StringBuilder();
    int copiedUpTo = 0;
    while (tokenMatcher.find()) {
        // literal text preceding the token, then the token's replacement
        regex.append(apacheLogPattern, copiedUpTo, tokenMatcher.start());
        regex.append(mapConfigTokenToRegex(tokenMatcher.group()));
        copiedUpTo = tokenMatcher.end();
    }
    // whatever literal text follows the last token (possibly nothing)
    regex.append(apacheLogPattern, copiedUpTo, apacheLogPattern.length());

    String trimmed = regex.toString().trim();
    if (trimmed.isEmpty()) {
        return null;
    }
    return '^' + trimmed; // NON-NLS
}

From source file:br.msf.commons.util.CharSequenceUtils.java

/**
 * Finds every occurrence of a pattern in a character sequence.
 *
 * @param pattern  the compiled pattern to search for
 * @param sequence the text to search in
 * @return one {@code MatchEntry} built from the start and end offset of each match, in match
 *         order; {@code CollectionUtils.EMPTY_LIST} when the pattern is {@code null} or
 *         {@code isEmptyOrNull(sequence)} is true
 */
@SuppressWarnings("unchecked")
public static List<MatchEntry> findPattern(final Pattern pattern, final CharSequence sequence) {
    final boolean nothingToSearch = pattern == null || isEmptyOrNull(sequence);
    if (nothingToSearch) {
        return CollectionUtils.EMPTY_LIST;
    }
    final List<MatchEntry> hits = new ArrayList<MatchEntry>();
    for (Matcher m = pattern.matcher(sequence); m.find();) {
        hits.add(new MatchEntry(m.start(), m.end()));
    }
    return hits;
}

From source file:com.intuit.tank.tools.debugger.FindReplaceDialog.java

/**
 * Searches the text area for the term in the search field, starting at the line stored in
 * {@code currentLine}, and moves the current line to the first hit.
 * <p>
 * The term is treated as a literal unless the regexp checkbox is selected, and matching is
 * case-insensitive unless the match-case checkbox is selected. When nothing is found after
 * the start offset and the wrap checkbox is selected, the region before the start offset is
 * searched as well. A message dialog is shown when there is no hit. Any exception is printed
 * and otherwise ignored.
 *
 * @param textArea the text area to search in
 */
private void find(RSyntaxTextArea textArea) {
    try {
        int offset = 0;
        if (currentLine < textArea.getLineCount()) {
            offset = textArea.getLineStartOffset(currentLine);
        }
        String searchTerm = tfSearchEditor.getText();
        String text = textArea.getText();

        int flags = 0;
        if (!checkboxRegexp.isSelected()) {
            flags |= Pattern.LITERAL;
        }
        if (!checkboxMatchCase.isSelected()) {
            flags |= Pattern.CASE_INSENSITIVE;
        }

        Matcher matcher = Pattern.compile(searchTerm, flags).matcher(text);
        // first pass: from the current line to the end of the text
        matcher.region(offset, text.length());
        boolean found = matcher.find();
        if (!found && checkboxWrap.isSelected() && offset > 0) {
            // second pass: wrap around and search the part before the current line
            matcher.region(0, offset);
            found = matcher.find();
        }

        if (!found) {
            JOptionPane.showMessageDialog(parent, "Search String not found.");
            return;
        }

        int hitLine = textArea.getLineOfOffset(matcher.start());
        textArea.setCurrentLine(hitLine);
        parent.repaint();
        parent.fireStepChanged(hitLine);
        // the next search continues on the line after the hit
        currentLine = hitLine + 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:com.haulmont.cuba.core.global.QueryTransformerRegex.java

/**
 * Adds the given condition, wrapped in parentheses, to the query held in {@code buffer},
 * replacing every {@code ALIAS_PLACEHOLDER} in it with the alias returned by
 * {@code findAlias}.
 * <p>
 * If {@code WHERE_PATTERN} matches after the entity clause, the condition is joined with
 * {@code " and "}; when {@code OR_PATTERN} also matches inside the existing condition, that
 * existing condition is wrapped in parentheses first. Otherwise the condition is introduced
 * with {@code " where "}. The first capture group of every {@code PARAM_PATTERN} match in the
 * condition is added to {@code addedParams}.
 *
 * @param where the condition text, possibly containing {@code ALIAS_PLACEHOLDER}
 */
@Override
public void addWhere(String where) {
    Matcher entityMatcher = FROM_ENTITY_PATTERN.matcher(buffer);
    String alias = findAlias(entityMatcher);

    // Default insertion point is the end of the query; if a LAST_CLAUSE_PATTERN match follows
    // the entity clause, insert one character before that match instead.
    int insertPos = buffer.length();
    Matcher lastClauseMatcher = LAST_CLAUSE_PATTERN.matcher(buffer);
    if (lastClauseMatcher.find(entityMatcher.end()))
        insertPos = lastClauseMatcher.start() - 1;

    StringBuilder sb = new StringBuilder();
    Matcher whereMatcher = WHERE_PATTERN.matcher(buffer);
    int whereEnd = -1;
    boolean needOpenBracket = false;
    if (whereMatcher.find(entityMatcher.end())) {
        whereEnd = whereMatcher.end();

        // Look for OR only between the end of the WHERE match and the insertion point.
        Matcher orMatcher = OR_PATTERN.matcher(buffer);
        orMatcher.region(whereEnd + 1, insertPos);
        if (orMatcher.find()) { // surround with brackets if there is OR inside WHERE
            sb.append(")");
            needOpenBracket = true;
        }
        sb.append(" and ");
    } else {
        sb.append(" where ");
    }

    sb.append("(").append(where);
    // Substitute every alias placeholder in the text assembled so far.
    int idx;
    while ((idx = sb.indexOf(ALIAS_PLACEHOLDER)) >= 0) {
        sb.replace(idx, idx + ALIAS_PLACEHOLDER.length(), alias);
    }
    sb.append(")");

    if (needOpenBracket) {
        // The opening bracket goes in before the insertion point, so the insertion point
        // shifts right by one character.
        buffer.insert(whereEnd + 1, "(");
        insertPos++;
    }

    buffer.insert(insertPos, sb);

    // Record the parameters referenced by the new condition.
    Matcher paramMatcher = PARAM_PATTERN.matcher(where);
    while (paramMatcher.find()) {
        addedParams.add(paramMatcher.group(1));
    }
}

From source file:com.google.acre.appengine.script.AppEngineAsyncUrlfetch.java

/**
 * Converts a finished asynchronous fetch into a script object with the properties
 * {@code status}, {@code body}, {@code headers} and {@code cookies}, logs the response and
 * records the waiting time in the cost collector.
 *
 * @param req the request that was sent; supplies the start time, the fallback URL and the
 *            logging flags
 * @param res the HTTP response received for the request
 * @return the script object describing the response
 * @throws RuntimeException if the response encoding is unsupported or a Set-Cookie value
 *                          cannot be parsed
 */
private Scriptable callback_result(AsyncRequest req, HTTPResponse res) {

    long waiting_time = System.currentTimeMillis() - req.start_time;

    // Use the request URL when the response does not report a final URL.
    URL furl = res.getFinalUrl();
    if (furl == null) {
        furl = req.url;
    }

    BrowserCompatSpecFactory bcsf = new BrowserCompatSpecFactory();
    CookieSpec cspec = bcsf.newInstance(null);
    String protocol = furl.getProtocol();
    boolean issecure = ("https".equals(protocol));
    int port = furl.getPort();
    if (port == -1) {
        // No explicit port in the URL: use the protocol's default (e.g. 443 for https)
        // instead of assuming 80 for every scheme.
        port = furl.getDefaultPort();
    }
    if (port == -1) {
        // The protocol defines no default port either; keep the historical fallback.
        port = 80;
    }
    CookieOrigin origin = new CookieOrigin(furl.getHost(), port, furl.getPath(), issecure);

    Context ctx = Context.getCurrentContext();
    Scriptable out = ctx.newObject(_scope);
    Scriptable headers = ctx.newObject(_scope);
    Scriptable cookies = ctx.newObject(_scope);

    out.put("status", out, res.getResponseCode());

    String response_body = null;

    try {
        response_body = new String(res.getContent(), getResponseEncoding(res));
        out.put("body", out, response_body);
    } catch (java.io.UnsupportedEncodingException e) {
        throw new RuntimeException(e);
    }

    out.put("headers", out, headers);
    out.put("cookies", out, cookies);

    // Splits a combined Set-Cookie value at commas, except commas that are followed by
    // optional whitespace and a digit. Compiled once instead of once per header.
    Pattern cookie_splitter = Pattern.compile("\\s*(([^,]|(,\\s*\\d))+)");

    StringBuilder response_header_log = new StringBuilder();
    for (HTTPHeader h : res.getHeaders()) {
        if (h.getName().equalsIgnoreCase("set-cookie")) {
            String set_cookie = h.getValue();
            Matcher m = cookie_splitter.matcher(set_cookie);
            while (m.find()) {
                Header ch = new BasicHeader("Set-Cookie", set_cookie.substring(m.start(), m.end()));
                try {
                    List<Cookie> pcookies = cspec.parse(ch, origin);
                    for (Cookie c : pcookies) {
                        cookies.put(c.getName(), cookies, new AcreCookie(c).toJsObject(_scope));
                    }
                } catch (MalformedCookieException e) {
                    // we've occasionally choked on cookie-set,
                    // e.g. www.google.com returning expires=; expires=Mon, 01-Jan-1990 00:00:00 GMT;
                    // no solution but at least log exactly what's happening.
                    String cookiestring = ch.toString();
                    _logger.warn("urlfetch.response.async", "exception thrown on bad cookie " + cookiestring);
                    throw new RuntimeException(e);
                }
            }
        }

        headers.put(h.getName(), headers, h.getValue());
        response_header_log.append(h.getName()).append(": ").append(h.getValue()).append(", ");
    }

    boolean system = req.system;
    boolean log_to_user = req.log_to_user;

    // The body is logged only for responses other than 200.
    String log_body = "";
    if (res.getResponseCode() != 200 && response_body != null) {
        log_body = response_body;
    }

    _logger.syslog4j("INFO", "urlfetch.response.async", "URL", furl.toString(), "Status",
            Integer.toString(res.getResponseCode()), "Headers", response_header_log, "Body", log_body);

    if (system && log_to_user) {
        _response.userlog4j("INFO", "urlfetch.response.async", "URL", furl.toString(), "Status",
                Integer.toString(res.getResponseCode()), "Headers", response_header_log);
    }

    _costCollector.collect((system) ? "asuc" : "auuc").collect((system) ? "asuw" : "auuw", waiting_time);

    return out;
}

From source file:mobi.jenkinsci.server.core.net.ProxyUtil.java

/**
 * Replaces every script tag that has a {@code src} attribute in the given markup with the
 * text returned by {@code retrieveJs} for that source URL.
 *
 * @param userAgent    passed through to {@code retrieveJs}
 * @param pluginName   passed through to {@code retrieveJs}
 * @param url          URL of the page being processed; used for logging and passed through
 * @param resultString the markup to process
 * @return the markup with the matched script tags replaced
 * @throws IOException declared for failures while retrieving a script
 */
private String resolveJs(final String userAgent, final String pluginName, final String url,
        final String resultString) throws IOException {
    log.debug("Resolving JavaScript for URL " + url);
    final Matcher scriptTag = Pattern.compile(
            "<script>?[^>]*src=[\"\\']([^>\"\\']*)[\"\\']([^>]*/>|[^>]*>[ \r\n]*</script>)",
            Pattern.DOTALL | Pattern.CASE_INSENSITIVE).matcher(resultString);

    final StringBuilder inlined = new StringBuilder();
    int copiedUpTo = 0;
    while (scriptTag.find(copiedUpTo)) {
        // markup preceding the tag is kept verbatim
        inlined.append(resultString, copiedUpTo, scriptTag.start());
        // group 1 is the value of the src attribute
        final String scriptUrl = scriptTag.group(1);
        inlined.append(retrieveJs(userAgent, pluginName, url, scriptUrl));
        copiedUpTo = scriptTag.end();
    }
    // remainder after the last tag
    inlined.append(resultString, copiedUpTo, resultString.length());

    log.debug(inlined.length() + " JavaScript chars included for URL " + url);
    return inlined.toString();
}