Example usage for java.util.regex Matcher start

List of usage examples for java.util.regex Matcher start

Introduction

On this page you can find usage examples for the start() method of java.util.regex.Matcher.

Prototype

public int start() 

Source Link

Document

Returns the start index of the previous match.

Usage

From source file:com.google.flightmap.parsing.faa.afd.AfdCommParser.java

/**
 * Scans {@code afd} for airport COMMUNICATIONS sections and hands every frequency entry found
 * in a section to {@code addAirportCommToDb}.
 *
 * <p>A section begins at a match of the section pattern (whose first group is used as the
 * airport identifier) and is considered to end at the next colon after the section header or at
 * the start of the next section header, whichever comes first.
 *
 * @throws SQLException declared for the calls to {@code addAirportCommToDb}
 */
private void addCommData() throws SQLException {
    Pattern commSectionRegex = Pattern
            .compile("\\((\\S+?)\\)\\s+?\\d+\\s*(?:N|E|W|S|NE|NW|SE|SW).+?UTC\\s+.+?COMMUNICATIONS\\:");
    Pattern freqRegex = Pattern.compile("([A-Z]+(?:[A-Z]| |/)+?)(\\d+\\.\\d+)\\s+(?:\\((.+?)\\))?");
    int start = 0;

    Matcher commSectionMatcher = commSectionRegex.matcher(afd);
    Matcher freqMatcher = freqRegex.matcher(afd);

    while (commSectionMatcher.find(start)) {
        start = commSectionMatcher.start();
        final int sectionHeaderEnd = commSectionMatcher.end();
        // Read the identifier now: the look-ahead find() below resets the matcher's groups.
        final String iata = commSectionMatcher.group(1);
        System.out.println(iata);

        // Determine the potential end of this COMMUNICATIONS section.
        int nextColon = afd.indexOf(":", sectionHeaderEnd);
        if (nextColon == -1) {
            nextColon = afd.length();
        }

        final int nextMatch;
        if (commSectionMatcher.find(sectionHeaderEnd)) {
            nextMatch = commSectionMatcher.start();
        } else {
            nextMatch = afd.length();
        }

        final int stop = Math.min(nextColon, nextMatch);

        while (freqMatcher.find(start)) {
            if (freqMatcher.start() > stop) {
                start = stop;
                break;
            }
            System.out.println("  -> " + freqMatcher.group(0));
            final String identifier = freqMatcher.group(1).trim();
            final String frequency = freqMatcher.group(2).trim();
            String remarks = freqMatcher.group(3);
            if (remarks != null) {
                remarks = remarks.trim();
            }

            addAirportCommToDb(iata, identifier, frequency, remarks);
            start = freqMatcher.end();
        }

        // Guarantee forward progress. When the frequency pattern finds nothing at all, start
        // is still the offset of the current section header, and the outer find(start) would
        // match that same header again forever. stop is never before the end of the current
        // header and never past the start of the next one, so no section is skipped.
        start = Math.max(start, stop);
    }
}

From source file:com.sonicle.webtop.mail.ICalendarRequest.java

/**
 * Finds all URLs in the given {@code _rawText}, wraps each of them in an HTML link tag and
 * returns the result, with the rest of the text HTML-encoded.
 * <p>
 * We employ the procedure described at
 * http://www.codinghorror.com/blog/2008/10/the-problem-with-urls.html
 * which is a <b>must-read</b>.
 * </p>
 * <p>
 * Basically, we allow any number of left parentheses (which will get stripped away)
 * followed by http:// or https://, then any number of permitted URL characters
 * (based on http://www.ietf.org/rfc/rfc1738.txt) followed by a single character
 * of that set (basically, those minus typical punctuation). We remove all sets of
 * matching left &amp; right parentheses which surround the URL.
 * </p>
 * <p>
 * This method <em>must</em> be called from a tag/component which will NOT
 * end up escaping the output. For example:
 * </p>
 * <pre>
 * &lt;h:outputText ... escape="false" value="#{core:hyperlinkText(textThatMayHaveURLs, '_blank')}"/&gt;
 * </pre>
 * <p>
 * Reason: we are adding <code>&lt;a href="..."&gt;</code> tags to the output <em>and</em>
 * encoding the rest of the string. So, encoding the output will result in
 * double-encoding data which was already encoded - and encoding the <code>a href</code>
 * (which will render it useless).
 * </p>
 * <p>
 * Note that the matched URL and the trimmed {@code _target} value are written into the markup
 * verbatim; only the text between URLs is HTML-encoded.
 * </p>
 *
 * @param   _rawText  the text to process; if <code>null</code>, returns <code>""</code> (empty string).
 * @param   _target   if not <code>null</code> or blank, adds a target attribute to each generated link, using the trimmed _target as the attribute value.
 * @return  the HTML-encoded text with every URL wrapped in an anchor tag; never <code>null</code>
 *          for a non-null {@code _rawText}.
 */
public static final String hyperlinkText(final String _rawText, final String _target) {

    // Honour the documented contract for null input. Without this guard the fallback at the
    // bottom would return escapeHtml4(null), which is documented to be null rather than "".
    if (_rawText == null) {
        return "";
    }

    String returnValue = null;

    if (!StringUtils.isBlank(_rawText)) {

        final Matcher matcher = URI_FINDER_PATTERN.matcher(_rawText);

        if (matcher.find()) {

            final int originalLength = _rawText.length();

            final String targetText = (StringUtils.isBlank(_target)) ? ""
                    : " target=\"" + _target.trim() + "\"";
            final int targetLength = targetText.length();

            // Counted 15 characters aside from the target + 2 of the URL (max if the whole string is URL)
            // Rough guess, but should keep us from expanding the Builder too many times.
            final StringBuilder returnBuffer = new StringBuilder(originalLength * 2 + targetLength + 15);

            int currentStart;
            int currentEnd;
            int lastEnd = 0;

            String currentURL;

            do {
                currentStart = matcher.start();
                currentEnd = matcher.end();
                currentURL = matcher.group();

                // Adjust for URLs wrapped in ()'s ... move start/end markers
                //      and substring the _rawText for new URL value.
                while (currentURL.startsWith("(") && currentURL.endsWith(")")) {
                    currentStart = currentStart + 1;
                    currentEnd = currentEnd - 1;

                    currentURL = _rawText.substring(currentStart, currentEnd);
                }

                // Drop any remaining unmatched leading parentheses.
                while (currentURL.startsWith("(")) {
                    currentStart = currentStart + 1;

                    currentURL = _rawText.substring(currentStart, currentEnd);
                }

                // Text since last match
                returnBuffer.append(StringEscapeUtils.escapeHtml4(_rawText.substring(lastEnd, currentStart)));

                // Wrap matched URL
                returnBuffer.append("<a href=\"").append(currentURL).append("\"").append(targetText)
                        .append(">").append(currentURL).append("</a>");

                lastEnd = currentEnd;

            } while (matcher.find());

            // Trailing text after the last URL
            if (lastEnd < originalLength) {
                returnBuffer.append(StringEscapeUtils.escapeHtml4(_rawText.substring(lastEnd)));
            }

            returnValue = returnBuffer.toString();
        }
    }

    // Blank input or no URL found: just encode the text as a whole.
    if (returnValue == null) {
        returnValue = StringEscapeUtils.escapeHtml4(_rawText);
    }

    return returnValue;

}

From source file:android.databinding.compilationTest.BaseCompilationTest.java

/**
 * Writes the classpath resource {@code name} to {@code targetFile}, replacing every match of
 * {@code VARIABLES} with the value that {@code replacements} holds for the match's first group.
 * A match whose key has no entry in {@code replacements} is removed from the output.
 *
 * @param name         resource name, resolved through {@code getClass().getResourceAsStream}
 * @param targetFile   file that receives the processed contents
 * @param replacements values keyed by the first capture group of {@code VARIABLES}
 * @throws IOException if the resource cannot be found or read, or the file cannot be written
 */
protected void copyResourceTo(String name, File targetFile, Map<String, String> replacements)
        throws IOException {
    if (replacements.isEmpty()) {
        // NOTE(review): there is no return here, so the substituting copy below still runs and
        // rewrites targetFile with every VARIABLES match stripped. Left as is because adding a
        // return would change the file contents - confirm which behaviour is intended.
        copyResourceTo(name, targetFile);
    }

    final String contents;
    // try-with-resources closes the stream even when reading fails.
    try (InputStream inputStream = getClass().getResourceAsStream(name)) {
        if (inputStream == null) {
            // getResourceAsStream signals a missing resource with null.
            throw new IOException("Resource not found: " + name);
        }
        contents = IOUtils.toString(inputStream);
    }

    StringBuilder out = new StringBuilder(contents.length());
    final Matcher matcher = VARIABLES.matcher(contents);
    int location = 0;
    while (matcher.find()) {
        // Copy the literal text between the previous match and this one.
        int start = matcher.start();
        if (start > location) {
            out.append(contents, location, start);
        }
        final String key = matcher.group(1);
        final String replacement = replacements.get(key);
        if (replacement != null) {
            out.append(replacement);
        }
        location = matcher.end();
    }
    // Copy whatever follows the last match.
    if (location < contents.length()) {
        out.append(contents, location, contents.length());
    }

    FileUtils.writeStringToFile(targetFile, out.toString());
}

From source file:be.makercafe.apps.makerbench.editors.JFXScadEditor.java

/**
 * Builds the style spans for {@code text}: every match of {@code PATTERN} gets the style class
 * of the first named group that participated in the match, and the gaps between matches get an
 * empty style list.
 *
 * @param text the text to highlight
 * @return spans covering the whole of {@code text}
 */
private static StyleSpans<Collection<String>> computeHighlighting(String text) {
    // Named group -> style class, probed in this order.
    final String[][] groupStyles = {
            { "KEYWORD", "keyword" },
            { "PAREN", "paren" },
            { "BRACE", "brace" },
            { "BRACKET", "bracket" },
            { "SEMICOLON", "semicolon" },
            { "STRING", "string" },
            { "COMMENT", "comment" } };

    final StyleSpansBuilder<Collection<String>> builder = new StyleSpansBuilder<>();
    final Matcher tokens = PATTERN.matcher(text);
    int previousEnd = 0;

    while (tokens.find()) {
        String cssClass = null;
        for (String[] groupStyle : groupStyles) {
            if (tokens.group(groupStyle[0]) != null) {
                cssClass = groupStyle[1];
                break;
            }
        }
        // The original author expects every match to belong to one of the groups above.
        assert cssClass != null;

        final int tokenStart = tokens.start();
        final int tokenEnd = tokens.end();
        // Unstyled gap since the previous token, then the token itself.
        builder.add(Collections.emptyList(), tokenStart - previousEnd);
        builder.add(Collections.singleton(cssClass), tokenEnd - tokenStart);
        previousEnd = tokenEnd;
    }

    // Unstyled remainder after the last token.
    builder.add(Collections.emptyList(), text.length() - previousEnd);
    return builder.create();
}

From source file:com.xpn.xwiki.render.XWikiMacrosMappingRenderer.java

/**
 * Rewrites every match of {@code SINGLE_LINE_MACRO_PATTERN} in {@code content} whose macro name
 * (group 1) is mapped to a single-line virtual macro; all other text is copied unchanged.
 *
 * @param content the text to convert
 * @param context supplies the rendering engine that performs the conversion
 * @return the converted text, or {@code content} itself when the end offset of the last match
 *         is still 0 (in particular when nothing matched)
 */
private String convertSingleLines(String content, XWikiContext context) {
    final Matcher macroMatcher = SINGLE_LINE_MACRO_PATTERN.matcher(content);
    final StringBuffer converted = new StringBuffer();
    int consumed = 0;

    while (macroMatcher.find()) {
        // Literal text between the previous macro and this one.
        converted.append(content, consumed, macroMatcher.start());
        consumed = macroMatcher.end();

        final String name = macroMatcher.group(1);
        final String parameters = macroMatcher.group(3);
        final String wholeMatch = macroMatcher.group(0);

        final XWikiVirtualMacro mapping = this.macros_mappings.get(name);
        if (mapping == null || !mapping.isSingleLine()) {
            // Not a mapped single-line macro: keep the source text as is.
            converted.append(wholeMatch);
            continue;
        }
        converted.append(context.getWiki().getRenderingEngine().convertSingleLine(name, parameters,
                wholeMatch, mapping, context));
    }

    if (consumed == 0) {
        return content;
    }

    return converted.append(content.substring(consumed)).toString();
}

From source file:com.xpn.xwiki.render.XWikiMacrosMappingRenderer.java

/**
 * Rewrites every match of {@code MULTI_LINE_MACRO_PATTERN} in {@code content} whose macro name
 * (group 1) is mapped to a multi-line virtual macro; all other text is copied unchanged.
 *
 * @param content the text to convert
 * @param context supplies the rendering engine that performs the conversion
 * @return the converted text, or {@code content} itself when the end offset of the last match
 *         is still 0 (in particular when nothing matched)
 */
private String convertMultiLines(String content, XWikiContext context) {
    final Matcher macroMatcher = MULTI_LINE_MACRO_PATTERN.matcher(content);
    final StringBuffer converted = new StringBuffer();
    int consumed = 0;

    while (macroMatcher.find()) {
        // Literal text between the previous macro and this one.
        converted.append(content, consumed, macroMatcher.start());
        consumed = macroMatcher.end();

        final String name = macroMatcher.group(1);
        final String parameters = macroMatcher.group(3);
        final String body = macroMatcher.group(4);
        final String wholeMatch = macroMatcher.group(0);

        final XWikiVirtualMacro mapping = this.macros_mappings.get(name);
        if (mapping == null || !mapping.isMultiLine()) {
            // Not a mapped multi-line macro: keep the source text as is.
            converted.append(wholeMatch);
            continue;
        }
        converted.append(context.getWiki().getRenderingEngine().convertMultiLine(name, parameters, body,
                wholeMatch, mapping, context));
    }

    if (consumed == 0) {
        return content;
    }

    return converted.append(content.substring(consumed)).toString();
}

From source file:com.ushahidi.chambua.service.EntityExtractorService.java

/**
 * Extracts named entities from the provided text.
 *
 * <p>The text is first passed through the article extractor, then labelled by the classifier
 * with inline XML tags. The tagged spans are collected per tag; "person" and "organization"
 * entities are copied to the response and "location" entities are geocoded into places.
 *
 * @param text the raw input text
 * @return the extracted entities mapped to a
 *         com.ushahidi.swiftriver.tagger.dto.APIResponseDTO
 */
public APIResponseDTO getEntities(String text) {
    // Fall back to the raw input when cleaning fails. Previously cleanedContent stayed null
    // after an extraction error and null was handed to the classifier.
    String cleanedContent = text;
    try {
        cleanedContent = articleExtractor.getText(text);
    } catch (BoilerpipeProcessingException e) {
        LOGGER.error("An error occurred while cleaning the input: {}", e.getMessage());
    }

    String labeledText = classifier.classifyWithInlineXML(cleanedContent);

    // Entity types/classes available in the classifier e.g. PERSON, LOCATION, ORGANIZATION
    Set<String> tags = classifier.labels();
    String background = classifier.backgroundSymbol();

    // Build out the regex alternation; labels are quoted so they are always matched literally.
    StringBuilder tagPattern = new StringBuilder();
    for (String tag : tags) {
        if (background.equals(tag)) {
            continue;
        }
        if (tagPattern.length() > 0) {
            tagPattern.append("|");
        }
        tagPattern.append(Pattern.quote(tag));
    }

    // Patterns for extracting the labeled text
    Pattern startPattern = Pattern.compile("<(" + tagPattern + ")>");
    Pattern endPattern = Pattern.compile("</(" + tagPattern + ")>");

    // Map to store the extracted entities/tags
    Map<String, Set<String>> entityMap = new HashMap<String, Set<String>>();

    // Begin extraction. Each pass strips the first opening tag and the first closing tag, so
    // that the offsets of both refer to the same, tag-free stretch of text.
    Matcher m = startPattern.matcher(labeledText);
    while (m.find()) {
        int start = m.start();
        labeledText = m.replaceFirst("");
        m = endPattern.matcher(labeledText);
        if (m.find()) {
            int end = m.start();
            String tag = m.group(1).toLowerCase();
            labeledText = m.replaceFirst("");
            String entity = labeledText.substring(start, end);

            Set<String> entities = entityMap.get(tag);
            if (entities == null) {
                entities = new HashSet<String>();
                entityMap.put(tag, entities);
            }
            entities.add(entity);
        }
        // Adjust the matcher to the text with the tags removed
        m = startPattern.matcher(labeledText);
    }

    DocumentData apiResponse = new DocumentData();
    if (entityMap.containsKey("person")) {
        apiResponse.setPeople(new ArrayList<String>(entityMap.get("person")));
    }

    if (entityMap.containsKey("organization")) {
        apiResponse.setOrganizations(new ArrayList<String>(entityMap.get("organization")));
    }

    // Geocode the location entities via the Gisgraphy REST API
    if (entityMap.containsKey("location")) {
        try {
            List<Place> places = geocodePlaceNames(entityMap.get("location"));
            apiResponse.setPlaces(places);
        } catch (IOException e) {
            // Also covers MalformedURLException. Geocoding is best-effort: the response is
            // still returned, just without places.
            LOGGER.error("An error occurred while geocoding the location entities", e);
        }
    }

    return beanMapper.map(apiResponse, APIResponseDTO.class);
}

From source file:at.ac.tuwien.inso.subcat.utility.commentparser.Parser.java

License:asdf

/**
 * Splits {@code commentFragment} at the matches of {@code pPara} and passes each piece of text
 * between two matches to {@code parseParagraph}.
 *
 * <p>For a piece that is followed by a match, the third argument is the number of newline
 * characters in that match minus one; for the trailing piece it is 0. The trailing piece is
 * skipped when it contains only whitespace.
 *
 * @param ast             list handed through to {@code parseParagraph}
 * @param commentFragment the text to split
 */
private void parseParagraphs(List<ContentNode<T>> ast, String commentFragment) {
    final Matcher separators = pPara.matcher(commentFragment);
    int cursor = 0;

    while (separators.find()) {
        final int separatorStart = separators.start();
        // Skip empty pieces (a match directly at the cursor).
        if (separatorStart != cursor) {
            final int count = StringUtils.countMatches(separators.group(0), "\n") - 1;
            final String paragraph = commentFragment.substring(cursor, separatorStart);
            parseParagraph(ast, paragraph, count);
        }
        cursor = separators.end();
    }

    if (cursor == commentFragment.length()) {
        return;
    }

    final String tail = commentFragment.substring(cursor);
    if (!tail.trim().isEmpty()) {
        parseParagraph(ast, tail, 0);
    }
}

From source file:net.sf.jabref.openoffice.BstWrapper.java

/**
 * Splits {@code result} into one entry per match of {@code BstWrapper.bibitemTag}.
 *
 * <p>The key of an entry is the matched tag without its first nine characters and its last
 * character. The value is the text between the end of the tag and the start of the next tag,
 * trimmed, with each {@code "\newblock "} replaced by a space and then run through
 * {@code formatter}. The last entry ends at the last {@code \end{thebibliography}} if that
 * occurs after the last tag, and at the end of {@code result} otherwise.
 *
 * @param result the text to parse
 * @return formatted entry text by key; empty when {@code result} contains no tag
 */
private Map<String, String> parseResult(String result) {
    Map<String, String> map = new HashMap<>();
    // Look through for instances of \bibitem :
    Matcher m = BstWrapper.bibitemTag.matcher(result);
    ArrayList<Integer> indices = new ArrayList<>();
    ArrayList<Integer> endIndices = new ArrayList<>();
    ArrayList<String> keys = new ArrayList<>();
    while (m.find()) {
        // The start of this tag closes the previous entry.
        if (!indices.isEmpty()) {
            endIndices.add(m.start());
        }
        LOGGER.debug(m.start() + "  " + m.end());
        String tag = m.group();
        String key = tag.substring(9, tag.length() - 1);
        indices.add(m.end());
        keys.add(key);
    }

    // Nothing to split. The look-up of the last start index below would otherwise fail with
    // an IndexOutOfBoundsException.
    if (keys.isEmpty()) {
        return map;
    }

    // Close the last entry. Every other entry was closed by the tag that follows it, so
    // endIndices is exactly one element short at this point. Fall back to the end of the
    // input when the terminator is missing or precedes the last tag; previously this case
    // left endIndices short and the loop below failed on the last key.
    int lastStart = indices.get(indices.size() - 1);
    int lastI = result.lastIndexOf("\\end{thebibliography}");
    endIndices.add(lastI > lastStart ? lastI : result.length());

    for (int i = 0; i < keys.size(); i++) {
        String key = keys.get(i);
        int index = indices.get(i);
        int endIndex = endIndices.get(i);
        String part = result.substring(index, endIndex);
        map.put(key, formatter.format(part.trim().replaceAll("\\\\newblock ", " ")));
    }

    return map;
}

From source file:com.digitalpebble.storm.crawler.indexing.AbstractIndexerBolt.java

/**
 * Returns a mapping of field name to values for the metadata to index.
 *
 * <p>Each key of {@code metadata2field} names a metadata key, optionally followed by an index
 * in square brackets (e.g. {@code key[0]}). Without an index all values of the key are kept;
 * with an index only that value is kept. Keys without values, and indices beyond the available
 * values, are skipped.
 **/
protected Map<String, String[]> filterMetadata(Metadata meta) {

    final Pattern indexSuffix = Pattern.compile("\\[(\\d+)\\]");
    final Map<String, String[]> fieldVals = new HashMap<String, String[]>();

    for (Iterator<Entry<String, String>> it = metadata2field.entrySet().iterator(); it.hasNext();) {
        final Entry<String, String> mapping = it.next();

        // Split an optional "[n]" off the metadata key; -1 means "all values".
        String metaKey = mapping.getKey();
        int wanted = -1;
        final Matcher suffix = indexSuffix.matcher(metaKey);
        if (suffix.find()) {
            wanted = Integer.parseInt(suffix.group(1));
            metaKey = metaKey.substring(0, suffix.start());
        }

        final String[] values = meta.getValues(metaKey);
        final boolean missing = values == null || values.length == 0;
        // Nothing stored for this key, or the requested index is out of range.
        if (missing || wanted >= values.length) {
            continue;
        }

        final String[] selected = (wanted == -1) ? values : new String[] { values[wanted] };
        fieldVals.put(mapping.getValue(), selected);
    }

    return fieldVals;
}