Example usage for java.util.regex Matcher groupCount

Introduction

This page collects example usages of the java.util.regex.Matcher.groupCount() method taken from open-source projects.

Prototype

public int groupCount()

Source Link

Document

Returns the number of capturing groups in this matcher's pattern.

Usage

From source file:org.transdroid.search.hdbitsorg.HdBitsOrgAdapter.java

/**
 * Extracts torrent search results from the HTML of a search result page.
 *
 * <p>Every match of {@code SEARCH_REGEX} counts towards {@code maxResults}, including matches that are
 * skipped because the matcher does not report exactly 11 capturing groups. The "added" date of a result
 * is reconstructed from a relative age ("8 months 7 days ago"), so it is only approximate.
 *
 * @param html       raw HTML of the search result page
 * @param maxResults maximum number of regex matches to process
 * @return the successfully parsed results, in the order they appear in the page
 * @throws Exception declared for callers; {@code Integer.parseInt} on a captured group may throw a
 *                   {@code NumberFormatException}
 */
protected List<SearchResult> parseHtml(String html, int maxResults) throws Exception {
    Log.d(LOG_TAG, "Parsing search results.");

    // a "month" is approximated as 30 days
    final long millisPerHour = 1000L * 60L * 60L;
    final long millisPerDay = millisPerHour * 24L;
    final long millisPerMonth = millisPerDay * 30L;

    List<SearchResult> parsed = new ArrayList<SearchResult>();
    int found = 0;
    int skipped = 0;

    Matcher row = Pattern.compile(SEARCH_REGEX, Pattern.DOTALL).matcher(html);
    while (row.find() && found < maxResults) {
        found++;
        if (row.groupCount() != 11) {
            skipped++;
            continue;
        }

        String detailsLink = URL_PREFIX + row.group(1);
        String name = row.group(2);
        String downloadLink = URL_PREFIX + row.group(3);
        String displaySize = row.group(8) + row.group(9); // size + unit
        int seederCount = Integer.parseInt(row.group(10));
        int leecherCount = Integer.parseInt(row.group(11));

        int firstAmount = Integer.parseInt(row.group(4));
        String firstUnit = row.group(5);
        int secondAmount = Integer.parseInt(row.group(6));
        String secondUnit = row.group(7);

        // The site reports the age as two relative components (months + days, or days + hours).
        // Convert both to milliseconds and subtract from "now"; the result can be off by a day or two.
        long ageMillis = 0;
        if (firstUnit.startsWith("month")) {
            ageMillis += firstAmount * millisPerMonth;
        } else if (firstUnit.startsWith("day")) {
            ageMillis += firstAmount * millisPerDay;
        }
        if (secondUnit.startsWith("day")) {
            ageMillis += secondAmount * millisPerDay;
        } else if (secondUnit.startsWith("hour")) {
            ageMillis += secondAmount * millisPerHour;
        }

        Date added = new Date(System.currentTimeMillis() - ageMillis);

        parsed.add(new SearchResult(name, downloadLink, detailsLink, displaySize, added, seederCount,
                leecherCount));
    }

    Log.d(LOG_TAG, "Found " + found + " matches and successfully parsed " + (found - skipped)
            + " of those matches.");
    return parsed;
}

From source file:com.moviejukebox.model.scriptablescraper.SectionSS.java

/**
 * Applies {@code regex} to {@code data} and flattens all matches into one string.
 *
 * <p>For every match, the text of each capturing group is appended followed by
 * {@code ScriptableScraper.ARRAY_ITEM_DIVIDER}; after the last group of a match
 * {@code ScriptableScraper.ARRAY_GROUP_DIVIDER} is appended. A group that did not participate in the
 * match contributes the text {@code "null"}.
 *
 * @param data  text to search
 * @param regex regular expression to compile and apply
 * @return the concatenated groups of all matches, or an empty string when nothing matches
 */
public String parseInput(String data, String regex) {
    if (isDebug()) {
        LOG.debug("parseInput: data: '{}'", data);
        LOG.debug("parseInput: regex: '{}'", regex);
    }

    Matcher matcher = Pattern.compile(regex).matcher(data);
    // StringBuilder avoids re-copying the whole accumulated text on every append
    StringBuilder collected = new StringBuilder();
    while (matcher.find()) {
        // capturing groups are numbered from 1; group 0 is the whole match and is not included
        for (int group = 1; group <= matcher.groupCount(); group++) {
            collected.append(matcher.group(group)).append(ScriptableScraper.ARRAY_ITEM_DIVIDER);
        }
        collected.append(ScriptableScraper.ARRAY_GROUP_DIVIDER);
    }
    String result = collected.toString();

    if (isDebug()) {
        LOG.debug("parseInput: result: '{}'", result);
    }
    return result;
}

From source file:net.timbusproject.extractors.debiansoftwareextractor.Engine.java

/**
 * Parses an inline package reference of the form {@code name} or {@code name (comparator version)}.
 *
 * @param element text containing the package reference
 * @return an object with the key "Package" and, only when a parenthesised constraint follows the
 *         name, the keys "Comparator" and "Version"
 * @throws JSONException         propagated from {@code JSONObject.put}
 * @throws IllegalStateException if {@code element} contains no non-whitespace token
 */
private JSONObject extractInlinePackage(String element) throws JSONException {
    final Pattern pattern = Pattern.compile("(\\S+)(?: \\((\\S+) (.+)\\))?");
    Matcher matcher = pattern.matcher(element);
    if (!matcher.find()) {
        // group(1) would throw the same exception type, only without saying which input failed
        throw new IllegalStateException("No package name found in '" + element + "'");
    }
    JSONObject object = new JSONObject().put("Package", matcher.group(1));
    // groupCount() is a property of the pattern (always 3 here), not of the match, so the
    // optional constraint has to be detected by checking whether its group participated.
    if (matcher.group(2) != null) {
        object.put("Comparator", matcher.group(2)).put("Version", matcher.group(3));
    }
    return object;
}

From source file:com.amalto.core.jobox.component.JobAware.java

/**
 * Collects a {@code JobInfo} for every entry of the work directory that {@code recognizeTISJob} accepts.
 *
 * <p>Plain files and the folder named {@code JOBOX_RESERVED_FOLDER_NAME} (case-insensitive) are ignored.
 * Name and version are taken from the entry name via {@code jobVersionNamePattern}; both stay empty
 * when the pattern does not match.
 *
 * @return the jobs found; empty when there are none or when the work directory cannot be listed
 */
public List<JobInfo> findJobsInBox() {
    File[] entities = new File(workDir).listFiles(new FileFilter() {

        public boolean accept(File pathName) {
            return !(pathName.isFile() || JOBOX_RESERVED_FOLDER_NAME.equalsIgnoreCase(pathName.getName()));
        }
    });
    List<JobInfo> jobList = new ArrayList<JobInfo>();
    if (entities == null) {
        // listFiles returns null when workDir is not a directory or an I/O error occurs
        return jobList;
    }
    for (File entity : entities) {
        if (!recognizeTISJob(entity)) {
            continue;
        }
        // parse name and version; when the pattern matches several times the last match wins
        String jobVersion = ""; //$NON-NLS-1$
        String jobName = ""; //$NON-NLS-1$
        Matcher m = jobVersionNamePattern.matcher(entity.getName());
        while (m.find()) {
            jobName = m.group(1);
            // the version is whatever the pattern's last capturing group holds
            jobVersion = m.group(m.groupCount());
        }
        JobInfo jobInfo = new JobInfo(jobName, jobVersion);
        setClassPath4TISJob(entity, jobInfo);
        // get main class from command line
        guessMainClassFromCommandLine(entity, jobInfo);
        // not found there: look for it in the context properties folder
        if (jobInfo.getMainClass() == null) {
            String propFilePath = analyzeJobParams(entity, jobInfo);
            guessMainClass(propFilePath, jobInfo);
        }
        jobList.add(jobInfo);
    }
    return jobList;
}

From source file:com.mirth.connect.client.ui.reference.ClassVisitor.java

/**
 * Registers a reference entry for a public method or constructor; non-public members are ignored.
 *
 * <p>The Javadoc text, when present and not blank, is scanned with {@code JAVADOC_ANNOTATION_PATTERN}
 * for "param", "return" and "deprecated" entries, which supply the parameter descriptions, the return
 * description and the deprecation notice respectively.
 *
 * @param constructor whether the member is a constructor
 * @param className   name of the declaring class (used for function references only)
 * @param name        member name
 * @param type        return type passed to the function definition
 * @param modifiers   modifier bit set inspected through {@code ModifierSet}
 * @param javadoc     Javadoc comment of the member, may be null
 * @param parameters  declared parameters, may be null or empty
 */
private void addMethod(boolean constructor, String className, String name, String type, int modifiers,
        JavadocComment javadoc, List<Parameter> parameters) {
    // Don't add references for non-public methods
    if (!ModifierSet.isPublic(modifiers)) {
        return;
    }
    String iconName = ModifierSet.isStatic(modifiers) ? IconFactory.PUBLIC_STATIC_FUNCTION_ICON
            : IconFactory.PUBLIC_METHOD_ICON;

    String comment = null;
    Map<String, String> parameterComments = new CaseInsensitiveMap();
    String returnComment = null;
    if (constructor) {
        returnComment = "A new " + name + " object.";
    }
    String deprecatedComment = null;

    if (javadoc != null) {
        comment = StringUtils.trim(javadoc.getContent());
        if (StringUtils.isNotBlank(comment)) {
            Matcher annotations = JAVADOC_ANNOTATION_PATTERN.matcher(comment);
            while (annotations.find() && annotations.groupCount() >= 2) {
                String tag = annotations.group("key");
                String text = convertComment(annotations.group("value"));

                if (tag.equalsIgnoreCase("param")) {
                    // the first word is the parameter name, the remainder its description
                    int firstSpace = text.indexOf(' ');
                    String paramName = firstSpace >= 0 ? text.substring(0, firstSpace).trim() : text;
                    String paramText = firstSpace >= 0 ? text.substring(firstSpace).trim() : "";
                    parameterComments.put(paramName, paramText);
                } else if (tag.equalsIgnoreCase("return")) {
                    returnComment = text;
                } else if (tag.equalsIgnoreCase("deprecated")) {
                    deprecatedComment = text;
                }
            }

            comment = convertComment(comment);

            if (StringUtils.isNotBlank(deprecatedComment)) {
                comment = "<b>Deprecated.</b> <em>" + deprecatedComment + "</em><br/><br/>" + comment;
            }
        }
    }

    Parameters params = new Parameters();
    if (CollectionUtils.isNotEmpty(parameters)) {
        for (Parameter declared : parameters) {
            String declaredName = declared.getId().getName();
            params.add(declaredName, declared.getType().toString(), parameterComments.get(declaredName));
        }
    }

    Reference reference;
    if (!constructor) {
        reference = new FunctionReference(CodeTemplateContextSet.getGlobalContextSet(), null, className, name,
                comment, null, new CodeTemplateFunctionDefinition(name, params, type, returnComment),
                inputTextList);
    } else {
        reference = new ConstructorReference(CodeTemplateContextSet.getGlobalContextSet(), null, name, name,
                comment, null, new CodeTemplateFunctionDefinition(name, params, type, returnComment));
    }

    if (StringUtils.isNotBlank(deprecatedComment)) {
        reference.setDeprecated(true);
    }
    reference.setIconName(iconName);

    references.add(reference);
}

From source file:fitnesse.testsystems.slim.tables.ScenarioTable.java

/**
 * Copies the text of every capturing group of the matcher's current match into an array.
 *
 * @param matcher matcher that has already matched successfully
 * @return one entry per capturing group, in group order (group 0, the whole match, is not included);
 *         an entry is null when its group did not participate in the match
 */
private String[] extractNamesFromMatcher(Matcher matcher) {
    final int groupTotal = matcher.groupCount();
    String[] names = new String[groupTotal];

    // capturing groups are numbered from 1, array slots from 0
    for (int group = 1; group <= groupTotal; group++) {
        names[group - 1] = matcher.group(group);
    }

    return names;
}

From source file:com.cloudera.recordbreaker.hive.RegExpSerDe.java

/**
 * Deserialize a single line of text in the raw input.
 * Transform into a GenericData.Record object for Hive.
 *
 * <p>The patterns are tried in list order and the first one found in the row decides the schema
 * (the schema at the same index in {@code schemaOptions}). Capturing group {@code j + 1} is stored
 * in the schema's field number {@code j}. INT, FLOAT and STRING fields are converted; fields of any
 * other type, and groups that did not participate in the match, are left unset.
 *
 * @param blob the row to parse; cast to {@code Text}
 * @return the populated record, or null when none of the patterns is found in the row
 * @throws NumberFormatException if the text captured for an INT or FLOAT field is not a number
 */
GenericData.Record deserializeRowBlob(Writable blob) {
    String rowStr = ((Text) blob).toString();

    for (int i = 0; i < patterns.size(); i++) {
        Pattern curPattern = patterns.get(i);
        Schema curSchema = schemaOptions.get(i);
        Matcher curMatcher = curPattern.matcher(rowStr);

        if (curMatcher.find()) {
            // Create Avro record here
            GenericData.Record rowRecord = new GenericData.Record(curSchema);
            List<Schema.Field> curFields = curSchema.getFields();

            for (int j = 0; j < curMatcher.groupCount(); j++) {
                Schema.Field curField = curFields.get(j);

                String fieldName = curField.name();
                Schema fieldType = curField.schema();
                String rawFieldValue = curMatcher.group(j + 1);
                if (rawFieldValue == null) {
                    // An optional group that did not participate yields null; parsing it would
                    // throw (NumberFormatException for INT, NullPointerException for FLOAT).
                    continue;
                }

                Object fieldValue = null;
                if (fieldType.getType() == Schema.Type.INT) {
                    fieldValue = Integer.parseInt(rawFieldValue);
                } else if (fieldType.getType() == Schema.Type.FLOAT) {
                    fieldValue = Float.parseFloat(rawFieldValue);
                } else if (fieldType.getType() == Schema.Type.STRING) {
                    fieldValue = rawFieldValue;
                }
                if (fieldValue != null) {
                    rowRecord.put(fieldName, fieldValue);
                }
            }
            return rowRecord;
        }
    }
    return null;
}

From source file:edu.uab.ccts.nlp.uima.annotator.SegmentRegexAnnotator.java

/**
 * Add Segment annotations to the cas. First create a list of segments. Then
 * sort the list according to segment start. For each segment that has no
 * end, set the end to the [beginning of next segment - 1], or the eof.
 * Finally, pass every stretch of document text that lies between or after
 * the segments to addGapSegment.
 *
 * @param aJCas CAS whose document text is scanned; nothing is done when the text is null
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    log.info("Starting SegmentRegexAnnotator with " + regexMap.size() + " segements.");
    String strDocText = aJCas.getDocumentText();
    if (strDocText == null)
        return;
    List<Segment> segmentsAdded = new ArrayList<Segment>();
    // find all the segments, set begin and id, add to list
    for (Map.Entry<SegmentRegex, Pattern> entry : regexMap.entrySet()) {
        if (log.isDebugEnabled()) {
            log.debug("applying regex:" + entry.getKey().getRegex());
        }
        Matcher matcher = entry.getValue().matcher(strDocText);
        while (matcher.find()) {
            Segment seg = new Segment(aJCas);
            if (entry.getKey().isLimitToRegex() && matcher.groupCount() == 1) {
                // limited to the regex and the pattern has exactly one capturing group:
                // the segment spans that group only
                seg.setBegin(matcher.start(1));
                seg.setEnd(matcher.end(1));
            } else {
                seg.setBegin(matcher.start());
                // the end is set here only when limited to the regex; otherwise it is
                // filled in by the end-assignment loop further down
                if (entry.getKey().isLimitToRegex()) {
                    seg.setEnd(matcher.end());
                }
            }
            seg.setId(entry.getKey().getSegmentID());
            //if (log.isDebugEnabled()) {
            log.debug("found match: id=" + seg.getId() + ", begin=" + seg.getBegin() + " end=" + seg.getEnd());
            //}
            segmentsAdded.add(seg);
        }
    }
    if (log.isDebugEnabled()) {
        log.debug("segmentsAdded: " + segmentsAdded.size());
    }
    if (segmentsAdded.size() > 0) {
        // sort the segments by begin
        Collections.sort(segmentsAdded, new Comparator<Segment>() {

            // @Override
            public int compare(Segment o1, Segment o2) {
                return o1.getBegin() < o2.getBegin() ? -1 : o1.getBegin() > o2.getBegin() ? 1 : 0;
            }

        });
        // set the end for each segment
        for (int i = 0; i < segmentsAdded.size(); i++) {
            Segment seg = segmentsAdded.get(i);
            // successor in begin order, or null for the last segment
            Segment segNext = (i + 1) < segmentsAdded.size() ? segmentsAdded.get(i + 1) : null;
            // an end <= 0 is treated as "end not set yet"
            if (seg.getEnd() <= 0) {
                if (segNext != null) {
                    // set end to beginning of next segment
                    seg.setEnd(segNext.getBegin() - 1);
                } else {
                    // set end to doc end
                    seg.setEnd(strDocText.length());
                }
            } else {
                // segments shouldn't overlap
                if (segNext != null && segNext.getBegin() < seg.getEnd()) {
                    seg.setEnd(segNext.getBegin() - 1);
                }
            }
            //if (log.isDebugEnabled()) {
            log.debug("Adding Segment: segment id=" + seg.getId() + ", begin=" + seg.getBegin() + ", end="
                    + seg.getEnd());
            //}
            seg.addToIndexes();
        }
    }
    // ctakes 1.3.2 - anything not in a segment will not be annotated - add
    // text outside segments to the 'default' segment
    // 'end' tracks where the previously visited segment stopped (0 before the first one)
    int end = 0;
    for (Segment seg : segmentsAdded) {
        // a gap is reported only when the text before this segment's begin - 1 is uncovered
        if ((seg.getBegin() - 1) > end) {
            addGapSegment(aJCas, end, seg.getBegin() - 1);
        }
        end = seg.getEnd();
    }
    // trailing text after the last segment (or the whole document when no segment was found)
    if (end < strDocText.length()) {
        addGapSegment(aJCas, end, strDocText.length());
    }
}

From source file:com.moviejukebox.model.scriptablescraper.SectionSS.java

/**
 * Expands variable placeholders in {@code value} and normalises the resulting text.
 *
 * <p>After {@code escapeForRegex} has been applied, placeholders of the form
 * {@code __DOLLAR_SIGN__{name}} are replaced by the value of the global or local variable
 * {@code name}. Once a pass replaces nothing, every remaining {@code DOLLAR_SIGN} marker is turned
 * into a literal {@code $}. Finally the text is trimmed and, when it contains {@code |}, empty
 * alternatives are dropped and each remaining alternative is trimmed.
 *
 * @param value raw value, possibly containing variable references
 * @return the expanded and normalised value
 */
public String compileValue(String value) {
    if (isDebug()) {
        LOG.debug("compileValue: '{}'", value);
    }
    // presumably replaces '$' with the DOLLAR_SIGN marker - verify against escapeForRegex
    value = escapeForRegex(value);
    if (isDebug()) {
        // NOTE(review): the closing quote is missing after {} in this message
        LOG.debug("compileValue: escaped: '{}", value);
    }
    String result = value;

    int start, end;
    String variable;
    Pattern pattern = Pattern.compile("__DOLLAR_SIGN__\\{([^{}]+)\\}");
    // repeat until no marker is left; each pass either substitutes known variables
    // or, when nothing could be substituted, converts the remaining markers to '$'
    while (value.contains(DOLLAR_SIGN)) {
        Matcher matcher = pattern.matcher(value);
        // start stays -1 when this pass performs no substitution
        start = -1;
        while (matcher.find()) {
            // the pattern has a single capturing group: the variable name
            for (int looper = 0; looper < matcher.groupCount(); looper++) {
                variable = matcher.group(looper + 1);
                if (isDebug()) {
                    LOG.debug("compileValue: matcher: '{}'", variable);
                }
                if (hasGlobalVariable(variable) || hasVariable(variable)) {
                    // locate the placeholder in result (not value): result has already been
                    // changed by earlier substitutions of this pass
                    start = result.indexOf("__DOLLAR_SIGN__{" + variable);
                    end = result.indexOf("}", start);
                    if (isDebug()) {
                        LOG.debug("compileValue: start: {} end: {}", start, end);
                    }
                    // global variables take precedence over local ones
                    variable = hasGlobalVariable(variable) ? getGlobalVariable(variable)
                            : getVariable(variable);
                    if (variable == null || "null".equals(variable)) {
                        variable = "";
                    }
                    result = result.substring(0, start) + variable + result.substring(end + 1);
                }
            }
        }
        if (start == -1) {
            // nothing substituted: "\\$" is the replacement syntax for a literal '$'
            result = result.replaceAll(DOLLAR_SIGN, "\\$");
        }
        value = result;
    }

    result = result.trim().replaceAll("^\\s+", "");
    if (result.contains("|")) {
        // rebuild the alternation without empty alternatives, trimming each one
        List<String> values = Arrays.asList(result.split("\\|"));
        result = "";
        for (String value1 : values) {
            if (value1.length() > 0) {
                if (result.length() > 0) {
                    result += "|";
                }
                result += value1.trim().replaceAll("^\\s+", "");
            }
        }
    }
    if (isDebug()) {
        LOG.debug("compileValue: compiled: '{}'", result);
    }
    return result;
}

From source file:edu.cornell.mannlib.vitro.webapp.filters.VitroURL.java

/**
 * Builds a VitroURL from an absolute or relative URL string.
 *
 * <p>When the string contains {@code &amp;} it is XML-unescaped first and that fact is recorded.
 * The string is then parsed with {@code java.net.URL}; if that fails for any reason it is treated
 * as a relative URL consisting of a path and an optional query string, split by {@code pathPattern}.
 *
 * @param urlStr            URL text, possibly XML-escaped, possibly relative
 * @param characterEncoding encoding used to decode the path and query of a relative URL
 */
public VitroURL(String urlStr, String characterEncoding) {
    this.characterEncoding = characterEncoding;
    if (urlStr.indexOf("&amp;") > -1) {
        wasXMLEscaped = true;
        urlStr = StringEscapeUtils.unescapeXml(urlStr);
    }
    try {
        URL url = new URL(urlStr);
        this.protocol = url.getProtocol();
        this.host = url.getHost();
        this.port = Integer.toString(url.getPort());
        this.pathParts = splitPath(url.getPath());
        this.pathBeginsWithSlash = beginsWithSlash(url.getPath());
        this.pathEndsInSlash = endsInSlash(url.getPath());
        this.queryParams = parseQueryParams(url.getQuery());
        this.fragment = url.getRef();
    } catch (Exception e) {
        // Under normal circumstances, this is because the urlStr is relative
        // We'll assume that we just have a path and possibly a query string.
        // This is likely to be a bad assumption, but let's roll with it.
        Matcher m = pathPattern.matcher(urlStr);
        // urlParts[0] = path, urlParts[1] = query string (null when absent)
        String[] urlParts = new String[2];
        if (m.matches()) {
            urlParts[0] = m.group(1);
            if (m.groupCount() == 2)
                urlParts[1] = m.group(2);
        } else {
            //???
        }

        try {
            this.pathParts = splitPath(URLDecoder.decode(getPath(urlStr), characterEncoding));
            this.pathBeginsWithSlash = beginsWithSlash(urlParts[0]);
            this.pathEndsInSlash = endsInSlash(urlParts[0]);
            // The array always has length 2, so a length check cannot tell whether a query was
            // captured; decoding a null query would throw a NullPointerException.
            if (urlParts[1] != null) {
                this.queryParams = parseQueryParams(URLDecoder.decode(urlParts[1], characterEncoding));
            }
        } catch (UnsupportedEncodingException uee) {
            log.error("Unable to use character encoding " + characterEncoding, uee);
        }
    }
}