List of usage examples for java.util.regex.Matcher.groupCount()
public int groupCount()
From source file:org.transdroid.search.hdbitsorg.HdBitsOrgAdapter.java
protected List<SearchResult> parseHtml(String html, int maxResults) throws Exception { Log.d(LOG_TAG, "Parsing search results."); List<SearchResult> results = new ArrayList<SearchResult>(); int matchCount = 0; int errorCount = 0; Pattern regex = Pattern.compile(SEARCH_REGEX, Pattern.DOTALL); Matcher match = regex.matcher(html); while (match.find() && matchCount < maxResults) { matchCount++;/*from ww w . j a v a 2 s .com*/ if (match.groupCount() != 11) { errorCount++; continue; } String detailsUrl = URL_PREFIX + match.group(1); String title = match.group(2); String torrentUrl = URL_PREFIX + match.group(3); String size = match.group(8) + match.group(9); // size + unit int seeders = Integer.parseInt(match.group(10)); int leechers = Integer.parseInt(match.group(11)); int time1 = Integer.parseInt(match.group(4)); String timeUnit1 = match.group(5); int time2 = Integer.parseInt(match.group(6)); String timeUnit2 = match.group(7); // hdbits.org lists "added date" in a relative format (i.e. 8 months 7 days ago) // we roughly calculate the number of MS elapsed then subtract that from "now" // could be a day or two off depending on month lengths, it's just imprecise data long elapsedTime = 0; if (timeUnit1.startsWith("month")) elapsedTime += time1 * 1000L * 60L * 60L * 24L * 30L; if (timeUnit1.startsWith("day")) elapsedTime += time1 * 1000L * 60L * 60L * 24L; if (timeUnit2.startsWith("day")) elapsedTime += time2 * 1000L * 60L * 60L * 24L; if (timeUnit2.startsWith("hour")) elapsedTime += time2 * 1000L * 60L * 60L; Date addedDate = new Date(); addedDate.setTime(addedDate.getTime() - elapsedTime); // build our search result SearchResult torrent = new SearchResult(title, torrentUrl, detailsUrl, size, addedDate, seeders, leechers); results.add(torrent); } Log.d(LOG_TAG, "Found " + matchCount + " matches and successfully parsed " + (matchCount - errorCount) + " of those matches."); return results; }
From source file:com.moviejukebox.model.scriptablescraper.SectionSS.java
public String parseInput(String data, String regex) { if (isDebug()) { LOG.debug("parseInput: data: '{}'", data); LOG.debug("parseInput: regex: '{}'", regex); }//from ww w . java2s.c o m Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(data); int looper; String result = ""; while (matcher.find()) { for (looper = 0; looper < matcher.groupCount(); looper++) { result += matcher.group(looper + 1) + ScriptableScraper.ARRAY_ITEM_DIVIDER; } result += ScriptableScraper.ARRAY_GROUP_DIVIDER; } if (isDebug()) { LOG.debug("parseInput: result: '{}'", result); } return result; }
From source file:net.timbusproject.extractors.debiansoftwareextractor.Engine.java
private JSONObject extractInlinePackage(String element) throws JSONException { final Pattern pattern = Pattern.compile("(\\S+)(?: \\((\\S+) (.+)\\))?"); Matcher matcher = pattern.matcher(element); matcher.find();//w ww . j a v a2 s . c o m JSONObject object = new JSONObject().put("Package", matcher.group(1)); if (matcher.groupCount() > 1) object.put("Comparator", matcher.group(2)).put("Version", matcher.group(3)); return object; }
From source file:com.amalto.core.jobox.component.JobAware.java
public List<JobInfo> findJobsInBox() { File[] entities = new File(workDir).listFiles(new FileFilter() { public boolean accept(File pathName) { return !(pathName.isFile() || JOBOX_RESERVED_FOLDER_NAME.equalsIgnoreCase(pathName.getName())); }//from w w w .j a va 2 s . c o m }); List<JobInfo> jobList = new ArrayList<JobInfo>(); for (File entity : entities) { boolean isTISEntry = recognizeTISJob(entity); if (isTISEntry) { // parse name and version String jobVersion = ""; //$NON-NLS-1$ String jobName = ""; //$NON-NLS-1$ Matcher m = jobVersionNamePattern.matcher(entity.getName()); while (m.find()) { jobName = m.group(1); jobVersion = m.group(m.groupCount()); } JobInfo jobInfo = new JobInfo(jobName, jobVersion); setClassPath4TISJob(entity, jobInfo); // get main class from command line guessMainClassFromCommandLine(entity, jobInfo); //not found then found it in context properties folder if (jobInfo.getMainClass() == null) { String propFilePath = analyzeJobParams(entity, jobInfo); guessMainClass(propFilePath, jobInfo); } jobList.add(jobInfo); } } return jobList; }
From source file:com.mirth.connect.client.ui.reference.ClassVisitor.java
private void addMethod(boolean constructor, String className, String name, String type, int modifiers, JavadocComment javadoc, List<Parameter> parameters) { String iconName = null;//from w w w . ja va 2 s. co m if (ModifierSet.isPublic(modifiers)) { if (ModifierSet.isStatic(modifiers)) { iconName = IconFactory.PUBLIC_STATIC_FUNCTION_ICON; } else { iconName = IconFactory.PUBLIC_METHOD_ICON; } } else { // Don't add references for non-public methods return; } String comment = null; Map<String, String> parameterComments = new CaseInsensitiveMap(); String returnComment = constructor ? "A new " + name + " object." : null; String deprecatedComment = null; if (javadoc != null) { comment = StringUtils.trim(javadoc.getContent()); if (StringUtils.isNotBlank(comment)) { Matcher matcher = JAVADOC_ANNOTATION_PATTERN.matcher(comment); while (matcher.find() && matcher.groupCount() >= 2) { String key = matcher.group("key"); String value = convertComment(matcher.group("value")); if (key.equalsIgnoreCase("param")) { int index = value.indexOf(' '); if (index >= 0) { key = value.substring(0, index).trim(); value = value.substring(index).trim(); } else { key = value; value = ""; } parameterComments.put(key, value); } else if (key.equalsIgnoreCase("return")) { returnComment = value; } else if (key.equalsIgnoreCase("deprecated")) { deprecatedComment = value; } } comment = convertComment(comment); if (StringUtils.isNotBlank(deprecatedComment)) { comment = "<b>Deprecated.</b> <em>" + deprecatedComment + "</em><br/><br/>" + comment; } } } Parameters params = new Parameters(); if (CollectionUtils.isNotEmpty(parameters)) { for (Parameter parameter : parameters) { String parameterName = parameter.getId().getName(); params.add(parameterName, parameter.getType().toString(), parameterComments.get(parameterName)); } } Reference reference; if (constructor) { reference = new ConstructorReference(CodeTemplateContextSet.getGlobalContextSet(), null, name, name, comment, null, new 
CodeTemplateFunctionDefinition(name, params, type, returnComment)); } else { reference = new FunctionReference(CodeTemplateContextSet.getGlobalContextSet(), null, className, name, comment, null, new CodeTemplateFunctionDefinition(name, params, type, returnComment), inputTextList); } if (StringUtils.isNotBlank(deprecatedComment)) { reference.setDeprecated(true); } reference.setIconName(iconName); references.add(reference); }
From source file:fitnesse.testsystems.slim.tables.ScenarioTable.java
/**
 * Copies the values of all capturing groups of the current match into an array.
 *
 * @param matcher a matcher positioned on a successful match
 * @return the values of groups 1..groupCount, in group order; entries are
 *         {@code null} for groups that did not participate in the match
 */
private String[] extractNamesFromMatcher(Matcher matcher) {
    final int groupTotal = matcher.groupCount();
    final String[] names = new String[groupTotal];
    // Group 0 is the whole match, so capturing groups start at index 1.
    for (int group = 1; group <= groupTotal; group++) {
        names[group - 1] = matcher.group(group);
    }
    return names;
}
From source file:com.cloudera.recordbreaker.hive.RegExpSerDe.java
/** * Deserialize a single line of text in the raw input. * Transform into a GenericData.Record object for Hive. */// w w w . j av a 2 s . c o m GenericData.Record deserializeRowBlob(Writable blob) { String rowStr = ((Text) blob).toString(); GenericData.Record rowRecord = null; for (int i = 0; i < patterns.size(); i++) { Pattern curPattern = patterns.get(i); Schema curSchema = schemaOptions.get(i); Matcher curMatcher = curPattern.matcher(rowStr); if (curMatcher.find()) { // Create Avro record here rowRecord = new GenericData.Record(curSchema); List<Schema.Field> curFields = curSchema.getFields(); for (int j = 0; j < curMatcher.groupCount(); j++) { Schema.Field curField = curFields.get(j); String fieldName = curField.name(); Schema fieldType = curField.schema(); String rawFieldValue = curMatcher.group(j + 1); Object fieldValue = null; if (fieldType.getType() == Schema.Type.INT) { fieldValue = Integer.parseInt(rawFieldValue); } else if (fieldType.getType() == Schema.Type.FLOAT) { fieldValue = Float.parseFloat(rawFieldValue); } else if (fieldType.getType() == Schema.Type.STRING) { fieldValue = rawFieldValue; } if (fieldValue != null) { rowRecord.put(fieldName, fieldValue); } } return rowRecord; } } return null; }
From source file:edu.uab.ccts.nlp.uima.annotator.SegmentRegexAnnotator.java
/** * Add Segment annotations to the cas. First create a list of segments. Then * sort the list according to segment start. For each segment that has no * end, set the end to the [beginning of next segment - 1], or the eof. */// w w w. j av a2 s. c o m @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { log.info("Starting SegmentRegexAnnotator with " + regexMap.size() + " segements."); String strDocText = aJCas.getDocumentText(); if (strDocText == null) return; List<Segment> segmentsAdded = new ArrayList<Segment>(); // find all the segments, set begin and id, add to list for (Map.Entry<SegmentRegex, Pattern> entry : regexMap.entrySet()) { if (log.isDebugEnabled()) { log.debug("applying regex:" + entry.getKey().getRegex()); } Matcher matcher = entry.getValue().matcher(strDocText); while (matcher.find()) { Segment seg = new Segment(aJCas); if (entry.getKey().isLimitToRegex() && matcher.groupCount() == 1) { seg.setBegin(matcher.start(1)); seg.setEnd(matcher.end(1)); } else { seg.setBegin(matcher.start()); if (entry.getKey().isLimitToRegex()) { seg.setEnd(matcher.end()); } } seg.setId(entry.getKey().getSegmentID()); //if (log.isDebugEnabled()) { log.debug("found match: id=" + seg.getId() + ", begin=" + seg.getBegin() + " end=" + seg.getEnd()); //} segmentsAdded.add(seg); } } if (log.isDebugEnabled()) { log.debug("segmentsAdded: " + segmentsAdded.size()); } if (segmentsAdded.size() > 0) { // sort the segments by begin Collections.sort(segmentsAdded, new Comparator<Segment>() { // @Override public int compare(Segment o1, Segment o2) { return o1.getBegin() < o2.getBegin() ? -1 : o1.getBegin() > o2.getBegin() ? 1 : 0; } }); // set the end for each segment for (int i = 0; i < segmentsAdded.size(); i++) { Segment seg = segmentsAdded.get(i); Segment segNext = (i + 1) < segmentsAdded.size() ? 
segmentsAdded.get(i + 1) : null; if (seg.getEnd() <= 0) { if (segNext != null) { // set end to beginning of next segment seg.setEnd(segNext.getBegin() - 1); } else { // set end to doc end seg.setEnd(strDocText.length()); } } else { // segments shouldn't overlap if (segNext != null && segNext.getBegin() < seg.getEnd()) { seg.setEnd(segNext.getBegin() - 1); } } //if (log.isDebugEnabled()) { log.debug("Adding Segment: segment id=" + seg.getId() + ", begin=" + seg.getBegin() + ", end=" + seg.getEnd()); //} seg.addToIndexes(); } } // ctakes 1.3.2 - anything not in a segment will not be annotated - add // text outside segments to the 'default' segment int end = 0; for (Segment seg : segmentsAdded) { if ((seg.getBegin() - 1) > end) { addGapSegment(aJCas, end, seg.getBegin() - 1); } end = seg.getEnd(); } if (end < strDocText.length()) { addGapSegment(aJCas, end, strDocText.length()); } }
From source file:com.moviejukebox.model.scriptablescraper.SectionSS.java
/**
 * Expands variable placeholders in {@code value} and normalises the result.
 *
 * <p>The value is first passed through {@code escapeForRegex}. Placeholders of the
 * form <code>__DOLLAR_SIGN__{name}</code> whose name is a known global or local
 * variable are then replaced by the variable's value (a {@code null} or
 * {@code "null"} value becomes the empty string), with global variables taking
 * precedence. Passes repeat until a pass substitutes nothing, at which point the
 * remaining {@code DOLLAR_SIGN} markers are turned into literal dollar signs.
 * Finally the result is trimmed and, if it contains {@code |}, re-joined from its
 * non-empty, trimmed alternatives.
 *
 * @param value the raw value to compile
 * @return the compiled value
 */
public String compileValue(String value) {
    if (isDebug()) {
        LOG.debug("compileValue: '{}'", value);
    }

    value = escapeForRegex(value);
    if (isDebug()) {
        LOG.debug("compileValue: escaped: '{}", value);
    }

    String result = value;
    Pattern pattern = Pattern.compile("__DOLLAR_SIGN__\\{([^{}]+)\\}");

    while (value.contains(DOLLAR_SIGN)) {
        Matcher matcher = pattern.matcher(value);
        boolean substituted = false;

        while (matcher.find()) {
            String variable = matcher.group(1);
            if (isDebug()) {
                LOG.debug("compileValue: matcher: '{}'", variable);
            }
            if (!(hasGlobalVariable(variable) || hasVariable(variable))) {
                continue;
            }

            // Search for the complete placeholder including the closing brace.
            // Matching only the "{name" prefix would also hit a longer name that
            // merely starts with this one (e.g. "foo" inside "{foobar}").
            String token = "__DOLLAR_SIGN__{" + variable + "}";
            int start = result.indexOf(token);
            if (start < 0) {
                continue;
            }
            int end = start + token.length() - 1;
            if (isDebug()) {
                LOG.debug("compileValue: start: {} end: {}", start, end);
            }

            String replacement = hasGlobalVariable(variable) ? getGlobalVariable(variable) : getVariable(variable);
            if (replacement == null || "null".equals(replacement)) {
                replacement = "";
            }
            result = result.substring(0, start) + replacement + result.substring(end + 1);
            substituted = true;
        }

        if (!substituted) {
            // Nothing left to expand: remaining markers become literal '$'.
            result = result.replaceAll(DOLLAR_SIGN, "\\$");
        }
        value = result;
    }

    result = result.trim().replaceAll("^\\s+", "");
    if (result.contains("|")) {
        // Drop empty alternatives and trim the remaining ones.
        StringBuilder joined = new StringBuilder();
        for (String alternative : result.split("\\|")) {
            if (alternative.length() > 0) {
                if (joined.length() > 0) {
                    joined.append("|");
                }
                joined.append(alternative.trim().replaceAll("^\\s+", ""));
            }
        }
        result = joined.toString();
    }

    if (isDebug()) {
        LOG.debug("compileValue: compiled: '{}'", result);
    }
    return result;
}
From source file:edu.cornell.mannlib.vitro.webapp.filters.VitroURL.java
public VitroURL(String urlStr, String characterEncoding) { this.characterEncoding = characterEncoding; if (urlStr.indexOf("&") > -1) { wasXMLEscaped = true;/*from w w w . ja v a2s.co m*/ urlStr = StringEscapeUtils.unescapeXml(urlStr); } try { URL url = new URL(urlStr); this.protocol = url.getProtocol(); this.host = url.getHost(); this.port = Integer.toString(url.getPort()); this.pathParts = splitPath(url.getPath()); this.pathBeginsWithSlash = beginsWithSlash(url.getPath()); this.pathEndsInSlash = endsInSlash(url.getPath()); this.queryParams = parseQueryParams(url.getQuery()); this.fragment = url.getRef(); } catch (Exception e) { // Under normal circumstances, this is because the urlStr is relative // We'll assume that we just have a path and possibly a query string. // This is likely to be a bad assumption, but let's roll with it. Matcher m = pathPattern.matcher(urlStr); String[] urlParts = new String[2]; if (m.matches()) { urlParts[0] = m.group(1); if (m.groupCount() == 2) urlParts[1] = m.group(2); } else { //??? } try { this.pathParts = splitPath(URLDecoder.decode(getPath(urlStr), characterEncoding)); this.pathBeginsWithSlash = beginsWithSlash(urlParts[0]); this.pathEndsInSlash = endsInSlash(urlParts[0]); if (urlParts.length > 1) { this.queryParams = parseQueryParams(URLDecoder.decode(urlParts[1], characterEncoding)); } } catch (UnsupportedEncodingException uee) { log.error("Unable to use character encoding " + characterEncoding, uee); } } }