List of usage examples for java.util.regex Matcher groupCount
public int groupCount()
From source file:com.epimorphics.appbase.templates.Lib.java
/** * Match a string to a regex and return a vector of the matching groups *///from w w w . j a v a 2 s .c om public String[] regex(Object data, String regex) { Matcher m = Pattern.compile(regex).matcher(data.toString()); if (m.matches()) { String[] result = new String[m.groupCount()]; for (int i = 0; i < m.groupCount(); i++) { result[i] = m.group(i + 1); } return result; } else { return null; } }
From source file:org.apache.ctakes.ytex.uima.annotators.SegmentRegexAnnotator.java
/** * Add Segment annotations to the cas. First create a list of segments. Then * sort the list according to segment start. For each segment that has no * end, set the end to the [beginning of next segment - 1], or the eof. */// w w w .j av a 2 s . c o m @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { String strDocText = aJCas.getDocumentText(); if (strDocText == null) return; List<Segment> segmentsAdded = new ArrayList<Segment>(); // find all the segments, set begin and id, add to list for (Map.Entry<SegmentRegex, Pattern> entry : regexMap.entrySet()) { if (log.isDebugEnabled()) { log.debug("applying regex:" + entry.getKey().getRegex()); } Matcher matcher = entry.getValue().matcher(strDocText); while (matcher.find()) { Segment seg = new Segment(aJCas); if (entry.getKey().isLimitToRegex() && matcher.groupCount() == 1) { seg.setBegin(matcher.start(1)); seg.setEnd(matcher.end(1)); } else { seg.setBegin(matcher.start()); if (entry.getKey().isLimitToRegex()) { seg.setEnd(matcher.end()); } } seg.setId(entry.getKey().getSegmentID()); if (log.isDebugEnabled()) { log.debug("found match: id=" + seg.getId() + ", begin=" + seg.getBegin()); } segmentsAdded.add(seg); } } if (log.isDebugEnabled()) { log.debug("segmentsAdded: " + segmentsAdded.size()); } if (segmentsAdded.size() > 0) { // sort the segments by begin Collections.sort(segmentsAdded, new Comparator<Segment>() { // @Override public int compare(Segment o1, Segment o2) { return o1.getBegin() < o2.getBegin() ? -1 : o1.getBegin() > o2.getBegin() ? 1 : 0; } }); // set the end for each segment for (int i = 0; i < segmentsAdded.size(); i++) { Segment seg = segmentsAdded.get(i); Segment segNext = (i + 1) < segmentsAdded.size() ? 
segmentsAdded.get(i + 1) : null; if (seg.getEnd() <= 0) { if (segNext != null) { // set end to beginning of next segment seg.setEnd(segNext.getBegin() - 1); } else { // set end to doc end seg.setEnd(strDocText.length()); } } else { // segments shouldn't overlap if (segNext != null && segNext.getBegin() < seg.getEnd()) { seg.setEnd(segNext.getBegin() - 1); } } if (log.isDebugEnabled()) { log.debug("Adding Segment: segment id=" + seg.getId() + ", begin=" + seg.getBegin() + ", end=" + seg.getEnd()); } seg.addToIndexes(); } } // ctakes 1.3.2 - anything not in a segment will not be annotated - add // text outside segments to the 'default' segment int end = 0; for (Segment seg : segmentsAdded) { if ((seg.getBegin() - 1) > end) { addGapSegment(aJCas, end, seg.getBegin() - 1); } end = seg.getEnd(); } if (end < strDocText.length()) { addGapSegment(aJCas, end, strDocText.length()); } }
From source file:com.amalto.core.jobox.component.JobAware.java
public JobInfo loadJobInfo(String entityName) { JobInfo jobInfo = null;// w ww . j a v a 2 s .c o m File entity = new File(workDir + File.separator + entityName); if (entity.exists()) { // parse name and version String jobVersion = StringUtils.EMPTY; String jobName = StringUtils.EMPTY; Matcher m = jobVersionNamePattern.matcher(entityName); while (m.find()) { jobName = m.group(1); jobVersion = m.group(m.groupCount()); } jobInfo = new JobInfo(jobName, jobVersion); setClassPath4TISJob(entity, jobInfo); // get main class from command line guessMainClassFromCommandLine(entity, jobInfo); //not found then found it in context properties folder if (jobInfo.getMainClass() == null) { String propFilePath = analyzeJobParams(entity, jobInfo); guessMainClass(propFilePath, jobInfo); } } return jobInfo; }
From source file:org.eclipse.rdf4j.repository.http.HTTPRepository.java
public HTTPRepository(final String repositoryURL) { this();//w w w. ja va 2 s. c o m // Try to parse the server URL from the repository URL Pattern urlPattern = Pattern.compile("(.*)/" + Protocol.REPOSITORIES + "/[^/]*/?"); Matcher matcher = urlPattern.matcher(repositoryURL); if (matcher.matches() && matcher.groupCount() == 1) { this.serverURL = matcher.group(1); } else { throw new IllegalArgumentException("URL must be to a Sesame Repository (not just the server)"); } this.repositoryURL = repositoryURL; }
From source file:org.apache.flume.ext.interceptor.RegexExtractorInterceptor.java
/**
 * Searches the UTF-8 decoded event body for the configured regex and, on
 * the first hit, stores each captured group as an event header.
 * <p>
 * Group {@code n} is paired with the serializer at position {@code n - 1};
 * processing stops at the first group that has no serializer.
 *
 * @param event event to inspect; its header map is modified in place
 * @return the same event instance
 */
@Override
public Event intercept(Event event) {
    Matcher matcher = regex.matcher(new String(event.getBody(), Charsets.UTF_8));
    Map<String, String> headers = event.getHeaders();
    if (!matcher.find()) {
        return event;
    }

    int count = matcher.groupCount();
    for (int groupIndex = 1; groupIndex <= count; groupIndex++) {
        if (groupIndex > serializers.size()) {
            if (logger.isDebugEnabled()) {
                logger.debug("Skipping group {} to {} due to missing serializer", groupIndex - 1, count);
            }
            break;
        }
        NameAndSerializer target = serializers.get(groupIndex - 1);
        if (logger.isDebugEnabled()) {
            logger.debug("Serializing {} using {}", target.headerName, target.serializer);
        }
        headers.put(target.headerName, target.serializer.serialize(matcher.group(groupIndex)));
    }
    return event;
}
From source file:org.apache.oozie.util.XLogStreamer.java
/** * Returns the creation time of the .gz archive if it is relevant to the job * * @param fileName/*from ww w .j a v a 2 s . com*/ * @param startTime * @param endTime * @return Modification time of .gz file after checking if it is relevant to the job */ private long getGZFileCreationTime(String fileName, long startTime, long endTime) { // Default return value of -1 to exclude the file long returnVal = -1; // Include oozie.log as oozie.log.gz if it is accidentally GZipped if (fileName.equals("oozie.log.gz")) { LOG.warn("oozie.log has been GZipped, which is unexpected"); // Return a value other than -1 to include the file in list returnVal = 0; } else { Matcher m = gzTimePattern.matcher(fileName); if (m.matches() && m.groupCount() == 4) { int year = Integer.parseInt(m.group(1)); int month = Integer.parseInt(m.group(2)); int day = Integer.parseInt(m.group(3)); int hour = Integer.parseInt(m.group(4)); int minute = 0; Calendar calendarEntry = Calendar.getInstance(); calendarEntry.set(year, month - 1, day, hour, minute); // give month-1(Say, 7 for August) long logFileStartTime = calendarEntry.getTimeInMillis(); long milliSecondsPerHour = 3600000; long logFileEndTime = logFileStartTime + milliSecondsPerHour; /* To check whether the log content is there in the initial or later part of the log file or the log content is contained entirely within this log file or the entire log file contains the event log where the event spans across hours */ if ((startTime >= logFileStartTime && startTime <= logFileEndTime) || (endTime >= logFileStartTime && endTime <= logFileEndTime) || (startTime <= logFileStartTime && endTime >= logFileEndTime)) { returnVal = logFileStartTime; } } else { LOG.debug("Filename " + fileName + " does not match the expected format"); returnVal = -1; } } return returnVal; }
From source file:org.apache.hadoop.hive.serde2.RegexSerDe.java
@Override public Object deserialize(Writable blob) throws SerDeException { Text rowText = (Text) blob; Matcher m = inputPattern.matcher(rowText.toString()); if (m.groupCount() != numColumns) { throw new SerDeException("Number of matching groups doesn't match the number of columns"); }//from w w w . j a v a2 s. co m // If do not match, ignore the line, return a row with all nulls. if (!m.matches()) { unmatchedRowsCount++; if (!alreadyLoggedNoMatch) { // Report the row if its the first time LOG.warn("" + unmatchedRowsCount + " unmatched rows are found: " + rowText); alreadyLoggedNoMatch = true; } return null; } // Otherwise, return the row. for (int c = 0; c < numColumns; c++) { try { String t = m.group(c + 1); TypeInfo typeInfo = columnTypes.get(c); // Convert the column to the correct type when needed and set in row obj PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo; switch (pti.getPrimitiveCategory()) { case STRING: row.set(c, t); break; case BYTE: Byte b; b = Byte.valueOf(t); row.set(c, b); break; case SHORT: Short s; s = Short.valueOf(t); row.set(c, s); break; case INT: Integer i; i = Integer.valueOf(t); row.set(c, i); break; case LONG: Long l; l = Long.valueOf(t); row.set(c, l); break; case FLOAT: Float f; f = Float.valueOf(t); row.set(c, f); break; case DOUBLE: Double d; d = Double.valueOf(t); row.set(c, d); break; case BOOLEAN: Boolean bool; bool = Boolean.valueOf(t); row.set(c, bool); break; case TIMESTAMP: Timestamp ts; ts = Timestamp.valueOf(t); row.set(c, ts); break; case DATE: Date date; date = Date.valueOf(t); row.set(c, date); break; case DECIMAL: HiveDecimal bd = HiveDecimal.create(t); row.set(c, bd); break; case CHAR: HiveChar hc = new HiveChar(t, ((CharTypeInfo) typeInfo).getLength()); row.set(c, hc); break; case VARCHAR: HiveVarchar hv = new HiveVarchar(t, ((VarcharTypeInfo) typeInfo).getLength()); row.set(c, hv); break; default: throw new SerDeException("Unsupported type " + typeInfo); } } catch (RuntimeException e) { 
partialMatchedRowsCount++; if (!alreadyLoggedPartialMatch) { // Report the row if its the first row LOG.warn("" + partialMatchedRowsCount + " partially unmatched rows are found, " + " cannot find group " + c + ": " + rowText); alreadyLoggedPartialMatch = true; } row.set(c, null); } } return row; }
From source file:org.berlin.crawl.parse.WebParser.java
protected void processFullURL(final List<BotLink> linksForProcessing, final Link tkLink, final String u) { String scheme = ""; String host = ""; String path = ""; String query = ""; final Matcher m = SIMPLE_LINK.matcher(u); while (m.find()) { if (m.groupCount() >= 2) { scheme = m.group(1).trim();// w w w .j a v a 2s . com final String tmp = m.group(2).trim(); final Matcher m2 = SIMPLE_LINK2.matcher(tmp); while (m2.find()) { if (m2.groupCount() >= 2) { host = m2.group(1).trim(); // At this point we should have a path // Remove the 'query' section if available final String tmp2 = m2.group(2).trim(); if (tmp2.indexOf('?') > 0) { final String wQuery = tmp2.substring(tmp2.indexOf('?') + 1); path = tmp2.substring(0, tmp2.indexOf('?')); query = wQuery; } else { path = tmp2; } } // End of the if // } } } // End of the while if (scheme.length() > 0 && host.length() > 0) { // Create a link for for further processing // final BotLink link = new BotLink(); link.setHost(host); if (path.length() > 0) { link.setPath("/" + path); } // End of the if // link.setScheme(scheme); link.setQuery(query); link.setLink(tkLink); logger.info("Attempt to process and add to queue / link , link=" + link); linksForProcessing.add(link); } // End of the if // }
From source file:org.atomserver.core.dbstore.DBBasedAtomService.java
/**
 * Obliterates the entries selected by each query of a comma separated
 * list and reports the outcome per query.
 * <p>
 * Every query has to match {@code ENTRY_ID_PATTERN}; its groups supply
 * workspace, collection, entry id and locale in that order. When a query
 * selects more than {@code obliterateThreshold} entries nothing is removed
 * unless the query ends with {@code "!"}.
 *
 * @param entriesQueries comma separated queries
 * @return one parenthesised status message per query, concatenated
 */
@ManagedOperation(description = "obliterate entries.")
public String obliterateEntries(String entriesQueries) {
    StringBuilder report = new StringBuilder();
    for (String query : entriesQueries.split(",")) {
        Matcher matcher = ENTRY_ID_PATTERN.matcher(query);
        report.append("(").append(query).append(" : ");

        if (!matcher.matches()) {
            report.append("error - doesn't match workspace/collection/entryId?locale=xx_XX");
        } else {
            int groups = matcher.groupCount();
            String workspace = matcher.group(1);
            String collection = groups >= 2 ? matcher.group(2) : null;
            String entryId = groups >= 3 ? matcher.group(3) : null;
            EntryDescriptor descriptor = new BaseEntryDescriptor(workspace, collection, entryId,
                    groups >= 4 ? LocaleUtils.toLocale(matcher.group(4)) : null);

            List<EntryMetaData> selected = entriesDAO.selectEntries(descriptor);
            boolean forced = query.endsWith("!");
            if (selected.size() > obliterateThreshold && !forced) {
                // Too many hits: require the explicit "!" confirmation.
                report.append("would have obliterated more than ").append(obliterateThreshold)
                        .append(" entries (").append(selected.size()).append(") - try ").append(query)
                        .append("! instead.");
            } else {
                for (EntryMetaData entry : selected) {
                    DBBasedAtomCollection target = (DBBasedAtomCollection) getAtomWorkspace(
                            descriptor.getWorkspace()).getAtomCollection(descriptor.getCollection());
                    target.obliterateEntry(entry);
                }
                report.append("obliterated ").append(selected.size()).append(" entries.");
            }
        }
        report.append(")");
    }
    return report.toString();
}
From source file:com.mgmtp.jfunk.web.CapabilitiesProvider.java
@Override public Map<String, DesiredCapabilities> get() { Configuration config = configProvider.get(); Map<String, Map<String, List<JFunkCapability>>> capabilitiesMap = newHashMap(); for (Entry<String, String> entry : config.entrySet()) { String key = entry.getKey(); Matcher matcher = CAPABILITIES_PREFIX_PATTERN.matcher(key); if (matcher.find()) { String driverType = matcher.groupCount() == 1 && matcher.group(1) != null ? matcher.group(1) : "global"; String capabilityString = key.substring(matcher.end() + 1); int lastDotIndex = capabilityString.lastIndexOf('.'); String value = entry.getValue(); JFunkCapability capability;/*from ww w. ja v a2s . c om*/ if (lastDotIndex != -1) { JFunkCapabilityType type = JFunkCapabilityType.LIST; try { Integer.parseInt(capabilityString.substring(lastDotIndex + 1)); capabilityString = capabilityString.substring(0, lastDotIndex); } catch (NumberFormatException ex) { // not a list capability type = JFunkCapabilityType.STRING; } capability = new JFunkCapability(capabilityString, value, type); } else { capability = new JFunkCapability(capabilityString, value, JFunkCapabilityType.STRING); } Map<String, List<JFunkCapability>> map = capabilitiesMap.get(driverType); if (map == null) { map = newHashMapWithExpectedSize(5); capabilitiesMap.put(driverType, map); } List<JFunkCapability> list = map.get(capability.name); if (list == null) { list = newArrayListWithExpectedSize(1); map.put(capability.name, list); } list.add(capability); } } Map<String, List<JFunkCapability>> tmpGlobals = capabilitiesMap.remove("global"); final Map<String, Object> globalCapabilities = tmpGlobals == null ? 
ImmutableMap.<String, Object>of() : transformCapabilities(tmpGlobals); final Proxy proxy = createProxyFromConfig(config); // transform in to map of capabilities for each webdriver type final Map<String, DesiredCapabilities> byDriverTypeCapabilities = transformEntries(capabilitiesMap, new EntryTransformer<String, Map<String, List<JFunkCapability>>, DesiredCapabilities>() { @Override public DesiredCapabilities transformEntry(final String key, final Map<String, List<JFunkCapability>> value) { Map<String, Object> capabilities = newHashMap(globalCapabilities); Map<String, Object> transformedCapabilities = transformCapabilities(value); capabilities.putAll(transformedCapabilities); DesiredCapabilities result = new DesiredCapabilities(capabilities); if (proxy != null) { result.setCapability(CapabilityType.PROXY, proxy); } return result; } }); // wrap, so we get empty capabilities instead of nulls return new ForwardingMap<String, DesiredCapabilities>() { @Override protected Map<String, DesiredCapabilities> delegate() { return byDriverTypeCapabilities; } @Override public DesiredCapabilities get(final Object key) { DesiredCapabilities capabilities = super.get(key); if (capabilities == null) { DesiredCapabilities desiredCapabilities = new DesiredCapabilities(); if (proxy != null) { desiredCapabilities.setCapability(CapabilityType.PROXY, proxy); } capabilities = desiredCapabilities; } return capabilities; } }; }