List of usage examples for java.util.regex Matcher groupCount
public int groupCount()
From source file:com.snowplowanalytics.hive.serde.CfLogStruct.java
/** * Parses the input row String into a Java object. For performance reasons * this works in-place updating the fields within this CfLogStruct, rather * than creating a new one.// w w w . j a v a 2 s . c o m * * @param row * The raw String containing the row contents * @return This struct with all values updated * @throws SerDeException * For any exception during parsing */ public Object parse(String row) throws SerDeException { // We have to handle any header rows if (row.startsWith("#Version:") || row.startsWith("#Fields:")) { return null; // Empty row will be discarded by Hive } Matcher matcher = cfRegex.matcher(row); try { // if the row is not matching the NEW log format, try the former one. if (!matcher.find()) { if (log.isDebugEnabled()) { log.debug("old log format"); } matcher = cfRegex_before_2013_10_21.matcher(row); if (!matcher.find()) { matcher = cfRegex_before_timetaken.matcher(row); if (!matcher.find()) { throw new Exception("row didn't match either old or new patterns"); } } } this.dt = matcher.group(1); this.tm = matcher.group(2); // No need for toHiveDate any more - // CloudFront date format matches Hive's this.edgelocation = matcher.group(3); this.bytessent = toInt(matcher.group(4)); this.ipaddress = matcher.group(5); this.operation = matcher.group(6); this.domain = matcher.group(7); this.object = matcher.group(8); this.httpstatus = toInt(matcher.group(9)); this.referrer = nullifyHyphen(matcher.group(10)); this.useragent = matcher.group(11); this.querystring = nullifyHyphen(matcher.group(12)); this.cookie = nullifyHyphen(matcher.group(13)); this.resulttype = matcher.group(14); this.requestid = matcher.group(15); if (matcher.groupCount() > 15) { this.hostheader = matcher.group(16); this.protocol = matcher.group(17); this.bytes = toInt(matcher.group(18)); if (matcher.groupCount() > 18) { this.timetaken = Double.parseDouble(matcher.group(19)); } } } catch (Exception e) { throw new SerDeException("Could not parse row: \n" + row, e); } return this; // 
Return the CfLogStruct }
From source file:com.ephesoft.dcma.kvfieldcreation.KVFieldCreator.java
/** * Validates a proposed key span with valid key pattern syntax and with respect to value coordinates. * // w w w.j a v a 2 s.c o m * @param keyString {@link String} * @param recCoordinates {@link Coordinates} * @param spanCoordinates {@link Coordinates} * @param location {@link LocationType} * @param numericKeyLearningSwitch {@link String} * @return {@link Boolean} */ private boolean validateKey(final String keyString, final Coordinates recCoordinates, final Coordinates spanCoordinates, final LocationType location, final String numericKeyLearningSwitch) { boolean isKeyValid = false; // bug id #7154: Invalid Key pattern is extracted through automatic key value learning. boolean isKeyPatternSyntaxValid = true; try { Pattern.compile(keyString); } catch (PatternSyntaxException patternInvalidException) { isKeyPatternSyntaxValid = false; } if (isKeyPatternSyntaxValid) { // Auto KV learning algo enhancement to ignore Keys with numeric data (having exactly a digit/ having two or more digits in // entire key string) if numericKeyLearningSwitch switch is off. 
boolean isKeyNumericTestRequired = KVFieldCreatorConstants.SWITCH_OFF .equalsIgnoreCase(numericKeyLearningSwitch); boolean isKeyNonNumeric = true; if (isKeyNumericTestRequired) { try { Pattern pattern = Pattern.compile(EphesoftStringUtil.concatenate( KVFieldCreatorConstants.ATLEAST_TWO_DIGITS_PATTERN, KVFieldCreatorConstants.OR_OPERATOR, KVFieldCreatorConstants.ONLY_ONE_DIGIT_PATTERN)); Matcher matcher = pattern.matcher(keyString); while (matcher.find()) { for (int i = 0; i <= matcher.groupCount(); i++) { if (isKeyNonNumeric) { isKeyNonNumeric = false; } } } } catch (PatternSyntaxException patternInvalidException) { LOGGER.error("The Numeric check pattern used has invalid syntax."); } } if (!isKeyNumericTestRequired || (isKeyNumericTestRequired && isKeyNonNumeric)) { LOGGER.info(MSG_KEY_CREATION + location.toString()); long rectangleX0 = recCoordinates.getX0().longValue(); long rectangleY0 = recCoordinates.getY0().longValue(); long rectangleX1 = recCoordinates.getX1().longValue(); long rectangleY1 = recCoordinates.getY1().longValue(); long spanX0 = spanCoordinates.getX0().longValue(); long spanY0 = spanCoordinates.getY0().longValue(); long spanX1 = spanCoordinates.getX1().longValue(); long spanY1 = spanCoordinates.getY1().longValue(); long diffX0 = Math.abs(spanX0 - rectangleX0); long diffX1 = Math.abs(spanX1 - rectangleX1); if ((spanX0 <= rectangleX0 && spanX1 <= rectangleX0) || (spanX0 >= rectangleX1 && spanX1 >= rectangleX1) || (spanY0 <= rectangleY0 && spanY1 <= rectangleY0) || (spanY0 >= rectangleY1 && spanY1 >= rectangleY1)) { isKeyValid = true; } switch (location) { case BOTTOM: case TOP: if (isKeyValid && ((spanX0 <= rectangleX0 && spanX1 >= rectangleX1) || (spanX0 >= rectangleX0 && spanX1 <= rectangleX1) || ((spanX0 >= rectangleX0 && spanX0 <= rectangleX1) && (diffX0 >= diffX1)) || ((spanX1 >= rectangleX0 && spanX1 <= rectangleX1) && (diffX0 <= diffX1)))) { isKeyValid = true; } else { isKeyValid = false; } break; case BOTTOM_LEFT: case TOP_LEFT: if 
(isKeyValid && (spanX1 <= rectangleX0 || (spanX0 <= rectangleX0) && (diffX0 >= diffX1))) { isKeyValid = true; } else { isKeyValid = false; } break; case BOTTOM_RIGHT: case TOP_RIGHT: if (isKeyValid && (spanX0 >= rectangleX1 || (spanX1 >= rectangleX1) && (diffX0 <= diffX1))) { isKeyValid = true; } else { isKeyValid = false; } break; default: break; } } } return isKeyValid; }
From source file:org.commoncrawl.service.dns.DNSRewriteFilter.java
@Override public FilterResult filterItem(String rootDomainName, String fullyQualifiedDomainName, String urlPath, CrawlURLMetadata metadata, FilterResults results) { for (DNSRewriteItem item : rewriteItems) { if (rootDomainName.equals(item.tldName)) { Matcher matcher = item.pattern.matcher(fullyQualifiedDomainName); boolean matches = matcher.matches(); if (matches && item.testType == DNSRewriteItem.TestType.Inclusion) { StringBuffer finalString = new StringBuffer(); int searchIndexStart = 0; while (searchIndexStart != item.rewriteRule.length()) { int indexOfNextSlash = item.rewriteRule.indexOf('\\', searchIndexStart); if (indexOfNextSlash == -1) { finalString.append(item.rewriteRule.substring(searchIndexStart)); searchIndexStart = item.rewriteRule.length(); } else { if (indexOfNextSlash - searchIndexStart != 0) { finalString.append(item.rewriteRule.substring(searchIndexStart, indexOfNextSlash)); }// w w w . j av a 2 s . c o m searchIndexStart = indexOfNextSlash + 1; if (indexOfNextSlash + 1 != item.rewriteRule.length() && (item.rewriteRule.charAt(indexOfNextSlash + 1) >= '1' && item.rewriteRule.charAt(indexOfNextSlash + 1) <= '9')) { searchIndexStart++; int index = Integer.parseInt( item.rewriteRule.substring(indexOfNextSlash + 1, indexOfNextSlash + 2)); if (index < matcher.groupCount()) { finalString.append(matcher.group(index)); } else if (index == matcher.groupCount()) { finalString.append(rootDomainName); } else { LOG.error("Invalid group index specified in rewrite rule:" + index); return FilterResult.Filter_NoAction; } } else { finalString.append('\\'); } } } results.setRewrittenDomainName(finalString.toString()); return FilterResult.Filter_Modified; } else if (!matches && item.testType == DNSRewriteItem.TestType.Exclusion) { results.setRewrittenDomainName(item.rewriteRule); return FilterResult.Filter_Modified; } } } return FilterResult.Filter_NoAction; }
From source file:com.microsoft.tfs.client.common.ui.controls.generic.html.HTMLEditor.java
/** * Converts the {@link Object} we get back from the browser when we want the * foreground or background color into an {@link RGB}. IE gives us a * {@link Double} object, Firefox gives us a {@link String} in the format * "#000000" or "rgb(0,0,0)"./*from w ww. j av a 2 s .c o m*/ * * @param o * the object to convert (may be <code>null</code> * @return the converted {@link RGB} or <code>null</code> if the object * could not be converted */ private RGB convertHTMLColorObject(final Object o) { if (o == null) { return null; } if (o instanceof String) { /* * Handle both #000000 and rgb(0,0,0) styles. Mozilla alternates * between them. */ final String s = (String) o; log.debug("convertHTMLColorObject called with string " + s); //$NON-NLS-1$ if (s.startsWith("#")) //$NON-NLS-1$ { /* * This implementation assumes 24 bits of color info. Will there * ever be more in this string? */ final int value = Integer.parseInt(s.substring(1), 16); return new RGB((value >> 16) & 0xFF, (value >> 8) & 0xFF, value & 0xFF); } else if (s.toLowerCase(Locale.ENGLISH).startsWith("rgb")) //$NON-NLS-1$ { final Matcher matcher = RGB_CSS_COLOR_PATTERN.matcher(s); if (matcher.find() && matcher.groupCount() == 3) { return new RGB(Integer.parseInt(matcher.group(1).trim()), Integer.parseInt(matcher.group(2).trim()), Integer.parseInt(matcher.group(3).trim())); } else { log.warn(MessageFormat.format("Couldn''t parse color object from string ''{0}''", s)); //$NON-NLS-1$ return null; } } } else if (o instanceof Double) { final long value = Math.round((Double) o); log.debug(MessageFormat.format("convertHTMLColorObject called with Double {0} (rounded to long {1})", //$NON-NLS-1$ Double.toString(((Double) o)), Long.toString(value))); /* * Always seems to be little-endian (on little-endian Windows * platforms). Not sure of a reliable way to detect the format IE * will supply us, so just use little endian for now. 
*/ return new RGB((int) value & 0xFF, (int) (value >> 8) & 0xFF, (int) (value >> 16) & 0xFF); } return null; }
From source file:com.palantir.opensource.sysmon.linux.LinuxVMStatJMXWrapper.java
LinuxVMStat processLine(String line) throws LinuxMonitoringException { Matcher m = VMSTAT_PAT.matcher(line); if (m.matches()) { LinuxVMStat rc = new LinuxVMStat(); try {/*from ww w.j a va 2s. c o m*/ rc.runningProcesses = Integer.parseInt(m.group(1)); rc.sleepingProcesses = Integer.parseInt(m.group(2)); /* * Memory */ rc.swappedMemory = Integer.parseInt(m.group(3)); rc.freeMemory = Integer.parseInt(m.group(4)); rc.buffersMemory = Integer.parseInt(m.group(5)); rc.cacheMemory = Integer.parseInt(m.group(6)); rc.swapIn = Integer.parseInt(m.group(7)); rc.swapOut = Integer.parseInt(m.group(8)); /* * I/O */ rc.blocksRead = Integer.parseInt(m.group(9)); rc.blocksWritten = Integer.parseInt(m.group(10)); /* * System */ rc.interrupts = Integer.parseInt(m.group(11)); rc.contextSwitches = Integer.parseInt(m.group(12)); /* * CPU */ rc.userPercentCPU = Integer.parseInt(m.group(13)); rc.sysPercentCPU = Integer.parseInt(m.group(14)); rc.idlePercentCPU = Integer.parseInt(m.group(15)); rc.waitPercentCPU = Integer.parseInt(m.group(16)); // this may not be there if (m.groupCount() == 18) { rc.stolenFromVMCPU = Integer.parseInt(m.group(17)); } } catch (NumberFormatException e) { throw new LinuxMonitoringException("Encountered problems parsing integer out of line: " + line, e); } return rc; } else { throw new LinuxMonitoringException( "Input line '" + line + "' did not match regex " + VMSTAT_PAT.pattern() + ", can't parse."); } }
From source file:birch.filter.EncryptionFilter.java
/**
 * Processes an incoming message, decrypting its payload when the message target is in the
 * encryption list.
 *
 * <p>The message is first passed through {@code linebreakFilter}. Empty messages, messages not
 * matching {@code messagePattern}, and messages whose target is not in the encryption list are
 * returned as they are after that step. Otherwise the message is rebuilt from its first three
 * groups, followed either by the decrypted payload (when group 4 matches
 * {@code activeEncryptedPattern}) or by {@code plainPrefix} and the untouched payload.
 *
 * @param message the raw incoming message
 * @return the message to hand on, rebuilt as described above when applicable
 */
protected String doReceive(String message) {
    message = linebreakFilter.receive(message);
    if (message.length() == 0) {
        return message;
    }

    final Matcher parsed = messagePattern.matcher(message);
    if (!parsed.matches()) {
        return message;
    }

    final String first = parsed.group(1);
    final String second = parsed.group(2);
    final String third = parsed.group(3);

    // A third group starting with '#' is itself the target; otherwise the target is cut
    // out of the first group, from index 1 up to the '!'.
    final String target;
    if (third.startsWith("#")) {
        target = third;
    } else {
        target = first.substring(1, first.indexOf('!'));
    }

    if (!inEncryptionList(target)) {
        return message;
    }

    final String payload = parsed.group(4);
    final StringBuilder rebuilt = new StringBuilder();
    rebuilt.append(":").append(first).append(" ").append(second).append(" ").append(third).append(" :");

    final Matcher encrypted = activeEncryptedPattern.matcher(payload);
    if (encrypted.matches()) {
        // The last group of the pattern holds the ciphertext; whatever precedes it is kept as is.
        final int lastGroup = encrypted.groupCount();
        rebuilt.append(payload.substring(0, encrypted.start(lastGroup)));
        rebuilt.append(decrypt(target, encrypted.group(lastGroup)));
    } else {
        rebuilt.append(plainPrefix);
        rebuilt.append(payload);
    }
    return rebuilt.toString();
}
From source file:com.photon.phresco.framework.commons.FrameworkUtil.java
public static List<String> getCsvAsList(String csv) { Pattern csvPattern = Pattern.compile(CSV_PATTERN); Matcher match = csvPattern.matcher(csv); List<String> list = new ArrayList<String>(match.groupCount()); // For each field while (match.find()) { String value = match.group(); if (value == null) { break; }/*from w ww . j av a2s .c o m*/ if (value.endsWith(",")) { // trim trailing , value = value.substring(0, value.length() - 1); } if (value.startsWith("\"")) { // assume also ends with value = value.substring(1, value.length() - 1); } if (value.length() == 0) { value = null; } list.add(value.trim()); } if (CollectionUtils.isEmpty(list)) { list.add(csv.trim()); } return list; }
From source file:edu.umd.cs.marmoset.modelClasses.TestOutcome.java
/** * @param requestedSourceFileName//from w ww. j av a2 s .c o m * @param requestedLineNumber * @return */ public boolean isStackTraceAtLineForFile(String requestedSourceFileName, int requestedLineNumber) { if (!isCardinalTestType()) return false; if (longTestResult == null || longTestResult.equals("")) return false; BufferedReader reader = null; Pattern pattern = Pattern.compile("\\((\\w+\\.java):(\\d+)\\)"); try { reader = new BufferedReader(new StringReader(getLongTrimmedTestResult())); while (true) { String line = reader.readLine(); if (line == null) break; //System.out.println("line: " +line); Matcher matcher = pattern.matcher(line); if (!line.contains("java.") && !line.contains("junit.") && !line.contains("\\s+sun\\.reflect") && !line.contains("edu.umd.cs.buildServer") && !line.contains("ReleaseTest") && !line.contains("PublicTest") && !line.contains("SecretTest") && !line.contains("SimpleTest") && !line.contains("TestAgainstFile") && !line.contains("SpiderTest") && matcher.find() && matcher.groupCount() > 1) { String sourceFileName = matcher.group(1); String lineNumber = matcher.group(2); if (requestedSourceFileName.equals(sourceFileName) && Integer.valueOf(lineNumber).intValue() == requestedLineNumber) return true; } } } catch (IOException ignore) { // cannot happen; we're reading from a String! } finally { IOUtils.closeQuietly(reader); } return false; }
From source file:org.fcrepo.test.api.TestRESTAPI.java
/**
 * Extracts the PID from the text after the last {@code /} in {@code source}.
 *
 * <p>Line breaks are removed from the extracted text and {@code %3A} is replaced by
 * {@code :}.
 *
 * @param source the text to extract from, for example a URL ending in the PID
 * @return the cleaned-up PID, or {@code null} if {@code source} has no {@code /} followed by
 *         at least one further character
 */
private String extractPid(String source) {
    Matcher m = Pattern.compile("^.*/([^/]+$)").matcher(source);
    if (!m.find()) {
        // Nothing to clean up. Previously this case fell through to a method call on a
        // null reference and threw a NullPointerException.
        return null;
    }
    return m.group(1).replace("\n", "").replace("\r", "").replace("%3A", ":");
}
From source file:edu.umd.cs.marmoset.modelClasses.TestOutcome.java
private @HTML String getStackTraceHotlinks(String viewSourceLink) { // System.out.println("Calling getStackTraceHotlinks!"); String testResult = getLongTestResult(); if (Strings.isNullOrEmpty(testResult) || getOutcome().equals(NOT_IMPLEMENTED)) return ""; StringBuffer buf = new StringBuffer(); String txt = getLongTrimmedTestResult(); BufferedReader reader = null; Pattern pattern = Pattern.compile("\\((\\w+\\.java):(\\d+)\\)"); try {// ww w .j a v a2 s . c o m reader = new BufferedReader(new StringReader(txt)); while (true) { String line = reader.readLine(); if (line == null) break; /** skip stacktracelements resulting from Clover */ String trimmed = line.trim(); if (trimmed.startsWith("at ")) { if (line.contains(".__CLR3_0_") || trimmed.startsWith("at junit.framework.Assert") || trimmed.startsWith("at java.io") || trimmed.startsWith("at sun.") || trimmed.startsWith("at edu.umd.cs.diffText")) continue; } line = line.replaceAll("&", "&"); line = line.replaceAll("<", "<"); line = line.replaceAll(">", ">"); Matcher matcher = pattern.matcher(line); if (!line.contains("java.") && !line.contains("junit.") && !line.contains("\\s+sun\\.reflect") && !line.contains("edu.umd.cs.buildServer") && !line.contains("ReleaseTest") && !line.contains("PublicTest") && !line.contains("SecretTest") && !line.contains("SimpleTest") && !line.contains("TestAgainstFile") && !line.contains("SpiderTest") && matcher.find() && matcher.groupCount() > 1) { String sourceFileName = matcher.group(1); String startHighlight = matcher.group(2); int numToHighlight = 1; int numContext = 0; buf.append(createSourceCodeLink(viewSourceLink, line, sourceFileName, startHighlight, numToHighlight, numContext)); buf.append("<br>"); } else { buf.append(line + "<br>\n"); } } } catch (IOException ignore) { throw new RuntimeException("DAMMIT JIM!", ignore); } finally { IOUtils.closeQuietly(reader); } return XSSScrubber.asHTML(buf.toString()); }