List of usage examples for java.util.regex.Matcher#groupCount()
public int groupCount()
From source file:com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java
/**
 * Returns the second capture group of a URI shaped like {@code gg://<first>:<second>}.
 *
 * <p>Both groups are matched with greedy {@code .*}, so when the input contains several
 * colons after {@code gg://} the returned value is the text following the last one.
 *
 * @param uri the URI to parse
 * @return the text captured by the second group of {@code gg://(.*):(.*)}
 * @throws DatabusException if {@code uri} is null or does not match the expected shape
 */
public static String uriToXmlPrefix(String uri) throws DatabusException {
    if (null == uri) {
        throw new DatabusException("uri passed is null and not valid");
    }

    final Matcher ggUri = Pattern.compile("gg://(.*):(.*)").matcher(uri);
    final boolean wellFormed = ggUri.matches() && ggUri.groupCount() == 2;
    if (wellFormed) {
        return ggUri.group(2);
    }
    throw new DatabusException("Expected uri format for gg path not found");
}
From source file:com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java
/**
 * Converts a string that starts with a {@code yyyy-MM-dd} date into epoch milliseconds.
 *
 * <p>Anything after the ten-character date prefix is ignored. The conversion is delegated to
 * {@code Date.valueOf(String)}; NOTE(review): for {@code java.sql.Date} that means midnight
 * in the JVM default time zone - confirm this is the intended zone.
 *
 * @param value the text to parse; must start with four digits, a dash, two digits, a dash,
 *              two digits
 * @return the milliseconds value of the parsed date
 * @throws DatabusException if {@code value} is null, does not start with a date of the
 *                          expected shape, or the date fields are rejected by
 *                          {@code Date.valueOf}
 */
public static long ggDateStringToLong(String value) throws DatabusException {
    // Report null the same way every other malformed input is reported instead of
    // letting Pattern.matcher(null) fail with a bare NullPointerException.
    if (value == null) {
        throw new DatabusException("The date format is not as expected, cannot proceed!");
    }

    Matcher matcher = Pattern.compile("(\\d{4}-\\d{2}-\\d{2}).*").matcher(value);
    if (!matcher.matches()) {
        throw new DatabusException("The date format is not as expected, cannot proceed!");
    }

    String dateFormatString = matcher.group(1);
    try {
        return Date.valueOf(dateFormatString).getTime();
    } catch (IllegalArgumentException e) {
        // The regex only checks digit counts, so values such as "2013-13-01" reach
        // Date.valueOf and are rejected there. Surface that through the declared
        // exception type. Only the single-String DatabusException constructor is
        // visible from here, so the cause cannot be chained.
        throw new DatabusException("The date format is not as expected, cannot proceed!");
    }
}
From source file:de.rnd7.libtvdb.util.EpisodeUtil.java
private static List<EpisodeInfo> parseNameInternal(final String name) throws IOException { final String filtered = filter(name.toLowerCase()); final List<EpisodeInfo> result = new ArrayList<EpisodeInfo>(); for (final Pattern pattern : PATTERN) { final Matcher matcher = pattern.matcher(filtered); boolean match = false; while (matcher.find()) { match = true;// ww w . j av a 2 s .c o m if (matcher.groupCount() == 3) { final int season = Integer.parseInt(matcher.group(1)); final int episodeA = Integer.parseInt(matcher.group(2)); final int episodeB = Integer.parseInt(matcher.group(3)); result.add(new EpisodeInfo(season, episodeA)); result.add(new EpisodeInfo(season, episodeB)); } else { final int season = Integer.parseInt(matcher.group(1)); final int episode = Integer.parseInt(matcher.group(2)); result.add(new EpisodeInfo(season, episode)); } } if (match) { break; } } if (result.isEmpty()) { // Fallback for single season series: final Pattern pattern = Pattern.compile(".*(\\d\\d).*", Pattern.CASE_INSENSITIVE); final Matcher matcher = pattern.matcher(filtered); if (matcher.matches()) { final int season = 1; final int episode = Integer.parseInt(matcher.group(1)); result.add(new EpisodeInfo(season, episode)); } } return result; }
From source file:com.linkedin.databus2.producers.gg.GGEventGenerationFactory.java
public static long ggTimeStampStringToMilliSeconds(String value) throws DatabusException { Pattern _pattern = Pattern.compile("(\\d{4})-(\\d{2})-(\\d{2}):(\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{0,9})"); Matcher matcher = _pattern.matcher(value); if (!matcher.matches() || matcher.groupCount() != 7) { throw new DatabusException("The timestamp format is not as expected, cannot proceed!"); }//from ww w . j a va2 s.c o m Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); //Explicitly set ms to zero; without initialization it has random ms values :( calendar.set(Calendar.MILLISECOND, 0); calendar.set(Integer.valueOf(matcher.group(1)), Integer.valueOf(matcher.group(2)) - 1, Integer.valueOf(matcher.group(3)), Integer.valueOf(matcher.group(4)), Integer.valueOf(matcher.group(5)), Integer.valueOf(matcher.group(6))); //Prune to the first 3 digits or less String milliSecondsString = matcher.group(7); int maxSecondsLength = (milliSecondsString.length() > 3) ? 3 : milliSecondsString.length(); String prunedMilliSeconds = milliSecondsString.substring(0, maxSecondsLength); //Add the ms value to the calendar object calendar.add(Calendar.MILLISECOND, Integer.valueOf(prunedMilliSeconds)); return calendar.getTimeInMillis(); }
From source file:com.technophobia.substeps.model.Arguments.java
/**
 * Extracts the captured groups of {@code patternString} from {@code sourceString} and
 * converts each one to an object.
 *
 * <p>Only the first match is used. Every non-null group is passed through
 * {@code substituteValues} and then {@code getObjectArg}, using the parameter and
 * converter type at the group's zero-based position. Groups that did not participate
 * in the match are skipped but still consume a position.
 *
 * @param patternString  regular expression whose groups delimit the arguments
 * @param sourceString   text to search
 * @param parameterTypes target type for each group position
 * @param converterTypes converter type for each group position
 * @param cfg            configuration handed to {@code substituteValues}
 * @return the converted arguments, or {@code null} when the pattern is not found or no
 *         group captured anything
 */
public static List<Object> getArgs(final String patternString, final String sourceString,
        final Class<?>[] parameterTypes, final Class<? extends Converter<?>>[] converterTypes, Config cfg) {

    log.debug("Arguments getArgs List<Object> with pattern: " + patternString + " and sourceStr: "
            + sourceString);

    final Matcher matcher = Pattern.compile(patternString).matcher(sourceString);
    if (!matcher.find()) {
        return null;
    }

    List<Object> collected = null;
    final int groups = matcher.groupCount();
    for (int group = 1; group <= groups; group++) {
        final String raw = matcher.group(group);
        if (raw == null) {
            continue;
        }
        // The list is created lazily so callers still see null when nothing was captured.
        if (collected == null) {
            collected = new ArrayList<>();
        }
        final int slot = group - 1;
        collected.add(getObjectArg(substituteValues(raw, cfg), parameterTypes[slot], converterTypes[slot]));
    }
    return collected;
}
From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java
/**
 * Collects the contents of the heading tags found in an HTML page.
 *
 * <p>The returned map always contains the keys {@code "H1"} to {@code "H6"}. Each match
 * of {@code HX_PATTERN} contributes its second group, passed through {@code clear}, to
 * the list keyed by the upper-cased first group.
 *
 * @param html the HTML text to scan
 * @return a map from heading tag name to the cleaned contents found for that tag
 */
public static Map<String, List<String>> extractHTMLheaders(String html) {
    Map<String, List<String>> headersByTag = new HashMap<>();
    int level = 1;
    while (level <= 6) {
        headersByTag.put("H" + level, new ArrayList<>());
        level++;
    }

    Matcher heading = HX_PATTERN.matcher(html);
    while (heading.find()) {
        if (heading.groupCount() != 2) {
            continue;
        }
        List<String> bucket = headersByTag.get(heading.group(1).toUpperCase(Locale.ENGLISH));
        bucket.add(clear(heading.group(2)));
    }
    return headersByTag;
}
From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java
/**
 * Collects the meta tags of an HTML page into a name-to-content map.
 *
 * <p>For every match of {@code METATAG_PATTERN} the first group is used as the name and
 * the second as the content; both are passed through {@code clear}. A name that occurs
 * more than once keeps the content of its last occurrence.
 *
 * @param html the HTML text to scan
 * @return a map from cleaned meta tag name to cleaned content
 */
public static Map<String, String> extractMetatags(String html) {
    Map<String, String> tags = new HashMap<>();
    for (Matcher meta = METATAG_PATTERN.matcher(html); meta.find();) {
        if (meta.groupCount() == 2) {
            tags.put(clear(meta.group(1)), clear(meta.group(2)));
        }
    }
    return tags;
}
From source file:net.ripe.ipresource.Ipv6Address.java
/**
 * Converts a fully expanded IPv6 string to a BigInteger.
 *
 * <p>The address must consist of eight colon-separated groups of up to four hexadecimal
 * digits; each group is left-padded with zeros to four digits before the 32-digit
 * result is parsed as one hexadecimal number. The pattern is located with
 * {@code find()}, so text surrounding a valid address is ignored.
 *
 * @param ipAddressString Fully expanded address (i.e. no '::' shortcut)
 * @return Address as BigInteger
 * @throws IllegalArgumentException if no eight-group address is found in the input
 */
private static BigInteger ipv6StringtoBigInteger(String ipAddressString) {
    Pattern p = Pattern.compile(
            "([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):"
                    + "([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4}):([0-9a-fA-F]{0,4})");
    Matcher m = p.matcher(ipAddressString);

    // Fail fast with a meaningful message; without this check group() would raise an
    // opaque IllegalStateException("No match found") on malformed input.
    if (!m.find()) {
        throw new IllegalArgumentException("Not a fully expanded IPv6 address: " + ipAddressString);
    }

    StringBuilder hex = new StringBuilder(32);
    for (int i = 1; i <= m.groupCount(); i++) {
        String part = m.group(i);
        // Left-pad every group to four hex digits so the groups line up positionally.
        for (int pad = part.length(); pad < 4; pad++) {
            hex.append('0');
        }
        hex.append(part);
    }
    return new BigInteger(hex.toString(), 16);
}
From source file:com.datumbox.framework.core.utilities.text.parsers.HTMLParser.java
/** * Extracts the hyperlinks from an html string and returns their components * in a map./*from w w w. j av a 2s. c o m*/ * * @param html * @return */ public static Map<HyperlinkPart, List<String>> extractHyperlinks(String html) { Map<HyperlinkPart, List<String>> hyperlinksMap = new HashMap<>(); hyperlinksMap.put(HyperlinkPart.HTMLTAG, new ArrayList<>()); hyperlinksMap.put(HyperlinkPart.URL, new ArrayList<>()); hyperlinksMap.put(HyperlinkPart.ANCHORTEXT, new ArrayList<>()); Matcher m = HYPERLINK_PATTERN.matcher(html); while (m.find()) { if (m.groupCount() == 2) { String tag = m.group(0); String url = m.group(1); String anchortext = m.group(2); hyperlinksMap.get(HyperlinkPart.HTMLTAG).add(tag); hyperlinksMap.get(HyperlinkPart.URL).add(url); hyperlinksMap.get(HyperlinkPart.ANCHORTEXT).add(anchortext); } } return hyperlinksMap; }
From source file:org.apache.falcon.util.DateUtil.java
/** * {@link java.util.TimeZone#getTimeZone(String)} takes the timezone ID as an argument; for invalid IDs * it returns the <code>GMT</code> TimeZone. A timezone ID formatted like <code>GMT-####</code> is not a valid ID, * however, it will actually map this to the <code>GMT-##:##</code> TimeZone, instead of returning the * <code>GMT</code> TimeZone. We check (later) check that a timezone ID is valid by calling * {@link java.util.TimeZone#getTimeZone(String)} and seeing if the returned * TimeZone ID is equal to the original; because we want to allow <code>GMT-####</code>, while still * disallowing actual invalid IDs, we have to manually replace <code>GMT-####</code> * with <code>GMT-##:##</code> first. * * @param tzId The timezone ID/*from www. java2 s.c o m*/ * @return If tzId matches <code>GMT-####</code>, then we return <code>GMT-##:##</code>; otherwise, * we return tzId unaltered */ private static String handleGMTOffsetTZNames(String tzId) { Matcher m = GMT_OFFSET_COLON_PATTERN.matcher(tzId); if (m.matches() && m.groupCount() == 3) { tzId = "GMT" + m.group(1) + m.group(2) + ":" + m.group(3); } return tzId; }