List of usage examples for java.util.regex.Pattern.toString()
public String toString()
Returns the string representation of this pattern.
From source file:com.ibm.watson.catalyst.corpus.tfidf.ApplyTemplate.java
public static void main(String[] args) { System.out.println("Loading Corpus."); JsonNode root;//w w w .ja v a 2 s. co m TermCorpus c; JsonNode documents; try (InputStream in = new FileInputStream(new File("tfidf-health-1.json"))) { root = MAPPER.readTree(in); documents = root.get("documents"); TermCorpusBuilder cb = new TermCorpusBuilder(); cb.setDocumentCombiner(0, 0); cb.setJson(new File("health-corpus.json")); c = cb.build(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); return; } catch (JsonProcessingException e) { // TODO Auto-generated catch block e.printStackTrace(); return; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); return; } System.out.println("Corpus loaded."); List<TemplateMatch> matches = new ArrayList<TemplateMatch>(); Iterator<TermDocument> documentIterator = c.getDocuments().iterator(); int index = 0; for (JsonNode document : documents) { Pattern p1 = Template.getTemplatePattern(document, "\\b(an? |the )?(\\w+ ){0,4}", "( \\w+)?(?= is (an?|one|the)\\b)"); if (p1.toString().equals("\\b(an? |the )?(\\w+ ){0,4}()( \\w+)?(?= is (an?|one|the)\\b)")) continue; Pattern p2 = Template.getTemplatePattern(document, "^(\\w+ ){0,2}", "( \\w+){0,1}?(?=( can| may)? causes?\\b)"); Pattern p3 = Template.getTemplatePattern(document, "(?<=the use of )(\\w+ ){0,3}", "( \\w+| ){0,2}?(?=( (and|does|in|for|can|is|as|to|of)\\b|\\.))"); Pattern p4 = Template.getTemplatePattern(document, "^(\\w+ ){0,3}", "( \\w+){0,1}(?=( can| may) leads? 
to\\b)"); Pattern p5 = Template.getTemplatePattern(document, "(?<=\\bthe risk of )(\\w+ ){0,3}", "( (disease|stroke|attack|cancer))?\\b"); Pattern p6 = Template.getTemplatePattern(document, "(\\w{3,} ){0,3}", "( (disease|stroke|attack|cancer))?(?= is caused by\\b)"); Pattern p7 = Template.getTemplatePattern(document, "(?<= is caused by )(\\w+ ){0,10}", ""); Pattern p8 = Template.getTemplatePattern(document, "\\b", "( \\w{4,})(?= can be used)"); Pattern p9 = Template.getTemplatePattern(document, "(?<= can be used )(\\w+ ){0,10}", "\\b"); TermDocument d = documentIterator.next(); DocumentMatcher dm = new DocumentMatcher(d); matches.addAll(dm.getParagraphMatches(p1, "What is ", "?")); matches.addAll(dm.getParagraphMatches(p2, "What does ", " cause?")); matches.addAll(dm.getParagraphMatches(p3, "How is ", " used?")); matches.addAll(dm.getParagraphMatches(p4, "What can ", " lead to?")); matches.addAll(dm.getParagraphMatches(p5, "What impacts the risk of ", "?")); matches.addAll(dm.getParagraphMatches(p6, "What causes ", "?")); matches.addAll(dm.getParagraphMatches(p7, "What is caused by ", "?")); matches.addAll(dm.getParagraphMatches(p8, "How can ", " be used?")); matches.addAll(dm.getParagraphMatches(p9, "What can be used ", "?")); System.out.print("Progress: " + ((100 * ++index) / documents.size()) + "%\r"); } System.out.println(); List<TemplateMatch> condensedMatches = new ArrayList<TemplateMatch>(); for (TemplateMatch match : matches) { for (TemplateMatch baseMatch : condensedMatches) { if (match.sameQuestion(baseMatch)) { baseMatch.addAnswers(match); break; } } condensedMatches.add(match); } try (BufferedWriter bw = new BufferedWriter(new FileWriter("health-questions.txt"))) { for (TemplateMatch match : condensedMatches) { bw.write(match.toString()); } bw.write("\n"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } System.out.println("Done and generated: " + condensedMatches.size()); }
From source file:org.eclipse.epp.internal.logging.aeri.ui.v2.ServerConfigurationTest.java
/**
 * Tells whether any pattern in {@code patterns} has the same string form as {@code p}.
 *
 * <p>Patterns are compared through {@link Pattern#toString()} only, so compile flags are not
 * taken into account.
 *
 * @param patterns the patterns to search
 * @param p the pattern whose string form is looked for
 * @return {@code true} if at least one element's string form equals that of {@code p}
 */
private static boolean containsPatternFromSameString(List<Pattern> patterns, Pattern p) {
    boolean found = false;
    for (Pattern candidate : patterns) {
        String candidateSource = candidate.toString();
        String wantedSource = p.toString();
        if (candidateSource.equals(wantedSource)) {
            found = true;
            break;
        }
    }
    return found;
}
From source file:org.yamj.core.tools.LanguageTools.java
/**
 * Get the list of loose languages associated with a language.
 *
 * @param language the key to look up in {@code LOOSE_LANGUAGE_MAP}
 * @return the lower-cased string form of the pattern registered for {@code language}, or an
 *         empty string when nothing is registered for it
 */
public static String getLanguageList(String language) {
    // Single lookup instead of containsKey + get.
    Pattern langPatt = LOOSE_LANGUAGE_MAP.get(language);
    if (langPatt == null) {
        return StringUtils.EMPTY;
    }
    // Locale.ROOT keeps the result independent of the JVM default locale (with a Turkish
    // default locale, for example, 'I' would otherwise be lower-cased to a dotless 'ı').
    return langPatt.toString().toLowerCase(java.util.Locale.ROOT);
}
From source file:com.microsoft.tfs.client.eclipse.tpignore.TPIgnoreCache.java
/** * Gets pattern strings from {@link Pattern} objects. * * @param patterns/*w w w .jav a 2s . c o m*/ * the patterns to get strings for (must not be <code>null</code>) * @return an array of strings equal in size to the given collection * containing the pattern strings */ public static String[] getPatternStrings(final Collection<Pattern> patterns) { Check.notNull(patterns, "patterns"); //$NON-NLS-1$ final String[] ret = new String[patterns.size()]; int i = 0; for (final Pattern pattern : patterns) { ret[i++] = pattern.toString(); } return ret; }
From source file:com.puppycrawl.tools.checkstyle.checks.regexp.RegexpOnFilenameCheck.java
/**
 * Returns the string form of {@code pattern}, falling back to {@code defaultString} when the
 * pattern is {@code null}.
 *
 * @param pattern The pattern to convert; may be {@code null}.
 * @param defaultString The value returned when {@code pattern} is {@code null}.
 * @return {@code pattern.toString()}, or {@code defaultString} if {@code pattern} is null.
 */
private static String getStringOrDefault(Pattern pattern, String defaultString) {
    return pattern != null ? pattern.toString() : defaultString;
}
From source file:org.tinymediamanager.core.tvshow.TvShowEpisodeAndSeasonParser.java
/** * Parses the./*from w ww . ja v a2 s.c om*/ * * @param searchString * the search string * @param pattern * the pattern * @return the episode matching result */ @Deprecated private static EpisodeMatchingResult parse(String searchString, Pattern pattern) { LOGGER.trace("parsing " + searchString + " with " + pattern.toString()); EpisodeMatchingResult result = new EpisodeMatchingResult(); Matcher m = pattern.matcher(searchString); while (m.find()) { int ep = 0; // match episode try { ep = Integer.parseInt(m.group(2)); } catch (NumberFormatException nfe) { // maybe roman notation ep = decodeRoman(m.group(2)); } if (ep > 0 && !result.episodes.contains(ep)) { LOGGER.trace("found episode " + ep + " for " + searchString + " with " + pattern.toString()); result.episodes.add(ep); } // match season if (result.season < 0) { int season = -1; try { season = Integer.parseInt(m.group(1)); } catch (NumberFormatException nfe) { } result.season = season; } // if episode found take the 3 matcher group again to the matcher if (StringUtils.isBlank(result.name)) { EpisodeMatchingResult newResult = parseString(" " + m.group(3)); if (newResult.episodes.size() > 0) { // we found episodes again result.episodes.addAll(newResult.episodes); } else { // get name an strip out file extension result.name = FilenameUtils.getBaseName(m.group(3)); } } } LOGGER.trace("matching result " + result); return result; }
From source file:com.moviejukebox.scanner.MovieFilenameScanner.java
/** * Get the list of loose languages associated with a language * * @param language/*from ww w . j av a 2 s . c o m*/ * @return */ //TODO : Extract this from here, it's not specific on MovieFileNameScanner public static String getLanguageList(String language) { if (LOOSE_LANGUAGE_MAP.containsKey(language)) { Pattern langPatt = LOOSE_LANGUAGE_MAP.get(language); return langPatt.toString().toLowerCase(); } return ""; }
From source file:org.jfrog.build.extractor.clientConfiguration.util.PublishedItemsHelper.java
/**
 * Checks whether to continue searching recursively for files.
 *
 * <p>A recursive search always continues. Otherwise the search continues only while the
 * number of {@code /} separators in the path of {@code dir} relative to {@code absoluteRoot}
 * (backslashes counted as {@code /}) is smaller than the number of {@code /} in the string
 * form of {@code pattern}.
 *
 * @param absoluteRoot the root the relative path of {@code dir} is computed against
 * @param dir the directory currently being visited
 * @param pattern the pattern whose string form supplies the reference depth
 * @param recursive whether the search is recursive
 * @return boolean {@code true} if the search should descend further
 */
private static boolean continueDepthSearch(File absoluteRoot, File dir, Pattern pattern, boolean recursive) {
    if (!recursive) {
        String normalizedRelativePath = getRelativePath(absoluteRoot, dir).replace("\\", "/");
        int relativePathDepth = StringUtils.countMatches(normalizedRelativePath, "/");
        int patternPathDepth = StringUtils.countMatches(pattern.toString(), "/");
        return relativePathDepth < patternPathDepth;
    }
    return true;
}
From source file:com.wcs.wcslib.vaadin.widget.jqinputmask.extension.Definition.java
/**
 * Convenience overload that forwards the string form of {@code pattern} to
 * {@code validator(String)}.
 *
 * @param pattern the pattern whose {@link Pattern#toString()} value is passed on
 * @return whatever {@code validator(String)} returns for that string
 */
public Definition validator(Pattern pattern) {
    final String regex = pattern.toString();
    return validator(regex);
}
From source file:uk.gov.gchq.gaffer.function.filter.MultiRegex.java
/** * Utility method to convert an array of {@link Pattern}s to * and array of {@link String}s./*ww w. j ava2 s . co m*/ * * This is required since the Pattern class does not override {@link Object#equals(Object)} * or {@link Object#hashCode()} * * @param patterns an array of Patterns to convert * @return an array of Strings representing the regex pattern */ private String[] patternsToStrings(final Pattern[] patterns) { final List<String> strings = new ArrayList<>(patterns.length); for (final Pattern pattern : patterns) { strings.add(pattern.toString()); } return strings.toArray(new String[] {}); }