List of usage examples for java.util.regex Pattern DOTALL
int DOTALL
To view the source code for java.util.regex Pattern DOTALL, click the Source Link.
From source file:application.Crawler.java
/** * not a good pratice but quick should have only one regex for perf * @param htmlRes// ww w . j a v a 2 s. co m * @return */ private String extractDescription(String htmlRes) { String res = null; Pattern p = Pattern.compile( "<div class=\"productText\">(.*)<\\/div>.*<h3 class=\"productDataItemHeader\">Nutrition<\\/h3>", Pattern.DOTALL); Matcher m = p.matcher(htmlRes); while (m.find()) { res = m.group(1); // remove html tag res = res.replaceAll("\\<[^>]*>", "").trim(); } return res; }
From source file:fr.dudie.acrachilisync.utils.IssueDescriptionReader.java
/** * Extracts the list of bug occurrences from the description. * /* w ww.j a v a 2s .co m*/ * @param pDescription * the issue description * @param pStacktraceMD5 * the stacktrace MD5 hash the issue is related to * @return the ACRA bug occurrences listed in the description * @throws IssueParseException * malformed issue description */ private List<ErrorOccurrence> parseAcraOccurrencesTable(final String pDescription, final String pStacktraceMD5) throws IssueParseException { final List<ErrorOccurrence> occur = new ArrayList<ErrorOccurrence>(); // escape braces { and } to use strings in regexp final String header = IssueDescriptionUtils.getOccurrencesTableHeader(); final String escHeader = Pattern.quote(header); // regexp to find occurrences tables final Pattern p = Pattern.compile(escHeader + IssueDescriptionUtils.EOL + "(?:" + OCCURR_LINE_PATTERN + IssueDescriptionUtils.EOL + "+)+", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); final Matcher m = p.matcher(pDescription); if (m.find()) { // regexp to find occurrences lines final Pattern pLine = Pattern.compile(OCCURR_LINE_PATTERN); final Matcher mLine = pLine.matcher(m.group()); while (mLine.find()) { try { final StringTokenizer line = new StringTokenizer(mLine.group(), "|"); final String acraReportId = line.nextToken(); final String acraUserCrashDate = line.nextToken(); final String acraRunFor = line.nextToken(); final String acraAndroidVersion = line.nextToken(); final String acraVersionCode = line.nextToken(); final String acraVersionName = line.nextToken(); final String acraDevice = line.nextToken(); final ErrorOccurrence error = new ErrorOccurrence(); error.setReportId(acraReportId); try { error.setCrashDate(IssueDescriptionUtils.parseDate(acraUserCrashDate)); error.setRunFor(RunningTimeUtils.parseRunningTime(acraRunFor)); } catch (final ParseException e) { throw new IssueParseException( "Unable to parse user crash date of ACRA report " + acraReportId, e); } error.setAndroidVersion(acraAndroidVersion); 
error.setVersionCode(acraVersionCode); error.setVersionName(acraVersionName); error.setDevice(acraDevice); occur.add(error); } catch (final NoSuchElementException e) { throw new IssueParseException("Unable to parse ACRA report line: " + mLine.group(), e); } } } else { throw new IssueParseException("No crash occurrence table found in the description"); } if (m.find()) { throw new IssueParseException("More than 1 occurrence table found in the description"); } if (CollectionUtils.isEmpty(occur)) { throw new IssueParseException("0 user crash occurrence found in the description"); } return occur; }
From source file:dk.dma.msinm.web.rest.LocationRestService.java
/** Namespace returned when the document does not declare one right after {@code <kml}. */
private static final String FALLBACK_KML_NAMESPACE = "http://www.opengis.net/kml/2.2";

/**
 * Matches a whole document containing {@code <kml xmlns="...">}; group 1 is the namespace URI.
 * DOTALL lets the surrounding {@code .*} span line terminators. No MULTILINE flag is needed
 * because the expression has no {@code ^} or {@code $} anchors.
 */
private static final Pattern KML_DEFAULT_NAMESPACE_PATTERN = Pattern.compile(
        ".*<kml xmlns=\"([^\"]*)\".*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Annoyingly, different versions of KML use different default namespaces.
 * Hence, attempt to extract the default namespace.
 * <p>
 * Only an {@code xmlns} attribute written directly after the {@code <kml} tag name is
 * recognised; the tag and attribute names are matched case-insensitively.
 *
 * @param kml the xml, may be {@code null}
 * @return the default KML namespace declared in the document, or
 *         {@code http://www.opengis.net/kml/2.2} when none is found or {@code kml} is {@code null}
 */
private String extractDefaultNamespace(String kml) {
    if (kml == null) {
        return FALLBACK_KML_NAMESPACE;
    }
    Matcher m = KML_DEFAULT_NAMESPACE_PATTERN.matcher(kml);
    if (m.matches()) {
        return m.group(1);
    }
    return FALLBACK_KML_NAMESPACE;
}
From source file:fr.dudie.acrachilisync.tools.upgrade.IssueDescriptionReaderV1.java
/** * Extracts the bug stacktrace from the description. * //www.jav a2 s. c o m * @param pDescription * the issue description * @param pStacktraceMD5 * the stacktrace MD5 hash the issue is related to * @return the stacktrace * @throws IssueParseException * malformed issue description */ private String parseStacktrace(final String pDescription, final String pStacktraceMD5) throws IssueParseException { String stacktrace = null; // escape braces { and } to use strings in regexp final String start = "<pre class=\"javastacktrace\">"; final String qStart = Pattern.quote(start); final String end = "</pre>"; final String qEnd = Pattern.quote(end); final Pattern p = Pattern.compile(qStart + "(.*)" + qEnd, Pattern.DOTALL | Pattern.CASE_INSENSITIVE); final Matcher m = p.matcher(pDescription); if (m.find()) { stacktrace = m.group(1); // if a start tag or an end tag is found in the stacktrace, then there is a problem if (StringUtils.contains(stacktrace, start) || StringUtils.contains(stacktrace, end)) { throw new IssueParseException("Invalid stacktrace block"); } } else { throw new IssueParseException("0 stacktrace block found in the description"); } return stacktrace; }
From source file:org.sonar.dotnet.tools.commons.visualstudio.ModelFactory.java
/** * Gets all the projects in a solution.//from ww w .j av a 2 s . c o m * * @param solutionFile * the solution file * @param solutionContent * the text content of the solution file * @return a list of projects * @throws IOException * @throws DotNetToolsException */ private static List<VisualStudioProject> getProjects(File solutionFile, String solutionContent, List<String> buildConfigurations) throws IOException, DotNetToolsException { File baseDirectory = solutionFile.getParentFile(); // A pattern to extract the projects from a visual studion solution String projectExtractExp = "(Project.*?^EndProject$)"; Pattern projectExtractPattern = Pattern.compile(projectExtractExp, Pattern.MULTILINE + Pattern.DOTALL); List<String> projectDefinitions = new ArrayList<String>(); // Extracts all the projects from the solution Matcher globalMatcher = projectExtractPattern.matcher(solutionContent); while (globalMatcher.find()) { String projectDefinition = globalMatcher.group(1); projectDefinitions.add(projectDefinition); } // This pattern extracts the projects from a Visual Studio solution String normalProjectExp = "\\s*Project\\([^\\)]*\\)\\s*=\\s*\"([^\"]*)\"\\s*,\\s*\"([^\"]*?\\.csproj)\""; String webProjectExp = "\\s*Project\\([^\\)]*\\)\\s*=\\s*\"([^\"]*).*?ProjectSection\\(WebsiteProperties\\).*?" 
+ "Debug\\.AspNetCompiler\\.PhysicalPath\\s*=\\s*\"([^\"]*)"; Pattern projectPattern = Pattern.compile(normalProjectExp); Pattern webPattern = Pattern.compile(webProjectExp, Pattern.MULTILINE + Pattern.DOTALL); List<VisualStudioProject> result = new ArrayList<VisualStudioProject>(); for (String projectDefinition : projectDefinitions) { // Looks for project files Matcher matcher = projectPattern.matcher(projectDefinition); if (matcher.find()) { String projectName = matcher.group(1); String projectPath = StringUtils.replace(matcher.group(2), "\\", File.separatorChar + ""); File projectFile = new File(baseDirectory, projectPath); if (!projectFile.exists()) { throw new FileNotFoundException("Could not find the project file: " + projectFile); } VisualStudioProject project = getProject(projectFile, projectName, buildConfigurations); result.add(project); } else { // Searches the web project Matcher webMatcher = webPattern.matcher(projectDefinition); if (webMatcher.find()) { String projectName = webMatcher.group(1); String projectPath = webMatcher.group(2); if (projectPath.endsWith("\\")) { projectPath = StringUtils.chop(projectPath); } File projectRoot = new File(baseDirectory, projectPath); VisualStudioProject project = getWebProject(baseDirectory, projectRoot, projectName, projectDefinition); result.add(project); } } } return result; }
From source file:org.beangle.test.selenium.SeleniumTestBase.java
/** * Compares two strings, but handles "regexp:" strings like HTML Selenese * /*from w w w . j a v a 2s .co m*/ * @param expectedPattern * @param actual * @return true if actual matches the expectedPattern, or false otherwise */ public static boolean seleniumEquals(String expectedPattern, String actual) { if (expectedPattern == null || actual == null) { return expectedPattern == null && actual == null; } if (actual.startsWith("regexp:") || actual.startsWith("regex:") || actual.startsWith("regexpi:") || actual.startsWith("regexi:")) { // swap 'em String tmp = actual; actual = expectedPattern; expectedPattern = tmp; } Boolean b; b = handleRegex("regexp:", expectedPattern, actual, 0); if (b != null) { return b.booleanValue(); } b = handleRegex("regex:", expectedPattern, actual, 0); if (b != null) { return b.booleanValue(); } b = handleRegex("regexpi:", expectedPattern, actual, Pattern.CASE_INSENSITIVE); if (b != null) { return b.booleanValue(); } b = handleRegex("regexi:", expectedPattern, actual, Pattern.CASE_INSENSITIVE); if (b != null) { return b.booleanValue(); } if (expectedPattern.startsWith("exact:")) { String expectedExact = expectedPattern.replaceFirst("exact:", ""); if (!expectedExact.equals(actual)) { System.out.println("expected " + actual + " to match " + expectedPattern); return false; } return true; } String expectedGlob = expectedPattern.replaceFirst("glob:", ""); expectedGlob = expectedGlob.replaceAll("([\\]\\[\\\\{\\}$\\(\\)\\|\\^\\+.])", "\\\\$1"); expectedGlob = expectedGlob.replaceAll("\\*", ".*"); expectedGlob = expectedGlob.replaceAll("\\?", "."); if (!Pattern.compile(expectedGlob, Pattern.DOTALL).matcher(actual).matches()) { System.out.println("expected \"" + actual + "\" to match glob \"" + expectedPattern + "\" (had transformed the glob into regexp \"" + expectedGlob + "\""); return false; } return true; }
From source file:org.ppwcode.vernacular.l10n_III.dojo.DojoDjConfigFilter.java
private String replaceDjConfigLocale(String djConfig, Locale preferredLocale) { StringBuffer result = new StringBuffer(1024); String dojoLocale = DojoLocaleHelpers.localeToString(preferredLocale); // construct regexp String regexp = "^(.*\\s+djConfig=\\s*)((['\"])(.*?)\\3)(\\s*.*)$"; LOG.debug("regexp djconfiglocale: " + regexp); Pattern p = Pattern.compile(regexp, Pattern.DOTALL); Matcher m = p.matcher(djConfig); // pattern group 1 - before djConfig, 2 - djConfig content, 3 - after djConfig if (m.matches()) { String before = m.group(1); String quote = m.group(3); String config = m.group(4); String after = m.group(5); String otherQuote = quote.equals("\"") ? "'" : "\""; LOG.debug("before " + before); LOG.debug("after " + after); LOG.debug("cfg " + config); LOG.debug("quote " + quote); // construct before djConfig content result.append(before);/*w w w.j a v a 2s .c om*/ result.append(quote); // filter config itself String regexpCfg = "^(.*" + otherQuote + "?locale" + otherQuote + "?\\s*:\\s*" + otherQuote + ")([a-z\\-]*)(" + otherQuote + ".*)$"; Pattern pCfg = Pattern.compile(regexpCfg, Pattern.DOTALL); Matcher mCfg = pCfg.matcher(config); if (mCfg.matches()) { // found locale in djConfig String beforeLocale = mCfg.group(1); String locale = mCfg.group(2); String afterLocale = mCfg.group(3); LOG.debug("before locale: " + beforeLocale); LOG.debug("locale: " + locale); LOG.debug("after locale: " + afterLocale); result.append(beforeLocale); result.append(dojoLocale); result.append(afterLocale); } else { // no locale in djConfig: add it result.append(config); result.append(", locale:"); result.append(otherQuote); result.append(dojoLocale); result.append(otherQuote); } // construct after djConfig content result.append(quote); result.append(after); } else { // no match on djConfig, just return the string then result.append(djConfig); } LOG.debug("new djConfig: " + result); return result.toString(); }
From source file:com.nextep.designer.sqlgen.helpers.CaptureHelper.java
private static String getBodyFromCreateAsStatement(String stmt, String bodyStartKeyword) { String body = null;/*w w w .j av a2s . c o m*/ if (stmt != null) { /* * FIXME [BGA] This expression will not work for bodies that start with a DECLARE * keyword. The expression should not rely on the first keyword of the body section, as * this first keyword can vary, but should instead rely on the previous AS keyword which * is always the same. */ Pattern p = Pattern.compile("^.+?\\s+AS\\s+(" + bodyStartKeyword + "\\s+.+)", //$NON-NLS-1$ //$NON-NLS-2$ Pattern.CASE_INSENSITIVE | Pattern.DOTALL); Matcher m = p.matcher(stmt); if (m.find()) { try { body = m.group(1); if (body != null && !"".equals(body.trim())) { //$NON-NLS-1$ return body; } } catch (IllegalStateException ise) { LOGGER.warn(bodyStartKeyword + " keyword could not be found in the specified CREATE AS statement [" + stmt + "]", //$NON-NLS-2$ ise); } } } if (body == null || "".equals(body.trim())) { //$NON-NLS-1$ // If we failed extracting the BODY clause, we simply try to return the passed statement // as this is better than nothing. (Attempt to fix DES-927) return stmt; } else { return body; } }
From source file:org.codeexample.anchorlinks.CVAnchorContentIndexingFilter.java
public String getContentBetweenAnchorInWiki(StringBuilder remaining, String anchor1, String anchor2) throws IOException { // http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html // "<a name='Creating_Schedule_Policies'>Creating Schedule Policies</a>XXXX<a name='Auxiliary_Copy_Schedule_Policy'>Creating Schedule Policies</a>";// // doc.toString(); // long start = new Date().getTime(); // <span class="mw-headline" id="JDK_contents">JDK contents</span><a // href="/wiki/AppletViewer" title="AppletViewer">appletviewer</a><span // class="mw-headline" id="Ambiguity_between_a_JDK_and_an_SDK">Ambiguity // between a JDK and an SDK</span> // <span[^>]*id\s*=\s*(?:"|')?JDK_contents(?:'|")?[^>]*>[^<]*</span>(.*)<span[^>]*?id\s*=\s*(?:"|')?Ambiguity_between_a_JDK_and_an_SDK(?:'|")?[^>]*>[^<]*</span> Matcher matcher = Pattern .compile(getRegexToExtractContent(anchor1, anchor2), Pattern.DOTALL | Pattern.MULTILINE) .matcher(remaining);/*w w w . ja v a 2 s . c om*/ String matchedText = ""; if (matcher.find()) { // System.out.println("found match"); String anchorText = Jsoup.parse(matcher.group(1)).text(); matchedText = anchorText + " " + Jsoup.parse(matcher.group(2)).text(); // System.out.println(matchedText); // int cnt = matcher.groupCount(); // if (cnt == 2) { String newRemaining = matcher.group(3); remaining.setLength(0); remaining.append(newRemaining); // } } // long end = new Date().getTime(); // System.out.println("Took: " + (end - start)); return matchedText; }
From source file:com.sangupta.pep.Generator.java
private SlideVariables getSlideVariables(String slideContents) { SlideVariables vars = new SlideVariables(); Matcher matcher = PATTERN.matcher(slideContents); if (matcher != null && matcher.matches()) { vars.setHeader(matcher.group(1)); vars.setLevel(Integer.valueOf(matcher.group(2))); vars.setTitle(matcher.group(3)); vars.setContent(matcher.group(4)); } else {/*from w w w.j av a 2s.c o m*/ vars.setHeader(""); vars.setTitle(""); vars.setContent(slideContents); vars.setLevel(0); } // process slide classes ContentAndClasses cc = processMacros(vars); String content = cc.getContent(); vars.setContent(content); vars.setClasses(cc.getClasses().toArray(new String[0])); if (StringUtils.isNotEmpty(content)) { content = content.trim(); Pattern p2 = Pattern.compile("<h\\d[^>]*>presenter notes</h\\d>", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL); Matcher m2 = p2.matcher(content); if (m2 != null && m2.matches()) { vars.setPresenterNotes(content.substring(m2.end()).trim()); content = content.substring(0, m2.start()); vars.setContent(content); } } vars.setRelativeSourcePath(this.inputFile.getPath()); vars.setAbsoluteSourcePath(this.inputFile.getAbsolutePath()); return vars; }