List of usage examples for java.util.regex.Matcher.lookingAt()
public boolean lookingAt()
From source file:pt.webdetails.cda.CdaUtils.java
private String getPath(HttpServletRequest servletRequest) { String path = servletRequest.getParameter("path"); if (!StringUtils.isEmpty(path)) { return path; }//w w w . jav a 2s . c o m String uri = servletRequest.getRequestURI(); Matcher pathFinder = CDA_PATH.matcher(uri); if (pathFinder.lookingAt()) { path = pathFinder.group(1); return path.replaceAll(":", "/"); } return null; }
From source file:specminers.referenceparser.javamop.Extractor.java
private String expandExtendedRegularExpression(String ere, File specificationFile) throws IOException, ParseException { String contents = FileUtils.readFileToString(specificationFile); MOPSpecFileExt spec = getJavaMOPSpec(specificationFile); String strEvents = spec.getSpecs().get(0).getEventStr(); if (spec.getSpecs().get(0).getPropertiesAndHandlers().get(0).getProperty() instanceof FormulaExt) { FormulaExt fext = (FormulaExt) spec.getSpecs().get(0).getPropertiesAndHandlers().get(0).getProperty(); String strFormula = fext.toString(); System.out.println(strFormula); }// w ww . ja v a 2 s . com if (1 > 0) { return ""; } ere = ere.trim(); String componentsPattern = "\\(\\w+\\)|\\w+"; Matcher m = Pattern.compile(componentsPattern).matcher(ere); String aspectJExpression = "((execution)|(call))"; String aspectjAdvicePattern = aspectJExpression; while (m.lookingAt()) { String component = ere.substring(m.start(), m.end()); String componentDefintionRegex = "\\t*(creation)?[\\s\\t]+(event)[\\s|\\t]+" + component + aspectjAdvicePattern + "\\{(.+)\\}"; // According to JavaMOP 4 syntax Pattern p = Pattern.compile(componentDefintionRegex, Pattern.DOTALL); Matcher definitionMatcher = p.matcher(contents); if (definitionMatcher.find()) { String definition = definitionMatcher.group(5) + definitionMatcher.group(6); return definition; } } return ere; }
From source file:tufts.vue.URLResource.java
private Properties scrapeHTMLmetaData(URLConnection connection, int maxSearchBytes) throws java.io.IOException { Properties metaData = new Properties(); InputStream byteStream = connection.getInputStream(); if (DEBUG.DND && DEBUG.META) { System.err.println("Getting headers from " + connection); System.err.println("Headers: " + connection.getHeaderFields()); }/*from w w w . jav a 2 s .c o m*/ // note: be sure to call getContentType and don't rely on getting it from the HeaderFields map, // as sometimes it's set by the OS for a file:/// URL when there are no header fields (no http server) // (actually, this is set by java via a mime type table based on file extension, or a guess based on the stream) if (DEBUG.DND) System.err.println("*** getting contentType & encoding..."); final String contentType = connection.getContentType(); final String contentEncoding = connection.getContentEncoding(); final int contentLength = connection.getContentLength(); if (DEBUG.DND) System.err.println("*** contentType [" + contentType + "]"); if (DEBUG.DND) System.err.println("*** contentEncoding [" + contentEncoding + "]"); if (DEBUG.DND) System.err.println("*** contentLength [" + contentLength + "]"); setProperty("url.contentType", contentType); setProperty("url.contentEncoding", contentEncoding); if (contentLength >= 0) setProperty("url.contentLength", contentLength); //if (contentType.toLowerCase().startsWith("text/html") == false) { if (!isHTML()) { // we only currently handle HTML if (DEBUG.Enabled) System.err.println("*** contentType [" + contentType + "] not HTML; skipping title extraction"); return metaData; } if (DEBUG.DND) System.err.println("*** scanning for HTML meta-data..."); try { final BufferedInputStream bufStream = new BufferedInputStream(byteStream, maxSearchBytes); bufStream.mark(maxSearchBytes); final byte[] byteBuffer = new byte[maxSearchBytes]; int bytesRead = 0; int len = 0; // BufferedInputStream still won't read thru a block, so we need to allow // a few reads 
here to get thru a couple of blocks, so we can get up to // our maxbytes (e.g., a common return chunk count is 1448 bytes, presumably related to the MTU) do { int max = maxSearchBytes - bytesRead; len = bufStream.read(byteBuffer, bytesRead, max); System.out.println("*** read " + len); if (len > 0) bytesRead += len; else if (len < 0) break; } while (len > 0 && bytesRead < maxSearchBytes); if (DEBUG.DND) System.out.println("*** Got total chars: " + bytesRead); String html = new String(byteBuffer, 0, bytesRead); if (DEBUG.DND && DEBUG.META) System.out.println("*** HTML-STRING[" + html + "]"); // first, look for a content encoding, so we can search for and get the title // on a properly encoded character stream String charset = null; Matcher cm = Content_Charset_Regex.matcher(html); if (cm.lookingAt()) { charset = cm.group(1); if (DEBUG.DND) System.err.println("*** found HTML specified charset [" + charset + "]"); setProperty("charset", charset); } if (charset == null && contentEncoding != null) { if (DEBUG.DND || true) System.err.println("*** no charset found: using contentEncoding charset " + contentEncoding); charset = contentEncoding; } final String decodedHTML; if (charset != null) { bufStream.reset(); InputStreamReader decodedStream = new InputStreamReader(bufStream, charset); //InputStreamReader decodedStream = new InputStreamReader(new ByteArrayInputStream(byteBuffer), charset); if (true || DEBUG.DND) System.out.println("*** decoding bytes into characters with official encoding " + decodedStream.getEncoding()); setProperty("contentEncoding", decodedStream.getEncoding()); char[] decoded = new char[bytesRead]; int decodedChars = decodedStream.read(decoded); decodedStream.close(); if (true || DEBUG.DND) System.err.println("*** " + decodedChars + " characters decoded using " + charset); decodedHTML = new String(decoded, 0, decodedChars); } else decodedHTML = html; // we'll just have to go with the default platform charset... 
// these needed to be left open till the decodedStream was done, which // although it should never need to read beyond what's already buffered, // some internal java code has checks that make sure the underlying stream // isn't closed, even it it isn't used. byteStream.close(); bufStream.close(); Matcher m = HTML_Title_Regex.matcher(decodedHTML); if (m.lookingAt()) { String title = m.group(1); if (true || DEBUG.DND) System.err.println("*** found title [" + title + "]"); metaData.put("title", title.trim()); } } catch (Throwable e) { System.err.println("scrapeHTMLmetaData: " + e); if (DEBUG.DND) e.printStackTrace(); } if (DEBUG.DND || DEBUG.Enabled) System.err.println("*** scrapeHTMLmetaData returning [" + metaData + "]"); return metaData; }