List of usage examples for java.io BufferedInputStream mark
public synchronized void mark(int readlimit)
See the general contract of the mark method of InputStream.
From source file:org.apache.nifi.csv.CSVReader.java
@Override public RecordReader createRecordReader(final Map<String, String> variables, final InputStream in, final ComponentLog logger) throws IOException, SchemaNotFoundException { // Use Mark/Reset of a BufferedInputStream in case we read from the Input Stream for the header. final BufferedInputStream bufferedIn = new BufferedInputStream(in); bufferedIn.mark(1024 * 1024); final RecordSchema schema = getSchema(variables, new NonCloseableInputStream(bufferedIn), null); bufferedIn.reset();// w ww . j av a 2s . c o m return new CSVRecordReader(bufferedIn, logger, schema, csvFormat, firstLineIsHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat, charSet); }
From source file:org.callimachusproject.io.CarInputStream.java
private String detectRdfType(BufferedInputStream in) throws IOException { byte[] peek = new byte[200]; in.mark(200); int len = IOUtil.readBytes(in, peek); in.reset();// w ww . j a v a 2s .co m int first = new TextReader(new ByteArrayInputStream(peek, 0, len)).read(); if (first == '<') return "application/rdf+xml"; return "text/turtle"; }
From source file:fedora.server.rest.RestUtil.java
/** * Retrieves the contents of the HTTP Request. * @return InputStream from the request/*from w w w. ja v a 2 s . c om*/ */ public RequestContent getRequestContent(HttpServletRequest request, HttpHeaders headers) throws Exception { RequestContent rContent = null; // See if the request is a multi-part file upload request if (ServletFileUpload.isMultipartContent(request)) { // Create a new file upload handler ServletFileUpload upload = new ServletFileUpload(); // Parse the request, use the first available File item FileItemIterator iter = upload.getItemIterator(request); while (iter.hasNext()) { FileItemStream item = iter.next(); if (!item.isFormField()) { rContent = new RequestContent(); rContent.contentStream = item.openStream(); rContent.mimeType = item.getContentType(); FileItemHeaders itemHeaders = item.getHeaders(); if (itemHeaders != null) { String contentLength = itemHeaders.getHeader("Content-Length"); if (contentLength != null) { rContent.size = Integer.parseInt(contentLength); } } break; } } } else { // If the content stream was not been found as a multipart, // try to use the stream from the request directly if (rContent == null) { if (request.getContentLength() > 0) { rContent = new RequestContent(); rContent.contentStream = request.getInputStream(); rContent.size = request.getContentLength(); } else { String transferEncoding = request.getHeader("Transfer-Encoding"); if (transferEncoding != null && transferEncoding.contains("chunked")) { BufferedInputStream bis = new BufferedInputStream(request.getInputStream()); bis.mark(2); if (bis.read() > 0) { bis.reset(); rContent = new RequestContent(); rContent.contentStream = bis; } } } } } // Attempt to set the mime type and size if not already set if (rContent != null) { if (rContent.mimeType == null) { MediaType mediaType = headers.getMediaType(); if (mediaType != null) { rContent.mimeType = mediaType.toString(); } } if (rContent.size == 0) { List<String> lengthHeaders = 
headers.getRequestHeader("Content-Length"); if (lengthHeaders != null && lengthHeaders.size() > 0) { rContent.size = Integer.parseInt(lengthHeaders.get(0)); } } } return rContent; }
From source file:org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor.java
protected String getEncoding(final BufferedInputStream bis) { final byte[] b = new byte[preloadSizeForCharset]; try {//w w w.ja v a 2s.co m bis.mark(preloadSizeForCharset); final int c = bis.read(b); if (c == -1) { return encoding; } final String head = new String(b, 0, c, encoding); if (StringUtil.isBlank(head)) { return encoding; } final Matcher matcher = getEncodingPattern().matcher(head); if (matcher.find()) { final String enc = matcher.group(1); if (Charset.isSupported(enc)) { return enc; } } } catch (final Exception e) { if (logger.isInfoEnabled()) { logger.info("Use a default encoding: " + encoding, e); } } finally { try { bis.reset(); } catch (final IOException e) { throw new ExtractException(e); } } return encoding; }
From source file:org.lockss.plugin.usdocspln.gov.gpo.fdsys.GPOFDSysHtmlFilterFactory.java
public InputStream createFilteredInputStream(ArchivalUnit au, InputStream in, String encoding) throws PluginException { BufferedInputStream bis = new BufferedInputStream(in, 2048); bis.mark(2048); try {/*from w ww . ja va 2s. c o m*/ boolean filter = shouldFilter(bis, encoding); bis.reset(); if (!filter) { return bis; } } catch (IOException ioe) { logger.debug("IOException while inspecting document to skip filtering", ioe); } NodeFilter[] filters = new NodeFilter[] { /* * Broad area filtering */ /* Document header */ // Differences in the presence and order of <meta> tags and spacing of the <title> tag HtmlNodeFilters.tag("head"), /* Scripts, inline style */ HtmlNodeFilters.tag("script"), HtmlNodeFilters.tag("noscript"), HtmlNodeFilters.tag("style"), /* Header */ HtmlNodeFilters.tagWithAttributeRegex("div", "id", "top-menu-one"), HtmlNodeFilters.tagWithAttributeRegex("div", "id", "top-banner-inside"), HtmlNodeFilters.tagWithAttributeRegex("div", "id", "top-menu-two"), /* Left column (various cells of a two-column table layout) */ HtmlNodeFilters.tagWithAttributeRegex("div", "id", "left-menu"), HtmlNodeFilters.tagWithAttributeRegex("div", "id", "page-details-left-mask"), /* Footer */ HtmlNodeFilters.tagWithAttributeRegex("div", "id", "footer"), /* * Main area */ // Seen in the field: "null" in <a href="/fdsys/search/pagedetails.action?null&bread=true">More Information</a> HtmlNodeFilters.tagWithAttributeRegex("span", "id", "breadcrumbs"), // Whitespace differences inside <div id="page-details-form-mask"> HtmlNodeFilters.tagWithAttributeRegex("div", "id", "page-details-form-mask"), /* * Other */ // Variable Struts identifier, sometime in a comment, sometimes not HtmlNodeFilters.tagWithAttributeRegex("input", "type", "hidden"), HtmlNodeFilters.comment(), // "Email a link to this page" link [probably contained in larger block now] HtmlNodeFilters.tagWithAttributeRegex("a", "href", "^search/notificationPage\\.action\\?emailBody="), // Session ID from search results 
[probably contained in larger block now] HtmlNodeFilters.tagWithAttributeRegex("form", "action", "jsessionid="), // Differs over time in the presence and placement of rel="nofollow" HtmlNodeFilters.tagWithAttributeRegex("a", "href", "^delivery/getpackage\\.action\\?packageId="), }; HtmlTransform xform = new HtmlTransform() { @Override public NodeList transform(NodeList nodeList) throws IOException { try { nodeList.visitAllNodesWith(new NodeVisitor() { @Override public void visitTag(Tag tag) { String tagName = tag.getTagName().toLowerCase(); if ("a".equals(tagName)) { tag.removeAttribute("onclick"); // Javascript calls changed over time } } }); return nodeList; } catch (ParserException pe) { throw new IOException("ParserException inside HtmlTransform", pe); } } }; InputStream prefilteredStream = new HtmlFilterInputStream(bis, // NOTE: this is 'bis', not 'in' encoding, new HtmlCompoundTransform(HtmlNodeFilterTransform.exclude(new OrFilter(filters)), xform)); try { Reader filteredReader = new InputStreamReader(prefilteredStream, encoding); Reader whitespaceReader = new WhiteSpaceFilter(filteredReader); return new ReaderInputStream(whitespaceReader, encoding); } catch (UnsupportedEncodingException uee) { throw new PluginException(uee); } }
From source file:org.eclipse.xtend.expression.ResourceManagerDefaultImpl.java
/** * Creates a Reader for the given InputStream. If no explicit file encoding * is set this method will try to autodetect the file's encoding. * /*ww w . ja v a 2 s. c o m*/ * @param in * Some resource input stream * @return A Reader for the stream * @since 4.2 */ protected Reader createReader(final InputStream in) { Reader reader = null; if (fileEncoding != null) { try { reader = new InputStreamReader(in, fileEncoding); } catch (final UnsupportedEncodingException e) { log.error("Unsupported encoding falling back to default...", e); reader = new InputStreamReader(in); } } else { Charset encoding = null; // Buffer the original stream since we want to re-read it BufferedInputStream is = new BufferedInputStream(in); try { // Read some bytes from the stream is.mark(65); byte[] buf = new byte[64]; is.read(buf); // reset the stream is.reset(); // Special handling for Xpand files on Mac: Try to detect // the opening Guillemot bracket for MacRoman encoding for (int i = 0; i < buf.length; i++) { if (buf[i] == -57) { // opening Guillemot bracket encoding = Charset.forName("MacRoman"); break; } } // Use com.ibm.icu for autodetection if (encoding == null) { CharsetDetector det = new CharsetDetector(); det.setText(buf); CharsetMatch match = det.detect(); if (match != null) { encoding = Charset.forName(match.getName()); } } // Create the reader with the detected encoding if (encoding != null) { reader = new InputStreamReader(is, encoding); } else { log.warn("Failed autodetecting encoding. Falling back to default..."); reader = new InputStreamReader(is); } } catch (IOException e) { log.warn("Failed autodetecting encoding. Falling back to default...", e); reader = new InputStreamReader(in); } } return reader; }
From source file:de.tudarmstadt.ukp.dkpro.core.io.bincas.BinaryCasReader.java
@Override public void getNext(CAS aCAS) throws IOException, CollectionException { Resource res = nextFile();//from w w w . j av a 2s . c o m InputStream is = null; try { is = CompressionUtils.getInputStream(res.getLocation(), res.getInputStream()); BufferedInputStream bis = new BufferedInputStream(is); TypeSystemImpl ts = null; // Check if this is original UIMA CAS format or DKPro Core format bis.mark(10); DataInputStream dis = new DataInputStream(bis); byte[] dkproHeader = new byte[] { 'D', 'K', 'P', 'r', 'o', '1' }; byte[] header = new byte[dkproHeader.length]; dis.read(header); // If it is DKPro Core format, read the type system if (Arrays.equals(header, dkproHeader)) { ObjectInputStream ois = new ObjectInputStream(bis); CASMgrSerializer casMgrSerializer = (CASMgrSerializer) ois.readObject(); ts = casMgrSerializer.getTypeSystem(); ts.commit(); } else { bis.reset(); } if (ts == null) { // Check if this is a UIMA binary CAS stream byte[] uimaHeader = new byte[] { 'U', 'I', 'M', 'A' }; byte[] header4 = new byte[uimaHeader.length]; System.arraycopy(header, 0, header4, 0, header4.length); if (header4[0] != 'U') { ArrayUtils.reverse(header4); } // If it is not a UIMA binary CAS stream, assume it is output from // SerializedCasWriter if (!Arrays.equals(header4, uimaHeader)) { ObjectInputStream ois = new ObjectInputStream(bis); CASCompleteSerializer serializer = (CASCompleteSerializer) ois.readObject(); deserializeCASComplete(serializer, (CASImpl) aCAS); } else { // Since there was no type system, it must be type 0 or 4 deserializeCAS(aCAS, bis); } } else { // Only format 6 can have type system information deserializeCAS(aCAS, bis, ts, null); } } catch (ResourceInitializationException e) { throw new IOException(e); } catch (ClassNotFoundException e) { throw new IOException(e); } finally { closeQuietly(is); } }
From source file:org.jasig.schedassist.impl.caldav.xml.ReportResponseHandlerImpl.java
/**
 * Extracts a {@link List} of {@link Calendar}s from the {@link InputStream}, if present.
 *
 * <p>An empty stream yields an empty list without invoking the XML parser. When debug logging
 * is enabled the raw content is captured as it is read so it can be included in log output.
 *
 * @param inputStream stream holding the response body
 * @return a never null, but possibly empty {@link List} of {@link Calendar}s from the {@link InputStream}
 * @throws XmlParsingException in the event the stream could not be properly parsed
 */
public List<CalendarWithURI> extractCalendars(InputStream inputStream) {
    List<CalendarWithURI> results = new ArrayList<CalendarWithURI>();
    ByteArrayOutputStream capturedContent = null;
    XMLInputFactory factory = XMLInputFactory.newInstance();
    try {
        InputStream source = inputStream;
        if (log.isDebugEnabled()) {
            capturedContent = new ByteArrayOutputStream();
            source = new TeeInputStream(inputStream, capturedContent);
        }

        // Probe a single byte so an empty body short-circuits before XML parsing.
        BufferedInputStream buffered = new BufferedInputStream(source);
        buffered.mark(1);
        if (buffered.read() == -1) {
            return results;
        }
        buffered.reset();

        XMLStreamReader parser = factory.createXMLStreamReader(buffered);
        String currentUri = null;
        String currentEtag = null;
        int event = parser.next();
        while (event != XMLStreamConstants.END_DOCUMENT) {
            if (event == XMLStreamConstants.START_ELEMENT) {
                QName element = parser.getName();
                if (isWebdavHrefElement(element)) {
                    currentUri = parser.getElementText();
                } else if (isWebdavEtagElement(element)) {
                    currentEtag = parser.getElementText();
                } else if (isCalendarDataElement(element)) {
                    Calendar calendar = extractCalendar(parser.getElementText());
                    if (calendar == null) {
                        if (log.isDebugEnabled()) {
                            log.debug("extractCalendar returned null for " + currentUri + ", skipping");
                        }
                    } else {
                        results.add(new CalendarWithURI(calendar, currentUri, currentEtag));
                    }
                }
            }
            event = parser.next();
        }

        if (log.isDebugEnabled()) {
            log.debug("extracted " + results.size() + " calendar from " + capturedContent.toString());
        }
    } catch (XMLStreamException e) {
        String detail = capturedContent != null
                ? "caught XMLStreamException in extractCalendars, captured content: " + capturedContent.toString()
                : "caught XMLStreamException in extractCalendars, no captured content available";
        log.error(detail, e);
        throw new XmlParsingException("caught XMLStreamException in extractCalendars", e);
    } catch (IOException e) {
        log.error("caught IOException in extractCalendars", e);
        throw new XmlParsingException("caught IOException in extractCalendars", e);
    }
    return results;
}
From source file:org.callimachusproject.io.CarInputStream.java
private boolean scanForClass(BufferedInputStream in, String type) throws IOException { assert type != null; byte[] peek = new byte[RDFS_PEEK_SIZE]; in.mark(RDFS_PEEK_SIZE); int len = IOUtil.readBytes(in, peek); in.reset();// www .j av a2 s. c o m URI uri = new URIImpl("http://example.com/" + entry.getName()); LinkedHashModel model = new LinkedHashModel(); try { RDFParserRegistry registry = org.openrdf.rio.RDFParserRegistry.getInstance(); RDFParser parser = registry.get(registry.getFileFormatForMIMEType(type)).getParser(); parser.setRDFHandler(new StatementCollector(model)); parser.parse(new ByteArrayInputStream(peek, 0, len), uri.toString()); } catch (RDFParseException e) { // ignore } catch (RDFHandlerException e) { // ignore } return model.contains(uri, RDF.TYPE, OWL.ONTOLOGY) || model.contains(uri, RDF.TYPE, OWL.CLASS) || model.contains(uri, RDF.TYPE, OWL.OBJECTPROPERTY) || model.contains(uri, RDF.TYPE, OWL.DATATYPEPROPERTY) || model.contains(uri, RDF.TYPE, OWL.FUNCTIONALPROPERTY) || model.contains(uri, RDF.TYPE, RDFS.CLASS) || model.contains(uri, RDF.TYPE, RDF.PROPERTY); }
From source file:org.gtdfree.model.GTDDataXMLTools.java
static public DataHeader load(GTDModel model, InputStream in) throws XMLStreamException, IOException { model.setSuspendedForMultipleChanges(true); model.getDataRepository().suspend(true); XMLStreamReader r;//from w w w . ja v a 2 s . co m try { // buffer size is same as default in 1.6, we explicitly request it so, not to brake if defaut changes. BufferedInputStream bin = new BufferedInputStream(in, 8192); bin.mark(8191); Reader rr = new InputStreamReader(bin); CharBuffer b = CharBuffer.allocate(96); rr.read(b); b.position(0); //System.out.println(b); Pattern pattern = Pattern.compile("<\\?.*?encoding\\s*?=.*?\\?>", Pattern.CASE_INSENSITIVE); //$NON-NLS-1$ Matcher matcher = pattern.matcher(b); // reset back to start of file bin.reset(); // we check if encoding is defined in xml, by the book encoding on r should be null if not defined in xml, // but in reality it can be arbitrary if not defined in xml. So we have to check ourselves. if (matcher.find()) { //System.out.println(matcher); // if defined, then XML parser will pick it up and use it r = XMLInputFactory.newInstance().createXMLStreamReader(bin); Logger.getLogger(GTDDataXMLTools.class).info("XML declared encoding: " + r.getEncoding() //$NON-NLS-1$ + ", system default encoding: " + Charset.defaultCharset()); //$NON-NLS-1$ } else { //System.out.println(matcher); // if not defined, then we assume it is generated by gtd-free version 0.4 or some local editor, // so we assume system default encoding. 
r = XMLInputFactory.newInstance().createXMLStreamReader(new InputStreamReader(bin)); Logger.getLogger(GTDDataXMLTools.class) .info("XML assumed system default encoding: " + Charset.defaultCharset()); //$NON-NLS-1$ } r.nextTag(); if ("gtd-data".equals(r.getLocalName())) { //$NON-NLS-1$ DataHeader dh = new DataHeader(null, r.getAttributeValue(null, "version"), //$NON-NLS-1$ r.getAttributeValue(null, "modified")); //$NON-NLS-1$ if (dh.version != null) { if (dh.version.equals("2.0")) { //$NON-NLS-1$ r.nextTag(); _load_2_0(model, r); return dh; } } String s = r.getAttributeValue(null, "lastActionID"); //$NON-NLS-1$ if (s != null) { try { model.setLastActionID(Integer.parseInt(s)); } catch (Exception e) { Logger.getLogger(GTDDataXMLTools.class).debug("Internal error.", e); //$NON-NLS-1$ } } if (dh.version != null) { if (dh.version.equals("2.1")) { //$NON-NLS-1$ r.nextTag(); _load_2_1(model, r); return dh; } if (dh.version.startsWith("2.2")) { //$NON-NLS-1$ r.nextTag(); _load_2_2(model, r); return dh; } } throw new IOException("XML gtd-free data with version number " + dh.version //$NON-NLS-1$ + " can not be imported. Data version is newer then supported versions. Update your GTD-Free application to latest version."); //$NON-NLS-1$ } _load_1_0(model, r); return null; } catch (XMLStreamException e) { if (e.getNestedException() != null) { Logger.getLogger(GTDDataXMLTools.class).debug("Parse error.", e.getNestedException()); //$NON-NLS-1$ } else { Logger.getLogger(GTDDataXMLTools.class).debug("Parse error.", e); //$NON-NLS-1$ } throw e; } catch (IOException e) { throw e; } finally { model.setSuspendedForMultipleChanges(false); model.getDataRepository().suspend(false); } }