Example usage for java.io BufferedInputStream mark

Introduction

This page collects example usages of the java.io.BufferedInputStream.mark(int) method.

Prototype

public synchronized void mark(int readlimit)

Source Link

Document

See the general contract of the mark method of InputStream.

Usage

From source file:org.apache.nifi.csv.CSVReader.java

/**
 * Creates a CSV record reader over the given stream.
 *
 * The schema is resolved first. Because that step may read from the stream (for example to
 * inspect the header), the stream is buffered, marked before schema resolution and rewound
 * afterwards, so the returned reader starts at the beginning of the data.
 *
 * @param variables variables handed to schema resolution
 * @param in        the raw input stream
 * @param logger    logger passed on to the created reader
 * @return a record reader positioned at the start of the stream
 * @throws IOException             if the stream cannot be rewound (or, presumably, read during
 *                                 schema resolution)
 * @throws SchemaNotFoundException presumably raised by schema resolution - confirm against getSchema
 */
@Override
public RecordReader createRecordReader(final Map<String, String> variables, final InputStream in,
        final ComponentLog logger) throws IOException, SchemaNotFoundException {
    // Up to 1 MiB may be consumed while resolving the schema before the mark becomes invalid.
    final int markLimit = 1024 * 1024;

    final BufferedInputStream rewindable = new BufferedInputStream(in);
    rewindable.mark(markLimit);

    // The wrapper presumably stops schema resolution from closing the shared stream.
    final RecordSchema resolvedSchema = getSchema(variables, new NonCloseableInputStream(rewindable), null);

    // Rewind so the record reader sees the stream from its first byte again.
    rewindable.reset();

    return new CSVRecordReader(rewindable, logger, resolvedSchema, csvFormat, firstLineIsHeader, ignoreHeader,
            dateFormat, timeFormat, timestampFormat, charSet);
}

From source file:org.callimachusproject.io.CarInputStream.java

/**
 * Guesses the RDF serialization of a stream by peeking at its first bytes.
 *
 * Up to 200 bytes are read and the stream is then reset, so the caller's position is
 * unchanged. If the first character decoded from the peeked bytes is {@code '<'} the content
 * is taken to be RDF/XML, otherwise Turtle.
 *
 * @param in a buffered stream positioned at the start of the content
 * @return {@code "application/rdf+xml"} or {@code "text/turtle"}
 * @throws IOException if peeking or resetting the stream fails
 */
private String detectRdfType(BufferedInputStream in) throws IOException {
    final int peekSize = 200;
    byte[] head = new byte[peekSize];

    in.mark(peekSize);
    int count = IOUtil.readBytes(in, head);
    in.reset();

    // Decode only the bytes actually read and look at the first character.
    TextReader text = new TextReader(new ByteArrayInputStream(head, 0, count));
    return text.read() == '<' ? "application/rdf+xml" : "text/turtle";
}

From source file:fedora.server.rest.RestUtil.java

/**
 * Retrieves the contents of the HTTP Request.
 * @return InputStream from the request/*from   w w w. ja  v  a 2 s  . c  om*/
 */
public RequestContent getRequestContent(HttpServletRequest request, HttpHeaders headers) throws Exception {
    RequestContent rContent = null;

    // See if the request is a multi-part file upload request
    if (ServletFileUpload.isMultipartContent(request)) {

        // Create a new file upload handler
        ServletFileUpload upload = new ServletFileUpload();

        // Parse the request, use the first available File item
        FileItemIterator iter = upload.getItemIterator(request);
        while (iter.hasNext()) {
            FileItemStream item = iter.next();
            if (!item.isFormField()) {
                rContent = new RequestContent();
                rContent.contentStream = item.openStream();
                rContent.mimeType = item.getContentType();

                FileItemHeaders itemHeaders = item.getHeaders();
                if (itemHeaders != null) {
                    String contentLength = itemHeaders.getHeader("Content-Length");
                    if (contentLength != null) {
                        rContent.size = Integer.parseInt(contentLength);
                    }
                }

                break;
            }
        }
    } else {
        // If the content stream was not been found as a multipart,
        // try to use the stream from the request directly
        if (rContent == null) {
            if (request.getContentLength() > 0) {
                rContent = new RequestContent();
                rContent.contentStream = request.getInputStream();
                rContent.size = request.getContentLength();
            } else {
                String transferEncoding = request.getHeader("Transfer-Encoding");
                if (transferEncoding != null && transferEncoding.contains("chunked")) {
                    BufferedInputStream bis = new BufferedInputStream(request.getInputStream());
                    bis.mark(2);
                    if (bis.read() > 0) {
                        bis.reset();
                        rContent = new RequestContent();
                        rContent.contentStream = bis;
                    }
                }
            }
        }
    }

    // Attempt to set the mime type and size if not already set
    if (rContent != null) {
        if (rContent.mimeType == null) {
            MediaType mediaType = headers.getMediaType();
            if (mediaType != null) {
                rContent.mimeType = mediaType.toString();
            }
        }

        if (rContent.size == 0) {
            List<String> lengthHeaders = headers.getRequestHeader("Content-Length");
            if (lengthHeaders != null && lengthHeaders.size() > 0) {
                rContent.size = Integer.parseInt(lengthHeaders.get(0));
            }
        }
    }

    return rContent;
}

From source file:org.codelibs.fess.crawler.extractor.impl.AbstractXmlExtractor.java

/**
 * Determines the character encoding declared at the start of the given stream.
 *
 * Up to {@code preloadSizeForCharset} bytes are read, decoded with the default
 * {@code encoding} and matched against the encoding pattern. The stream is always reset to
 * its original position before returning.
 *
 * @param bis a buffered stream positioned at the start of the document
 * @return the declared encoding if one is found and supported by this JVM, otherwise the
 *         default {@code encoding}
 * @throws ExtractException if the stream cannot be reset
 */
protected String getEncoding(final BufferedInputStream bis) {
    final byte[] preload = new byte[preloadSizeForCharset];
    String declared = null;
    try {
        bis.mark(preloadSizeForCharset);
        final int size = bis.read(preload);

        if (size != -1) {
            final String head = new String(preload, 0, size, encoding);
            if (!StringUtil.isBlank(head)) {
                final Matcher declaration = getEncodingPattern().matcher(head);
                if (declaration.find()) {
                    final String candidate = declaration.group(1);
                    // Only accept a declared charset this JVM can actually decode.
                    if (Charset.isSupported(candidate)) {
                        declared = candidate;
                    }
                }
            }
        }
    } catch (final Exception e) {
        // Detection is best effort: any failure falls back to the default encoding.
        if (logger.isInfoEnabled()) {
            logger.info("Use a default encoding: " + encoding, e);
        }
    } finally {
        // Rewind regardless of the outcome so the caller can read the document from the start.
        try {
            bis.reset();
        } catch (final IOException e) {
            throw new ExtractException(e);
        }
    }

    return declared == null ? encoding : declared;
}

From source file:org.lockss.plugin.usdocspln.gov.gpo.fdsys.GPOFDSysHtmlFilterFactory.java

/**
 * Wraps the given HTML stream in a filter that removes page regions which vary between
 * fetches, then normalizes whitespace.
 *
 * The first 2048 bytes are buffered so that {@code shouldFilter} can inspect the document.
 * If it reports that no filtering is needed, the rewound buffered stream is returned as is.
 *
 * @param au       the archival unit the content belongs to (not used in this method)
 * @param in       the raw content stream
 * @param encoding the character encoding of the content
 * @return the unfiltered buffered stream, or a filtered and whitespace-normalized stream
 * @throws PluginException if {@code encoding} is not supported
 */
public InputStream createFilteredInputStream(ArchivalUnit au, InputStream in, String encoding)
        throws PluginException {
    final int peekSize = 2048;
    BufferedInputStream buffered = new BufferedInputStream(in, peekSize);
    buffered.mark(peekSize);
    try {
        boolean needsFiltering = shouldFilter(buffered, encoding);
        buffered.reset();
        if (!needsFiltering) {
            return buffered;
        }
    } catch (IOException ioe) {
        // NOTE(review): on this path the stream is not reset before being filtered below -
        // confirm that continuing from the current position is intended.
        logger.debug("IOException while inspecting document to skip filtering", ioe);
    }

    NodeFilter[] excluded = new NodeFilter[] {
            // ---- Broad area filtering ----
            // Document header: <meta> tags differ in presence and order, <title> in spacing
            HtmlNodeFilters.tag("head"),
            // Scripts and inline style
            HtmlNodeFilters.tag("script"), HtmlNodeFilters.tag("noscript"), HtmlNodeFilters.tag("style"),
            // Page header
            HtmlNodeFilters.tagWithAttributeRegex("div", "id", "top-menu-one"),
            HtmlNodeFilters.tagWithAttributeRegex("div", "id", "top-banner-inside"),
            HtmlNodeFilters.tagWithAttributeRegex("div", "id", "top-menu-two"),
            // Left column (various cells of a two-column table layout)
            HtmlNodeFilters.tagWithAttributeRegex("div", "id", "left-menu"),
            HtmlNodeFilters.tagWithAttributeRegex("div", "id", "page-details-left-mask"),
            // Page footer
            HtmlNodeFilters.tagWithAttributeRegex("div", "id", "footer"),
            // ---- Main area ----
            // Seen in the field: "null" in <a href="/fdsys/search/pagedetails.action?null&amp;bread=true">More Information</a>
            HtmlNodeFilters.tagWithAttributeRegex("span", "id", "breadcrumbs"),
            // Whitespace differences inside <div id="page-details-form-mask">
            HtmlNodeFilters.tagWithAttributeRegex("div", "id", "page-details-form-mask"),
            // ---- Other ----
            // Variable Struts identifier, sometimes inside a comment, sometimes not
            HtmlNodeFilters.tagWithAttributeRegex("input", "type", "hidden"), HtmlNodeFilters.comment(),
            // "Email a link to this page" link [probably contained in a larger block now]
            HtmlNodeFilters.tagWithAttributeRegex("a", "href",
                    "^search/notificationPage\\.action\\?emailBody="),
            // Session ID from search results [probably contained in a larger block now]
            HtmlNodeFilters.tagWithAttributeRegex("form", "action", "jsessionid="),
            // Differs over time in the presence and placement of rel="nofollow"
            HtmlNodeFilters.tagWithAttributeRegex("a", "href", "^delivery/getpackage\\.action\\?packageId="), };

    // Strips the onclick attribute from every <a> tag; the JavaScript calls changed over time.
    HtmlTransform stripOnclick = new HtmlTransform() {
        @Override
        public NodeList transform(NodeList nodeList) throws IOException {
            try {
                nodeList.visitAllNodesWith(new NodeVisitor() {
                    @Override
                    public void visitTag(Tag tag) {
                        if ("a".equals(tag.getTagName().toLowerCase())) {
                            tag.removeAttribute("onclick");
                        }
                    }
                });
            } catch (ParserException pe) {
                throw new IOException("ParserException inside HtmlTransform", pe);
            }
            return nodeList;
        }
    };

    HtmlTransform excludeNodes = HtmlNodeFilterTransform.exclude(new OrFilter(excluded));
    // The filter reads from the buffered stream, not from the raw 'in'.
    InputStream htmlFiltered = new HtmlFilterInputStream(buffered, encoding,
            new HtmlCompoundTransform(excludeNodes, stripOnclick));

    try {
        Reader decoded = new InputStreamReader(htmlFiltered, encoding);
        Reader normalized = new WhiteSpaceFilter(decoded);
        return new ReaderInputStream(normalized, encoding);
    } catch (UnsupportedEncodingException uee) {
        throw new PluginException(uee);
    }
}

From source file:org.eclipse.xtend.expression.ResourceManagerDefaultImpl.java

/**
 * Creates a Reader for the given InputStream. If no explicit file encoding
 * is set this method will try to autodetect the file's encoding.
 * /*ww w . ja  v a 2  s.  c o  m*/
 * @param in
 *            Some resource input stream
 * @return A Reader for the stream
 * @since 4.2
 */
protected Reader createReader(final InputStream in) {
    Reader reader = null;
    if (fileEncoding != null) {
        try {
            reader = new InputStreamReader(in, fileEncoding);
        } catch (final UnsupportedEncodingException e) {
            log.error("Unsupported encoding falling back to default...", e);
            reader = new InputStreamReader(in);
        }
    } else {
        Charset encoding = null;
        // Buffer the original stream since we want to re-read it
        BufferedInputStream is = new BufferedInputStream(in);

        try {
            // Read some bytes from the stream
            is.mark(65);
            byte[] buf = new byte[64];
            is.read(buf);
            // reset the stream
            is.reset();

            // Special handling for Xpand files on Mac: Try to detect
            // the opening Guillemot bracket for MacRoman encoding
            for (int i = 0; i < buf.length; i++) {
                if (buf[i] == -57) { // opening Guillemot bracket
                    encoding = Charset.forName("MacRoman");
                    break;
                }
            }
            // Use com.ibm.icu for autodetection
            if (encoding == null) {
                CharsetDetector det = new CharsetDetector();
                det.setText(buf);
                CharsetMatch match = det.detect();
                if (match != null) {
                    encoding = Charset.forName(match.getName());
                }
            }

            // Create the reader with the detected encoding
            if (encoding != null) {
                reader = new InputStreamReader(is, encoding);
            } else {
                log.warn("Failed autodetecting encoding. Falling back to default...");
                reader = new InputStreamReader(is);
            }
        } catch (IOException e) {
            log.warn("Failed autodetecting encoding. Falling back to default...", e);
            reader = new InputStreamReader(in);
        }
    }
    return reader;
}

From source file:de.tudarmstadt.ukp.dkpro.core.io.bincas.BinaryCasReader.java

/**
 * Reads the next file into the given CAS, detecting the serialization format from the first
 * bytes of the stream.
 *
 * A stream starting with the DKPro Core header carries a serialized type system followed by
 * the CAS data. Otherwise the stream is rewound and treated as a UIMA binary CAS if it starts
 * with the UIMA header (in either byte order), or as a Java-serialized complete CAS if not.
 *
 * @param aCAS the CAS to fill
 * @throws IOException if the stream cannot be read or its content cannot be deserialized
 * @throws CollectionException declared by the overridden method
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    Resource res = nextFile();
    InputStream is = null;
    try {
        is = CompressionUtils.getInputStream(res.getLocation(), res.getInputStream());
        BufferedInputStream bis = new BufferedInputStream(is);

        TypeSystemImpl ts = null;

        // Check if this is original UIMA CAS format or DKPro Core format
        bis.mark(10);
        byte[] dkproHeader = new byte[] { 'D', 'K', 'P', 'r', 'o', '1' };
        byte[] header = new byte[dkproHeader.length];
        // A single read() may deliver fewer bytes than requested, so keep reading until the
        // header is complete or the stream ends; a short stream leaves the remainder zeroed.
        int filled = 0;
        while (filled < header.length) {
            int n = bis.read(header, filled, header.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }

        // If it is DKPro Core format, read the type system
        if (Arrays.equals(header, dkproHeader)) {
            // NOTE(review): Java-native deserialization; only safe for trusted input files.
            ObjectInputStream ois = new ObjectInputStream(bis);
            CASMgrSerializer casMgrSerializer = (CASMgrSerializer) ois.readObject();
            ts = casMgrSerializer.getTypeSystem();
            ts.commit();
        } else {
            // Not DKPro Core format: rewind so the format readers below see the whole stream
            bis.reset();
        }

        if (ts == null) {
            // Check if this is a UIMA binary CAS stream
            byte[] uimaHeader = new byte[] { 'U', 'I', 'M', 'A' };

            byte[] header4 = new byte[uimaHeader.length];
            System.arraycopy(header, 0, header4, 0, header4.length);

            // The header may be stored in reversed byte order; normalize before comparing
            if (header4[0] != 'U') {
                ArrayUtils.reverse(header4);
            }

            // If it is not a UIMA binary CAS stream, assume it is output from
            // SerializedCasWriter
            if (!Arrays.equals(header4, uimaHeader)) {
                ObjectInputStream ois = new ObjectInputStream(bis);
                CASCompleteSerializer serializer = (CASCompleteSerializer) ois.readObject();
                deserializeCASComplete(serializer, (CASImpl) aCAS);
            } else {
                // Since there was no type system, it must be type 0 or 4
                deserializeCAS(aCAS, bis);
            }
        } else {
            // Only format 6 can have type system information
            deserializeCAS(aCAS, bis, ts, null);
        }
    } catch (ResourceInitializationException e) {
        throw new IOException(e);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    } finally {
        closeQuietly(is);
    }
}

From source file:org.jasig.schedassist.impl.caldav.xml.ReportResponseHandlerImpl.java

/**
 * Extracts a {@link List} of {@link Calendar}s from the {@link InputStream}, if present.
 *
 * An empty stream yields an empty list without invoking the XML parser. When debug logging
 * is enabled the raw content is captured while it is read so that it can be logged.
 *
 * @param inputStream the response body to parse; it is not closed by this method
 * @return a never null, but possibly empty {@link List} of {@link Calendar}s from the {@link InputStream}
 * @throws XmlParsingException in the event the stream could not be properly parsed
 */
public List<CalendarWithURI> extractCalendars(InputStream inputStream) {
    List<CalendarWithURI> results = new ArrayList<CalendarWithURI>();
    ByteArrayOutputStream capturedContent = null;
    XMLInputFactory factory = XMLInputFactory.newInstance();
    // The document comes from the network: never resolve external entities (XXE).
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
    XMLStreamReader parser = null;
    try {
        InputStream localReference = inputStream;
        if (log.isDebugEnabled()) {
            capturedContent = new ByteArrayOutputStream();
            localReference = new TeeInputStream(inputStream, capturedContent);
        }
        BufferedInputStream buffered = new BufferedInputStream(localReference);
        // Peek at one byte to detect an empty body, then rewind for the parser.
        buffered.mark(1);
        int firstbyte = buffered.read();
        if (-1 == firstbyte) {
            // short circuit on empty stream
            return results;
        }
        buffered.reset();
        parser = factory.createXMLStreamReader(buffered);

        // The href and etag seen most recently are attached to the next calendar-data element.
        String currentUri = null;
        String currentEtag = null;
        for (int eventType = parser.next(); eventType != XMLStreamConstants.END_DOCUMENT; eventType = parser
                .next()) {
            switch (eventType) {
            case XMLStreamConstants.START_ELEMENT:
                QName name = parser.getName();
                if (isWebdavHrefElement(name)) {
                    currentUri = parser.getElementText();
                } else if (isWebdavEtagElement(name)) {
                    currentEtag = parser.getElementText();
                } else if (isCalendarDataElement(name)) {
                    Calendar cal = extractCalendar(parser.getElementText());
                    if (cal != null) {
                        CalendarWithURI withUri = new CalendarWithURI(cal, currentUri, currentEtag);
                        results.add(withUri);
                    } else if (log.isDebugEnabled()) {
                        log.debug("extractCalendar returned null for " + currentUri + ", skipping");
                    }
                }
                break;
            }
        }

        // capturedContent is only set if debug was enabled on entry; the level may have
        // changed since, so check both before dereferencing.
        if (capturedContent != null && log.isDebugEnabled()) {
            log.debug("extracted " + results.size() + " calendar from " + capturedContent.toString());
        }

    } catch (XMLStreamException e) {
        if (capturedContent != null) {
            log.error("caught XMLStreamException in extractCalendars, captured content: "
                    + capturedContent.toString(), e);
        } else {
            log.error("caught XMLStreamException in extractCalendars, no captured content available", e);
        }
        throw new XmlParsingException("caught XMLStreamException in extractCalendars", e);
    } catch (IOException e) {
        log.error("caught IOException in extractCalendars", e);
        throw new XmlParsingException("caught IOException in extractCalendars", e);
    } finally {
        // Release parser resources; this does not close the underlying input stream.
        if (parser != null) {
            try {
                parser.close();
            } catch (XMLStreamException e) {
                log.debug("failed to close XMLStreamReader in extractCalendars", e);
            }
        }
    }

    return results;
}

From source file:org.callimachusproject.io.CarInputStream.java

/**
 * Checks whether the start of the stream declares the current entry as an ontology, class or
 * property.
 *
 * Up to {@code RDFS_PEEK_SIZE} bytes are peeked (the stream is reset afterwards) and parsed
 * as RDF of the given MIME type, using a URI built from the entry name as the base URI.
 * Parse and handler errors are ignored, so whatever statements were collected before the
 * error are still examined.
 *
 * @param in   a buffered stream positioned at the start of the entry
 * @param type the RDF MIME type used to select the parser; must not be null
 * @return true if the entry URI is typed as an OWL ontology, class, object, datatype or
 *         functional property, an RDFS class, or an RDF property
 * @throws IOException if peeking, resetting or reading the peeked bytes fails
 */
private boolean scanForClass(BufferedInputStream in, String type) throws IOException {
    assert type != null;

    byte[] head = new byte[RDFS_PEEK_SIZE];
    in.mark(RDFS_PEEK_SIZE);
    int count = IOUtil.readBytes(in, head);
    in.reset();

    URI subject = new URIImpl("http://example.com/" + entry.getName());
    LinkedHashModel statements = new LinkedHashModel();
    try {
        RDFParserRegistry parsers = org.openrdf.rio.RDFParserRegistry.getInstance();
        RDFParser parser = parsers.get(parsers.getFileFormatForMIMEType(type)).getParser();
        parser.setRDFHandler(new StatementCollector(statements));
        parser.parse(new ByteArrayInputStream(head, 0, count), subject.toString());
    } catch (RDFParseException e) {
        // ignore: the peeked prefix is usually a truncated document
    } catch (RDFHandlerException e) {
        // ignore
    }

    // Checked in the same order as the types are listed; the first match wins.
    URI[] schemaTypes = { OWL.ONTOLOGY, OWL.CLASS, OWL.OBJECTPROPERTY, OWL.DATATYPEPROPERTY,
            OWL.FUNCTIONALPROPERTY, RDFS.CLASS, RDF.PROPERTY };
    for (URI schemaType : schemaTypes) {
        if (statements.contains(subject, RDF.TYPE, schemaType)) {
            return true;
        }
    }
    return false;
}

From source file:org.gtdfree.model.GTDDataXMLTools.java

/**
 * Loads GTD data from an XML stream into the given model.
 *
 * The model and its data repository are put into suspended mode while loading and are always
 * taken out of it again in the finally block. The start of the stream is inspected for an XML
 * declaration carrying an encoding; if none is found the stream is decoded with the system
 * default encoding. The root element and its version attribute select the loader to use.
 *
 * @param model the model to fill
 * @param in    the XML input; it is not closed by this method
 * @return the data header for versions 2.0, 2.1 and 2.2*, or null when the root element is
 *         not gtd-data and the data is handed to the 1.0 loader
 * @throws XMLStreamException if the XML cannot be parsed
 * @throws IOException        if the stream cannot be read, or the root element is gtd-data
 *                            with a version that is none of 2.0, 2.1 or 2.2*
 */
static public DataHeader load(GTDModel model, InputStream in) throws XMLStreamException, IOException {

    model.setSuspendedForMultipleChanges(true);
    model.getDataRepository().suspend(true);

    XMLStreamReader r;
    try {

        // The buffer size is the same as the default in 1.6; we request it explicitly so that
        // this does not break if the default changes.
        // NOTE(review): the mark limit of 8191 presumably has to cover whatever the
        // InputStreamReader below reads ahead - confirm before changing either number.
        BufferedInputStream bin = new BufferedInputStream(in, 8192);
        bin.mark(8191);

        // Decode the start of the stream (system default encoding) to look for an XML declaration.
        Reader rr = new InputStreamReader(bin);
        CharBuffer b = CharBuffer.allocate(96);
        rr.read(b);
        // Rewind the buffer so the matcher sees it from the first character.
        b.position(0);
        //System.out.println(b);
        Pattern pattern = Pattern.compile("<\\?.*?encoding\\s*?=.*?\\?>", Pattern.CASE_INSENSITIVE); //$NON-NLS-1$
        Matcher matcher = pattern.matcher(b);

        // reset back to start of file
        bin.reset();

        // we check if encoding is defined in xml, by the book encoding on r should be null if not defined in xml,
        // but in reality it can be arbitrary if not defined in xml. So we have to check ourselves.
        if (matcher.find()) {
            //System.out.println(matcher);
            // if defined, then XML parser will pick it up and use it
            r = XMLInputFactory.newInstance().createXMLStreamReader(bin);
            Logger.getLogger(GTDDataXMLTools.class).info("XML declared encoding: " + r.getEncoding() //$NON-NLS-1$
                    + ", system default encoding: " + Charset.defaultCharset()); //$NON-NLS-1$
        } else {
            //System.out.println(matcher);
            // if not defined, then we assume it is generated by gtd-free version 0.4 or some local editor,
            // so we assume system default encoding.
            r = XMLInputFactory.newInstance().createXMLStreamReader(new InputStreamReader(bin));
            Logger.getLogger(GTDDataXMLTools.class)
                    .info("XML assumed system default encoding: " + Charset.defaultCharset()); //$NON-NLS-1$
        }

        // Advance to the root element.
        r.nextTag();
        if ("gtd-data".equals(r.getLocalName())) { //$NON-NLS-1$
            DataHeader dh = new DataHeader(null, r.getAttributeValue(null, "version"), //$NON-NLS-1$
                    r.getAttributeValue(null, "modified")); //$NON-NLS-1$
            // Version 2.0 is dispatched before lastActionID is read.
            if (dh.version != null) {
                if (dh.version.equals("2.0")) { //$NON-NLS-1$
                    r.nextTag();
                    _load_2_0(model, r);
                    return dh;
                }
            }
            String s = r.getAttributeValue(null, "lastActionID"); //$NON-NLS-1$
            if (s != null) {
                try {
                    model.setLastActionID(Integer.parseInt(s));
                } catch (Exception e) {
                    // An unusable lastActionID is logged and otherwise ignored.
                    Logger.getLogger(GTDDataXMLTools.class).debug("Internal error.", e); //$NON-NLS-1$
                }
            }
            if (dh.version != null) {
                if (dh.version.equals("2.1")) { //$NON-NLS-1$
                    r.nextTag();
                    _load_2_1(model, r);
                    return dh;

                }
                if (dh.version.startsWith("2.2")) { //$NON-NLS-1$
                    r.nextTag();
                    _load_2_2(model, r);
                    return dh;
                }
            }
            // gtd-data root with a missing or unrecognized version.
            throw new IOException("XML gtd-free data with version number " + dh.version //$NON-NLS-1$
                    + " can not be imported. Data version is newer then supported versions. Update your GTD-Free application to latest version."); //$NON-NLS-1$
        }

        // Any other root element is handed to the 1.0 loader.
        _load_1_0(model, r);

        return null;

    } catch (XMLStreamException e) {
        // Log the nested cause when there is one, then rethrow the original exception.
        if (e.getNestedException() != null) {
            Logger.getLogger(GTDDataXMLTools.class).debug("Parse error.", e.getNestedException()); //$NON-NLS-1$
        } else {
            Logger.getLogger(GTDDataXMLTools.class).debug("Parse error.", e); //$NON-NLS-1$
        }
        throw e;
    } catch (IOException e) {
        throw e;
    } finally {
        // Always leave suspended mode, whether loading succeeded or failed.
        model.setSuspendedForMultipleChanges(false);
        model.getDataRepository().suspend(false);
    }

}