Example usage for java.io PushbackInputStream read

List of usage examples for java.io PushbackInputStream read

Introduction

This page collects usage examples for the java.io.PushbackInputStream method read(byte[] b, int off, int len).

Prototype

public int read(byte[] b, int off, int len) throws IOException 

Source Link

Document

Reads up to len bytes of data from this input stream into an array of bytes.

Usage

From source file:Main.java

/**
 * Demonstrates {@code PushbackInputStream.read(byte[], int, int)}: reads up to
 * three bytes from an in-memory stream and prints every byte that was actually
 * read as a character on its own line.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {

    byte[] arrByte = new byte[1024];

    byte[] byteArray = new byte[] { 'j', 'a', 'v', 'a', '2', 's', '.', 'c', 'o', 'm' };

    // wrap the in-memory stream in a PushbackInputStream; both are closed
    // automatically when the try block exits
    try (InputStream is = new ByteArrayInputStream(byteArray);
            PushbackInputStream pis = new PushbackInputStream(is)) {

        // read up to three bytes; the return value is the number of bytes
        // actually stored in arrByte, or -1 at end of stream
        int count = pis.read(arrByte, 0, 3);

        // print only the bytes that were really read
        for (int i = 0; i < count; i++) {
            System.out.println((char) arrByte[i]);
        }

    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:Main.java

/**
 * Creates a reader allowing to read the contents of specified text source.
 * <p>This method implements the detection of the encoding.
 * <p>Note that the detection of the encoding always works 
 * because it uses a fallback value./*w ww . j  a va 2 s.c o  m*/
 *
 * @param in the text source
 * @param encoding the detected encoding is copied there.
 * May be <code>null</code>.
 * @return a reader allowing to read the contents of the text source.
 * This reader will automatically skip the BOM if any.
 * @exception IOException if there is an I/O problem
 */
public static Reader createReader(InputStream in, String fallbackEncoding, String[] encoding)
        throws IOException {
    byte[] bytes = new byte[1024];
    int byteCount = -1;

    PushbackInputStream in2 = new PushbackInputStream(in, bytes.length);
    try {
        int count = in2.read(bytes, 0, bytes.length);
        if (count > 0) {
            in2.unread(bytes, 0, count);
        }
        byteCount = count;
    } catch (IOException ignored) {
    }

    String charset = null;

    if (byteCount > 0) {
        if (byteCount >= 2) {
            // Use BOM ---

            int b0 = (bytes[0] & 0xFF);
            int b1 = (bytes[1] & 0xFF);

            switch ((b0 << 8) | b1) {
            case 0xFEFF:
                charset = "UTF-16BE";
                // We don't want to read the BOM.
                in2.skip(2);
                break;
            case 0xFFFE:
                charset = "UTF-16LE";
                in2.skip(2);
                break;
            case 0xEFBB:
                if (byteCount >= 3 && (bytes[2] & 0xFF) == 0xBF) {
                    charset = "UTF-8";
                    in2.skip(3);
                }
                break;
            }
        }

        if (charset == null) {
            // Unsupported characters are replaced by U+FFFD.
            String text = new String(bytes, 0, byteCount, "US-ASCII");

            if (text.startsWith("<?xml")) {
                Pattern pattern = Pattern.compile("encoding\\s*=\\s*['\"]([^'\"]+)");
                Matcher matcher = pattern.matcher(text);
                if (matcher.find()) {
                    charset = matcher.group(1);
                } else {
                    charset = "UTF-8";
                }
            }
        }
    }

    if (charset == null) {
        charset = fallbackEncoding;
        if (charset == null) {
            charset = "UTF-8";
        }
    }

    if (encoding != null) {
        encoding[0] = charset;
    }
    return new InputStreamReader(in2, charset);
}

From source file:XmlReader.java

/**
 * Scans the start of the stream for an XML declaration and, when it carries
 * a well-formed <code>encoding="..."</code> pseudo-attribute, stores that
 * name in <code>assignedEncoding</code> and passes it to
 * <code>setEncoding</code>. In every other case (no declaration, illegal
 * encoding name, size limit reached) <code>setEncoding(pb, "UTF-8")</code>
 * is called instead.
 * <p>The bytes examined are pushed back, so <code>pb</code> is left at the
 * position it had on entry.
 *
 * @param pb the stream to inspect; presumably its pushback capacity is at
 *        least MAXPUSHBACK -- TODO confirm against the caller
 * @param encoding the provisional encoding used to decode the declaration
 * @throws IOException if reading or unreading fails
 */
private void useEncodingDecl(PushbackInputStream pb, String encoding) throws IOException {
    byte buffer[] = new byte[MAXPUSHBACK];
    int len;
    Reader r;
    int c;

    //
    // Buffer up a bunch of input, and set up to read it in
    // the specified encoding ... we can skip the first four
    // bytes since we know that "<?xm" was read to determine
    // what encoding to use!
    //
    len = pb.read(buffer, 0, buffer.length);
    // read() returns -1 at end of stream; unread() rejects a negative length
    if (len > 0) {
        pb.unread(buffer, 0, len);
    }
    // The third constructor argument is a length counted from the offset,
    // not an end index: only len - 4 valid bytes follow the skipped prefix.
    r = new InputStreamReader(new ByteArrayInputStream(buffer, 4, Math.max(len - 4, 0)), encoding);

    //
    // Next must be "l" (and whitespace) else we conclude
    // error and choose UTF-8.
    //
    if ((c = r.read()) != 'l') {
        setEncoding(pb, "UTF-8");
        return;
    }

    //
    // Then, we'll skip any
    //  S version="..."   [or single quotes]
    // bit and get any subsequent 
    //  S encoding="..."  [or single quotes]
    //
    // We put an arbitrary size limit on how far we read; lots
    // of space will break this algorithm.
    //
    StringBuffer buf = new StringBuffer();
    StringBuffer keyBuf = null;
    String key = null;
    boolean sawEq = false;
    char quoteChar = 0;
    boolean sawQuestion = false;

    XmlDecl: for (int i = 0; i < MAXPUSHBACK - 5; ++i) {
        if ((c = r.read()) == -1)
            break;

        // ignore whitespace before/between "key = 'value'"
        if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
            continue;

        // ... but require at least a little!
        if (i == 0)
            break;

        // terminate the loop ASAP
        if (c == '?')
            sawQuestion = true;
        else if (sawQuestion) {
            if (c == '>')
                break;
            sawQuestion = false;
        }

        // did we get the "key =" bit yet?
        if (key == null || !sawEq) {
            if (keyBuf == null) {
                if (Character.isWhitespace((char) c))
                    continue;
                // first character of a new key: reuse buf as the key buffer
                keyBuf = buf;
                buf.setLength(0);
                buf.append((char) c);
                sawEq = false;
            } else if (Character.isWhitespace((char) c)) {
                key = keyBuf.toString();
            } else if (c == '=') {
                if (key == null)
                    key = keyBuf.toString();
                sawEq = true;
                keyBuf = null;
                quoteChar = 0;
            } else
                keyBuf.append((char) c);
            continue;
        }

        // space before quoted value
        if (Character.isWhitespace((char) c))
            continue;
        if (c == '"' || c == '\'') {
            if (quoteChar == 0) {
                // opening quote: start collecting the value
                quoteChar = (char) c;
                buf.setLength(0);
                continue;
            } else if (c == quoteChar) {
                // closing quote: buf now holds the complete value
                if ("encoding".equals(key)) {
                    assignedEncoding = buf.toString();

                    // [81] Encname ::= [A-Za-z] ([A-Za-z0-9._]|'-')*
                    // (the outer counter i is reused here; every path below
                    // leaves the outer loop, so that is harmless)
                    for (i = 0; i < assignedEncoding.length(); i++) {
                        c = assignedEncoding.charAt(i);
                        if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
                            continue;
                        if (i == 0)
                            break XmlDecl;
                        if (i > 0 && (c == '-' || (c >= '0' && c <= '9') || c == '.' || c == '_'))
                            continue;
                        // map illegal names to UTF-8 default
                        break XmlDecl;
                    }

                    setEncoding(pb, assignedEncoding);
                    return;

                } else {
                    // some other pseudo-attribute (e.g. version): skip it
                    key = null;
                    continue;
                }
            }
        }
        buf.append((char) c);
    }

    setEncoding(pb, "UTF-8");
}

From source file:com.digitalpebble.storm.crawler.protocol.http.HttpResponse.java

/**
 * Reads a body sent with chunked transfer coding and stores the decoded
 * bytes in <code>content</code>.
 * <p>When <code>http.getMaxContent()</code> is non-negative, at most that
 * many bytes are kept: reading stops as soon as the limit is reached, the
 * truncated body is stored and the trailing headers are not parsed (the
 * stream is then positioned in the middle of a chunk). Otherwise reading
 * continues up to the zero-length last chunk and <code>parseHeaders</code>
 * is called for what follows it.
 *
 * @param in the stream positioned at the first chunk-size line
 * @param line scratch buffer handed to <code>readLine</code> and
 *        <code>parseHeaders</code>
 * @throws HttpException if a chunk size is not a non-negative hexadecimal
 *         number or if the stream ends inside a chunk
 * @throws IOException if reading from <code>in</code> fails
 */
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[HttpProtocol.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(HttpProtocol.BUFFER_SIZE);

    while (!doneChunks) {
        if (HttpProtocol.LOGGER.isTraceEnabled()) {
            HttpProtocol.LOGGER.trace("Http: starting chunk");
        }

        // chunk header: hexadecimal size, optionally followed by ";extension"
        readLine(in, line, false);

        int pos = line.indexOf(";");
        String chunkLenStr = (pos < 0) ? line.toString() : line.substring(0, pos);
        chunkLenStr = chunkLenStr.trim();

        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());
        }
        // parseInt accepts a leading sign; a negative size is never valid
        if (chunkLen < 0) {
            throw new HttpException("bad chunk length: " + line.toString());
        }

        if (chunkLen == 0) {
            doneChunks = true;
            break;
        }

        // Cap the total at the configured maximum. Written as a subtraction
        // so that a huge chunk size cannot overflow the comparison.
        boolean truncated = false;
        if (http.getMaxContent() >= 0 && chunkLen > http.getMaxContent() - contentBytesRead) {
            chunkLen = http.getMaxContent() - contentBytesRead;
            truncated = true;
        }

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < HttpProtocol.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : HttpProtocol.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            out.write(bytes, 0, len);
            chunkBytesRead += len;
        }
        // keep the running total so the limit applies to the whole body
        contentBytesRead += chunkBytesRead;

        if (truncated) {
            // limit reached: the rest of this chunk is left unread
            break;
        }

        // presumably consumes the line break that ends the chunk data
        readLine(in, line, false);

    }

    content = out.toByteArray();
    if (doneChunks) {
        // only a completely read body is followed by trailing headers
        parseHeaders(in, line);
    }

}

From source file:org.adl.parsers.dom.ADLDOMParser.java

/**
 * Sets up the file source for the test subject file.
 * <p>For a regular file the first four bytes are checked for a UTF-8,
 * UTF-16BE or UTF-16LE byte order mark. When one is found, the rest of the
 * file is decoded in that encoding (line terminators included) and the
 * source wraps the decoded text. When none is found, the source wraps a
 * <code>FileReader</code> on the file, i.e. the platform default charset.
 * A name starting with <code>http:</code> or <code>https:</code> that is
 * not a file is opened as a URL.
 *
 * @param iFileName file to setup input source for.
 *
 * @return the input source; <code>null</code> when the file is at most one
 *         byte long; an empty <code>InputSource</code> when the name is
 *         neither a file nor a URL or when an exception was logged
 */
private InputSource setupFileSource(String iFileName) {
    log.debug("setupFileSource()");
    String msgText;

    try {
        File xmlFile = new File(iFileName);
        log.debug(xmlFile.getAbsolutePath());

        if (xmlFile.isFile()) {
            InputSource is = null;

            if (xmlFile.length() > 1) {
                String encoding = null;
                String decoded = null;

                PushbackInputStream inputStream = new PushbackInputStream(new FileInputStream(xmlFile), 4);
                try {
                    // Reads the initial 4 bytes of the file to check for a Byte
                    // Order Mark and determine the encoding
                    byte bom[] = new byte[4];
                    int n = inputStream.read(bom, 0, bom.length);
                    int bomLength = 0;

                    // UTF-8 Encoded
                    if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
                        encoding = "UTF-8";
                        bomLength = 3;
                    }
                    // UTF-16 Big Endian Encoded
                    else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
                        encoding = "UTF-16BE";
                        bomLength = 2;
                    }
                    // UTF-16 Little Endian Encoded
                    else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
                        encoding = "UTF-16LE";
                        bomLength = 2;
                    }

                    if (encoding != null) {
                        // Place any non-BOM bytes back into the stream
                        int pushBack = n - bomLength;
                        if (pushBack > 0) {
                            inputStream.unread(bom, bomLength, pushBack);
                        }

                        // Reads the file in the determined encoding. Characters
                        // are copied in blocks rather than line by line so that
                        // line terminators are preserved.
                        BufferedReader inStream = new BufferedReader(new InputStreamReader(inputStream, encoding));
                        StringBuffer dataString = new StringBuffer();
                        char[] chunk = new char[4096];
                        int count;
                        while ((count = inStream.read(chunk, 0, chunk.length)) != -1) {
                            dataString.append(chunk, 0, count);
                        }
                        decoded = dataString.toString();
                    }
                } finally {
                    // closes the underlying FileInputStream too, on every path
                    inputStream.close();
                }

                if (encoding == null) {
                    // No BOM: let a FileReader decode the file with the
                    // platform default charset. It stays open for the consumer
                    // of the returned source.
                    FileReader fr = new FileReader(xmlFile);
                    is = new InputSource(fr);
                } else {
                    is = new InputSource(new StringReader(decoded));
                    is.setEncoding(encoding);
                }
            }
            return is;
        } else if ((iFileName.length() > 6)
                && (iFileName.substring(0, 5).equals("http:") || iFileName.substring(0, 6).equals("https:"))) {
            URL xmlURL = new URL(iFileName);
            InputStream xmlIS = xmlURL.openStream();
            InputSource is = new InputSource(xmlIS);
            return is;
        } else {
            msgText = "XML File: " + iFileName + " is not a file or URL";
            log.error(msgText);
        }
    } catch (NullPointerException npe) {
        msgText = "Null pointer exception" + npe;
        log.error(msgText);
    } catch (SecurityException se) {
        msgText = "Security Exception" + se;
        log.error(msgText);
    } catch (FileNotFoundException fnfe) {
        msgText = "File Not Found Exception" + fnfe;
        log.error(msgText);
    } catch (Exception e) {
        msgText = "General Exception" + e;
        log.error(msgText);
    }

    log.debug("setUpFileSource()");

    return new InputSource();
}

From source file:org.apache.axis2.builder.BuilderUtil.java

/**
 * Use the BOM Mark to identify the encoding to be used. Fall back to default encoding
 * specified/*from  w w w . j  a  v  a2 s . c  o m*/
 *
 * @param is2             PushBackInputStream (it must be a pushback input stream so that we can
 *                        unread the BOM)
 * @param defaultEncoding default encoding style if no BOM
 * @return the selected character set encoding
 * @throws java.io.IOException
 */
public static String getCharSetEncoding(PushbackInputStream is2, String defaultEncoding) throws IOException {
    String encoding;
    byte bom[] = new byte[BOM_SIZE];
    int n, unread;

    n = is2.read(bom, 0, bom.length);

    if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
        encoding = "UTF-8";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        }
        unread = n - 3;
    } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
        encoding = "UTF-16BE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        }
        unread = n - 2;
    } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
        encoding = "UTF-16LE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        }
        unread = n - 2;
    } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE)
            && (bom[3] == (byte) 0xFF)) {
        encoding = "UTF-32BE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        }
        unread = n - 4;
    } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00)
            && (bom[3] == (byte) 0x00)) {
        encoding = "UTF-32LE";
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from BOM =" + encoding);
        }
        unread = n - 4;
    } else {

        // Unicode BOM mark not found, unread all bytes
        encoding = defaultEncoding;
        if (log.isDebugEnabled()) {
            log.debug("char set encoding set from default =" + encoding);
        }
        unread = n;
    }

    if (unread > 0) {
        is2.unread(bom, (n - unread), unread);
    }
    return encoding;
}

From source file:org.apache.cocoon.components.flow.javascript.fom.FOM_JavaScriptInterpreter.java

/**
 * Find the encoding of the stream, or null if not specified.
 * <p>Up to ENCODING_BUF_SIZE bytes are read, pushed back onto the stream
 * and matched, as ASCII text, against the <code>encodingRE</code>
 * expression; the first parenthesized group of the match is returned.
 *
 * @param is the stream to inspect; it is left at the position it had on
 *        entry (its pushback capacity is presumably at least
 *        ENCODING_BUF_SIZE -- TODO confirm against the callers)
 * @return the matched encoding name, or null when the expression does not
 *         match or the stream is already at its end
 * @throws IOException if reading from or unreading to the stream fails
 */
String findEncoding(PushbackInputStream is) throws IOException {
    // Read some bytes
    byte[] buffer = new byte[ENCODING_BUF_SIZE];
    int len = is.read(buffer, 0, buffer.length);
    if (len < 0) {
        // end of stream: nothing to push back and nothing to inspect
        // (a negative length would make unread() and new String() throw)
        return null;
    }
    // and push them back
    is.unread(buffer, 0, len);

    // Interpret them as an ASCII string
    String str = new String(buffer, 0, len, "ASCII");
    RE re = new RE(encodingRE);
    if (re.match(str)) {
        return re.getParen(1);
    }
    return null;
}

From source file:org.apache.nutch.protocol.htmlunit.HttpResponse.java

/**
 * /*from   ww  w .  j a  va  2s . c om*/
 * @param in
 * @param line
 * @throws HttpException
 * @throws IOException
 */
@SuppressWarnings("unused")
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[Http.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);

    while (!doneChunks) {
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace("Http: starting chunk");
        }

        readLine(in, line, false);

        String chunkLenStr;
        // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); }

        int pos = line.indexOf(";");
        if (pos < 0) {
            chunkLenStr = line.toString();
        } else {
            chunkLenStr = line.substring(0, pos);
            // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + line.substring(pos+1)); }
        }
        chunkLenStr = chunkLenStr.trim();
        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());
        }

        if (chunkLen == 0) {
            doneChunks = true;
            break;
        }

        if ((contentBytesRead + chunkLen) > http.getMaxContent())
            chunkLen = http.getMaxContent() - contentBytesRead;

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : Http.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            // DANGER!!! Will printed GZIPed stuff right to your
            // terminal!
            // if (LOG.isTraceEnabled()) { LOG.trace("read: " +  new String(bytes, 0, len)); }

            out.write(bytes, 0, len);
            chunkBytesRead += len;
        }

        readLine(in, line, false);

    }

    if (!doneChunks) {
        if (contentBytesRead != http.getMaxContent())
            throw new HttpException("chunk eof: !doneChunk && didn't max out");
        return;
    }

    content = out.toByteArray();
    parseHeaders(in, line);

}

From source file:org.apache.nutch.protocol.http.HttpResponse.java

/**
 * @param in/*from ww w .j  a v  a 2s  .  c om*/
 * @param line
 * @throws HttpException
 * @throws IOException
 */
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[Http.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);

    while (!doneChunks) {
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace("Http: starting chunk");
        }

        readLine(in, line, false);

        String chunkLenStr;
        // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'");
        // }

        int pos = line.indexOf(";");
        if (pos < 0) {
            chunkLenStr = line.toString();
        } else {
            chunkLenStr = line.substring(0, pos);
            // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " +
            // line.substring(pos+1)); }
        }
        chunkLenStr = chunkLenStr.trim();
        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());
        }

        if (chunkLen == 0) {
            doneChunks = true;
            break;
        }

        if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent())
            chunkLen = http.getMaxContent() - contentBytesRead;

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : Http.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            // DANGER!!! Will printed GZIPed stuff right to your
            // terminal!
            // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0,
            // len)); }

            out.write(bytes, 0, len);
            chunkBytesRead += len;
        }

        readLine(in, line, false);

    }

    if (!doneChunks) {
        if (contentBytesRead != http.getMaxContent())
            throw new HttpException("chunk eof: !doneChunk && didn't max out");
        return;
    }

    content = out.toByteArray();
    parseHeaders(in, line, null);

}

From source file:org.apache.nutch.protocol.s2jh.HttpResponse.java

/**
 * /*from w  w  w. j ava  2s .co  m*/
 * @param in
 * @param line
 * @throws HttpException
 * @throws IOException
 */
@SuppressWarnings("unused")
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException {
    boolean doneChunks = false;
    int contentBytesRead = 0;
    byte[] bytes = new byte[Http.BUFFER_SIZE];
    ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE);

    while (!doneChunks) {
        if (Http.LOG.isTraceEnabled()) {
            Http.LOG.trace("Http: starting chunk");
        }

        readLine(in, line, false);

        String chunkLenStr;
        // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'");
        // }

        int pos = line.indexOf(";");
        if (pos < 0) {
            chunkLenStr = line.toString();
        } else {
            chunkLenStr = line.substring(0, pos);
            // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " +
            // line.substring(pos+1)); }
        }
        chunkLenStr = chunkLenStr.trim();
        int chunkLen;
        try {
            chunkLen = Integer.parseInt(chunkLenStr, 16);
        } catch (NumberFormatException e) {
            throw new HttpException("bad chunk length: " + line.toString());
        }

        if (chunkLen == 0) {
            doneChunks = true;
            break;
        }

        if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent())
            chunkLen = http.getMaxContent() - contentBytesRead;

        // read one chunk
        int chunkBytesRead = 0;
        while (chunkBytesRead < chunkLen) {

            int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead)
                    : Http.BUFFER_SIZE;
            int len = in.read(bytes, 0, toRead);

            if (len == -1)
                throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks"
                        + " and " + chunkBytesRead + " in current chunk");

            // DANGER!!! Will printed GZIPed stuff right to your
            // terminal!
            // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0,
            // len)); }

            out.write(bytes, 0, len);
            chunkBytesRead += len;
        }

        readLine(in, line, false);
    }

    if (!doneChunks) {
        if (contentBytesRead != http.getMaxContent())
            throw new HttpException("chunk eof: !doneChunk && didn't max out");
        return;
    }

    content = out.toByteArray();
    parseHeaders(in, line);

}