Example usage for java.io.PushbackInputStream.unread(byte[], int, int)

Introduction

On this page you can find example usages of java.io.PushbackInputStream.unread(byte[], int, int), collected from several open-source projects.

Prototype

public void unread(byte[] b, int off, int len) throws IOException

Source Link

Document

Pushes back a portion of an array of bytes by copying it to the front of the pushback buffer.

Usage

From source file:XmlReader.java

/**
 * Wraps the given byte stream in a {@link PushbackInputStream}, peeks at up
 * to four leading bytes to guess the character encoding, pushes those bytes
 * back, and then hands the pushback stream to {@code setEncoding} or
 * {@code useEncodingDecl} with the guessed encoding name. When no byte
 * signature is recognized, the encoding defaults to "UTF-8".
 *
 * @param stream the raw byte stream to read from
 * @throws IOException if reading or pushing back the leading bytes fails
 *         (presumably the encoding helpers may throw it too; they are not
 *         visible here)
 */
private XmlReader(InputStream stream) throws IOException {
    super(stream);

    PushbackInputStream pb;
    byte buf[];
    int len;

    /*if (stream instanceof PushbackInputStream)
        pb = (PushbackInputStream) stream;
    else*/
    /*
     * The check above was commented out to make sure this works when the
     * document is accessed over http. According to the original authors,
     * the URL connection code uses a PushbackInputStream of size 7, and
     * trying to push back MAX bytes (default 512) on it raises an
     * exception. That is why the stream is always wrapped, irrespective
     * of the type of stream we start off with.
     */
    pb = new PushbackInputStream(stream, MAXPUSHBACK);

    //
    // See if we can figure out the character encoding used
    // in this file by peeking at the first few bytes.
    //
    buf = new byte[4];
    len = pb.read(buf);
    // Push the peeked bytes back so the stream is handed on unconsumed.
    if (len > 0)
        pb.unread(buf, 0, len);

    // Signature detection only runs when all four bytes were read.
    if (len == 4)
        switch (buf[0] & 0x0ff) {
        case 0:
            // 00 3c 00 3f == illegal UTF-16 big-endian
            if (buf[1] == 0x3c && buf[2] == 0x00 && buf[3] == 0x3f) {
                setEncoding(pb, "UnicodeBig");
                return;
            }
            // else it's probably UCS-4
            break;

        case '<': // 0x3c: the most common cases!
            switch (buf[1] & 0x0ff) {
            // First character is '<'; could be XML without
            // an XML directive such as "<hello>", "<!-- ...",
            // and so on.
            default:
                break;

            // 3c 00 3f 00 == illegal UTF-16 little endian
            case 0x00:
                if (buf[2] == 0x3f && buf[3] == 0x00) {
                    setEncoding(pb, "UnicodeLittle");
                    return;
                }
                // else probably UCS-4
                break;

            // 3c 3f 78 6d == ASCII and supersets '<?xm'
            case '?':
                if (buf[2] != 'x' || buf[3] != 'm')
                    break;
                //
                // One of several encodings could be used:
                // Shift-JIS, ASCII, UTF-8, ISO-8859-*, etc
                //
                useEncodingDecl(pb, "UTF8");
                return;
            }
            break;

        // 4c 6f a7 94 ... some EBCDIC code page
        case 0x4c:
            if (buf[1] == 0x6f && (0x0ff & buf[2]) == 0x0a7 && (0x0ff & buf[3]) == 0x094) {
                useEncodingDecl(pb, "CP037");
                return;
            }
            // whoops, treat as UTF-8
            break;

        // FE FF byte order mark: UTF-16 big-endian
        case 0xfe:
            if ((buf[1] & 0x0ff) != 0xff)
                break;
            setEncoding(pb, "UTF-16");
            return;

        // FF FE byte order mark: UTF-16 little-endian
        case 0xff:
            if ((buf[1] & 0x0ff) != 0xfe)
                break;
            setEncoding(pb, "UTF-16");
            return;

        // default ... no XML declaration
        default:
            break;
        }

    //
    // If all else fails, assume XML without a declaration, and
    // using UTF-8 encoding.
    //
    setEncoding(pb, "UTF-8");
}

From source file:org.adl.parsers.dom.ADLDOMParser.java

/**
 * Sets up the file source for the test subject file.
 *
 * @param iFileName file to setup input source for.
 *
 * @return InputSource/*from  w w w .  java 2  s .c  om*/
 */
private InputSource setupFileSource(String iFileName) {
    log.debug("setupFileSource()");
    String msgText;
    boolean defaultEncoding = true;
    String encoding = null;
    PushbackInputStream inputStream;
    FileInputStream inFile;

    try {
        File xmlFile = new File(iFileName);
        log.debug(xmlFile.getAbsolutePath());

        if (xmlFile.isFile()) {
            InputSource is = null;

            defaultEncoding = true;
            if (xmlFile.length() > 1) {
                inFile = new FileInputStream(xmlFile);
                inputStream = new PushbackInputStream(inFile, 4);

                // Reads the initial 4 bytes of the file to check for a Byte
                // Order Mark and determine the encoding

                byte bom[] = new byte[4];
                int n, pushBack;
                n = inputStream.read(bom, 0, bom.length);

                // UTF-8 Encoded
                if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
                    encoding = "UTF-8";
                    defaultEncoding = false;
                    pushBack = n - 3;
                }
                // UTF-16 Big Endian Encoded
                else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
                    encoding = "UTF-16BE";
                    defaultEncoding = false;
                    pushBack = n - 2;
                }
                // UTF-16 Little Endian Encoded               
                else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
                    encoding = "UTF-16LE";
                    defaultEncoding = false;
                    pushBack = n - 2;
                }
                // Default encoding
                else {
                    // Unicode BOM mark not found, unread all bytes                  
                    pushBack = n;
                }

                // Place any non-BOM bytes back into the stream
                if (pushBack > 0) {
                    inputStream.unread(bom, (n - pushBack), pushBack);
                }

                if (defaultEncoding == true) { //Reads in ASCII file.
                    FileReader fr = new FileReader(xmlFile);
                    is = new InputSource(fr);
                }
                // Reads the file in the determined encoding
                else {
                    //Creates a buffer with the size of the xml encoded file
                    BufferedReader inStream = new BufferedReader(new InputStreamReader(inputStream, encoding));
                    StringBuffer dataString = new StringBuffer();
                    String s = "";

                    //Builds the encoded file to be parsed
                    while ((s = inStream.readLine()) != null) {
                        dataString.append(s);
                    }

                    inStream.close();
                    inputStream.close();
                    inFile.close();
                    is = new InputSource(new StringReader(dataString.toString()));
                    is.setEncoding(encoding);
                }
            }
            return is;
        } else if ((iFileName.length() > 6)
                && (iFileName.substring(0, 5).equals("http:") || iFileName.substring(0, 6).equals("https:"))) {
            URL xmlURL = new URL(iFileName);
            InputStream xmlIS = xmlURL.openStream();
            InputSource is = new InputSource(xmlIS);
            return is;
        } else {
            msgText = "XML File: " + iFileName + " is not a file or URL";
            log.error(msgText);
        }
    } catch (NullPointerException npe) {
        msgText = "Null pointer exception" + npe;
        log.error(msgText);
    } catch (SecurityException se) {
        msgText = "Security Exception" + se;
        log.error(msgText);
    } catch (FileNotFoundException fnfe) {
        msgText = "File Not Found Exception" + fnfe;
        log.error(msgText);
    } catch (Exception e) {
        msgText = "General Exception" + e;
        log.error(msgText);
    }

    log.debug("setUpFileSource()");

    return new InputSource();
}

From source file:org.apache.axis2.builder.BuilderUtil.java

/**
 * Uses the byte order mark (BOM) to identify the encoding to be used, falling
 * back to the supplied default encoding when no BOM is present. BOM bytes are
 * consumed; every other byte that was read is pushed back onto the stream.
 * <p>
 * The four-byte UTF-32 marks are tested before the two-byte UTF-16 marks
 * because the UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark
 * (FF FE); testing UTF-16LE first would make UTF-32LE undetectable. Each test
 * is also guarded by the number of bytes actually read, so zero padding in
 * the probe buffer is never mistaken for stream content.
 *
 * @param is2             PushBackInputStream (it must be a pushback input stream so that we can
 *                        unread the non-BOM bytes); its pushback buffer must hold at least
 *                        {@code BOM_SIZE} bytes
 * @param defaultEncoding default encoding style if no BOM
 * @return the selected character set encoding
 * @throws java.io.IOException if reading from or pushing back to the stream fails
 */
public static String getCharSetEncoding(PushbackInputStream is2, String defaultEncoding) throws IOException {
    byte bom[] = new byte[BOM_SIZE];
    int n = is2.read(bom, 0, bom.length);

    String encoding;
    int bomLength;

    if ((n >= 4) && (bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE)
            && (bom[3] == (byte) 0xFF)) {
        encoding = "UTF-32BE";
        bomLength = 4;
    } else if ((n >= 4) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00)
            && (bom[3] == (byte) 0x00)) {
        encoding = "UTF-32LE";
        bomLength = 4;
    } else if ((n >= 3) && (bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
        encoding = "UTF-8";
        bomLength = 3;
    } else if ((n >= 2) && (bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
        encoding = "UTF-16BE";
        bomLength = 2;
    } else if ((n >= 2) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
        encoding = "UTF-16LE";
        bomLength = 2;
    } else {
        // Unicode BOM mark not found, unread all bytes
        encoding = defaultEncoding;
        bomLength = 0;
    }

    if (log.isDebugEnabled()) {
        if (bomLength > 0) {
            log.debug("char set encoding set from BOM =" + encoding);
        } else {
            log.debug("char set encoding set from default =" + encoding);
        }
    }

    // n is -1 at end of stream, in which case there is nothing to push back.
    int unread = n - bomLength;
    if (unread > 0) {
        is2.unread(bom, (n - unread), unread);
    }
    return encoding;
}

From source file:org.apache.cocoon.components.flow.javascript.fom.FOM_JavaScriptInterpreter.java

/**
 * Find the encoding of the stream, or null if not specified.
 * <p>
 * Reads up to {@code ENCODING_BUF_SIZE} bytes, pushes them back so the stream
 * is left unconsumed, and matches them (decoded as ASCII) against
 * {@code encodingRE}.
 *
 * @param is the stream to inspect; its pushback buffer must be able to hold
 *           the bytes read here
 * @return the first capture group of {@code encodingRE}, or null when the
 *         pattern does not match or the stream is already at its end
 * @throws IOException if reading or pushing back fails
 */
String findEncoding(PushbackInputStream is) throws IOException {
    // Read some bytes
    byte[] buffer = new byte[ENCODING_BUF_SIZE];
    int len = is.read(buffer, 0, buffer.length);
    if (len < 0) {
        // End of stream: read() returned -1, which must not be used as a
        // length for unread() or the String constructor.
        return null;
    }
    // and push them back
    is.unread(buffer, 0, len);

    // Interpret them as an ASCII string
    String str = new String(buffer, 0, len, "ASCII");
    RE re = new RE(encodingRE);
    if (re.match(str)) {
        return re.getParen(1);
    }
    return null;
}

From source file:org.apache.pulsar.io.file.utils.GZipFiles.java

/**
 * Returns true if the given file is a gzip file, i.e. its first two bytes
 * are the standard gzip magic number {@code 1f 8b}.
 *
 * @param f the file to inspect; may be null
 * @return true when the file starts with the gzip magic number; false when
 *         it does not, when it is shorter than two bytes, when {@code f} is
 *         null, or when the file cannot be opened or read
 */
public static boolean isGzip(File f) {
    if (f == null) {
        return false;
    }
    try (InputStream input = new FileInputStream(f)) {
        // read() yields -1 at end of file, so files shorter than two bytes
        // fail the comparison without any special casing.
        return input.read() == 0x1f && input.read() == 0x8b;
    } catch (IOException | SecurityException e) {
        // Best effort: a file that cannot be opened or read is reported as
        // "not gzip" rather than failing the caller.
        return false;
    }
}

From source file:org.commoncrawl.hadoop.io.deprecated.ArcFileReader.java

/**
 * Checks the two trailer values that follow the compressed data against the
 * running CRC and the byte count reported by {@code inf}.
 *
 * @throws IOException with message "Corrupt GZIP trailer" when either value
 *         does not match, or if reading from the stream fails
 */
private void readTrailer() throws IOException {
    PushbackInputStream pushback = (PushbackInputStream) this.in;

    // Hand back whatever input bytes inf reports as still remaining, so the
    // trailer reads below start right after the compressed data.
    int leftover = inf.getRemaining();
    if (leftover > 0) {
        pushback.unread(buf, len - leftover, leftover);
    }

    // The second value is only read when the first one matched.
    boolean crcMatches = readUInt(pushback) == _crc.getValue();
    // rfc1952; ISIZE is the input size modulo 2^32
    boolean trailerValid = crcMatches && readUInt(pushback) == (inf.getBytesWritten() & 0xffffffffL);

    if (!trailerValid) {
        throw new IOException("Corrupt GZIP trailer");
    }
}

From source file:org.deegree.framework.xml.XMLFragment.java

/**
 * Reads the encoding of an XML document from its header. If no header is
 * available, or no complete encoding declaration is found in the bytes that
 * were inspected, <code>CharsetUtils.getSystemCharset()</code> is returned.
 * <p>
 * At least 80 bytes are read when available (in chunks of up to 80 bytes) and
 * all of them are pushed back before the header is parsed, so the stream is
 * left unconsumed on every path.
 *
 * @param pbis
 *            stream to inspect; its pushback buffer must be large enough to
 *            take back the bytes read here
 * @return encoding of a XML document (lower-cased, as found in the header)
 * @throws IOException
 *             if reading from or pushing back to the stream fails
 */
private String readEncoding(PushbackInputStream pbis) throws IOException {
    final int headerLimit = 80;

    // Most recently read chunk first, so that unreading in list order
    // restores the original byte order.
    LinkedList<byte[]> chunks = new LinkedList<byte[]>();
    LinkedList<Integer> chunkLengths = new LinkedList<Integer>();
    StringBuilder header = new StringBuilder();

    int total = 0;
    while (total < headerLimit) {
        byte[] chunk = new byte[headerLimit];
        int n = pbis.read(chunk);
        if (n == -1) {
            break;
        }
        total += n;
        header.append(new String(chunk, 0, n));
        chunks.addFirst(chunk);
        chunkLengths.addFirst(Integer.valueOf(n));
    }

    // Restore the stream before parsing.
    while (!chunks.isEmpty()) {
        pbis.unread(chunks.poll(), 0, chunkLengths.poll().intValue());
    }

    // Fixed locale: the default-locale variant maps 'I' to a dotless i under
    // e.g. Turkish, which would hide an upper-case "ENCODING=".
    String s = header.toString().toLowerCase(java.util.Locale.ENGLISH);

    String encoding = CharsetUtils.getSystemCharset();
    if (s.indexOf("?>") > -1) {
        int p = s.indexOf("encoding=");
        if (p > -1) {
            // Skip "encoding=" plus the opening quote character.
            int start = p + "encoding=".length() + 1;
            int end = start;
            while (end < s.length() && s.charAt(end) != '"' && s.charAt(end) != '\'') {
                end++;
            }
            // Only accept a value terminated by a closing quote; a truncated
            // declaration keeps the system charset instead of running off
            // the end of the string.
            if (end < s.length()) {
                encoding = s.substring(start, end);
            }
        }
    }

    return encoding;
}

From source file:org.methodize.nntprss.feed.Channel.java

/**
 * Consumes a leading Unicode byte order mark (UTF-32, UTF-16 or UTF-8) from
 * the stream, if present. Every byte that was read but is not part of a BOM
 * is pushed back, so a stream without a BOM is left untouched.
 * <p>
 * Only bytes actually delivered by the read are examined and pushed back;
 * the zero padding of the probe buffer is never treated as stream content.
 *
 * @param is the stream to inspect; its pushback buffer must hold at least
 *           {@code PUSHBACK_BUFFER_SIZE} bytes
 * @throws IOException if reading from or pushing back to the stream fails
 */
private static void skipBOM(PushbackInputStream is) throws IOException {
    byte[] header = new byte[PUSHBACK_BUFFER_SIZE];
    int bytesRead = is.read(header);
    if (bytesRead <= 0) {
        // Empty stream (or nothing read): nothing to skip or restore.
        return;
    }

    int bomLength;
    if (bytesRead >= 4 && header[0] == 0 && header[1] == 0 && (header[2] & 0xff) == 0xFE
            && (header[3] & 0xff) == 0xFF) {
        // UTF-32, big-endian
        bomLength = 4;
    } else if (bytesRead >= 4 && (header[0] & 0xff) == 0xFF && (header[1] & 0xff) == 0xFE && header[2] == 0
            && header[3] == 0) {
        // UTF-32, little-endian (must be tested before UTF-16 little-endian,
        // whose mark is a prefix of this one)
        bomLength = 4;
    } else if (bytesRead >= 2 && (header[0] & 0xff) == 0xFE && (header[1] & 0xff) == 0xFF) {
        // UTF-16, big-endian
        bomLength = 2;
    } else if (bytesRead >= 2 && (header[0] & 0xff) == 0xFF && (header[1] & 0xff) == 0xFE) {
        // UTF-16, little-endian
        bomLength = 2;
    } else if (bytesRead >= 3 && (header[0] & 0xff) == 0xEF && (header[1] & 0xff) == 0xBB
            && (header[2] & 0xff) == 0xBF) {
        // UTF-8
        bomLength = 3;
    } else {
        // No BOM: restore everything that was read.
        bomLength = 0;
    }

    if (bytesRead > bomLength) {
        is.unread(header, bomLength, bytesRead - bomLength);
    }
}

From source file:org.springframework.ws.soap.saaj.SaajSoapMessageFactory.java

/**
 * Strips a leading UTF-8 Byte Order Mark from the stream when one is present, because the SAAJ RI cannot cope
 * with these BOMs. The given stream is wrapped in a buffered pushback stream; up to three bytes are read and,
 * unless {@code isByteOrderMark} accepts them, pushed back again.
 *
 * @param inputStream the stream to check
 * @return a pushback stream over the given stream
 * @throws IOException if reading or pushing back fails
 * @see <a href="http://jira.springframework.org/browse/SWS-393">SWS-393</a>
 * @see <a href="http://unicode.org/faq/utf_bom.html#22">UTF-8 BOMs</a>
 */
private InputStream checkForUtf8ByteOrderMark(InputStream inputStream) throws IOException {
    byte[] head = new byte[3];
    PushbackInputStream peekable = new PushbackInputStream(new BufferedInputStream(inputStream), head.length);

    int count = peekable.read(head);
    // Nothing to restore at end of stream; a recognized BOM stays consumed. See SWS-393
    boolean restore = count != -1 && !isByteOrderMark(head);
    if (restore) {
        peekable.unread(head, 0, count);
    }
    return peekable;
}