Example usage of java.io.PushbackInputStream.unread

List of usage examples for java.io.PushbackInputStream.unread

Introduction

On this page you can find example usages of java.io.PushbackInputStream.unread.

Prototype

public void unread(byte[] b, int off, int len) throws IOException 

Source Link

Document

Pushes back a portion of an array of bytes by copying it to the front of the pushback buffer.

Usage

From source file:XmlReader.java

/**
 * Creates a reader over {@code stream}, choosing the character encoding by
 * peeking at the first four bytes of the document.
 * <p>
 * The bytes are read through a {@link PushbackInputStream} and pushed straight
 * back, so the stream handed to {@code setEncoding} / {@code useEncodingDecl}
 * still starts at the first byte of the document, byte order mark included.
 * When no signature is recognised, {@code setEncoding(pb, "UTF-8")} is used.
 *
 * @param stream the raw document bytes
 * @throws IOException if peeking at the stream fails, or if one is propagated
 *                     from the encoding setup
 */
private XmlReader(InputStream stream) throws IOException {
    super(stream);

    PushbackInputStream pb;
    byte buf[];
    int len;

    // Always wrap the stream, even if it already is a PushbackInputStream.
    // Per the original author's note, the stream obtained from a URL
    // connection (document accessed over http) is a PushbackInputStream
    // with a capacity of only 7 bytes, and pushing back more than that
    // raises an exception.
    pb = new PushbackInputStream(stream, MAXPUSHBACK);

    //
    // See if we can figure out the character encoding used
    // in this file by peeking at the first few bytes.
    //
    // A single read() may legally return fewer bytes than requested even
    // though more are on their way (typical for network streams), so keep
    // reading until the buffer is full or the stream ends. Otherwise a
    // short first read would silently skip the detection below.
    //
    buf = new byte[4];
    len = 0;
    while (len < buf.length) {
        int count = pb.read(buf, len, buf.length - len);
        if (count <= 0) {
            break;
        }
        len += count;
    }
    if (len > 0) {
        pb.unread(buf, 0, len);
    }

    if (len == 4) {
        switch (buf[0] & 0x0ff) {
        case 0:
            // 00 3c 00 3f == illegal UTF-16 big-endian
            if (buf[1] == 0x3c && buf[2] == 0x00 && buf[3] == 0x3f) {
                setEncoding(pb, "UnicodeBig");
                return;
            }
            // else it's probably UCS-4
            break;

        case '<': // 0x3c: the most common cases!
            switch (buf[1] & 0x0ff) {
            // 3c 00 3f 00 == illegal UTF-16 little endian
            case 0x00:
                if (buf[2] == 0x3f && buf[3] == 0x00) {
                    setEncoding(pb, "UnicodeLittle");
                    return;
                }
                // else probably UCS-4
                break;

            // 3c 3f 78 6d == ASCII and supersets '<?xm'
            case '?':
                if (buf[2] != 'x' || buf[3] != 'm') {
                    break;
                }
                //
                // One of several encodings could be used:
                // Shift-JIS, ASCII, UTF-8, ISO-8859-*, etc
                //
                useEncodingDecl(pb, "UTF8");
                return;

            // First character is '<'; could be XML without
            // an XML directive such as "<hello>", "<!-- ...",
            // and so on.
            default:
                break;
            }
            break;

        // 4c 6f a7 94 ... some EBCDIC code page
        case 0x4c:
            if (buf[1] == 0x6f && (0x0ff & buf[2]) == 0x0a7 && (0x0ff & buf[3]) == 0x094) {
                useEncodingDecl(pb, "CP037");
                return;
            }
            // whoops, treat as UTF-8
            break;

        // fe ff: UTF-16 big-endian byte order mark. The mark itself was
        // pushed back above, so it is still the first thing in pb.
        case 0xfe:
            if ((buf[1] & 0x0ff) != 0xff) {
                break;
            }
            setEncoding(pb, "UTF-16");
            return;

        // ff fe: UTF-16 little-endian byte order mark (also still in pb)
        case 0xff:
            if ((buf[1] & 0x0ff) != 0xfe) {
                break;
            }
            setEncoding(pb, "UTF-16");
            return;

        // default ... no XML declaration
        default:
            break;
        }
    }

    //
    // If all else fails, assume XML without a declaration, and
    // using UTF-8 encoding.
    //
    setEncoding(pb, "UTF-8");
}

From source file:org.adl.parsers.dom.ADLDOMParser.java

/**
 * Sets up the file source for the test subject file.
 *
 * @param iFileName file to setup input source for.
 *
 * @return InputSource/*from  w w w .  java 2  s .c  om*/
 */
private InputSource setupFileSource(String iFileName) {
    log.debug("setupFileSource()");
    String msgText;
    boolean defaultEncoding = true;
    String encoding = null;
    PushbackInputStream inputStream;
    FileInputStream inFile;

    try {
        File xmlFile = new File(iFileName);
        log.debug(xmlFile.getAbsolutePath());

        if (xmlFile.isFile()) {
            InputSource is = null;

            defaultEncoding = true;
            if (xmlFile.length() > 1) {
                inFile = new FileInputStream(xmlFile);
                inputStream = new PushbackInputStream(inFile, 4);

                // Reads the initial 4 bytes of the file to check for a Byte
                // Order Mark and determine the encoding

                byte bom[] = new byte[4];
                int n, pushBack;
                n = inputStream.read(bom, 0, bom.length);

                // UTF-8 Encoded
                if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
                    encoding = "UTF-8";
                    defaultEncoding = false;
                    pushBack = n - 3;
                }
                // UTF-16 Big Endian Encoded
                else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
                    encoding = "UTF-16BE";
                    defaultEncoding = false;
                    pushBack = n - 2;
                }
                // UTF-16 Little Endian Encoded               
                else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
                    encoding = "UTF-16LE";
                    defaultEncoding = false;
                    pushBack = n - 2;
                }
                // Default encoding
                else {
                    // Unicode BOM mark not found, unread all bytes                  
                    pushBack = n;
                }

                // Place any non-BOM bytes back into the stream
                if (pushBack > 0) {
                    inputStream.unread(bom, (n - pushBack), pushBack);
                }

                if (defaultEncoding == true) { //Reads in ASCII file.
                    FileReader fr = new FileReader(xmlFile);
                    is = new InputSource(fr);
                }
                // Reads the file in the determined encoding
                else {
                    //Creates a buffer with the size of the xml encoded file
                    BufferedReader inStream = new BufferedReader(new InputStreamReader(inputStream, encoding));
                    StringBuffer dataString = new StringBuffer();
                    String s = "";

                    //Builds the encoded file to be parsed
                    while ((s = inStream.readLine()) != null) {
                        dataString.append(s);
                    }

                    inStream.close();
                    inputStream.close();
                    inFile.close();
                    is = new InputSource(new StringReader(dataString.toString()));
                    is.setEncoding(encoding);
                }
            }
            return is;
        } else if ((iFileName.length() > 6)
                && (iFileName.substring(0, 5).equals("http:") || iFileName.substring(0, 6).equals("https:"))) {
            URL xmlURL = new URL(iFileName);
            InputStream xmlIS = xmlURL.openStream();
            InputSource is = new InputSource(xmlIS);
            return is;
        } else {
            msgText = "XML File: " + iFileName + " is not a file or URL";
            log.error(msgText);
        }
    } catch (NullPointerException npe) {
        msgText = "Null pointer exception" + npe;
        log.error(msgText);
    } catch (SecurityException se) {
        msgText = "Security Exception" + se;
        log.error(msgText);
    } catch (FileNotFoundException fnfe) {
        msgText = "File Not Found Exception" + fnfe;
        log.error(msgText);
    } catch (Exception e) {
        msgText = "General Exception" + e;
        log.error(msgText);
    }

    log.debug("setUpFileSource()");

    return new InputSource();
}

From source file:org.apache.axis2.builder.BuilderUtil.java

/**
 * Use the BOM (byte order mark) to identify the encoding to be used. Falls back to the
 * default encoding specified when the stream does not start with a known BOM.
 * <p>
 * A recognised BOM is consumed; every other byte that was read is pushed back, so the
 * stream continues with the first byte after the BOM (or with its very first byte when
 * there is no BOM).
 *
 * @param is2             PushBackInputStream (it must be a pushback input stream so that we can
 *                        unread the bytes that are not part of the BOM)
 * @param defaultEncoding default encoding style if no BOM
 * @return the selected character set encoding
 * @throws java.io.IOException if reading from or pushing back into the stream fails
 */
public static String getCharSetEncoding(PushbackInputStream is2, String defaultEncoding) throws IOException {
    byte bom[] = new byte[BOM_SIZE];

    // A single read() may return fewer bytes than requested although more
    // are pending, so fill the buffer until it is full or the stream ends.
    int n = 0;
    while (n < bom.length) {
        int count = is2.read(bom, n, bom.length - n);
        if (count <= 0) {
            break;
        }
        n += count;
    }

    String encoding;
    int bomLength;

    // The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 ones:
    // the UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE)
    // and would otherwise never be recognised. Each test also requires that
    // enough bytes were really read, so zero padding in the buffer cannot
    // be mistaken for BOM bytes.
    if ((n >= 4) && (bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE)
            && (bom[3] == (byte) 0xFF)) {
        encoding = "UTF-32BE";
        bomLength = 4;
    } else if ((n >= 4) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00)
            && (bom[3] == (byte) 0x00)) {
        encoding = "UTF-32LE";
        bomLength = 4;
    } else if ((n >= 3) && (bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) {
        encoding = "UTF-8";
        bomLength = 3;
    } else if ((n >= 2) && (bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) {
        encoding = "UTF-16BE";
        bomLength = 2;
    } else if ((n >= 2) && (bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) {
        encoding = "UTF-16LE";
        bomLength = 2;
    } else {
        // Unicode BOM mark not found, unread all bytes
        encoding = defaultEncoding;
        bomLength = 0;
    }

    if (log.isDebugEnabled()) {
        if (bomLength > 0) {
            log.debug("char set encoding set from BOM =" + encoding);
        } else {
            log.debug("char set encoding set from default =" + encoding);
        }
    }

    // Give back everything that followed the BOM.
    if (n > bomLength) {
        is2.unread(bom, bomLength, n - bomLength);
    }
    return encoding;
}

From source file:org.apache.cocoon.components.flow.javascript.fom.FOM_JavaScriptInterpreter.java

/**
 * Find the encoding of the stream, or null if not specified.
 * <p>
 * Reads up to {@code ENCODING_BUF_SIZE} bytes, pushes them back so the stream
 * is left untouched, and matches them (interpreted as ASCII) against
 * {@code encodingRE}.
 *
 * @param is the stream to inspect; its pushback buffer must be able to hold
 *           the bytes that are read here
 * @return the first parenthesised group of the match, or {@code null} when
 *         the stream is empty or the expression does not match
 * @throws IOException if reading from or pushing back into the stream fails
 */
String findEncoding(PushbackInputStream is) throws IOException {
    // Read some bytes
    byte[] buffer = new byte[ENCODING_BUF_SIZE];
    int len = is.read(buffer, 0, buffer.length);
    if (len <= 0) {
        // End of stream (read returned -1) or nothing read: there is nothing
        // to push back or to inspect. Passing a negative length on to
        // unread() / new String() would throw.
        return null;
    }
    // and push them back
    is.unread(buffer, 0, len);

    // Interpret them as an ASCII string
    String str = new String(buffer, 0, len, "ASCII");
    RE re = new RE(encodingRE);
    if (re.match(str)) {
        return re.getParen(1);
    }
    return null;
}

From source file:org.apache.pulsar.io.file.utils.GZipFiles.java

/**
 * Returns true if the given file is a gzip file.
 * <p>
 * Only the first two bytes are examined: they must be the standard gzip
 * magic number {@code 1f 8b}. A file that is missing, unreadable or shorter
 * than two bytes is reported as not being gzip.
 *
 * @param f the file to check
 * @return {@code true} if the file starts with the gzip magic number
 */
public static boolean isGzip(File f) {

    InputStream input = null;
    try {
        input = new FileInputStream(f);
        // read() yields the byte as 0..255, or -1 at end of file, so a short
        // file simply fails the comparison instead of raising an exception.
        int first = input.read();
        int second = input.read();
        // check if matches standard gzip magic number
        return first == 0x1f && second == 0x8b;
    } catch (final Exception e) {
        return false;
    } finally {
        if (input != null) {
            try {
                input.close();
            } catch (final Exception ignored) {
                // best effort: a failure to close must not change the answer
            }
        }
    }
}

From source file:org.commoncrawl.hadoop.io.deprecated.ArcFileReader.java

/**
 * Checks the two trailer values that follow the compressed data.
 * <p>
 * Bytes still held by {@code inf} (what {@code inf.getRemaining()} reports)
 * are first pushed back into the stream. Then two values are read with
 * {@code readUInt}: the first must equal {@code _crc.getValue()}, the second
 * the number of bytes written by {@code inf} modulo 2^32 (rfc1952 ISIZE).
 *
 * @throws IOException with the message "Corrupt GZIP trailer" if either
 *                     value does not match, or if reading/pushing back fails
 */
private void readTrailer() throws IOException {

    PushbackInputStream pushback = (PushbackInputStream) this.in;

    // The unconsumed bytes are the last `leftover` bytes of the `len`
    // bytes currently in `buf`; hand them back before reading the trailer.
    int leftover = inf.getRemaining();
    if (leftover > 0) {
        pushback.unread(buf, len - leftover, leftover);
    }

    // First trailer value: the checksum. On a mismatch the second value is
    // not read at all, as in a short-circuiting "||".
    if (readUInt(pushback) != _crc.getValue()) {
        throw new IOException("Corrupt GZIP trailer");
    }

    // Second trailer value: rfc1952; ISIZE is the input size modulo 2^32
    if (readUInt(pushback) != (inf.getBytesWritten() & 0xffffffffL)) {
        throw new IOException("Corrupt GZIP trailer");
    }
}

From source file:org.deegree.framework.xml.XMLFragment.java

/**
 * Reads the encoding of an XML document from its header. If no header (or no usable
 * encoding declaration) is available, <code>CharsetUtils.getSystemCharset()</code> is
 * returned.
 * <p>
 * At least 80 bytes are read (fewer only if the stream ends) and all of them are pushed
 * back afterwards, so the stream is left at its original position. The header is
 * compared in lower case, therefore a declared encoding name is returned in lower case
 * as well.
 * 
 * @param pbis
 *            stream positioned at the start of the document; its pushback buffer must be
 *            able to hold the bytes read here
 * @return encoding of a XML document
 * @throws IOException
 *             if reading from or pushing back into the stream fails
 */
private String readEncoding(PushbackInputStream pbis) throws IOException {
    byte[] b = new byte[80];
    String s = "";
    int rd = 0;

    // Chunks and their lengths, most recent first, so that pushing them back in list
    // order restores the original byte sequence.
    LinkedList<byte[]> bs = new LinkedList<byte[]>();
    LinkedList<Integer> rds = new LinkedList<Integer>();
    while (rd < 80) {
        int n = pbis.read(b);
        if (n == -1) {
            break;
        }
        rd += n;
        // ISO-8859-1 maps every byte to exactly one char, independent of the platform
        // charset; the fixed locale keeps the lower-casing independent of the default
        // locale (e.g. under a Turkish locale 'I' would not become 'i').
        s += new String(b, 0, n, "ISO-8859-1").toLowerCase(java.util.Locale.ENGLISH);
        bs.addFirst(b);
        rds.addFirst(n);
        b = new byte[80];
    }

    String encoding = CharsetUtils.getSystemCharset();
    if (s.indexOf("?>") > -1) {
        int p = s.indexOf("encoding=");
        if (p > -1) {
            StringBuffer sb = new StringBuffer();
            // skip 'encoding=' and the opening quote
            int k = p + 1 + "encoding=".length();
            while (k < s.length() && s.charAt(k) != '"' && s.charAt(k) != '\'') {
                sb.append(s.charAt(k++));
            }
            // Accept the value only if its closing quote was found inside the bytes
            // that were read; otherwise keep the system charset instead of running
            // past the end of the string.
            if (k < s.length()) {
                encoding = sb.toString();
            }
        }
    }
    while (!bs.isEmpty()) {
        pbis.unread(bs.poll(), 0, rds.poll());
    }

    return encoding;
}

From source file:org.methodize.nntprss.feed.Channel.java

/**
 * Consumes a leading Unicode byte order mark (UTF-32, UTF-16 or UTF-8), if
 * the stream starts with one.
 * <p>
 * Up to {@code PUSHBACK_BUFFER_SIZE} bytes are read; every byte that is not
 * part of a byte order mark is pushed back, so the stream continues with the
 * first byte after the mark, or is unchanged when there is no mark.
 *
 * @param is the stream to inspect; its pushback buffer must be able to hold
 *           {@code PUSHBACK_BUFFER_SIZE} bytes
 * @throws IOException if reading from or pushing back into the stream fails
 */
private static void skipBOM(PushbackInputStream is) throws IOException {
    byte[] header = new byte[PUSHBACK_BUFFER_SIZE];

    // A single read() may return fewer bytes than requested although more
    // are pending, so fill the buffer until it is full or the stream ends.
    int bytesRead = 0;
    while (bytesRead < header.length) {
        int count = is.read(header, bytesRead, header.length - bytesRead);
        if (count <= 0) {
            break;
        }
        bytesRead += count;
    }

    // Each test requires that enough bytes were really read, so the zero
    // padding of a short read is never mistaken for BOM bytes. The 4-byte
    // UTF-32 marks are tested before the 2-byte UTF-16 ones because the
    // UTF-32LE mark starts with the UTF-16LE mark.
    int bomLength;
    if (bytesRead >= 4 && header[0] == 0 && header[1] == 0 && (header[2] & 0xff) == 0xFE
            && (header[3] & 0xff) == 0xFF) {
        // UTF-32, big-endian
        bomLength = 4;
    } else if (bytesRead >= 4 && (header[0] & 0xff) == 0xFF && (header[1] & 0xff) == 0xFE && header[2] == 0
            && header[3] == 0) {
        // UTF-32, little-endian
        bomLength = 4;
    } else if (bytesRead >= 2 && (header[0] & 0xff) == 0xFE && (header[1] & 0xff) == 0xFF) {
        // UTF-16, big-endian
        bomLength = 2;
    } else if (bytesRead >= 2 && (header[0] & 0xff) == 0xFF && (header[1] & 0xff) == 0xFE) {
        // UTF-16, little-endian
        bomLength = 2;
    } else if (bytesRead >= 3 && (header[0] & 0xff) == 0xEF && (header[1] & 0xff) == 0xBB
            && (header[2] & 0xff) == 0xBF) {
        // UTF-8
        bomLength = 3;
    } else {
        // no byte order mark
        bomLength = 0;
    }

    // Push back exactly the bytes that were read after the mark; never
    // bytes that were not read from the stream.
    if (bytesRead > bomLength) {
        is.unread(header, bomLength, bytesRead - bomLength);
    }
}

From source file:org.springframework.ws.soap.saaj.SaajSoapMessageFactory.java

/**
 * Strips a leading UTF-8 Byte Order Mark from the given stream, because the SAAJ RI cannot cope
 * with these BOMs.
 * <p>
 * The first three bytes are read through a pushback stream. Unless {@code isByteOrderMark} accepts
 * them, the bytes that were read are pushed back, so the returned stream delivers the original
 * content.
 *
 * @param inputStream the stream to check
 * @return a stream over the same content; a BOM accepted by {@code isByteOrderMark} is left out
 * @throws IOException if reading from or pushing back into the stream fails
 * @see <a href="http://jira.springframework.org/browse/SWS-393">SWS-393</a>
 * @see <a href="http://unicode.org/faq/utf_bom.html#22">UTF-8 BOMs</a>
 */
private InputStream checkForUtf8ByteOrderMark(InputStream inputStream) throws IOException {
    byte[] head = new byte[3];
    PushbackInputStream result = new PushbackInputStream(new BufferedInputStream(inputStream), head.length);

    int count = result.read(head);

    // Nothing to restore at end of stream (-1); otherwise restore the bytes
    // unless they are the BOM that has to be dropped. See SWS-393
    boolean restore = count != -1 && !isByteOrderMark(head);
    if (restore) {
        result.unread(head, 0, count);
    }
    return result;
}