Usage examples for java.io.PushbackInputStream.unread
public void unread(byte[] b, int off, int len) throws IOException
From source file:XmlReader.java
/**
 * Wraps the given byte stream and selects a character encoding by peeking
 * at its first four bytes.
 *
 * <p>The stream is always wrapped in a fresh {@link PushbackInputStream}
 * of size {@code MAXPUSHBACK}, even when it already is one: a stream
 * obtained from an HTTP URL connection may be a pushback stream with a
 * much smaller buffer, and pushing back more than it can hold fails.
 *
 * @param stream the raw byte stream holding the XML document
 * @throws IOException if peeking at, or pushing back, the leading bytes fails
 */
private XmlReader(InputStream stream) throws IOException {
    super(stream);

    final PushbackInputStream pushback = new PushbackInputStream(stream, MAXPUSHBACK);

    // Peek at the first few bytes, then put back whatever was read.
    final byte[] head = new byte[4];
    final int count = pushback.read(head);
    if (count > 0) {
        pushback.unread(head, 0, count);
    }

    // The signatures below are only examined when all four bytes were read.
    if (count == 4) {
        final int b0 = head[0] & 0xff;
        final int b1 = head[1] & 0xff;
        final int b2 = head[2] & 0xff;
        final int b3 = head[3] & 0xff;

        // 00 3c 00 3f: "<?" as big-endian 16-bit units, no byte order mark.
        if (b0 == 0x00 && b1 == 0x3c && b2 == 0x00 && b3 == 0x3f) {
            setEncoding(pushback, "UnicodeBig");
            return;
        }

        // 3c 00 3f 00: "<?" as little-endian 16-bit units, no byte order mark.
        if (b0 == 0x3c && b1 == 0x00 && b2 == 0x3f && b3 == 0x00) {
            setEncoding(pushback, "UnicodeLittle");
            return;
        }

        // 3c 3f 78 6d: "<?xm" in ASCII or a superset (UTF-8, ISO-8859-*,
        // Shift-JIS, ...); the encoding declaration decides which one.
        if (b0 == 0x3c && b1 == 0x3f && b2 == 'x' && b3 == 'm') {
            useEncodingDecl(pushback, "UTF8");
            return;
        }

        // 4c 6f a7 94: "<?xm" in some EBCDIC code page.
        if (b0 == 0x4c && b1 == 0x6f && b2 == 0xa7 && b3 == 0x94) {
            useEncodingDecl(pushback, "CP037");
            return;
        }

        // fe ff / ff fe: UTF-16 byte order mark, big- or little-endian.
        if ((b0 == 0xfe && b1 == 0xff) || (b0 == 0xff && b1 == 0xfe)) {
            setEncoding(pushback, "UTF-16");
            return;
        }
    }

    // Nothing recognised: assume XML without a declaration, in UTF-8.
    setEncoding(pushback, "UTF-8");
}
From source file:org.adl.parsers.dom.ADLDOMParser.java
/** * Sets up the file source for the test subject file. * * @param iFileName file to setup input source for. * * @return InputSource/*from w w w . java 2 s .c om*/ */ private InputSource setupFileSource(String iFileName) { log.debug("setupFileSource()"); String msgText; boolean defaultEncoding = true; String encoding = null; PushbackInputStream inputStream; FileInputStream inFile; try { File xmlFile = new File(iFileName); log.debug(xmlFile.getAbsolutePath()); if (xmlFile.isFile()) { InputSource is = null; defaultEncoding = true; if (xmlFile.length() > 1) { inFile = new FileInputStream(xmlFile); inputStream = new PushbackInputStream(inFile, 4); // Reads the initial 4 bytes of the file to check for a Byte // Order Mark and determine the encoding byte bom[] = new byte[4]; int n, pushBack; n = inputStream.read(bom, 0, bom.length); // UTF-8 Encoded if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { encoding = "UTF-8"; defaultEncoding = false; pushBack = n - 3; } // UTF-16 Big Endian Encoded else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { encoding = "UTF-16BE"; defaultEncoding = false; pushBack = n - 2; } // UTF-16 Little Endian Encoded else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { encoding = "UTF-16LE"; defaultEncoding = false; pushBack = n - 2; } // Default encoding else { // Unicode BOM mark not found, unread all bytes pushBack = n; } // Place any non-BOM bytes back into the stream if (pushBack > 0) { inputStream.unread(bom, (n - pushBack), pushBack); } if (defaultEncoding == true) { //Reads in ASCII file. 
FileReader fr = new FileReader(xmlFile); is = new InputSource(fr); } // Reads the file in the determined encoding else { //Creates a buffer with the size of the xml encoded file BufferedReader inStream = new BufferedReader(new InputStreamReader(inputStream, encoding)); StringBuffer dataString = new StringBuffer(); String s = ""; //Builds the encoded file to be parsed while ((s = inStream.readLine()) != null) { dataString.append(s); } inStream.close(); inputStream.close(); inFile.close(); is = new InputSource(new StringReader(dataString.toString())); is.setEncoding(encoding); } } return is; } else if ((iFileName.length() > 6) && (iFileName.substring(0, 5).equals("http:") || iFileName.substring(0, 6).equals("https:"))) { URL xmlURL = new URL(iFileName); InputStream xmlIS = xmlURL.openStream(); InputSource is = new InputSource(xmlIS); return is; } else { msgText = "XML File: " + iFileName + " is not a file or URL"; log.error(msgText); } } catch (NullPointerException npe) { msgText = "Null pointer exception" + npe; log.error(msgText); } catch (SecurityException se) { msgText = "Security Exception" + se; log.error(msgText); } catch (FileNotFoundException fnfe) { msgText = "File Not Found Exception" + fnfe; log.error(msgText); } catch (Exception e) { msgText = "General Exception" + e; log.error(msgText); } log.debug("setUpFileSource()"); return new InputSource(); }
From source file:org.apache.axis2.builder.BuilderUtil.java
/** * Use the BOM Mark to identify the encoding to be used. Fall back to default encoding * specified// w w w . j a v a 2s. c om * * @param is2 PushBackInputStream (it must be a pushback input stream so that we can * unread the BOM) * @param defaultEncoding default encoding style if no BOM * @return the selected character set encoding * @throws java.io.IOException */ public static String getCharSetEncoding(PushbackInputStream is2, String defaultEncoding) throws IOException { String encoding; byte bom[] = new byte[BOM_SIZE]; int n, unread; n = is2.read(bom, 0, bom.length); if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { encoding = "UTF-8"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 3; } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { encoding = "UTF-16BE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 2; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { encoding = "UTF-16LE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 2; } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { encoding = "UTF-32BE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 4; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { encoding = "UTF-32LE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 4; } else { // Unicode BOM mark not found, unread all bytes encoding = defaultEncoding; if (log.isDebugEnabled()) { log.debug("char set encoding set from default =" + encoding); } unread = n; } if (unread > 0) { is2.unread(bom, (n - unread), unread); } return encoding; }
From source file:org.apache.cocoon.components.flow.javascript.fom.FOM_JavaScriptInterpreter.java
/** * Find the encoding of the stream, or null if not specified *//*from w w w. ja v a 2 s . c om*/ String findEncoding(PushbackInputStream is) throws IOException { // Read some bytes byte[] buffer = new byte[ENCODING_BUF_SIZE]; int len = is.read(buffer, 0, buffer.length); // and push them back is.unread(buffer, 0, len); // Interpret them as an ASCII string String str = new String(buffer, 0, len, "ASCII"); RE re = new RE(encodingRE); if (re.match(str)) { return re.getParen(1); } return null; }
From source file:org.apache.pulsar.io.file.utils.GZipFiles.java
/** * Returns true if the given file is a gzip file. *//*from ww w . ja v a2 s .c o m*/ @SuppressWarnings("deprecation") public static boolean isGzip(File f) { InputStream input = null; try { input = new FileInputStream(f); PushbackInputStream pb = new PushbackInputStream(input, 2); byte[] signature = new byte[2]; int len = pb.read(signature); //read the signature pb.unread(signature, 0, len); //push back the signature to the stream // check if matches standard gzip magic number return (signature[0] == (byte) 0x1f && signature[1] == (byte) 0x8b); } catch (final Exception e) { return false; } finally { IOUtils.closeQuietly(input); } }
From source file:org.commoncrawl.hadoop.io.deprecated.ArcFileReader.java
private void readTrailer() throws IOException { PushbackInputStream in = (PushbackInputStream) this.in; int n = inf.getRemaining(); if (n > 0) { in.unread(buf, len - n, n); }//from ww w.j a v a 2 s . c o m // Uses left-to-right evaluation order if ((readUInt(in) != _crc.getValue()) || // rfc1952; ISIZE is the input size modulo 2^32 (readUInt(in) != (inf.getBytesWritten() & 0xffffffffL))) throw new IOException("Corrupt GZIP trailer"); }
From source file:org.deegree.framework.xml.XMLFragment.java
/**
 * Reads the encoding of an XML document from its header. If no header, or
 * no complete encoding declaration, is available,
 * <code>CharsetUtils.getSystemCharset()</code> is returned.
 *
 * <p>At least 80 bytes (or everything up to the end of the stream) are
 * read for inspection and pushed back afterwards, so the stream is left
 * where it started. The encoding name is returned in lower case.
 *
 * @param pbis
 *            stream positioned at the start of the document; its pushback
 *            buffer must be able to hold the bytes that are read here
 * @return encoding of a XML document
 * @throws IOException
 *             if reading from or unreading to the stream fails
 */
private String readEncoding(PushbackInputStream pbis) throws IOException {
    // Chunks are kept newest-first so that they can be pushed back in
    // reverse read order, which restores the original byte sequence.
    LinkedList<byte[]> chunks = new LinkedList<byte[]>();
    LinkedList<Integer> chunkLengths = new LinkedList<Integer>();
    StringBuilder text = new StringBuilder();

    int total = 0;
    while (total < 80) {
        byte[] chunk = new byte[80];
        int count = pbis.read(chunk);
        if (count == -1) {
            break;
        }
        total += count;
        text.append(new String(chunk, 0, count));
        chunks.addFirst(chunk);
        chunkLengths.addFirst(count);
    }

    // Restore the stream before any parsing takes place.
    while (!chunks.isEmpty()) {
        pbis.unread(chunks.poll(), 0, chunkLengths.poll());
    }

    // Lower-case with a fixed locale: the default-locale variant would turn
    // 'I' into a dotless i under a Turkish locale and corrupt names such as
    // ISO-8859-1. Locale is fully qualified so that no new import is needed.
    String s = text.toString().toLowerCase(java.util.Locale.ENGLISH);

    String encoding = CharsetUtils.getSystemCharset();
    int declEnd = s.indexOf("?>");
    if (declEnd > -1) {
        int p = s.indexOf("encoding=");
        // Only a declaration inside the header counts, not text after it.
        if (p > -1 && p < declEnd) {
            // Skip "encoding=" and the opening quote character.
            int start = p + 1 + "encoding=".length();
            int end = start;
            while (end < declEnd && s.charAt(end) != '"' && s.charAt(end) != '\'') {
                end++;
            }
            // Without a closing quote inside the header the default is kept
            // instead of running past the end of the text.
            if (end < declEnd) {
                encoding = s.substring(start, end);
            }
        }
    }
    return encoding;
}
From source file:org.methodize.nntprss.feed.Channel.java
private static void skipBOM(PushbackInputStream is) throws IOException { byte[] header = new byte[PUSHBACK_BUFFER_SIZE]; int bytesRead = is.read(header); if (header[0] == 0 && header[1] == 0 && (header[2] & 0xff) == 0xFE && (header[3] & 0xff) == 0xFF) { // UTF-32, big-endian } else if ((header[0] & 0xff) == 0xFF && (header[1] & 0xff) == 0xFE && header[2] == 0 && header[3] == 0) { // UTF-32, little-endian } else if ((header[0] & 0xff) == 0xFE && (header[1] & 0xff) == 0xFF) { is.unread(header, 2, 2); // UTF-16, big-endian } else if ((header[0] & 0xff) == 0xFF && (header[1] & 0xff) == 0xFE) { is.unread(header, 2, 2);/* w w w .j ava 2s . c o m*/ // UTF-16, little-endian } else if ((header[0] & 0xff) == 0xEF && (header[1] & 0xff) == 0xBB && (header[2] & 0xff) == 0xBF) { // UTF-8 is.unread(header, 3, 1); } else { is.unread(header, 0, PUSHBACK_BUFFER_SIZE); } }
From source file:org.springframework.ws.soap.saaj.SaajSoapMessageFactory.java
/** * Checks for the UTF-8 Byte Order Mark, and removes it if present. The SAAJ RI cannot cope with these BOMs. * * @see <a href="http://jira.springframework.org/browse/SWS-393">SWS-393</a> * @see <a href="http://unicode.org/faq/utf_bom.html#22">UTF-8 BOMs</a> *//*from w w w. j av a2 s . co m*/ private InputStream checkForUtf8ByteOrderMark(InputStream inputStream) throws IOException { PushbackInputStream pushbackInputStream = new PushbackInputStream(new BufferedInputStream(inputStream), 3); byte[] bytes = new byte[3]; int bytesRead = pushbackInputStream.read(bytes); if (bytesRead != -1) { // check for the UTF-8 BOM, and remove it if there. See SWS-393 if (!isByteOrderMark(bytes)) { pushbackInputStream.unread(bytes, 0, bytesRead); } } return pushbackInputStream; }