List of usage examples for java.io.PushbackInputStream.unread
public void unread(byte[] b, int off, int len) throws IOException
From source file:Main.java
public static void main(String[] args) { byte[] arrByte = new byte[1024]; byte[] byteArray = new byte[] { 'j', 'a', 'v', 'a', '2', 's', '.', 'c', 'o', 'm' }; InputStream is = new ByteArrayInputStream(byteArray); PushbackInputStream pis = new PushbackInputStream(is, 10); try {//from w ww . j a v a 2 s . c om for (int i = 0; i < byteArray.length; i++) { arrByte[i] = (byte) pis.read(); System.out.println((char) arrByte[i]); } byte[] b = { 'W', 'o', 'r', 'l', 'd' }; pis.unread(b, 2, 3); for (int i = 0; i < 3; i++) { arrByte[i] = (byte) pis.read(); System.out.println((char) arrByte[i]); } } catch (Exception ex) { ex.printStackTrace(); } }
From source file:Main.java
/**
 * Creates a reader over the first bytes of the given stream, decoded with the given encoding.
 *
 * <p>Up to 128 bytes are read from {@code pbis} and pushed back again before this method
 * returns, so the stream must have a pushback buffer of at least 128 bytes. The first
 * 4 bytes of what was read are skipped in the returned reader.
 *
 * @param pbis the stream to peek at; everything read from it is pushed back
 * @param enc  the name of the charset used to decode the peeked bytes
 * @return a reader over the peeked bytes, minus the first 4; empty if fewer than 5 bytes
 *         could be read
 * @throws IOException if reading or pushing back fails, or {@code enc} is not supported
 */
protected static Reader createXMLDeclarationReader(PushbackInputStream pbis, String enc) throws IOException {
    byte[] buf = new byte[128];
    int len = pbis.read(buf);

    if (len > 0) {
        pbis.unread(buf, 0, len);
    }

    // The third constructor argument is the number of bytes to expose starting at the
    // offset, not an end index: only len - 4 bytes of real data follow the 4 skipped ones.
    // Clamp to 0 so EOF (len == -1) and very short reads yield an empty reader.
    int remaining = Math.max(len - 4, 0);
    return new InputStreamReader(new ByteArrayInputStream(buf, 4, remaining), enc);
}
From source file:Main.java
/** * Creates a reader allowing to read the contents of specified text source. * <p>This method implements the detection of the encoding. * <p>Note that the detection of the encoding always works * because it uses a fallback value./*from w ww. j a va 2 s .c o m*/ * * @param in the text source * @param encoding the detected encoding is copied there. * May be <code>null</code>. * @return a reader allowing to read the contents of the text source. * This reader will automatically skip the BOM if any. * @exception IOException if there is an I/O problem */ public static Reader createReader(InputStream in, String fallbackEncoding, String[] encoding) throws IOException { byte[] bytes = new byte[1024]; int byteCount = -1; PushbackInputStream in2 = new PushbackInputStream(in, bytes.length); try { int count = in2.read(bytes, 0, bytes.length); if (count > 0) { in2.unread(bytes, 0, count); } byteCount = count; } catch (IOException ignored) { } String charset = null; if (byteCount > 0) { if (byteCount >= 2) { // Use BOM --- int b0 = (bytes[0] & 0xFF); int b1 = (bytes[1] & 0xFF); switch ((b0 << 8) | b1) { case 0xFEFF: charset = "UTF-16BE"; // We don't want to read the BOM. in2.skip(2); break; case 0xFFFE: charset = "UTF-16LE"; in2.skip(2); break; case 0xEFBB: if (byteCount >= 3 && (bytes[2] & 0xFF) == 0xBF) { charset = "UTF-8"; in2.skip(3); } break; } } if (charset == null) { // Unsupported characters are replaced by U+FFFD. String text = new String(bytes, 0, byteCount, "US-ASCII"); if (text.startsWith("<?xml")) { Pattern pattern = Pattern.compile("encoding\\s*=\\s*['\"]([^'\"]+)"); Matcher matcher = pattern.matcher(text); if (matcher.find()) { charset = matcher.group(1); } else { charset = "UTF-8"; } } } } if (charset == null) { charset = fallbackEncoding; if (charset == null) { charset = "UTF-8"; } } if (encoding != null) { encoding[0] = charset; } return new InputStreamReader(in2, charset); }
From source file:net.rptools.assets.supplier.AbstractURIAssetSupplier.java
/** * Create an asset and determine its format. * @param id id of asset/* w w w . ja v a 2 s .c o m*/ * @param type type of asset * @param listener listener to inform of (partial) completion * @param assetLength length, if known, or -1 * @param stream stream to read * @return asset * @throws IOException I/O problems */ @ThreadPolicy(ThreadPolicy.ThreadId.ANY) protected AssetImpl newAsset(final String id, final Type type, final AssetListener listener, final int assetLength, final InputStream stream) throws IOException { final InputStream input = new InputStreamInterceptor(getComponent().getFramework(), id, assetLength, stream, listener, getNotifyInterval()); final PushbackInputStream pushbackStream = new PushbackInputStream(input, PUSHBACK_LIMIT); final byte[] firstBytes = new byte[PUSHBACK_LIMIT]; final int actualLength = pushbackStream.read(firstBytes); if (actualLength != -1) { pushbackStream.unread(firstBytes, 0, actualLength); } final ByteArrayInputStream bais = new ByteArrayInputStream(firstBytes); final String mimeType = URLConnection.guessContentTypeFromStream(bais); LOGGER.debug("mimeType={}, actualLength={}", mimeType, actualLength); // read in image AssetImpl asset = null; switch (type) { case IMAGE: asset = new AssetImpl(new Image(pushbackStream)); asset.setMimetype(mimeType); break; case TEXT: asset = new AssetImpl(IOUtils.toString(pushbackStream, StandardCharsets.UTF_8.name())); asset.setMimetype(mimeType); break; default: break; } if (listener != null) { listener.notify(id, asset); } return asset; }
From source file:com.day.cq.wcm.foundation.impl.Rewriter.java
/** * Feed HTML into received over a {@link java.net.URLConnection} to an * HTML parser.//from w w w . j a v a 2 s. co m * @param in input stream * @param contentType preferred content type * @param response servlet response * @throws java.io.IOException if an I/O error occurs */ private void rewriteHtml(InputStream in, String contentType, HttpServletResponse response) throws IOException { // Determine encoding if not specified String encoding = "8859_1"; int charsetIndex = contentType.indexOf("charset="); if (charsetIndex != -1) { encoding = contentType.substring(charsetIndex + "charset=".length()).trim(); } else { byte[] buf = new byte[2048]; int len = fillBuffer(in, buf); String scanned = EncodingScanner.scan(buf, 0, len); if (scanned != null) { encoding = scanned; contentType += "; charset=" + encoding; } PushbackInputStream pb = new PushbackInputStream(in, buf.length); pb.unread(buf, 0, len); in = pb; } // Set appropriate content type and get writer response.setContentType(contentType); PrintWriter writer = response.getWriter(); setWriter(writer); // Define tags that should be inspected HashSet<String> inclusionSet = new HashSet<String>(); inclusionSet.add("A"); inclusionSet.add("AREA"); inclusionSet.add("BASE"); inclusionSet.add("FORM"); inclusionSet.add("FRAME"); inclusionSet.add("IFRAME"); inclusionSet.add("IMG"); inclusionSet.add("INPUT"); inclusionSet.add("LINK"); inclusionSet.add("SCRIPT"); inclusionSet.add("TABLE"); inclusionSet.add("TD"); inclusionSet.add("TR"); inclusionSet.add("NOREWRITE"); HtmlParser parser = new HtmlParser(); parser.setTagInclusionSet(inclusionSet); parser.setDocumentHandler(this); BufferedReader reader = null; try { reader = new BufferedReader(new InputStreamReader(in, encoding)); char buf[] = new char[2048]; int len; while ((len = reader.read(buf)) != -1) { parser.update(buf, 0, len); } parser.finished(); } finally { if (reader != null) { reader.close(); } } }
From source file:XmlReader.java
private void useEncodingDecl(PushbackInputStream pb, String encoding) throws IOException { byte buffer[] = new byte[MAXPUSHBACK]; int len;/*from ww w . java 2s . co m*/ Reader r; int c; // // Buffer up a bunch of input, and set up to read it in // the specified encoding ... we can skip the first four // bytes since we know that "<?xm" was read to determine // what encoding to use! // len = pb.read(buffer, 0, buffer.length); pb.unread(buffer, 0, len); r = new InputStreamReader(new ByteArrayInputStream(buffer, 4, len), encoding); // // Next must be "l" (and whitespace) else we conclude // error and choose UTF-8. // if ((c = r.read()) != 'l') { setEncoding(pb, "UTF-8"); return; } // // Then, we'll skip any // S version="..." [or single quotes] // bit and get any subsequent // S encoding="..." [or single quotes] // // We put an arbitrary size limit on how far we read; lots // of space will break this algorithm. // StringBuffer buf = new StringBuffer(); StringBuffer keyBuf = null; String key = null; boolean sawEq = false; char quoteChar = 0; boolean sawQuestion = false; XmlDecl: for (int i = 0; i < MAXPUSHBACK - 5; ++i) { if ((c = r.read()) == -1) break; // ignore whitespace before/between "key = 'value'" if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; // ... but require at least a little! if (i == 0) break; // terminate the loop ASAP if (c == '?') sawQuestion = true; else if (sawQuestion) { if (c == '>') break; sawQuestion = false; } // did we get the "key =" bit yet? 
if (key == null || !sawEq) { if (keyBuf == null) { if (Character.isWhitespace((char) c)) continue; keyBuf = buf; buf.setLength(0); buf.append((char) c); sawEq = false; } else if (Character.isWhitespace((char) c)) { key = keyBuf.toString(); } else if (c == '=') { if (key == null) key = keyBuf.toString(); sawEq = true; keyBuf = null; quoteChar = 0; } else keyBuf.append((char) c); continue; } // space before quoted value if (Character.isWhitespace((char) c)) continue; if (c == '"' || c == '\'') { if (quoteChar == 0) { quoteChar = (char) c; buf.setLength(0); continue; } else if (c == quoteChar) { if ("encoding".equals(key)) { assignedEncoding = buf.toString(); // [81] Encname ::= [A-Za-z] ([A-Za-z0-9._]|'-')* for (i = 0; i < assignedEncoding.length(); i++) { c = assignedEncoding.charAt(i); if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) continue; if (i == 0) break XmlDecl; if (i > 0 && (c == '-' || (c >= '0' && c <= '9') || c == '.' || c == '_')) continue; // map illegal names to UTF-8 default break XmlDecl; } setEncoding(pb, assignedEncoding); return; } else { key = null; continue; } } } buf.append((char) c); } setEncoding(pb, "UTF-8"); }
From source file:z.hol.net.http.entity.DeflateDecompressingEntity.java
/** * Returns the non-null InputStream that should be returned to by all requests to * {@link #getContent()}./*from w ww. j av a 2s . co m*/ * * @return a non-null InputStream * @throws IOException if there was a problem */ @Override InputStream getDecompressingInputStream(final InputStream wrapped) throws IOException { /* * A zlib stream will have a header. * * CMF | FLG [| DICTID ] | ...compressed data | ADLER32 | * * * CMF is one byte. * * * FLG is one byte. * * * DICTID is four bytes, and only present if FLG.FDICT is set. * * Sniff the content. Does it look like a zlib stream, with a CMF, etc? c.f. RFC1950, * section 2.2. http://tools.ietf.org/html/rfc1950#page-4 * * We need to see if it looks like a proper zlib stream, or whether it is just a deflate * stream. RFC2616 calls zlib streams deflate. Confusing, isn't it? That's why some servers * implement deflate Content-Encoding using deflate streams, rather than zlib streams. * * We could start looking at the bytes, but to be honest, someone else has already read * the RFCs and implemented that for us. So we'll just use the JDK libraries and exception * handling to do this. If that proves slow, then we could potentially change this to check * the first byte - does it look like a CMF? What about the second byte - does it look like * a FLG, etc. */ /* We read a small buffer to sniff the content. */ byte[] peeked = new byte[6]; PushbackInputStream pushback = new PushbackInputStream(wrapped, peeked.length); int headerLength = pushback.read(peeked); if (headerLength == -1) { throw new IOException("Unable to read the response"); } /* We try to read the first uncompressed byte. */ byte[] dummy = new byte[1]; Inflater inf = new Inflater(); try { int n; while ((n = inf.inflate(dummy)) == 0) { if (inf.finished()) { /* Not expecting this, so fail loudly. */ throw new IOException("Unable to read the response"); } if (inf.needsDictionary()) { /* Need dictionary - then it must be zlib stream with DICTID part? 
*/ break; } if (inf.needsInput()) { inf.setInput(peeked); } } if (n == -1) { throw new IOException("Unable to read the response"); } /* * We read something without a problem, so it's a valid zlib stream. Just need to reset * and return an unused InputStream now. */ pushback.unread(peeked, 0, headerLength); return new InflaterInputStream(pushback); } catch (DataFormatException e) { /* Presume that it's an RFC1951 deflate stream rather than RFC1950 zlib stream and try * again. */ pushback.unread(peeked, 0, headerLength); return new InflaterInputStream(pushback, new Inflater(true)); } }
From source file:org.mcxiaoke.commons.http.impl.DeflateDecompressingEntity.java
/**
 * Returns the non-null InputStream that should be returned by all
 * requests to {@link #getContent()}.
 *
 * <p>The first bytes of {@code wrapped} are peeked at to decide whether the
 * content is a zlib stream (RFC 1950) or a raw deflate stream (RFC 1951);
 * they are pushed back before the decompressing stream is created.
 *
 * @param wrapped
 *            the compressed content
 * @return a non-null InputStream
 * @throws IOException
 *             if there was a problem
 */
@Override
InputStream getDecompressingInputStream(final InputStream wrapped) throws IOException {
    /*
     * A zlib stream will have a header.
     *
     * CMF | FLG [| DICTID ] | ...compressed data | ADLER32 |
     *
     * * CMF is one byte.
     *
     * * FLG is one byte.
     *
     * * DICTID is four bytes, and only present if FLG.FDICT is set.
     *
     * Sniff the content. Does it look like a zlib stream, with a CMF, etc?
     * c.f. RFC1950, section 2.2. http://tools.ietf.org/html/rfc1950#page-4
     *
     * We need to see if it looks like a proper zlib stream, or whether it
     * is just a deflate stream. RFC2616 calls zlib streams deflate.
     * Confusing, isn't it? That's why some servers implement deflate
     * Content-Encoding using deflate streams, rather than zlib streams.
     *
     * We could start looking at the bytes, but to be honest, someone else
     * has already read the RFCs and implemented that for us. So we'll just
     * use the JDK libraries and exception handling to do this. If that
     * proves slow, then we could potentially change this to check the first
     * byte - does it look like a CMF? What about the second byte - does it
     * look like a FLG, etc.
     */

    /* We read a small buffer to sniff the content. */
    byte[] peeked = new byte[6];

    PushbackInputStream pushback = new PushbackInputStream(wrapped, peeked.length);

    int headerLength = pushback.read(peeked);

    if (headerLength == -1) {
        throw new IOException("Unable to read the response");
    }

    /* We try to read the first uncompressed byte. */
    byte[] dummy = new byte[1];

    Inflater inf = new Inflater();

    try {
        boolean fed = false;
        int n;
        while ((n = inf.inflate(dummy)) == 0) {
            if (inf.finished()) {
                /* Not expecting this, so fail loudly. */
                throw new IOException("Unable to read the response");
            }

            if (inf.needsDictionary()) {
                /*
                 * Need dictionary - then it must be zlib stream with DICTID
                 * part?
                 */
                break;
            }

            if (inf.needsInput()) {
                if (fed) {
                    /*
                     * The peeked bytes were consumed without a format error
                     * and the inflater wants more. Feeding the same bytes
                     * again would present already-consumed data as new input
                     * and could trigger a spurious DataFormatException, so
                     * treat the header as accepted.
                     */
                    break;
                }
                /*
                 * Only the bytes actually read are valid; a short read
                 * leaves the rest of the array zeroed.
                 */
                inf.setInput(peeked, 0, headerLength);
                fed = true;
            }
        }

        if (n == -1) {
            throw new IOException("Unable to read the response");
        }

        /*
         * We read something without a problem, so it's a valid zlib stream.
         * Just need to reset and return an unused InputStream now.
         */
        pushback.unread(peeked, 0, headerLength);
        return new InflaterInputStream(pushback);
    } catch (DataFormatException e) {
        /*
         * Presume that it's an RFC1951 deflate stream rather than RFC1950
         * zlib stream and try again.
         */
        pushback.unread(peeked, 0, headerLength);
        return new InflaterInputStream(pushback, new Inflater(true));
    } finally {
        /*
         * The probe inflater is never reused; release its native resources
         * right away.
         */
        inf.end();
    }
}
From source file:com.fanfou.app.opensource.http.support.DeflateDecompressingEntity.java
/**
 * Returns the non-null InputStream that should be returned by all
 * requests to {@link #getContent()}.
 *
 * <p>The first bytes of {@code wrapped} are peeked at to decide whether the
 * content is a zlib stream (RFC 1950) or a raw deflate stream (RFC 1951);
 * they are pushed back before the decompressing stream is created.
 *
 * @param wrapped
 *            the compressed content
 * @return a non-null InputStream
 * @throws IOException
 *             if there was a problem
 */
@Override
InputStream getDecompressingInputStream(final InputStream wrapped) throws IOException {
    /*
     * A zlib stream will have a header.
     *
     * CMF | FLG [| DICTID ] | ...compressed data | ADLER32 |
     *
     * * CMF is one byte.
     *
     * * FLG is one byte.
     *
     * * DICTID is four bytes, and only present if FLG.FDICT is set.
     *
     * Sniff the content. Does it look like a zlib stream, with a CMF, etc?
     * c.f. RFC1950, section 2.2. http://tools.ietf.org/html/rfc1950#page-4
     *
     * We need to see if it looks like a proper zlib stream, or whether it
     * is just a deflate stream. RFC2616 calls zlib streams deflate.
     * Confusing, isn't it? That's why some servers implement deflate
     * Content-Encoding using deflate streams, rather than zlib streams.
     *
     * We could start looking at the bytes, but to be honest, someone else
     * has already read the RFCs and implemented that for us. So we'll just
     * use the JDK libraries and exception handling to do this. If that
     * proves slow, then we could potentially change this to check the first
     * byte - does it look like a CMF? What about the second byte - does it
     * look like a FLG, etc.
     */

    /* We read a small buffer to sniff the content. */
    final byte[] peeked = new byte[6];

    final PushbackInputStream pushback = new PushbackInputStream(wrapped, peeked.length);

    final int headerLength = pushback.read(peeked);

    if (headerLength == -1) {
        throw new IOException("Unable to read the response");
    }

    /* We try to read the first uncompressed byte. */
    final byte[] dummy = new byte[1];

    final Inflater inf = new Inflater();

    try {
        boolean fed = false;
        int n;
        while ((n = inf.inflate(dummy)) == 0) {
            if (inf.finished()) {
                /* Not expecting this, so fail loudly. */
                throw new IOException("Unable to read the response");
            }

            if (inf.needsDictionary()) {
                /*
                 * Need dictionary - then it must be zlib stream with DICTID
                 * part?
                 */
                break;
            }

            if (inf.needsInput()) {
                if (fed) {
                    /*
                     * The peeked bytes were consumed without a format error
                     * and the inflater wants more. Feeding the same bytes
                     * again would present already-consumed data as new input
                     * and could trigger a spurious DataFormatException, so
                     * treat the header as accepted.
                     */
                    break;
                }
                /*
                 * Only the bytes actually read are valid; a short read
                 * leaves the rest of the array zeroed.
                 */
                inf.setInput(peeked, 0, headerLength);
                fed = true;
            }
        }

        if (n == -1) {
            throw new IOException("Unable to read the response");
        }

        /*
         * We read something without a problem, so it's a valid zlib stream.
         * Just need to reset and return an unused InputStream now.
         */
        pushback.unread(peeked, 0, headerLength);
        return new InflaterInputStream(pushback);
    } catch (final DataFormatException e) {
        /*
         * Presume that it's an RFC1951 deflate stream rather than RFC1950
         * zlib stream and try again.
         */
        pushback.unread(peeked, 0, headerLength);
        return new InflaterInputStream(pushback, new Inflater(true));
    } finally {
        /*
         * The probe inflater is never reused; release its native resources
         * right away.
         */
        inf.end();
    }
}
From source file:jef.tools.XMLUtils.java
/** * XML//from www .j a v a2s . c om * * @param in * ? * @param charSet * ? * @param ignorComment * * @return Document. DOM * @throws SAXException * ? * @throws IOException * */ public static Document loadDocument(InputStream in, String charSet, boolean ignorComments, boolean namespaceAware) throws SAXException, IOException { DocumentBuilder db = REUSABLE_BUILDER.get().getDocumentBuilder(ignorComments, namespaceAware); InputSource is = null; // ????charset if (charSet == null) {// ?200??? byte[] buf = new byte[200]; PushbackInputStream pin = new PushbackInputStream(in, 200); in = pin; int len = pin.read(buf); if (len > 0) { pin.unread(buf, 0, len); charSet = getCharsetInXml(buf, len); } } if (charSet != null) { is = new InputSource(new XmlFixedReader(new InputStreamReader(in, charSet))); is.setEncoding(charSet); } else { // ? Reader reader = new InputStreamReader(in, "UTF-8");// XML???Reader??Reader?XML? is = new InputSource(new XmlFixedReader(reader)); } Document doc = db.parse(is); doc.setXmlStandalone(true);// True???standalone="no" return doc; }