List of usage examples for java.io PushbackInputStream read
public int read(byte[] b, int off, int len) throws IOException
Reads up to len bytes of data from this input stream into an array of bytes.
From source file:Main.java
public static void main(String[] args) { byte[] arrByte = new byte[1024]; byte[] byteArray = new byte[] { 'j', 'a', 'v', 'a', '2', 's', '.', 'c', 'o', 'm' }; // create object of PushbackInputStream class for specified stream InputStream is = new ByteArrayInputStream(byteArray); PushbackInputStream pis = new PushbackInputStream(is); try {//from w w w. j a va 2s .co m // read a char into our array pis.read(arrByte, 0, 3); // print arrByte for (int i = 0; i < 3; i++) { System.out.println((char) arrByte[i]); } } catch (Exception ex) { ex.printStackTrace(); } }
From source file:Main.java
/** * Creates a reader allowing to read the contents of specified text source. * <p>This method implements the detection of the encoding. * <p>Note that the detection of the encoding always works * because it uses a fallback value./*w ww . j a va 2 s.c o m*/ * * @param in the text source * @param encoding the detected encoding is copied there. * May be <code>null</code>. * @return a reader allowing to read the contents of the text source. * This reader will automatically skip the BOM if any. * @exception IOException if there is an I/O problem */ public static Reader createReader(InputStream in, String fallbackEncoding, String[] encoding) throws IOException { byte[] bytes = new byte[1024]; int byteCount = -1; PushbackInputStream in2 = new PushbackInputStream(in, bytes.length); try { int count = in2.read(bytes, 0, bytes.length); if (count > 0) { in2.unread(bytes, 0, count); } byteCount = count; } catch (IOException ignored) { } String charset = null; if (byteCount > 0) { if (byteCount >= 2) { // Use BOM --- int b0 = (bytes[0] & 0xFF); int b1 = (bytes[1] & 0xFF); switch ((b0 << 8) | b1) { case 0xFEFF: charset = "UTF-16BE"; // We don't want to read the BOM. in2.skip(2); break; case 0xFFFE: charset = "UTF-16LE"; in2.skip(2); break; case 0xEFBB: if (byteCount >= 3 && (bytes[2] & 0xFF) == 0xBF) { charset = "UTF-8"; in2.skip(3); } break; } } if (charset == null) { // Unsupported characters are replaced by U+FFFD. String text = new String(bytes, 0, byteCount, "US-ASCII"); if (text.startsWith("<?xml")) { Pattern pattern = Pattern.compile("encoding\\s*=\\s*['\"]([^'\"]+)"); Matcher matcher = pattern.matcher(text); if (matcher.find()) { charset = matcher.group(1); } else { charset = "UTF-8"; } } } } if (charset == null) { charset = fallbackEncoding; if (charset == null) { charset = "UTF-8"; } } if (encoding != null) { encoding[0] = charset; } return new InputStreamReader(in2, charset); }
From source file:XmlReader.java
private void useEncodingDecl(PushbackInputStream pb, String encoding) throws IOException { byte buffer[] = new byte[MAXPUSHBACK]; int len;// w w w . j a v a2 s .c om Reader r; int c; // // Buffer up a bunch of input, and set up to read it in // the specified encoding ... we can skip the first four // bytes since we know that "<?xm" was read to determine // what encoding to use! // len = pb.read(buffer, 0, buffer.length); pb.unread(buffer, 0, len); r = new InputStreamReader(new ByteArrayInputStream(buffer, 4, len), encoding); // // Next must be "l" (and whitespace) else we conclude // error and choose UTF-8. // if ((c = r.read()) != 'l') { setEncoding(pb, "UTF-8"); return; } // // Then, we'll skip any // S version="..." [or single quotes] // bit and get any subsequent // S encoding="..." [or single quotes] // // We put an arbitrary size limit on how far we read; lots // of space will break this algorithm. // StringBuffer buf = new StringBuffer(); StringBuffer keyBuf = null; String key = null; boolean sawEq = false; char quoteChar = 0; boolean sawQuestion = false; XmlDecl: for (int i = 0; i < MAXPUSHBACK - 5; ++i) { if ((c = r.read()) == -1) break; // ignore whitespace before/between "key = 'value'" if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue; // ... but require at least a little! if (i == 0) break; // terminate the loop ASAP if (c == '?') sawQuestion = true; else if (sawQuestion) { if (c == '>') break; sawQuestion = false; } // did we get the "key =" bit yet? 
if (key == null || !sawEq) { if (keyBuf == null) { if (Character.isWhitespace((char) c)) continue; keyBuf = buf; buf.setLength(0); buf.append((char) c); sawEq = false; } else if (Character.isWhitespace((char) c)) { key = keyBuf.toString(); } else if (c == '=') { if (key == null) key = keyBuf.toString(); sawEq = true; keyBuf = null; quoteChar = 0; } else keyBuf.append((char) c); continue; } // space before quoted value if (Character.isWhitespace((char) c)) continue; if (c == '"' || c == '\'') { if (quoteChar == 0) { quoteChar = (char) c; buf.setLength(0); continue; } else if (c == quoteChar) { if ("encoding".equals(key)) { assignedEncoding = buf.toString(); // [81] Encname ::= [A-Za-z] ([A-Za-z0-9._]|'-')* for (i = 0; i < assignedEncoding.length(); i++) { c = assignedEncoding.charAt(i); if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) continue; if (i == 0) break XmlDecl; if (i > 0 && (c == '-' || (c >= '0' && c <= '9') || c == '.' || c == '_')) continue; // map illegal names to UTF-8 default break XmlDecl; } setEncoding(pb, assignedEncoding); return; } else { key = null; continue; } } } buf.append((char) c); } setEncoding(pb, "UTF-8"); }
From source file:com.digitalpebble.storm.crawler.protocol.http.HttpResponse.java
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException { boolean doneChunks = false; int contentBytesRead = 0; byte[] bytes = new byte[HttpProtocol.BUFFER_SIZE]; ByteArrayOutputStream out = new ByteArrayOutputStream(HttpProtocol.BUFFER_SIZE); while (!doneChunks) { if (HttpProtocol.LOGGER.isTraceEnabled()) { HttpProtocol.LOGGER.trace("Http: starting chunk"); }/* ww w . java 2s. co m*/ readLine(in, line, false); String chunkLenStr; // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + // "'"); } int pos = line.indexOf(";"); if (pos < 0) { chunkLenStr = line.toString(); } else { chunkLenStr = line.substring(0, pos); // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + // line.substring(pos+1)); } } chunkLenStr = chunkLenStr.trim(); int chunkLen; try { chunkLen = Integer.parseInt(chunkLenStr, 16); } catch (NumberFormatException e) { throw new HttpException("bad chunk length: " + line.toString()); } if (chunkLen == 0) { doneChunks = true; break; } if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent()) chunkLen = http.getMaxContent() - contentBytesRead; // read one chunk int chunkBytesRead = 0; while (chunkBytesRead < chunkLen) { int toRead = (chunkLen - chunkBytesRead) < HttpProtocol.BUFFER_SIZE ? (chunkLen - chunkBytesRead) : HttpProtocol.BUFFER_SIZE; int len = in.read(bytes, 0, toRead); if (len == -1) throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks" + " and " + chunkBytesRead + " in current chunk"); // DANGER!!! Will printed GZIPed stuff right to your // terminal! 
// if (LOG.isTraceEnabled()) { LOG.trace("read: " + new // String(bytes, 0, len)); } out.write(bytes, 0, len); chunkBytesRead += len; } readLine(in, line, false); } if (!doneChunks) { if (contentBytesRead != http.getMaxContent()) throw new HttpException("chunk eof: !doneChunk && didn't max out"); return; } content = out.toByteArray(); parseHeaders(in, line); }
From source file:org.adl.parsers.dom.ADLDOMParser.java
/** * Sets up the file source for the test subject file. * * @param iFileName file to setup input source for. * * @return InputSource// w ww . j a v a 2 s.c o m */ private InputSource setupFileSource(String iFileName) { log.debug("setupFileSource()"); String msgText; boolean defaultEncoding = true; String encoding = null; PushbackInputStream inputStream; FileInputStream inFile; try { File xmlFile = new File(iFileName); log.debug(xmlFile.getAbsolutePath()); if (xmlFile.isFile()) { InputSource is = null; defaultEncoding = true; if (xmlFile.length() > 1) { inFile = new FileInputStream(xmlFile); inputStream = new PushbackInputStream(inFile, 4); // Reads the initial 4 bytes of the file to check for a Byte // Order Mark and determine the encoding byte bom[] = new byte[4]; int n, pushBack; n = inputStream.read(bom, 0, bom.length); // UTF-8 Encoded if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { encoding = "UTF-8"; defaultEncoding = false; pushBack = n - 3; } // UTF-16 Big Endian Encoded else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { encoding = "UTF-16BE"; defaultEncoding = false; pushBack = n - 2; } // UTF-16 Little Endian Encoded else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { encoding = "UTF-16LE"; defaultEncoding = false; pushBack = n - 2; } // Default encoding else { // Unicode BOM mark not found, unread all bytes pushBack = n; } // Place any non-BOM bytes back into the stream if (pushBack > 0) { inputStream.unread(bom, (n - pushBack), pushBack); } if (defaultEncoding == true) { //Reads in ASCII file. 
FileReader fr = new FileReader(xmlFile); is = new InputSource(fr); } // Reads the file in the determined encoding else { //Creates a buffer with the size of the xml encoded file BufferedReader inStream = new BufferedReader(new InputStreamReader(inputStream, encoding)); StringBuffer dataString = new StringBuffer(); String s = ""; //Builds the encoded file to be parsed while ((s = inStream.readLine()) != null) { dataString.append(s); } inStream.close(); inputStream.close(); inFile.close(); is = new InputSource(new StringReader(dataString.toString())); is.setEncoding(encoding); } } return is; } else if ((iFileName.length() > 6) && (iFileName.substring(0, 5).equals("http:") || iFileName.substring(0, 6).equals("https:"))) { URL xmlURL = new URL(iFileName); InputStream xmlIS = xmlURL.openStream(); InputSource is = new InputSource(xmlIS); return is; } else { msgText = "XML File: " + iFileName + " is not a file or URL"; log.error(msgText); } } catch (NullPointerException npe) { msgText = "Null pointer exception" + npe; log.error(msgText); } catch (SecurityException se) { msgText = "Security Exception" + se; log.error(msgText); } catch (FileNotFoundException fnfe) { msgText = "File Not Found Exception" + fnfe; log.error(msgText); } catch (Exception e) { msgText = "General Exception" + e; log.error(msgText); } log.debug("setUpFileSource()"); return new InputSource(); }
From source file:org.apache.axis2.builder.BuilderUtil.java
/** * Use the BOM Mark to identify the encoding to be used. Fall back to default encoding * specified/*from w w w . j a v a2 s . c o m*/ * * @param is2 PushBackInputStream (it must be a pushback input stream so that we can * unread the BOM) * @param defaultEncoding default encoding style if no BOM * @return the selected character set encoding * @throws java.io.IOException */ public static String getCharSetEncoding(PushbackInputStream is2, String defaultEncoding) throws IOException { String encoding; byte bom[] = new byte[BOM_SIZE]; int n, unread; n = is2.read(bom, 0, bom.length); if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { encoding = "UTF-8"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 3; } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { encoding = "UTF-16BE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 2; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { encoding = "UTF-16LE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 2; } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { encoding = "UTF-32BE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 4; } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { encoding = "UTF-32LE"; if (log.isDebugEnabled()) { log.debug("char set encoding set from BOM =" + encoding); } unread = n - 4; } else { // Unicode BOM mark not found, unread all bytes encoding = defaultEncoding; if (log.isDebugEnabled()) { log.debug("char set encoding set from default =" + encoding); } unread = n; } if (unread > 0) { is2.unread(bom, (n - unread), unread); } return encoding; }
From source file:org.apache.cocoon.components.flow.javascript.fom.FOM_JavaScriptInterpreter.java
/** * Find the encoding of the stream, or null if not specified *//* ww w. ja v a2s .c o m*/ String findEncoding(PushbackInputStream is) throws IOException { // Read some bytes byte[] buffer = new byte[ENCODING_BUF_SIZE]; int len = is.read(buffer, 0, buffer.length); // and push them back is.unread(buffer, 0, len); // Interpret them as an ASCII string String str = new String(buffer, 0, len, "ASCII"); RE re = new RE(encodingRE); if (re.match(str)) { return re.getParen(1); } return null; }
From source file:org.apache.nutch.protocol.htmlunit.HttpResponse.java
/** * /*from ww w . j a va 2s . c om*/ * @param in * @param line * @throws HttpException * @throws IOException */ @SuppressWarnings("unused") private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException { boolean doneChunks = false; int contentBytesRead = 0; byte[] bytes = new byte[Http.BUFFER_SIZE]; ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); while (!doneChunks) { if (Http.LOG.isTraceEnabled()) { Http.LOG.trace("Http: starting chunk"); } readLine(in, line, false); String chunkLenStr; // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); } int pos = line.indexOf(";"); if (pos < 0) { chunkLenStr = line.toString(); } else { chunkLenStr = line.substring(0, pos); // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + line.substring(pos+1)); } } chunkLenStr = chunkLenStr.trim(); int chunkLen; try { chunkLen = Integer.parseInt(chunkLenStr, 16); } catch (NumberFormatException e) { throw new HttpException("bad chunk length: " + line.toString()); } if (chunkLen == 0) { doneChunks = true; break; } if ((contentBytesRead + chunkLen) > http.getMaxContent()) chunkLen = http.getMaxContent() - contentBytesRead; // read one chunk int chunkBytesRead = 0; while (chunkBytesRead < chunkLen) { int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead) : Http.BUFFER_SIZE; int len = in.read(bytes, 0, toRead); if (len == -1) throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks" + " and " + chunkBytesRead + " in current chunk"); // DANGER!!! Will printed GZIPed stuff right to your // terminal! 
// if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0, len)); } out.write(bytes, 0, len); chunkBytesRead += len; } readLine(in, line, false); } if (!doneChunks) { if (contentBytesRead != http.getMaxContent()) throw new HttpException("chunk eof: !doneChunk && didn't max out"); return; } content = out.toByteArray(); parseHeaders(in, line); }
From source file:org.apache.nutch.protocol.http.HttpResponse.java
/** * @param in/*from ww w .j a v a 2s . c om*/ * @param line * @throws HttpException * @throws IOException */ private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException { boolean doneChunks = false; int contentBytesRead = 0; byte[] bytes = new byte[Http.BUFFER_SIZE]; ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); while (!doneChunks) { if (Http.LOG.isTraceEnabled()) { Http.LOG.trace("Http: starting chunk"); } readLine(in, line, false); String chunkLenStr; // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); // } int pos = line.indexOf(";"); if (pos < 0) { chunkLenStr = line.toString(); } else { chunkLenStr = line.substring(0, pos); // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + // line.substring(pos+1)); } } chunkLenStr = chunkLenStr.trim(); int chunkLen; try { chunkLen = Integer.parseInt(chunkLenStr, 16); } catch (NumberFormatException e) { throw new HttpException("bad chunk length: " + line.toString()); } if (chunkLen == 0) { doneChunks = true; break; } if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent()) chunkLen = http.getMaxContent() - contentBytesRead; // read one chunk int chunkBytesRead = 0; while (chunkBytesRead < chunkLen) { int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead) : Http.BUFFER_SIZE; int len = in.read(bytes, 0, toRead); if (len == -1) throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks" + " and " + chunkBytesRead + " in current chunk"); // DANGER!!! Will printed GZIPed stuff right to your // terminal! 
// if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0, // len)); } out.write(bytes, 0, len); chunkBytesRead += len; } readLine(in, line, false); } if (!doneChunks) { if (contentBytesRead != http.getMaxContent()) throw new HttpException("chunk eof: !doneChunk && didn't max out"); return; } content = out.toByteArray(); parseHeaders(in, line, null); }
From source file:org.apache.nutch.protocol.s2jh.HttpResponse.java
/** * /*from w w w. j ava 2s .co m*/ * @param in * @param line * @throws HttpException * @throws IOException */ @SuppressWarnings("unused") private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException { boolean doneChunks = false; int contentBytesRead = 0; byte[] bytes = new byte[Http.BUFFER_SIZE]; ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); while (!doneChunks) { if (Http.LOG.isTraceEnabled()) { Http.LOG.trace("Http: starting chunk"); } readLine(in, line, false); String chunkLenStr; // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); // } int pos = line.indexOf(";"); if (pos < 0) { chunkLenStr = line.toString(); } else { chunkLenStr = line.substring(0, pos); // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + // line.substring(pos+1)); } } chunkLenStr = chunkLenStr.trim(); int chunkLen; try { chunkLen = Integer.parseInt(chunkLenStr, 16); } catch (NumberFormatException e) { throw new HttpException("bad chunk length: " + line.toString()); } if (chunkLen == 0) { doneChunks = true; break; } if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http.getMaxContent()) chunkLen = http.getMaxContent() - contentBytesRead; // read one chunk int chunkBytesRead = 0; while (chunkBytesRead < chunkLen) { int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead) : Http.BUFFER_SIZE; int len = in.read(bytes, 0, toRead); if (len == -1) throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks" + " and " + chunkBytesRead + " in current chunk"); // DANGER!!! Will printed GZIPed stuff right to your // terminal! 
// if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0, // len)); } out.write(bytes, 0, len); chunkBytesRead += len; } readLine(in, line, false); } if (!doneChunks) { if (contentBytesRead != http.getMaxContent()) throw new HttpException("chunk eof: !doneChunk && didn't max out"); return; } content = out.toByteArray(); parseHeaders(in, line); }