List of usage examples for java.io.PushbackInputStream.unread
public void unread(byte[] b) throws IOException
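Before the project-specific examples below, here is a minimal, self-contained sketch of what unread(byte[]) does: it pushes the given bytes back into the stream's pushback buffer so the next read returns them again. The class name, sample data, and buffer size of 6 are illustrative only and do not come from any of the projects listed.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.nio.charset.StandardCharsets;

public class UnreadExample {
    public static void main(String[] args) throws IOException {
        byte[] data = "HEADERbody".getBytes(StandardCharsets.US_ASCII);

        // The pushback buffer must be at least as large as the array passed to unread().
        PushbackInputStream in = new PushbackInputStream(new ByteArrayInputStream(data), 6);

        byte[] peek = new byte[6];
        int n = in.read(peek);                  // consume the first 6 bytes ("HEADER")
        System.out.println("peeked: " + new String(peek, 0, n, StandardCharsets.US_ASCII));

        in.unread(peek);                        // push all 6 bytes back onto the stream

        byte[] all = new byte[data.length];
        int total = in.read(all);               // the stream now yields "HEADERbody" from the start
        System.out.println("re-read: " + new String(all, 0, total, StandardCharsets.US_ASCII));
    }
}

The same peek-then-unread pattern is what most of the examples below use: look ahead at a few bytes, decide how to handle the stream, then push the bytes back so downstream code sees the stream unchanged.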
From source file:org.apache.james.mailbox.maildir.mail.model.MaildirMessage.java
/**
 * Return the position in the given {@link InputStream} at which the body of
 * the MailboxMessage starts.
 */
private int bodyStartOctet(InputStream msgIn) throws IOException {
    // we need to push back a maximum of 3 bytes
    PushbackInputStream in = new PushbackInputStream(msgIn, 3);
    int localBodyStartOctet = in.available();
    int i;
    int count = 0;
    while ((i = in.read()) != -1 && in.available() > 4) {
        if (i == 0x0D) {
            int a = in.read();
            if (a == 0x0A) {
                int b = in.read();
                if (b == 0x0D) {
                    int c = in.read();
                    if (c == 0x0A) {
                        localBodyStartOctet = count + 4;
                        break;
                    }
                    in.unread(c);
                }
                in.unread(b);
            }
            in.unread(a);
        }
        count++;
    }
    return localBodyStartOctet;
}
From source file:org.apache.nutch.protocol.htmlunit.HttpResponse.java
private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {

    while (readLine(in, line, true) != 0) {

        // handle HTTP responses with missing blank line after headers
        int pos;
        if (((pos = line.indexOf("<!DOCTYPE")) != -1) || ((pos = line.indexOf("<HTML")) != -1)
                || ((pos = line.indexOf("<html")) != -1)) {

            in.unread(line.substring(pos).getBytes("UTF-8"));
            line.setLength(pos);

            try {
                //TODO: (CM) We don't know the header names here
                //since we're just handling them generically. It would
                //be nice to provide some sort of mapping function here
                //for the returned header names to the standard metadata
                //names in the ParseData class
                processHeaderLine(line);
            } catch (Exception e) {
                // fixme:
                Http.LOG.warn("Error: ", e);
            }
            return;
        }

        processHeaderLine(line);
    }
}
From source file:org.apache.nutch.protocol.http.HttpResponse.java
private void parseHeaders(PushbackInputStream in, StringBuffer line, StringBuffer httpHeaders)
        throws IOException, HttpException {

    while (readLine(in, line, true) != 0) {

        if (httpHeaders != null)
            httpHeaders.append(line).append("\n");

        // handle HTTP responses with missing blank line after headers
        int pos;
        if (((pos = line.indexOf("<!DOCTYPE")) != -1) || ((pos = line.indexOf("<HTML")) != -1)
                || ((pos = line.indexOf("<html")) != -1)) {

            in.unread(line.substring(pos).getBytes("UTF-8"));
            line.setLength(pos);

            try {
                // TODO: (CM) We don't know the header names here
                // since we're just handling them generically. It would
                // be nice to provide some sort of mapping function here
                // for the returned header names to the standard metadata
                // names in the ParseData class
                processHeaderLine(line);
            } catch (Exception e) {
                // fixme:
                Http.LOG.warn("Error: ", e);
            }
            return;
        }

        processHeaderLine(line);
    }
}
From source file:org.apache.nutch.protocol.s2jh.HttpResponse.java
private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {

    while (readLine(in, line, true) != 0) {

        // handle HTTP responses with missing blank line after headers
        int pos;
        if (((pos = line.indexOf("<!DOCTYPE")) != -1) || ((pos = line.indexOf("<HTML")) != -1)
                || ((pos = line.indexOf("<html")) != -1)) {

            in.unread(line.substring(pos).getBytes("UTF-8"));
            line.setLength(pos);

            try {
                // TODO: (CM) We don't know the header names here
                // since we're just handling them generically. It would
                // be nice to provide some sort of mapping function here
                // for the returned header names to the standard metadata
                // names in the ParseData class
                processHeaderLine(line);
            } catch (Exception e) {
                // fixme:
                Http.LOG.error("Failed with the following exception: ", e);
            }
            return;
        }

        processHeaderLine(line);
    }
}
From source file:org.apache.pig.builtin.Utf8StorageConverter.java
private Tuple consumeTuple(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema == null) {
        throw new IOException("Schema is null");
    }
    int buf;
    ByteArrayOutputStream mOut;

    while ((buf = in.read()) != '(' || buf == '}') {
        if (buf == -1) {
            throw new IOException("Unexpect end of tuple");
        }
        if (buf == '}') {
            in.unread(buf);
            return null;
        }
    }

    Tuple t = TupleFactory.getInstance().newTuple();
    if (fieldSchema.getSchema() != null && fieldSchema.getSchema().getFields().length != 0) {
        ResourceFieldSchema[] fss = fieldSchema.getSchema().getFields();
        // Interpret item inside tuple one by one based on the inner schema
        for (int i = 0; i < fss.length; i++) {
            Object field;
            ResourceFieldSchema fs = fss[i];
            int delimit = ',';
            if (i == fss.length - 1)
                delimit = ')';

            if (DataType.isComplex(fs.getType())) {
                field = consumeComplexType(in, fs);
                while ((buf = in.read()) != delimit) {
                    if (buf == -1) {
                        throw new IOException("Unexpect end of tuple");
                    }
                }
            } else {
                mOut = new ByteArrayOutputStream(BUFFER_SIZE);
                while ((buf = in.read()) != delimit) {
                    if (buf == -1) {
                        throw new IOException("Unexpect end of tuple");
                    }
                    if (buf == delimit)
                        break;
                    mOut.write(buf);
                }
                field = parseSimpleType(mOut.toByteArray(), fs);
            }
            t.append(field);
        }
    } else {
        // No inner schema, treat everything inside tuple as bytearray
        // keep track of nested tuple/bag/map. We do not interpret, save them as bytearray
        Deque<Character> level = new LinkedList<Character>();
        mOut = new ByteArrayOutputStream(BUFFER_SIZE);
        while (true) {
            buf = in.read();
            if (buf == -1) {
                throw new IOException("Unexpect end of tuple");
            }
            if (buf == '[' || buf == '{' || buf == '(') {
                level.push((char) buf);
                mOut.write(buf);
            } else if (buf == ')' && level.isEmpty()) { // End of tuple
                DataByteArray value = new DataByteArray(mOut.toByteArray());
                t.append(value);
                break;
            } else if (buf == ',' && level.isEmpty()) {
                DataByteArray value = new DataByteArray(mOut.toByteArray());
                t.append(value);
                mOut.reset();
            } else if (buf == ']' || buf == '}' || buf == ')') {
                if (level.peek() == findStartChar((char) buf))
                    level.pop();
                else
                    throw new IOException("Malformed tuple");
                mOut.write(buf);
            } else
                mOut.write(buf);
        }
    }
    return t;
}
From source file:org.apache.synapse.format.hessian.HessianMessageBuilder.java
/**
 * Reads the first four bytes of the input stream to detect whether the message represents a
 * fault message. Once a fault message has been detected, a property used to mark fault messages
 * is stored in the Axis2 message context. The implementation uses a PushbackInputStream to be
 * able to put those four bytes back at the end of processing.
 *
 * @param messageContext the Axis2 message context
 * @param inputStream    the input stream to read the Hessian message
 *
 * @return the wrapped (pushback) input stream
 *
 * @throws IOException if an I/O error occurs
 */
private PushbackInputStream detectAndMarkMessageFault(final MessageContext messageContext,
        final InputStream inputStream) throws IOException {

    int bytesToRead = 4;
    PushbackInputStream pis = new PushbackInputStream(inputStream, bytesToRead);
    byte[] headerBytes = new byte[bytesToRead];
    int n = pis.read(headerBytes);

    // checking fourth byte for fault marker
    if (n == bytesToRead) {
        if (headerBytes[bytesToRead - 1] == HessianConstants.HESSIAN_V1_FAULT_IDENTIFIER
                || headerBytes[bytesToRead - 1] == HessianConstants.HESSIAN_V2_FAULT_IDENTIFIER) {
            messageContext.setProperty(BaseConstants.FAULT_MESSAGE, SynapseConstants.TRUE);
            if (log.isDebugEnabled()) {
                log.debug("Hessian fault detected, marking in Axis2 message context");
            }
        }
        pis.unread(headerBytes);
    } else if (n > 0) {
        byte[] bytesRead = new byte[n];
        System.arraycopy(headerBytes, 0, bytesRead, 0, n);
        pis.unread(bytesRead);
    }
    return pis;
}
From source file:org.apache.tika.parser.rtf.TextExtractor.java
private void parseHexChar(PushbackInputStream in) throws IOException, SAXException, TikaException {
    int hex1 = in.read();
    if (!isHexChar(hex1)) {
        // DOC ERROR (malformed hex escape): ignore
        in.unread(hex1);
        return;
    }

    int hex2 = in.read();
    if (!isHexChar(hex2)) {
        // TODO: log a warning here, somehow?
        // DOC ERROR (malformed hex escape): ignore
        in.unread(hex2);
        return;
    }

    if (ansiSkip != 0) {
        // Skip this ansi char since we are
        // still in the shadow of a unicode
        // escape:
        ansiSkip--;
    } else {
        // Unescape:
        addOutputByte(16 * hexValue(hex1) + hexValue(hex2));
    }
}
From source file:org.apache.tika.parser.rtf.TextExtractor.java
private void parseControlWord(int firstChar, PushbackInputStream in)
        throws IOException, SAXException, TikaException {
    addControl(firstChar);

    int b = in.read();
    while (isAlpha(b)) {
        addControl(b);
        b = in.read();
    }

    boolean hasParam = false;
    boolean negParam = false;
    if (b == '-') {
        negParam = true;
        hasParam = true;
        b = in.read();
    }

    int param = 0;
    while (isDigit(b)) {
        param *= 10;
        param += (b - '0');
        hasParam = true;
        b = in.read();
    }

    // space is consumed as part of the
    // control word, but is not added to the
    // control word
    if (b != ' ') {
        in.unread(b);
    }

    if (hasParam) {
        if (negParam) {
            param = -param;
        }
        processControlWord(param, in);
    } else {
        processControlWord();
    }

    pendingControlCount = 0;
}
From source file:org.apache.tika.parser.rtf.TextExtractor.java
private void processGroupStart(PushbackInputStream in) throws IOException {
    ansiSkip = 0;

    // Push current groupState onto the stack
    groupStates.add(groupState);

    // Make new GroupState
    groupState = new GroupState(groupState);
    assert groupStates.size() == groupState.depth : "size=" + groupStates.size() + " depth=" + groupState.depth;

    if (uprState == 0) {
        uprState = 1;
        groupState.ignore = true;
    }

    // Check for ignorable groups. Note that
    // sometimes we un-ignore within this group, eg
    // when handling upr escape.
    int b2 = in.read();
    if (b2 == '\\') {
        int b3 = in.read();
        if (b3 == '*') {
            groupState.ignore = true;
        }
        in.unread(b3);
    }
    in.unread(b2);
}
From source file:org.apache.usergrid.services.assets.data.AWSBinaryStore.java
@Override
public void write(final UUID appId, final Entity entity, InputStream inputStream) throws Exception {

    String uploadFileName = AssetUtils.buildAssetKey(appId, entity);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    long written = IOUtils.copyLarge(inputStream, baos, 0, FIVE_MB);

    byte[] data = baos.toByteArray();
    InputStream awsInputStream = new ByteArrayInputStream(data);

    final Map<String, Object> fileMetadata = AssetUtils.getFileMetadata(entity);
    fileMetadata.put(AssetUtils.LAST_MODIFIED, System.currentTimeMillis());

    String mimeType = AssetMimeHandler.get().getMimeType(entity, data);

    Boolean overSizeLimit = false;

    EntityManager em = emf.getEntityManager(appId);

    if (written < FIVE_MB) { // total smaller than 5mb

        ObjectMetadata om = new ObjectMetadata();
        om.setContentLength(written);
        om.setContentType(mimeType);
        PutObjectResult result = null;
        result = getS3Client().putObject(bucketName, uploadFileName, awsInputStream, om);

        String md5sum = Hex.encodeHexString(Base64.decodeBase64(result.getContentMd5()));
        String eTag = result.getETag();

        fileMetadata.put(AssetUtils.CONTENT_LENGTH, written);

        if (md5sum != null)
            fileMetadata.put(AssetUtils.CHECKSUM, md5sum);
        fileMetadata.put(AssetUtils.E_TAG, eTag);

        em.update(entity);

    } else { // bigger than 5mb... dump 5 mb tmp files and upload from them

        written = 0; // reset written to 0, we still haven't written anything yet
        int partNumber = 1;
        int firstByte = 0;
        Boolean isFirstChunck = true;
        List<PartETag> partETags = new ArrayList<PartETag>();

        // get the s3 client in order to initialize the multipart request
        getS3Client();
        InitiateMultipartUploadRequest initRequest = new InitiateMultipartUploadRequest(bucketName,
                uploadFileName);
        InitiateMultipartUploadResult initResponse = getS3Client().initiateMultipartUpload(initRequest);

        InputStream firstChunck = new ByteArrayInputStream(data);
        PushbackInputStream chunckableInputStream = new PushbackInputStream(inputStream, 1);

        // determine max size file allowed, default to 50mb
        long maxSizeBytes = 50 * FileUtils.ONE_MB;
        String maxSizeMbString = properties.getProperty("usergrid.binary.max-size-mb", "50");
        if (StringUtils.isNumeric(maxSizeMbString)) {
            maxSizeBytes = Long.parseLong(maxSizeMbString) * FileUtils.ONE_MB;
        }

        // always allow files up to 5mb
        if (maxSizeBytes < 5 * FileUtils.ONE_MB) {
            maxSizeBytes = 5 * FileUtils.ONE_MB;
        }

        while (-1 != (firstByte = chunckableInputStream.read())) {
            long partSize = 0;
            chunckableInputStream.unread(firstByte);
            File tempFile = File.createTempFile(
                    entity.getUuid().toString().concat("-part").concat(String.valueOf(partNumber)), "tmp");
            tempFile.deleteOnExit();
            OutputStream os = null;
            try {
                os = new BufferedOutputStream(new FileOutputStream(tempFile.getAbsolutePath()));

                if (isFirstChunck == true) {
                    partSize = IOUtils.copyLarge(firstChunck, os, 0, (FIVE_MB));
                    isFirstChunck = false;
                } else {
                    partSize = IOUtils.copyLarge(chunckableInputStream, os, 0, (FIVE_MB));
                }
                written += partSize;

                if (written > maxSizeBytes) {
                    overSizeLimit = true;
                    logger.error("OVERSIZED FILE ({}). STARTING ABORT", written);
                    break;
                    // set flag here and break out of loop to run abort
                }
            } finally {
                IOUtils.closeQuietly(os);
            }

            FileInputStream chunk = new FileInputStream(tempFile);

            Boolean isLastPart = -1 == (firstByte = chunckableInputStream.read());
            if (!isLastPart)
                chunckableInputStream.unread(firstByte);

            UploadPartRequest uploadRequest = new UploadPartRequest().withUploadId(initResponse.getUploadId())
                    .withBucketName(bucketName).withKey(uploadFileName).withInputStream(chunk)
                    .withPartNumber(partNumber).withPartSize(partSize).withLastPart(isLastPart);

            partETags.add(getS3Client().uploadPart(uploadRequest).getPartETag());
            partNumber++;
        }

        // check for flag here then abort.
        if (overSizeLimit) {

            AbortMultipartUploadRequest abortRequest = new AbortMultipartUploadRequest(bucketName,
                    uploadFileName, initResponse.getUploadId());

            ListMultipartUploadsRequest listRequest = new ListMultipartUploadsRequest(bucketName);

            MultipartUploadListing listResult = getS3Client().listMultipartUploads(listRequest);

            // update the entity with the error.
            try {
                logger.error("starting update of entity due to oversized asset");
                fileMetadata.put("error", "Asset size is larger than max size of " + maxSizeBytes);
                em.update(entity);
            } catch (Exception e) {
                logger.error("Error updating entity with error message", e);
            }

            int timesIterated = 20;
            // loop and abort all the multipart uploads
            while (listResult.getMultipartUploads().size() != 0 && timesIterated > 0) {

                getS3Client().abortMultipartUpload(abortRequest);
                Thread.sleep(1000);
                timesIterated--;
                listResult = getS3Client().listMultipartUploads(listRequest);
                if (logger.isDebugEnabled()) {
                    logger.debug("Files that haven't been aborted are: {}",
                            listResult.getMultipartUploads().listIterator().toString());
                }
            }
            if (timesIterated == 0) {
                logger.error("Files parts that couldn't be aborted in 20 seconds are:");
                Iterator<MultipartUpload> multipartUploadIterator = listResult.getMultipartUploads().iterator();
                while (multipartUploadIterator.hasNext()) {
                    logger.error(multipartUploadIterator.next().getKey());
                }
            }
        } else {
            CompleteMultipartUploadRequest request = new CompleteMultipartUploadRequest(bucketName,
                    uploadFileName, initResponse.getUploadId(), partETags);
            CompleteMultipartUploadResult amazonResult = getS3Client().completeMultipartUpload(request);
            fileMetadata.put(AssetUtils.CONTENT_LENGTH, written);
            fileMetadata.put(AssetUtils.E_TAG, amazonResult.getETag());
            em.update(entity);
        }
    }
}