me.tatetian.hs.io.SamplableByteArrayOutputStream.java Source code

Java tutorial

Introduction

Here is the source code for me.tatetian.hs.io.SamplableByteArrayOutputStream.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package me.tatetian.hs.io;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.SequenceInputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import me.tatetian.hs.sampler.Sampler;

/**
 * An in-memory output stream that remembers record boundaries and can shrink
 * itself, in place, to a sample of the records written so far.
 * <p>
 * This class is modified from
 * <code>org.apache.commons.io.output.ByteArrayOutputStream</code>. It is the
 * same as that class except for its ability to update itself with sampled
 * records (see {@link #newRecord()} and {@link #updateBy(Sampler)}).
 * <p>
 * This class implements an output stream in which the data is
 * written into a byte array. The buffer automatically grows as data
 * is written to it.
 * <p>
 * The data can be retrieved using <code>toByteArray()</code> and
 * <code>toString()</code>.
 * <p>
 * Closing a {@code ByteArrayOutputStream} has no effect. The methods in
 * this class can be called after the stream has been closed without
 * generating an {@code IOException}.
 * <p>
 * This is an alternative implementation of the {@link java.io.ByteArrayOutputStream}
 * class. The original implementation only allocates 32 bytes at the beginning.
 * As this class is designed for heavy duty it starts at 1024 bytes. In contrast
 * to the original it doesn't reallocate the whole memory block but allocates
 * additional buffers. This way no buffers need to be garbage collected and
 * the contents don't have to be copied to the new buffer. This class is
 * designed to behave exactly like the original. The only exception is the
 * deprecated toString(int) method that has been ignored.
 * <p>
 * Apart from {@link #write(InputStream)}, which keeps the
 * {@code synchronized} modifier of the Commons IO original, no method of
 * this class is synchronized, so instances are not safe for concurrent use.
 *
 * @author Hongliang Tian
 * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
 * @author Holger Hoffstatte
 * @version $Id: ByteArrayOutputStream.java 1153506 2011-08-03 14:32:38Z ggregory $
 */
public class SamplableByteArrayOutputStream extends OutputStream {
    //===========================================================================
    // Added variables
    //===========================================================================

    /** Number of bytes written since the last record boundary. */
    private int recordSize = 0; // in bytes

    /** Sizes, in bytes and in write order, of all completed records. */
    private final List<Integer> recordSizes = new ArrayList<Integer>(1024);

    //===========================================================================
    // Added methods
    //===========================================================================

    /**
     * Marks the end of the current record.
     * <p>
     * The number of bytes written since the previous boundary is remembered
     * as the size of one record. If nothing has been written since the
     * previous boundary, no (empty) record is registered.
     */
    public void newRecord() {
        if (recordSize > 0) {
            recordSizes.add(recordSize);
        }
        recordSize = 0;
    }

    /**
     * Updates this stream so that it only contains the records selected by
     * the given sampler.
     * <p>
     * {@code sampler.next()} is called exactly once per recorded record, in
     * write order; a record is kept when it returns {@code true}.
     * <p>
     * This operation should be fairly efficient for two reasons:
     * <ol>
     * <li>No need to parse records: the size of each record is already known;</li>
     * <li>In-place update: kept records are compacted towards the start of
     * the existing buffers, so unnecessary copying is minimized.</li>
     * </ol>
     *
     * @param sampler decides, record by record, what is kept
     */
    public void updateBy(Sampler sampler) {
        // Finish unfinished record
        newRecord();
        // Rewind the write cursor so that kept records overwrite the original
        // data. The write cursor can never overtake the read cursor below,
        // because at most as many bytes are written as have been read.
        oldReset();

        // Read cursor over the original data
        int readBufferIndex = 0;
        byte[] readBuffer = buffers.get(readBufferIndex);
        int readPos = 0;
        int keptRecords = 0;

        for (int i = 0; i < recordSizes.size(); i++) {
            final int size = recordSizes.get(i);
            final boolean keep = sampler.next();
            if (keep) {
                // Compact the size list in step with the data
                recordSizes.set(keptRecords, size);
                keptRecords++;
            }

            int remaining = size;
            while (remaining > 0) {
                // Advance lazily: the next buffer is only fetched while there
                // are still bytes to read. The last record may end exactly at
                // the end of the last allocated buffer, in which case there
                // is no further buffer to move to.
                while (readPos == readBuffer.length) {
                    readBufferIndex++;
                    readBuffer = buffers.get(readBufferIndex);
                    readPos = 0;
                }
                int chunk = Math.min(remaining, readBuffer.length - readPos);
                if (keep) {
                    // Source and destination may be the same array;
                    // write() copies with System.arraycopy.
                    write(readBuffer, readPos, chunk);
                }
                readPos += chunk;
                remaining -= chunk;
            }
        }

        // Drop the sizes of the records that were not kept
        recordSizes.subList(keptRecords, recordSizes.size()).clear();
        // write() above accumulated into recordSize; no record is open now
        recordSize = 0;
    }

    /**
     * Rewinds the write position to the start of the first buffer without
     * touching the record bookkeeping. Allocated buffers are kept for reuse.
     */
    private void oldReset() {
        count = 0;
        filledBufferSum = 0;
        currentBufferIndex = 0;
        currentBuffer = buffers.get(currentBufferIndex);
    }

    //===========================================================================
    // Modified methods:
    //   public void write(byte[] b, int off, int len);
    //   public void write(int b);
    //   public synchronized int write(InputStream in) throws IOException;
    //   public void reset();
    //===========================================================================

    //===========================================================================
    // Added static class by Hongliang Tian
    //===========================================================================

    /**
     * Copied from org.apache.commons.io.ClosedInputStream
     * <p>
     * Closed input stream. This stream returns -1 to all attempts to read
     * something from the stream.
     * <p>
     * Typically uses of this class include testing for corner cases in methods
     * that accept input streams and acting as a sentinel value instead of a
     * <code>null</code> input stream.
     *
     * @version $Id: ClosedInputStream.java 659817 2008-05-24 13:23:10Z niallp $
     * @since Commons IO 1.4
     */
    public static class ClosedInputStream extends InputStream {
        /**
         * A singleton.
         */
        public static final ClosedInputStream CLOSED_INPUT_STREAM = new ClosedInputStream();

        /**
         * Returns -1 to indicate that the stream is closed.
         *
         * @return always -1
         */
        @Override
        public int read() {
            return -1;
        }
    }

    //===========================================================================
    // Variables and methods from org.apache.commons.io.output.ByteArrayOutputStream
    //===========================================================================

    /** A singleton empty byte array. */
    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];

    /** The list of buffers, which grows and never reduces. */
    private final List<byte[]> buffers = new ArrayList<byte[]>();
    /** The index of the current buffer. */
    private int currentBufferIndex;
    /** The total count of bytes in all the filled buffers. */
    private int filledBufferSum;
    /** The current buffer. */
    private byte[] currentBuffer;
    /** The total count of bytes written. */
    private int count;

    /**
     * Creates a new byte array output stream. The buffer capacity is
     * initially 1024 bytes, though its size increases if necessary.
     */
    public SamplableByteArrayOutputStream() {
        this(1024);
    }

    /**
     * Creates a new byte array output stream, with a buffer capacity of
     * the specified size, in bytes.
     *
     * @param size  the initial size
     * @throws IllegalArgumentException if size is negative
     */
    public SamplableByteArrayOutputStream(int size) {
        if (size < 0) {
            throw new IllegalArgumentException("Negative initial size: " + size);
        }
        needNewBuffer(size);
    }

    /**
     * Makes a new buffer available either by allocating
     * a new one or re-cycling an existing one.
     *
     * @param newcount  the size of the buffer if one is created
     */
    private void needNewBuffer(int newcount) {
        if (currentBufferIndex < buffers.size() - 1) {
            // Recycling old buffer
            filledBufferSum += currentBuffer.length;

            currentBufferIndex++;
            currentBuffer = buffers.get(currentBufferIndex);
        } else {
            // Creating new buffer
            int newBufferSize;
            if (currentBuffer == null) {
                newBufferSize = newcount;
                filledBufferSum = 0;
            } else {
                newBufferSize = Math.max(currentBuffer.length << 1, newcount - filledBufferSum);
                if (newBufferSize == 0) {
                    // Only reachable when the stream was created with size 0
                    // and write(InputStream) asks to grow: another empty
                    // buffer would make that method loop forever.
                    newBufferSize = 1;
                }
                filledBufferSum += currentBuffer.length;
            }

            currentBufferIndex++;
            currentBuffer = new byte[newBufferSize];
            buffers.add(currentBuffer);
        }
    }

    /**
     * Write the bytes to byte array. The bytes are also counted towards the
     * size of the current record.
     *
     * @param b the bytes to write
     * @param off The start offset
     * @param len The number of bytes to write
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *         negative, or {@code off + len} exceeds {@code b.length}
     */
    @Override
    public void write(byte[] b, int off, int len) {
        if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) > b.length) || ((off + len) < 0)) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return;
        }
        int newcount = count + len;
        int remaining = len;
        int inBufferPos = count - filledBufferSum;
        while (remaining > 0) {
            // Fill what is left of the current buffer, then move on
            int part = Math.min(remaining, currentBuffer.length - inBufferPos);
            System.arraycopy(b, off + len - remaining, currentBuffer, inBufferPos, part);
            remaining -= part;
            if (remaining > 0) {
                needNewBuffer(newcount);
                inBufferPos = 0;
            }
        }
        count = newcount;

        // Added by Hongliang Tian
        recordSize += len;
    }

    /**
     * Write a byte to byte array. The byte is also counted towards the size
     * of the current record.
     *
     * @param b the byte to write
     */
    @Override
    public void write(int b) {
        int inBufferPos = count - filledBufferSum;
        if (inBufferPos == currentBuffer.length) {
            needNewBuffer(count + 1);
            inBufferPos = 0;
        }
        currentBuffer[inBufferPos] = (byte) b;
        count++;

        // Added by Hongliang Tian
        recordSize += 1;
    }

    /**
     * Writes the entire contents of the specified input stream to this
     * byte stream. Bytes from the input stream are read directly into the
     * internal buffers of this streams. All bytes read are counted towards
     * the size of the current record.
     *
     * @param in the input stream to read from
     * @return total number of bytes read from the input stream
     *         (and written to this stream)
     * @throws IOException if an I/O error occurs while reading the input stream
     * @since Commons IO 1.4
     */
    public synchronized int write(InputStream in) throws IOException {
        int readCount = 0;
        int inBufferPos = count - filledBufferSum;
        int n = in.read(currentBuffer, inBufferPos, currentBuffer.length - inBufferPos);
        while (n != -1) {
            readCount += n;
            inBufferPos += n;
            count += n;
            if (inBufferPos == currentBuffer.length) {
                needNewBuffer(currentBuffer.length);
                inBufferPos = 0;
            }
            n = in.read(currentBuffer, inBufferPos, currentBuffer.length - inBufferPos);
        }

        // Added by Hongliang Tian
        recordSize += readCount;

        return readCount;
    }

    /**
     * Return the current size of the byte array.
     *
     * @return the current size of the byte array
     */
    public int size() {
        return count;
    }

    /**
     * Closing a {@code ByteArrayOutputStream} has no effect. The methods in
     * this class can be called after the stream has been closed without
     * generating an {@code IOException}.
     *
     * @throws IOException never (this method should not declare this exception
     * but it has to now due to backwards compatibility)
     */
    @Override
    public void close() throws IOException {
        //nop
    }

    /**
     * Discards all written data and all record bookkeeping. Allocated
     * buffers are kept and reused by subsequent writes.
     *
     * @see java.io.ByteArrayOutputStream#reset()
     */
    public void reset() {
        // Modified by Hongliang Tian
        oldReset();

        // Added by Hongliang Tian
        recordSize = 0;
        recordSizes.clear();
    }

    /**
     * Writes the entire contents of this byte stream to the
     * specified output stream.
     *
     * @param out  the output stream to write to
     * @throws IOException if an I/O error occurs, such as if the stream is closed
     * @see java.io.ByteArrayOutputStream#writeTo(OutputStream)
     */
    public void writeTo(OutputStream out) throws IOException {
        int remaining = count;
        for (byte[] buf : buffers) {
            int c = Math.min(buf.length, remaining);
            out.write(buf, 0, c);
            remaining -= c;
            if (remaining == 0) {
                break;
            }
        }
    }

    /**
     * Fetches entire contents of an <code>InputStream</code> and represent
     * same data as result InputStream.
     * <p>
     * This method is useful where,
     * <ul>
     * <li>Source InputStream is slow.</li>
     * <li>It has network resources associated, so we cannot keep it open for
     * long time.</li>
     * <li>It has network timeout associated.</li>
     * </ul>
     * It can be used in favor of {@link #toByteArray()}, since it
     * avoids unnecessary allocation and copy of byte[].<br>
     * This method buffers the input internally, so there is no need to use a
     * <code>BufferedInputStream</code>.
     *
     * @param input Stream to be fully buffered.
     * @return A fully buffered stream.
     * @throws IOException if an I/O error occurs
     * @since Commons IO 2.0
     */
    public static InputStream toBufferedInputStream(InputStream input) throws IOException {
        SamplableByteArrayOutputStream output = new SamplableByteArrayOutputStream();
        output.write(input);
        return output.toBufferedInputStream();
    }

    /**
     * Gets the current contents of this byte stream as a Input Stream. The
     * returned stream is backed by buffers of <code>this</code> stream,
     * avoiding memory allocation and copy, thus saving space and time.<br>
     *
     * @return the current contents of this output stream.
     * @see java.io.ByteArrayOutputStream#toByteArray()
     * @see #reset()
     * @since Commons IO 2.0
     */
    private InputStream toBufferedInputStream() {
        int remaining = count;
        if (remaining == 0) {
            return new ClosedInputStream();
        }
        List<ByteArrayInputStream> list = new ArrayList<ByteArrayInputStream>(buffers.size());
        for (byte[] buf : buffers) {
            int c = Math.min(buf.length, remaining);
            list.add(new ByteArrayInputStream(buf, 0, c));
            remaining -= c;
            if (remaining == 0) {
                break;
            }
        }
        return new SequenceInputStream(Collections.enumeration(list));
    }

    /**
     * Gets the current contents of this byte stream as a byte array.
     * The result is independent of this stream.
     *
     * @return the current contents of this output stream, as a byte array
     * @see java.io.ByteArrayOutputStream#toByteArray()
     */
    public byte[] toByteArray() {
        int remaining = count;
        if (remaining == 0) {
            return EMPTY_BYTE_ARRAY;
        }
        byte[] newbuf = new byte[remaining];
        int pos = 0;
        for (byte[] buf : buffers) {
            int c = Math.min(buf.length, remaining);
            System.arraycopy(buf, 0, newbuf, pos, c);
            pos += c;
            remaining -= c;
            if (remaining == 0) {
                break;
            }
        }
        return newbuf;
    }

    /**
     * Gets the current contents of this byte stream as a string, decoded
     * with the platform's default charset.
     *
     * @return the contents of the byte array as a String
     * @see java.io.ByteArrayOutputStream#toString()
     */
    @Override
    public String toString() {
        return new String(toByteArray());
    }

    /**
     * Gets the current contents of this byte stream as a string
     * using the specified encoding.
     *
     * @param enc  the name of the character encoding
     * @return the string converted from the byte array
     * @throws UnsupportedEncodingException if the encoding is not supported
     * @see java.io.ByteArrayOutputStream#toString(String)
     */
    public String toString(String enc) throws UnsupportedEncodingException {
        return new String(toByteArray(), enc);
    }

}