LZFInputStream and LZFOutputStream

      
/*
 * Copyright 2004-2010 H2 Group. Multiple-Licensed under the H2 License,
 * Version 1.0, and under the Eclipse Public License, Version 1.0
 * (http://h2database.com/html/license.html).
 * Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
 * Copyright (c) 2005 Oren J. Maurice <oymaurice@hazorea.org.il>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 *   3.  The name of the author may not be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

//package com.hyk.proxy.common.rpc.extension.compress.lzf;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * An input stream to read from an LZF stream.
 * The data is automatically expanded.
 */
public class LZFInputStream extends InputStream {

    private final InputStream in;
    private CompressLZF decompress = new CompressLZF();
    private int pos;
    private int bufferLength;
    private byte[] inBuffer;
    private byte[] buffer;
    
    /**
     * Copied from org.h2.util.Utils --- Start
     * 
     * Create an array of bytes with the given size. If this is not possible
     * because not enough memory is available, an OutOfMemoryError with the
     * requested size in the message is thrown.
     *
     * @param len the number of bytes requested
     * @return the byte array
     * @throws OutOfMemoryError
     */
    
    public static final byte[] EMPTY_BYTES = {};
    
    public static byte[] newBytes(int len) {
        try {
            if (len == 0) {
                return EMPTY_BYTES;
            }
            return new byte[len];
        } catch (OutOfMemoryError e) {
            Error e2 = new OutOfMemoryError("Requested memory: " + len);
            e2.initCause(e);
            throw e2;
        }
    }
    // org.h2.util.Utils.Java --- END

    
    public LZFInputStream(InputStream in) throws IOException {
        this.in = in;
        if (readInt() != LZFOutputStream.MAGIC) {
            throw new IOException("Not an LZFInputStream");
        }
    }

    private byte[] ensureSize(byte[] buff, int len) {
        return buff == null || buff.length < len ? newBytes(len) : buff;
    }

    private void fillBuffer() throws IOException {
        if (buffer != null && pos < bufferLength) {
            return;
        }
        int len = readInt();
        if (decompress == null) {
            // EOF
            this.bufferLength = 0;
        } else if (len < 0) {
            len = -len;
            buffer = ensureSize(buffer, len);
            readFully(buffer, len);
            this.bufferLength = len;
        } else {
            inBuffer = ensureSize(inBuffer, len);
            int size = readInt();
            readFully(inBuffer, len);
            buffer = ensureSize(buffer, size);
            decompress.expand(inBuffer, 0, len, buffer, 0, size);
            this.bufferLength = size;
        }
        pos = 0;
    }

    private void readFully(byte[] buff, int len) throws IOException {
        int off = 0;
        while (len > 0) {
            int l = in.read(buff, off, len);
            len -= l;
            off += l;
        }
    }

    private int readInt() throws IOException {
        int x = in.read();
        if (x < 0) {
            decompress = null;
            return 0;
        }
        x = (x << 24) + (in.read() << 16) + (in.read() << 8) + in.read();
        return x;
    }

    public int read() throws IOException {
        fillBuffer();
        if (pos >= bufferLength) {
            return -1;
        }
        return buffer[pos++] & 255;
    }

    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    public int read(byte[] b, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int read = 0;
        while (len > 0) {
            int r = readBlock(b, off, len);
            if (r < 0) {
                break;
            }
            read += r;
            off += r;
            len -= r;
        }
        return read == 0 ? -1 : read;
    }

    private int readBlock(byte[] b, int off, int len) throws IOException {
        fillBuffer();
        if (pos >= bufferLength) {
            return -1;
        }
        int max = Math.min(len, bufferLength - pos);
        max = Math.min(max, b.length - off);
        System.arraycopy(buffer, pos, b, off, max);
        pos += max;
        return max;
    }

    public void close() throws IOException {
        in.close();
    }

}

/**
 * An output stream to write an LZF stream.
 * The data is automatically compressed.
 */
 class LZFOutputStream extends OutputStream {

  
  public static final int IO_BUFFER_SIZE_COMPRESS = 128 * 1024;
  
    /**
     * The file header of a LZF file.
     */
    static final int MAGIC = ('H' << 24) | ('2' << 16) | ('I' << 8) | 'S';

    private final OutputStream out;
    private final CompressLZF compress = new CompressLZF();
    private final byte[] buffer;
    private int pos;
    private byte[] outBuffer;

    public LZFOutputStream(OutputStream out) throws IOException {
        this.out = out;
        int len = IO_BUFFER_SIZE_COMPRESS;
        buffer = new byte[len];
        ensureOutput(len);
        writeInt(MAGIC);
    }

    private void ensureOutput(int len) {
        // TODO calculate the maximum overhead (worst case) for the output
        // buffer
        int outputLen = (len < 100 ? len + 100 : len) * 2;
        if (outBuffer == null || outBuffer.length < outputLen) {
            outBuffer = new byte[outputLen];
        }
    }

    public void write(int b) throws IOException {
        if (pos >= buffer.length) {
            flush();
        }
        buffer[pos++] = (byte) b;
    }

    private void compressAndWrite(byte[] buff, int len) throws IOException {
        if (len > 0) {
            ensureOutput(len);
            int compressed = compress.compress(buff, len, outBuffer, 0);
            if (compressed > len) {
                writeInt(-len);
                out.write(buff, 0, len);
            } else {
                writeInt(compressed);
                writeInt(len);
                out.write(outBuffer, 0, compressed);
            }
        }
    }

    private void writeInt(int x) throws IOException {
        out.write((byte) (x >> 24));
        out.write((byte) (x >> 16));
        out.write((byte) (x >> 8));
        out.write((byte) x);
    }

    public void write(byte[] buff, int off, int len) throws IOException {
        while (len > 0) {
            int copy = Math.min(buffer.length - pos, len);
            System.arraycopy(buff, off, buffer, pos, copy);
            pos += copy;
            if (pos >= buffer.length) {
                flush();
            }
            off += copy;
            len -= copy;
        }
    }

    public void flush() throws IOException {
        compressAndWrite(buffer, pos);
        pos = 0;
    }

    public void close() throws IOException {
        flush();
        out.close();
    }

}

/**
 * <p>
 * This class implements the LZF lossless data compression algorithm. LZF is a
 * Lempel-Ziv variant with byte-aligned output, and optimized for speed.
 * </p>
 * <p>
 * Safety/Use Notes:
 * </p>
 * <ul>
 * <li>Each instance should be used by a single thread only.</li>
 * <li>The data buffers should be smaller than 1 GB.</li>
 * <li>For performance reasons, safety checks on expansion are omitted.</li>
 * <li>Invalid compressed data can cause an ArrayIndexOutOfBoundsException.</li>
 * </ul>
 * <p>
 * The LZF compressed format knows literal runs and back-references:
 * </p>
 * <ul>
 * <li>Literal run: directly copy bytes from input to output.</li>
 * <li>Back-reference: copy previous data to output stream, with specified
 * offset from location and length. The length is at least 3 bytes.</li>
 * </ul>
 *<p>
 * The first byte of the compressed stream is the control byte. For literal
 * runs, the highest three bits of the control byte are not set, the the lower
 * bits are the literal run length, and the next bytes are data to copy directly
 * into the output. For back-references, the highest three bits of the control
 * byte are the back-reference length. If all three bits are set, then the
 * back-reference length is stored in the next byte. The lower bits of the
 * control byte combined with the next byte form the offset for the
 * back-reference.
 * </p>
 */
 final class CompressLZF implements Compressor {

    /**
     * The number of entries in the hash table. The size is a trade-off between
     * hash collisions (reduced compression) and speed (amount that fits in CPU
     * cache).
     */
    private static final int HASH_SIZE = 1 << 14;

    /**
     * The maximum number of literals in a chunk (32).
     */
    private static final int MAX_LITERAL = 1 << 5;

    /**
     * The maximum offset allowed for a back-reference (8192).
     */
    private static final int MAX_OFF = 1 << 13;

    /**
     * The maximum back-reference length (264).
     */
    private static final int MAX_REF = (1 << 8) + (1 << 3);

    /**
     * Hash table for matching byte sequences (reused for performance).
     */
    private int[] cachedHashTable;

    /**
     * Return byte with lower 2 bytes being byte at index, then index+1.
     */
    private static int first(byte[] in, int inPos) {
        return (in[inPos] << 8) | (in[inPos + 1] & 255);
    }

    /**
     * Shift v 1 byte left, add value at index inPos+2.
     */
    private static int next(int v, byte[] in, int inPos) {
        return (v << 8) | (in[inPos + 2] & 255);
    }

    /**
     * Compute the address in the hash table.
     */
    private static int hash(int h) {
        return ((h * 2777) >> 9) & (HASH_SIZE - 1);
    }

    public int compress(byte[] in, int inLen, byte[] out, int outPos) {
        int inPos = 0;
        if (cachedHashTable == null) {
            cachedHashTable = new int[HASH_SIZE];
        }
        int[] hashTab = cachedHashTable;
        int literals = 0;
        outPos++;
        int future = first(in, 0);
        while (inPos < inLen - 4) {
            byte p2 = in[inPos + 2];
            // next
            future = (future << 8) + (p2 & 255);
            int off = hash(future);
            int ref = hashTab[off];
            hashTab[off] = inPos;
            if (ref < inPos
                        && ref > 0
                        && (off = inPos - ref - 1) < MAX_OFF
                        && in[ref + 2] == p2
                        && in[ref + 1] == (byte) (future >> 8)
                        && in[ref] == (byte) (future >> 16)) {
                // match
                int maxLen = inLen - inPos - 2;
                if (maxLen > MAX_REF) {
                    maxLen = MAX_REF;
                }
                if (literals == 0) {
                    // multiple back-references,
                    // so there is no literal run control byte
                    outPos--;
                } else {
                    // set the control byte at the start of the literal run
                    // to store the number of literals
                    out[outPos - literals - 1] = (byte) (literals - 1);
                    literals = 0;
                }
                int len = 3;
                while (len < maxLen && in[ref + len] == in[inPos + len]) {
                    len++;
                }
                len -= 2;
                if (len < 7) {
                    out[outPos++] = (byte) ((off >> 8) + (len << 5));
                } else {
                    out[outPos++] = (byte) ((off >> 8) + (7 << 5));
                    out[outPos++] = (byte) (len - 7);
                }
                out[outPos++] = (byte) off;
                // move one byte forward to allow for a literal run control byte
                outPos++;
                inPos += len;
                // Rebuild the future, and store the last bytes to the hashtable.
                // Storing hashes of the last bytes in back-reference improves
                // the compression ratio and only reduces speed slightly.
                future = first(in, inPos);
                future = next(future, in, inPos);
                hashTab[hash(future)] = inPos++;
                future = next(future, in, inPos);
                hashTab[hash(future)] = inPos++;
            } else {
                // copy one byte from input to output as part of literal
                out[outPos++] = in[inPos++];
                literals++;
                // at the end of this literal chunk, write the length
                // to the control byte and start a new chunk
                if (literals == MAX_LITERAL) {
                    out[outPos - literals - 1] = (byte) (literals - 1);
                    literals = 0;
                    // move ahead one byte to allow for the
                    // literal run control byte
                    outPos++;
                }
            }
        }
        // write the remaining few bytes as literals
        while (inPos < inLen) {
            out[outPos++] = in[inPos++];
            literals++;
            if (literals == MAX_LITERAL) {
                out[outPos - literals - 1] = (byte) (literals - 1);
                literals = 0;
                outPos++;
            }
        }
        // writes the final literal run length to the control byte
        out[outPos - literals - 1] = (byte) (literals - 1);
        if (literals == 0) {
            outPos--;
        }
        return outPos;
    }

    public void expand(byte[] in, int inPos, int inLen, byte[] out, int outPos, int outLen) {
        if (inPos < 0 || outPos < 0 || outLen < 0) {
            throw new IllegalArgumentException();
        }
        do {
            int ctrl = in[inPos++] & 255;
            if (ctrl < MAX_LITERAL) {
                // literal run of length = ctrl + 1,
                ctrl++;
                // copy to output and move forward this many bytes
                System.arraycopy(in, inPos, out, outPos, ctrl);
                outPos += ctrl;
                inPos += ctrl;
            } else {
                // back reference
                // the highest 3 bits are the match length
                int len = ctrl >> 5;
                // if the length is maxed, add the next byte to the length
                if (len == 7) {
                    len += in[inPos++] & 255;
                }
                // minimum back-reference is 3 bytes,
                // so 2 was subtracted before storing size
                len += 2;

                // ctrl is now the offset for a back-reference...
                // the logical AND operation removes the length bits
                ctrl = -((ctrl & 0x1f) << 8) - 1;

                // the next byte augments/increases the offset
                ctrl -= in[inPos++] & 255;

                // copy the back-reference bytes from the given
                // location in output to current position
                ctrl += outPos;
                if (outPos + len >= out.length) {
                    // reduce array bounds checking
                    throw new ArrayIndexOutOfBoundsException();
                }
                for (int i = 0; i < len; i++) {
                    out[outPos++] = out[ctrl++];
                }
            }
        } while (outPos < outLen);
    }

}

/**
 * Each data compression algorithm must implement this interface.
 */
 interface Compressor {

  /**
     * Compress a number of bytes.
     *
     * @param in the input data
     * @param inLen the number of bytes to compress
     * @param out the output area
     * @param outPos the offset at the output array
     * @return the end position
     */
    int compress(byte[] in, int inLen, byte[] out, int outPos);

    /**
     * Expand a number of compressed bytes.
     *
     * @param in the compressed data
     * @param inPos the offset at the input array
     * @param inLen the number of bytes to read
     * @param out the output area
     * @param outPos the offset at the output array
     * @param outLen the size of the uncompressed data
     */
    void expand(byte[] in, int inPos, int inLen, byte[] out, int outPos, int outLen);


}
Related examples in the same category

1.	Creating an input or output stream on a ByteBuffer
2.	Creating a Manifest for a JAR File
3.	Convert InputStream to String
4.	An limited-data-size input stream
5.	An input stream which reads sequentially from multiple sources
6.	Combined InputStream
7.	Size Limit InputStream
8.	Minimal InputStream subclass to fetch bytes form a String
9.	Read and return the entire contents of the supplied InputStream. This method always closes the stream when finished reading.
10.	Compare two InputStream
11.	Read and return the entire contents of the supplied InputStream.
12.	Deserializes an object from an input stream.
13.	Reads at most certain bytes from input stream and returns them as a byte array.
14.	Convert Reader to InputStream
15.	A trace of the data that is being retrieved from an input stream
16.	EOLConvertingInputStream: InputStream which converts \r bytes not followed by \n and \n not preceded by \r to \r\n.
17.	A line reading wrapper that works with byte streams.
18.	Smart Encoding InputStream
19.	Wraps a stream that has been exported to import the data in readable form
20.	This input stream wrapper closes the base input stream when fully read.
21.	Random input stream
LZFInputStream and LZFOutputStream : InputStream « File Input Output « Java

Related examples in the same category