LZFInputStream and LZFOutputStream
/*
* Copyright 2004-2010 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
* Copyright (c) 2005 Oren J. Maurice <oymaurice@hazorea.org.il>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//package com.hyk.proxy.common.rpc.extension.compress.lzf;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
* An input stream to read from an LZF stream.
* The data is automatically expanded.
*/
public class LZFInputStream extends InputStream {
private final InputStream in;
private CompressLZF decompress = new CompressLZF();
private int pos;
private int bufferLength;
private byte[] inBuffer;
private byte[] buffer;
/**
* Copied from org.h2.util.Utils --- Start
*
* Create an array of bytes with the given size. If this is not possible
* because not enough memory is available, an OutOfMemoryError with the
* requested size in the message is thrown.
*
* @param len the number of bytes requested
* @return the byte array
* @throws OutOfMemoryError
*/
public static final byte[] EMPTY_BYTES = {};
public static byte[] newBytes(int len) {
try {
if (len == 0) {
return EMPTY_BYTES;
}
return new byte[len];
} catch (OutOfMemoryError e) {
Error e2 = new OutOfMemoryError("Requested memory: " + len);
e2.initCause(e);
throw e2;
}
}
// org.h2.util.Utils.Java --- END
public LZFInputStream(InputStream in) throws IOException {
this.in = in;
if (readInt() != LZFOutputStream.MAGIC) {
throw new IOException("Not an LZFInputStream");
}
}
private byte[] ensureSize(byte[] buff, int len) {
return buff == null || buff.length < len ? newBytes(len) : buff;
}
private void fillBuffer() throws IOException {
if (buffer != null && pos < bufferLength) {
return;
}
int len = readInt();
if (decompress == null) {
// EOF
this.bufferLength = 0;
} else if (len < 0) {
len = -len;
buffer = ensureSize(buffer, len);
readFully(buffer, len);
this.bufferLength = len;
} else {
inBuffer = ensureSize(inBuffer, len);
int size = readInt();
readFully(inBuffer, len);
buffer = ensureSize(buffer, size);
decompress.expand(inBuffer, 0, len, buffer, 0, size);
this.bufferLength = size;
}
pos = 0;
}
private void readFully(byte[] buff, int len) throws IOException {
int off = 0;
while (len > 0) {
int l = in.read(buff, off, len);
len -= l;
off += l;
}
}
private int readInt() throws IOException {
int x = in.read();
if (x < 0) {
decompress = null;
return 0;
}
x = (x << 24) + (in.read() << 16) + (in.read() << 8) + in.read();
return x;
}
public int read() throws IOException {
fillBuffer();
if (pos >= bufferLength) {
return -1;
}
return buffer[pos++] & 255;
}
public int read(byte[] b) throws IOException {
return read(b, 0, b.length);
}
public int read(byte[] b, int off, int len) throws IOException {
if (len == 0) {
return 0;
}
int read = 0;
while (len > 0) {
int r = readBlock(b, off, len);
if (r < 0) {
break;
}
read += r;
off += r;
len -= r;
}
return read == 0 ? -1 : read;
}
private int readBlock(byte[] b, int off, int len) throws IOException {
fillBuffer();
if (pos >= bufferLength) {
return -1;
}
int max = Math.min(len, bufferLength - pos);
max = Math.min(max, b.length - off);
System.arraycopy(buffer, pos, b, off, max);
pos += max;
return max;
}
public void close() throws IOException {
in.close();
}
}
/**
* An output stream to write an LZF stream.
* The data is automatically compressed.
*/
class LZFOutputStream extends OutputStream {
public static final int IO_BUFFER_SIZE_COMPRESS = 128 * 1024;
/**
* The file header of a LZF file.
*/
static final int MAGIC = ('H' << 24) | ('2' << 16) | ('I' << 8) | 'S';
private final OutputStream out;
private final CompressLZF compress = new CompressLZF();
private final byte[] buffer;
private int pos;
private byte[] outBuffer;
public LZFOutputStream(OutputStream out) throws IOException {
this.out = out;
int len = IO_BUFFER_SIZE_COMPRESS;
buffer = new byte[len];
ensureOutput(len);
writeInt(MAGIC);
}
private void ensureOutput(int len) {
// TODO calculate the maximum overhead (worst case) for the output
// buffer
int outputLen = (len < 100 ? len + 100 : len) * 2;
if (outBuffer == null || outBuffer.length < outputLen) {
outBuffer = new byte[outputLen];
}
}
public void write(int b) throws IOException {
if (pos >= buffer.length) {
flush();
}
buffer[pos++] = (byte) b;
}
private void compressAndWrite(byte[] buff, int len) throws IOException {
if (len > 0) {
ensureOutput(len);
int compressed = compress.compress(buff, len, outBuffer, 0);
if (compressed > len) {
writeInt(-len);
out.write(buff, 0, len);
} else {
writeInt(compressed);
writeInt(len);
out.write(outBuffer, 0, compressed);
}
}
}
private void writeInt(int x) throws IOException {
out.write((byte) (x >> 24));
out.write((byte) (x >> 16));
out.write((byte) (x >> 8));
out.write((byte) x);
}
public void write(byte[] buff, int off, int len) throws IOException {
while (len > 0) {
int copy = Math.min(buffer.length - pos, len);
System.arraycopy(buff, off, buffer, pos, copy);
pos += copy;
if (pos >= buffer.length) {
flush();
}
off += copy;
len -= copy;
}
}
public void flush() throws IOException {
compressAndWrite(buffer, pos);
pos = 0;
}
public void close() throws IOException {
flush();
out.close();
}
}
/**
* <p>
* This class implements the LZF lossless data compression algorithm. LZF is a
* Lempel-Ziv variant with byte-aligned output, and optimized for speed.
* </p>
* <p>
* Safety/Use Notes:
* </p>
* <ul>
* <li>Each instance should be used by a single thread only.</li>
* <li>The data buffers should be smaller than 1 GB.</li>
* <li>For performance reasons, safety checks on expansion are omitted.</li>
* <li>Invalid compressed data can cause an ArrayIndexOutOfBoundsException.</li>
* </ul>
* <p>
* The LZF compressed format knows literal runs and back-references:
* </p>
* <ul>
* <li>Literal run: directly copy bytes from input to output.</li>
* <li>Back-reference: copy previous data to output stream, with specified
* offset from location and length. The length is at least 3 bytes.</li>
* </ul>
*<p>
* The first byte of the compressed stream is the control byte. For literal
* runs, the highest three bits of the control byte are not set, the the lower
* bits are the literal run length, and the next bytes are data to copy directly
* into the output. For back-references, the highest three bits of the control
* byte are the back-reference length. If all three bits are set, then the
* back-reference length is stored in the next byte. The lower bits of the
* control byte combined with the next byte form the offset for the
* back-reference.
* </p>
*/
final class CompressLZF implements Compressor {
/**
* The number of entries in the hash table. The size is a trade-off between
* hash collisions (reduced compression) and speed (amount that fits in CPU
* cache).
*/
private static final int HASH_SIZE = 1 << 14;
/**
* The maximum number of literals in a chunk (32).
*/
private static final int MAX_LITERAL = 1 << 5;
/**
* The maximum offset allowed for a back-reference (8192).
*/
private static final int MAX_OFF = 1 << 13;
/**
* The maximum back-reference length (264).
*/
private static final int MAX_REF = (1 << 8) + (1 << 3);
/**
* Hash table for matching byte sequences (reused for performance).
*/
private int[] cachedHashTable;
/**
* Return byte with lower 2 bytes being byte at index, then index+1.
*/
private static int first(byte[] in, int inPos) {
return (in[inPos] << 8) | (in[inPos + 1] & 255);
}
/**
* Shift v 1 byte left, add value at index inPos+2.
*/
private static int next(int v, byte[] in, int inPos) {
return (v << 8) | (in[inPos + 2] & 255);
}
/**
* Compute the address in the hash table.
*/
private static int hash(int h) {
return ((h * 2777) >> 9) & (HASH_SIZE - 1);
}
public int compress(byte[] in, int inLen, byte[] out, int outPos) {
int inPos = 0;
if (cachedHashTable == null) {
cachedHashTable = new int[HASH_SIZE];
}
int[] hashTab = cachedHashTable;
int literals = 0;
outPos++;
int future = first(in, 0);
while (inPos < inLen - 4) {
byte p2 = in[inPos + 2];
// next
future = (future << 8) + (p2 & 255);
int off = hash(future);
int ref = hashTab[off];
hashTab[off] = inPos;
if (ref < inPos
&& ref > 0
&& (off = inPos - ref - 1) < MAX_OFF
&& in[ref + 2] == p2
&& in[ref + 1] == (byte) (future >> 8)
&& in[ref] == (byte) (future >> 16)) {
// match
int maxLen = inLen - inPos - 2;
if (maxLen > MAX_REF) {
maxLen = MAX_REF;
}
if (literals == 0) {
// multiple back-references,
// so there is no literal run control byte
outPos--;
} else {
// set the control byte at the start of the literal run
// to store the number of literals
out[outPos - literals - 1] = (byte) (literals - 1);
literals = 0;
}
int len = 3;
while (len < maxLen && in[ref + len] == in[inPos + len]) {
len++;
}
len -= 2;
if (len < 7) {
out[outPos++] = (byte) ((off >> 8) + (len << 5));
} else {
out[outPos++] = (byte) ((off >> 8) + (7 << 5));
out[outPos++] = (byte) (len - 7);
}
out[outPos++] = (byte) off;
// move one byte forward to allow for a literal run control byte
outPos++;
inPos += len;
// Rebuild the future, and store the last bytes to the hashtable.
// Storing hashes of the last bytes in back-reference improves
// the compression ratio and only reduces speed slightly.
future = first(in, inPos);
future = next(future, in, inPos);
hashTab[hash(future)] = inPos++;
future = next(future, in, inPos);
hashTab[hash(future)] = inPos++;
} else {
// copy one byte from input to output as part of literal
out[outPos++] = in[inPos++];
literals++;
// at the end of this literal chunk, write the length
// to the control byte and start a new chunk
if (literals == MAX_LITERAL) {
out[outPos - literals - 1] = (byte) (literals - 1);
literals = 0;
// move ahead one byte to allow for the
// literal run control byte
outPos++;
}
}
}
// write the remaining few bytes as literals
while (inPos < inLen) {
out[outPos++] = in[inPos++];
literals++;
if (literals == MAX_LITERAL) {
out[outPos - literals - 1] = (byte) (literals - 1);
literals = 0;
outPos++;
}
}
// writes the final literal run length to the control byte
out[outPos - literals - 1] = (byte) (literals - 1);
if (literals == 0) {
outPos--;
}
return outPos;
}
public void expand(byte[] in, int inPos, int inLen, byte[] out, int outPos, int outLen) {
if (inPos < 0 || outPos < 0 || outLen < 0) {
throw new IllegalArgumentException();
}
do {
int ctrl = in[inPos++] & 255;
if (ctrl < MAX_LITERAL) {
// literal run of length = ctrl + 1,
ctrl++;
// copy to output and move forward this many bytes
System.arraycopy(in, inPos, out, outPos, ctrl);
outPos += ctrl;
inPos += ctrl;
} else {
// back reference
// the highest 3 bits are the match length
int len = ctrl >> 5;
// if the length is maxed, add the next byte to the length
if (len == 7) {
len += in[inPos++] & 255;
}
// minimum back-reference is 3 bytes,
// so 2 was subtracted before storing size
len += 2;
// ctrl is now the offset for a back-reference...
// the logical AND operation removes the length bits
ctrl = -((ctrl & 0x1f) << 8) - 1;
// the next byte augments/increases the offset
ctrl -= in[inPos++] & 255;
// copy the back-reference bytes from the given
// location in output to current position
ctrl += outPos;
if (outPos + len >= out.length) {
// reduce array bounds checking
throw new ArrayIndexOutOfBoundsException();
}
for (int i = 0; i < len; i++) {
out[outPos++] = out[ctrl++];
}
}
} while (outPos < outLen);
}
}
/**
* Each data compression algorithm must implement this interface.
*/
interface Compressor {
/**
* Compress a number of bytes.
*
* @param in the input data
* @param inLen the number of bytes to compress
* @param out the output area
* @param outPos the offset at the output array
* @return the end position
*/
int compress(byte[] in, int inLen, byte[] out, int outPos);
/**
* Expand a number of compressed bytes.
*
* @param in the compressed data
* @param inPos the offset at the input array
* @param inLen the number of bytes to read
* @param out the output area
* @param outPos the offset at the output array
* @param outLen the size of the uncompressed data
*/
void expand(byte[] in, int inPos, int inLen, byte[] out, int outPos, int outLen);
}
Related examples in the same category