// Java tutorial
/** 4MC Copyright (c) 2014, Carlo Medas BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact 4MC author at : - 4MC source repository : https://github.com/carlomedas/4mc LZ4 - Copyright (C) 2011-2014, Yann Collet - BSD 2-Clause License. 
You can contact LZ4 lib author at :
      - LZ4 source repository : http://code.google.com/p/lz4/
**/
package com.hadoop.compression.fourmc;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.CompressorStream;

import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * Output stream that writes 4mc files: a header, a sequence of blocks produced by the
 * supplied compressor, an end-of-blocks marker and a footer carrying the block index.
 *
 * <p>Layout, as emitted by this class (all integers big-endian):
 * <ul>
 *   <li>header: magic, version, xxhash32 checksum of those 8 bytes;</li>
 *   <li>each block: uncompressed size, stored size, xxhash32 checksum of the stored bytes,
 *       then the stored bytes (compressed, or raw when compression did not shrink them);</li>
 *   <li>end marker: three zero integers;</li>
 *   <li>footer: footer size, version, one delta per block offset, footer size, magic,
 *       xxhash32 checksum of all preceding footer bytes.</li>
 * </ul>
 */
public class FourMcOutputStream extends CompressorStream {
    private static final Log LOG = LogFactory.getLog(FourMcOutputStream.class);

    /** Value of {@code cout.bytesWritten} captured at the start of each block. */
    private final List<Long> blockOffsets;

    /** Counting wrapper around the caller's stream; the same object as {@code this.out}. */
    private final CountingOutputStream cout;

    static {
        if (FourMcNativeCodeLoader.isNativeCodeLoaded()) {
            boolean nativeLoaded = Lz4Compressor.isNativeLoaded();
            if (!nativeLoaded) {
                LOG.error("Failed to load/initialize native-4mc library");
            }
        } else {
            LOG.error("Cannot load native-4mc without native-hadoop");
        }
    }

    /**
     * Writes the 4mc file header: magic, version and the xxhash32 checksum of those two fields.
     *
     * @param out stream that receives the 12 header bytes
     * @throws IOException if writing to {@code out} fails
     */
    protected static void write4mcHeader(OutputStream out) throws IOException {
        DataOutputBuffer dob = new DataOutputBuffer();
        try {
            dob.writeInt(FourMcCodec.FOURMC_MAGIC);
            dob.writeInt(FourMcCodec.FOURMC_VERSION);
            // checksum covers the 8 bytes written so far (magic + version)
            int checksum = Lz4Compressor.xxhash32(dob.getData(), 0, 8, 0);
            dob.writeInt(checksum);
            out.write(dob.getData(), 0, dob.getLength());
        } finally {
            dob.close();
        }
    }

    /**
     * Creates the stream and immediately writes the 4mc header to {@code out}.
     *
     * @param out        destination stream; it is wrapped so written bytes can be counted
     * @param compressor compressor to use; it is cast to {@code Lz4Compressor} when its
     *                   direct buffers are released
     * @param bufferSize size passed to the {@code CompressorStream} constructor
     * @throws IOException if the header cannot be written; the compressor's direct buffers
     *                     are released before the exception propagates
     */
    public FourMcOutputStream(OutputStream out, Compressor compressor, int bufferSize) throws IOException {
        super(new CountingOutputStream(out), compressor, bufferSize);
        this.cout = (CountingOutputStream) this.out;
        this.blockOffsets = new ArrayList<Long>(32);

        try {
            write4mcHeader(this.out);
        } catch (IOException e) {
            // force release compressor and related direct buffers
            releaseCompressor();
            throw e;
        }
    }

    /**
     * Finishes the current block, writes the end-of-blocks marker and the footer with the
     * block index, then closes the underlying stream.
     *
     * <p>The underlying stream is closed, the stream is marked closed and the compressor's
     * direct buffers are released even when finishing or writing the footer fails, so a
     * failed close neither leaks those resources nor lets a retry append a second footer.
     * Calling this method on an already closed stream does nothing.
     *
     * @throws IOException if flushing the last block, writing the footer or closing the
     *                     underlying stream fails
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }

        try {
            finish();

            // write last block marker
            rawWriteInt(0);
            rawWriteInt(0);
            rawWriteInt(0);

            writeFooter();
        } finally {
            try {
                out.close();
            } finally {
                closed = true;
                // force release compressor and related direct buffers
                releaseCompressor();
            }
        }
    }

    /**
     * Compresses {@code len} bytes of {@code b} starting at {@code off}, starting a new block
     * whenever adding the data to the current one would exceed
     * {@code FourMcCodec.FOURMC_MAX_BLOCK_SIZE}.
     *
     * @param b   source buffer
     * @param off offset of the first byte to write
     * @param len number of bytes to write
     * @throws IOException               if the compressor is already finished or a write fails
     * @throws NullPointerException      if {@code b} is null
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not describe a range inside {@code b}
     */
    @Override
    public void write(byte[] b, int off, int len) throws IOException {
        // exactly like the case of LzopOutputStream this is a bit complex
        // to be able to handle custom needs of block compression and related block indexes

        // Sanity checks
        if (compressor.finished()) {
            throw new IOException("write beyond end of stream");
        }
        if (b == null) {
            throw new NullPointerException();
        }
        // "len > b.length - off" instead of "off + len > b.length": the sum can overflow int
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return;
        }

        // The current block already holds data and cannot take this write: close it first.
        long limlen = compressor.getBytesRead();
        if (len + limlen > FourMcCodec.FOURMC_MAX_BLOCK_SIZE && limlen > 0) {
            finish();
            compressor.reset();
        }

        // Input larger than one block: emit it as a series of blocks of at most the maximum size.
        if (len > FourMcCodec.FOURMC_MAX_BLOCK_SIZE) {
            do {
                int bufLen = Math.min(len, FourMcCodec.FOURMC_MAX_BLOCK_SIZE);
                compressor.setInput(b, off, bufLen);
                finish();
                compressor.reset();
                off += bufLen;
                len -= bufLen;
            } while (len > 0);
            return;
        }

        // Give data to the compressor
        compressor.setInput(b, off, len);
        while (!compressor.needsInput()) {
            compress();
        }
    }

    /**
     * Tells the compressor that no more input follows for the current block and writes out
     * whatever it still produces. Does nothing if the compressor is already finished.
     *
     * @throws IOException if writing the block fails
     */
    @Override
    public void finish() throws IOException {
        if (!compressor.finished()) {
            compressor.finish();
            while (!compressor.finished()) {
                compress();
            }
        }
    }

    /**
     * Pulls output from the compressor into {@code buffer} and, if any was produced, records
     * the block offset and writes one block: uncompressed size, stored size, checksum, data.
     * When the compressed form is not smaller than the input, the raw bytes are stored instead.
     *
     * @throws IOException if writing the block fails
     */
    @Override
    protected void compress() throws IOException {
        int len = compressor.compress(buffer, 0, buffer.length);
        if (len > 0) {
            // new block: remember the current position for the block index
            blockOffsets.add(cout.bytesWritten);

            rawWriteInt((int) compressor.getBytesRead());

            if (compressor.getBytesRead() <= compressor.getBytesWritten()) {
                // write uncompressed data block
                byte[] uncompressed = ((Lz4Compressor) compressor).uncompressedBytes();
                rawWriteInt(uncompressed.length);
                int checksum = Lz4Compressor.xxhash32(uncompressed, 0, uncompressed.length, 0);
                rawWriteInt(checksum);
                out.write(uncompressed, 0, uncompressed.length);
                // fix by Xianjin YE (advancedxy) to https://github.com/carlomedas/4mc/issues/12
                compressor.reset(); // reset compressor buffers
                compressor.finish(); // set compressor to be finished.
            } else {
                // write compressed data block
                rawWriteInt(len);
                int checksum = Lz4Compressor.xxhash32(buffer, 0, len, 0);
                rawWriteInt(checksum);
                out.write(buffer, 0, len);
            }
        }
    }

    /** Builds the footer (sizes, version, block deltas, magic, checksum) and writes it to {@code out}. */
    private void writeFooter() throws IOException {
        // five fixed ints plus the checksum-free delta table: 20 bytes + 4 per block
        int footerSize = 20 + blockOffsets.size() * 4;
        DataOutputBuffer dob = new DataOutputBuffer();
        try {
            dob.writeInt(footerSize);
            dob.writeInt(FourMcCodec.FOURMC_VERSION);

            // write block deltas: the first entry is the absolute offset, the rest are
            // differences from the previous block offset
            for (int i = 0; i < blockOffsets.size(); ++i) {
                long blockDelta = i == 0
                        ? (blockOffsets.get(i))
                        : (blockOffsets.get(i) - blockOffsets.get(i - 1));
                dob.writeInt((int) blockDelta);
            }

            // tail of footer and checksum
            dob.writeInt(footerSize);
            dob.writeInt(FourMcCodec.FOURMC_MAGIC);
            int checksum = Lz4Compressor.xxhash32(dob.getData(), 0, dob.getLength(), 0);
            dob.writeInt(checksum);
            out.write(dob.getData(), 0, dob.getLength());
        } finally {
            dob.close();
        }
    }

    /** Releases the compressor's direct buffers and drops the reference; no-op if already dropped. */
    private void releaseCompressor() {
        if (compressor != null) {
            ((Lz4Compressor) compressor).releaseDirectBuffers();
            compressor = null;
        }
    }

    /** Writes {@code v} to {@code out} as four bytes, most significant byte first. */
    private void rawWriteInt(int v) throws IOException {
        out.write((v >>> 24) & 0xFF);
        out.write((v >>> 16) & 0xFF);
        out.write((v >>> 8) & 0xFF);
        out.write((v) & 0xFF);
    }

    /* keeps count of number of bytes written. */
    private static class CountingOutputStream extends FilterOutputStream {
        public CountingOutputStream(OutputStream out) {
            super(out);
        }

        /** Total number of bytes passed to the wrapped stream through the write methods below. */
        long bytesWritten = 0;

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
            out.write(b, off, len);
            bytesWritten += len;
        }

        @Override
        public void write(int b) throws IOException {
            out.write(b);
            bytesWritten++;
        }
    }
}