/*
 * This file is part of Hadoop-Gpl-Compression.
 *
 * Hadoop-Gpl-Compression is free software: you can redistribute it
 * and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * Hadoop-Gpl-Compression is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Hadoop-Gpl-Compression. If not, see
 * <http://www.gnu.org/licenses/>.
 */
package com.hadoop.compression.lzo;

import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.Adler32;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.CompressorStream;

public class LzopOutputStream extends CompressorStream {
  private static final Log LOG = LogFactory.getLog(LzopOutputStream.class);

  final int MAX_INPUT_SIZE;

  /**
   * Write an lzop-compatible header to the OutputStream provided.
   */
  protected static void writeLzopHeader(OutputStream out,
      LzoCompressor.CompressionStrategy strategy) throws IOException {
    DataOutputBuffer dob = new DataOutputBuffer();
    try {
      dob.writeShort(LzopCodec.LZOP_VERSION);
      dob.writeShort(LzoCompressor.LZO_LIBRARY_VERSION);
      dob.writeShort(LzopCodec.LZOP_COMPAT_VERSION);
      switch (strategy) {
      case LZO1X_1:
        dob.writeByte(1);
        dob.writeByte(5);
        break;
      case LZO1X_15:
        dob.writeByte(2);
        dob.writeByte(1);
        break;
      case LZO1X_999:
        dob.writeByte(3);
        dob.writeByte(9);
        break;
      default:
        throw new IOException("Incompatible lzop strategy: " + strategy);
      }
      dob.writeInt(0);                                        // all flags 0
      dob.writeInt(0x81A4);                                   // mode
      dob.writeInt((int) (System.currentTimeMillis() / 1000)); // mtime
      dob.writeInt(0);                                        // gmtdiff ignored
      dob.writeByte(0);                                       // no filename
      Adler32 headerChecksum = new Adler32();
      headerChecksum.update(dob.getData(), 0, dob.getLength());
      int hc = (int) headerChecksum.getValue();
      dob.writeInt(hc);
      out.write(LzopCodec.LZO_MAGIC);
      out.write(dob.getData(), 0, dob.getLength());
    } finally {
      dob.close();
    }
  }

  public LzopOutputStream(OutputStream out, Compressor compressor,
      int bufferSize, LzoCompressor.CompressionStrategy strategy)
      throws IOException {
    super(out, compressor, bufferSize);
    int overhead = strategy.name().contains("LZO1")
        ? (bufferSize >> 4) + 64 + 3
        : (bufferSize >> 3) + 128 + 3;
    MAX_INPUT_SIZE = bufferSize - overhead;
    writeLzopHeader(out, strategy);
  }

  /**
   * Close the underlying stream and write a null word to the output stream.
   */
  @Override
  public void close() throws IOException {
    if (!closed) {
      finish();
      out.write(new byte[] { 0, 0, 0, 0 });
      out.close();
      closed = true;
    }
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    // TODO: LzopOutputStream used to inherit from BlockCompressorStream
    // but had a bug due to this inheritance chain. In order to fix the
    // bug we pulled down the implementation of the superclass, which
    // is overly general. Thus this function is not quite as succinct
    // as it could be, now that it's LZOP-specific.
    // See: http://github.com/toddlipcon/hadoop-lzo/commit/5fe6dd4736a73fa33b86656ce8aeb011e7f2046c
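
    // Framing note: each chunk handed to the compressor below becomes one
    // lzop block on the wire. compress() writes the uncompressed length,
    // then the compressed length, then the payload (see rawWriteInt for
    // the big-endian encoding). MAX_INPUT_SIZE leaves room in the stream's
    // buffer for LZO's worst-case expansion, so a full block always fits.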

    // Sanity checks
    if (compressor.finished()) {
      throw new IOException("write beyond end of stream");
    }
    if (b == null) {
      throw new NullPointerException();
    } else if ((off < 0) || (off > b.length) || (len < 0)
        || ((off + len) > b.length)) {
      throw new IndexOutOfBoundsException();
    } else if (len == 0) {
      return;
    }

    long limlen = compressor.getBytesRead();
    if (len + limlen > MAX_INPUT_SIZE && limlen > 0) {
      // Adding this segment would exceed the maximum size.
      // Flush data if we have it.
      finish();
      compressor.reset();
    }

    if (len > MAX_INPUT_SIZE) {
      // The data we're given exceeds the maximum size. Any data we had
      // has already been flushed, so we write out this chunk in segments
      // not exceeding the maximum size until it is exhausted.
      do {
        int bufLen = Math.min(len, MAX_INPUT_SIZE);
        compressor.setInput(b, off, bufLen);
        finish();
        compressor.reset();
        off += bufLen;
        len -= bufLen;
      } while (len > 0);
      return;
    }

    // Give data to the compressor
    compressor.setInput(b, off, len);
    if (!compressor.needsInput()) {
      // The compressor's buffer size might be smaller than the maximum
      // input size, so we permit it to flush if required.
      do {
        compress();
      } while (!compressor.needsInput());
    }
  }

  @Override
  public void finish() throws IOException {
    if (!compressor.finished()) {
      compressor.finish();
      while (!compressor.finished()) {
        compress();
      }
    }
  }

  @Override
  protected void compress() throws IOException {
    int len = compressor.compress(buffer, 0, buffer.length);
    if (len > 0) {
      rawWriteInt((int) compressor.getBytesRead());

      // If the compressed buffer is actually larger than the uncompressed
      // buffer, the LZO specification says that we should write the
      // uncompressed bytes rather than the compressed bytes. The
      // decompressor understands this because both sizes get written to
      // the stream.
      if (compressor.getBytesRead() < compressor.getBytesWritten()) {
        // Compression actually increased the size of the buffer, so write
        // the uncompressed bytes.
        byte[] uncompressed =
            ((LzoCompressor) compressor).uncompressedBytes();
        rawWriteInt(uncompressed.length);
        out.write(uncompressed, 0, uncompressed.length);
      } else {
        // Write out the compressed chunk.
        rawWriteInt(len);
        out.write(buffer, 0, len);
      }
    }
  }

  private void rawWriteInt(int v) throws IOException {
    out.write((v >>> 24) & 0xFF);
    out.write((v >>> 16) & 0xFF);
    out.write((v >>>  8) & 0xFF);
    out.write((v >>>  0) & 0xFF);
  }
}
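
A minimal usage sketch (not part of the original file): it writes a short string to an .lzo file that the lzop command-line tool should be able to decompress. The LzoCompressor(strategy, bufferSize) constructor is an assumption drawn from the hadoop-lzo project this class comes from; adjust if the actual API differs. This would live in its own source file.

import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;

import com.hadoop.compression.lzo.LzoCompressor;
import com.hadoop.compression.lzo.LzopOutputStream;

public class LzopWriteExample {
  public static void main(String[] args) throws Exception {
    int bufferSize = 256 * 1024; // lzop's customary block size
    LzoCompressor.CompressionStrategy strategy =
        LzoCompressor.CompressionStrategy.LZO1X_1;
    // Assumption: hadoop-lzo's LzoCompressor exposes a
    // (CompressionStrategy, bufferSize) constructor.
    try (LzopOutputStream out = new LzopOutputStream(
        new FileOutputStream("hello.txt.lzo"),
        new LzoCompressor(strategy, bufferSize), bufferSize, strategy)) {
      out.write("Hello, lzop!\n".getBytes(StandardCharsets.UTF_8));
    } // close() flushes the last block and appends the four-byte zero word
  }
}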