org.apache.hadoop.io.compress.bzip2.Bzip2Compressor.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.io.compress.bzip2.Bzip2Compressor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.io.compress.bzip2;

import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.Compressor;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * A {@link Compressor} based on the popular
 * bzip2 compression algorithm.
 * http://www.bzip2.org/
 *
 * <p>Input handed to {@link #setInput(byte[], int, int)} is staged in a
 * direct buffer, compressed by the native {@code deflateBytesDirect()}
 * method, and the result is drained from a second direct buffer by
 * {@link #compress(byte[], int, int)}.  All public instance methods are
 * {@code synchronized} on this object.
 */
public class Bzip2Compressor implements Compressor {
    private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024;

    // The default values for the block size and work factor are the same
    // as those in Julian Seward's original bzip2 implementation.
    static final int DEFAULT_BLOCK_SIZE = 9;
    static final int DEFAULT_WORK_FACTOR = 30;

    private static final Log LOG = LogFactory.getLog(Bzip2Compressor.class);

    // NOTE(review): the instance fields below are presumably looked up by name
    // and type from the native side (see initIDs / deflateBytesDirect); do not
    // rename or retype them without checking the JNI code.

    // Opaque handle returned by the native init(); 0 means "ended".
    private long stream;
    private int blockSize;
    private int workFactor;
    private int directBufferSize;
    // Caller-supplied input that has not yet been copied to the direct buffer.
    private byte[] userBuf = null;
    private int userBufOff = 0, userBufLen = 0;
    private Buffer uncompressedDirectBuf = null;
    private int uncompressedDirectBufOff = 0, uncompressedDirectBufLen = 0;
    // True while the native side still has staged input left to consume.
    private boolean keepUncompressedBuf = false;
    private Buffer compressedDirectBuf = null;
    private boolean finish, finished;

    /**
     * Creates a new compressor with the default values for the
     * compression block size and work factor.  Compressed data will be
     * generated in bzip2 format.
     */
    public Bzip2Compressor() {
        this(DEFAULT_BLOCK_SIZE, DEFAULT_WORK_FACTOR, DEFAULT_DIRECT_BUFFER_SIZE);
    }

    /**
     * Creates a new compressor, taking settings from the configuration.
     *
     * @param conf Configuration from which the block size and work factor
     *        are read via {@code Bzip2Factory}.
     */
    public Bzip2Compressor(Configuration conf) {
        this(Bzip2Factory.getBlockSize(conf), Bzip2Factory.getWorkFactor(conf), DEFAULT_DIRECT_BUFFER_SIZE);
    }

    /**
     * Creates a new compressor using the specified block size.
     * Compressed data will be generated in bzip2 format.
     *
     * @param blockSize The block size to be used for compression.  This is
     *        an integer from 1 through 9, which is multiplied by 100,000 to
     *        obtain the actual block size in bytes.
     * @param workFactor This parameter is a threshold that determines when a
     *        fallback algorithm is used for pathological data.  It ranges from
     *        0 to 250.
     * @param directBufferSize Size of the direct buffer to be used.
     */
    public Bzip2Compressor(int blockSize, int workFactor, int directBufferSize) {
        this.blockSize = blockSize;
        this.workFactor = workFactor;
        this.directBufferSize = directBufferSize;
        // Allocate the buffers before acquiring the native stream: if an
        // allocation fails there is then no native handle left unreleased.
        uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
        compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
        // Position == limit marks the output buffer as holding no pending data.
        compressedDirectBuf.position(directBufferSize);
        stream = init(blockSize, workFactor);
    }

    /**
     * Prepare the compressor to be used in a new stream with settings defined in
     * the given Configuration. It will reset the compressor's block size and
     * work factor.  If {@code conf} is {@code null} the current settings are
     * kept and the compressor is simply reset.
     *
     * @param conf Configuration storing new settings
     * @throws NullPointerException if this compressor has already been ended
     */
    @Override
    public synchronized void reinit(Configuration conf) {
        checkStream();
        if (conf != null) {
            // Read both settings before touching the native stream so that a
            // failure while reading them leaves this compressor fully usable.
            int newBlockSize = Bzip2Factory.getBlockSize(conf);
            int newWorkFactor = Bzip2Factory.getWorkFactor(conf);
            blockSize = newBlockSize;
            workFactor = newWorkFactor;
        }
        // reset() releases the old stream and creates exactly one new stream
        // with the (possibly updated) settings.
        reset();
        if (conf != null && LOG.isDebugEnabled()) {
            LOG.debug("Reinit compressor with new compression configuration");
        }
    }

    /**
     * Supplies a new chunk of uncompressed input.  As much of it as fits is
     * copied into the direct input buffer immediately; the remainder is
     * copied on later calls to {@link #needsInput()}.
     *
     * @param b   buffer holding the input data
     * @param off start offset of the data within {@code b}
     * @param len number of bytes of input
     * @throws NullPointerException if {@code b} is {@code null}
     * @throws ArrayIndexOutOfBoundsException if {@code off} and {@code len}
     *         do not describe a valid range of {@code b}
     */
    @Override
    public synchronized void setInput(byte[] b, int off, int len) {
        if (b == null) {
            throw new NullPointerException();
        }
        // "off > b.length - len" is the overflow-safe form of off + len > b.length.
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new ArrayIndexOutOfBoundsException();
        }

        this.userBuf = b;
        this.userBufOff = off;
        this.userBufLen = len;
        uncompressedDirectBufOff = 0;
        setInputFromSavedData();

        // Reinitialize bzip2's output direct buffer.
        compressedDirectBuf.limit(directBufferSize);
        compressedDirectBuf.position(directBufferSize);
    }

    // Copy enough data from userBuf to uncompressedDirectBuf.
    synchronized void setInputFromSavedData() {
        int len = Math.min(userBufLen, uncompressedDirectBuf.remaining());
        ((ByteBuffer) uncompressedDirectBuf).put(userBuf, userBufOff, len);
        userBufLen -= len;
        userBufOff += len;
        uncompressedDirectBufLen = uncompressedDirectBuf.position();
    }

    /**
     * Not supported by this compressor.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public synchronized void setDictionary(byte[] b, int off, int len) {
        throw new UnsupportedOperationException();
    }

    /**
     * Reports whether more input may be supplied via
     * {@link #setInput(byte[], int, int)}.  As a side effect, any input still
     * held in the user buffer is moved into the direct input buffer.
     *
     * @return {@code true} if there is no pending compressed output, no
     *         unconsumed staged input, and the direct input buffer still has
     *         free space after absorbing the saved user data
     */
    @Override
    public synchronized boolean needsInput() {
        // Compressed data still available?
        if (compressedDirectBuf.remaining() > 0) {
            return false;
        }

        // Uncompressed data available in either the direct buffer or user buffer?
        if (keepUncompressedBuf && uncompressedDirectBufLen > 0) {
            return false;
        }

        if (uncompressedDirectBuf.remaining() > 0) {
            // Check if we have consumed all data in the user buffer.
            if (userBufLen <= 0) {
                return true;
            } else {
                // Copy enough data from userBuf to uncompressedDirectBuf.
                setInputFromSavedData();
                return uncompressedDirectBuf.remaining() > 0;
            }
        }

        return false;
    }

    /**
     * Marks the end of input: no further input will be supplied.
     */
    @Override
    public synchronized void finish() {
        finish = true;
    }

    /**
     * @return {@code true} once the {@code finished} flag is set and all
     *         compressed data has been read out of the output buffer
     */
    @Override
    public synchronized boolean finished() {
        // Check if bzip2 says it has finished and
        // all compressed data has been consumed.
        // NOTE(review): 'finished' is never assigned true in Java code here;
        // presumably the native deflateBytesDirect() sets it - confirm.
        return (finished && compressedDirectBuf.remaining() == 0);
    }

    /**
     * Fills {@code b} with compressed data.  Pending output from a previous
     * call is returned first; otherwise the native compressor is invoked once
     * and up to {@code len} bytes of its output are returned.
     *
     * @param b   destination buffer
     * @param off start offset within {@code b}
     * @param len maximum number of bytes to write
     * @return the number of bytes actually written to {@code b}
     * @throws NullPointerException if {@code b} is {@code null}
     * @throws ArrayIndexOutOfBoundsException if {@code off} and {@code len}
     *         do not describe a valid range of {@code b}
     * @throws IOException declared by the interface; not thrown by the Java
     *         code in this method
     */
    @Override
    public synchronized int compress(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        }
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new ArrayIndexOutOfBoundsException();
        }

        // Check if there is compressed data.
        int n = compressedDirectBuf.remaining();
        if (n > 0) {
            n = Math.min(n, len);
            ((ByteBuffer) compressedDirectBuf).get(b, off, n);
            return n;
        }

        // Re-initialize bzip2's output direct buffer.
        compressedDirectBuf.rewind();
        compressedDirectBuf.limit(directBufferSize);

        // Compress the data.
        n = deflateBytesDirect();
        // Expose exactly the n bytes just produced for reading.
        compressedDirectBuf.limit(n);

        // Check if bzip2 has consumed the entire input buffer.
        // Set keepUncompressedBuf properly.
        if (uncompressedDirectBufLen <= 0) { // bzip2 consumed all input
            keepUncompressedBuf = false;
            uncompressedDirectBuf.clear();
            uncompressedDirectBufOff = 0;
            uncompressedDirectBufLen = 0;
        } else {
            keepUncompressedBuf = true;
        }

        // Get at most 'len' bytes.
        n = Math.min(n, len);
        ((ByteBuffer) compressedDirectBuf).get(b, off, n);

        return n;
    }

    /**
     * Returns the total number of compressed bytes output so far.
     *
     * @return the total (non-negative) number of compressed bytes output so far
     * @throws NullPointerException if this compressor has already been ended
     */
    @Override
    public synchronized long getBytesWritten() {
        checkStream();
        return getBytesWritten(stream);
    }

    /**
     * Returns the total number of uncompressed bytes input so far.
     *
     * @return the total (non-negative) number of uncompressed bytes input so far
     * @throws NullPointerException if this compressor has already been ended
     */
    @Override
    public synchronized long getBytesRead() {
        checkStream();
        return getBytesRead(stream);
    }

    /**
     * Discards all buffered input and output and replaces the native stream
     * with a fresh one created from the current block size and work factor.
     *
     * @throws NullPointerException if this compressor has already been ended
     */
    @Override
    public synchronized void reset() {
        checkStream();
        end(stream);
        // Forget the released handle before re-initialising: should init()
        // fail, the compressor is left in the "ended" state (stream == 0)
        // rather than holding a handle that must not be released again.
        stream = 0;
        stream = init(blockSize, workFactor);
        finish = false;
        finished = false;
        uncompressedDirectBuf.rewind();
        uncompressedDirectBufOff = uncompressedDirectBufLen = 0;
        keepUncompressedBuf = false;
        // Position == limit marks the output buffer as holding no pending data.
        compressedDirectBuf.limit(directBufferSize);
        compressedDirectBuf.position(directBufferSize);
        userBufOff = userBufLen = 0;
    }

    /**
     * Releases the native stream.  Calling this more than once is harmless;
     * after it, methods guarded by {@code checkStream()} throw
     * {@link NullPointerException}.
     */
    @Override
    public synchronized void end() {
        if (stream != 0) {
            end(stream);
            stream = 0;
        }
    }

    // Forwards the given library name to the native initIDs().
    static void initSymbols(String libname) {
        initIDs(libname);
    }

    // Guards against use of the native stream after end().
    private void checkStream() {
        if (stream == 0) {
            throw new NullPointerException("bzip2 stream is not initialized");
        }
    }

    private native static void initIDs(String libname);

    private native static long init(int blockSize, int workFactor);

    private native int deflateBytesDirect();

    private native static long getBytesRead(long strm);

    private native static long getBytesWritten(long strm);

    private native static void end(long strm);

    public native static String getLibraryName();
}