org.apache.hadoop.io.compress.snappy.SnappyCompressor.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.io.compress.snappy.SnappyCompressor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.io.compress.snappy;

import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.Compressor;

/**
 * A {@link Compressor} based on the snappy compression algorithm.
 * http://code.google.com/p/snappy/
 */
public class SnappyCompressor implements Compressor {
    private static final Log LOG = LogFactory.getLog(SnappyCompressor.class.getName());
    private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024;

    // HACK - Use this as a global lock in the JNI layer
    @SuppressWarnings({ "unchecked", "unused" })
    private static Class clazz = SnappyCompressor.class;

    private int directBufferSize;
    private Buffer compressedDirectBuf = null;
    private int uncompressedDirectBufLen;
    private Buffer uncompressedDirectBuf = null;
    private byte[] userBuf = null;
    private int userBufOff = 0, userBufLen = 0;
    private boolean finish, finished;

    private long bytesRead = 0L;
    private long bytesWritten = 0L;

    static {
        if (LoadSnappy.isLoaded()) {
            // Initialize the native library
            try {
                initIDs();
            } catch (Throwable t) {
                // Ignore failure to load/initialize snappy
                LOG.warn(t.toString());
            }
        } else {
            LOG.error("Cannot load " + SnappyCompressor.class.getName() + " without snappy library!");
        }
    }

    /**
     * Creates a new compressor.
     *
     * @param directBufferSize size of the direct buffer to be used.
     */
    public SnappyCompressor(int directBufferSize) {
        this.directBufferSize = directBufferSize;

        uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
        compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
        compressedDirectBuf.position(directBufferSize);
    }

    /**
     * Creates a new compressor with the default buffer size.
     */
    public SnappyCompressor() {
        this(DEFAULT_DIRECT_BUFFER_SIZE);
    }

    /**
     * Sets input data for compression.
     * This should be called whenever #needsInput() returns
     * <code>true</code> indicating that more input data is required.
     *
     * @param b   Input data
     * @param off Start offset
     * @param len Length
     */
    @Override
    public synchronized void setInput(byte[] b, int off, int len) {
        if (b == null) {
            throw new NullPointerException();
        }
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new ArrayIndexOutOfBoundsException();
        }
        finished = false;

        if (len > uncompressedDirectBuf.remaining()) {
            // save data; now !needsInput
            this.userBuf = b;
            this.userBufOff = off;
            this.userBufLen = len;
        } else {
            ((ByteBuffer) uncompressedDirectBuf).put(b, off, len);
            uncompressedDirectBufLen = uncompressedDirectBuf.position();
        }

        bytesRead += len;
    }

    /**
     * If a write would exceed the capacity of the direct buffers, it is set
     * aside to be loaded by this function while the compressed data are
     * consumed.
     */
    synchronized void setInputFromSavedData() {
        if (0 >= userBufLen) {
            return;
        }
        finished = false;

        uncompressedDirectBufLen = Math.min(userBufLen, directBufferSize);
        ((ByteBuffer) uncompressedDirectBuf).put(userBuf, userBufOff, uncompressedDirectBufLen);

        // Note how much data is being fed to snappy
        userBufOff += uncompressedDirectBufLen;
        userBufLen -= uncompressedDirectBufLen;
    }

    /**
     * Does nothing.
     */
    @Override
    public synchronized void setDictionary(byte[] b, int off, int len) {
        // do nothing
    }

    /**
     * Returns true if the input data buffer is empty and
     * #setInput() should be called to provide more input.
     *
     * @return <code>true</code> if the input data buffer is empty and
     *         #setInput() should be called in order to provide more input.
     */
    @Override
    public synchronized boolean needsInput() {
        return !(compressedDirectBuf.remaining() > 0 || uncompressedDirectBuf.remaining() == 0 || userBufLen > 0);
    }

    /**
     * When called, indicates that compression should end
     * with the current contents of the input buffer.
     */
    @Override
    public synchronized void finish() {
        finish = true;
    }

    /**
     * Returns true if the end of the compressed
     * data output stream has been reached.
     *
     * @return <code>true</code> if the end of the compressed
     *         data output stream has been reached.
     */
    @Override
    public synchronized boolean finished() {
        // Check if all uncompressed data has been consumed
        return (finish && finished && compressedDirectBuf.remaining() == 0);
    }

    /**
     * Fills specified buffer with compressed data. Returns actual number
     * of bytes of compressed data. A return value of 0 indicates that
     * needsInput() should be called in order to determine if more input
     * data is required.
     *
     * @param b   Buffer for the compressed data
     * @param off Start offset of the data
     * @param len Size of the buffer
     * @return The actual number of bytes of compressed data.
     */
    @Override
    public synchronized int compress(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        }
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new ArrayIndexOutOfBoundsException();
        }

        // Check if there is compressed data
        int n = compressedDirectBuf.remaining();
        if (n > 0) {
            n = Math.min(n, len);
            ((ByteBuffer) compressedDirectBuf).get(b, off, n);
            bytesWritten += n;
            return n;
        }

        // Re-initialize the snappy's output direct-buffer
        compressedDirectBuf.clear();
        compressedDirectBuf.limit(0);
        if (0 == uncompressedDirectBuf.position()) {
            // No compressed data, so we should have !needsInput or !finished
            setInputFromSavedData();
            if (0 == uncompressedDirectBuf.position()) {
                // Called without data; write nothing
                finished = true;
                return 0;
            }
        }

        // Compress data
        n = compressBytesDirect();
        compressedDirectBuf.limit(n);
        uncompressedDirectBuf.clear(); // snappy consumes all buffer input

        // Set 'finished' if snapy has consumed all user-data
        if (0 == userBufLen) {
            finished = true;
        }

        // Get atmost 'len' bytes
        n = Math.min(n, len);
        bytesWritten += n;
        ((ByteBuffer) compressedDirectBuf).get(b, off, n);

        return n;
    }

    /**
     * Resets compressor so that a new set of input data can be processed.
     */
    @Override
    public synchronized void reset() {
        finish = false;
        finished = false;
        uncompressedDirectBuf.clear();
        uncompressedDirectBufLen = 0;
        compressedDirectBuf.clear();
        compressedDirectBuf.limit(0);
        userBufOff = userBufLen = 0;
        bytesRead = bytesWritten = 0L;
    }

    /**
     * Prepare the compressor to be used in a new stream with settings defined in
     * the given Configuration
     *
     * @param conf Configuration from which new setting are fetched
     */
    @Override
    public synchronized void reinit(Configuration conf) {
        reset();
    }

    /**
     * Return number of bytes given to this compressor since last reset.
     */
    @Override
    public synchronized long getBytesRead() {
        return bytesRead;
    }

    /**
     * Return number of bytes consumed by callers of compress since last reset.
     */
    @Override
    public synchronized long getBytesWritten() {
        return bytesWritten;
    }

    /**
     * Closes the compressor and discards any unprocessed input.
     */
    @Override
    public synchronized void end() {
    }

    private native static void initIDs();

    private native int compressBytesDirect();
}