com.hadoop.compression.fourmc.zstd.ZstdStreamDecompressor.java Source code

Introduction

Here is the source code for com.hadoop.compression.fourmc.zstd.ZstdStreamDecompressor.java
Source

/*
 * Copyright (c) 2016-2016, Xianjin YE(advancedxy@gmail.com)
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *  Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 *  Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or
 *   other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
package com.hadoop.compression.fourmc.zstd;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.compress.Decompressor;

import java.io.IOException;
import java.nio.ByteBuffer;

/**
 * Zstd Stream Decompressor.
 *
 * <p>Implements Hadoop's {@link Decompressor} on top of a native ZSTD_DStream context reached through JNI.
 * Compressed bytes handed to {@link #setInput(byte[], int, int)} are staged in a direct input buffer,
 * decompressed by the native {@code decompressStream} call into a direct output buffer, and then copied out
 * by {@link #decompress(byte[], int, int)}.
 *
 * <p>All public instance methods are {@code synchronized}. Call {@link #end()} to release the native context;
 * the instance must not be used afterwards.
 */
public class ZstdStreamDecompressor implements Decompressor {

    private static final Log LOG = LogFactory.getLog(ZstdStreamDecompressor.class.getName());

    /* Caller-supplied compressed input that has not been copied into iBuff yet. */
    private byte[] userBuf = null;
    private int userBufOff = 0;
    private int userBufLen = 0;

    /* Set when the last decompressStream call returned 0, i.e. a frame was fully decoded. */
    private boolean finished;

    /* Opaque pointer to ZSTD_DStream context; set to 0 once end() has released it. */
    private long dStream;

    /* Some constants */
    private static final int frameHeaderSizePrefix = 5; // Read size after initDStream.

    /* Input and output buffer size in decompressor.
     * They can be any size; the sizes recommended by ZSTD are used here to reduce feeding and flush latency.
     */
    private static final int iBuffSize = (int) Zstd.dStreamInSize();
    private static final int oBuffSize = (int) Zstd.dStreamOutSize();

    /*
     * Direct staging buffers and their cursors.
     *
     * NOTE(review): srcPos, dstPos and oBuffLen are never given a non-zero value by the Java code in this
     * class, so they are presumably updated by the native decompressStream through field IDs cached in
     * initIDs(). Confirm against the JNI source before renaming or retyping any of these fields.
     */
    private ByteBuffer iBuff = null;
    private int iBuffLen = 0;   // number of valid compressed bytes staged in iBuff
    private long srcPos = 0;    // read offset inside iBuff
    private ByteBuffer oBuff = null;
    private int oBuffLen = 0;   // number of decompressed bytes available in oBuff after a native call
    private long dstPos = 0;    // write offset inside oBuff
    private int toRead;         // input size hint for the next native call, capped at iBuffSize

    private static boolean nativeLoaded = false;
    static {
        try {
            if (Zstd.isNativeLoaded()) {
                initIDs();
                nativeLoaded = true;
            }
        } catch (Throwable t) {
            nativeLoaded = false;
            // Pass the throwable as the second argument so the stack trace is logged, not just toString().
            LOG.error("Failed to initialize native ZstdStreamDecompressor", t);
        }
    }

    /**
     * Check if native code is loaded.
     *
     * @return <code>true</code> if native lib is loaded and initialized,
     * else <code>false</code>
     */
    public static boolean isNativeLoaded() {
        return nativeLoaded;
    }

    /**
     * Creates a new ZstdStreamDecompressor.
     *
     * <p>Allocates the native stream context and the two direct buffers, then calls {@link #reset()} to
     * initialize the stream state.
     */
    public ZstdStreamDecompressor() {
        dStream = createDStream();

        /*
         * Notice for developers:
         * Is a direct buffer pool needed here?
         * The iBuffSize and oBuffSize are about ~128K, which is way smaller than 4MB. Taking the community
         * Lz4Compressor for reference, allocateDirect should not trigger a
         * java.lang.OutOfMemoryError: Direct Buffer Memory exception.
         *
         * We can use a direct buffer pool if needed.
         */
        iBuff = ByteBuffer.allocateDirect(iBuffSize);
        oBuff = ByteBuffer.allocateDirect(oBuffSize);
        reset();
    }

    /**
     * Supplies compressed input. The array is retained by reference and consumed incrementally.
     *
     * @param b   array holding compressed bytes
     * @param off offset of the first compressed byte in <code>b</code>
     * @param len number of compressed bytes available
     * @throws NullPointerException           if <code>b</code> is null
     * @throws ArrayIndexOutOfBoundsException if <code>off</code>/<code>len</code> do not describe a range of
     *                                        <code>b</code>
     */
    public synchronized void setInput(byte[] b, int off, int len) {
        if (b == null) {
            throw new NullPointerException();
        }
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new ArrayIndexOutOfBoundsException();
        }

        this.userBuf = b;
        this.userBufOff = off;
        this.userBufLen = len;

        setInputFromSavedData();

        // Mark the output direct-buffer as empty (position == limit, so remaining() == 0).
        oBuff.limit(oBuffSize);
        oBuff.position(oBuffSize);
    }

    /**
     * Copies as much pending user input into the input direct-buffer as is needed to reach
     * <code>toRead</code> staged bytes, and advances the user-buffer cursor accordingly.
     */
    synchronized void setInputFromSavedData() {
        int len = Math.min(userBufLen, toRead - iBuff.position());

        iBuff.put(userBuf, userBufOff, len);

        userBufOff += len;
        userBufLen -= len;
        iBuffLen = iBuff.position();
    }

    /**
     * Dictionaries are not supported; this method does nothing.
     */
    public synchronized void setDictionary(byte[] b, int off, int len) {
        // nop
    }

    /**
     * Tells whether {@link #setInput(byte[], int, int)} must be called before more output can be produced.
     * As a side effect, pending user input is moved into the input direct-buffer when fewer than
     * <code>toRead</code> bytes are staged.
     *
     * @return <code>true</code> if no decompressed output is pending, fewer than <code>toRead</code> bytes are
     * staged and all user input has been consumed; <code>false</code> otherwise
     */
    public synchronized boolean needsInput() {
        // Decompressed output is still waiting to be read by the caller.
        if (oBuff.remaining() > 0) {
            return false;
        }

        if (iBuffLen < toRead) {
            // Check if we have consumed all user-input
            if (userBufLen <= 0) {
                return true;
            } else {
                setInputFromSavedData();
            }
        }

        return false;
    }

    /**
     * @return always <code>false</code>; dictionaries are not supported
     */
    public synchronized boolean needsDictionary() {
        return false;
    }

    /**
     * @return <code>true</code> once the end of a frame has been decoded and all decompressed output has been
     * read
     */
    public synchronized boolean finished() {
        return (finished && oBuff.remaining() == 0);
    }

    /**
     * Copies decompressed bytes into <code>b</code>, running the native decompressor first when the output
     * buffer is empty and staged input (or a pending flush) is available.
     *
     * @param b   destination array
     * @param off offset in <code>b</code> at which to start writing
     * @param len maximum number of bytes to write
     * @return number of bytes written to <code>b</code>; 0 when nothing could be produced
     * @throws NullPointerException           if <code>b</code> is null
     * @throws ArrayIndexOutOfBoundsException if <code>off</code>/<code>len</code> do not describe a range of
     *                                        <code>b</code>
     * @throws InternalError                  if the native decompressStream call reports an error
     * @throws IOException                    declared for the interface; not thrown by the code in this method
     */
    public synchronized int decompress(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        }
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new ArrayIndexOutOfBoundsException();
        }

        // Serve output left over from a previous native call first.
        int numBytes = oBuff.remaining();
        if (numBytes > 0) {
            numBytes = Math.min(numBytes, len);
            oBuff.get(b, off, numBytes);
            return numBytes;
        }

        // Check if there is data to decompress. When an end of frame is reached, decompress shall not call
        // decompressStream without initStream.
        if (srcPos < iBuffLen || (iBuffLen == toRead && !finished)) {

            // Re-initialize the ZstdStream's output direct-buffer
            oBuff.rewind();
            oBuff.limit(oBuffSize);
            dstPos = 0;

            // Decompress data, all the input should be consumed
            toRead = decompressStream(dStream, oBuff, oBuffSize, iBuff, iBuffLen);
            if (Zstd.isError(toRead)) {
                throw new InternalError("ZSTD decompressStream failed, due to: " + Zstd.getErrorName(toRead));
            }

            // If toRead is 0, then we have finished decoding a frame. Finished should be set to true.
            finished = toRead == 0;

            // Check if all data in iBuff is consumed.
            if (srcPos >= iBuffLen) {
                srcPos = 0;
                iBuffLen = 0;
                iBuff.clear();
                // toRead being 1 is a special case, meaning either:
                // 1. zstd really needs one more byte, or
                // 2. zstd did not flush all the data into oBuff because oBuff is small.
                // When all the input is consumed and dstPos > 0, then toRead = 1 only happens in case 2.
                // This exception will be eliminated in later versions of zstd(>1.0.0). The following line then can
                // be safely removed or kept untouched as it will not be triggered.
                toRead = (toRead == 1 && dstPos != 0) ? 0 : toRead;
            }
            // Read at most iBuffSize; works even for a skippable frame (toRead can be any size between 1 and
            // 4GB-1 in a skippable frame).
            toRead = Math.min(toRead, iBuffSize);
            numBytes = oBuffLen;
            oBuff.limit(numBytes);
            // Return at most 'len' bytes
            numBytes = Math.min(numBytes, len);
            oBuff.get(b, off, numBytes);
        }

        return numBytes;
    }

    /**
     * @return number of user-supplied input bytes not yet copied into the input direct-buffer
     */
    public synchronized int getRemaining() {
        return userBufLen;
    }

    /**
     * Resets the decompressor so that a new stream can be decoded: clears all Java-side buffer state and
     * re-initializes the native stream. An initialization error is logged, not thrown.
     */
    public synchronized void reset() {
        finished = false;

        // Drop any staged-but-unconsumed input. Without clearing iBuff and the cursors, a reset issued while
        // input is still staged would leave a stale buffer position/read offset behind and corrupt the next
        // setInputFromSavedData()/decompressStream round.
        iBuff.clear();
        iBuffLen = 0;
        srcPos = 0;

        // Mark the output direct-buffer as empty.
        oBuff.limit(oBuffSize);
        oBuff.position(oBuffSize);
        oBuffLen = 0;
        dstPos = 0;

        userBufOff = userBufLen = 0;

        toRead = initDStream(dStream);
        if (Zstd.isError(toRead)) {
            LOG.error("InitDStream failed! Error is: " + Zstd.getErrorName(toRead));
        }
        // In zstd v1.0.0, initDStream is 0, but it will be changed to frameHeaderSizePrefix(5) in the following
        // release. So we add the related code in advance.
        if (toRead == 0) {
            toRead = frameHeaderSizePrefix;
        }
    }

    /**
     * Releases the native stream context. Calling this method more than once is harmless; the instance must
     * not be used for decompression afterwards.
     */
    public synchronized void end() {
        // Guard against freeing the same native pointer twice.
        if (dStream != 0) {
            freeDStream(dStream);
            dStream = 0;
        }
    }

    /* JNI methods */
    private static native void initIDs();

    private static native long createDStream();

    /* Notice for developers:
     * We are mapping size_t(in the c version) to int here. Why is that valid?
     * For normal cases, return code is bounded by default buffers sizes(~128K), which is way less than 2G(Int).
     * For error codes, return code is negative, but converted to size_t in the jni calls. There is no loss of
     * information.
     *
     * Also remember: this code is intended to work on normal machines(Windows/Mac/Linux on commodity hardware)
     * Portability is not the major concern.
     */

    private static native int freeDStream(long stream);

    private static native int initDStream(long stream);

    private native int decompressStream(long stream, ByteBuffer dst, int dstSize, ByteBuffer src, int srcSize);

}