com.koda.integ.hbase.storage.LRUStorageRecycler.java Source code

Introduction

Here is the source code for com.koda.integ.hbase.storage.LRUStorageRecycler.java, a background thread that reclaims external file-storage space for the Koda off-heap cache.

Source

/*******************************************************************************
* Copyright (c) 2013 Vladimir Rodionov. All Rights Reserved
*
* This code is released under the GNU Affero General Public License.
*
* See: http://www.fsf.org/licensing/licenses/agpl-3.0.html
*
* VLADIMIR RODIONOV MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY
* OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
* NON-INFRINGEMENT. Vladimir Rodionov SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED
* BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR
* ITS DERIVATIVES.
*
* Author: Vladimir Rodionov
*
*******************************************************************************/
package com.koda.integ.hbase.storage;

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;

import com.koda.NativeMemoryException;
import com.koda.cache.OffHeapCache;
import com.koda.common.util.NumericHistogram;
import com.koda.integ.hbase.util.FileUtils;

/** 
 * This thread recycles (reclaims) space by evicting data according to the
 * eviction algorithm of the OffHeapCache. It scans the oldest storage file
 * object by object and purges objects eligible for eviction; all other
 * objects are rewritten into the currently open storage file.
 * 
 * 
 * Three parameters define the Recycler's behavior:
 * 1. Maximum Read Throughput (comes from Configuration). This is the disk I/O
 *    throttling mechanism. The limit is not hard: it can be dynamically
 *    increased if the StorageRecycler cannot keep up with a growing storage
 *    size due to a low cache hit rate.
 * 2. Eviction Percentile Threshold (EPT), explained on an LRU example:
 *    OffHeapCache tracks the approximate distribution of object LRU times
 *    (Eviction Values). This information is available to the StorageRecycler (SR).
 *    When the SR scans a storage file, it decides whether to keep each object
 *    based on the EPT. For example, EPT = 0.5 means that ALL objects falling
 *    into the bottom 50% (0.5) of the object EV distribution histogram are
 *    eligible for deletion. The EPT is not a hard limit either and can be
 *    dynamically adjusted based on the current storage load.
 * 3. Write Batch Size. Default is 2. If an object is still usable, it needs to
 *    be stored in the currently open storage file (the most recent one). By
 *    varying this parameter we can vary the Recycler's throughput.
 *    A configuration sketch follows this list.
 *    
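 * For illustration, a hypothetical configuration sketch (the key names are the
 * constants defined below; the values shown are arbitrary):
 * <pre>{@code
 * Configuration conf = new Configuration();
 * conf.set("file.recycler.max.read.throughput", "20"); // MB/s
 * conf.set("file.recycler.eviction.threshold", "0.5"); // EPT
 * conf.set("file.recycler.write.batch.size", "4");
 * }</pre>
 *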
 * TODO:
 * + 1. ioThrottle test
 * 2. Dynamic adjustment of a recycle's parameters
 * + 3. Export statistics (no JMX yet)
 *  
 */
public class LRUStorageRecycler extends Thread implements StorageRecycler {

    /** The Constant LOG. */
    static final Log LOG = LogFactory.getLog(LRUStorageRecycler.class);

    /** Maximum read throughput in MB per second. */
    public final static String MAX_READ_THROUGHPUT_CONF = "file.recycler.max.read.throughput";

    /** Eviction threshold. */
    public final static String EVICTION_THRESHOLD_CONF = "file.recycler.eviction.threshold";

    /** Active object write batch size. */
    public final static String WRITE_BATCH_SIZE_CONF = "file.recycler.write.batch.size";

    /** Adaptive algorithm enabled? */
    public final static String ADAPTIVE_ALGO_ENABLED_CONF = "file.recycler.adaptive.algo.enabled";

    /* Default values */
    /** Storage low watermark. */
    public final static String STORAGE_RATIO_LOW_DEFAULT = "0.9f";

    /** Storage high watermark. */
    public final static String STORAGE_RATIO_HIGH_DEFAULT = "0.95f";

    /** Maximum read throughput in MB per second. */
    public final static String MAX_READ_THROUGHPUT_DEFAULT = "10"; // 10MB per sec
    /** Eviction threshold default is 0.25*/
    public final static String EVICTION_THRESHOLD_DEFAULT = "0.25";

    /** Active object write batch size. */
    public final static String WRITE_BATCH_SIZE_DEFAULT = "2";

    /** Adaptive algorithm enabled? */
    public final static String ADAPTIVE_ALGO_ENABLED_DEFAULT = "false";

    /* Current values */

    /** The low watermark. */
    private float lowWatermark;

    /** The high watermark. */
    private float highWatermark;

    /** Panic level watermark. */
    private float panicLevelWatermark = 0.99f;

    /** The max read throughput, in MB/s. */
    private int maxReadThrougput;

    /** The eviction threshold. */
    private float evictionThreshold;

    /** The write batch size. */
    private int writeBatchSize;

    /** The adaptive algorithm enabled. */
    private boolean adaptiveAlgoEnabled;

    /** The in-memory reference cache. This LRU cache contains references to external (file) storage */
    private OffHeapCache refCache;

    /** The storage this thread works on. */
    private FileExtStorage storage;

    /** Recycle thread statistics. */
    private static AtomicLong totalScannedBytes = new AtomicLong(0);

    /** The total files size. */
    private static AtomicLong totalFilesSize = new AtomicLong(0);

    /** The total scanned files. */
    private static AtomicInteger totalScannedFiles = new AtomicInteger(0);

    /** The total panic events. */
    private static AtomicInteger totalPanicEvents = new AtomicInteger(0);

    /** The total purged bytes. */
    private static AtomicLong totalPurgedBytes = new AtomicLong(0);

    /**
     * Recycler statistics, exposed as read-only counters.
     */
    public class Statistics {

        /**
         * Gets the total scanned files.
         *
         * @return the total scanned files
         */
        public int getTotalScannedFiles() {
            return LRUStorageRecycler.totalScannedFiles.get();
        }

        /**
         * Gets the total panic events.
         *
         * @return the total panic events
         */
        public int getTotalPanicEvents() {
            return LRUStorageRecycler.totalPanicEvents.get();
        }

        /**
         * Gets the total scanned bytes.
         *
         * @return the total scanned bytes
         */
        public long getTotalScannedBytes() {
            return LRUStorageRecycler.totalScannedBytes.get();
        }

        /**
         * Gets the total files size.
         *
         * @return the total files size
         */
        public long getTotalFilesSize() {
            return LRUStorageRecycler.totalFilesSize.get();
        }

        /**
         * Gets the total purged bytes.
         *
         * @return the total purged bytes
         */
        public long getTotalPurgedBytes() {
            return LRUStorageRecycler.totalPurgedBytes.get();
        }
    }

    /**
     * Instantiates a new storage Recycle Thread (GC thread).
     *
     */
    public LRUStorageRecycler() {
        super("LRU-Recycler");
    }

    /* (non-Javadoc)
     * @see com.koda.integ.hbase.storage.StorageRecycler#set(com.koda.integ.hbase.storage.ExtStorage)
     */
    @Override
    public void set(ExtStorage storage) {
        this.storage = (FileExtStorage) storage;
        this.refCache = this.storage.getStorageRefCache();
        init(this.storage.getConfig());
    }

    /**
     * Gets the statistics.
     *
     * @return the statistics
     */
    public Statistics getStatistics() {
        return new Statistics();
    }

    /**
     * Initializes the recycler from the given configuration.
     *
     * @param config the configuration
     */
    private void init(Configuration config) {

        lowWatermark = Float.parseFloat(config.get(STORAGE_RATIO_LOW_CONF, STORAGE_RATIO_LOW_DEFAULT));
        highWatermark = Float.parseFloat(config.get(STORAGE_RATIO_HIGH_CONF, STORAGE_RATIO_HIGH_DEFAULT));
        maxReadThrougput = Integer.parseInt(config.get(MAX_READ_THROUGHPUT_CONF, MAX_READ_THROUGHPUT_DEFAULT));
        evictionThreshold = Float.parseFloat(config.get(EVICTION_THRESHOLD_CONF, EVICTION_THRESHOLD_DEFAULT));
        writeBatchSize = Integer.parseInt(config.get(WRITE_BATCH_SIZE_CONF, WRITE_BATCH_SIZE_DEFAULT));
        adaptiveAlgoEnabled = Boolean
                .parseBoolean(config.get(ADAPTIVE_ALGO_ENABLED_CONF, ADAPTIVE_ALGO_ENABLED_DEFAULT));
        dumpConfig();
    }

    /**
     * Dump config.
     */
    private void dumpConfig() {
        LOG.info("Storage Recycler starts with:");
        LOG.info("Low watermark      =" + lowWatermark);
        LOG.info("High watermark     =" + highWatermark);
        LOG.info("Max read tput      =" + maxReadThrougput + "MBs");
        LOG.info("Eviction threshold =" + evictionThreshold);
        LOG.info("Write batch size   =" + writeBatchSize);
        LOG.info("Adaptive algorithm =" + adaptiveAlgoEnabled);

    }

    /* (non-Javadoc)
     * @see java.lang.Thread#run()
     */
    @Override
    public void run() {
        LOG.info(Thread.currentThread().getName() + " started.");

        while (needContinue()) {
            // Get oldest file         
            int minId = storage.getMinId().get();
            String fileName = storage.getFilePath(minId);
            RandomAccessFile raf = storage.getFile(minId);

            try {
                long size = raf.length();
                // STATISTICS
                totalFilesSize.addAndGet(size);
                LOG.info("Processing file: " + fileName + " size=" + size);
                // It's FIFO for now: always process the oldest file
                processFile(raf);
                // STATISTICS
                totalScannedFiles.incrementAndGet();
                // Update current storage size
                storage.updateStorageSize(-size);
                adjustRecyclerParameters();

            } catch (Exception e) {
                LOG.error(fileName, e);
            }

        }
        LOG.info(Thread.currentThread().getName() + " stopped.");
    }

    /**
     * Adjust recycler parameters.
     *
     */
    private void adjustRecyclerParameters() {
        if (!adaptiveAlgoEnabled) {
            return;
        }
        //TODO
    }

    /**
     * Format of a record in a file:
     * 0..3   - total record size (excluding these 4 bytes)
     * 4..7   - size of the key in bytes (16 if hash128 keys are used)
     * 8..x   - key data
     * x+1    - IN_MEMORY flag (1 - in memory, 0 - not)
     * x+2... - the block, serialized and compressed
     *
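     * For illustration, decoding one full record header laid out as above
     * (a sketch; processFile itself reads only the fields it needs):
     * <pre>{@code
     * ByteBuffer buf = ...;         // positioned at the start of a record
     * int blockSize = buf.getInt(); // bytes 0..3: record size minus these 4 bytes
     * int keySize   = buf.getInt(); // bytes 4..7: key length (16 with hash128)
     * byte[] key    = new byte[keySize];
     * buf.get(key);                 // bytes 8..x: key data
     * byte inMemory = buf.get();    // byte x+1: IN_MEMORY flag (1 or 0)
     * // remaining blockSize - keySize - 5 bytes: serialized, compressed block
     * }</pre>
     *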
     * @param file the file
     * @throws IOException Signals that an I/O exception has occurred.
     * @throws NativeMemoryException the native memory exception
     */
    private void processFile(RandomAccessFile file) throws IOException, NativeMemoryException {

        FileChannel fc = file.getChannel();
        // make sure that file size < 2G
        LOG.info("File length=" + file.length());
        MappedByteBuffer buffer = fc.map(MapMode.READ_ONLY, 0, file.length());

        long fileLength = file.length();
        long saved = 0;
        long startTime = System.currentTimeMillis();

        while (buffer.position() < fileLength) {
            int oldOffset = buffer.position();
            //LOG.info(oldOffset);
            // check IO throttle
            ioThrottle(startTime, oldOffset);

            NumericHistogram histogram = refCache.getObjectHistogram();

            int blockSize = buffer.getInt();
            int keySize = buffer.getInt();

            //LOG.info("block size="+blockSize+" key size="+keySize);

            // STATISTICS
            totalScannedBytes.addAndGet(blockSize + 4);

            // read the key bytes (we have to use byte[] keys)
            byte[] key = new byte[keySize];
            buffer.get(key);
            long data = refCache.getEvictionData(key);
            if (data < 0) {
                // not found in in_memory cache
                buffer.position(oldOffset + blockSize + 4);
                continue;
            }

            double quantValue = histogram.quantile(evictionThreshold);
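            // Keep objects whose eviction value lies above the EPT quantile
            // (i.e., recently used); everything at or below it is purged.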
            if (data > quantValue) {
                // save block
                saved += blockSize + 4;
                buffer.position(oldOffset);
                StorageHandle handle = storage.storeData(buffer);
                refCache.put(key, handle);

            } else {
                // STATISTICS
                totalPurgedBytes.addAndGet(blockSize + 4);
            }

            if (oldOffset + blockSize + 4 < fileLength) {
                // Advance pointer
                buffer.position(oldOffset + blockSize + 4);

            } else {
                break;
            }
            // Check panic. W/o adaptive processing support, dropping the rest of the
            // file entirely is the only option to keep up with the load.
            if (storage.getCurrentStorageSize() >= panicLevelWatermark * storage.getMaxStorageSize()) {
                LOG.warn("[PANIC DELETE]. Storage size exceeded " + panicLevelWatermark + " mark.");
                // STATISTICS
                totalPanicEvents.incrementAndGet();
                // stop re-storing live objects; the remainder of this file is purged
                break;
            }
        }

        // Unmap mapped ByteBuffer
        fc.close();
        FileUtils.unmapMmaped(buffer);
        LOG.info("Stats: total length=" + fileLength + "; purged data=" + (fileLength - saved)
                + " with eviction threshold=" + evictionThreshold + "; purged ratio=["
                + (((double) (fileLength - saved)) / fileLength) + "]");

    }

    /**
     * I/O throttle: sleeps just long enough to keep the scan rate at or
     * below maxReadThrougput (MB/s).
     *
     * @param startTime the scan start time, in milliseconds
     * @param oldOffset the current offset into the file, in bytes
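     *
     * Worked example: with maxReadThrougput = 10 MB/s, two seconds after
     * startTime the expected offset is 20,000,000 bytes; if oldOffset is
     * 25,000,000, the thread sleeps (25,000,000 - 20,000,000) / 10,000 = 500 ms.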
     */
    private void ioThrottle(long startTime, int oldOffset) {

        long time = System.currentTimeMillis();
        //if(time - startTime < 10) return; // do not do any throttling first 10ms
        long expectedSize = (long) (((double) (time - startTime) / 1000) * maxReadThrougput * 1000000);
        if (oldOffset > expectedSize) {
            long sleep = (oldOffset - expectedSize) / (maxReadThrougput * 1000);
            try {
                Thread.sleep(sleep);
            } catch (InterruptedException e) {
            }
        }

    }

    /**
     * Decides whether recycling should continue: true while the current
     * storage size exceeds the low-watermark fraction of the maximum size.
     *
     * @return true if another file should be recycled
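     *
     * For example, with maxStorageSize = 100 GB and lowWatermark = 0.9,
     * recycling continues while the current size is above 90 GB.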
     */
    private boolean needContinue() {
        long maxStorageSize = storage.getMaxStorageSize();
        long currentSize = storage.getCurrentStorageSize();
        return currentSize > maxStorageSize * lowWatermark;
    }

    /**
     * Gets the low watermark.
     *
     * @return the lowWatermark
     */
    public float getLowWatermark() {
        return lowWatermark;
    }

    /**
     * Sets the low watermark.
     *
     * @param lowWatermark the lowWatermark to set
     */
    public void setLowWatermark(float lowWatermark) {
        this.lowWatermark = lowWatermark;
    }

    /**
     * Gets the high watermark.
     *
     * @return the highWatermark
     */
    public float getHighWatermark() {
        return highWatermark;
    }

    /**
     * Sets the high watermark.
     *
     * @param highWatermark the highWatermark to set
     */
    public void setHighWatermark(float highWatermark) {
        this.highWatermark = highWatermark;
    }

    /**
     * Gets the max read throughput.
     *
     * @return the maxReadThrougput
     */
    public int getMaxReadThrougput() {
        return maxReadThrougput;
    }

    /**
     * Sets the max read throughput.
     *
     * @param maxReadThrougput the maxReadThrougput to set
     */
    public void setMaxReadThrougput(int maxReadThrougput) {
        this.maxReadThrougput = maxReadThrougput;
    }

    /**
     * Gets the eviction threshold.
     *
     * @return the evictionThreshold
     */
    public float getEvictionThreshold() {
        return evictionThreshold;
    }

    /**
     * Sets the eviction threshold.
     *
     * @param evictionThreshold the evictionThreshold to set
     */
    public void setEvictionThreshold(float evictionThreshold) {
        this.evictionThreshold = evictionThreshold;
    }

    /**
     * Gets the write batch size.
     *
     * @return the writeBatchSize
     */
    public int getWriteBatchSize() {
        return writeBatchSize;
    }

    /**
     * Sets the write batch size.
     *
     * @param writeBatchSize the writeBatchSize to set
     */
    public void setWriteBatchSize(int writeBatchSize) {
        this.writeBatchSize = writeBatchSize;
    }

    /**
     * Checks whether the adaptive algorithm is enabled.
     *
     * @return the adaptiveAlgoEnabled
     */
    public boolean isAdaptiveAlgoEnabled() {
        return adaptiveAlgoEnabled;
    }

    /**
     * Sets the adaptive algorithm enabled flag.
     *
     * @param adaptiveAlgoEnabled the adaptiveAlgoEnabled to set
     */
    public void setAdaptiveAlgoEnabled(boolean adaptiveAlgoEnabled) {
        this.adaptiveAlgoEnabled = adaptiveAlgoEnabled;
    }

    /**
     * Gets the storage.
     *
     * @return the storage
     */
    public ExtStorage getStorage() {
        return storage;
    }
}

class G1Algo implements StorageRecyclerAlgo {

    @Override
    public void adjustRecyclerParameters() {
        // TODO Auto-generated method stub

    }

    @Override
    public void init(StorageRecycler storage) {
        // TODO Auto-generated method stub

    }

}
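
Example

A minimal usage sketch, not part of the original listing. How the FileExtStorage
instance is created is storage-specific and assumed here (createStorage() is a
hypothetical placeholder); only set(), start(), getStatistics() and the
configuration keys come from the source above.

import org.apache.hadoop.conf.Configuration;

import com.koda.integ.hbase.storage.FileExtStorage;
import com.koda.integ.hbase.storage.LRUStorageRecycler;

public class RecyclerExample {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // throttle the scan at 20 MB/s; purge the bottom half of the EV histogram
        conf.set(LRUStorageRecycler.MAX_READ_THROUGHPUT_CONF, "20");
        conf.set(LRUStorageRecycler.EVICTION_THRESHOLD_CONF, "0.5");

        // hypothetical: stands in for whatever initializes the file storage
        FileExtStorage storage = createStorage(conf);

        LRUStorageRecycler recycler = new LRUStorageRecycler();
        recycler.set(storage); // binds the storage and reads its Configuration
        recycler.start();      // recycles until size drops below the low watermark

        LRUStorageRecycler.Statistics stats = recycler.getStatistics();
        System.out.println("Scanned files: " + stats.getTotalScannedFiles());
    }

    private static FileExtStorage createStorage(Configuration conf) {
        // application-specific storage setup goes here
        throw new UnsupportedOperationException("storage setup is application-specific");
    }
}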