com.netflix.bdp.s3mper.metastore.impl.MetastoreJanitor.java Source code

Java tutorial

Introduction

Here is the source code for com.netflix.bdp.s3mper.metastore.impl.MetastoreJanitor.java

Source

/*
 *
 *  Copyright 2013 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */

package com.netflix.bdp.s3mper.metastore.impl;

import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.dynamodb.AmazonDynamoDBClient;
import com.amazonaws.services.dynamodb.model.Key;
import com.google.common.util.concurrent.RateLimiter;
import com.netflix.bdp.s3mper.metastore.FileInfo;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;

import static java.lang.String.*;

/**
 * Cleans up old entries in the DynamoDB metastore.
 *
 * <p>A run starts a pool of scanner tasks that feed keys into a shared queue and a
 * pool of delete tasks that consume from that queue. This is intended to be a
 * single use class. It is not thread safe and should not be reused.
 *
 * @author dweeks
 */
public class MetastoreJanitor {
    private static final Logger log = Logger.getLogger(MetastoreJanitor.class.getName());

    /** Metastore table name; replaced by the "s3mper.metastore.name" setting in {@link #initalize}. */
    static String tableName = "ConsistentListingMetastore";
    private DynamoDBMetastore metastore = null;
    private AmazonDynamoDBClient db = null;

    // Defaults are read from JVM system properties; the setters below override them.
    private int scanLimit = Integer.getInteger("s3mper.janitor.scan.limit", 500);
    private int deleteLimit = Integer.getInteger("s3mper.janitor.delete.limit", 500);
    private int queueSize = Integer.getInteger("s3mper.janitor.queue.limit", 2000);
    private int scanThreads = Integer.getInteger("s3mper.janitor.threads.scan", 1);
    private int deleteThreads = Integer.getInteger("s3mper.janitor.threads.delete", 10);

    // Created by the setters, or lazily from the limits above when a run starts.
    private RateLimiter scanLimiter;
    private RateLimiter deleteLimiter;

    private ExecutorService executor;
    private final List<AbstractDynamoDBTask> tasks = Collections
            .synchronizedList(new ArrayList<AbstractDynamoDBTask>());
    /** Futures of every submitted task (scanners and deleters); awaited by the shutdown hook. */
    private final List<Future<?>> futures = Collections.synchronizedList(new ArrayList<Future<?>>());
    /** Futures of the scanner tasks only; a run waits on these before stopping the tasks. */
    private final List<Future<?>> scanFutures = Collections.synchronizedList(new ArrayList<Future<?>>());

    /** Queue handed to both the scanner tasks and the delete tasks. */
    private BlockingQueue<Key> queue = new LinkedBlockingQueue<Key>();

    /**
     * Creates the DynamoDB client and the metastore from the given configuration.
     *
     * <p>Credentials are read from {@code fs.<scheme>.awsAccessKeyId} and
     * {@code fs.<scheme>.awsSecretAccessKey}, where the scheme is taken from
     * {@code uri}; the {@code s3mper.override.*} settings take precedence when present.
     *
     * @param uri  URI whose scheme selects the credential keys; also passed to the metastore
     * @param conf configuration to read credentials and the table name from
     * @throws Exception if the metastore fails to initialize
     */
    public void initalize(URI uri, Configuration conf) throws Exception {
        String credentialPrefix = "fs." + uri.getScheme();
        String keyId = conf.get(credentialPrefix + ".awsAccessKeyId");
        String keySecret = conf.get(credentialPrefix + ".awsSecretAccessKey");

        // An override option for accessing across accounts
        keyId = conf.get("s3mper.override.awsAccessKeyId", keyId);
        keySecret = conf.get("s3mper.override.awsSecretAccessKey", keySecret);

        db = new AmazonDynamoDBClient(new BasicAWSCredentials(keyId, keySecret));

        tableName = conf.get("s3mper.metastore.name", tableName);

        metastore = new DynamoDBMetastore();
        metastore.initalize(uri, conf);
    }

    /**
     * Deletes all entries for a given path (directory) from the metastore.
     *
     * <p>Every entry returned by the metastore listing of {@code path} is deleted
     * one at a time.
     *
     * @param path directory whose entries are removed
     * @throws Exception if listing or deleting fails
     */
    public void clearPath(Path path) throws Exception {
        // NOTE(review): the meaning of the 'true' flag is defined by DynamoDBMetastore.list - confirm there.
        List<FileInfo> listing = metastore.list(Collections.singletonList(path), true);

        for (FileInfo file : listing) {
            metastore.delete(file.getPath());
        }
    }

    /**
     * Scans the timeseries index in dynamodb (i.e. hash key = 'epoch' ) and
     * deletes entries older than the given time.
     *
     * <p>Blocks until every scanner task has finished. All tasks are then told to
     * stop and the executor is shut down, even when the run fails.
     *
     * @param unit unit of {@code time}
     * @param time age threshold; converted to milliseconds for the scanner tasks
     * @throws Exception if a scanner task fails or the wait is interrupted
     */
    public void deleteTimeseries(TimeUnit unit, long time) throws Exception {
        startRun("Starting Timeseries Delete");

        try {
            long ageMillis = unit.toMillis(time);

            for (int i = 0; i < scanThreads; i++) {
                TimeseriesScannerTask scanner = new TimeseriesScannerTask(db, scanLimiter, queue, queueSize,
                        ageMillis);

                tasks.add(scanner);

                Future<?> scanFuture = executor.submit(scanner);

                futures.add(scanFuture);
                scanFutures.add(scanFuture);
            }

            processDelete();
        } finally {
            // Without this, a failed scan would leave worker threads running.
            stopTasksAndShutdown();
        }
    }

    /**
     * Delete paths entries older than the time period provided. This requires
     * a full scan of the table, which is very resource intensive, so timeseries
     * is the preferred approach for deleting entries.
     *
     * <p>Blocks until every scanner task has finished. All tasks are then told to
     * stop and the executor is shut down, even when the run fails.
     *
     * @param unit unit of {@code time}
     * @param time age threshold; converted to milliseconds for the scanner tasks
     * @throws Exception if a scanner task fails or the wait is interrupted
     */
    public void deletePaths(TimeUnit unit, long time) throws Exception {
        startRun("Starting Full Path Delete");

        try {
            long ageMillis = unit.toMillis(time);

            log.info(format("Scanning for items older than: %d (ms)", ageMillis));

            for (int i = 0; i < scanThreads; i++) {
                PathScannerTask scanner = new PathScannerTask(db, scanLimiter, queue, queueSize, ageMillis);

                tasks.add(scanner);

                Future<?> scanFuture = executor.submit(scanner);

                futures.add(scanFuture);
                scanFutures.add(scanFuture);
            }

            processDelete();
        } finally {
            // Without this, a failed scan would leave worker threads running.
            stopTasksAndShutdown();
        }
    }

    /**
     * Common prologue of a run: logs the settings, makes sure both rate limiters
     * exist and creates the thread pool shared by the scan and delete tasks.
     */
    private void startRun(String banner) {
        log.info(banner);
        log.info(format("read_units=%d, write_units=%d, queue_size=%d, scan_threads=%d, delete_threads=%d",
                scanLimit, deleteLimit, queueSize, scanThreads, deleteThreads));

        ensureRateLimiters();

        executor = Executors.newFixedThreadPool(scanThreads + deleteThreads);
    }

    /**
     * Creates any rate limiter that was not configured through its setter, using
     * the current limit value, so tasks are never handed a null limiter.
     */
    private void ensureRateLimiters() {
        if (scanLimiter == null) {
            scanLimiter = RateLimiter.create(scanLimit);
        }

        if (deleteLimiter == null) {
            deleteLimiter = RateLimiter.create(deleteLimit);
        }
    }

    /**
     * Registers the shutdown hook, starts the delete tasks and waits for all
     * scanner tasks to complete. Stopping the tasks and shutting down the
     * executor is left to the caller's finally block.
     */
    private void processDelete() throws Exception {
        registerShutdownHook();

        for (int i = 0; i < deleteThreads; i++) {
            DeleteWriterTask delete = new DeleteWriterTask(db, deleteLimiter, queue);

            tasks.add(delete);
            futures.add(executor.submit(delete));
        }

        // Iterating a synchronizedList requires holding its lock manually.
        synchronized (scanFutures) {
            for (Future<?> future : scanFutures) {
                future.get();
            }
        }
    }

    /** Clears the running flag on every task created so far. */
    private void stopTasks() {
        synchronized (tasks) {
            for (AbstractDynamoDBTask task : tasks) {
                task.running = false;
            }
        }
    }

    /**
     * Tells all tasks to stop and initiates an orderly executor shutdown.
     * Note that {@code shutdown()} does not wait for running tasks to finish.
     */
    private void stopTasksAndShutdown() {
        stopTasks();

        log.info("Shutting down . . .");
        executor.shutdown();
        log.info("Shutdown complete.");
    }

    /**
     * Attempts to shutdown cleanly by finishing processing for all entries in
     * the queue.  If not done cleanly, some timeseries entries may get
     * deleted without deleting their corresponding path entries.
     */
    private void registerShutdownHook() {
        Runtime.getRuntime().addShutdownHook(new Thread("Metastore Janitor Shutdown Hook") {

            @Override
            public void run() {
                log.info("Shutting down all threads");

                stopTasks();

                // Wait for every task; a failure in one must not prevent waiting on the rest.
                synchronized (futures) {
                    for (Future<?> future : futures) {
                        try {
                            future.get();
                        } catch (Exception ex) {
                            log.error("", ex);
                        }
                    }
                }

                executor.shutdown();
            }

        });
    }

    /** @return the configured scan limit (logged as read units) */
    public int getScanLimit() {
        return scanLimit;
    }

    /**
     * Sets the scan limit and replaces the scan rate limiter accordingly.
     *
     * @param scanLimit permits per second for the scan rate limiter
     */
    public void setScanLimit(int scanLimit) {
        this.scanLimit = scanLimit;

        scanLimiter = RateLimiter.create(scanLimit);
    }

    /** @return the configured delete limit (logged as write units) */
    public int getDeleteLimit() {
        return deleteLimit;
    }

    /**
     * Sets the delete limit and replaces the delete rate limiter accordingly.
     *
     * @param deleteLimit permits per second for the delete rate limiter
     */
    public void setDeleteLimit(int deleteLimit) {
        this.deleteLimit = deleteLimit;

        deleteLimiter = RateLimiter.create(deleteLimit);
    }

    /** @return number of scanner tasks started per run */
    public int getScanThreads() {
        return scanThreads;
    }

    /** @param scanThreads number of scanner tasks to start per run */
    public void setScanThreads(int scanThreads) {
        this.scanThreads = scanThreads;
    }

    /** @return number of delete tasks started per run */
    public int getDeleteThreads() {
        return deleteThreads;
    }

    /** @param deleteThreads number of delete tasks to start per run */
    public void setDeleteThreads(int deleteThreads) {
        this.deleteThreads = deleteThreads;
    }

}