Java tutorial
/* * * Copyright 2013 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.netflix.bdp.s3mper.metastore.impl; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.dynamodb.AmazonDynamoDBClient; import com.amazonaws.services.dynamodb.model.Key; import com.google.common.util.concurrent.RateLimiter; import com.netflix.bdp.s3mper.metastore.FileInfo; import java.net.URI; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; import java.util.concurrent.BlockingQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; import static java.lang.String.*; /** * Class to cleanup old entries in the DynamoDb Metastore. This is intended to be a single use class. * It is not thread safe and should not be reused. * * @author dweeks */ public class MetastoreJanitor { private static final Logger log = Logger.getLogger(MetastoreJanitor.class.getName()); static String tableName = "ConsistentListingMetastore"; private DynamoDBMetastore metastore = null; private AmazonDynamoDBClient db = null; private int scanLimit = Integer.getInteger("s3mper.janitor.scan.limit", 500); private int deleteLimit = Integer.getInteger("s3mper.janitor.delete.limit", 500); private int queueSize = Integer.getInteger("s3mper.janitor.queue.limit", 2000); private int scanThreads = Integer.getInteger("s3mper.janitor.threads.scan", 1); private int deleteThreads = Integer.getInteger("s3mper.janitor.threads.delete", 10); private RateLimiter scanLimiter; private RateLimiter deleteLimiter; private ExecutorService executor; private final List<AbstractDynamoDBTask> tasks = Collections .synchronizedList(new ArrayList<AbstractDynamoDBTask>()); private final List<Future> futures = Collections.synchronizedList(new ArrayList<Future>()); private final List<Future> scanFutures = Collections.synchronizedList(new ArrayList<Future>()); private BlockingQueue<Key> queue = new LinkedBlockingQueue<Key>(); public void initalize(URI uri, Configuration conf) throws Exception { String keyId = conf.get("fs." + uri.getScheme() + ".awsAccessKeyId"); String keySecret = conf.get("fs." + uri.getScheme() + ".awsSecretAccessKey"); //An override option for accessing across accounts keyId = conf.get("s3mper.override.awsAccessKeyId", keyId); keySecret = conf.get("s3mper.override.awsSecretAccessKey", keySecret); db = new AmazonDynamoDBClient(new BasicAWSCredentials(keyId, keySecret)); tableName = conf.get("s3mper.metastore.name", tableName); metastore = new DynamoDBMetastore(); metastore.initalize(uri, conf); } /** * Deletes all entries for a given path (directory) from the metastore. * * @param path * @throws Exception */ public void clearPath(Path path) throws Exception { List<FileInfo> listing = metastore.list(Collections.singletonList(path), true); for (FileInfo file : listing) { metastore.delete(file.getPath()); } } /** * Scans the timeseries index in dynamodb (i.e. hash key = 'epoch' ) and * deletes entries older than the given time. * * @param unit * @param time * @throws Exception */ public void deleteTimeseries(TimeUnit unit, long time) throws Exception { log.info("Starting Timeseries Delete"); log.info(format("read_units=%d, write_units=%d, queue_size=%d, scan_threads=%d, delete_threads=%d", scanLimit, deleteLimit, queueSize, scanThreads, deleteThreads)); executor = Executors.newFixedThreadPool(scanThreads + deleteThreads); for (int i = 0; i < scanThreads; i++) { TimeseriesScannerTask scanner = new TimeseriesScannerTask(db, scanLimiter, queue, queueSize, unit.toMillis(time)); tasks.add(scanner); Future scanFuture = executor.submit(scanner); futures.add(scanFuture); scanFutures.add(scanFuture); } processDelete(); } /** * Delete paths entries older than the time period provided. This requires * a full scan of the table, which is very resource intensive, so timeseries * is the preferred approach for deleting entries. * * @param unit * @param time * @throws Exception */ public void deletePaths(TimeUnit unit, long time) throws Exception { log.info("Starting Full Path Delete"); log.info(format("read_units=%d, write_units=%d, queue_size=%d, scan_threads=%d, delete_threads=%d", scanLimit, deleteLimit, queueSize, scanThreads, deleteThreads)); executor = Executors.newFixedThreadPool(scanThreads + deleteThreads); log.info(format("Scanning for items older than: %d (ms)", unit.toMillis(time))); for (int i = 0; i < scanThreads; i++) { PathScannerTask scanner = new PathScannerTask(db, scanLimiter, queue, queueSize, unit.toMillis(time)); tasks.add(scanner); Future scanFuture = executor.submit(scanner); futures.add(scanFuture); scanFutures.add(scanFuture); } processDelete(); } private void processDelete() throws Exception { registerShutdownHook(); for (int i = 0; i < deleteThreads; i++) { DeleteWriterTask delete = new DeleteWriterTask(db, deleteLimiter, queue); tasks.add(delete); futures.add(executor.submit(delete)); } synchronized (scanFutures) { for (Future future : scanFutures) { future.get(); } } synchronized (tasks) { for (AbstractDynamoDBTask task : tasks) { task.running = false; } } log.info("Shutting down . . ."); executor.shutdown(); log.info("Shutdown complete."); } /** * Attempts to shutdown cleanly by finishing processing for all entries in * the queue. If not done cleanly, some entries timeseries entries may get * deleted without deleting their corresponding path entries. */ private void registerShutdownHook() { Runtime.getRuntime().addShutdownHook(new Thread("Metastore Janitor Shutdown Hook") { @Override public void run() { log.info("Shutting down all threads"); synchronized (tasks) { for (AbstractDynamoDBTask task : tasks) { task.running = false; } } synchronized (futures) { for (Future future : futures) { try { future.get(); } catch (Exception ex) { log.error("", ex); } } } executor.shutdown(); } }); } public int getScanLimit() { return scanLimit; } public void setScanLimit(int scanLimit) { this.scanLimit = scanLimit; scanLimiter = RateLimiter.create(scanLimit); } public int getDeleteLimit() { return deleteLimit; } public void setDeleteLimit(int deleteLimit) { this.deleteLimit = deleteLimit; deleteLimiter = RateLimiter.create(deleteLimit); } public int getScanThreads() { return scanThreads; } public void setScanThreads(int scanThreads) { this.scanThreads = scanThreads; } public int getDeleteThreads() { return deleteThreads; } public void setDeleteThreads(int deleteThreads) { this.deleteThreads = deleteThreads; } }