com.netflix.bdp.s3mper.metastore.impl.DynamoDBMetastore.java Source code

Java tutorial

Introduction

Here is the source code for com.netflix.bdp.s3mper.metastore.impl.DynamoDBMetastore.java

Source

/*
 *
 *  Copyright 2013 Netflix, Inc.
 *
 *     Licensed under the Apache License, Version 2.0 (the "License");
 *     you may not use this file except in compliance with the License.
 *     You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License.
 *
 */

package com.netflix.bdp.s3mper.metastore.impl;

import com.google.common.annotations.VisibleForTesting;

import com.netflix.bdp.s3mper.common.RetryTask;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.dynamodb.AmazonDynamoDBClient;
import com.amazonaws.services.dynamodb.model.AttributeValue;
import com.amazonaws.services.dynamodb.model.AttributeValueUpdate;
import com.amazonaws.services.dynamodb.model.CreateTableRequest;
import com.amazonaws.services.dynamodb.model.DeleteItemRequest;
import com.amazonaws.services.dynamodb.model.DeleteItemResult;
import com.amazonaws.services.dynamodb.model.Key;
import com.amazonaws.services.dynamodb.model.KeySchema;
import com.amazonaws.services.dynamodb.model.KeySchemaElement;
import com.amazonaws.services.dynamodb.model.ListTablesResult;
import com.amazonaws.services.dynamodb.model.ProvisionedThroughput;
import com.amazonaws.services.dynamodb.model.PutItemRequest;
import com.amazonaws.services.dynamodb.model.QueryRequest;
import com.amazonaws.services.dynamodb.model.QueryResult;
import com.amazonaws.services.dynamodb.model.ReturnValue;
import com.amazonaws.services.dynamodb.model.ScalarAttributeType;
import com.amazonaws.services.dynamodb.model.UpdateItemRequest;
import com.amazonaws.services.dynamodb.model.UpdateItemResult;
import com.netflix.bdp.s3mper.metastore.FileInfo;
import com.netflix.bdp.s3mper.metastore.FileSystemMetastore;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

import java.util.concurrent.Callable;

import static com.netflix.bdp.s3mper.common.PathUtil.*;
import java.util.Random;

/**
 * Implements FileSystemMetastore using DynamoDB as a backend.
 * 
 * @author dweeks
 */
@SuppressWarnings("deprecation")
public class DynamoDBMetastore implements FileSystemMetastore {
    private static final Logger log = Logger.getLogger(DynamoDBMetastore.class.getName());

    private String tableName = "ConsistentListingMetastore";
    private AmazonDynamoDBClient db = null;

    private long readUnits = 500;
    private long writeUnits = 100;

    private int retryCount = Integer.getInteger("s3mper.metastore.retry", 3);
    private int timeout = Integer.getInteger("s3mper.metastore.timeout", 5000);
    private String scheme;

    private boolean deleteMarkerEnabled;

    private Random rand = new Random();

    static final String HASH_KEY = "path";
    static final String RANGE_KEY = "file";
    static final String EPOCH_VALUE = "epoch";
    static final String DIRECTORY_VALUE = "dir";
    static final String DELETE_MARKER = "deleted";

    static final String LINK_HASH_KEY = "linkPath";
    static final String LINK_RANGE_KEY = "linkFile";
    static final String TIMESERIES_KEY = "epoch";

    /**
     * Creates the metastore table in DynamoDB if it doesn't exist with the configured
     * read and write units.
     * 
     * @param uri
     * @param conf
     * @throws Exception 
     */
    @Override
    public void initalize(URI uri, Configuration conf) throws Exception {
        scheme = uri.getScheme();

        String keyId = conf.get("fs." + uri.getScheme() + ".awsAccessKeyId");
        String keySecret = conf.get("fs." + uri.getScheme() + ".awsSecretAccessKey");

        //An override option for accessing across accounts
        keyId = conf.get("s3mper.override.awsAccessKeyId", keyId);
        keySecret = conf.get("s3mper.override.awsSecretAccessKey", keySecret);

        db = new AmazonDynamoDBClient(new BasicAWSCredentials(keyId, keySecret));

        readUnits = conf.getLong("s3mper.metastore.read.units", readUnits);
        writeUnits = conf.getLong("s3mper.metastore.write.units", writeUnits);

        retryCount = conf.getInt("s3mper.metastore.retry", retryCount);
        timeout = conf.getInt("s3mper.metastore.timeout", timeout);

        tableName = conf.get("s3mper.metastore.name", tableName);

        deleteMarkerEnabled = conf.getBoolean("s3mper.metastore.deleteMarker.enabled", false);

        boolean checkTableExists = conf.getBoolean("s3mper.metastore.create", false);

        if (checkTableExists) {
            ListTablesResult tables = db.listTables();

            if (!tables.getTableNames().contains(tableName)) {
                createTable();
            }
        }
    }

    /**
     * Creates the table in DynamoDB. The hash+range key is:
     * 
     *           Hash (String)        |    Range (String)
     *             File Path                 File Name
     * Example:  s3n://netflix/data           test.xml
     *   
     * The table also includes one attribute for epoch (time created), but
     * that is only defined during the put operation (see add()).
     *
     */
    private void createTable() {
        log.info("Creating table in DynamoDB: " + tableName);

        CreateTableRequest createRequest = new CreateTableRequest();
        createRequest.withTableName(tableName);

        //Key
        KeySchemaElement pathKey = new KeySchemaElement().withAttributeName(HASH_KEY)
                .withAttributeType(ScalarAttributeType.S);
        KeySchemaElement fileKey = new KeySchemaElement().withAttributeName(RANGE_KEY)
                .withAttributeType(ScalarAttributeType.S);
        KeySchema schema = new KeySchema();
        schema.setHashKeyElement(pathKey);
        schema.setRangeKeyElement(fileKey);
        createRequest.setKeySchema(schema);

        //Throughput
        ProvisionedThroughput tp = new ProvisionedThroughput();
        tp.setReadCapacityUnits(readUnits);
        tp.setWriteCapacityUnits(writeUnits);

        createRequest.setProvisionedThroughput(tp);

        db.createTable(createRequest);
    }

    /**
     * Returns a list of files that should exist in the FileSystem.
     * 
     * @param paths
     * @return 
     * @throws java.lang.Exception 
     */
    @Override
    public List<FileInfo> list(List<Path> paths) throws Exception {
        return list(paths, deleteMarkerEnabled);
    }

    @Override
    public void add(List<FileInfo> path) throws Exception {
        MetastoreFallback.add(this, path);
    }

    /**
     * Returns a list of files that should exist in the FileSystem with 
     * optional inclusion of deleted entries.
     * 
     * @param paths
     * @param includeDeleted
     * @return
     * @throws Exception 
     */
    public List<FileInfo> list(List<Path> paths, boolean includeDeleted) throws Exception {
        List<FileInfo> listing = new ArrayList<FileInfo>();

        for (Path path : paths) {
            Key startKey = null;

            do {
                RetryTask<QueryResult> queryTask = new RetryTask(new QueryTask(path, startKey), retryCount,
                        timeout);
                QueryResult result = queryTask.call();

                for (Map<String, AttributeValue> item : result.getItems()) {
                    FileInfo file = new FileInfo(
                            new Path(scheme + ":" + item.get(HASH_KEY).getS() + "/" + item.get(RANGE_KEY).getS()));

                    if (item.containsKey(DELETE_MARKER)) {
                        file.setDeleted(Boolean.parseBoolean(item.get(DELETE_MARKER).getS()));

                        //@TODO: cleanup deleteMarker logic after deployed
                        if (!includeDeleted) {
                            continue;
                        }
                    }

                    if (item.containsKey(DIRECTORY_VALUE)) {
                        file.setDirectory(Boolean.parseBoolean((item.get(DIRECTORY_VALUE).getS())));
                    }

                    listing.add(file);
                }

                startKey = result.getLastEvaluatedKey();
            } while (startKey != null);
        }

        return listing;
    }

    /**
     * Adds a path to the metastore.
     * 
     * @param path 
     * @throws java.lang.Exception 
     */
    @Override
    public void add(final Path path, boolean directory) throws Exception {
        RetryTask task = new RetryTask(new AddTask(path, directory), retryCount, timeout);

        task.call();
    }

    /**
     * Delete the provided path from the Metastore or use a delete marker.
     * 
     * @param path
     * @param deleteMarker 
     * @throws Exception 
     */
    @Override
    public void delete(final Path path) throws Exception {
        RetryTask task;

        if (deleteMarkerEnabled) {
            task = new RetryTask(new MarkDeletedTask(path), retryCount, timeout);
        } else {
            task = new RetryTask(new DeleteTask(path), retryCount, timeout);
        }

        task.call();
    }

    @Override
    public void delete(List<Path> path) throws Exception {
        MetastoreFallback.delete(this, path);
    }

    @Override
    public void close() {
    }

    /**
     * A Callable task for use with RetryTask to add a path to the
     * DynamoDB table.
     * 
     */
    private class AddTask implements Callable<Object> {
        private Path path;
        private boolean directory;

        public AddTask(Path path, boolean directory) {
            this.path = path;
            this.directory = directory;
        }

        @Override
        public Object call() throws Exception {
            long epoch = System.currentTimeMillis();

            AttributeValue avPath = new AttributeValue(normalize(path.getParent()));
            AttributeValue avFile = new AttributeValue(path.getName());
            AttributeValue avEpoch = new AttributeValue().withN(epoch + "");

            PutItemRequest put = new PutItemRequest();
            put.setTableName(tableName);
            Map<String, AttributeValue> items = new HashMap<String, AttributeValue>();

            items.put(HASH_KEY, avPath);
            items.put(RANGE_KEY, avFile);
            items.put(EPOCH_VALUE, avEpoch);

            if (directory) {
                items.put(DIRECTORY_VALUE, new AttributeValue(Boolean.TRUE.toString()));
            }

            put.setItem(items);

            if (log.isDebugEnabled()) {
                log.debug("Adding metastore entry for: " + path.toUri());
            }

            db.putItem(put);

            PutItemRequest tsPut = new PutItemRequest();
            tsPut.setTableName(tableName);
            Map<String, AttributeValue> tsItems = new HashMap<String, AttributeValue>();

            tsItems.put(HASH_KEY, new AttributeValue(TIMESERIES_KEY));
            tsItems.put(RANGE_KEY, new AttributeValue(epoch + "-" + rand.nextInt()));
            tsItems.put(LINK_HASH_KEY, avPath);
            tsItems.put(LINK_RANGE_KEY, avFile);
            tsPut.setItem(tsItems);

            db.putItem(tsPut);

            return null;
        }

    }

    /**
     * A Callable task to be used with RetryTask to query a path.
     * 
     * Takes a path and a scan start key and returns the query result.
     */
    class QueryTask implements Callable<QueryResult> {
        private Path path;
        private Key startKey;

        public QueryTask(Path path, Key startKey) {
            this.path = path;
            this.startKey = startKey;
        }

        @Override
        public QueryResult call() throws Exception {
            QueryRequest query = new QueryRequest();
            query.setTableName(tableName);
            query.withHashKeyValue(new AttributeValue(normalize(path)));
            query.setConsistentRead(true);

            if (startKey != null) {
                query.setExclusiveStartKey(startKey);
            }

            if (log.isDebugEnabled()) {
                log.debug("Querying DynamoDB for path: " + path.toUri());
            }

            return db.query(query);
        }

    }

    /**
     * A Callable task to be used with RetryTask to delete a file.
     */
    class DeleteTask implements Callable<DeleteItemResult> {
        private Path path;

        public DeleteTask(Path path) {
            this.path = path;
        }

        @Override
        public DeleteItemResult call() throws Exception {
            DeleteItemRequest delete = new DeleteItemRequest();
            delete.setTableName(tableName);
            delete.setKey(
                    new Key(new AttributeValue(normalize(path.getParent())), new AttributeValue(path.getName())));
            delete.setReturnValues(ReturnValue.NONE);

            if (log.isDebugEnabled()) {
                log.debug("Deleting DynamoDB path: " + path.toUri());
            }

            return db.deleteItem(delete);
        }

    }

    /**
     * Marks a path deleted but does not actually delete the entry.
     */
    private class MarkDeletedTask implements Callable<UpdateItemResult> {
        private Path path;

        public MarkDeletedTask(Path path) {
            this.path = path;
        }

        @Override
        public UpdateItemResult call() throws Exception {
            UpdateItemRequest update = new UpdateItemRequest();
            update.setTableName(tableName);
            update.setKey(
                    new Key(new AttributeValue(normalize(path.getParent())), new AttributeValue(path.getName())));

            Map<String, AttributeValueUpdate> items = new HashMap<String, AttributeValueUpdate>();
            items.put(DELETE_MARKER,
                    new AttributeValueUpdate().withValue(new AttributeValue().withS(Boolean.TRUE.toString())));
            items.put(EPOCH_VALUE, new AttributeValueUpdate()
                    .withValue(new AttributeValue().withN(System.currentTimeMillis() + "")));

            update.setAttributeUpdates(items);

            if (log.isDebugEnabled()) {
                log.debug("Marking DynamoDB path deleted: " + path.toUri());
            }

            return db.updateItem(update);
        }

    }

    public int getRetryCount() {
        return retryCount;
    }

    public void setRetryCount(int retryCount) {
        this.retryCount = retryCount;
    }

    public int getTimeout() {
        return timeout;
    }

    public void setTimeout(int timeout) {
        this.timeout = timeout;
    }

}