com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java Source code

Java tutorial

Introduction

Here is the source code for com.willetinc.hadoop.mapreduce.dynamodb.DynamoDBQueryInputFormat.java

Source

/**
 * Copyright 2012 Willet Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.willetinc.hadoop.mapreduce.dynamodb;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import com.amazonaws.services.dynamodb.model.AttributeValue;
import com.amazonaws.services.dynamodb.model.ComparisonOperator;
import com.willetinc.hadoop.mapreduce.dynamodb.io.DynamoDBKeyWritable;

public class DynamoDBQueryInputFormat<T extends DynamoDBKeyWritable> extends InputFormat<LongWritable, T>
        implements Configurable {

    /**
     * Input split describing one DynamoDB query: a typed hash key value plus
     * an optional range key condition (comparison operator and operand
     * values).
     */
    public static class DynamoDBQueryInputSplit extends DynamoDBScanInputFormat.DynamoDBInputSplit {

        /** Attribute type of the hash key; {@link Types#STRING} unless set otherwise. */
        private Types hashKeyType = Types.STRING;

        /** Hash key value; remains {@code null} when the no-argument constructor is used. */
        private AttributeValue hashKeyValue;

        /** Attribute type of the range key; {@link Types#STRING} unless set otherwise. */
        private Types rangeKeyType = Types.STRING;

        /** Operator applied to the range key; {@link ComparisonOperator#EQ} unless set otherwise. */
        private ComparisonOperator rangeKeyOperator = ComparisonOperator.EQ;

        /** Operand values of the range key condition; empty unless set otherwise. */
        private Collection<AttributeValue> rangeKeyValues = Collections.emptyList();

        /**
         * Creates a split holding only the field defaults. The fields can
         * afterwards be filled in by {@link #readFields(DataInput)}.
         */
        public DynamoDBQueryInputSplit() {
        }

        /**
         * Creates a split that carries a hash key and no range key condition.
         *
         * @param hashKeyType attribute type of the hash key
         * @param hashKeyValue hash key value
         */
        public DynamoDBQueryInputSplit(Types hashKeyType, AttributeValue hashKeyValue) {
            this.hashKeyValue = hashKeyValue;
            this.hashKeyType = hashKeyType;
        }

        /**
         * Creates a split that carries a hash key and a range key condition.
         *
         * @param hashKeyType attribute type of the hash key
         * @param hashKeyValue hash key value
         * @param rangeKeyType attribute type of the range key
         * @param rangeKeyValues operand values of the range key condition
         * @param rangeKeyOperator operator of the range key condition
         */
        public DynamoDBQueryInputSplit(Types hashKeyType, AttributeValue hashKeyValue, Types rangeKeyType,
                Collection<AttributeValue> rangeKeyValues, ComparisonOperator rangeKeyOperator) {
            this(hashKeyType, hashKeyValue);
            this.rangeKeyType = rangeKeyType;
            this.rangeKeyValues = rangeKeyValues;
            this.rangeKeyOperator = rangeKeyOperator;
        }

        /**
         * @return always {@code 0}; the number of rows covered by the split
         *         is not known in advance
         */
        @Override
        public long getLength() throws IOException {
            return 0L;
        }

        /**
         * Restores all fields from {@code in}, in the order used by
         * {@link #write(DataOutput)}: hash key type ordinal, hash key value,
         * range key type ordinal, range key values, operator ordinal.
         *
         * @param in stream to read from
         * @throws IOException if reading from the stream fails
         */
        @Override
        public void readFields(DataInput in) throws IOException {
            final Types[] attributeTypes = Types.values();

            hashKeyType = attributeTypes[in.readInt()];
            hashKeyValue = AttributeValueIOUtils.read(hashKeyType, in);

            rangeKeyType = attributeTypes[in.readInt()];
            rangeKeyValues = AttributeValueIOUtils.readCollection(rangeKeyType, in);

            final int operatorOrdinal = in.readInt();
            rangeKeyOperator = ComparisonOperator.values()[operatorOrdinal];
        }

        /**
         * Writes all fields to {@code out}. Enum fields are stored as their
         * ordinals, so the encoding depends on the declaration order of
         * {@link Types} and {@link ComparisonOperator}.
         *
         * @param out stream to write to
         * @throws IOException if writing to the stream fails
         */
        @Override
        public void write(DataOutput out) throws IOException {
            final int hashTypeOrdinal = hashKeyType.ordinal();
            out.writeInt(hashTypeOrdinal);
            AttributeValueIOUtils.write(hashKeyType, hashKeyValue, out);

            final int rangeTypeOrdinal = rangeKeyType.ordinal();
            out.writeInt(rangeTypeOrdinal);
            AttributeValueIOUtils.writeCollection(rangeKeyType, rangeKeyValues, out);

            final int operatorOrdinal = rangeKeyOperator.ordinal();
            out.writeInt(operatorOrdinal);
        }

        /** @return the hash key value, possibly {@code null} */
        public AttributeValue getHashKeyValue() {
            return this.hashKeyValue;
        }

        /** @return the range key type; same value as {@link #getRangeKeyType()} */
        public Types getRangeType() {
            return this.rangeKeyType;
        }

        /** @return the hash key type */
        public Types getHashKeyType() {
            return this.hashKeyType;
        }

        /** @return {@code true} when a hash key value is present */
        public boolean hasHashKey() {
            return null != hashKeyValue;
        }

        /** @return the range key type */
        public Types getRangeKeyType() {
            return this.rangeKeyType;
        }

        /** @return {@code true} when at least one range key value is present */
        public boolean hasRangeKey() {
            return rangeKeyValues != null && rangeKeyValues.size() > 0;
        }

        /** @return the operator of the range key condition */
        public ComparisonOperator getRangeKeyOperator() {
            return this.rangeKeyOperator;
        }

        /** @return the operand values of the range key condition (the internal collection, not a copy) */
        public Collection<AttributeValue> getRangeKeyValues() {
            return this.rangeKeyValues;
        }
    }

    /** Configuration wrapper; assigned in {@link #setConf(Configuration)} and unset before that call. */
    private DynamoDBConfiguration dbConf;

    /** Input table name, read from {@link #dbConf} in {@link #setConf(Configuration)}. */
    private String tableName;

    /**
     * Returns the Hadoop configuration this input format was configured with.
     *
     * @return the configuration held by the current
     *         {@link DynamoDBConfiguration}, or {@code null} when
     *         {@link #setConf(Configuration)} has not been called yet
     */
    public Configuration getConf() {
        // Nothing to delegate to before setConf(); report "not configured"
        // instead of failing with a NullPointerException.
        if (dbConf == null) {
            return null;
        }
        return dbConf.getConf();
    }

    /**
     * Wraps {@code conf} in a new {@link DynamoDBConfiguration} and caches the
     * input table name it reports.
     *
     * @param conf configuration to wrap
     */
    public void setConf(Configuration conf) {
        final DynamoDBConfiguration wrapped = new DynamoDBConfiguration(conf);
        this.dbConf = wrapped;
        this.tableName = wrapped.getInputTableName();
    }

    /**
     * @return the {@link DynamoDBConfiguration} created by the most recent
     *         {@link #setConf(Configuration)} call, or {@code null} if there
     *         has been none
     */
    public DynamoDBConfiguration getDBConf() {
        return this.dbConf;
    }

    /**
     * @return the input table name cached by the most recent
     *         {@link #setConf(Configuration)} call
     */
    public String getTableName() {
        return this.tableName;
    }

    /**
     * Computes the input splits for a job by delegating to the splitter
     * selected for the configured range key type.
     *
     * @param job job context whose configuration is read
     * @return the splits produced by the selected splitter
     * @throws IOException declared by the overridden method
     */
    @Override
    public List<InputSplit> getSplits(JobContext job) throws IOException {
        final Configuration jobConf = job.getConfiguration();
        final Types configuredRangeType = getRangeKeyType(jobConf);
        return getSplitter(configuredRangeType).split(jobConf);
    }

    /**
     * Selects the splitter implementation that matches a range key type.
     *
     * @param rangeKeyType type of the range key; must not be {@code null}
     * @return a {@code TextSplitter} for {@code STRING}, a
     *         {@code BigDecimalSplitter} for {@code NUMBER}, a
     *         {@code BinarySplitter} for {@code BINARY}, and a
     *         {@code DefaultSplitter} for any other type
     */
    protected DynamoDBSplitter getSplitter(Types rangeKeyType) {
        switch (rangeKeyType) {
        case STRING:
            return new TextSplitter();
        case NUMBER:
            // The splitter must be returned here; without the return the
            // instance was discarded and control fell through to BINARY.
            return new BigDecimalSplitter();
        case BINARY:
            return new BinarySplitter();
        default:
            return new DefaultSplitter();
        }
    }

    /**
     * Builds the record reader for one query split. The input format is first
     * (re)configured from the task's configuration.
     *
     * @param inputSplit split to read; cast to {@link DynamoDBQueryInputSplit}
     * @param context task attempt context supplying the configuration
     * @return a new {@code DynamoDBQueryRecordReader} for the split
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {
        final Configuration taskConf = context.getConfiguration();
        setConf(taskConf);

        // The configured input class is not statically known to be Class<T>.
        @SuppressWarnings("unchecked")
        final Class<T> recordClass = (Class<T>) dbConf.getInputClass();
        final DynamoDBQueryInputSplit querySplit = (DynamoDBQueryInputSplit) inputSplit;

        return new DynamoDBQueryRecordReader<T>(querySplit, recordClass, taskConf,
                dbConf.getAmazonDynamoDBClient(), dbConf, tableName);
    }

    /**
     * Stores the access credentials in the job's configuration via
     * {@code DynamoDBConfiguration.setCredentals}.
     *
     * @param job job whose configuration is updated
     * @param accessKey access key to store
     * @param secretKey secret key to store
     */
    public static void setCredentials(Job job, String accessKey, String secretKey) {
        final Configuration jobConf = job.getConfiguration();
        DynamoDBConfiguration.setCredentals(jobConf, accessKey, secretKey);
    }

    /**
     * Sets the DynamoDB endpoint on the job's configuration.
     *
     * @param job job whose configuration is updated
     * @param endpoint endpoint to store
     */
    public static void setEndpoint(Job job, String endpoint) {
        new DynamoDBConfiguration(job.getConfiguration()).setDynamoDBEndpoint(endpoint);
    }

    /**
     * Reads the DynamoDB endpoint from the job's configuration.
     *
     * @param job job whose configuration is read
     * @return the endpoint reported by {@link DynamoDBConfiguration}
     */
    public static String getEndpoint(Job job) {
        return new DynamoDBConfiguration(job.getConfiguration()).getDynamoDBEndpoint();
    }

    /**
     * Registers this input format on the job and records the input record
     * class and the input table name in the job's configuration.
     *
     * @param job job to configure
     * @param inputClass record class to store as the input class
     * @param tableName table name to store as the input table
     */
    public static void setInput(Job job, Class<? extends DynamoDBKeyWritable> inputClass, String tableName) {
        job.setInputFormatClass(DynamoDBQueryInputFormat.class);

        final DynamoDBConfiguration jobDbConf = new DynamoDBConfiguration(job.getConfiguration());
        jobDbConf.setInputClass(inputClass);
        jobDbConf.setInputTableName(tableName);
    }

    /**
     * Reads the hash key type, which is stored as an enum ordinal.
     *
     * @param conf configuration to read
     * @return the configured type, or {@link Types#STRING} when none is set
     */
    public static Types getHashKeyType(Configuration conf) {
        final int fallbackOrdinal = Types.STRING.ordinal();
        final int storedOrdinal = conf.getInt(DynamoDBConfiguration.HASH_KEY_TYPE_PROPERTY, fallbackOrdinal);
        return Types.values()[storedOrdinal];
    }

    /**
     * Stores the hash key type as its enum ordinal.
     *
     * @param conf configuration to update
     * @param type hash key type to store
     */
    public static void setHashKeyType(Configuration conf, Types type) {
        final int typeOrdinal = type.ordinal();
        conf.setInt(DynamoDBConfiguration.HASH_KEY_TYPE_PROPERTY, typeOrdinal);
    }

    /**
     * Decodes the configured hash key value using the configured hash key
     * type.
     *
     * @param conf configuration to read
     * @return the result of {@code AttributeValueIOUtils.valueOf} for the
     *         stored string; the stored string is passed through as-is, even
     *         when the property is unset
     */
    public static AttributeValue getHashKeyValue(Configuration conf) {
        final String encoded = conf.get(DynamoDBConfiguration.HASH_KEY_VALUE_PROPERTY);
        final Types keyType = getHashKeyType(conf);
        return AttributeValueIOUtils.valueOf(keyType, encoded);
    }

    /**
     * Stores the hash key type together with the string-encoded hash key
     * value.
     *
     * @param conf configuration to update
     * @param type hash key type
     * @param value hash key value, encoded with
     *        {@code AttributeValueIOUtils.toString}
     */
    public static void setHashKeyValue(Configuration conf, Types type, AttributeValue value) {
        setHashKeyType(conf, type);
        conf.set(DynamoDBConfiguration.HASH_KEY_VALUE_PROPERTY, AttributeValueIOUtils.toString(type, value));
    }

    /**
     * Reads the range key interpolation flag.
     *
     * @param conf configuration to read
     * @return the stored flag, or {@code false} when it is unset
     */
    public static boolean getInterpolateAcrossRangeKeyValues(Configuration conf) {
        final boolean interpolateByDefault = false;
        return conf.getBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, interpolateByDefault);
    }

    /**
     * Stores the range key interpolation flag.
     *
     * @param conf configuration to update
     * @param interpolate flag value to store
     */
    public static void setInterpolateAcrossRangeKeyValues(Configuration conf, boolean interpolate) {
        final String flagKey = DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY;
        conf.setBoolean(flagKey, interpolate);
    }

    /**
     * Reads the range key type, which is stored as an enum ordinal.
     *
     * @param conf configuration to read
     * @return the configured type, or {@link Types#STRING} when none is set
     */
    public static Types getRangeKeyType(Configuration conf) {
        final int fallbackOrdinal = Types.STRING.ordinal();
        final int storedOrdinal = conf.getInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, fallbackOrdinal);
        return Types.values()[storedOrdinal];
    }

    /**
     * Stores the range key type as its enum ordinal.
     *
     * @param conf configuration to update
     * @param type range key type to store
     */
    public static void setRangeKeyType(Configuration conf, Types type) {
        final int typeOrdinal = type.ordinal();
        conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, typeOrdinal);
    }

    /**
     * Stores explicit range key values. This also turns the interpolation
     * flag off and records the range key type.
     *
     * @param conf configuration to update
     * @param type range key type
     * @param values range key values; each is encoded with
     *        {@code AttributeValueIOUtils.toString}
     */
    public static void setRangeKeyValues(Configuration conf, Types type, Collection<AttributeValue> values) {
        setInterpolateAcrossRangeKeyValues(conf, false);
        setRangeKeyType(conf, type);

        final List<String> encoded = new ArrayList<String>();
        for (AttributeValue rangeValue : values) {
            final String text = AttributeValueIOUtils.toString(type, rangeValue);
            encoded.add(text);
        }

        // NOTE(review): Configuration.setStrings presumably stores the values
        // comma-delimited, so an encoded value containing a comma may not
        // round-trip through getRangeKeyValues - confirm.
        final String[] encodedArray = encoded.toArray(new String[encoded.size()]);
        conf.setStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY, encodedArray);
    }

    /**
     * Decodes the configured range key values using the configured range key
     * type.
     *
     * @param conf configuration to read
     * @return a new list of decoded values; empty when no values are
     *         configured
     */
    public static Collection<AttributeValue> getRangeKeyValues(Configuration conf) {
        final Types rangeType = getRangeKeyType(conf);
        final String[] stored = conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY);

        final List<AttributeValue> decoded = new ArrayList<AttributeValue>();
        if (stored != null) {
            for (String storedValue : stored) {
                decoded.add(AttributeValueIOUtils.valueOf(rangeType, storedValue));
            }
        }
        return decoded;
    }

    /**
     * Reads the range key comparison operator, which is stored as an enum
     * ordinal.
     *
     * @param conf configuration to read
     * @return the configured operator, or {@link ComparisonOperator#EQ} when
     *         none is set
     */
    public static ComparisonOperator getRangeKeyComparisonOperator(Configuration conf) {
        final int fallbackOrdinal = ComparisonOperator.EQ.ordinal();
        final int storedOrdinal = conf.getInt(DynamoDBConfiguration.RANGE_KEY_OPERATOR_PROPERTY, fallbackOrdinal);
        return ComparisonOperator.values()[storedOrdinal];
    }

    /**
     * Stores the range key comparison operator as its enum ordinal.
     *
     * @param conf configuration to update
     * @param operator operator to store
     */
    public static void setRangeKeyComparisonOperator(Configuration conf, ComparisonOperator operator) {
        final int operatorOrdinal = operator.ordinal();
        conf.setInt(DynamoDBConfiguration.RANGE_KEY_OPERATOR_PROPERTY, operatorOrdinal);
    }

    /**
     * Stores a complete range key condition: first the comparison operator,
     * then the type and operand values (see
     * {@link #setRangeKeyValues(Configuration, Types, Collection)}).
     *
     * @param conf configuration to update
     * @param type range key type
     * @param operator comparison operator of the condition
     * @param values operand values of the condition
     */
    public static void setRangeKeyCondition(Configuration conf, Types type, ComparisonOperator operator,
            Collection<AttributeValue> values) {
        DynamoDBQueryInputFormat.setRangeKeyComparisonOperator(conf, operator);
        DynamoDBQueryInputFormat.setRangeKeyValues(conf, type, values);
    }

    /**
     * Stores the lower bound used for range key interpolation. This also
     * turns the interpolation flag on and records the range key type.
     *
     * @param conf configuration to update
     * @param type range key type
     * @param value minimum value, encoded with
     *        {@code AttributeValueIOUtils.toString}
     */
    public static void setRangeKeyInterpolateMinValue(Configuration conf, Types type, AttributeValue value) {
        setInterpolateAcrossRangeKeyValues(conf, true);
        setRangeKeyType(conf, type);
        conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MIN_VALUE_PROPERTY,
                AttributeValueIOUtils.toString(type, value));
    }

    /**
     * Decodes the stored interpolation lower bound using the configured range
     * key type.
     *
     * @param conf configuration to read
     * @return the result of {@code AttributeValueIOUtils.valueOf} for the
     *         stored string
     */
    public static AttributeValue getRangeKeyInterpolateMinValue(Configuration conf) {
        return AttributeValueIOUtils.valueOf(getRangeKeyType(conf),
                conf.get(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MIN_VALUE_PROPERTY));
    }

    /**
     * Stores the upper bound used for range key interpolation. This also
     * turns the interpolation flag on and records the range key type.
     *
     * @param conf configuration to update
     * @param type range key type
     * @param value maximum value, encoded with
     *        {@code AttributeValueIOUtils.toString}
     */
    public static void setRangeKeyInterpolateMaxValue(Configuration conf, Types type, AttributeValue value) {
        setInterpolateAcrossRangeKeyValues(conf, true);
        setRangeKeyType(conf, type);
        conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MAX_VALUE_PROPERTY,
                AttributeValueIOUtils.toString(type, value));
    }

    /**
     * Decodes the stored interpolation upper bound using the configured range
     * key type.
     *
     * @param conf configuration to read
     * @return the result of {@code AttributeValueIOUtils.valueOf} for the
     *         stored string
     */
    public static AttributeValue getRangeKeyInterpolateMaxValue(Configuration conf) {
        return AttributeValueIOUtils.valueOf(getRangeKeyType(conf),
                conf.get(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MAX_VALUE_PROPERTY));
    }

    /**
     * Stores both interpolation bounds, minimum first and then maximum.
     *
     * @param conf configuration to update
     * @param type range key type
     * @param minValue lower bound of the range
     * @param maxValue upper bound of the range
     */
    public static void setRangeKeyInterpolateRange(Configuration conf, Types type, AttributeValue minValue,
            AttributeValue maxValue) {
        DynamoDBQueryInputFormat.setRangeKeyInterpolateMinValue(conf, type, minValue);
        DynamoDBQueryInputFormat.setRangeKeyInterpolateMaxValue(conf, type, maxValue);
    }
}