Java tutorial
/** * Copyright 2012 Willet Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.willetinc.hadoop.mapreduce.dynamodb; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import com.amazonaws.services.dynamodb.model.AttributeValue; import com.amazonaws.services.dynamodb.model.ComparisonOperator; import com.willetinc.hadoop.mapreduce.dynamodb.io.DynamoDBKeyWritable; public class DynamoDBQueryInputFormat<T extends DynamoDBKeyWritable> extends InputFormat<LongWritable, T> implements Configurable { public static class DynamoDBQueryInputSplit extends DynamoDBScanInputFormat.DynamoDBInputSplit { private Types hashKeyType = Types.STRING; private AttributeValue hashKeyValue; private Types rangeKeyType = Types.STRING; private ComparisonOperator rangeKeyOperator = ComparisonOperator.EQ; private Collection<AttributeValue> rangeKeyValues = Collections.emptyList(); public DynamoDBQueryInputSplit() { } public DynamoDBQueryInputSplit(Types hashKeyType, AttributeValue hashKeyValue) { this.hashKeyType = hashKeyType; this.hashKeyValue = hashKeyValue; } public DynamoDBQueryInputSplit(Types hashKeyType, AttributeValue hashKeyValue, Types rangeKeyType, Collection<AttributeValue> rangeKeyValues, ComparisonOperator rangeKeyOperator) { this.hashKeyType = hashKeyType; this.hashKeyValue = hashKeyValue; this.rangeKeyType = rangeKeyType; this.rangeKeyOperator = rangeKeyOperator; this.rangeKeyValues = rangeKeyValues; } /** * @return The total row count in this split */ @Override public long getLength() throws IOException { return 0; // unfortunately, we don't know this. } @Override public void readFields(DataInput in) throws IOException { this.hashKeyType = Types.values()[in.readInt()]; this.hashKeyValue = AttributeValueIOUtils.read(hashKeyType, in); this.rangeKeyType = Types.values()[in.readInt()]; this.rangeKeyValues = AttributeValueIOUtils.readCollection(rangeKeyType, in); this.rangeKeyOperator = ComparisonOperator.values()[in.readInt()]; } @Override public void write(DataOutput out) throws IOException { out.writeInt(hashKeyType.ordinal()); AttributeValueIOUtils.write(hashKeyType, hashKeyValue, out); out.writeInt(rangeKeyType.ordinal()); AttributeValueIOUtils.writeCollection(rangeKeyType, rangeKeyValues, out); out.writeInt(rangeKeyOperator.ordinal()); } public AttributeValue getHashKeyValue() { return hashKeyValue; } public Types getRangeType() { return rangeKeyType; } public Types getHashKeyType() { return hashKeyType; } public boolean hasHashKey() { return hashKeyValue != null; } public Types getRangeKeyType() { return rangeKeyType; } public boolean hasRangeKey() { if (rangeKeyValues == null) return false; return rangeKeyValues.size() > 0; } public ComparisonOperator getRangeKeyOperator() { return rangeKeyOperator; } public Collection<AttributeValue> getRangeKeyValues() { return rangeKeyValues; } } private DynamoDBConfiguration dbConf; private String tableName; public Configuration getConf() { return dbConf.getConf(); } public void setConf(Configuration conf) { dbConf = new DynamoDBConfiguration(conf); tableName = dbConf.getInputTableName(); } public DynamoDBConfiguration getDBConf() { return dbConf; } public String getTableName() { return tableName; } @Override public List<InputSplit> getSplits(JobContext job) throws IOException { Configuration conf = job.getConfiguration(); Types rangeKeyType = DynamoDBQueryInputFormat.getRangeKeyType(conf); DynamoDBSplitter splitter = getSplitter(rangeKeyType); return splitter.split(conf); } protected DynamoDBSplitter getSplitter(Types rangeKeyType) { switch (rangeKeyType) { case STRING: return new TextSplitter(); case NUMBER: new BigDecimalSplitter(); case BINARY: return new BinarySplitter(); default: return new DefaultSplitter(); } } @Override public RecordReader<LongWritable, T> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { setConf(context.getConfiguration()); @SuppressWarnings("unchecked") Class<T> inputClass = (Class<T>) (dbConf.getInputClass()); return new DynamoDBQueryRecordReader<T>((DynamoDBQueryInputSplit) inputSplit, inputClass, context.getConfiguration(), dbConf.getAmazonDynamoDBClient(), dbConf, tableName); } public static void setCredentials(Job job, String accessKey, String secretKey) { DynamoDBConfiguration.setCredentals(job.getConfiguration(), accessKey, secretKey); } public static void setEndpoint(Job job, String endpoint) { DynamoDBConfiguration dbConf = new DynamoDBConfiguration(job.getConfiguration()); dbConf.setDynamoDBEndpoint(endpoint); } public static String getEndpoint(Job job) { DynamoDBConfiguration dbConf = new DynamoDBConfiguration(job.getConfiguration()); return dbConf.getDynamoDBEndpoint(); } public static void setInput(Job job, Class<? extends DynamoDBKeyWritable> inputClass, String tableName) { job.setInputFormatClass(DynamoDBQueryInputFormat.class); DynamoDBConfiguration dbConf = new DynamoDBConfiguration(job.getConfiguration()); dbConf.setInputClass(inputClass); dbConf.setInputTableName(tableName); } public static Types getHashKeyType(Configuration conf) { return Types.values()[conf.getInt(DynamoDBConfiguration.HASH_KEY_TYPE_PROPERTY, Types.STRING.ordinal())]; } public static void setHashKeyType(Configuration conf, Types type) { conf.setInt(DynamoDBConfiguration.HASH_KEY_TYPE_PROPERTY, type.ordinal()); } public static AttributeValue getHashKeyValue(Configuration conf) { String value = conf.get(DynamoDBConfiguration.HASH_KEY_VALUE_PROPERTY); return AttributeValueIOUtils.valueOf(getHashKeyType(conf), value); } public static void setHashKeyValue(Configuration conf, Types type, AttributeValue value) { setHashKeyType(conf, type); String encodedValue = AttributeValueIOUtils.toString(type, value); conf.set(DynamoDBConfiguration.HASH_KEY_VALUE_PROPERTY, encodedValue); } public static boolean getInterpolateAcrossRangeKeyValues(Configuration conf) { return conf.getBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, false); } public static void setInterpolateAcrossRangeKeyValues(Configuration conf, boolean interpolate) { conf.setBoolean(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_PROPERTY, interpolate); } public static Types getRangeKeyType(Configuration conf) { return Types.values()[conf.getInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, Types.STRING.ordinal())]; } public static void setRangeKeyType(Configuration conf, Types type) { conf.setInt(DynamoDBConfiguration.RANGE_KEY_TYPE_PROPERTY, type.ordinal()); } public static void setRangeKeyValues(Configuration conf, Types type, Collection<AttributeValue> values) { setInterpolateAcrossRangeKeyValues(conf, false); setRangeKeyType(conf, type); List<String> attrValues = new ArrayList<String>(); for (AttributeValue attr : values) { attrValues.add(AttributeValueIOUtils.toString(type, attr)); } conf.setStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY, attrValues.toArray(new String[] {})); } public static Collection<AttributeValue> getRangeKeyValues(Configuration conf) { List<AttributeValue> values = new ArrayList<AttributeValue>(); Types type = getRangeKeyType(conf); String[] encodedValues = conf.getStrings(DynamoDBConfiguration.RANGE_KEY_VALUES_PROPERTY); // if range key values have not been configured return if (null == encodedValues) return values; // decode values for (String encodedValue : encodedValues) { values.add(AttributeValueIOUtils.valueOf(type, encodedValue)); } return values; } public static ComparisonOperator getRangeKeyComparisonOperator(Configuration conf) { return ComparisonOperator.values()[conf.getInt(DynamoDBConfiguration.RANGE_KEY_OPERATOR_PROPERTY, ComparisonOperator.EQ.ordinal())]; } public static void setRangeKeyComparisonOperator(Configuration conf, ComparisonOperator operator) { conf.setInt(DynamoDBConfiguration.RANGE_KEY_OPERATOR_PROPERTY, operator.ordinal()); } public static void setRangeKeyCondition(Configuration conf, Types type, ComparisonOperator operator, Collection<AttributeValue> values) { setRangeKeyComparisonOperator(conf, operator); setRangeKeyValues(conf, type, values); } public static void setRangeKeyInterpolateMinValue(Configuration conf, Types type, AttributeValue value) { setInterpolateAcrossRangeKeyValues(conf, true); setRangeKeyType(conf, type); String encodedValue = AttributeValueIOUtils.toString(type, value); conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MIN_VALUE_PROPERTY, encodedValue); } public static AttributeValue getRangeKeyInterpolateMinValue(Configuration conf) { Types type = getRangeKeyType(conf); String encodedValue = conf.get(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MIN_VALUE_PROPERTY); return AttributeValueIOUtils.valueOf(type, encodedValue); } public static void setRangeKeyInterpolateMaxValue(Configuration conf, Types type, AttributeValue value) { setInterpolateAcrossRangeKeyValues(conf, true); setRangeKeyType(conf, type); String encodedValue = AttributeValueIOUtils.toString(type, value); conf.set(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MAX_VALUE_PROPERTY, encodedValue); } public static AttributeValue getRangeKeyInterpolateMaxValue(Configuration conf) { Types type = getRangeKeyType(conf); String encodedValue = conf.get(DynamoDBConfiguration.RANGE_KEY_INTERPOLATE_MAX_VALUE_PROPERTY); return AttributeValueIOUtils.valueOf(type, encodedValue); } public static void setRangeKeyInterpolateRange(Configuration conf, Types type, AttributeValue minValue, AttributeValue maxValue) { setRangeKeyInterpolateMinValue(conf, type, minValue); setRangeKeyInterpolateMaxValue(conf, type, maxValue); } }