com.willetinc.hadoop.mapreduce.dynamodb.BigDecimalSplitter.java Source code

Java tutorial

Introduction

Here is the source code for com.willetinc.hadoop.mapreduce.dynamodb.BigDecimalSplitter.java

Source

/**
 * Copyright 2012 Willet Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.willetinc.hadoop.mapreduce.dynamodb;

import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.amazonaws.services.dynamodb.model.AttributeValue;
import com.amazonaws.services.dynamodb.model.ComparisonOperator;

/**
 * Implements Splitter over DynamoDB Number datatype values.
 *
 * <p>
 * The span between the minimum and maximum range key is cut into consecutive,
 * non-overlapping closed intervals, and one BETWEEN query split is emitted per
 * interval.
 * </p>
 */
public class BigDecimalSplitter extends AbstractSplitter {

    private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class);

    /**
     * Gap added to the end of one interval to obtain the start of the next, so
     * that adjacent BETWEEN intervals do not share a boundary value.
     */
    private static final BigDecimal MIN_POSITIVE_VALUE = new BigDecimal("0.0000000000000000000000000000000000001");

    /**
     * Floor for the step between split points. It guarantees the boundary loop
     * in {@link #split} always advances, even when the range has zero width.
     */
    private static final BigDecimal MIN_INCREMENT = new BigDecimal(10000 * Double.MIN_VALUE);

    /**
     * Rounding applied when a quotient has no finite decimal expansion. The
     * precision is chosen to match the 38 digits of precision documented for
     * DynamoDB Numbers.
     */
    private static final MathContext ROUNDED_DIVISION = new MathContext(38, RoundingMode.HALF_UP);

    /**
     * Appends one BETWEEN query split per interval of the range key span.
     *
     * @param conf job configuration (not read by this implementation)
     * @param splits list that receives the generated input splits
     * @param hashKeyType type of the hash key
     * @param hashKeyValue hash key value shared by every generated split
     * @param rangeKeyType type of the range key
     * @param minRangeKeyValue lowest range key value; its N field is parsed as
     *            a BigDecimal
     * @param maxRangeKeyValue highest range key value; its N field is parsed
     *            as a BigDecimal
     * @param numRangeSplits requested number of intervals; treated as a hint
     * @throws IllegalArgumentException if {@code numRangeSplits} is negative
     *             or the minimum is greater than the maximum
     * @throws ArithmeticException if {@code numRangeSplits} is zero
     */
    @Override
    void generateRangeKeySplits(Configuration conf, List<InputSplit> splits, Types hashKeyType,
            AttributeValue hashKeyValue, Types rangeKeyType, AttributeValue minRangeKeyValue,
            AttributeValue maxRangeKeyValue, int numRangeSplits) {

        BigDecimal lowest = new BigDecimal(minRangeKeyValue.getN());
        BigDecimal highest = new BigDecimal(maxRangeKeyValue.getN());

        // split() always returns at least two boundaries: the first is the
        // start of the first interval, every later one closes an interval.
        List<BigDecimal> boundaries = split(BigDecimal.valueOf(numRangeSplits), lowest, highest);

        BigDecimal start = boundaries.get(0);
        for (BigDecimal end : boundaries.subList(1, boundaries.size())) {
            List<AttributeValue> rangeKeyValues = new ArrayList<AttributeValue>();
            rangeKeyValues.add(new AttributeValue().withN(start.toString()));
            rangeKeyValues.add(new AttributeValue().withN(end.toString()));

            splits.add(new DynamoDBQueryInputFormat.DynamoDBQueryInputSplit(hashKeyType, hashKeyValue, rangeKeyType,
                    rangeKeyValues, ComparisonOperator.BETWEEN));

            // BETWEEN is inclusive on both ends, so the next interval starts
            // just above this one's end: no value is read twice and none
            // between the intervals is skipped.
            // NOTE(review): start now carries the scale of MIN_POSITIVE_VALUE;
            // confirm the resulting digit count stays within what DynamoDB
            // accepts for Number values.
            start = end.add(MIN_POSITIVE_VALUE);
        }
    }

    /**
     * Divide numerator by denominator. The exact quotient is returned when it
     * has a finite decimal expansion; otherwise the quotient is rounded
     * half-up to 38 significant digits.
     *
     * <p>
     * The rounded result deliberately does not inherit the numerator's scale:
     * with integer operands that would round quotients such as 1/3 down to
     * zero.
     * </p>
     *
     * @param numerator the dividend
     * @param denominator the divisor
     * @return the exact quotient, or the rounded quotient if no exact one
     *         exists
     * @throws ArithmeticException if {@code denominator} is zero
     */
    protected static BigDecimal tryDivide(BigDecimal numerator, BigDecimal denominator) {
        try {
            return numerator.divide(denominator);
        } catch (ArithmeticException ae) {
            // Non-terminating expansion (or a zero divisor, which fails again
            // below and propagates to the caller).
            return numerator.divide(denominator, ROUNDED_DIVISION);
        }
    }

    /**
     * <p>
     * Returns the boundaries between input splits, in ascending order. The
     * list starts at {@code minVal}, ends at {@code maxVal} and always holds
     * at least two elements, i.e. it is one element longer than the number of
     * intervals it describes.
     * </p>
     *
     * <p>
     * Callers turn the boundaries into closed intervals, using a small
     * positive value 'e' to separate the end of one interval from the start
     * of the next. The list [0, 5, 8, 12, 18] therefore represents:
     * </p>
     *
     * <p>
     * [0, 5] [5+e, 8] [8+e, 12] [12+e, 18]
     * </p>
     *
     * @param numSplits requested number of intervals; used as a hint, one
     *            extra interval may be produced when the range does not divide
     *            cleanly
     * @param minVal lowest value of the range (inclusive)
     * @param maxVal highest value of the range (inclusive)
     * @return the split boundaries from {@code minVal} to {@code maxVal}
     * @throws IllegalArgumentException if {@code numSplits} is negative or
     *             {@code minVal} is greater than {@code maxVal}
     * @throws ArithmeticException if {@code numSplits} is zero
     */
    List<BigDecimal> split(BigDecimal numSplits, BigDecimal minVal, BigDecimal maxVal) {

        // A negative split count would yield a negative step that gets
        // replaced by MIN_INCREMENT, making the loop below effectively endless.
        if (numSplits.signum() < 0) {
            throw new IllegalArgumentException("numSplits must not be negative: " + numSplits);
        }
        // An inverted range would leave the boundary list empty.
        if (minVal.compareTo(maxVal) > 0) {
            throw new IllegalArgumentException(
                    "minVal (" + minVal + ") must not be greater than maxVal (" + maxVal + ")");
        }

        BigDecimal step = tryDivide(maxVal.subtract(minVal), numSplits);
        if (step.compareTo(MIN_INCREMENT) < 0) {
            // Only reachable for a zero-width (or vanishingly small) range.
            step = MIN_INCREMENT;
            LOG.warn("Set BigDecimal splitSize to MIN_INCREMENT");
        }

        List<BigDecimal> boundaries = new ArrayList<BigDecimal>();
        for (BigDecimal point = minVal; point.compareTo(maxVal) <= 0; point = point.add(step)) {
            boundaries.add(point);
        }

        // Make sure the list closes on maxVal, and that a single-point range
        // still yields one [minVal, maxVal] interval.
        BigDecimal last = boundaries.get(boundaries.size() - 1);
        if (boundaries.size() == 1 || last.compareTo(maxVal) != 0) {
            boundaries.add(maxVal);
        }

        return boundaries;
    }

}