Java tutorial: the DynamoDBUtil helper class
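This tutorial walks through `DynamoDBUtil`, a utility class from Amazon's Apache-licensed DynamoDB connector for Hadoop (package `org.apache.hadoop.dynamodb`). The class collects the small helpers the connector leans on everywhere: base64 encoding of binary keys, a shared `Gson` instance that can handle `ByteBuffer`, DynamoDB item-size estimation, rate-limiting sleeps, null-safe configuration lookups, endpoint resolution, and map-task sizing. The full source follows, starting with the license header and imports.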
```java
/**
 * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file
 * except in compliance with the License. A copy of the License is located at
 *
 *     http://aws.amazon.com/apache2.0/
 *
 * or in the "LICENSE.TXT" file accompanying this file. This file is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under the License.
 */
package org.apache.hadoop.dynamodb;

import static org.apache.hadoop.dynamodb.DynamoDBConstants.DEFAULT_MAX_ITEMS_PER_BATCH;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.MAX_ITEMS_PER_BATCH;

import com.google.common.base.Strings;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonParseException;
import com.google.gson.JsonPrimitive;
import com.google.gson.JsonSerializationContext;
import com.google.gson.JsonSerializer;

import com.amazonaws.regions.RegionUtils;
import com.amazonaws.regions.ServiceAbbreviations;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.TableDescription;
import com.amazonaws.util.EC2MetadataUtils;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.dynamodb.util.ClusterTopologyNodeCapacityProvider;
import org.apache.hadoop.dynamodb.util.NodeCapacityProvider;
import org.apache.hadoop.dynamodb.util.RoundRobinYarnContainerAllocator;
import org.apache.hadoop.dynamodb.util.TaskCalculator;
import org.apache.hadoop.dynamodb.util.YarnContainerAllocator;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Type;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
```
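With the imports in place, the class body opens with its one piece of shared state: a statically initialized `Gson` singleton. `ByteBuffer` has no no-arg constructor, so Gson cannot instantiate it reflectively; the static block registers a hand-written serializer/deserializer pair (defined at the bottom of the file) before the instance is built.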
```java
public final class DynamoDBUtil {

  public static final String CHARACTER_ENCODING = "UTF-8";

  private static final Log log = LogFactory.getLog(DynamoDBUtil.class);
  private static final Gson gson;

  static {
    GsonBuilder gsonBuilder = new GsonBuilder();
    /* We hand serialize/deserialize ByteBuffer objects. */
    gsonBuilder.registerTypeAdapter(ByteBuffer.class, new ByteBufferSerializer());
    gsonBuilder.registerTypeAdapter(ByteBuffer.class, new ByteBufferDeserializer());
    gson = gsonBuilder.disableHtmlEscaping().create();
  }

  public static Double calculateAverageItemSize(TableDescription description) {
    if (description.getItemCount() != 0) {
      return ((double) description.getTableSizeBytes()) / ((double) description.getItemCount());
    }
    return 0.0;
  }

  /**
   * base64 encode a byte array using org.apache.commons.codec.binary.Base64
   *
   * @param bytes bytes to encode
   * @return base64 encoded representation of the provided byte array
   */
  public static String base64EncodeByteArray(byte[] bytes) {
    try {
      byte[] encodeBase64 = Base64.encodeBase64(bytes);
      return new String(encodeBase64, "UTF-8");
    } catch (Exception e) {
      // Preserve the original exception as the cause.
      throw new RuntimeException("Exception while encoding bytes: " + Arrays.toString(bytes), e);
    }
  }

  /**
   * base64 decode a base64String using org.apache.commons.codec.binary.Base64
   *
   * @param base64String string to base64 decode
   * @return byte array representing the decoded base64 string
   */
  public static byte[] base64DecodeString(String base64String) {
    try {
      return Base64.decodeBase64(base64String.getBytes("UTF-8"));
    } catch (Exception e) {
      // Preserve the original exception as the cause.
      throw new RuntimeException("Exception while decoding " + base64String, e);
    }
  }

  /**
   * Converts a base64 encoded key into a ByteBuffer
   *
   * @param base64EncodedKey base64 encoded key to be converted
   * @return {@link ByteBuffer} representation of the provided base64 encoded key string
   */
  public static ByteBuffer base64StringToByteBuffer(String base64EncodedKey) {
    return ByteBuffer.wrap(base64DecodeString(base64EncodedKey));
  }

  /**
   * Converts a given list of base64EncodedKeys to a List of ByteBuffers
   *
   * @param base64EncodedKeys base64 encoded key(s) to be converted
   * @return List of {@link ByteBuffer}s representing the provided base64EncodedKeys
   */
  public static List<ByteBuffer> base64StringToByteBuffer(String... base64EncodedKeys) {
    List<ByteBuffer> byteBuffers = new ArrayList<>(base64EncodedKeys.length);
    for (String base64EncodedKey : base64EncodedKeys) {
      byteBuffers.add(base64StringToByteBuffer(base64EncodedKey));
    }
    return byteBuffers;
  }
```
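As a quick illustration of the helpers so far, here is a hypothetical snippet (the `SizeAndEncodingDemo` class and its literal values are made up for this tutorial, not part of the connector):

```java
import com.amazonaws.services.dynamodbv2.model.TableDescription;
import java.nio.ByteBuffer;
import org.apache.hadoop.dynamodb.DynamoDBUtil;

public class SizeAndEncodingDemo {
  public static void main(String[] args) {
    // Average item size is simply table bytes divided by item count.
    TableDescription description = new TableDescription()
        .withItemCount(1000L)
        .withTableSizeBytes(409600L);
    System.out.println(DynamoDBUtil.calculateAverageItemSize(description)); // 409.6

    // Round-trip raw bytes through the base64 helpers.
    String encoded = DynamoDBUtil.base64EncodeByteArray(new byte[] {1, 2, 3, 4});
    ByteBuffer decoded = DynamoDBUtil.base64StringToByteBuffer(encoded);
    System.out.println(encoded + " -> " + decoded.remaining() + " bytes"); // AQIDBA== -> 4 bytes
  }
}
```

Next come the Gson accessor, the item-size arithmetic, and the configuration helpers.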
```java
  /**
   * Get a Gson reference with custom ByteBuffer serializer/deserializer.
   *
   * @return Gson reference with custom ByteBuffer serializer/deserializer
   */
  public static Gson getGson() {
    return gson;
  }

  static int getItemSizeBytes(Map<String, AttributeValue> item) {
    try {
      int itemSize = 0;
      for (Entry<String, AttributeValue> entry : item.entrySet()) {
        itemSize += entry.getKey().getBytes(CHARACTER_ENCODING).length;
        itemSize += getAttributeSizeBytes(entry.getValue());
      }
      return itemSize;
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
  }

  public static void verifyInterval(long intervalBeginTime, long intervalLength) {
    long interval = intervalBeginTime + intervalLength;
    long currentDateTime = new DateTime(DateTimeZone.UTC).getMillis();
    if (currentDateTime < interval) {
      try {
        Thread.sleep(interval - currentDateTime);
      } catch (InterruptedException e) {
        throw new RuntimeException("Interrupted while waiting ", e);
      }
    }
  }

  private static int getAttributeSizeBytes(AttributeValue att)
      throws UnsupportedEncodingException {
    int byteSize = 0;
    if (att.getN() != null) {
      byteSize += att.getN().getBytes(CHARACTER_ENCODING).length;
    } else if (att.getS() != null) {
      byteSize += att.getS().getBytes(CHARACTER_ENCODING).length;
    } else if (att.getB() != null) {
      byteSize += att.getB().array().length;
    } else if (att.getNS() != null) {
      for (String number : att.getNS()) {
        byteSize += number.getBytes(CHARACTER_ENCODING).length;
      }
    } else if (att.getSS() != null) {
      for (String string : att.getSS()) {
        byteSize += string.getBytes(CHARACTER_ENCODING).length;
      }
    } else if (att.getBS() != null) {
      for (ByteBuffer byteBuffer : att.getBS()) {
        byteSize += byteBuffer.array().length;
      }
    }
    return byteSize;
  }

  static long getBoundedBatchLimit(Configuration config, long batchSize) {
    long maxItemsPerBatch = config.getLong(MAX_ITEMS_PER_BATCH, DEFAULT_MAX_ITEMS_PER_BATCH);
    return Math.min(Math.max(batchSize, 1), maxItemsPerBatch);
  }

  public static String getValueFromConf(Configuration conf, String confKey, String defaultValue) {
    if (conf == null) {
      return defaultValue;
    }
    return conf.get(confKey, defaultValue);
  }

  public static String getValueFromConf(Configuration conf, String confKey) {
    return getValueFromConf(conf, confKey, null);
  }
```
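The `getValueFromConf` overloads are deliberately null-safe: a `null` configuration or an absent key falls back to the supplied default, which keeps call sites free of null checks. A hypothetical sketch (the key strings follow the `dynamodb.*` names quoted in the javadoc below; `ConfDemo` is not part of the connector):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.dynamodb.DynamoDBUtil;

public class ConfDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("dynamodb.region", "us-west-2"); // hypothetical job setting

    System.out.println(DynamoDBUtil.getValueFromConf(conf, "dynamodb.region"));          // us-west-2
    System.out.println(DynamoDBUtil.getValueFromConf(conf, "dynamodb.endpoint", "n/a")); // n/a (default)
    System.out.println(DynamoDBUtil.getValueFromConf(null, "dynamodb.region", "n/a"));   // n/a (null conf)
  }
}
```

The remaining methods handle endpoint resolution and cluster sizing, plus the two Gson type adapters promised earlier.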
```java
  /**
   * Calculates the DynamoDB endpoint.
   *
   * Algorithm details:
   * <ol>
   *   <li> Use endpoint in job configuration "dynamodb.endpoint" value if available
   *   <li> Use endpoint from region in job configuration "dynamodb.region" value if available
   *   <li> Use endpoint from region in job configuration "dynamodb.regionid" value if available
   *   <li> Use endpoint from EC2 Metadata of instance if available
   *   <li> If all previous attempts at retrieving endpoint fail, default to us-east-1 endpoint
   * </ol>
   *
   * @param conf Job Configuration
   * @param region optional preferred region
   * @return endpoint for the DynamoDB service
   */
  public static String getDynamoDBEndpoint(Configuration conf, String region) {
    String endpoint = getValueFromConf(conf, DynamoDBConstants.ENDPOINT);
    if (Strings.isNullOrEmpty(endpoint)) {
      if (Strings.isNullOrEmpty(region)) {
        region = getValueFromConf(conf, DynamoDBConstants.REGION);
      }
      if (Strings.isNullOrEmpty(region)) {
        region = getValueFromConf(conf, DynamoDBConstants.REGION_ID);
      }
      if (Strings.isNullOrEmpty(region)) {
        try {
          region = EC2MetadataUtils.getEC2InstanceRegion();
        } catch (Exception e) {
          log.warn(String.format("Exception when attempting to get AWS region information. Will "
              + "ignore and default to %s", DynamoDBConstants.DEFAULT_AWS_REGION), e);
        }
      }
      if (Strings.isNullOrEmpty(region)) {
        region = DynamoDBConstants.DEFAULT_AWS_REGION;
      }
      endpoint = RegionUtils.getRegion(region).getServiceEndpoint(ServiceAbbreviations.Dynamodb);
    }
    log.info("Using endpoint for DynamoDB: " + endpoint);
    return endpoint;
  }

  public static JobClient createJobClient(JobConf jobConf) {
    try {
      return new JobClient(jobConf);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  public static int calcMaxMapTasks(JobClient jobClient) throws IOException {
    JobConf conf = (JobConf) jobClient.getConf();
    NodeCapacityProvider nodeCapacityProvider = new ClusterTopologyNodeCapacityProvider(conf);
    YarnContainerAllocator yarnContainerAllocator = new RoundRobinYarnContainerAllocator();
    TaskCalculator taskCalculator = new TaskCalculator(jobClient, nodeCapacityProvider,
        yarnContainerAllocator);
    return taskCalculator.getMaxMapTasks();
  }

  /**
   * Since ByteBuffer does not have a no-arg constructor we hand serialize/deserialize them.
   */
  private static class ByteBufferSerializer implements JsonSerializer<ByteBuffer> {
    @Override
    public JsonElement serialize(ByteBuffer byteBuffer, Type type,
        JsonSerializationContext context) {
      String base64String = DynamoDBUtil.base64EncodeByteArray(byteBuffer.array());
      return new JsonPrimitive(base64String);
    }
  }

  /**
   * Since ByteBuffer does not have a no-arg constructor we hand serialize/deserialize them.
   */
  private static class ByteBufferDeserializer implements JsonDeserializer<ByteBuffer> {
    @Override
    public ByteBuffer deserialize(JsonElement jsonElement, Type type,
        JsonDeserializationContext context) throws JsonParseException {
      String base64String = jsonElement.getAsJsonPrimitive().getAsString();
      return DynamoDBUtil.base64StringToByteBuffer(base64String);
    }
  }

  private DynamoDBUtil() {
  }
}
```
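To close, a minimal, hypothetical sketch of why the custom type adapters matter (`GsonByteBufferDemo` is not part of the connector): the shared `Gson` instance writes a `ByteBuffer` out as a base64 JSON string and restores it on the way back, which Gson's default reflective machinery could not do.

```java
import com.google.gson.Gson;
import java.nio.ByteBuffer;
import org.apache.hadoop.dynamodb.DynamoDBUtil;

public class GsonByteBufferDemo {
  public static void main(String[] args) {
    Gson gson = DynamoDBUtil.getGson();
    ByteBuffer original = ByteBuffer.wrap(new byte[] {1, 2, 3, 4});

    // Pass the declared type so Gson picks the ByteBuffer adapter rather than
    // the runtime HeapByteBuffer class; the serializer base64-encodes array().
    String json = gson.toJson(original, ByteBuffer.class);
    System.out.println(json); // "AQIDBA==" (a JSON string)

    // The deserializer decodes the base64 string back into a ByteBuffer.
    ByteBuffer restored = gson.fromJson(json, ByteBuffer.class);
    System.out.println(restored.equals(original)); // true
  }
}
```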