Source code

Java tutorial


Here is the source code for


/*
* Copyright 2015 herd contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.Serializable;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;


/**
 * Encapsulates logic for calculating the best price for an EMR cluster.
 */
public class EmrPricingHelper extends AwsHelper {
    // Class-wide log4j logger; used for the debug traces in getBestInstancePrice.
    private static final Logger LOGGER = Logger.getLogger(EmrPricingHelper.class);

    // NOTE(review): none of the three collaborators below is assigned anywhere in the visible code, while the file
    // imports Spring's Autowired and Component without using them - presumably the injection annotations were lost
    // from this copy; confirm against the upstream source.

    // Looks up subnets, availability zones, and latest spot prices.
    private Ec2Dao ec2Dao;

    // Looks up on-demand prices by region name and instance type.
    private DmDao dmDao;

    // Splits and trims the definition's comma-separated subnet ID list.
    private DmStringHelper dmStringHelper;

    /**
     * Finds the best price for each of the master and core instances based on the subnets and master and core instance search parameters given in the definition.
     * <p/>
     * The results of the findings are used to update the given definition.
     * <p/>
     * If the instance's instanceSpotPrice is set, the instance definition will keep that value. If the instance's instanceMaxSearchPrice is set, the best price
     * will be found. If the found price is spot, the instanceSpotPrice will be set to the value of instanceMaxSearchPrice. If the found price is on-demand, the
     * instanceSpotPrice will be removed. The definition's subnetId will be set to the particular subnet in which the best price is found. The value will always be
     * replaced by a single subnet ID.
     * <p/>
     * The definition's instanceMaxSearchPrice and instanceOnDemandThreshold will be removed by this operation.
     *
     * @param emrClusterDefinition The EMR cluster definition with search criteria, and the definition that will be updated.
     */
    public void updateEmrClusterDefinitionWithBestPrice(EmrClusterDefinition emrClusterDefinition) {
        // Get total count of instances this definition will attempt to create
        int totalInstanceCount = getTotalInstanceCount(emrClusterDefinition);

        // Get the subnet information
        List<Subnet> subnets = getSubnets(emrClusterDefinition);
        // Filter out subnets with not enough available IPs
        removeSubnetsWithAvailableIpsLessThan(subnets, totalInstanceCount);

        if (subnets.isEmpty()) {
            throw new ObjectNotFoundException(
                    "There are no subnets in the current VPC which have sufficient IP addresses available to run your clusters. "
                            + "Try expanding the list of subnets or try again later.");

        // Best prices are accumulated in this list
        List<EmrClusterPriceDto> emrClusterPrices = new ArrayList<>();

        InstanceDefinition masterInstanceDefinition = getMasterInstanceDefinition(emrClusterDefinition);
        InstanceDefinition coreInstanceDefinition = getCoreInstanceDefinition(emrClusterDefinition);
        InstanceDefinition taskInstanceDefinition = getTaskInstanceDefinition(emrClusterDefinition);

        Set<String> requestedInstanceTypes = new HashSet<>();

        String masterInstanceType = masterInstanceDefinition.getInstanceType();

        String coreInstanceType = coreInstanceDefinition.getInstanceType();

        if (taskInstanceDefinition != null) {
            String taskInstanceType = taskInstanceDefinition.getInstanceType();

        // Get AZs for the subnets
        List<AvailabilityZone> availabilityZones = getAvailabilityZones(subnets);
        for (AvailabilityZone availabilityZone : availabilityZones) {
            // Create a mapping of instance types to prices for more efficient, in-memory lookup
            Map<String, BigDecimal> instanceTypeSpotPrices = getInstanceTypeSpotPrices(availabilityZone,
            Map<String, BigDecimal> instanceTypeOnDemandPrices = getInstanceTypeOnDemandPrices(availabilityZone,

            // Get and compare master price
            BigDecimal masterSpotPrice = instanceTypeSpotPrices.get(masterInstanceType);
            BigDecimal masterOnDemandPrice = instanceTypeOnDemandPrices.get(masterInstanceType);
            Ec2PriceDto masterPrice = getBestInstancePrice(masterSpotPrice, masterOnDemandPrice,

            // Get and compare core price
            BigDecimal coreSpotPrice = instanceTypeSpotPrices.get(coreInstanceType);
            BigDecimal coreOnDemandPrice = instanceTypeOnDemandPrices.get(coreInstanceType);
            Ec2PriceDto corePrice = getBestInstancePrice(coreSpotPrice, coreOnDemandPrice, coreInstanceDefinition);

            Ec2PriceDto taskPrice = null;
            if (taskInstanceDefinition != null) {
                String taskInstanceType = taskInstanceDefinition.getInstanceType();

                // Get and compare task price
                BigDecimal taskSpotPrice = instanceTypeSpotPrices.get(taskInstanceType);
                BigDecimal taskOnDemandPrice = instanceTypeOnDemandPrices.get(taskInstanceType);
                taskPrice = getBestInstancePrice(taskSpotPrice, taskOnDemandPrice, taskInstanceDefinition);

            // If prices were found for both master and core
            if (masterPrice != null && corePrice != null && (taskInstanceDefinition == null || taskPrice != null)) {
                // Add the pricing result to the result list
                emrClusterPrices.add(createEmrClusterPrice(availabilityZone, masterPrice, corePrice, taskPrice));

            // If prices were not found for either master or core, this AZ cannot satisfy the search criteria. Ignore this AZ.

        if (emrClusterPrices.isEmpty()) {
            throw new ObjectNotFoundException(
                    "There were no subnets which satisfied your best price search criteria. "
                            + "Try setting the max price or the on-demand threshold to a higher value.");

        // Find the best prices from the result list
        EmrClusterPriceDto bestEmrClusterPrice = getEmrClusterPriceWithLowestTotalCost(emrClusterPrices);

        // Find the best subnet among the best AZ's
        Subnet bestEmrClusterSubnet = getBestSubnetForAvailabilityZone(bestEmrClusterPrice.getAvailabilityZone(),

        // Update the definition with the new calculated values
        updateInstanceDefinitionsWithBestPrice(emrClusterDefinition, bestEmrClusterSubnet, bestEmrClusterPrice);

    /**
     * Returns the total number of requested instances. Returns the sum of master, core, and task instance counts. Task instance is optional.
     *
     * @param emrClusterDefinition The EMR cluster definition containing the instance definitions
     * @return the total instance count.
     */
    private int getTotalInstanceCount(EmrClusterDefinition emrClusterDefinition) {
        InstanceDefinition masterInstanceDefinition = getMasterInstanceDefinition(emrClusterDefinition);
        InstanceDefinition coreInstanceDefinition = getCoreInstanceDefinition(emrClusterDefinition);
        InstanceDefinition taskInstanceDefinition = getTaskInstanceDefinition(emrClusterDefinition);

        // Get total count of instances this definition will attempt to create
        int totalInstanceCount = masterInstanceDefinition.getInstanceCount()
                + coreInstanceDefinition.getInstanceCount();
        if (taskInstanceDefinition != null) {
            totalInstanceCount += taskInstanceDefinition.getInstanceCount();
        return totalInstanceCount;

    /**
     * Updates the given definition with the given subnet and EMR pricing information.
     * <p/>
     * Sets the subnet with the given subnet ID. Removes any maxSearchPrice and onDemandThreshold that were set. Sets the spotPrice only if the given cluster
     * price is a spot price.
     *
     * @param emrClusterDefinition The definition to update
     * @param bestEmrClusterSubnet The subnet to use
     * @param bestEmrClusterPrice The EMR pricing information for each instance
     */
    private void updateInstanceDefinitionsWithBestPrice(EmrClusterDefinition emrClusterDefinition,
            Subnet bestEmrClusterSubnet, EmrClusterPriceDto bestEmrClusterPrice) {
        // NOTE(review): no statements and no closing brace follow this signature in this copy of the file, so the
        // method does nothing as shown. Its description says it should set the subnet ID, clear the max search price
        // and on-demand threshold, and set the spot price only for spot pricing (presumably via getSpotBidPrice,
        // which has no other caller in this file) - restore the body from the upstream source.



    /**
     * Returns the bid price based on the given pricing information. Returns the given price's bid price if the pricing is spot. Returns null otherwise.
     *
     * @param ec2Price The EC2 pricing information.
     * @return The bid price, or null
     */
    private BigDecimal getSpotBidPrice(Ec2PriceDto ec2Price) {
        BigDecimal bidPrice = null;
        if (ec2Price.getIsSpot()) {
            bidPrice = ec2Price.getBidPrice();
        return bidPrice;

    /**
     * Chooses the best subnet from the given list of subnets, which belongs to the given availability zone. The "best" subnet is selected by the number of
     * available IP addresses in the subnet. A subnet with more availability is preferred. If multiple subnets have the same IP availability, then the result
     * subnet is arbitrarily chosen.
     *
     * @param availabilityZone The availability zone to which the subnet belongs
     * @param subnets The list of subnets to select from
     * @return The subnet with the most available IPs
     */
    private Subnet getBestSubnetForAvailabilityZone(String availabilityZone, List<Subnet> subnets) {
        List<Subnet> subnetsInAvailabilityZone = new ArrayList<>();
        for (Subnet subnet : subnets) {
            if (subnet.getAvailabilityZone().equals(availabilityZone)) {

        return getTop(subnetsInAvailabilityZone, new IpAddressComparator());

    /**
     * An IP address comparator. A static named inner class was created as opposed to an anonymous inner class since it has no dependencies on its containing
     * class and is therefore more efficient.
     */
    private static class IpAddressComparator implements Comparator<Subnet>, Serializable {
        private static final long serialVersionUID = 2005944161800182009L;

        public int compare(Subnet o1, Subnet o2) {
            return o2.getAvailableIpAddressCount().compareTo(o1.getAvailableIpAddressCount());

    /**
     * Selects the first element after sorting the list using the given comparator. Returns null if the list is empty.
     *
     * @param list The list to select from.
     * @param comparator The comparator to use to sort
     * @return The first element after sorting, or null
     */
    private <T> T getTop(List<T> list, Comparator<T> comparator) {
        Collections.sort(list, comparator);
        return list.get(0);

    /**
     * Selects the EMR cluster pricing with the lowest total cost. Returns null if the given list is empty.
     *
     * @param emrClusterPrices The list of pricing to select from.
     * @return The pricing with the lowest total cost.
     */
    private EmrClusterPriceDto getEmrClusterPriceWithLowestTotalCost(List<EmrClusterPriceDto> emrClusterPrices) {
        EmrClusterPriceDto top = getTop(emrClusterPrices, new Comparator<EmrClusterPriceDto>() {
            public int compare(EmrClusterPriceDto o1, EmrClusterPriceDto o2) {
                BigDecimal totalCost1 = getEmrClusterTotalCost(o1);
                BigDecimal totalCost2 = getEmrClusterTotalCost(o2);
                return totalCost1.compareTo(totalCost2);

        return top;

    /**
     * Gets the total cost of the given pricing. The total cost is the sum of master, core, and task prices - each multiplied by their instance count. Task
     * price is optional and will be ignored if not specified.
     *
     * @param emrClusterPrice The pricing information
     * @return The total cost
     */
    private BigDecimal getEmrClusterTotalCost(EmrClusterPriceDto emrClusterPrice) {
        BigDecimal totalPrice = BigDecimal.ZERO;

        BigDecimal masterPrice = getTotalCost(emrClusterPrice.getMasterPrice());
        totalPrice = totalPrice.add(masterPrice);

        BigDecimal corePrice = getTotalCost(emrClusterPrice.getCorePrice());
        totalPrice = totalPrice.add(corePrice);

        if (emrClusterPrice.getTaskPrice() != null) {
            BigDecimal taskPrice = getTotalCost(emrClusterPrice.getTaskPrice());
            totalPrice = totalPrice.add(taskPrice);

        return totalPrice;

    /**
     * Updates the given list of subnets to remove subnets whose number of available IPs is less than the given value.
     *
     * @param subnets List of subnets
     * @param availableIps The number of available IPs to filter by
     */
    private void removeSubnetsWithAvailableIpsLessThan(List<Subnet> subnets, int availableIps) {
        Iterator<Subnet> iterator = subnets.iterator();
        while (iterator.hasNext()) {
            Subnet subnet =;
            if (subnet.getAvailableIpAddressCount() < availableIps) {

    /**
     * Creates a new {@link EmrClusterPriceDto} object from the given parameters.
     *
     * @param availabilityZone The AZ
     * @param masterPrice The master instance's price
     * @param corePrice The core instance's price
     * @param taskPrice The task instance's price
     * @return A new {@link EmrClusterPriceDto}
     */
    private EmrClusterPriceDto createEmrClusterPrice(AvailabilityZone availabilityZone, Ec2PriceDto masterPrice,
            Ec2PriceDto corePrice, Ec2PriceDto taskPrice) {
        // NOTE(review): as shown, none of the four parameters is copied into the DTO before it is returned, yet other
        // methods in this file read getAvailabilityZone(), getMasterPrice(), getCorePrice() and getTaskPrice() from
        // it. The populating statements (and the closing brace) appear to be missing - restore from the upstream source.
        EmrClusterPriceDto emrClusterPrice = new EmrClusterPriceDto();
        return emrClusterPrice;

    /**
     * Returns the pricing information selected based on the given instance definition's search criteria.
     * <p/>
     * If the instance's spotBidPrice is set, returns spot price with spotBidPrice as the bid price. If the instance's maxSearchPrice is set, compares the given
     * spot, on-demand prices, maxSearchPrice, and optionally, onDemandThreshold to return the best result. This may return null if neither spot nor on-demand
     * price matched the given criteria. If neither spotBidPrice nor maxSearchPrice is set, returns the pricing as the on-demand price.
     *
     * @param spotPrice The current spot price for the instance type
     * @param onDemandPrice The current on-demand price for the instance type
     * @param instanceDefinition The instance definition containing search criteria
     * @return A new {@link Ec2PriceDto} with the pricing information
     */
    private Ec2PriceDto getBestInstancePrice(BigDecimal spotPrice, BigDecimal onDemandPrice,
            InstanceDefinition instanceDefinition) {
        // NOTE(review): this copy of the method is incomplete - closing braces are absent and every branch creates an
        // Ec2PriceDto without populating it. The missing statements are flagged inline; restore them from the upstream
        // source before relying on this code.
        LOGGER.debug("start: instanceType = " + instanceDefinition.getInstanceType() + ", spotPrice = " + spotPrice
                + ", onDemandPrice = " + onDemandPrice);

        // The three user-supplied search criteria; each may be null
        BigDecimal spotBidPrice = instanceDefinition.getInstanceSpotPrice();
        BigDecimal maxSearchPrice = instanceDefinition.getInstanceMaxSearchPrice();
        BigDecimal onDemandThreshold = instanceDefinition.getInstanceOnDemandThreshold();

        LOGGER.debug("spotBidPrice = " + spotBidPrice + ", maxSearchPrice = " + maxSearchPrice
                + ", onDemandThreshold = " + onDemandThreshold);

        Ec2PriceDto bestPrice = null;

        // spotBidPrice is set. User wants to explicitly use spot pricing
        if (spotBidPrice != null) {
            // NOTE(review): the DTO is left unpopulated here; presumably spot fields were set - confirm
            bestPrice = new Ec2PriceDto();
        // spotBidPrice and maxSearchPrice are not specified. User explicitly wants to use on-demand
        else if (maxSearchPrice == null) {
            // NOTE(review): the DTO is left unpopulated here; presumably on-demand fields were set - confirm
            bestPrice = new Ec2PriceDto();
        // maxSearchPrice is set. User wants system to find best price
        else {
            // Default to on-demand
            // NOTE(review): the DTO is left unpopulated here as well - confirm
            bestPrice = new Ec2PriceDto();

            // No on-demand threshold is equivalent to $0.00 threshold
            if (onDemandThreshold == null) {
                onDemandThreshold = BigDecimal.ZERO;

            // The threshold is relative to the spot price: spot price plus threshold
            BigDecimal onDemandThresholdAbsolute = spotPrice.add(onDemandThreshold);

            // Pre-compute some flags for readability
            // The two "below max" flags are inclusive (<=); the two "below on-demand" flags are strict (<)
            boolean isSpotBelowMax = spotPrice.compareTo(maxSearchPrice) <= 0;
            boolean isOnDemandBelowMax = onDemandPrice.compareTo(maxSearchPrice) <= 0;
            boolean isSpotBelowOnDemand = spotPrice.compareTo(onDemandPrice) < 0;
            boolean isThresholdBelowOnDemand = onDemandThresholdAbsolute.compareTo(onDemandPrice) < 0;

            // Should I use spot?
            // Spot must be within the max and cheaper than on-demand, and either spot plus the threshold is still
            // below on-demand or on-demand itself exceeds the max
            if (isSpotBelowMax && isSpotBelowOnDemand && (isThresholdBelowOnDemand || !isOnDemandBelowMax)) {
            // NOTE(review): the spot branch has no statements in this copy - confirm what it should set
            // Is there an error?
            // Spot was not chosen and on-demand exceeds the max, so nothing satisfies the criteria
            else if (!isOnDemandBelowMax) {
                bestPrice = null;
            // Otherwise use on-demand

        LOGGER.debug("end: bestPrice = " + bestPrice);
        return bestPrice;

    /**
     * Returns the core instance definition.
     *
     * @param emrClusterDefinition The EMR cluster definition
     * @return The core instance definition.
     */
    private InstanceDefinition getCoreInstanceDefinition(EmrClusterDefinition emrClusterDefinition) {
        return emrClusterDefinition.getInstanceDefinitions().getCoreInstances();

    /**
     * Returns the task instance definition. Returns null if no task definition is specified.
     *
     * @param emrClusterDefinition The EMR cluster definition
     * @return The task instance definition, or null
     */
    private InstanceDefinition getTaskInstanceDefinition(EmrClusterDefinition emrClusterDefinition) {
        return emrClusterDefinition.getInstanceDefinitions().getTaskInstances();

    /**
     * Returns the master instance definition. Copies the {@link MasterInstanceDefinition} to an {@link InstanceDefinition} to keep the class type consistent
     * with the core instance.
     *
     * @param emrClusterDefinition The EMR cluster definition
     * @return The master instance definition.
     */
    private InstanceDefinition getMasterInstanceDefinition(EmrClusterDefinition emrClusterDefinition) {
        // NOTE(review): the statement below is cut off after getInstanceDefinitions() - presumably an accessor for the
        // master instances followed; confirm against the upstream source.
        MasterInstanceDefinition masterInstanceDefinition = emrClusterDefinition.getInstanceDefinitions()

        // NOTE(review): as shown, nothing is copied from masterInstanceDefinition into the new object, although
        // callers read getInstanceType(), getInstanceCount() and the price criteria from the result. The copy
        // statements and the closing brace appear to be missing.
        InstanceDefinition instanceDefinition = new InstanceDefinition();
        return instanceDefinition;

    /**
     * Returns a mapping of instance types to on-demand prices for the given AZ and instance types. The on-demand prices are retrieved from database
     * configurations. The on-demand prices are looked up by the AZ's region name.
     *
     * @param availabilityZone The availability zone of the on-demand instances.
     * @param instanceTypes The sizes of the on-demand instances.
     * @return A map of instance type to on-demand price.
     * @throws ObjectNotFoundException when any of the instance types was not found in the given region
     */
    private Map<String, BigDecimal> getInstanceTypeOnDemandPrices(AvailabilityZone availabilityZone,
            Set<String> instanceTypes) {
        Map<String, BigDecimal> instanceTypeOnDemandPrices = new HashMap<>();
        for (String instanceType : instanceTypes) {
            OnDemandPriceEntity onDemandPrice = dmDao.getOnDemandPrice(availabilityZone.getRegionName(),

            if (onDemandPrice == null) {
                throw new ObjectNotFoundException("On-demand price for region '" + availabilityZone.getRegionName()
                        + "' and instance type '" + instanceType + "' not found.");

            instanceTypeOnDemandPrices.put(instanceType, onDemandPrice.getValue());

        return instanceTypeOnDemandPrices;

    /**
     * Returns a mapping of instance types to spot prices for the given AZ and instance types. The spot prices are retrieved from the EC2 API.
     * <p/>
     * This method also validates that the given instance types are real instance types supported by AWS.
     *
     * @param availabilityZone The AZ of the spot instances.
     * @param instanceTypes The size of the spot instances.
     * @return A mapping of instance type to spot prices.
     * @throws ObjectNotFoundException when any of the instance types does not exist in AWS
     */
    private Map<String, BigDecimal> getInstanceTypeSpotPrices(AvailabilityZone availabilityZone,
            Set<String> instanceTypes) {
        List<SpotPrice> spotPrices = ec2Dao.getLatestSpotPrices(availabilityZone.getZoneName(), instanceTypes,

        Map<String, BigDecimal> instanceTypeSpotPrices = new HashMap<>();
        for (SpotPrice spotPrice : spotPrices) {
            instanceTypeSpotPrices.put(spotPrice.getInstanceType(), new BigDecimal(spotPrice.getSpotPrice()));

        // Ensure that all of the specified instance types were found.
        // If not found, it probably means user tried to lookup non-existent types.
        Set<String> difference = new HashSet<>(instanceTypes);

        if (!difference.isEmpty()) {
            throw new ObjectNotFoundException(
                    "Spot prices for instance types " + difference + " not found in AZ " + availabilityZone + ".");

        return instanceTypeSpotPrices;

    /**
     * Returns a list of AZs to which the given list of subnets belong.
     *
     * @param subnets List of subnets in the AZ
     * @return A list of AZs
     */
    private List<AvailabilityZone> getAvailabilityZones(List<Subnet> subnets) {
        return ec2Dao.getAvailabilityZonesForSubnetIds(subnets, getAwsParamsDto());

    /**
     * Returns a list of subnets specified in the definition. The definition specifies a comma-separated list of subnet IDs. This method parses it, looks up the
     * subnets from AWS, and returns the list. If the subnet is not specified or empty, all subnets in the current VPC are returned. This is AWS's default
     * behavior. All subnet IDs will be trimmed, and ignored if empty.
     *
     * @param emrClusterDefinition The definition specifying the subnet IDs
     * @return List of subnets
     */
    private List<Subnet> getSubnets(EmrClusterDefinition emrClusterDefinition) {
        String definitionSubnetId = emrClusterDefinition.getSubnetId();

        Set<String> subnetIds = Collections.emptySet();
        if (StringUtils.isNotBlank(definitionSubnetId)) {
            subnetIds = dmStringHelper.splitAndTrim(definitionSubnetId, ",");

        return ec2Dao.getSubnets(subnetIds, getAwsParamsDto());

    /**
     * Returns the total cost per hour to run the requested number of instances for the given price. Returns the instance price multiplied by the number of
     * instances.
     *
     * @param ec2Price The EC2 pricing information
     * @return USD per hour
     */
    public BigDecimal getTotalCost(Ec2PriceDto ec2Price) {
        BigDecimal instancePrice = ec2Price.getInstancePrice();
        Integer instanceCount = ec2Price.getInstanceCount();
        return instancePrice.multiply(new BigDecimal(instanceCount));