org.finra.herd.dao.impl.EmrDaoImpl.java Source code

Java tutorial

Introduction

Here is the source code for org.finra.herd.dao.impl.EmrDaoImpl.java

Source

/*
* Copyright 2015 herd contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.finra.herd.dao.impl;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient;
import com.amazonaws.services.elasticmapreduce.model.ActionOnFailure;
import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.Application;
import com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.Cluster;
import com.amazonaws.services.elasticmapreduce.model.ClusterSummary;
import com.amazonaws.services.elasticmapreduce.model.Configuration;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;
import com.amazonaws.services.elasticmapreduce.model.DescribeStepRequest;
import com.amazonaws.services.elasticmapreduce.model.EbsBlockDeviceConfig;
import com.amazonaws.services.elasticmapreduce.model.EbsConfiguration;
import com.amazonaws.services.elasticmapreduce.model.Instance;
import com.amazonaws.services.elasticmapreduce.model.InstanceFleetConfig;
import com.amazonaws.services.elasticmapreduce.model.InstanceFleetProvisioningSpecifications;
import com.amazonaws.services.elasticmapreduce.model.InstanceGroupConfig;
import com.amazonaws.services.elasticmapreduce.model.InstanceGroupType;
import com.amazonaws.services.elasticmapreduce.model.InstanceRoleType;
import com.amazonaws.services.elasticmapreduce.model.InstanceTypeConfig;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.KerberosAttributes;
import com.amazonaws.services.elasticmapreduce.model.ListClustersRequest;
import com.amazonaws.services.elasticmapreduce.model.ListClustersResult;
import com.amazonaws.services.elasticmapreduce.model.ListInstanceFleetsRequest;
import com.amazonaws.services.elasticmapreduce.model.ListInstanceFleetsResult;
import com.amazonaws.services.elasticmapreduce.model.ListInstancesRequest;
import com.amazonaws.services.elasticmapreduce.model.ListStepsRequest;
import com.amazonaws.services.elasticmapreduce.model.MarketType;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.ScriptBootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.SpotProvisioningSpecification;
import com.amazonaws.services.elasticmapreduce.model.Step;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.model.StepState;
import com.amazonaws.services.elasticmapreduce.model.StepSummary;
import com.amazonaws.services.elasticmapreduce.model.Tag;
import com.amazonaws.services.elasticmapreduce.model.VolumeSpecification;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;

import org.finra.herd.core.helper.ConfigurationHelper;
import org.finra.herd.dao.AwsClientFactory;
import org.finra.herd.dao.Ec2Dao;
import org.finra.herd.dao.EmrDao;
import org.finra.herd.dao.EmrOperations;
import org.finra.herd.dao.helper.EmrHelper;
import org.finra.herd.dao.helper.HerdStringHelper;
import org.finra.herd.dao.helper.JsonHelper;
import org.finra.herd.model.api.xml.ConfigurationFile;
import org.finra.herd.model.api.xml.ConfigurationFiles;
import org.finra.herd.model.api.xml.EmrClusterDefinition;
import org.finra.herd.model.api.xml.EmrClusterDefinitionApplication;
import org.finra.herd.model.api.xml.EmrClusterDefinitionConfiguration;
import org.finra.herd.model.api.xml.EmrClusterDefinitionEbsBlockDeviceConfig;
import org.finra.herd.model.api.xml.EmrClusterDefinitionEbsConfiguration;
import org.finra.herd.model.api.xml.EmrClusterDefinitionInstanceFleet;
import org.finra.herd.model.api.xml.EmrClusterDefinitionInstanceTypeConfig;
import org.finra.herd.model.api.xml.EmrClusterDefinitionKerberosAttributes;
import org.finra.herd.model.api.xml.EmrClusterDefinitionLaunchSpecifications;
import org.finra.herd.model.api.xml.EmrClusterDefinitionSpotSpecification;
import org.finra.herd.model.api.xml.EmrClusterDefinitionVolumeSpecification;
import org.finra.herd.model.api.xml.HadoopJarStep;
import org.finra.herd.model.api.xml.InstanceDefinitions;
import org.finra.herd.model.api.xml.KeyValuePairConfiguration;
import org.finra.herd.model.api.xml.KeyValuePairConfigurations;
import org.finra.herd.model.api.xml.NodeTag;
import org.finra.herd.model.api.xml.Parameter;
import org.finra.herd.model.api.xml.ScriptDefinition;
import org.finra.herd.model.dto.AwsParamsDto;
import org.finra.herd.model.dto.ConfigurationValue;

/**
 * The EMR DAO implementation.
 */
@Repository
public class EmrDaoImpl implements EmrDao {
    private static final Logger LOGGER = LoggerFactory.getLogger(EmrDaoImpl.class);

    @Autowired
    private AwsClientFactory awsClientFactory;

    @Autowired
    private ConfigurationHelper configurationHelper;

    @Autowired
    private Ec2Dao ec2Dao;

    @Autowired
    private EmrHelper emrHelper;

    @Autowired
    private EmrOperations emrOperations;

    @Autowired
    private HerdStringHelper herdStringHelper;

    @Autowired
    private JsonHelper jsonHelper;

    @Override
    public List<String> addEmrMasterSecurityGroups(String clusterId, List<String> securityGroups,
            AwsParamsDto awsParams) throws Exception {
        // Get the master EC2 instance
        ListInstancesRequest listInstancesRequest = new ListInstancesRequest().withClusterId(clusterId)
                .withInstanceGroupTypes(InstanceGroupType.MASTER);

        List<Instance> instances = emrOperations
                .listClusterInstancesRequest(getEmrClient(awsParams), listInstancesRequest).getInstances();

        // Throw error in case there are no master instances found yet
        if (instances.size() == 0) {
            throw new IllegalArgumentException("No master instances found for the cluster \"" + clusterId + "\".");
        }

        for (Instance instance : instances) {
            ec2Dao.addSecurityGroupsToEc2Instance(instance.getEc2InstanceId(), securityGroups, awsParams);
        }

        return securityGroups;
    }

    @Override
    public String addEmrStep(String clusterId, StepConfig emrStepConfig, AwsParamsDto awsParamsDto)
            throws Exception {
        List<StepConfig> steps = new ArrayList<>();

        steps.add(emrStepConfig);

        // Add the job flow request
        AddJobFlowStepsRequest jobFlowStepRequest = new AddJobFlowStepsRequest(clusterId, steps);
        List<String> emrStepIds = emrOperations.addJobFlowStepsRequest(getEmrClient(awsParamsDto),
                jobFlowStepRequest);

        return emrStepIds.get(0);
    }

    @Override
    public String createEmrCluster(String clusterName, EmrClusterDefinition emrClusterDefinition,
            AwsParamsDto awsParams) {
        RunJobFlowRequest runJobFlowRequest = getRunJobFlowRequest(clusterName, emrClusterDefinition);
        LOGGER.info("runJobFlowRequest={}", jsonHelper.objectToJson(runJobFlowRequest));
        String clusterId = emrOperations.runEmrJobFlow(getEmrClient(awsParams), runJobFlowRequest);
        LOGGER.info("EMR cluster started. emrClusterId=\"{}\"", clusterId);
        return clusterId;
    }

    @Override
    public ClusterSummary getActiveEmrClusterByName(String clusterName, AwsParamsDto awsParams) {
        if (StringUtils.isNotBlank(clusterName)) {
            /**
             * Call AWSOperations for ListClusters API. Need to list all the active clusters that are in
             * BOOTSTRAPPING/RUNNING/STARTING/WAITING states
             */
            ListClustersRequest listClustersRequest = new ListClustersRequest()
                    .withClusterStates(getActiveEmrClusterStates());

            /**
             * ListClusterRequest returns only 50 clusters at a time. However, this returns a marker
             * that can be used for subsequent calls to listClusters to get all the clusters
             */
            String markerForListClusters = listClustersRequest.getMarker();

            // Loop through all the available clusters and look for the given cluster id
            do {
                /**
                 * Call AWSOperations for ListClusters API.
                 * Need to include the Marker returned by the previous iteration
                 */
                ListClustersResult clusterResult = emrOperations.listEmrClusters(getEmrClient(awsParams),
                        listClustersRequest.withMarker(markerForListClusters));

                // Loop through all the active clusters returned by AWS
                for (ClusterSummary clusterInstance : clusterResult.getClusters()) {
                    // If the cluster name matches, then return the status
                    if (StringUtils.isNotBlank(clusterInstance.getName())
                            && clusterInstance.getName().equalsIgnoreCase(clusterName)) {
                        return clusterInstance;
                    }
                }
                markerForListClusters = clusterResult.getMarker();
            } while (markerForListClusters != null);
        }

        return null;
    }

    /**
     * Converts the given list of {@link EmrClusterDefinitionApplication} into a list of {@link Application}
     *
     * @param emrClusterDefinitionApplications list of {@link EmrClusterDefinitionApplication}
     *
     * @return list {@link Application}
     */
    public List<Application> getApplications(
            List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications) {
        List<Application> applications = new ArrayList<>();
        for (EmrClusterDefinitionApplication emrClusterDefinitionApplication : emrClusterDefinitionApplications) {
            Application application = new Application();
            application.setName(emrClusterDefinitionApplication.getName());
            application.setVersion(emrClusterDefinitionApplication.getVersion());
            application.setArgs(emrClusterDefinitionApplication.getArgs());

            List<Parameter> additionalInfoList = emrClusterDefinitionApplication.getAdditionalInfoList();
            if (!CollectionUtils.isEmpty(additionalInfoList)) {
                application.setAdditionalInfo(getMap(additionalInfoList));
            }

            applications.add(application);
        }
        return applications;
    }

    @Override
    public StepSummary getClusterActiveStep(String clusterId, AwsParamsDto awsParamsDto) {
        ListStepsRequest listStepsRequest = new ListStepsRequest().withClusterId(clusterId)
                .withStepStates(StepState.RUNNING);
        List<StepSummary> stepSummaryList = emrOperations
                .listStepsRequest(getEmrClient(awsParamsDto), listStepsRequest).getSteps();

        return !stepSummaryList.isEmpty() ? stepSummaryList.get(0) : null;
    }

    @Override
    public Step getClusterStep(String clusterId, String stepId, AwsParamsDto awsParamsDto) {
        DescribeStepRequest describeStepRequest = new DescribeStepRequest().withClusterId(clusterId)
                .withStepId(stepId);
        return emrOperations.describeStepRequest(getEmrClient(awsParamsDto), describeStepRequest).getStep();
    }

    @Override
    public AmazonElasticMapReduceClient getEmrClient(AwsParamsDto awsParamsDto) {
        return (AmazonElasticMapReduceClient) awsClientFactory.getEmrClient(awsParamsDto);
    }

    @Override
    public Cluster getEmrClusterById(String clusterId, AwsParamsDto awsParams) {
        Cluster cluster = null;
        if (StringUtils.isNotBlank(clusterId)) {
            DescribeClusterResult describeClusterResult = emrOperations.describeClusterRequest(
                    getEmrClient(awsParams), new DescribeClusterRequest().withClusterId(clusterId));
            if (describeClusterResult != null && describeClusterResult.getCluster() != null) {
                cluster = describeClusterResult.getCluster();
            }
        }

        return cluster;
    }

    @Override
    public String getEmrClusterStatusById(String clusterId, AwsParamsDto awsParams) {
        Cluster cluster = getEmrClusterById(clusterId, awsParams);

        return ((cluster == null) ? null : cluster.getStatus().getState());
    }

    @Override
    public Instance getEmrMasterInstance(String clusterId, AwsParamsDto awsParams) throws Exception {
        // Get the master EC2 instance
        ListInstancesRequest listInstancesRequest = new ListInstancesRequest().withClusterId(clusterId)
                .withInstanceGroupTypes(InstanceGroupType.MASTER);

        List<Instance> instances = emrOperations
                .listClusterInstancesRequest(getEmrClient(awsParams), listInstancesRequest).getInstances();

        // Throw error in case there are no master instances found yet
        if (instances.size() == 0) {
            throw new IllegalArgumentException("No master instances found for the cluster \"" + clusterId + "\".");
        }

        // EMR has only one master node.
        return instances.get(0);
    }

    @Override
    public ListInstanceFleetsResult getListInstanceFleetsResult(String clusterId, AwsParamsDto awsParams) {
        return emrOperations.listInstanceFleets(getEmrClient(awsParams),
                new ListInstanceFleetsRequest().withClusterId(clusterId));
    }

    @Override
    public void terminateEmrCluster(String clusterId, boolean overrideTerminationProtection,
            AwsParamsDto awsParams) {
        emrOperations.terminateEmrCluster(getEmrClient(awsParams), clusterId, overrideTerminationProtection);
    }

    /**
     * Converts the given list of {@link EmrClusterDefinitionConfiguration} into a list of {@link Configuration}.
     *
     * @param emrClusterDefinitionConfigurations list of {@link EmrClusterDefinitionConfiguration}
     *
     * @return list of {@link Configuration}
     */
    protected List<Configuration> getConfigurations(
            List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations) {
        List<Configuration> configurations = null;

        if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) {
            configurations = new ArrayList<>();

            for (EmrClusterDefinitionConfiguration emrClusterDefinitionConfiguration : emrClusterDefinitionConfigurations) {
                if (emrClusterDefinitionConfiguration != null) {
                    Configuration configuration = new Configuration();
                    configuration.setClassification(emrClusterDefinitionConfiguration.getClassification());
                    configuration.setConfigurations(
                            getConfigurations(emrClusterDefinitionConfiguration.getConfigurations()));
                    configuration.setProperties(getMap(emrClusterDefinitionConfiguration.getProperties()));

                    configurations.add(configuration);
                }
            }
        }

        return configurations;
    }

    /**
     * Creates a list of {@link EbsBlockDeviceConfig} from a given list of {@link EmrClusterDefinitionEbsBlockDeviceConfig}.
     *
     * @param emrClusterDefinitionEbsBlockDeviceConfigs the list of {@link EmrClusterDefinitionEbsBlockDeviceConfig}
     *
     * @return the list of {@link EbsBlockDeviceConfig}
     */
    protected List<EbsBlockDeviceConfig> getEbsBlockDeviceConfigs(
            List<EmrClusterDefinitionEbsBlockDeviceConfig> emrClusterDefinitionEbsBlockDeviceConfigs) {
        List<EbsBlockDeviceConfig> ebsBlockDeviceConfigs = null;

        if (!CollectionUtils.isEmpty(emrClusterDefinitionEbsBlockDeviceConfigs)) {
            ebsBlockDeviceConfigs = new ArrayList<>();

            for (EmrClusterDefinitionEbsBlockDeviceConfig emrClusterDefinitionEbsBlockDeviceConfig : emrClusterDefinitionEbsBlockDeviceConfigs) {
                if (emrClusterDefinitionEbsBlockDeviceConfig != null) {
                    EbsBlockDeviceConfig ebsBlockDeviceConfig = new EbsBlockDeviceConfig();
                    ebsBlockDeviceConfig.setVolumeSpecification(getVolumeSpecification(
                            emrClusterDefinitionEbsBlockDeviceConfig.getVolumeSpecification()));
                    ebsBlockDeviceConfig.setVolumesPerInstance(
                            emrClusterDefinitionEbsBlockDeviceConfig.getVolumesPerInstance());

                    ebsBlockDeviceConfigs.add(ebsBlockDeviceConfig);
                }
            }
        }

        return ebsBlockDeviceConfigs;
    }

    /**
     * Creates an instance of {@link EbsConfiguration} from a given instance of {@link EmrClusterDefinitionEbsConfiguration}.
     *
     * @param emrClusterDefinitionEbsConfiguration the instance of {@link EmrClusterDefinitionEbsConfiguration}
     *
     * @return the instance of {@link EbsConfiguration}
     */
    protected EbsConfiguration getEbsConfiguration(
            EmrClusterDefinitionEbsConfiguration emrClusterDefinitionEbsConfiguration) {
        EbsConfiguration ebsConfiguration = null;

        if (emrClusterDefinitionEbsConfiguration != null) {
            ebsConfiguration = new EbsConfiguration();
            ebsConfiguration.setEbsBlockDeviceConfigs(
                    getEbsBlockDeviceConfigs(emrClusterDefinitionEbsConfiguration.getEbsBlockDeviceConfigs()));
            ebsConfiguration.setEbsOptimized(emrClusterDefinitionEbsConfiguration.isEbsOptimized());
        }

        return ebsConfiguration;
    }

    /**
     * Creates an instance fleet configuration that describes the EC2 instances and instance configurations for clusters that use this feature.
     *
     * @param emrClusterDefinitionInstanceFleets the list of instance fleet configurations from the EMR cluster definition
     *
     * @return the instance fleet configuration
     */
    protected List<InstanceFleetConfig> getInstanceFleets(
            List<EmrClusterDefinitionInstanceFleet> emrClusterDefinitionInstanceFleets) {
        List<InstanceFleetConfig> instanceFleets = null;

        if (!CollectionUtils.isEmpty(emrClusterDefinitionInstanceFleets)) {
            instanceFleets = new ArrayList<>();

            for (EmrClusterDefinitionInstanceFleet emrClusterDefinitionInstanceFleet : emrClusterDefinitionInstanceFleets) {
                if (emrClusterDefinitionInstanceFleet != null) {
                    InstanceFleetConfig instanceFleetConfig = new InstanceFleetConfig();
                    instanceFleetConfig.setName(emrClusterDefinitionInstanceFleet.getName());
                    instanceFleetConfig
                            .setInstanceFleetType(emrClusterDefinitionInstanceFleet.getInstanceFleetType());
                    instanceFleetConfig.setTargetOnDemandCapacity(
                            emrClusterDefinitionInstanceFleet.getTargetOnDemandCapacity());
                    instanceFleetConfig
                            .setTargetSpotCapacity(emrClusterDefinitionInstanceFleet.getTargetSpotCapacity());
                    instanceFleetConfig.setInstanceTypeConfigs(
                            getInstanceTypeConfigs(emrClusterDefinitionInstanceFleet.getInstanceTypeConfigs()));
                    instanceFleetConfig.setLaunchSpecifications(
                            getLaunchSpecifications(emrClusterDefinitionInstanceFleet.getLaunchSpecifications()));

                    instanceFleets.add(instanceFleetConfig);
                }
            }
        }

        return instanceFleets;
    }

    /**
     * Creates an instance group configuration.
     *
     * @param roleType role type for the instance group (MASTER/CORE/TASK)
     * @param instanceType EC2 instance type for the instance group
     * @param instanceCount number of instances for the instance group
     * @param bidPrice bid price in case of SPOT instance request
     * @param emrClusterDefinitionEbsConfiguration the instance of {@link EmrClusterDefinitionEbsConfiguration} that contains EBS configurations that will be
     * attached to each EC2 instance in this instance group
     *
     * @return the instance group config object
     */
    protected InstanceGroupConfig getInstanceGroupConfig(InstanceRoleType roleType, String instanceType,
            Integer instanceCount, BigDecimal bidPrice,
            EmrClusterDefinitionEbsConfiguration emrClusterDefinitionEbsConfiguration) {
        // Create an instance group configuration with an optional EBS configuration.
        InstanceGroupConfig instanceGroup = new InstanceGroupConfig(roleType, instanceType, instanceCount)
                .withEbsConfiguration(getEbsConfiguration(emrClusterDefinitionEbsConfiguration));

        // Consider spot price, if specified.
        if (bidPrice != null) {
            instanceGroup.setMarket(MarketType.SPOT);
            instanceGroup.setBidPrice(bidPrice.toString());
        }

        return instanceGroup;
    }

    /**
     * Create the instance group configuration for MASTER/CORE/TASK nodes as per the input parameters.
     *
     * @param instanceDefinitions the instance group definitions from the EMR cluster definition
     *
     * @return the instance group config list with all the instance group definitions
     */
    protected List<InstanceGroupConfig> getInstanceGroupConfigs(InstanceDefinitions instanceDefinitions) {
        List<InstanceGroupConfig> instanceGroupConfigs = null;

        if (!emrHelper.isInstanceDefinitionsEmpty(instanceDefinitions)) {
            // Create the instance group configurations.
            instanceGroupConfigs = new ArrayList<>();

            // Fill-in the MASTER node details.
            instanceGroupConfigs.add(getInstanceGroupConfig(InstanceRoleType.MASTER,
                    instanceDefinitions.getMasterInstances().getInstanceType(),
                    instanceDefinitions.getMasterInstances().getInstanceCount(),
                    instanceDefinitions.getMasterInstances().getInstanceSpotPrice(),
                    instanceDefinitions.getMasterInstances().getEbsConfiguration()));

            // if the optional core instances are specified, fill-in the CORE node details.
            if (instanceDefinitions.getCoreInstances() != null) {
                instanceGroupConfigs.add(getInstanceGroupConfig(InstanceRoleType.CORE,
                        instanceDefinitions.getCoreInstances().getInstanceType(),
                        instanceDefinitions.getCoreInstances().getInstanceCount(),
                        instanceDefinitions.getCoreInstances().getInstanceSpotPrice(),
                        instanceDefinitions.getCoreInstances().getEbsConfiguration()));
            }

            // If the optional task instances are specified, fill-in the TASK node details.
            if (instanceDefinitions.getTaskInstances() != null) {
                instanceGroupConfigs.add(getInstanceGroupConfig(InstanceRoleType.TASK,
                        instanceDefinitions.getTaskInstances().getInstanceType(),
                        instanceDefinitions.getTaskInstances().getInstanceCount(),
                        instanceDefinitions.getTaskInstances().getInstanceSpotPrice(),
                        instanceDefinitions.getTaskInstances().getEbsConfiguration()));
            }
        }

        return instanceGroupConfigs;
    }

    /**
     * Creates a list of {@link InstanceTypeConfig} from a given list of {@link EmrClusterDefinitionInstanceTypeConfig}.
     *
     * @param emrClusterDefinitionInstanceTypeConfigs the list of {@link EmrClusterDefinitionInstanceTypeConfig}
     *
     * @return the list of {@link InstanceTypeConfig}
     */
    protected List<InstanceTypeConfig> getInstanceTypeConfigs(
            List<EmrClusterDefinitionInstanceTypeConfig> emrClusterDefinitionInstanceTypeConfigs) {
        List<InstanceTypeConfig> instanceTypeConfigs = null;

        if (!CollectionUtils.isEmpty(emrClusterDefinitionInstanceTypeConfigs)) {
            instanceTypeConfigs = new ArrayList<>();

            for (EmrClusterDefinitionInstanceTypeConfig emrClusterDefinitionInstanceTypeConfig : emrClusterDefinitionInstanceTypeConfigs) {
                if (emrClusterDefinitionInstanceTypeConfig != null) {
                    InstanceTypeConfig instanceTypeConfig = new InstanceTypeConfig();
                    instanceTypeConfig.setInstanceType(emrClusterDefinitionInstanceTypeConfig.getInstanceType());
                    instanceTypeConfig
                            .setWeightedCapacity(emrClusterDefinitionInstanceTypeConfig.getWeightedCapacity());
                    instanceTypeConfig.setBidPrice(emrClusterDefinitionInstanceTypeConfig.getBidPrice());
                    instanceTypeConfig.setBidPriceAsPercentageOfOnDemandPrice(
                            emrClusterDefinitionInstanceTypeConfig.getBidPriceAsPercentageOfOnDemandPrice());
                    instanceTypeConfig.setEbsConfiguration(
                            getEbsConfiguration(emrClusterDefinitionInstanceTypeConfig.getEbsConfiguration()));
                    instanceTypeConfig.setConfigurations(
                            getConfigurations(emrClusterDefinitionInstanceTypeConfig.getConfigurations()));

                    instanceTypeConfigs.add(instanceTypeConfig);
                }
            }
        }

        return instanceTypeConfigs;
    }

    /**
     * Creates an instance of {@link KerberosAttributes} from a given instance of {@link EmrClusterDefinitionKerberosAttributes}.
     *
     * @param emrClusterDefinitionKerberosAttributes the instance of {@link EmrClusterDefinitionKerberosAttributes}, may be null
     *
     * @return the instance of {@link KerberosAttributes}
     */
    protected KerberosAttributes getKerberosAttributes(
            EmrClusterDefinitionKerberosAttributes emrClusterDefinitionKerberosAttributes) {
        KerberosAttributes kerberosAttributes = null;

        if (emrClusterDefinitionKerberosAttributes != null) {
            kerberosAttributes = new KerberosAttributes();
            kerberosAttributes
                    .setADDomainJoinPassword(emrClusterDefinitionKerberosAttributes.getADDomainJoinPassword());
            kerberosAttributes.setADDomainJoinUser(emrClusterDefinitionKerberosAttributes.getADDomainJoinUser());
            kerberosAttributes.setCrossRealmTrustPrincipalPassword(
                    emrClusterDefinitionKerberosAttributes.getCrossRealmTrustPrincipalPassword());
            kerberosAttributes.setKdcAdminPassword(emrClusterDefinitionKerberosAttributes.getKdcAdminPassword());
            kerberosAttributes.setRealm(emrClusterDefinitionKerberosAttributes.getRealm());
        }

        return kerberosAttributes;
    }

    /**
     * Creates an instance of {@link InstanceFleetProvisioningSpecifications} from a given instance of {@link EmrClusterDefinitionLaunchSpecifications}.
     *
     * @param emrClusterDefinitionLaunchSpecifications the instance of {@link EmrClusterDefinitionLaunchSpecifications}
     *
     * @return the instance of {@link InstanceFleetProvisioningSpecifications}
     */
    protected InstanceFleetProvisioningSpecifications getLaunchSpecifications(
            EmrClusterDefinitionLaunchSpecifications emrClusterDefinitionLaunchSpecifications) {
        InstanceFleetProvisioningSpecifications instanceFleetProvisioningSpecifications = null;

        if (emrClusterDefinitionLaunchSpecifications != null) {
            instanceFleetProvisioningSpecifications = new InstanceFleetProvisioningSpecifications();
            instanceFleetProvisioningSpecifications.setSpotSpecification(
                    getSpotSpecification(emrClusterDefinitionLaunchSpecifications.getSpotSpecification()));
        }

        return instanceFleetProvisioningSpecifications;
    }

    /**
     * Converts the given list of {@link Parameter} into a {@link Map} of {@link String}, {@link String}
     *
     * @param parameters List of {@link Parameter}
     *
     * @return {@link Map}
     */
    protected Map<String, String> getMap(List<Parameter> parameters) {
        Map<String, String> map = null;

        if (!CollectionUtils.isEmpty(parameters)) {
            map = new HashMap<>();

            for (Parameter parameter : parameters) {
                if (parameter != null) {
                    map.put(parameter.getName(), parameter.getValue());
                }
            }
        }

        return map;
    }

    /**
     * Creates an instance of {@link SpotProvisioningSpecification} from a given instance of {@link EmrClusterDefinitionSpotSpecification}.
     *
     * @param emrClusterDefinitionSpotSpecification the instance of {@link EmrClusterDefinitionSpotSpecification}
     *
     * @return the instance of {@link SpotProvisioningSpecification}
     */
    protected SpotProvisioningSpecification getSpotSpecification(
            EmrClusterDefinitionSpotSpecification emrClusterDefinitionSpotSpecification) {
        SpotProvisioningSpecification spotProvisioningSpecification = null;

        if (emrClusterDefinitionSpotSpecification != null) {
            spotProvisioningSpecification = new SpotProvisioningSpecification();
            spotProvisioningSpecification
                    .setTimeoutDurationMinutes(emrClusterDefinitionSpotSpecification.getTimeoutDurationMinutes());
            spotProvisioningSpecification
                    .setTimeoutAction(emrClusterDefinitionSpotSpecification.getTimeoutAction());
            spotProvisioningSpecification
                    .setBlockDurationMinutes(emrClusterDefinitionSpotSpecification.getBlockDurationMinutes());
        }

        return spotProvisioningSpecification;
    }

    /**
     * Creates an instance of {@link VolumeSpecification} from a given instance of {@link EmrClusterDefinitionVolumeSpecification}.
     *
     * @param emrClusterDefinitionVolumeSpecification the instance of {@link EmrClusterDefinitionVolumeSpecification}
     *
     * @return the instance of {@link VolumeSpecification}
     */
    protected VolumeSpecification getVolumeSpecification(
            EmrClusterDefinitionVolumeSpecification emrClusterDefinitionVolumeSpecification) {
        VolumeSpecification volumeSpecification = null;

        if (emrClusterDefinitionVolumeSpecification != null) {
            volumeSpecification = new VolumeSpecification();
            volumeSpecification.setVolumeType(emrClusterDefinitionVolumeSpecification.getVolumeType());
            volumeSpecification.setIops(emrClusterDefinitionVolumeSpecification.getIops());
            volumeSpecification.setSizeInGB(emrClusterDefinitionVolumeSpecification.getSizeInGB());
        }

        return volumeSpecification;
    }

    private void addCustomBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Add Custom bootstrap script support if needed
        if (!CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionAll())) {
            for (ScriptDefinition scriptDefinition : emrClusterDefinition.getCustomBootstrapActionAll()) {
                BootstrapActionConfig customActionConfigAll = getBootstrapActionConfig(
                        scriptDefinition.getScriptName(), scriptDefinition.getScriptLocation());

                ArrayList<String> argList = new ArrayList<>();
                if (!CollectionUtils.isEmpty(scriptDefinition.getScriptArguments())) {
                    for (String argument : scriptDefinition.getScriptArguments()) {
                        // Trim the argument
                        argList.add(argument.trim());
                    }
                }
                // Set arguments to bootstrap action
                customActionConfigAll.getScriptBootstrapAction().setArgs(argList);

                bootstrapActions.add(customActionConfigAll);
            }
        }
    }

    private void addCustomMasterBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Add Master custom bootstrap script support if needed
        if (!CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionMaster())) {
            for (ScriptDefinition scriptDefinition : emrClusterDefinition.getCustomBootstrapActionMaster()) {
                BootstrapActionConfig bootstrapActionConfig = getBootstrapActionConfig(
                        scriptDefinition.getScriptName(),
                        configurationHelper.getProperty(ConfigurationValue.EMR_CONDITIONAL_SCRIPT));

                // Add arguments to the bootstrap script
                ArrayList<String> argList = new ArrayList<>();

                // Execute this script only on the master node.
                argList.add(configurationHelper.getProperty(ConfigurationValue.EMR_NODE_CONDITION));
                argList.add(scriptDefinition.getScriptLocation());

                if (!CollectionUtils.isEmpty(scriptDefinition.getScriptArguments())) {
                    for (String argument : scriptDefinition.getScriptArguments()) {
                        // Trim the argument
                        argList.add(argument.trim());
                    }
                }

                bootstrapActionConfig.getScriptBootstrapAction().setArgs(argList);
                bootstrapActions.add(bootstrapActionConfig);
            }
        }
    }

    private void addDaemonBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Add daemon Configuration support if needed
        if (!CollectionUtils.isEmpty(emrClusterDefinition.getDaemonConfigurations())) {
            BootstrapActionConfig daemonBootstrapActionConfig = getBootstrapActionConfig(
                    ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(),
                    configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON));

            // Add arguments to the bootstrap script
            ArrayList<String> argList = new ArrayList<>();
            for (Parameter daemonConfig : emrClusterDefinition.getDaemonConfigurations()) {
                argList.add(daemonConfig.getName() + "=" + daemonConfig.getValue());
            }

            // Add the bootstrap action with arguments
            daemonBootstrapActionConfig.getScriptBootstrapAction().setArgs(argList);
            bootstrapActions.add(daemonBootstrapActionConfig);
        }
    }

    private void addHadoopBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition,
            ArrayList<BootstrapActionConfig> bootstrapActions) {
        // Add hadoop Configuration support if needed
        if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopConfigurations())) {
            ArrayList<String> argList = new ArrayList<>();
            BootstrapActionConfig hadoopBootstrapActionConfig = getBootstrapActionConfig(
                    ConfigurationValue.EMR_CONFIGURE_HADOOP.getKey(),
                    configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_HADOOP));
            // If config files are available, add them as arguments
            for (Object hadoopConfigObject : emrClusterDefinition.getHadoopConfigurations()) {
                // If the Config Files are available, add them as arguments
                if (hadoopConfigObject instanceof ConfigurationFiles) {
                    for (ConfigurationFile configurationFile : ((ConfigurationFiles) hadoopConfigObject)
                            .getConfigurationFiles()) {
                        argList.add(configurationFile.getFileNameShortcut());
                        argList.add(configurationFile.getConfigFileLocation());
                    }
                }

                // If the key value pairs are available, add them as arguments
                if (hadoopConfigObject instanceof KeyValuePairConfigurations) {
                    for (KeyValuePairConfiguration keyValuePairConfiguration : ((KeyValuePairConfigurations) hadoopConfigObject)
                            .getKeyValuePairConfigurations()) {
                        argList.add(keyValuePairConfiguration.getKeyValueShortcut());
                        argList.add(keyValuePairConfiguration.getAttribKey() + "="
                                + keyValuePairConfiguration.getAttribVal());
                    }
                }
            }

            // Add the bootstrap action with arguments
            hadoopBootstrapActionConfig.getScriptBootstrapAction().setArgs(argList);
            bootstrapActions.add(hadoopBootstrapActionConfig);
        }
    }

    private String[] getActiveEmrClusterStates() {
        String emrStatesString = configurationHelper.getProperty(ConfigurationValue.EMR_VALID_STATES);
        return emrStatesString
                .split("\\" + configurationHelper.getProperty(ConfigurationValue.FIELD_DATA_DELIMITER));
    }

    /**
     * Create the BootstrapActionConfig object from the bootstrap script.
     *
     * @param scriptDescription bootstrap script name to be displayed.
     * @param bootstrapScript location of the bootstrap script.
     *
     * @return bootstrap action configuration that contains all the bootstrap actions for the given configuration.
     */
    private BootstrapActionConfig getBootstrapActionConfig(String scriptDescription, String bootstrapScript) {
        // Create the BootstrapActionConfig object
        BootstrapActionConfig bootstrapConfig = new BootstrapActionConfig();
        ScriptBootstrapActionConfig bootstrapConfigScript = new ScriptBootstrapActionConfig();

        // Set the bootstrapScript
        bootstrapConfig.setName(scriptDescription);
        bootstrapConfigScript.setPath(bootstrapScript);
        bootstrapConfig.setScriptBootstrapAction(bootstrapConfigScript);

        // Return the object
        return bootstrapConfig;
    }

    /**
     * Create the bootstrap action configuration List from all the bootstrapping scripts specified.
     *
     * @param emrClusterDefinition the EMR definition name value.
     *
     * @return list of bootstrap action configurations that contains all the bootstrap actions for the given configuration.
     */
    private ArrayList<BootstrapActionConfig> getBootstrapActionConfigList(
            EmrClusterDefinition emrClusterDefinition) {
        // Create the list
        ArrayList<BootstrapActionConfig> bootstrapActions = new ArrayList<>();

        // Add encryption script support if needed
        if (emrClusterDefinition.isEncryptionEnabled() != null && emrClusterDefinition.isEncryptionEnabled()) {
            // Whenever the user requests for encryption, we have an encryption script that is stored in herd bucket.
            // We use this encryption script to encrypt all the volumes of all the instances.
            // Amazon plans to support encryption in EMR soon. Once that support is enabled, we can remove this script and use the one provided by AWS.
            bootstrapActions.add(getBootstrapActionConfig(ConfigurationValue.EMR_ENCRYPTION_SCRIPT.getKey(),
                    getBootstrapScriptLocation(
                            configurationHelper.getProperty(ConfigurationValue.EMR_ENCRYPTION_SCRIPT))));
        }

        // Add NSCD script support if the script location is not empty
        String emrNscdScript = configurationHelper.getProperty(ConfigurationValue.EMR_NSCD_SCRIPT);
        if (StringUtils.isNotEmpty(emrNscdScript)) {
            // Upon launch, all EMR clusters should have NSCD running to cache DNS host lookups so EMR does not overwhelm DNS servers
            bootstrapActions.add(getBootstrapActionConfig(ConfigurationValue.EMR_NSCD_SCRIPT.getKey(),
                    getBootstrapScriptLocation(emrNscdScript)));
        }

        // Add bootstrap actions.
        addDaemonBootstrapActionConfig(emrClusterDefinition, bootstrapActions);
        addHadoopBootstrapActionConfig(emrClusterDefinition, bootstrapActions);
        addCustomBootstrapActionConfig(emrClusterDefinition, bootstrapActions);
        addCustomMasterBootstrapActionConfig(emrClusterDefinition, bootstrapActions);

        // Return the object
        return bootstrapActions;
    }

    /**
     * Create the tag list for the EMR nodes.
     *
     * @param emrClusterDefinition the EMR definition name value.
     *
     * @return list of all tag definitions for the given configuration.
     */
    private List<Tag> getEmrTags(EmrClusterDefinition emrClusterDefinition) {
        List<Tag> tags = new ArrayList<>();

        // Get the nodeTags from xml
        for (NodeTag thisTag : emrClusterDefinition.getNodeTags()) {
            // Create a AWS tag and add
            if (StringUtils.isNotBlank(thisTag.getTagName()) && StringUtils.isNotBlank(thisTag.getTagValue())) {
                tags.add(new Tag(thisTag.getTagName(), thisTag.getTagValue()));
            }
        }

        // Return the object
        return tags;
    }

    /**
     * Get the bootstrap script location from the bucket name and bootstrap script configuration value.
     *
     * @return location of the bootstrap script.
     */
    private String getBootstrapScriptLocation(String bootstrapConfigurationValue) {
        return getS3StagingLocation() + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + bootstrapConfigurationValue;
    }

    /**
     * Creates the job flow instance configuration containing specification of the number and type of Amazon EC2 instances.
     *
     * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters
     *
     * @return the job flow instance configuration
     */
    private JobFlowInstancesConfig getJobFlowInstancesConfig(EmrClusterDefinition emrClusterDefinition) {
        // Create a new job flow instances configuration object.
        JobFlowInstancesConfig jobFlowInstancesConfig = new JobFlowInstancesConfig();
        // Set up master/slave security group
        jobFlowInstancesConfig.setEmrManagedMasterSecurityGroup(emrClusterDefinition.getMasterSecurityGroup());
        jobFlowInstancesConfig.setEmrManagedSlaveSecurityGroup(emrClusterDefinition.getSlaveSecurityGroup());

        // Add additional security groups to master nodes.
        jobFlowInstancesConfig
                .setAdditionalMasterSecurityGroups(emrClusterDefinition.getAdditionalMasterSecurityGroups());

        // Add additional security groups to slave nodes.
        jobFlowInstancesConfig
                .setAdditionalSlaveSecurityGroups(emrClusterDefinition.getAdditionalSlaveSecurityGroups());

        // Fill-in the ssh key.
        if (StringUtils.isNotBlank(emrClusterDefinition.getSshKeyPairName())) {
            jobFlowInstancesConfig.setEc2KeyName(emrClusterDefinition.getSshKeyPairName());
        }

        // Fill in configuration for the instance groups in a cluster.
        jobFlowInstancesConfig
                .setInstanceGroups(getInstanceGroupConfigs(emrClusterDefinition.getInstanceDefinitions()));

        // Fill in instance fleet configuration.
        jobFlowInstancesConfig.setInstanceFleets(getInstanceFleets(emrClusterDefinition.getInstanceFleets()));

        // Fill-in subnet id.
        if (StringUtils.isNotBlank(emrClusterDefinition.getSubnetId())) {
            // Use collection of subnet IDs when instance fleet configuration is specified. Otherwise, we expect a single EC2 subnet ID to be passed here.
            if (CollectionUtils.isNotEmpty(jobFlowInstancesConfig.getInstanceFleets())) {
                jobFlowInstancesConfig
                        .setEc2SubnetIds(herdStringHelper.splitAndTrim(emrClusterDefinition.getSubnetId(), ","));
            } else {
                jobFlowInstancesConfig.setEc2SubnetId(emrClusterDefinition.getSubnetId());
            }
        }

        // Fill in optional keep alive flag.
        if (emrClusterDefinition.isKeepAlive() != null) {
            jobFlowInstancesConfig.setKeepJobFlowAliveWhenNoSteps(emrClusterDefinition.isKeepAlive());
        }

        // Fill in optional termination protection flag.
        if (emrClusterDefinition.isTerminationProtection() != null) {
            jobFlowInstancesConfig.setTerminationProtected(emrClusterDefinition.isTerminationProtection());
        }

        // Fill in optional Hadoop version flag.
        if (StringUtils.isNotBlank(emrClusterDefinition.getHadoopVersion())) {
            jobFlowInstancesConfig.setHadoopVersion(emrClusterDefinition.getHadoopVersion());
        }

        // Return the object.
        return jobFlowInstancesConfig;
    }

    /**
     * Create the run job flow request object.
     *
     * @param emrClusterDefinition the EMR definition name value
     * @param clusterName the EMR cluster name
     *
     * @return the run job flow request for the given configuration
     */
    private RunJobFlowRequest getRunJobFlowRequest(String clusterName, EmrClusterDefinition emrClusterDefinition) {
        // Create the object
        RunJobFlowRequest runJobFlowRequest = new RunJobFlowRequest(clusterName,
                getJobFlowInstancesConfig(emrClusterDefinition));

        // Set release label
        if (StringUtils.isNotBlank(emrClusterDefinition.getReleaseLabel())) {
            runJobFlowRequest.setReleaseLabel(emrClusterDefinition.getReleaseLabel());
        }

        // Set list of Applications
        List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications = emrClusterDefinition
                .getApplications();
        if (!CollectionUtils.isEmpty(emrClusterDefinitionApplications)) {
            runJobFlowRequest.setApplications(getApplications(emrClusterDefinitionApplications));
        }

        // Set list of Configurations
        List<EmrClusterDefinitionConfiguration> emrClusterDefinitionConfigurations = emrClusterDefinition
                .getConfigurations();
        if (!CollectionUtils.isEmpty(emrClusterDefinitionConfigurations)) {
            runJobFlowRequest.setConfigurations(getConfigurations(emrClusterDefinitionConfigurations));
        }

        // Set the log bucket if specified
        if (StringUtils.isNotBlank(emrClusterDefinition.getLogBucket())) {
            runJobFlowRequest.setLogUri(emrClusterDefinition.getLogBucket());
        }

        // Set the visible to all flag
        if (emrClusterDefinition.isVisibleToAll() != null) {
            runJobFlowRequest.setVisibleToAllUsers(emrClusterDefinition.isVisibleToAll());
        }

        // Set the IAM profile for the nodes
        if (StringUtils.isNotBlank(emrClusterDefinition.getEc2NodeIamProfileName())) {
            runJobFlowRequest.setJobFlowRole(emrClusterDefinition.getEc2NodeIamProfileName());
        } else {
            runJobFlowRequest.setJobFlowRole(herdStringHelper
                    .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME));
        }

        // Set the IAM profile for the service
        if (StringUtils.isNotBlank(emrClusterDefinition.getServiceIamRole())) {
            runJobFlowRequest.setServiceRole(emrClusterDefinition.getServiceIamRole());
        } else {
            runJobFlowRequest.setServiceRole(herdStringHelper
                    .getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME));
        }

        // Set the AMI version if specified
        if (StringUtils.isNotBlank(emrClusterDefinition.getAmiVersion())) {
            runJobFlowRequest.setAmiVersion(emrClusterDefinition.getAmiVersion());
        }

        // Set the additionalInfo if specified
        if (StringUtils.isNotBlank(emrClusterDefinition.getAdditionalInfo())) {
            runJobFlowRequest.setAdditionalInfo(emrClusterDefinition.getAdditionalInfo());
        }

        // Set the bootstrap actions
        List<BootstrapActionConfig> bootstrapActionConfigList = getBootstrapActionConfigList(emrClusterDefinition);
        if (!bootstrapActionConfigList.isEmpty()) {
            runJobFlowRequest.setBootstrapActions(bootstrapActionConfigList);
        }

        // Set the app installation steps
        runJobFlowRequest.setSteps(getStepConfig(emrClusterDefinition));

        // Set the tags
        runJobFlowRequest.setTags(getEmrTags(emrClusterDefinition));

        // Assign supported products as applicable
        if (StringUtils.isNotBlank(emrClusterDefinition.getSupportedProduct())) {
            List<String> supportedProducts = new ArrayList<>();
            supportedProducts.add(emrClusterDefinition.getSupportedProduct());
            runJobFlowRequest.setSupportedProducts(supportedProducts);
        }

        // Assign security configuration.
        if (StringUtils.isNotBlank(emrClusterDefinition.getSecurityConfiguration())) {
            runJobFlowRequest.setSecurityConfiguration(emrClusterDefinition.getSecurityConfiguration());
        }

        // Assign scale down behavior.
        if (StringUtils.isNotBlank(emrClusterDefinition.getScaleDownBehavior())) {
            runJobFlowRequest.setScaleDownBehavior(emrClusterDefinition.getScaleDownBehavior());
        }

        // Assign Kerberos attributes.
        runJobFlowRequest
                .setKerberosAttributes(getKerberosAttributes(emrClusterDefinition.getKerberosAttributes()));

        // Return the object
        return runJobFlowRequest;
    }

    /**
     * Get the S3_STAGING_RESOURCE full path from the bucket name as well as other details.
     *
     * @return the s3 managed location.
     */
    private String getS3StagingLocation() {
        return configurationHelper.getProperty(ConfigurationValue.S3_URL_PROTOCOL)
                + configurationHelper.getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME)
                + configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)
                + configurationHelper.getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE);
    }

    /**
     * Create the step config list of objects for hive/pig installation.
     *
     * @param emrClusterDefinition the EMR definition name value.
     *
     * @return list of step configuration that contains all the steps for the given configuration.
     */
    private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) {
        StepFactory stepFactory = new StepFactory();
        List<StepConfig> appSteps = new ArrayList<>();

        // Create install hive step and add to the StepConfig list
        if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) {
            StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion())
                    .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                    .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion()));
            appSteps.add(installHive);
        }

        // Create install Pig step and add to the StepConfig List
        if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) {
            StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion())
                    .withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW)
                    .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion()));
            appSteps.add(installPig);
        }

        // Add the hadoop jar steps that need to be added.
        if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) {
            for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) {
                StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(),
                        hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(),
                        hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError());

                appSteps.add(stepConfig);
            }
        }

        return appSteps;
    }
}