org.openspaces.grid.gsm.rebalancing.DefaultRebalancingSlaEnforcementEndpoint.java Source code

Introduction

Here is the source code for org.openspaces.grid.gsm.rebalancing.DefaultRebalancingSlaEnforcementEndpoint.java
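
DefaultRebalancingSlaEnforcementEndpoint enforces the rebalancing SLA for a single processing unit. For stateful (partitioned-sync2backup) deployments it relocates backups, restarts primaries and relocates instances until instances are spread evenly across the approved containers and the number of primary instances per CPU core is balanced across machines; for stateless (default schema) deployments it adds and removes instances until exactly the approved containers host the processing unit.

The endpoint is driven by polling: enforceSla(..) throws RebalancingSlaEnforcementInProgressException (or a subclass) while relocations, restarts or removals are still in flight, and returns normally once the SLA holds. The sketch below illustrates such a driver loop. It is illustrative only: the class is package-private and is normally driven by the ESM itself, and the endpoint and a fully populated RebalancingSlaPolicy are assumed to be provided by that machinery.

// Hypothetical polling driver (not part of the class below)
static void pollUntilBalanced(RebalancingSlaEnforcementEndpoint endpoint, RebalancingSlaPolicy sla)
        throws InterruptedException {
    while (true) {
        try {
            endpoint.enforceSla(sla);  // one enforcement pass
            return;                    // no exception: the processing unit is balanced
        } catch (RebalancingSlaEnforcementInProgressException e) {
            Thread.sleep(10 * 1000L);  // work is still in flight; poll again later
        }
    }
}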

Source

/*******************************************************************************
 * 
 * Copyright (c) 2012 GigaSpaces Technologies Ltd. All rights reserved
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *  
 ******************************************************************************/
package org.openspaces.grid.gsm.rebalancing;

import com.gigaspaces.cluster.activeelection.SpaceMode;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.openspaces.admin.AdminException;
import org.openspaces.admin.gsc.GridServiceContainer;
import org.openspaces.admin.internal.admin.InternalAdmin;
import org.openspaces.admin.internal.pu.InternalProcessingUnit;
import org.openspaces.admin.machine.Machine;
import org.openspaces.admin.pu.ProcessingUnit;
import org.openspaces.admin.pu.ProcessingUnitInstance;
import org.openspaces.core.internal.commons.math.fraction.Fraction;
import org.openspaces.grid.esm.EsmSystemProperties;
import org.openspaces.grid.gsm.LogPerProcessingUnit;
import org.openspaces.grid.gsm.SingleThreadedPollingLog;
import org.openspaces.grid.gsm.capacity.CapacityRequirements;
import org.openspaces.grid.gsm.capacity.CpuCapacityRequirement;
import org.openspaces.grid.gsm.rebalancing.exceptions.FutureProcessingUnitInstanceDeploymentException;
import org.openspaces.grid.gsm.rebalancing.exceptions.NumberOfInstancesIsBelowMinimumException;
import org.openspaces.grid.gsm.rebalancing.exceptions.NumberOfInstancesPerPartitionIsBelowMinimumException;
import org.openspaces.grid.gsm.rebalancing.exceptions.ProcessingUnitIsNotEvenlyDistributedAccrossMachinesException;
import org.openspaces.grid.gsm.rebalancing.exceptions.ProcessingUnitIsNotEvenlyDistributedAcrossContainersException;
import org.openspaces.grid.gsm.rebalancing.exceptions.ProcessingUnitIsNotInTactException;
import org.openspaces.grid.gsm.rebalancing.exceptions.RebalancingSlaEnforcementInProgressException;
import org.openspaces.grid.gsm.rebalancing.exceptions.WrongContainerProcessingUnitRelocationException;

import org.openspaces.grid.gsm.containers.ContainersSlaUtils;

import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

class DefaultRebalancingSlaEnforcementEndpoint implements RebalancingSlaEnforcementEndpoint {

    //0.01 minimum cpu cores per machine
    private static final Fraction MIN_CPU_CORES_PER_MACHINE_FOR_REBALANCING = new Fraction(1, 100);
    private static final long STATEFUL_DEPLOYMENT_TIMEOUT_SECONDS = Long.getLong(
            EsmSystemProperties.ESM_STATEFUL_DEPLOYMENT_TIMEOUT_SECONDS,
            EsmSystemProperties.ESM_STATEFUL_DEPLOYMENT_TIMEOUT_SECONDS_DEFAULT);
    private static final long STATELESS_DEPLOYMENT_TIMEOUT_SECONDS = Long.getLong(
            EsmSystemProperties.ESM_STATELESS_DEPLOYMENT_TIMEOUT_SECONDS,
            EsmSystemProperties.ESM_STATELESS_DEPLOYMENT_TIMEOUT_SECONDS_DEFAULT);
    private static final long STATEFUL_DEPLOYMENT_FAILURE_FORGET_SECONDS = Long.getLong(
            EsmSystemProperties.ESM_STATEFUL_DEPLOYMENT_FAILURE_FORGET_SECONDS,
            EsmSystemProperties.ESM_STATEFUL_DEPLOYMENT_FAILURE_FORGET_SECONDS_DEFAULT);
    private static final long STATELESS_DEPLOYMENT_FAILURE_FORGET_SECONDS = Long.getLong(
            EsmSystemProperties.ESM_STATELESS_DEPLOYMENT_FAILURE_FORGET_SECONDS,
            EsmSystemProperties.ESM_STATELESS_DEPLOYMENT_FAILURE_FORGET_SECONDS_DEFAULT);

    private final ProcessingUnit pu;
    private final RebalancingSlaEnforcementState state;

    // last-resort continuation state: when the optimal rebalancing heuristics fail, these
    // counters track the next partition whose primary should be restarted or whose backup
    // should be relocated (by partition number)
    private int lastResortPartitionRestart = 0;
    private int lastResortPartitionRelocate = 0;

    private final Log logger;

    DefaultRebalancingSlaEnforcementEndpoint(ProcessingUnit pu, RebalancingSlaEnforcementState state) {
        if (pu == null) {
            throw new IllegalArgumentException("pu cannot be null");
        }

        this.pu = pu;
        this.state = state;
        this.logger = new LogPerProcessingUnit(
                new SingleThreadedPollingLog(LogFactory.getLog(DefaultRebalancingSlaEnforcementEndpoint.class)),
                pu);
    }

    public ProcessingUnit getProcessingUnit() {
        return pu;
    }

    public void enforceSla(RebalancingSlaPolicy sla) throws RebalancingSlaEnforcementInProgressException {
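        // This method is polled repeatedly by the enforcement layer: it throws
        // RebalancingSlaEnforcementInProgressException (or a subclass) while relocations,
        // restarts or removals are still in flight, and returns normally once the SLA holds.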

        if (state.isDestroyedProcessingUnit(pu)) {
            throw new IllegalStateException("endpoint destroyed");
        }

        if (sla == null) {
            throw new IllegalArgumentException("sla cannot be null");
        }

        sla.validate();

        for (GridServiceContainer container : sla.getContainers()) {
            if (container.getGridServiceAgent() == null) {
                throw new IllegalStateException(
                        "container " + RebalancingUtils.gscToString(container) + " has no agent.");
            }

            String agentUid = container.getGridServiceAgent().getUid();
            if (!sla.getAllocatedCapacity().getAgentUids().contains(agentUid)) {
                throw new IllegalArgumentException(
                        "List of agents must be a superset of agents that started the containers, " + "agentUids="
                                + sla.getAllocatedCapacity().getAgentUids().toString() + " "
                                + "does not include agent " + agentUid);
            }

            if (sla.getAllocatedCapacity().getAgentCapacity(agentUid)
                    .getRequirement(new CpuCapacityRequirement().getType()).equalsZero()) {
                // number of cpu cores per machine cannot be zero (requirement of the primary rebalancing algorithm)
                sla.setAllocatedCapacity(sla.getAllocatedCapacity().add(agentUid, new CapacityRequirements(
                        new CpuCapacityRequirement(MIN_CPU_CORES_PER_MACHINE_FOR_REBALANCING))));

                if (sla.isEager()) {
                    // if we reached this point in eager mode, one of the machines reported zero cpu.
                    // disable cpu rebalancing in this case.
                    // see GS-11381
                    logger.warn("Disabling CPU Rebalancing for pu "
                            + RebalancingUtils.processingUnitDeploymentToString(pu)
                            + " since machine with agentUid=" + agentUid
                            + " reported to have 0 available processors. Once this problem is fixed, "
                            + "restart the ESM and CPU re-balancing will be enabled");
                    sla.ignoreCpuRebalancing(true);
                }
            }
        }

        String zone = pu.getRequiredZones()[0];

        for (GridServiceContainer container : sla.getContainers()) {
            Set<String> zones = container.getZones().keySet();

            if (zones.size() != 1) {
                throw new IllegalArgumentException(
                        "Container " + RebalancingUtils.gscToString(container) + " must have exactly one zone.");
            }

            if (!zones.contains(zone)) {
                throw new IllegalArgumentException(
                        "Container " + RebalancingUtils.gscToString(container) + " must have the zone " + zone);
            }
        }

        enforceSlaInternal(sla);
    }

    private void enforceSlaInternal(RebalancingSlaPolicy sla) throws RebalancingSlaEnforcementInProgressException {

        cleanFutureStatefulDeployments();
        cleanFutureStatelessDeployments();
        cleanRemovedStatelessProcessingUnitInstances();

        if (sla.getSchemaConfig().isPartitionedSync2BackupSchema()) {
            enforceSlaStatefulProcessingUnit(sla);
        } else if (sla.getSchemaConfig().isDefaultSchema()) {
            enforceSlaStatelessProcessingUnit(sla);
        } else {
            throw new IllegalStateException(
                    pu.getName() + " schema " + sla.getSchemaConfig().getSchema() + " is not supported.");
        }

        if (logger.isDebugEnabled()) {
            logger.debug(pu.getName() + " rebalancing is complete");
        }

    }

    private void enforceSlaStatelessProcessingUnit(RebalancingSlaPolicy sla)
            throws RebalancingSlaEnforcementInProgressException {
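        // Stateless enforcement: (1) increment planned instances until an instance is deployed
        // on every approved container, (2) fail if the total is still below the SLA minimum,
        // (3) decrement planned instances that exceed the total number of containers, and
        // (4) remove instances that are deployed on unapproved containers.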

        final GridServiceContainer[] containers = sla.getContainers();

        // don't add instances while removing instances, unless the sla is breached and we
        // have to add instances (even if it's more than we actually need, and later we
        // would need to remove these instances again)
        if (state.getRemovedStatelessProcessingUnitInstances(pu).size() == 0
                || pu.getInstances().length < sla.getMinimumNumberOfInstancesPerPartition()) {

            increasePlannedInstancesUntilDeployedOnApprovedContainers(containers);
        }

        if (pu.getInstances().length < sla.getMinimumNumberOfInstancesPerPartition()) {
            throw new NumberOfInstancesIsBelowMinimumException(pu, sla.getMinimumNumberOfInstancesPerPartition());
        }

        decreasePlannedInstancesIfMoreThanAllContainers(sla);

        removeInstancesNotOnApprovedContainers(sla, containers);

        if (!RebalancingUtils.isProcessingUnitIntact(pu, containers)) {
            throw new ProcessingUnitIsNotInTactException(pu);
        }
    }

    private void increasePlannedInstancesUntilDeployedOnApprovedContainers(final GridServiceContainer[] containers)
            throws ProcessingUnitIsNotEvenlyDistributedAcrossContainersException {
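        // Both throws in this method are deliberate: the first prevents concurrent increment
        // calls (which would not share state and would over-provision instances), and the
        // second reports the newly scheduled deployments as still in progress so the caller
        // polls until they complete.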

        if (state.getNumberOfFutureDeployments(pu) > 0) {
            // incrementNumberOfStatelessInstancesAsync can only be called one invocation at a time:
            // concurrent calls do not share state, which causes too many increment-instance calls to the GSM.
            throw new ProcessingUnitIsNotEvenlyDistributedAcrossContainersException(
                    "Instances deployment is in progress", pu, containers);
        }

        Collection<FutureStatelessProcessingUnitInstance> futureInstances = RebalancingUtils
                .incrementNumberOfStatelessInstancesAsync(pu, containers, logger,
                        STATELESS_DEPLOYMENT_TIMEOUT_SECONDS, TimeUnit.SECONDS);

        state.addFutureStatelessDeployments(futureInstances);

        if (state.getNumberOfFutureDeployments(pu) > 0) {
            throw new ProcessingUnitIsNotEvenlyDistributedAcrossContainersException(
                    "Instances deployment is in progress", pu, containers);
        }
    }

    private void removeInstancesNotOnApprovedContainers(RebalancingSlaPolicy sla,
            final GridServiceContainer[] containers)
            throws ProcessingUnitIsNotEvenlyDistributedAcrossContainersException {
        // find all containers with instances that are not in the approved containers
        final Set<GridServiceContainer> approvedContainers = new HashSet<GridServiceContainer>(
                Arrays.asList(containers));
        final List<ProcessingUnitInstance> instancesToRemove = new ArrayList<ProcessingUnitInstance>();
        for (final GridServiceContainer container : pu.getAdmin().getGridServiceContainers()) {
            if (!approvedContainers.contains(container)) {

                for (final ProcessingUnitInstance instance : container.getProcessingUnitInstances(pu.getName())) {
                    instancesToRemove.add(instance);
                }
            }
        }

        if (instancesToRemove.size() > 0) {

            for (final ProcessingUnitInstance instanceToRemove : instancesToRemove) {
                if (pu.getInstances().length - state.getRemovedStatelessProcessingUnitInstances(pu).size() <= sla
                        .getMinimumNumberOfInstancesPerPartition()) {
                    logger.info("Not removing pu instance " + RebalancingUtils.puInstanceToString(instanceToRemove)
                            + " " + "even though deployed on an unapproved container. " + "#instances="
                            + pu.getInstances().length + "-"
                            + state.getRemovedStatelessProcessingUnitInstances(pu).size() + " " + "#minInstances="
                            + sla.getMinimumNumberOfInstancesPerPartition());
                    break;
                }
                removeInstance(instanceToRemove);
            }

            throw new ProcessingUnitIsNotEvenlyDistributedAcrossContainersException(
                    "Instances removal is in progress", pu, containers);
        }

        if (state.getRemovedStatelessProcessingUnitInstances(pu).iterator().hasNext()) {
            throw new ProcessingUnitIsNotEvenlyDistributedAcrossContainersException(
                    "Instances removal is in progress", pu, containers);
        }

    }

    private void decreasePlannedInstancesIfMoreThanAllContainers(RebalancingSlaPolicy sla)
            throws ProcessingUnitIsNotInTactException {
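        // If the planned number of instances exceeds the number of available containers (and is
        // still above the SLA minimum), schedule an asynchronous decrement of one planned
        // instance and throw, so the caller retries on the next poll until the plan converges.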
        final int numberOfInstancesBeforeDecrement = pu.getNumberOfInstances();
        final int totalContainers = RebalancingUtils.getContainersOnMachines(pu).length;

        if (numberOfInstancesBeforeDecrement > totalContainers
                && numberOfInstancesBeforeDecrement > sla.getMinimumNumberOfInstancesPerPartition()) {

            // the pu is not intact since the planned number of instances exceeds the number of available containers.
            ((InternalAdmin) pu.getAdmin()).scheduleAdminOperation(new Runnable() {

                public void run() {
                    try {

                        final boolean decremented = ((InternalProcessingUnit) pu).decrementPlannedInstances();
                        if (decremented) {
                            logger.info("Planned number of instances is " + numberOfInstancesBeforeDecrement + " "
                                    + "instead of " + totalContainers + ". " + "Removed one pu instance of "
                                    + pu.getName());
                        } else {
                            if (logger.isInfoEnabled()) {
                                logger.info("Number of instances is " + numberOfInstancesBeforeDecrement + " "
                                        + "instead of " + totalContainers + ". "
                                        + "Retry to remove one pu instance of " + pu.getName() + " next time.");
                            }
                        }
                    } catch (final AdminException e) {
                        logger.info("Failed to decrement planned number of instances for " + pu.getName(), e);
                    } catch (final Throwable t) {
                        logger.warn("Unexpected exception when decrementing planned number of instances for "
                                + pu.getName(), t);
                    }
                }
            });

            throw new ProcessingUnitIsNotInTactException(pu, "Planned number of instances is "
                    + numberOfInstancesBeforeDecrement + " instead of " + totalContainers);
        }
    }

    private void removeInstance(final ProcessingUnitInstance instance) {
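        // Schedules an asynchronous decrement of the given instance. The instance is recorded
        // in `state` up front so the decrement is attempted only once; if the admin operation
        // fails, the record is rolled back so a later enforcement pass can retry.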

        if (!state.isStatelessProcessingUnitInstanceBeingRemoved(instance)) {

            // this makes sure we try to decrement it only once
            state.addRemovedStatelessProcessingUnitInstance(instance);

            ((InternalAdmin) pu.getAdmin()).scheduleAdminOperation(new Runnable() {

                public void run() {
                    try {
                        // don't check if (instance.isDiscovered()) since the GSM has to decide this
                        logger.info("removing pu instance " + RebalancingUtils.puInstanceToString(instance) + " "
                                + "since deployed on an unapproved container");
                        instance.decrement();
                    } catch (AdminException e) {
                        logger.info("Failed to remove instance " + RebalancingUtils.puInstanceToString(instance),
                                e);
                        state.removeRemovedStatelessProcessingUnitInstance(instance);
                    } catch (Throwable t) {
                        logger.warn("Unexpected exception when removing "
                                + RebalancingUtils.puInstanceToString(instance), t);
                        state.removeRemovedStatelessProcessingUnitInstance(instance);
                    }
                }
            });
        }
    }

    private void enforceSlaStatefulProcessingUnit(RebalancingSlaPolicy sla)
            throws RebalancingSlaEnforcementInProgressException {
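        // Stateful enforcement runs in up to three stages (marked inline below):
        //   stage 1 - relocate backups so the number of instances per container is balanced
        //   stage 2 - restart primaries so the number of cpu cores per primary is balanced
        //             (skipped if the pu is not intact or cpu rebalancing is disabled)
        //   stage 3 - relocate backups or primaries to fix any remaining per-container imbalance
        // Stages 1 and 2 only apply when the pu has exactly one backup per partition. Each
        // stage throws while its deployments are in progress, so the caller polls this method.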

        if (!RebalancingUtils.isProcessingUnitHasMinimumNumberOfInstancesPerPartition(pu,
                sla.getMinimumNumberOfInstancesPerPartition())) {
            throw new NumberOfInstancesPerPartitionIsBelowMinimumException(pu,
                    sla.getMinimumNumberOfInstancesPerPartition());
        }

        if (!RebalancingUtils.isProcessingUnitIntact(pu)) {
            throw new ProcessingUnitIsNotInTactException(pu);
        }

        GridServiceContainer[] containers = sla.getContainers();
        if (pu.getNumberOfBackups() == 1) {
            // stage 1 : relocate backups so number of instances per container is balanced
            rebalanceNumberOfInstancesPerContainer(containers, sla, true);

            if (state.getNumberOfFutureDeployments(pu) > 0) {
                logger.debug("Rebalancing of backup instances is in progress after Stage 1. "
                        + "Number of deployments in progress is " + state.getNumberOfFutureDeployments(pu));
                throw new ProcessingUnitIsNotEvenlyDistributedAcrossContainersException(
                        "Instances deployment is in progress", pu, containers);
            }

            // if not all of pu instances are in the approved containers...
            // then skip directly to stage 3
            boolean processingUnitIntact = RebalancingUtils.isProcessingUnitIntact(pu, containers);

            // this flag was added as a workaround for GS-11381.
            // see DefaultRebalancingSlaEnforcementEndpoint#enforceSla
            boolean ignoreCpuRebalancing = sla.ignoreCpuRebalancing();

            if (!processingUnitIntact) {
                logger.debug("Not re-balancing according to CPU since processing unit is not intact");
            }
            if (ignoreCpuRebalancing) {
                logger.debug(
                        "Not re-balancing according to CPU since 'sla.ignoreCpuRebalancing()' is set to true for SLA "
                                + sla);
            }

            if (processingUnitIntact && !ignoreCpuRebalancing) {

                // stage 2: restart primaries so number of cpu cores per primary is balanced
                rebalanceNumberOfPrimaryInstancesPerMachine(containers, sla);

                if (state.getNumberOfFutureDeployments(pu) > 0) {
                    logger.debug("Restarting of primary instances is in progress after Stage 2. "
                            + "Number of deployments in progress is " + state.getNumberOfFutureDeployments(pu));
                    throw new ProcessingUnitIsNotEvenlyDistributedAccrossMachinesException(pu);
                }
            }
        }

        // stage 3: relocate backups or primaries so number of instances per container is
        // balanced
        rebalanceNumberOfInstancesPerContainer(containers, sla, false);

        if (state.getNumberOfFutureDeployments(pu) > 0) {
            logger.debug("Rebalancing of primary or backup instances is in progress after Stage 3. "
                    + "Number of deployments in progress is " + state.getNumberOfFutureDeployments(pu));
            throw new ProcessingUnitIsNotEvenlyDistributedAcrossContainersException(
                    "Instances deployment is in progress", pu, containers);
        }

        if (!RebalancingUtils.isProcessingUnitIntact(pu, containers)) {
            throw new ProcessingUnitIsNotInTactException(pu);
        }
    }

    /**
     * Invokes multiple relocation operations to balance the number of pu instances per container.
     * 
     * @param containers
     * @param sla
     * @param relocateOnlyBackups
     *            - perform only backup relocations.
     * 
     * @throws RebalancingSlaEnforcementInProgressException
     *             - cannot determine what to relocate next since another conflicting operation
     *             is in progress.
     */
    private void rebalanceNumberOfInstancesPerContainer(GridServiceContainer[] containers, RebalancingSlaPolicy sla,
            boolean relocateOnlyBackups) throws RebalancingSlaEnforcementInProgressException {

        logger.debug("Trying to re-balance number of instances per container. relocateOnlyBackups="
                + relocateOnlyBackups);

        while (true) {
            final FutureStatefulProcessingUnitInstance futureInstance = rebalanceNumberOfInstancesPerContainerStep(
                    containers, relocateOnlyBackups, sla.getMaximumNumberOfConcurrentRelocationsPerMachine(),
                    sla.isAtMostOneConcurrentRelocation());

            if (futureInstance == null) {
                break;
            }

            state.addFutureStatefulDeployment(futureInstance);
        }
    }

    /**
     * Invokes one relocation operation to balance the number of instances per container.
     * 
     * @param containers
     * @param onlyBackups
     *            - perform only backup relocations.
     * @param maximumNumberOfRelocationsPerMachine
     * @param atMostOneConcurrentRelocation
     * 
     * @return a future if a relocation was performed, or null if no action needs to be performed.
     * 
     * @throws RebalancingSlaEnforcementInProgressException
     *             - cannot determine what to relocate since another conflicting operation is in
     *             progress.
     */
    private FutureStatefulProcessingUnitInstance rebalanceNumberOfInstancesPerContainerStep(
            final GridServiceContainer[] containers, boolean onlyBackups, int maximumNumberOfRelocationsPerMachine,
            boolean atMostOneConcurrentRelocation) throws RebalancingSlaEnforcementInProgressException {

        // sort all containers (including those not in the specified containers)
        // by (numberOfInstancesPerContainer - minNumberOfInstances)
        final List<GridServiceContainer> sortedContainers = RebalancingUtils
                .sortAllContainersByNumberOfInstancesAboveMinimum(pu, containers);

        logger.debug("Containers sorted by number of instances above minimum: "
                + RebalancingUtils.gscsToString(sortedContainers));
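
        // For example: if the sorted order is [c1: 1 instance, c2: 2, c3: 4] and the planned
        // range per container is 2..3 instances, the scan below pairs target=c1 (fewest
        // instances) with source=c3 (most instances) and relocates one instance from c3 to
        // c1. The enclosing while-loop then invokes this step again until no relocation is
        // performed (this step returns null).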

        boolean conflict = false;
        // relocation is done from a source container with too many instances
        // to a target container with too little instances
        for (int targetIndex = 0; targetIndex < sortedContainers.size(); targetIndex++) {

            GridServiceContainer target = sortedContainers.get(targetIndex);

            logger.trace(
                    "Considering target container for re-location as " + ContainersSlaUtils.gscToString(target));

            if (isConflictingDeploymentInProgress(target, maximumNumberOfRelocationsPerMachine,
                    atMostOneConcurrentRelocation)) {
                conflict = true;
                logger.debug("Cannot relocate instances to " + RebalancingUtils.gscToString(target)
                        + " since a conflicting relocation is already in progress.");
                continue;
            }

            int instancesInTarget = target.getProcessingUnitInstances(pu.getName()).length;

            if (instancesInTarget >= RebalancingUtils.getPlannedMaximumNumberOfInstancesForContainer(target,
                    containers, pu)) {
                logger.debug("Cannot relocate instances to " + RebalancingUtils.gscToString(target)
                        + " since the target cannot host any more instances.");
                // target cannot host any more instances
                // since the array is sorted there is no point in continuing the search
                break;
            }
            for (int sourceIndex = sortedContainers.size() - 1; sourceIndex > targetIndex; sourceIndex--) {

                GridServiceContainer source = sortedContainers.get(sourceIndex);

                logger.trace("Considering source container for re-location as "
                        + ContainersSlaUtils.gscToString(source));

                if (isConflictingDeploymentInProgress(source, maximumNumberOfRelocationsPerMachine,
                        atMostOneConcurrentRelocation)) {
                    conflict = true;
                    logger.debug("Cannot relocate instances from " + RebalancingUtils.gscToString(source)
                            + " since a conflicting relocation is already in progress.");
                    continue;
                }

                int instancesInSource = source.getProcessingUnitInstances(pu.getName()).length;
                if (instancesInSource <= RebalancingUtils.getPlannedMinimumNumberOfInstancesForContainer(source,
                        containers, pu)) {
                    logger.debug("Cannot relocate instances from " + RebalancingUtils.gscToString(source)
                            + " since the source cannot give up any instances.");
                    // source cannot give up any instances
                    // since the array is sorted there is no point in continuing the search
                    break;
                }

                if (instancesInTarget >= RebalancingUtils.getPlannedMinimumNumberOfInstancesForContainer(target,
                        containers, pu)
                        && instancesInSource <= RebalancingUtils
                                .getPlannedMaximumNumberOfInstancesForContainer(source, containers, pu)) {
                    logger.debug("No use relocating instances from " + RebalancingUtils.gscToString(source) + " to "
                            + RebalancingUtils.gscToString(target) + " since they are both balanced.");
                    // both source and target are balanced.
                    // since array is sorted there is no point in continuing the search
                    // as this condition will hold true.
                    break;
                }

                // we have a target and a source container. 
                // now let's decide which pu instance to relocate from source to target
                for (ProcessingUnitInstance candidateInstance : source.getProcessingUnitInstances(pu.getName())) {

                    logger.trace("Candidate for re-location is "
                            + RebalancingUtils.puInstanceToString(candidateInstance));

                    if (candidateInstance.getSpaceInstance() == null) {
                        logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " since embedded space is not detected");
                        continue;
                    }

                    if (onlyBackups && candidateInstance.getSpaceInstance().getMode() != SpaceMode.BACKUP) {
                        logger.debug(
                                "Prefer not to relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                        + " since it is not a backup, and backups are preferred for relocation");
                        continue;
                    }

                    if (!RebalancingUtils.isProcessingUnitPartitionIntact(candidateInstance)) {
                        logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " since instances from the same partition are missing");
                        conflict = true;
                        continue;
                    }

                    if (isConflictingStatefulDeploymentInProgress(candidateInstance)) {
                        logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " " + "since another instance from the same partition is being relocated");
                        conflict = true;
                        continue;
                    }

                    for (Machine sourceReplicationMachine : RebalancingUtils.getMachinesHostingContainers(
                            RebalancingUtils.getReplicationSourceContainers(candidateInstance))) {
                        if (isConflictingOperationInProgress(sourceReplicationMachine,
                                maximumNumberOfRelocationsPerMachine, atMostOneConcurrentRelocation)) {
                            logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                    + " " + "since replication source is on machine "
                                    + RebalancingUtils.machineToString(sourceReplicationMachine) + " "
                                    + "which is busy with another relocation");
                            conflict = true;
                        }
                    }

                    // check that the pu's isolation requirement is satisfied
                    if (pu.isRequiresIsolation()) {
                        if (target.getProcessingUnitInstances().length != 0) {
                            logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                    + " to container " + RebalancingUtils.gscToString(target)
                                    + " since container already hosts an instance and processing unit requires isolation");
                            continue;
                        }
                    }

                    // check limit of pu instances from same partition per container
                    if (pu.getMaxInstancesPerVM() > 0) {
                        int numberOfOtherInstancesFromPartitionInTargetContainer = RebalancingUtils
                                .getOtherInstancesFromSamePartitionInContainer(target, candidateInstance).size();

                        if (numberOfOtherInstancesFromPartitionInTargetContainer >= pu.getMaxInstancesPerVM()) {
                            logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                    + " " + "to container " + RebalancingUtils.gscToString(target) + " "
                                    + "since container already hosts "
                                    + numberOfOtherInstancesFromPartitionInTargetContainer + " "
                                    + "instance(s) from the same partition.");
                            continue;
                        }
                    }

                    // check limit of pu instances from same partition per machine 
                    if (pu.getMaxInstancesPerMachine() > 0) {
                        int numberOfOtherInstancesFromPartitionInTargetMachine = RebalancingUtils
                                .getOtherInstancesFromSamePartitionInMachine(target.getMachine(), candidateInstance)
                                .size();

                        if (numberOfOtherInstancesFromPartitionInTargetMachine >= pu.getMaxInstancesPerMachine()) {
                            logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                    + " " + "to container " + RebalancingUtils.gscToString(target) + " "
                                    + "since machine already contains "
                                    + numberOfOtherInstancesFromPartitionInTargetMachine + " "
                                    + "instance(s) from the same partition.");
                            continue;
                        }
                    }

                    logger.info("Relocating " + RebalancingUtils.puInstanceToString(candidateInstance) + " "
                            + "from " + RebalancingUtils.gscToString(source) + " with "
                            + source.getProcessingUnitInstances().length + " instances to "
                            + RebalancingUtils.gscToString(target) + " with "
                            + target.getProcessingUnitInstances().length
                            + " instances. PlannedMaximumNumberOfInstances for target is "
                            + RebalancingUtils.getPlannedMaximumNumberOfInstancesForContainer(target, containers,
                                    pu)
                            + "; PlannedMinimumNumberOfInstances for source is " + RebalancingUtils
                                    .getPlannedMinimumNumberOfInstancesForContainer(source, containers, pu));
                    return RebalancingUtils.relocateProcessingUnitInstanceAsync(target, candidateInstance, logger,
                            STATEFUL_DEPLOYMENT_TIMEOUT_SECONDS, TimeUnit.SECONDS);

                } // for pu instance
            } // for source container
        } // for target container

        if (// we tried to relocate primaries
        !onlyBackups &&

        // backup instances exist, and the max-instances-per-machine limitation on them is the reason we are here
                pu.getNumberOfBackups() > 0 &&

                // no future operations that may conflict
                state.getNumberOfFutureDeployments(pu) == 0 &&

                // all instances are deployed
                RebalancingUtils.isProcessingUnitIntact(pu) &&

                // we're not done rebalancing yet!
                !RebalancingUtils.isEvenlyDistributedAcrossContainers(pu, containers)) {

            logger.debug(
                    "Optimal rebalancing heuristics failed balancing instances per container in this deployment. "
                            + "Performing non-optimal relocation heuristics. Will try to re-locate only backups. Starting with partition "
                            + lastResortPartitionRelocate + ". Current deployment state is "
                            + RebalancingUtils.processingUnitDeploymentToString(pu));

            // The algorithm failed, so we fall back to heuristics.
            // The reason the algorithm failed is that the machine with the empty spot also has
            // instances from a partition that prevent a relocation into that machine.
            // For example, the excess machine wants to relocate Primary1 but the empty GSC is on a machine that has Backup1.
            // The workaround is to relocate any backup from another machine to the empty GSC, and so the "emptiness" would move to that other machine.
            // we look for backups by their partition number to avoid an endless loop.

            for (; lastResortPartitionRelocate <= pu.getNumberOfInstances() - 1; lastResortPartitionRelocate++) {

                logger.trace("Trying to relocate a backup from partition " + lastResortPartitionRelocate);

                // find backup to relocate
                ProcessingUnitInstance candidateInstance = pu.getPartition(lastResortPartitionRelocate).getBackup();

                logger.trace(
                        "Candidate for re-location is " + RebalancingUtils.puInstanceToString(candidateInstance));

                GridServiceContainer source = candidateInstance.getGridServiceContainer();

                for (GridServiceContainer target : sortedContainers) {

                    if (target.getMachine().equals(source.getMachine())) {
                        logger.debug("Not re-locating " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " to " + ContainersSlaUtils.gscToString(target)
                                + " since containers are on the same host.");
                        // there's no point in relocating a backup into the same machine
                        // since we want another machine to have an "empty" container. 
                        continue;
                    }

                    int instancesInTarget = target.getProcessingUnitInstances(pu.getName()).length;
                    if (instancesInTarget >= RebalancingUtils.getPlannedMaximumNumberOfInstancesForContainer(target,
                            containers, pu)) {
                        logger.debug("Not re-locating " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " to " + ContainersSlaUtils.gscToString(target)
                                + " since target container cannot host any more instances.");
                        // target cannot host any more instances
                        continue;
                    }

                    // check limit of pu instances from same partition per container
                    if (pu.getMaxInstancesPerVM() > 0) {
                        int numberOfOtherInstancesFromPartitionInTargetContainer = RebalancingUtils
                                .getOtherInstancesFromSamePartitionInContainer(target, candidateInstance).size();

                        if (numberOfOtherInstancesFromPartitionInTargetContainer >= pu.getMaxInstancesPerVM()) {
                            logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                    + " " + "to container " + RebalancingUtils.gscToString(target) + " "
                                    + "since container already hosts "
                                    + numberOfOtherInstancesFromPartitionInTargetContainer + " "
                                    + "instance(s) from the same partition.");
                            continue;
                        }
                    }

                    // check limit of pu instances from same partition per machine
                    if (pu.getMaxInstancesPerMachine() > 0) {
                        int numberOfOtherInstancesFromPartitionInTargetMachine = RebalancingUtils
                                .getOtherInstancesFromSamePartitionInMachine(target.getMachine(), candidateInstance)
                                .size();

                        if (numberOfOtherInstancesFromPartitionInTargetMachine >= pu.getMaxInstancesPerMachine()) {
                            logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                    + " " + "to container " + RebalancingUtils.gscToString(target) + " "
                                    + "since machine already contains "
                                    + numberOfOtherInstancesFromPartitionInTargetMachine + " "
                                    + "instance(s) from the same partition.");
                            continue;
                        }
                    }
                    logger.info("Relocating " + RebalancingUtils.puInstanceToString(candidateInstance) + " "
                            + "from " + RebalancingUtils.gscToString(source) + " with "
                            + source.getProcessingUnitInstances().length + " instances to "
                            + RebalancingUtils.gscToString(target) + " with "
                            + target.getProcessingUnitInstances().length
                            + " instances. PlannedMaximumNumberOfInstances for target is "
                            + RebalancingUtils.getPlannedMaximumNumberOfInstancesForContainer(target, containers,
                                    pu)
                            + "; PlannedMinimumNumberOfInstances for source is " + RebalancingUtils
                                    .getPlannedMinimumNumberOfInstancesForContainer(source, containers, pu));
                    return RebalancingUtils.relocateProcessingUnitInstanceAsync(target, candidateInstance, logger,
                            STATEFUL_DEPLOYMENT_TIMEOUT_SECONDS, TimeUnit.SECONDS);

                }
            }

            // we haven't found any partition to relocate, probably the instance that requires
            // relocation has a partition lower than lastResortPartitionRelocate.

            if (lastResortPartitionRelocate > pu.getNumberOfInstances() - 1) {
                lastResortPartitionRelocate = 0; // better luck next time. continuation programming
            }
        }

        if (conflict) {
            throw new RebalancingSlaEnforcementInProgressException(pu);
        }

        return null;
    }

    /**
     * Makes sure that the number of primary instances per cpu core is balanced across machines.
     * @param containers
     * @param sla
     * @throws RebalancingSlaEnforcementInProgressException
     */
    private void rebalanceNumberOfPrimaryInstancesPerMachine(GridServiceContainer[] containers,
            RebalancingSlaPolicy sla) throws RebalancingSlaEnforcementInProgressException {

        logger.debug("Trying to Re-balance number of primary instances per machine.");

        while (true) {
            final FutureStatefulProcessingUnitInstance futureInstance = rebalanceNumberOfPrimaryInstancesPerCpuCoreStep(
                    containers, sla);

            if (futureInstance == null) {
                break;
            }
            state.addFutureStatefulDeployment(futureInstance);
        }
    }

    /**
     * Restarts one primary pu instance so that the number of primary instances per cpu core
     * is more balanced across machines.
     * @param containers
     * @param sla
     * @throws RebalancingSlaEnforcementInProgressException
     */
    private FutureStatefulProcessingUnitInstance rebalanceNumberOfPrimaryInstancesPerCpuCoreStep(
            GridServiceContainer[] containers, RebalancingSlaPolicy sla)
            throws RebalancingSlaEnforcementInProgressException {
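        // Restarting a primary rebalances primaries because the restarted instance fails over
        // to its backup, which becomes the new primary. A candidate is therefore restarted only
        // when its single backup lives on the target machine, effectively moving one primary
        // from the source machine to the target machine.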

        // sort all machines (including those not in the allocated containers)
        // by (numberOfPrimaryInstancesPerMachine - minNumberOfPrimaryInstances)
        // meaning machines that need primaries the most are first.
        Machine[] machines = RebalancingUtils.getMachinesHostingContainers(containers);
        final List<Machine> sortedMachines = RebalancingUtils.sortMachinesByNumberOfPrimaryInstancesPerCpuCore(pu,
                machines, sla.getAllocatedCapacity());

        Fraction optimalCpuCoresPerPrimary = RebalancingUtils.getAverageCpuCoresPerPrimary(pu,
                sla.getAllocatedCapacity());
        boolean conflict = false;
        // the source machine is the machine where the primary is restarted (high primaries per core)
        // the target machine is the machine where a new primary is elected (low primaries per core)
        // try to match a source container with a target container and then do a primary restart.
        for (int targetIndex = 0; targetIndex < sortedMachines.size(); targetIndex++) {

            Machine target = sortedMachines.get(targetIndex);

            for (int sourceIndex = sortedMachines.size() - 1; sourceIndex > targetIndex; sourceIndex--) {

                Machine source = sortedMachines.get(sourceIndex);

                if (!RebalancingUtils.isRestartRecommended(pu, source, target, optimalCpuCoresPerPrimary,
                        sla.getAllocatedCapacity())) {
                    // source cannot give up any primary instances
                    // since the array is sorted there is no point in continuing the search
                    break;
                }

                if (isConflictingOperationInProgress(target, 1, true)) {
                    // number of primaries on machine might be skewed.
                    conflict = true;
                    logger.debug("Cannot restart a primary instance who's backup is on machine "
                            + RebalancingUtils.machineToString(target)
                            + " since a conflicting relocation is already in progress.");
                    continue;
                }

                if (isConflictingOperationInProgress(source, 1, true)) {
                    // number of primaries on machine might be skewed.
                    conflict = true;
                    logger.debug("Cannot restart a primary instance from machine "
                            + RebalancingUtils.machineToString(source)
                            + " since a conflicting relocation is already in progress.");
                    continue;
                }

                // we have a target and a source container. 
                // now all we need is a primary instance on the source container that has a backup on the target container
                for (ProcessingUnitInstance candidateInstance : source.getProcessingUnitInstances(pu.getName())) {

                    if (candidateInstance.getSpaceInstance() == null) {
                        logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " since embedded space is not detected");
                        continue;
                    }

                    if (candidateInstance.getSpaceInstance().getMode() != SpaceMode.PRIMARY) {
                        logger.debug(
                                "Cannot restart instance " + RebalancingUtils.puInstanceToString(candidateInstance)
                                        + " since it is not primary.");
                        continue;
                    }

                    if (!RebalancingUtils.isProcessingUnitPartitionIntact(candidateInstance)) {
                        logger.debug("Cannot restart " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " since instances from the same partition are missing");
                        conflict = true;
                        continue;
                    }

                    if (isConflictingStatefulDeploymentInProgress(candidateInstance)) {
                        logger.debug("Cannot relocate " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + " " + "since another instance from the same partition is being relocated");
                        conflict = true;
                        continue;
                    }

                    Machine[] sourceReplicationMachines = RebalancingUtils.getMachinesHostingContainers(
                            RebalancingUtils.getReplicationSourceContainers(candidateInstance));
                    if (sourceReplicationMachines.length > 1) {
                        throw new IllegalArgumentException("pu " + pu.getName()
                                + " must have exactly one backup instance per partition in order for the primary restart algorithm to work.");
                    }

                    if (!sourceReplicationMachines[0].equals(target)) {
                        logger.debug("Cannot restart " + RebalancingUtils.puInstanceToString(candidateInstance)
                                + "since replication source is on "
                                + RebalancingUtils.machineToString(sourceReplicationMachines[0]) + " "
                                + "and not on the target machine " + RebalancingUtils.machineToString(target));
                        continue;
                    }

                    if (logger.isInfoEnabled()) {
                        String sourceToString = RebalancingUtils.machineToString(source);
                        String targetToString = RebalancingUtils.machineToString(target);
                        int numberOfPrimaryInstancesOnTarget = RebalancingUtils
                                .getNumberOfPrimaryInstancesOnMachine(pu, target);
                        Fraction numberOfCpuCoresOnTarget = RebalancingUtils.getNumberOfCpuCores(target,
                                sla.getAllocatedCapacity());
                        int numberOfPrimaryInstancesOnSource = RebalancingUtils
                                .getNumberOfPrimaryInstancesOnMachine(pu, source);
                        Fraction numberOfCpuCoresOnSource = RebalancingUtils.getNumberOfCpuCores(source,
                                sla.getAllocatedCapacity());
                        logger.info("Restarting " + RebalancingUtils.puInstanceToString(candidateInstance) + " "
                                + "instance on machine " + sourceToString + " so that machine " + sourceToString
                                + " would have less instances per cpu core, and " + targetToString
                                + " would have more primary instances per cpu core. " + sourceToString + " has "
                                + numberOfPrimaryInstancesOnSource + " primary instances " + "running on "
                                + numberOfCpuCoresOnSource + " cpu cores. " + targetToString + " has "
                                + numberOfPrimaryInstancesOnTarget + " primary instances " + "running on "
                                + numberOfCpuCoresOnTarget + " cpu cores.");
                    }

                    return RebalancingUtils.restartProcessingUnitInstanceAsync(candidateInstance, logger,
                            STATEFUL_DEPLOYMENT_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                }
            }
        }

        if (state.getNumberOfFutureDeployments(pu) == 0 && RebalancingUtils.isProcessingUnitIntact(pu)
                && RebalancingUtils.isEvenlyDistributedAcrossContainers(pu, containers)
                && !RebalancingUtils.isEvenlyDistributedAcrossMachines(pu, sla.getAllocatedCapacity())) {

            logger.debug("Optimal primary rebalancing hueristics failed balancing primaries in this deployment. "
                    + "Performing non-optimal restart heuristics. Starting with partition "
                    + lastResortPartitionRestart);

            //
            // We cannot balance primaries per cpu core with one restart.
            // That means we need forward looking logic for more than one step, which we currently haven't implemented.
            // So we just restart a primary on a machine that has too many primaries per cpu core.
            // In order to make the algorithm deterministic and avoid loops we restart primaries by
            // their natural order (by partition number)
            //
            // lastResortPartitionRestart is the next partition we should restart. 
            for (; lastResortPartitionRestart < pu.getNumberOfInstances() - 1; lastResortPartitionRestart++) {

                ProcessingUnitInstance candidateInstance = pu.getPartition(lastResortPartitionRestart).getPrimary();
                Machine source = candidateInstance.getMachine();

                Machine[] sourceReplicationMachines = RebalancingUtils.getMachinesHostingContainers(
                        RebalancingUtils.getReplicationSourceContainers(candidateInstance));
                if (sourceReplicationMachines.length > 1) {
                    throw new IllegalArgumentException("pu " + pu.getName()
                            + " must have exactly one backup instance per partition in order for the primary restart algorithm to work.");
                }

                if (sourceReplicationMachines[0].equals(source)) {
                    logger.debug("Cannot restart " + RebalancingUtils.puInstanceToString(candidateInstance)
                            + "since replication source is on same machine as primary, so restarting will have not change number of primaries on machine.");
                    continue;
                }

                Fraction numberOfCpuCoresOnSource = RebalancingUtils.getNumberOfCpuCores(source,
                        sla.getAllocatedCapacity());

                Fraction optimalCpuCores = new Fraction(
                        RebalancingUtils.getNumberOfPrimaryInstancesOnMachine(pu, source))
                                .multiply(optimalCpuCoresPerPrimary);

                if (numberOfCpuCoresOnSource.compareTo(optimalCpuCores) <= 0) {

                    // number of cores is below optimal, 
                    // which means there are too many primaries on the machine                        
                    if (logger.isInfoEnabled()) {
                        String sourceToString = RebalancingUtils.machineToString(source);
                        int numberOfPrimaryInstancesOnSource = RebalancingUtils
                                .getNumberOfPrimaryInstancesOnMachine(pu, source);
                        logger.info("Restarting " + RebalancingUtils.puInstanceToString(candidateInstance) + " "
                                + "instance on machine " + sourceToString + " so that machine " + sourceToString
                                + " would have less instances per cpu core. " + sourceToString + " has "
                                + numberOfPrimaryInstancesOnSource + " primary instances " + "running on "
                                + numberOfCpuCoresOnSource + " cpu cores. ");
                    }

                    return RebalancingUtils.restartProcessingUnitInstanceAsync(candidateInstance, logger,
                            STATEFUL_DEPLOYMENT_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                }
            }
            // we haven't found any partition to restart, probably the instance that requires restart
            // has a partition lower than lastResortPartitionRestart.

            if (lastResortPartitionRestart >= pu.getNumberOfInstances() - 1) {
                lastResortPartitionRestart = 0; //better luck next time. continuation programming
            }
        }

        if (conflict) {
            throw new RebalancingSlaEnforcementInProgressException(pu);
        }

        return null;
    }

    private void cleanFutureStatefulDeployments() throws RebalancingSlaEnforcementInProgressException {
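        // Drain completed deployment futures: log successes, move failures to the failed list
        // (where they temporarily block retries) and rethrow the failure as an
        // SLA-enforcement-in-progress exception for the caller's next poll.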

        while (true) {

            FutureStatefulProcessingUnitInstance future = state.removeOneDoneFutureStatefulDeployments(pu);

            if (future == null) {
                // no more done futures
                break;
            }

            Throwable throwable = null;

            try {
                ProcessingUnitInstance puInstance = future.get();
                logger.info("Processing unit instance deployment completed successfully "
                        + RebalancingUtils.puInstanceToString(puInstance));

            } catch (ExecutionException e) {
                throwable = e.getCause();
            } catch (TimeoutException e) {
                throwable = e;
            }

            if (throwable != null) {
                state.addFailedStatefulDeployment(future);
                throwFutureProcessingUnitInstanceException(throwable);
            }

        }

        cleanFailedFutureStatefulDeployments();
    }

    private void cleanFutureStatelessDeployments() throws RebalancingSlaEnforcementInProgressException {

        while (true) {

            FutureStatelessProcessingUnitInstance future = state.removeOneDoneFutureStatelessDeployments(pu);

            if (future == null) {
                // no more done futures
                break;
            }

            Throwable throwable = null;

            try {
                ProcessingUnitInstance puInstance = future.get();
                logger.info("Processing unit instance deployment completed successfully "
                        + RebalancingUtils.puInstanceToString(puInstance));

            } catch (ExecutionException e) {
                throwable = e.getCause();
            } catch (TimeoutException e) {
                throwable = e;
            }

            if (throwable != null) {
                state.addFailedStatelessDeployment(future);
                throwFutureProcessingUnitInstanceException(throwable);
            }
        }

        cleanFailedFutureStatelessDeployments();
    }

    /**
     * This method removes failed relocations from the list, allowing a retry attempt to take
     * place. Some failures are removed immediately, while others stay in the list for
     * STATEFUL_DEPLOYMENT_FAILURE_FORGET_SECONDS.
     */
    private void cleanFailedFutureStatefulDeployments() {

        for (FutureStatefulProcessingUnitInstance future : state.getFailedStatefulDeployments(pu)) {
            int passedSeconds = (int) ((System.currentTimeMillis() - future.getTimestamp().getTime()) / 1000);

            if (future.getException() != null
                    && future.getException().getCause() instanceof WrongContainerProcessingUnitRelocationException
                    && future.getTargetContainer().isDiscovered()
                    && passedSeconds < STATEFUL_DEPLOYMENT_FAILURE_FORGET_SECONDS) {

                // Do not remove the future from the list: the target container did not have
                // enough memory, meaning something is very wrong with our assumptions about it.
                // Leaving the future in the list makes it raise conflicting exceptions until
                // STATEFUL_DEPLOYMENT_FAILURE_FORGET_SECONDS passes, at which point it is
                // removed from the list.
            } else {
                logger.info("Forgetting relocation error " + future.getFailureMessage());
                state.removeFailedFutureStatefulDeployment(future);
            }
        }
    }

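    /**
     * Clears bookkeeping for stateless processing unit instances that were scheduled for
     * removal, once the admin API no longer discovers them.
     */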
    private void cleanRemovedStatelessProcessingUnitInstances() {
        for (ProcessingUnitInstance instance : state.getRemovedStatelessProcessingUnitInstances(pu)) {
            if (!instance.isDiscovered()) {
                state.removeRemovedStatelessProcessingUnitInstance(instance);
                logger.info("Processing Unit Instance " + RebalancingUtils.puInstanceToString(instance)
                        + " removed successfully.");
            }
        }
    }

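    /**
     * Translates a failed deployment future's error into the exception contract of this
     * endpoint: in-progress exceptions are rethrown as-is, admin and timeout failures are
     * wrapped, and any other type indicates a bug.
     */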
    private void throwFutureProcessingUnitInstanceException(Throwable throwable)
            throws RebalancingSlaEnforcementInProgressException {

        if (throwable instanceof RebalancingSlaEnforcementInProgressException) {
            throw (RebalancingSlaEnforcementInProgressException) throwable;
        } else if (throwable instanceof AdminException) {
            throw new FutureProcessingUnitInstanceDeploymentException(pu, (AdminException) throwable);
        } else if (throwable instanceof TimeoutException) {
            throw new FutureProcessingUnitInstanceDeploymentException(pu, (TimeoutException) throwable);
        } else {
            throw new IllegalStateException("Unexpected exception type", throwable);
        }
    }

    /**
     * This method removes failed stateless deployments from the list, allowing a retry attempt
     * to take place. Failed deployments stay in the list for
     * STATELESS_DEPLOYMENT_FAILURE_FORGET_SECONDS, unless the target container has been removed
     * in the meantime.
     */
    private void cleanFailedFutureStatelessDeployments() {
        for (final FutureStatelessProcessingUnitInstance future : state.getFailedStatelessDeployments(pu)) {

            int passedSeconds = (int) ((System.currentTimeMillis() - future.getTimestamp().getTime()) / 1000);

            if (future.getException() != null && future.getTargetContainer().isDiscovered()
                    && passedSeconds < STATELESS_DEPLOYMENT_FAILURE_FORGET_SECONDS) {

                // do not remove future from list until timeout failure forget
                // since something is very wrong with target container.

                if (logger.isDebugEnabled()) {
                    logger.debug(
                            "Ignoring failure to relocate stateless pu instance " + future.getProcessingUnit()
                                    + " Will try again in "
                                    + (STATELESS_DEPLOYMENT_FAILURE_FORGET_SECONDS - passedSeconds) + " seconds.",
                            future.getException());
                }

            } else {
                final InternalAdmin admin = ((InternalAdmin) pu.getAdmin());
                // 1. kick the GSM a little by removing planned instances that are not deployed
                // 2. forget the relocation error
                admin.scheduleAdminOperation(new Runnable() {

                    public void run() {
                        try {

                            final boolean decremented = ((InternalProcessingUnit) pu).decrementPlannedInstances();
                            if (logger.isInfoEnabled()) {
                                if (decremented) {
                                    logger.info(
                                            "Decreased number of planned instances in the GSM. It will be incremented shortly (instance deployment retry)");
                                } else {
                                    logger.info("Failed to decrement planned number of instances for "
                                            + pu.getName() + ", meaning that the instance is ok.");
                                }
                            }
                        } catch (final AdminException e) {
                            logger.warn("Unexpected failure to decrement planned number of instances for "
                                    + pu.getName(), e);
                        } catch (final Throwable t) {
                            logger.warn("Unexpected exception when decrementing planned number of instances for "
                                    + pu.getName(), t);
                        } finally {
                            admin.scheduleNonBlockingStateChange(new Runnable() {

                                @Override
                                public void run() {
                                    logger.info("Cleaning deployment error before retry. Error was:"
                                            + future.getFailureMessage());
                                    state.removeFailedFutureStatelessDeployment(future);
                                }
                            });
                        }
                    }
                });

            }
        }
    }

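    /**
     * Checks whether the specified container is already involved in an in-flight operation,
     * either as the source or target of a stateful relocation, as one of its replication
     * sources, or as the target of a stateless deployment. Machine-wide limits are then
     * checked by {@link #isConflictingOperationInProgress(Machine, int, boolean)}.
     */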
    private boolean isConflictingDeploymentInProgress(GridServiceContainer container,
            int maximumNumberOfConcurrentRelocationsPerMachine, boolean atMostOneConcurrentRelocation) {

        if (maximumNumberOfConcurrentRelocationsPerMachine <= 0) {
            throw new IllegalStateException("maximumNumberOfConcurrentRelocationsPerMachine must be 1 or higher");
        }

        int concurrentRelocationsInContainer = 0;

        for (FutureStatefulProcessingUnitInstance future : state.getAllFutureStatefulProcessingUnitInstances()) {

            GridServiceContainer targetContainer = future.getTargetContainer();
            GridServiceContainer sourceContainer = future.getSourceContainer();
            List<GridServiceContainer> replicationSourceContainers = Arrays
                    .asList(future.getReplicaitonSourceContainers());

            // An in-flight relocation makes the instance count unreliable on both its source
            // and target containers, and keeps its replication source containers busy sending
            // data to the new backup.
            if (sourceContainer.equals(container)
                    || targetContainer.equals(container)
                    || replicationSourceContainers.contains(container)) {

                concurrentRelocationsInContainer++;
            }
        }

        for (FutureStatelessProcessingUnitInstance future : state.getAllFutureStatelessProcessingUnitInstances()) {

            GridServiceContainer targetContainer = future.getTargetContainer();

            if (targetContainer.equals(container)) { // deployment already in progress
                concurrentRelocationsInContainer++;
            }
        }

        return concurrentRelocationsInContainer > 0
                || isConflictingOperationInProgress(container.getMachine(),
                        maximumNumberOfConcurrentRelocationsPerMachine, atMostOneConcurrentRelocation);
    }

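    /**
     * When atMostOneConcurrentRelocation is set, any in-flight relocation in the cluster is
     * treated as a conflict; otherwise only operations on the given machine are counted
     * against maximumNumberOfConcurrentRelocationsPerMachine.
     */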
    private boolean isConflictingOperationInProgress(Machine machine,
            int maximumNumberOfConcurrentRelocationsPerMachine, boolean atMostOneConcurrentRelocation) {
        if (atMostOneConcurrentRelocation) {
            // check for any relocation.
            return isRelocationInProgress();
        } else {
            // check for relocation on the machine
            return isConflictingOperationInProgress(machine, maximumNumberOfConcurrentRelocationsPerMachine);
        }
    }

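    /**
     * Returns true if any stateful processing unit instance relocation is still in flight.
     */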
    private boolean isRelocationInProgress() {
        List<FutureStatefulProcessingUnitInstance> allFutureStatefulProcessingUnitInstances = state
                .getAllFutureStatefulProcessingUnitInstances();
        for (FutureStatefulProcessingUnitInstance futureStatefulProcessingUnitInstance : allFutureStatefulProcessingUnitInstances) {
            if (!futureStatefulProcessingUnitInstance.isDone()) {
                logger.debug("Relocation of " + futureStatefulProcessingUnitInstance.getInstanceId()
                        + " is in progress from "
                        + ContainersSlaUtils.gscToString(futureStatefulProcessingUnitInstance.getSourceContainer())
                        + " to " + ContainersSlaUtils
                                .gscToString(futureStatefulProcessingUnitInstance.getTargetContainer()));
                return true;
            }
        }
        logger.debug("No active re-locations found.");
        return false;
    }

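    /**
     * Counts in-flight operations that keep the machine busy, either as a relocation target,
     * as a replication source, or as a stateless deployment target, and reports a conflict
     * once the count reaches maximumNumberOfConcurrentRelocationsPerMachine. A non-positive
     * maximum disables the limit.
     */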
    private boolean isConflictingOperationInProgress(Machine machine,
            int maximumNumberOfConcurrentRelocationsPerMachine) {

        if (maximumNumberOfConcurrentRelocationsPerMachine <= 0) {
            // maximumNumberOfConcurrentRelocationsPerMachine is disabled
            maximumNumberOfConcurrentRelocationsPerMachine = Integer.MAX_VALUE;
        }

        int concurrentRelocationsInMachine = 0;

        for (FutureStatefulProcessingUnitInstance future : state.getAllFutureStatefulProcessingUnitInstances()) {

            GridServiceContainer targetContainer = future.getTargetContainer();
            List<GridServiceContainer> replicationSourceContainers = Arrays
                    .asList(future.getReplicaitonSourceContainers());

            Machine targetMachine = targetContainer.getMachine();
            Set<Machine> replicationSourceMachines = new HashSet<Machine>();
            for (GridServiceContainer replicationSourceContainer : replicationSourceContainers) {
                replicationSourceMachines.add(replicationSourceContainer.getMachine());
            }

            // Both the target machine and the replication source machines are busy with the
            // in-flight replication.
            if (targetMachine.equals(machine) || replicationSourceMachines.contains(machine)) {

                concurrentRelocationsInMachine++;
            }
        }

        for (FutureStatelessProcessingUnitInstance future : state.getAllFutureStatelessProcessingUnitInstances()) {

            GridServiceContainer targetContainer = future.getTargetContainer();
            Machine targetMachine = targetContainer.getMachine();

            if (targetMachine.equals(machine)) { // target machine is busy with deployment
                logger.debug("A Relocation to " + ContainersSlaUtils.gscToString(future.getTargetContainer())
                        + " is in progress.");
                concurrentRelocationsInMachine++;
            }
        }

        return concurrentRelocationsInMachine >= maximumNumberOfConcurrentRelocationsPerMachine;
    }

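    /**
     * Returns true if a stateful deployment future is already pending for the same processing
     * unit and instance id as the candidate instance.
     */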
    private boolean isConflictingStatefulDeploymentInProgress(ProcessingUnitInstance candidateInstance) {

        for (FutureStatefulProcessingUnitInstance future : state.getAllFutureStatefulProcessingUnitInstances()) {
            if (future.getProcessingUnit().equals(candidateInstance.getProcessingUnit())
                    && future.getInstanceId() == candidateInstance.getInstanceId()) {
                return true;
            }
        }

        return false;
    }
}