com.datatorrent.stram.plan.physical.PhysicalPlan.java Source code

Introduction

Here is the source code for com.datatorrent.stram.plan.physical.PhysicalPlan.java
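
Before the full listing, here is a minimal, hypothetical sketch of how the class is typically exercised in a plan test, based only on the constructor and the PlanContext interface shown in the source below (it assumes the same imports as the file; the empty LogicalPlan and the no-op PlanContext stub are illustration-only assumptions, the real context is supplied by the streaming container manager):

    LogicalPlan dag = new LogicalPlan();
    // ... add operators and streams to the logical DAG here ...
    PhysicalPlan.PlanContext planContext = new PhysicalPlan.PlanContext() {
        @Override public void recordEventAsync(StramEvent ev) { }
        @Override public void deploy(Set<PTContainer> releaseContainers, Collection<PTOperator> undeploy,
                Set<PTContainer> startContainers, Collection<PTOperator> deploy) { }
        @Override public void dispatch(Runnable r) { r.run(); }
        @Override public void writeJournal(Recoverable operation) { }
        @Override public void addOperatorRequest(PTOperator oper, StramToNodeRequest request) { }
    };
    PhysicalPlan plan = new PhysicalPlan(dag, planContext);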

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.stram.plan.physical;

import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.Context.PortContext;
import com.datatorrent.api.*;
import com.datatorrent.api.DAG.Locality;
import com.datatorrent.api.Operator.InputPort;
import com.datatorrent.api.Partitioner.Partition;
import com.datatorrent.api.Partitioner.PartitionKeys;
import com.datatorrent.api.StatsListener.OperatorRequest;
import com.datatorrent.api.annotation.Stateless;

import com.datatorrent.common.util.AsyncFSStorageAgent;
import com.datatorrent.netlet.util.DTThrowable;
import com.datatorrent.stram.Journal.Recoverable;
import com.datatorrent.stram.api.Checkpoint;
import com.datatorrent.stram.api.StramEvent;
import com.datatorrent.stram.api.StreamingContainerUmbilicalProtocol.StramToNodeRequest;
import com.datatorrent.stram.plan.logical.LogicalPlan;
import com.datatorrent.stram.plan.logical.LogicalPlan.InputPortMeta;
import com.datatorrent.stram.plan.logical.LogicalPlan.OperatorMeta;
import com.datatorrent.stram.plan.logical.LogicalPlan.OutputPortMeta;
import com.datatorrent.stram.plan.logical.LogicalPlan.StreamMeta;
import com.datatorrent.stram.plan.logical.StreamCodecWrapperForPersistance;
import com.datatorrent.stram.plan.physical.PTOperator.HostOperatorSet;
import com.datatorrent.stram.plan.physical.PTOperator.PTInput;
import com.datatorrent.stram.plan.physical.PTOperator.PTOutput;

/**
 * Translates the logical DAG into the physical model. It is the initial query
 * planner and performs dynamic changes.
 * <p>
 * Attributes in the logical DAG affect how the physical plan is derived.
 * Examples include partitioning schemes, resource allocation, recovery
 * semantics etc.<br>
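 *
 * For illustration only (not part of the original documentation; the operator variable,
 * partitioner, and values below are assumptions), such attributes are typically set on the
 * logical DAG before the physical plan is derived:
 * <pre>{@code
 * dag.setAttribute(myOperator, OperatorContext.PARTITIONER, new StatelessPartitioner<MyOperator>(4));
 * dag.setAttribute(myOperator, OperatorContext.MEMORY_MB, 1024);
 * }</pre>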
 *
 * The current implementation does not dynamically change or optimize allocation
 * of containers. The maximum number of containers and container size can be
 * specified per application, but all containers are requested at the same size
 * and execution will block until all containers have been allocated by the
 * resource manager. Future enhancements will allow defining resource constraints
 * at the operator level and elasticity in resource allocation.<br>
 *
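 * A hypothetical sketch of the per-application settings mentioned above (the attribute
 * names are taken from the public DAG API rather than this file, and the values are assumed):
 * <pre>{@code
 * dag.setAttribute(Context.DAGContext.CONTAINERS_MAX_COUNT, 10);
 * dag.setAttribute(Context.DAGContext.CONTAINER_MEMORY_MB, 2048);
 * }</pre>
 *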
 * @since 0.3.2
 */
public class PhysicalPlan implements Serializable {
    private static final long serialVersionUID = 201312112033L;
    private static final Logger LOG = LoggerFactory.getLogger(PhysicalPlan.class);

    public static class LoadIndicator {
        public final int indicator;
        public final String note;

        LoadIndicator(int indicator, String note) {
            this.indicator = indicator;
            this.note = note;
        }
    }

    private final AtomicInteger idSequence = new AtomicInteger();
    final AtomicInteger containerSeq = new AtomicInteger();
    private LinkedHashMap<OperatorMeta, PMapping> logicalToPTOperator = new LinkedHashMap<OperatorMeta, PMapping>();
    private final List<PTContainer> containers = new CopyOnWriteArrayList<PTContainer>();
    private final LogicalPlan dag;
    private transient final PlanContext ctx;
    private int maxContainers = 1;
    private int availableMemoryMB = Integer.MAX_VALUE;
    private final LocalityPrefs localityPrefs = new LocalityPrefs();
    private final LocalityPrefs inlinePrefs = new LocalityPrefs();

    final Set<PTOperator> deployOpers = Sets.newHashSet();
    final Map<PTOperator, Operator> newOpers = Maps.newHashMap();
    final Set<PTOperator> undeployOpers = Sets.newHashSet();
    final ConcurrentMap<Integer, PTOperator> allOperators = Maps.newConcurrentMap();
    private final ConcurrentMap<OperatorMeta, OperatorMeta> pendingRepartition = Maps.newConcurrentMap();

    private final AtomicInteger strCodecIdSequence = new AtomicInteger();
    private final Map<StreamCodec<?>, Integer> streamCodecIdentifiers = Maps.newHashMap();

    private PTContainer getContainer(int index) {
        if (index >= containers.size()) {
            if (index >= maxContainers) {
                index = maxContainers - 1;
            }
            for (int i = containers.size(); i < index + 1; i++) {
                containers.add(i, new PTContainer(this));
            }
        }
        return containers.get(index);
    }

    /**
     * Interface to execution context that can be mocked for plan testing.
     */
    public interface PlanContext {

        /**
         * Record an event in the event log
         *
         * @param ev The event
         *
         */
        public void recordEventAsync(StramEvent ev);

        /**
         * Request a deployment change as a sequence of undeploy, container start, and deploy groups with dependencies.
         * Called on initial plan and on dynamic changes during execution.
         * @param releaseContainers
         * @param undeploy
         * @param startContainers
         * @param deploy
         */
        public void deploy(Set<PTContainer> releaseContainers, Collection<PTOperator> undeploy,
                Set<PTContainer> startContainers, Collection<PTOperator> deploy);

        /**
         * Trigger event to perform plan modification.
         * @param r
         */
        public void dispatch(Runnable r);

        /**
         * Write the recoverable operation to the log.
         * @param operation
         */
        public void writeJournal(Recoverable operation);

        public void addOperatorRequest(PTOperator oper, StramToNodeRequest request);
    }

    private static class StatsListenerProxy implements StatsListener, Serializable {
        private static final long serialVersionUID = 201312112033L;
        final private OperatorMeta om;

        private StatsListenerProxy(OperatorMeta om) {
            this.om = om;
        }

        @Override
        public Response processStats(BatchedOperatorStats stats) {
            return ((StatsListener) om.getOperator()).processStats(stats);
        }
    }

    /**
     * The logical operator with physical plan info tagged on.
     */
    public static class PMapping implements java.io.Serializable {
        private static final long serialVersionUID = 201312112033L;

        final private OperatorMeta logicalOperator;
        private List<PTOperator> partitions = new LinkedList<PTOperator>();
        private final Map<LogicalPlan.OutputPortMeta, StreamMapping> outputStreams = Maps.newHashMap();
        private List<StatsListener> statsHandlers;

        /**
         * Operators that form a parallel partition
         */
        private Set<OperatorMeta> parallelPartitions = Sets.newHashSet();

        private PMapping(OperatorMeta om) {
            this.logicalOperator = om;
        }

        private void addPartition(PTOperator p) {
            partitions.add(p);
            p.statsListeners = this.statsHandlers;
        }

        /**
         * Return all partitions and unifiers, excluding MxN unifiers.
         * @return the partitions and unifiers for this mapping
         */
        private Collection<PTOperator> getAllOperators() {
            Collection<PTOperator> c = new ArrayList<PTOperator>(partitions.size() + 1);
            c.addAll(partitions);
            for (StreamMapping ug : outputStreams.values()) {
                ug.addTo(c);
            }
            return c;
        }

        @Override
        public String toString() {
            return logicalOperator.toString();
        }
    }

    private class LocalityPref implements java.io.Serializable {
        private static final long serialVersionUID = 201312112033L;
        String host;
        Set<PMapping> operators = Sets.newHashSet();
    }

    /**
     * Group logical operators by locality constraint. Used to derive locality
     * groupings for physical operators, which are used when assigning containers
     * and requesting resources from the scheduler.
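     *
     * For illustration (hypothetical operators and stream name, not from this file),
     * such a constraint originates in the logical DAG, e.g.:
     * <pre>{@code
     * dag.addStream("data", upstream.output, downstream.input).setLocality(Locality.NODE_LOCAL);
     * }</pre>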
     */
    private class LocalityPrefs implements java.io.Serializable {
        private static final long serialVersionUID = 201312112033L;
        private final Map<PMapping, LocalityPref> prefs = Maps.newHashMap();
        private final AtomicInteger groupSeq = new AtomicInteger();

        void add(PMapping m, String group) {
            if (group != null) {
                LocalityPref pref = null;
                for (LocalityPref lp : prefs.values()) {
                    if (group.equals(lp.host)) {
                        lp.operators.add(m);
                        pref = lp;
                        break;
                    }
                }
                if (pref == null) {
                    pref = new LocalityPref();
                    pref.host = group;
                    pref.operators.add(m);
                    this.prefs.put(m, pref);
                }
            }
        }

        // @SuppressWarnings("null") for lp2.operators.add(m1) below: some IDEs flag a potential
        // null dereference, but the surrounding branches guarantee the reference is non-null.
        void setLocal(PMapping m1, PMapping m2) {
            LocalityPref lp1 = prefs.get(m1);
            LocalityPref lp2 = prefs.get(m2);

            if (lp1 == null && lp2 == null) {
                lp1 = lp2 = new LocalityPref();
                lp1.host = "host" + groupSeq.incrementAndGet();
                lp1.operators.add(m1);
                lp1.operators.add(m2);
            } else if (lp1 != null && lp2 != null) {
                // check if we can combine
                if (StringUtils.equals(lp1.host, lp2.host)) {
                    lp1.operators.addAll(lp2.operators);
                    lp2.operators.addAll(lp1.operators);
                } else {
                    LOG.warn("Node locality conflict {} {}", m1, m2);
                }
            } else {
                if (lp1 == null) {
                    lp2.operators.add(m1);
                    lp1 = lp2;
                } else {
                    lp1.operators.add(m2);
                    lp2 = lp1;
                }
            }

            prefs.put(m1, lp1);
            prefs.put(m2, lp2);
        }

    }

    /**
     * Create the physical plan from the logical plan and assign operators to containers.
     *
     * @param dag the logical plan to translate
     * @param ctx the execution context used to deploy the plan
     */
    public PhysicalPlan(LogicalPlan dag, PlanContext ctx) {

        this.dag = dag;
        this.ctx = ctx;
        this.maxContainers = Math.max(dag.getMaxContainerCount(), 1);
        LOG.debug("Max containers: {}", this.maxContainers);

        Stack<OperatorMeta> pendingNodes = new Stack<OperatorMeta>();

        // Add logging operators for streams if not added already
        updatePersistOperatorStreamCodec(dag);

        for (OperatorMeta n : dag.getAllOperators()) {
            pendingNodes.push(n);
        }

        while (!pendingNodes.isEmpty()) {
            OperatorMeta n = pendingNodes.pop();

            if (this.logicalToPTOperator.containsKey(n)) {
                // already processed as upstream dependency
                continue;
            }

            boolean upstreamDeployed = true;

            for (Map.Entry<InputPortMeta, StreamMeta> entry : n.getInputStreams().entrySet()) {
                StreamMeta s = entry.getValue();
                boolean delay = entry.getKey().getValue(LogicalPlan.IS_CONNECTED_TO_DELAY_OPERATOR);
                // skip delay sources since it's going to be handled as downstream
                if (!delay && s.getSource() != null
                        && !this.logicalToPTOperator.containsKey(s.getSource().getOperatorMeta())) {
                    pendingNodes.push(n);
                    pendingNodes.push(s.getSource().getOperatorMeta());
                    upstreamDeployed = false;
                    break;
                }
            }

            if (upstreamDeployed) {
                addLogicalOperator(n);
            }
        }

        updatePartitionsInfoForPersistOperator(dag);

        // assign operators to containers
        int groupCount = 0;
        Set<PTOperator> deployOperators = Sets.newHashSet();
        for (Map.Entry<OperatorMeta, PMapping> e : logicalToPTOperator.entrySet()) {
            for (PTOperator oper : e.getValue().getAllOperators()) {
                if (oper.container == null) {
                    PTContainer container = getContainer((groupCount++) % maxContainers);
                    if (!container.operators.isEmpty()) {
                        LOG.warn(
                                "Operator {} shares container without locality contraint due to insufficient resources.",
                                oper);
                    }
                    Set<PTOperator> inlineSet = oper.getGrouping(Locality.CONTAINER_LOCAL).getOperatorSet();
                    if (!inlineSet.isEmpty()) {
                        // process inline operators
                        for (PTOperator inlineOper : inlineSet) {
                            setContainer(inlineOper, container);
                        }
                    } else {
                        setContainer(oper, container);
                    }
                    deployOperators.addAll(container.operators);
                }
            }
        }

        for (PTContainer container : containers) {
            updateContainerMemoryWithBufferServer(container);
            container.setRequiredVCores(getVCores(container.getOperators()));
        }

        for (Map.Entry<PTOperator, Operator> operEntry : this.newOpers.entrySet()) {
            initCheckpoint(operEntry.getKey(), operEntry.getValue(), Checkpoint.INITIAL_CHECKPOINT);
        }
        // request initial deployment
        ctx.deploy(Collections.<PTContainer>emptySet(), Collections.<PTOperator>emptySet(),
                Sets.newHashSet(containers), deployOperators);
        this.newOpers.clear();
        this.deployOpers.clear();
        this.undeployOpers.clear();
    }

    private void updatePartitionsInfoForPersistOperator(LogicalPlan dag) {
        // Add the partition mask and partition keys of the sinks to persist to the
        // wrapper StreamCodec of the persist operator
        try {
            for (OperatorMeta n : dag.getAllOperators()) {
                for (StreamMeta s : n.getOutputStreams().values()) {
                    if (s.getPersistOperator() != null) {
                        InputPortMeta persistInputPort = s.getPersistOperatorInputPort();
                        StreamCodecWrapperForPersistance<?> persistCodec = (StreamCodecWrapperForPersistance<?>) persistInputPort
                                .getAttributes().get(PortContext.STREAM_CODEC);
                        if (persistCodec == null)
                            continue;
                        // Logging is enabled for the stream
                        for (InputPortMeta portMeta : s.getSinksToPersist()) {
                            updatePersistOperatorWithSinkPartitions(persistInputPort, s.getPersistOperator(),
                                    persistCodec, portMeta);
                        }
                    }

                    // Check partitioning for persist operators per sink too
                    for (Entry<InputPortMeta, InputPortMeta> entry : s.sinkSpecificPersistInputPortMap.entrySet()) {
                        InputPortMeta persistInputPort = entry.getValue();
                        StreamCodec<?> codec = persistInputPort.getAttributes().get(PortContext.STREAM_CODEC);
                        if (codec != null) {
                            if (codec instanceof StreamCodecWrapperForPersistance) {
                                StreamCodecWrapperForPersistance<?> persistCodec = (StreamCodecWrapperForPersistance<?>) codec;
                                updatePersistOperatorWithSinkPartitions(persistInputPort,
                                        s.sinkSpecificPersistOperatorMap.get(entry.getKey()), persistCodec,
                                        entry.getKey());
                            }
                        }
                    }
                }
            }
        } catch (Exception e) {
            DTThrowable.wrapIfChecked(e);
        }
    }

    private void updatePersistOperatorWithSinkPartitions(InputPortMeta persistInputPort,
            OperatorMeta persistOperatorMeta, StreamCodecWrapperForPersistance<?> persistCodec,
            InputPortMeta sinkPortMeta) {
        Collection<PTOperator> ptOperators = getOperators(sinkPortMeta.getOperatorWrapper());
        Collection<PartitionKeys> partitionKeysList = new ArrayList<PartitionKeys>();
        for (PTOperator p : ptOperators) {
            PartitionKeys keys = p.partitionKeys.get(sinkPortMeta);
            partitionKeysList.add(keys);
        }

        persistCodec.inputPortToPartitionMap.put(sinkPortMeta, partitionKeysList);
    }

    private void updatePersistOperatorStreamCodec(LogicalPlan dag) {
        HashMap<StreamMeta, StreamCodec<?>> streamMetaToCodecMap = new HashMap<StreamMeta, StreamCodec<?>>();
        try {
            for (OperatorMeta n : dag.getAllOperators()) {
                for (StreamMeta s : n.getOutputStreams().values()) {
                    if (s.getPersistOperator() != null) {
                        Map<InputPortMeta, StreamCodec<?>> inputStreamCodecs = new HashMap<>();
                        // Logging is enabled for the stream
                        for (InputPortMeta portMeta : s.getSinksToPersist()) {
                            InputPort<?> port = portMeta.getPortObject();
                            StreamCodec<?> inputStreamCodec = (portMeta.getValue(PortContext.STREAM_CODEC) != null)
                                    ? portMeta.getValue(PortContext.STREAM_CODEC)
                                    : port.getStreamCodec();
                            if (inputStreamCodec != null) {
                                boolean alreadyAdded = false;

                                for (StreamCodec<?> codec : inputStreamCodecs.values()) {
                                    if (inputStreamCodec.equals(codec)) {
                                        alreadyAdded = true;
                                        break;
                                    }
                                }
                                if (!alreadyAdded) {
                                    inputStreamCodecs.put(portMeta, inputStreamCodec);
                                }
                            }
                        }

                        if (inputStreamCodecs.isEmpty()) {
                            // Stream codec not specified
                            // So everything out of Source should be captured without any
                            // StreamCodec
                            // Do nothing
                        } else {
                            // Create a wrapper codec for stream persistence using all unique
                            // stream codecs; the logger should write the merge/union of all
                            // input stream codecs
                            StreamCodec<?> specifiedCodecForLogger = (s.getPersistOperatorInputPort()
                                    .getValue(PortContext.STREAM_CODEC) != null)
                                            ? s.getPersistOperatorInputPort().getValue(PortContext.STREAM_CODEC)
                                            : s.getPersistOperatorInputPort().getPortObject().getStreamCodec();
                            @SuppressWarnings({ "unchecked", "rawtypes" })
                            StreamCodecWrapperForPersistance<Object> codec = new StreamCodecWrapperForPersistance(
                                    inputStreamCodecs, specifiedCodecForLogger);
                            streamMetaToCodecMap.put(s, codec);
                        }
                    }
                }
            }

            for (java.util.Map.Entry<StreamMeta, StreamCodec<?>> entry : streamMetaToCodecMap.entrySet()) {
                dag.setInputPortAttribute(entry.getKey().getPersistOperatorInputPort().getPortObject(),
                        PortContext.STREAM_CODEC, entry.getValue());
            }
        } catch (Exception e) {
            DTThrowable.wrapIfChecked(e);
        }
    }

    private void setContainer(PTOperator pOperator, PTContainer container) {
        LOG.debug("Setting container {} for {}", container, pOperator);
        assert (pOperator.container == null) : "Container already assigned for " + pOperator;
        pOperator.container = container;
        container.operators.add(pOperator);
        int upStreamUnifierMemory = 0;
        if (!pOperator.upstreamMerge.isEmpty()) {
            for (Map.Entry<InputPortMeta, PTOperator> mEntry : pOperator.upstreamMerge.entrySet()) {
                assert (mEntry.getValue().container == null) : "Container already assigned for "
                        + mEntry.getValue();
                mEntry.getValue().container = container;
                container.operators.add(mEntry.getValue());
                upStreamUnifierMemory += mEntry.getValue().getOperatorMeta().getValue(OperatorContext.MEMORY_MB);
            }
        }
        int memoryMB = pOperator.getOperatorMeta().getValue(OperatorContext.MEMORY_MB) + upStreamUnifierMemory;
        container.setRequiredMemoryMB(container.getRequiredMemoryMB() + memoryMB);
    }

    private void updateContainerMemoryWithBufferServer(PTContainer container) {
        int bufferServerMemory = 0;
        for (PTOperator operator : container.getOperators()) {
            bufferServerMemory += operator.getBufferServerMemory();
        }
        container.setRequiredMemoryMB(container.getRequiredMemoryMB() + bufferServerMemory);
    }

    /**
     * Returns the vCores required for a set of operators in a container. Operators connected
     * THREAD_LOCAL share a core, so the operators are grouped by thread-local connectivity and
     * the maximum vCore value of each group is summed.
     *
     * @param operators the container-local operators
     * @return the number of vcores required for a container
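     *
     * For example (illustrative numbers, not taken from this file): if operators A and B are
     * connected THREAD_LOCAL with 2 and 3 vCores and operator C runs in a separate thread group
     * with 1 vCore, the container requires max(2, 3) + 1 = 4 vCores.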
     */
    private int getVCores(Collection<PTOperator> operators) {
        // this forms the groups of thread local operators in the given container
        HashMap<PTOperator, Set<PTOperator>> groupMap = new HashMap<PTOperator, Set<PTOperator>>();
        for (PTOperator operator : operators) {
            Set<PTOperator> group = new HashSet<PTOperator>();
            group.add(operator);
            groupMap.put(operator, group);
        }
        int vCores = 0;
        for (PTOperator operator : operators) {
            Set<PTOperator> threadLocal = operator.getThreadLocalOperators();
            if (threadLocal != null) {
                Set<PTOperator> group = groupMap.get(operator);
                for (PTOperator operator1 : threadLocal) {
                    group.addAll(groupMap.get(operator1));
                }
                for (PTOperator operator1 : group) {
                    groupMap.put(operator1, group);
                }
            }
        }
        Set<PTOperator> visitedOperators = new HashSet<PTOperator>();
        for (Map.Entry<PTOperator, Set<PTOperator>> group : groupMap.entrySet()) {
            if (!visitedOperators.contains(group.getKey())) {
                visitedOperators.addAll(group.getValue());
                int tempCores = 0;
                for (PTOperator operator : group.getValue()) {
                    tempCores = Math.max(tempCores, operator.getOperatorMeta().getValue(OperatorContext.VCORES));
                }
                vCores += tempCores;
            }
        }
        return vCores;
    }

    private class PartitioningContextImpl implements Partitioner.PartitioningContext {
        private List<InputPort<?>> inputPorts;
        private final int parallelPartitionCount;
        private final PMapping om;

        private PartitioningContextImpl(PMapping om, int parallelPartitionCount) {
            this.om = om;
            this.parallelPartitionCount = parallelPartitionCount;
        }

        @Override
        public int getParallelPartitionCount() {
            return parallelPartitionCount;
        }

        @Override
        public List<InputPort<?>> getInputPorts() {
            if (inputPorts == null) {
                inputPorts = getInputPortList(om.logicalOperator);
            }
            return inputPorts;
        }
    }

    private void initPartitioning(PMapping m, int partitionCnt) {
        Operator operator = m.logicalOperator.getOperator();
        Collection<Partition<Operator>> partitions;

        @SuppressWarnings("unchecked")
        Partitioner<Operator> partitioner = m.logicalOperator.getAttributes().contains(OperatorContext.PARTITIONER)
                ? (Partitioner<Operator>) m.logicalOperator.getValue(OperatorContext.PARTITIONER)
                : operator instanceof Partitioner ? (Partitioner<Operator>) operator : null;

        Collection<Partition<Operator>> collection = new ArrayList<Partition<Operator>>(1);
        DefaultPartition<Operator> firstPartition = new DefaultPartition<Operator>(operator);
        collection.add(firstPartition);

        if (partitioner != null) {
            partitions = partitioner.definePartitions(collection, new PartitioningContextImpl(m, partitionCnt));

            if (partitions == null || partitions.isEmpty()) {
                throw new IllegalStateException("Partitioner returns null or empty.");
            }
        } else {
            // This handles the case when parallel partitioning is occurring. The partition count
            // will be non-zero in the case of parallel partitioning.
            for (int partitionCounter = 0; partitionCounter < partitionCnt - 1; partitionCounter++) {
                collection.add(firstPartition);
            }
            partitions = collection;
        }

        Collection<StatsListener> statsListeners = m.logicalOperator.getValue(OperatorContext.STATS_LISTENERS);
        if (statsListeners != null && !statsListeners.isEmpty()) {
            if (m.statsHandlers == null) {
                m.statsHandlers = new ArrayList<StatsListener>(statsListeners.size());
            }
            m.statsHandlers.addAll(statsListeners);
        }

        if (m.logicalOperator.getOperator() instanceof StatsListener) {
            if (m.statsHandlers == null) {
                m.statsHandlers = new ArrayList<StatsListener>(1);
            }
            m.statsHandlers.add(new StatsListenerProxy(m.logicalOperator));
        }

        // create operator instance per partition
        Map<Integer, Partition<Operator>> operatorIdToPartition = Maps
                .newHashMapWithExpectedSize(partitions.size());
        for (Partition<Operator> partition : partitions) {
            PTOperator p = addPTOperator(m, partition, Checkpoint.INITIAL_CHECKPOINT);
            operatorIdToPartition.put(p.getId(), partition);
        }

        if (partitioner != null) {
            partitioner.partitioned(operatorIdToPartition);
        }
    }

    private class RepartitionContext extends PartitioningContextImpl {
        final List<PTOperator> operators;
        final List<DefaultPartition<Operator>> currentPartitions;
        final Map<Partition<?>, PTOperator> currentPartitionMap;
        final Map<Integer, Partition<Operator>> operatorIdToPartition;
        final List<Partition<Operator>> addedPartitions = new ArrayList<Partition<Operator>>();
        Checkpoint minCheckpoint = null;
        Collection<Partition<Operator>> newPartitions = null;

        RepartitionContext(Partitioner<Operator> partitioner, PMapping currentMapping, int partitionCount) {
            super(currentMapping, partitionCount);
            this.operators = currentMapping.partitions;
            this.currentPartitions = new ArrayList<DefaultPartition<Operator>>(operators.size());
            this.currentPartitionMap = Maps.newHashMapWithExpectedSize(operators.size());
            this.operatorIdToPartition = Maps.newHashMapWithExpectedSize(operators.size());

            // collect current partitions with committed operator state
            // those will be needed by the partitioner for split/merge
            for (PTOperator pOperator : operators) {
                Map<InputPort<?>, PartitionKeys> pks = pOperator.getPartitionKeys();
                if (pks == null) {
                    throw new AssertionError("Null partition: " + pOperator);
                }

                // if partitions checkpoint at different windows, processing for new or modified
                // partitions will start from earliest checkpoint found (at least once semantics)
                if (minCheckpoint == null) {
                    minCheckpoint = pOperator.recoveryCheckpoint;
                } else if (minCheckpoint.windowId > pOperator.recoveryCheckpoint.windowId) {
                    minCheckpoint = pOperator.recoveryCheckpoint;
                }

                Operator partitionedOperator = loadOperator(pOperator);
                DefaultPartition<Operator> partition = new DefaultPartition<Operator>(partitionedOperator, pks,
                        pOperator.loadIndicator, pOperator.stats);
                currentPartitions.add(partition);
                currentPartitionMap.put(partition, pOperator);
                LOG.debug("partition load: {} {} {}", pOperator, partition.getPartitionKeys(), partition.getLoad());
                operatorIdToPartition.put(pOperator.getId(), partition);
            }

            newPartitions = partitioner.definePartitions(new ArrayList<Partition<Operator>>(currentPartitions),
                    this);
        }

    }

    private Partitioner<Operator> getPartitioner(PMapping currentMapping) {
        Operator operator = currentMapping.logicalOperator.getOperator();
        Partitioner<Operator> partitioner = null;
        if (currentMapping.logicalOperator.getAttributes().contains(OperatorContext.PARTITIONER)) {
            @SuppressWarnings("unchecked")
            Partitioner<Operator> tmp = (Partitioner<Operator>) currentMapping.logicalOperator
                    .getValue(OperatorContext.PARTITIONER);
            partitioner = tmp;
        } else if (operator instanceof Partitioner) {
            @SuppressWarnings("unchecked")
            Partitioner<Operator> tmp = (Partitioner<Operator>) operator;
            partitioner = tmp;
        }
        return partitioner;
    }

    private void redoPartitions(PMapping currentMapping, String note) {
        Partitioner<Operator> partitioner = getPartitioner(currentMapping);
        if (partitioner == null) {
            LOG.warn("No partitioner for {}", currentMapping.logicalOperator);
            return;
        }

        RepartitionContext mainPC = new RepartitionContext(partitioner, currentMapping, 0);
        if (mainPC.newPartitions.isEmpty()) {
            LOG.warn("Empty partition list after repartition: {}", currentMapping.logicalOperator);
            return;
        }

        int memoryPerPartition = currentMapping.logicalOperator.getValue(OperatorContext.MEMORY_MB);
        for (Map.Entry<OutputPortMeta, StreamMeta> stream : currentMapping.logicalOperator.getOutputStreams()
                .entrySet()) {
            if (stream.getValue().getLocality() != Locality.THREAD_LOCAL
                    && stream.getValue().getLocality() != Locality.CONTAINER_LOCAL) {
                memoryPerPartition += stream.getKey().getValue(PortContext.BUFFER_MEMORY_MB);
            }
        }
        for (OperatorMeta pp : currentMapping.parallelPartitions) {
            for (Map.Entry<OutputPortMeta, StreamMeta> stream : pp.getOutputStreams().entrySet()) {
                if (stream.getValue().getLocality() != Locality.THREAD_LOCAL
                        && stream.getValue().getLocality() != Locality.CONTAINER_LOCAL) {
                    memoryPerPartition += stream.getKey().getValue(PortContext.BUFFER_MEMORY_MB);
                }
            }
            memoryPerPartition += pp.getValue(OperatorContext.MEMORY_MB);
        }
        int requiredMemoryMB = (mainPC.newPartitions.size() - mainPC.currentPartitions.size()) * memoryPerPartition;
        if (requiredMemoryMB > availableMemoryMB) {
            LOG.warn("Insufficient headroom for repartitioning: available {}m required {}m", availableMemoryMB,
                    requiredMemoryMB);
            return;
        }

        List<Partition<Operator>> addedPartitions = new ArrayList<Partition<Operator>>();
        // determine modifications of partition set, identify affected operator instance(s)
        for (Partition<Operator> newPartition : mainPC.newPartitions) {
            PTOperator op = mainPC.currentPartitionMap.remove(newPartition);
            if (op == null) {
                addedPartitions.add(newPartition);
            } else {
                // check whether mapping was changed
                for (DefaultPartition<Operator> pi : mainPC.currentPartitions) {
                    if (pi == newPartition && pi.isModified()) {
                        // existing partition changed (operator or partition keys)
                        // remove/add to update subscribers and state
                        mainPC.currentPartitionMap.put(newPartition, op);
                        addedPartitions.add(newPartition);
                    }
                }
            }
        }

        // remaining entries represent deprecated partitions
        this.undeployOpers.addAll(mainPC.currentPartitionMap.values());
        // downstream dependencies require redeploy, resolve prior to modifying plan
        Set<PTOperator> deps = this.getDependents(mainPC.currentPartitionMap.values());
        this.undeployOpers.addAll(deps);
        // dependencies need redeploy, except operators excluded in remove
        this.deployOpers.addAll(deps);

        // process parallel partitions before removing operators from the plan
        LinkedHashMap<PMapping, RepartitionContext> partitionContexts = Maps.newLinkedHashMap();
        Stack<OperatorMeta> parallelPartitions = new Stack<LogicalPlan.OperatorMeta>();
        parallelPartitions.addAll(currentMapping.parallelPartitions);
        pendingLoop: while (!parallelPartitions.isEmpty()) {
            OperatorMeta ppMeta = parallelPartitions.pop();
            for (StreamMeta s : ppMeta.getInputStreams().values()) {
                if (currentMapping.parallelPartitions.contains(s.getSource().getOperatorMeta())
                        && parallelPartitions.contains(s.getSource().getOperatorMeta())) {
                    parallelPartitions.push(ppMeta);
                    parallelPartitions.remove(s.getSource().getOperatorMeta());
                    parallelPartitions.push(s.getSource().getOperatorMeta());
                    continue pendingLoop;
                }
            }
            LOG.debug("Processing parallel partition {}", ppMeta);

            PMapping ppm = this.logicalToPTOperator.get(ppMeta);
            Partitioner<Operator> ppp = getPartitioner(ppm);
            if (ppp == null) {
                partitionContexts.put(ppm, null);
            } else {
                RepartitionContext pc = new RepartitionContext(ppp, ppm, mainPC.newPartitions.size());
                if (pc.newPartitions == null) {
                    throw new IllegalStateException(
                            "Partitioner returns null for parallel partition " + ppm.logicalOperator);
                }
                partitionContexts.put(ppm, pc);
            }
        }

        // plan updates start here, after all changes were identified
        // remove obsolete operators first, any freed resources
        // can subsequently be used for new/modified partitions
        List<PTOperator> copyPartitions = Lists.newArrayList(currentMapping.partitions);
        // remove deprecated partitions from plan
        for (PTOperator p : mainPC.currentPartitionMap.values()) {
            copyPartitions.remove(p);
            removePartition(p, currentMapping);
            mainPC.operatorIdToPartition.remove(p.getId());
        }
        currentMapping.partitions = copyPartitions;

        // add new operators
        for (Partition<Operator> newPartition : addedPartitions) {
            PTOperator p = addPTOperator(currentMapping, newPartition, mainPC.minCheckpoint);
            mainPC.operatorIdToPartition.put(p.getId(), newPartition);
        }

        // process parallel partition changes
        for (Map.Entry<PMapping, RepartitionContext> e : partitionContexts.entrySet()) {
            if (e.getValue() == null) {
                // no partitioner, add required operators
                for (int i = 0; i < addedPartitions.size(); i++) {
                    LOG.debug("Automatically adding to parallel partition {}", e.getKey());
                    // set activation windowId to conform to upstream checkpoints
                    addPTOperator(e.getKey(), null, mainPC.minCheckpoint);
                }
            } else {
                RepartitionContext pc = e.getValue();
                // track previous parallel partition mapping
                Map<Partition<Operator>, Partition<Operator>> prevMapping = Maps.newHashMap();
                for (int i = 0; i < mainPC.currentPartitions.size(); i++) {
                    prevMapping.put(pc.currentPartitions.get(i), mainPC.currentPartitions.get(i));
                }
                // determine which new partitions match upstream; the remaining ones are treated as new operators
                Map<Partition<Operator>, Partition<Operator>> newMapping = Maps.newHashMap();
                Iterator<Partition<Operator>> itMain = mainPC.newPartitions.iterator();
                Iterator<Partition<Operator>> itParallel = pc.newPartitions.iterator();
                while (itMain.hasNext() && itParallel.hasNext()) {
                    newMapping.put(itParallel.next(), itMain.next());
                }

                for (Partition<Operator> newPartition : pc.newPartitions) {
                    PTOperator op = pc.currentPartitionMap.remove(newPartition);
                    if (op == null) {
                        pc.addedPartitions.add(newPartition);
                    } else if (prevMapping.get(newPartition) != newMapping.get(newPartition)) {
                        // upstream partitions don't match, remove/add to replace with new operator
                        pc.currentPartitionMap.put(newPartition, op);
                        pc.addedPartitions.add(newPartition);
                    } else {
                        // check whether mapping was changed - based on DefaultPartition implementation
                        for (DefaultPartition<Operator> pi : pc.currentPartitions) {
                            if (pi == newPartition && pi.isModified()) {
                                // existing partition changed (operator or partition keys)
                                // remove/add to update subscribers and state
                                mainPC.currentPartitionMap.put(newPartition, op);
                                pc.addedPartitions.add(newPartition);
                            }
                        }
                    }
                }

                if (!pc.currentPartitionMap.isEmpty()) {
                    // remove obsolete partitions
                    List<PTOperator> cowPartitions = Lists.newArrayList(e.getKey().partitions);
                    for (PTOperator p : pc.currentPartitionMap.values()) {
                        cowPartitions.remove(p);
                        removePartition(p, e.getKey());
                        pc.operatorIdToPartition.remove(p.getId());
                    }
                    e.getKey().partitions = cowPartitions;
                }
                // add new partitions
                for (Partition<Operator> newPartition : pc.addedPartitions) {
                    PTOperator oper = addPTOperator(e.getKey(), newPartition, mainPC.minCheckpoint);
                    pc.operatorIdToPartition.put(oper.getId(), newPartition);
                }

                getPartitioner(e.getKey()).partitioned(pc.operatorIdToPartition);
            }
        }

        updateStreamMappings(currentMapping);
        for (PMapping pp : partitionContexts.keySet()) {
            updateStreamMappings(pp);
        }

        deployChanges();

        if (mainPC.currentPartitions.size() != mainPC.newPartitions.size()) {
            StramEvent ev = new StramEvent.PartitionEvent(currentMapping.logicalOperator.getName(),
                    mainPC.currentPartitions.size(), mainPC.newPartitions.size());
            ev.setReason(note);
            this.ctx.recordEventAsync(ev);
        }

        partitioner.partitioned(mainPC.operatorIdToPartition);
    }

    private void updateStreamMappings(PMapping m) {
        for (Map.Entry<OutputPortMeta, StreamMeta> opm : m.logicalOperator.getOutputStreams().entrySet()) {
            StreamMapping ug = m.outputStreams.get(opm.getKey());
            if (ug == null) {
                ug = new StreamMapping(opm.getValue(), this);
                m.outputStreams.put(opm.getKey(), ug);
            }
            LOG.debug("update stream mapping for {} {}", opm.getKey().getOperatorMeta(),
                    opm.getKey().getPortName());
            ug.setSources(m.partitions);
        }

        for (Map.Entry<InputPortMeta, StreamMeta> ipm : m.logicalOperator.getInputStreams().entrySet()) {
            PMapping sourceMapping = this.logicalToPTOperator.get(ipm.getValue().getSource().getOperatorMeta());
            if (ipm.getValue().getSource().getOperatorMeta().getOperator() instanceof Operator.DelayOperator) {
                // skip if the source is a DelayOperator
                continue;
            }
            if (ipm.getKey().getValue(PortContext.PARTITION_PARALLEL)) {
                if (sourceMapping.partitions.size() < m.partitions.size()) {
                    throw new AssertionError("Number of partitions don't match in parallel mapping "
                            + sourceMapping.logicalOperator.getName() + " -> " + m.logicalOperator.getName() + ", "
                            + sourceMapping.partitions.size() + " -> " + m.partitions.size());
                }
                int slidingWindowCount = 0;
                OperatorMeta sourceOM = sourceMapping.logicalOperator;
                if (sourceOM.getAttributes().contains(Context.OperatorContext.SLIDE_BY_WINDOW_COUNT)) {
                    if (sourceOM.getValue(Context.OperatorContext.SLIDE_BY_WINDOW_COUNT) < sourceOM
                            .getValue(Context.OperatorContext.APPLICATION_WINDOW_COUNT)) {
                        slidingWindowCount = sourceOM.getValue(OperatorContext.SLIDE_BY_WINDOW_COUNT);
                    } else {
                        LOG.warn("Sliding Window Count {} should be less than APPLICATION WINDOW COUNT {}",
                                sourceOM.getValue(Context.OperatorContext.SLIDE_BY_WINDOW_COUNT),
                                sourceOM.getValue(Context.OperatorContext.APPLICATION_WINDOW_COUNT));
                    }
                }
                for (int i = 0; i < m.partitions.size(); i++) {
                    PTOperator oper = m.partitions.get(i);
                    PTOperator sourceOper = sourceMapping.partitions.get(i);
                    for (PTOutput sourceOut : sourceOper.outputs) {
                        nextSource: if (sourceOut.logicalStream == ipm.getValue()) {
                            //avoid duplicate entries in case of parallel partitions
                            for (PTInput sinkIn : sourceOut.sinks) {
                                //check whether the operator is already in the sinks list and whether that sink's port
                                //name matches the input port meta currently being looked at, since an output port may
                                //connect to multiple inputs of the same operator.
                                if (sinkIn.target == oper && sinkIn.portName.equals(ipm.getKey().getPortName())) {
                                    break nextSource;
                                }
                            }
                            PTInput input;
                            if (slidingWindowCount > 0) {
                                PTOperator slidingUnifier = StreamMapping.createSlidingUnifier(
                                        sourceOut.logicalStream, this,
                                        sourceOM.getValue(Context.OperatorContext.APPLICATION_WINDOW_COUNT),
                                        slidingWindowCount);
                                StreamMapping.addInput(slidingUnifier, sourceOut, null);
                                input = new PTInput(ipm.getKey().getPortName(), ipm.getValue(), oper, null,
                                        slidingUnifier.outputs.get(0),
                                        ipm.getKey().getValue(LogicalPlan.IS_CONNECTED_TO_DELAY_OPERATOR));
                                sourceMapping.outputStreams.get(ipm.getValue().getSource()).slidingUnifiers
                                        .add(slidingUnifier);
                            } else {
                                input = new PTInput(ipm.getKey().getPortName(), ipm.getValue(), oper, null,
                                        sourceOut,
                                        ipm.getKey().getValue(LogicalPlan.IS_CONNECTED_TO_DELAY_OPERATOR));
                            }
                            oper.inputs.add(input);
                        }
                    }
                }
            } else {
                StreamMapping ug = sourceMapping.outputStreams.get(ipm.getValue().getSource());
                if (ug == null) {
                    ug = new StreamMapping(ipm.getValue(), this);
                    m.outputStreams.put(ipm.getValue().getSource(), ug);
                }
                LOG.debug("update upstream stream mapping for {} {}", sourceMapping.logicalOperator,
                        ipm.getValue().getSource().getPortName());
                ug.setSources(sourceMapping.partitions);
            }
        }

    }

    public void deployChanges() {
        Set<PTContainer> newContainers = Sets.newHashSet();
        Set<PTContainer> releaseContainers = Sets.newHashSet();
        assignContainers(newContainers, releaseContainers);
        updatePartitionsInfoForPersistOperator(this.dag);
        this.undeployOpers.removeAll(newOpers.keySet());
        //make sure all the new operators are included in deploy operator list
        this.deployOpers.addAll(this.newOpers.keySet());
        // include downstream dependencies of affected operators into redeploy
        Set<PTOperator> deployOperators = this.getDependents(this.deployOpers);
        ctx.deploy(releaseContainers, this.undeployOpers, newContainers, deployOperators);
        this.newOpers.clear();
        this.deployOpers.clear();
        this.undeployOpers.clear();
    }

    private void assignContainers(Set<PTContainer> newContainers, Set<PTContainer> releaseContainers) {
        Set<PTOperator> mxnUnifiers = Sets.newHashSet();
        for (PTOperator o : this.newOpers.keySet()) {
            mxnUnifiers.addAll(o.upstreamMerge.values());
        }
        Set<PTContainer> updatedContainers = Sets.newHashSet();

        for (Map.Entry<PTOperator, Operator> operEntry : this.newOpers.entrySet()) {

            PTOperator oper = operEntry.getKey();
            Checkpoint checkpoint = getActivationCheckpoint(operEntry.getKey());
            initCheckpoint(oper, operEntry.getValue(), checkpoint);

            if (mxnUnifiers.contains(operEntry.getKey())) {
                // MxN unifiers are assigned with the downstream operator
                continue;
            }

            PTContainer newContainer = null;
            int memoryMB = 0;

            // handle container locality
            for (PTOperator inlineOper : oper.getGrouping(Locality.CONTAINER_LOCAL).getOperatorSet()) {
                if (inlineOper.container != null) {
                    newContainer = inlineOper.container;
                    break;
                }
                memoryMB += inlineOper.operatorMeta.getValue(OperatorContext.MEMORY_MB);
                memoryMB += inlineOper.getBufferServerMemory();
            }

            if (newContainer == null) {
                int vCores = getVCores(oper.getGrouping(Locality.CONTAINER_LOCAL).getOperatorSet());
                // attempt to find empty container with required size
                for (PTContainer c : this.containers) {
                    if (c.operators.isEmpty() && c.getState() == PTContainer.State.ACTIVE
                            && c.getAllocatedMemoryMB() == memoryMB && c.getAllocatedVCores() == vCores) {
                        LOG.debug("Reusing existing container {} for {}", c, oper);
                        c.setRequiredMemoryMB(0);
                        c.setRequiredVCores(0);
                        newContainer = c;
                        break;
                    }
                }
                if (newContainer == null) {
                    // get new container
                    LOG.debug("New container for: " + oper);
                    newContainer = new PTContainer(this);
                    newContainers.add(newContainer);
                    containers.add(newContainer);
                }
                updatedContainers.add(newContainer);
            }
            setContainer(oper, newContainer);
        }
        // release containers that are no longer used
        for (PTContainer c : this.containers) {
            if (c.operators.isEmpty()) {
                LOG.debug("Container {} to be released", c);
                releaseContainers.add(c);
                containers.remove(c);
            }
        }
        for (PTContainer c : updatedContainers) {
            updateContainerMemoryWithBufferServer(c);
            c.setRequiredVCores(getVCores(c.getOperators()));
        }
    }

    private void initCheckpoint(PTOperator oper, Operator oo, Checkpoint checkpoint) {
        try {
            LOG.debug("Writing activation checkpoint {} {} {}", checkpoint, oper, oo);
            long windowId = oper.isOperatorStateLess() ? Stateless.WINDOW_ID : checkpoint.windowId;
            StorageAgent agent = oper.operatorMeta.getValue(OperatorContext.STORAGE_AGENT);
            agent.save(oo, oper.id, windowId);
            if (agent instanceof AsyncFSStorageAgent) {
                AsyncFSStorageAgent asyncFSStorageAgent = (AsyncFSStorageAgent) agent;
                if (!asyncFSStorageAgent.isSyncCheckpoint()) {
                    asyncFSStorageAgent.copyToHDFS(oper.id, windowId);
                }
            }
        } catch (IOException e) {
            // inconsistent state, no recovery option, requires shutdown
            throw new IllegalStateException("Failed to write operator state after partition change " + oper, e);
        }
        oper.setRecoveryCheckpoint(checkpoint);
        if (!Checkpoint.INITIAL_CHECKPOINT.equals(checkpoint)) {
            oper.checkpoints.add(checkpoint);
        }
    }

    public Operator loadOperator(PTOperator oper) {
        try {
            LOG.debug("Loading state for {}", oper);
            return (Operator) oper.operatorMeta.getValue(OperatorContext.STORAGE_AGENT).load(oper.id,
                    oper.isOperatorStateLess() ? Stateless.WINDOW_ID : oper.recoveryCheckpoint.windowId);
        } catch (IOException e) {
            throw new RuntimeException("Failed to read partition state for " + oper, e);
        }
    }

    /**
     * Determine the activation checkpoint for the given operator.
     * Recursively traverses inputs until an existing checkpoint or a root operator is found.
     * Returns the operator's own recovery checkpoint when it is already initialized.
     * @param oper the operator for which to determine the checkpoint
     * @return the activation checkpoint
     */
    private Checkpoint getActivationCheckpoint(PTOperator oper) {
        if (oper.recoveryCheckpoint == null && oper.checkpoints.isEmpty()) {
            Checkpoint activationCheckpoint = Checkpoint.INITIAL_CHECKPOINT;
            for (PTInput input : oper.inputs) {
                PTOperator sourceOper = input.source.source;
                if (sourceOper.checkpoints.isEmpty()) {
                    getActivationCheckpoint(sourceOper);
                }
                activationCheckpoint = Checkpoint.max(activationCheckpoint, sourceOper.recoveryCheckpoint);
            }
            return activationCheckpoint;
        }
        return oper.recoveryCheckpoint;
    }

    /**
     * Remove a partition that was reported as terminated by the execution layer.
     * Recursively removes all downstream operators with no remaining input.
     * @param p
     */
    public void removeTerminatedPartition(PTOperator p) {
        // keep track of downstream operators for cascading remove
        Set<PTOperator> downstreamOpers = new HashSet<>(p.outputs.size());
        for (PTOutput out : p.outputs) {
            for (PTInput sinkIn : out.sinks) {
                downstreamOpers.add(sinkIn.target);
            }
        }
        PMapping currentMapping = this.logicalToPTOperator.get(p.operatorMeta);
        if (currentMapping != null) {
            List<PTOperator> copyPartitions = Lists.newArrayList(currentMapping.partitions);
            copyPartitions.remove(p);
            removePartition(p, currentMapping);
            currentMapping.partitions = copyPartitions;
        } else {
            // remove the operator
            removePTOperator(p);
        }
        // remove orphaned downstream operators
        for (PTOperator dop : downstreamOpers) {
            if (dop.inputs.isEmpty()) {
                removeTerminatedPartition(dop);
            }
        }
        deployChanges();
    }

    /**
     * Remove the given partition with any associated parallel partitions and
     * per-partition outputStreams.
     *
     * @param oper the partition to remove
     * @param operatorMapping the mapping the partition belongs to
     */
    private void removePartition(PTOperator oper, PMapping operatorMapping) {
        // remove any parallel partition
        for (PTOutput out : oper.outputs) {
            // copy list as it is modified by recursive remove
            for (PTInput in : Lists.newArrayList(out.sinks)) {
                for (LogicalPlan.InputPortMeta im : in.logicalStream.getSinks()) {
                    PMapping m = this.logicalToPTOperator.get(im.getOperatorWrapper());
                    if (m.parallelPartitions == operatorMapping.parallelPartitions) {
                        // associated operator parallel partitioned
                        removePartition(in.target, operatorMapping);
                        m.partitions.remove(in.target);
                    }
                }
            }
        }
        // remove the operator
        removePTOperator(oper);
    }

    private PTOperator addPTOperator(PMapping nodeDecl, Partition<? extends Operator> partition,
            Checkpoint checkpoint) {
        PTOperator oper = newOperator(nodeDecl.logicalOperator, nodeDecl.logicalOperator.getName());
        oper.recoveryCheckpoint = checkpoint;

        // output port objects
        for (Map.Entry<LogicalPlan.OutputPortMeta, StreamMeta> outputEntry : nodeDecl.logicalOperator
                .getOutputStreams().entrySet()) {
            setupOutput(nodeDecl, oper, outputEntry);
        }

        String host = null;
        if (partition != null) {
            oper.setPartitionKeys(partition.getPartitionKeys());
            host = partition.getAttributes().get(OperatorContext.LOCALITY_HOST);
        }
        if (host == null) {
            host = nodeDecl.logicalOperator.getValue(OperatorContext.LOCALITY_HOST);
        }

        nodeDecl.addPartition(oper);
        this.newOpers.put(oper,
                partition != null ? partition.getPartitionedInstance() : nodeDecl.logicalOperator.getOperator());

        //
        // update locality
        //
        setLocalityGrouping(nodeDecl, oper, inlinePrefs, Locality.CONTAINER_LOCAL, host);
        setLocalityGrouping(nodeDecl, oper, localityPrefs, Locality.NODE_LOCAL, host);

        return oper;
    }

    /**
     * Create the output port mapping for the given operator and port.
     * Called when adding a new partition or a new logical stream.
     * Does nothing if the output was already set up (when adding a sink to an existing stream).
     * @param mapping logical-to-physical mapping of the operator
     * @param oper physical operator for which the output is created
     * @param outputEntry output port meta and stream meta of the port
     */
    private void setupOutput(PMapping mapping, PTOperator oper,
            Map.Entry<LogicalPlan.OutputPortMeta, StreamMeta> outputEntry) {
        for (PTOutput out : oper.outputs) {
            if (out.logicalStream == outputEntry.getValue()) {
                // already processed
                return;
            }
        }

        PTOutput out = new PTOutput(outputEntry.getKey().getPortName(), outputEntry.getValue(), oper);
        oper.outputs.add(out);
    }

    PTOperator newOperator(OperatorMeta om, String name) {
        PTOperator oper = new PTOperator(this, idSequence.incrementAndGet(), name, om);
        allOperators.put(oper.id, oper);
        oper.inputs = new ArrayList<PTInput>();
        oper.outputs = new ArrayList<PTOutput>();

        this.ctx.recordEventAsync(new StramEvent.CreateOperatorEvent(oper.getName(), oper.getId()));

        return oper;
    }

    private void setLocalityGrouping(PMapping pnodes, PTOperator newOperator, LocalityPrefs localityPrefs,
            Locality ltype, String host) {

        HostOperatorSet grpObj = newOperator.getGrouping(ltype);
        if (host != null) {
            grpObj.setHost(host);
        }
        Set<PTOperator> s = grpObj.getOperatorSet();
        s.add(newOperator);
        LocalityPref loc = localityPrefs.prefs.get(pnodes);
        if (loc != null) {
            for (PMapping localPM : loc.operators) {
                if (pnodes.parallelPartitions == localPM.parallelPartitions) {
                    if (localPM.partitions.size() >= pnodes.partitions.size()) {
                        // apply locality setting per partition
                        s.addAll(localPM.partitions.get(pnodes.partitions.size() - 1).getGrouping(ltype)
                                .getOperatorSet());
                    }
                } else {
                    for (PTOperator otherNode : localPM.partitions) {
                        s.addAll(otherNode.getGrouping(ltype).getOperatorSet());
                    }
                }
            }
            for (PTOperator localOper : s) {
                if (grpObj.getHost() == null) {
                    grpObj.setHost(localOper.groupings.get(ltype).getHost());
                }
                localOper.groupings.put(ltype, grpObj);
            }
        }
    }
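
    // Rough reading of the grouping logic above (a sketch, not normative): the new operator
    // joins a shared HostOperatorSet for the requested locality; an affine mapping that shares
    // the same parallel partitioning contributes only the partition at the matching index,
    // other affine mappings contribute all of their partitions, and the group host is taken
    // from the first member that already has one assigned.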

    private List<InputPort<?>> getInputPortList(LogicalPlan.OperatorMeta operatorMeta) {
        List<InputPort<?>> inputPortList = Lists.newArrayList();

        for (InputPortMeta inputPortMeta : operatorMeta.getInputStreams().keySet()) {
            inputPortList.add(inputPortMeta.getPortObject());
        }

        return inputPortList;
    }

    void removePTOperator(PTOperator oper) {
        LOG.debug("Removing operator " + oper);

        // per partition merge operators
        if (!oper.upstreamMerge.isEmpty()) {
            for (PTOperator unifier : oper.upstreamMerge.values()) {
                removePTOperator(unifier);
            }
        }

        // remove inputs from downstream operators
        for (PTOutput out : oper.outputs) {
            for (PTInput sinkIn : out.sinks) {
                if (sinkIn.source.source == oper) {
                    ArrayList<PTInput> cowInputs = Lists.newArrayList(sinkIn.target.inputs);
                    cowInputs.remove(sinkIn);
                    sinkIn.target.inputs = cowInputs;
                }
            }
        }
        // remove from upstream operators
        for (PTInput in : oper.inputs) {
            in.source.sinks.remove(in);
        }

        for (HostOperatorSet s : oper.groupings.values()) {
            s.getOperatorSet().remove(oper);
        }

        // remove checkpoint states
        try {
            synchronized (oper.checkpoints) {
                for (Checkpoint checkpoint : oper.checkpoints) {
                    oper.operatorMeta.getValue(OperatorContext.STORAGE_AGENT).delete(oper.id, checkpoint.windowId);
                }
            }
        } catch (IOException e) {
            LOG.warn("Failed to remove state for " + oper, e);
        }

        List<PTOperator> cowList = Lists.newArrayList(oper.container.operators);
        cowList.remove(oper);
        oper.container.operators = cowList;
        this.deployOpers.remove(oper);
        this.undeployOpers.add(oper);
        this.allOperators.remove(oper.id);
        this.ctx.recordEventAsync(new StramEvent.RemoveOperatorEvent(oper.getName(), oper.getId()));
    }
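
    // Note on the copy-on-write style above (cowInputs/cowList): collections are replaced
    // wholesale with modified copies rather than mutated in place, presumably so that
    // concurrent readers holding a reference to the old list never see a partial update.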

    public PlanContext getContext() {
        return ctx;
    }

    public LogicalPlan getLogicalPlan() {
        return this.dag;
    }

    public List<PTContainer> getContainers() {
        return this.containers;
    }

    public Map<Integer, PTOperator> getAllOperators() {
        return this.allOperators;
    }

    /**
     * Get the partitions for the given logical operator.
     * Partitions represent instances of the operator and do not include any unifiers.
     * @param logicalOperator the logical operator
     * @return the list of partitions
     */
    public List<PTOperator> getOperators(OperatorMeta logicalOperator) {
        return this.logicalToPTOperator.get(logicalOperator).partitions;
    }

    public Collection<PTOperator> getAllOperators(OperatorMeta logicalOperator) {
        return this.logicalToPTOperator.get(logicalOperator).getAllOperators();
    }
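
    // Lookup sketch (the operator name "wordCount" and the variable "plan" are illustrative
    // assumptions, not taken from this class):
    //
    //   OperatorMeta meta = plan.getLogicalPlan().getOperatorMeta("wordCount");
    //   List<PTOperator> partitions = plan.getOperators(meta);   // partitions only
    //   Collection<PTOperator> all = plan.getAllOperators(meta); // partitions plus unifiers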

    public List<PTOperator> getLeafOperators() {
        List<PTOperator> operators = new ArrayList<>();
        for (OperatorMeta opMeta : dag.getLeafOperators()) {
            operators.addAll(getAllOperators(opMeta));
        }
        return operators;
    }

    public boolean hasMapping(OperatorMeta om) {
        return this.logicalToPTOperator.containsKey(om);
    }

    // used for testing only
    @VisibleForTesting
    public List<PTOperator> getMergeOperators(OperatorMeta logicalOperator) {
        List<PTOperator> opers = Lists.newArrayList();
        for (StreamMapping ug : this.logicalToPTOperator.get(logicalOperator).outputStreams.values()) {
            ug.addTo(opers);
        }
        return opers;
    }

    protected List<OperatorMeta> getRootOperators() {
        return dag.getRootOperators();
    }

    private void getDeps(PTOperator operator, Set<PTOperator> visited) {
        visited.add(operator);
        for (PTInput in : operator.inputs) {
            if (in.source.isDownStreamInline()) {
                PTOperator sourceOperator = in.source.source;
                if (!visited.contains(sourceOperator)) {
                    getDeps(sourceOperator, visited);
                }
            }
        }
        // downstream traversal
        for (PTOutput out : operator.outputs) {
            for (PTInput sink : out.sinks) {
                PTOperator sinkOperator = sink.target;
                if (!visited.contains(sinkOperator)) {
                    getDeps(sinkOperator, visited);
                }
            }
        }
    }

    /**
     * Get all operator instances that depend on the specified operator instance(s).
     * Dependencies are all downstream and upstream inline operators, plus any persist
     * operators attached to the affected streams.
     * @param operators the operator instances to compute dependencies for
     * @return set of dependent operators, including the given operators
     */
    public Set<PTOperator> getDependents(Collection<PTOperator> operators) {
        Set<PTOperator> visited = new LinkedHashSet<PTOperator>();
        if (operators != null) {
            for (PTOperator operator : operators) {
                getDeps(operator, visited);
            }
        }
        visited.addAll(getDependentPersistOperators(operators));
        return visited;
    }
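
    // Consumption sketch, mirroring how the dependent set is used elsewhere in this class
    // ("changedOper" is an assumed variable name): the same set feeds both undeploy and
    // redeploy so that inline and persist peers restart together.
    //
    //   Set<PTOperator> affected = getDependents(Collections.singletonList(changedOper));
    //   undeployOpers.addAll(affected);
    //   deployOpers.addAll(affected);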

    private Set<PTOperator> getDependentPersistOperators(Collection<PTOperator> operators) {
        Set<PTOperator> persistOperators = new LinkedHashSet<PTOperator>();
        if (operators != null) {
            for (PTOperator operator : operators) {
                for (PTInput in : operator.inputs) {
                    if (in.logicalStream.getPersistOperator() != null) {
                        for (InputPortMeta inputPort : in.logicalStream.getSinksToPersist()) {
                            if (inputPort.getOperatorWrapper().equals(operator.operatorMeta)) {
                                // Redeploy the stream wide persist operator only if the current sink is being persisted
                                persistOperators.addAll(getOperators(in.logicalStream.getPersistOperator()));
                                break;
                            }
                        }
                    }
                    for (Entry<InputPortMeta, OperatorMeta> entry : in.logicalStream.sinkSpecificPersistOperatorMap
                            .entrySet()) {
                        // Redeploy sink specific persist operators
                        persistOperators.addAll(getOperators(entry.getValue()));
                    }
                }
            }
        }
        return persistOperators;
    }

    /**
     * Add a logical operator to the plan. Assumes that upstream operators have already been added.
     * @param om meta information of the logical operator to add
     */
    public final void addLogicalOperator(OperatorMeta om) {
        PMapping pnodes = new PMapping(om);
        String host = pnodes.logicalOperator.getValue(OperatorContext.LOCALITY_HOST);
        localityPrefs.add(pnodes, host);

        PMapping upstreamPartitioned = null;

        for (Map.Entry<LogicalPlan.InputPortMeta, StreamMeta> e : om.getInputStreams().entrySet()) {
            if (e.getValue().getSource().getOperatorMeta().getOperator() instanceof Operator.DelayOperator) {
                continue;
            }
            PMapping m = logicalToPTOperator.get(e.getValue().getSource().getOperatorMeta());
            if (e.getKey().getValue(PortContext.PARTITION_PARALLEL).equals(true)) {
                // operator partitioned with upstream
                if (upstreamPartitioned != null) {
                    // need to have common root
                    if (!upstreamPartitioned.parallelPartitions.contains(m.logicalOperator)
                            && upstreamPartitioned != m) {
                        String msg = String.format("operator cannot extend multiple partitions (%s and %s)",
                                upstreamPartitioned.logicalOperator, m.logicalOperator);
                        throw new AssertionError(msg);
                    }
                }
                m.parallelPartitions.add(pnodes.logicalOperator);
                pnodes.parallelPartitions = m.parallelPartitions;
                upstreamPartitioned = m;
            }

            if (Locality.CONTAINER_LOCAL == e.getValue().getLocality()
                    || Locality.THREAD_LOCAL == e.getValue().getLocality()) {
                inlinePrefs.setLocal(m, pnodes);
            } else if (Locality.NODE_LOCAL == e.getValue().getLocality()) {
                localityPrefs.setLocal(m, pnodes);
            }
        }

        //
        // create operator instances
        //
        this.logicalToPTOperator.put(om, pnodes);
        if (upstreamPartitioned != null) {
            // parallel partition
            //LOG.debug("Operator {} should be partitioned to {} partitions", pnodes.logicalOperator.getName(), upstreamPartitioned.partitions.size());
            initPartitioning(pnodes, upstreamPartitioned.partitions.size());
        } else {
            initPartitioning(pnodes, 0);
        }
        updateStreamMappings(pnodes);
    }
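
    // The parallel partitioning branch above is driven by a port attribute on the logical DAG.
    // A sketch of how an application might request it (the operator and port names are
    // assumptions for illustration):
    //
    //   dag.setInputPortAttribute(downstreamOp.input, PortContext.PARTITION_PARALLEL, true);
    //
    // With the attribute set, the new mapping is sized to the upstream partition count;
    // otherwise initPartitioning runs with 0 as the partition count hint.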

    /**
     * Remove the physical representation of the given stream. Operators that are affected
     * in the execution layer are added to the undeploy and redeploy sets. This method does
     * not automatically remove operators from the plan.
     *
     * @param sm meta information of the logical stream to remove
     */
    public void removeLogicalStream(StreamMeta sm) {
        // remove incoming connections for logical stream
        for (InputPortMeta ipm : sm.getSinks()) {
            OperatorMeta om = ipm.getOperatorWrapper();
            PMapping m = this.logicalToPTOperator.get(om);
            if (m == null) {
                throw new AssertionError("Unknown operator " + om);
            }
            for (PTOperator oper : m.partitions) {
                List<PTInput> inputsCopy = Lists.newArrayList(oper.inputs);
                for (PTInput input : oper.inputs) {
                    if (input.logicalStream == sm) {
                        input.source.sinks.remove(input);
                        inputsCopy.remove(input);
                        undeployOpers.add(oper);
                        deployOpers.add(oper);
                    }
                }
                oper.inputs = inputsCopy;
            }
        }
        // remove outgoing connections for logical stream
        PMapping m = this.logicalToPTOperator.get(sm.getSource().getOperatorMeta());
        for (PTOperator oper : m.partitions) {
            List<PTOutput> outputsCopy = Lists.newArrayList(oper.outputs);
            for (PTOutput out : oper.outputs) {
                if (out.logicalStream == sm) {
                    for (PTInput input : out.sinks) {
                        PTOperator downstreamOper = input.target;
                        downstreamOper.inputs.remove(input);
                        Set<PTOperator> deps = this.getDependents(Collections.singletonList(downstreamOper));
                        undeployOpers.addAll(deps);
                        deployOpers.addAll(deps);
                    }
                    outputsCopy.remove(out);
                    undeployOpers.add(oper);
                    deployOpers.add(oper);
                }
            }
            oper.outputs = outputsCopy;
        }
    }

    /**
     * Connect operators through a stream. Currently a new stream does not affect locality.
     * @param ipm meta information about the input port
     */
    public void connectInput(InputPortMeta ipm) {
        for (Map.Entry<LogicalPlan.InputPortMeta, StreamMeta> inputEntry : ipm.getOperatorWrapper()
                .getInputStreams().entrySet()) {
            if (inputEntry.getKey() == ipm) {
                // initialize outputs for existing operators
                for (Map.Entry<LogicalPlan.OutputPortMeta, StreamMeta> outputEntry : inputEntry.getValue()
                        .getSource().getOperatorMeta().getOutputStreams().entrySet()) {
                    PMapping sourceOpers = this.logicalToPTOperator.get(outputEntry.getKey().getOperatorMeta());
                    for (PTOperator oper : sourceOpers.partitions) {
                        setupOutput(sourceOpers, oper, outputEntry); // idempotent
                        undeployOpers.add(oper);
                        deployOpers.add(oper);
                    }
                }
                PMapping m = this.logicalToPTOperator.get(ipm.getOperatorWrapper());
                updateStreamMappings(m);
                for (PTOperator oper : m.partitions) {
                    undeployOpers.add(oper);
                    deployOpers.add(oper);
                }
            }
        }
    }

    /**
     * Remove all physical operators for the given logical operator.
     * All connected streams must have been removed beforehand.
     * @param om meta information of the logical operator to remove
     */
    public void removeLogicalOperator(OperatorMeta om) {
        PMapping opers = this.logicalToPTOperator.get(om);
        if (opers == null) {
            throw new AssertionError("Operator not in physical plan: " + om.getName());
        }

        for (PTOperator oper : opers.partitions) {
            removePartition(oper, opers);
        }

        for (StreamMapping ug : opers.outputStreams.values()) {
            for (PTOperator oper : ug.cascadingUnifiers) {
                removePTOperator(oper);
            }
            if (ug.finalUnifier != null) {
                removePTOperator(ug.finalUnifier);
            }
        }

        LinkedHashMap<OperatorMeta, PMapping> copyMap = Maps.newLinkedHashMap(this.logicalToPTOperator);
        copyMap.remove(om);
        this.logicalToPTOperator = copyMap;
    }
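
    // Removal ordering sketch ("attachedStreams" is an assumed collection of the streams
    // connected to om; deployChanges() is the same step used after partition changes):
    //
    //   for (StreamMeta sm : attachedStreams) {
    //     removeLogicalStream(sm);
    //   }
    //   removeLogicalOperator(om);
    //   deployChanges();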

    public void setAvailableResources(int memoryMB) {
        this.availableMemoryMB = memoryMB;
    }

    public void onStatusUpdate(PTOperator oper) {
        for (StatsListener l : oper.statsListeners) {
            final StatsListener.Response rsp = l.processStats(oper.stats);
            if (rsp != null) {
                //LOG.debug("Response to processStats = {}", rsp.repartitionRequired);
                oper.loadIndicator = rsp.loadIndicator;
                if (rsp.repartitionRequired) {
                    final OperatorMeta om = oper.getOperatorMeta();
                    // concurrent heartbeat processing
                    if (this.pendingRepartition.putIfAbsent(om, om) != null) {
                        LOG.debug("Skipping repartitioning for {} load {}", oper, oper.loadIndicator);
                    } else {
                        LOG.debug("Scheduling repartitioning for {} load {}", oper, oper.loadIndicator);
                        // hand over to monitor thread
                        Runnable r = new Runnable() {
                            @Override
                            public void run() {
                                redoPartitions(logicalToPTOperator.get(om), rsp.repartitionNote);
                                pendingRepartition.remove(om);
                            }
                        };
                        ctx.dispatch(r);
                    }
                }
                if (rsp.operatorRequests != null) {
                    for (OperatorRequest cmd : rsp.operatorRequests) {
                        StramToNodeRequest request = new StramToNodeRequest();
                        request.operatorId = oper.getId();
                        request.requestType = StramToNodeRequest.RequestType.CUSTOM;
                        request.cmd = cmd;
                        ctx.addOperatorRequest(oper, request);
                    }
                }
                // for backward compatibility
                if (rsp.operatorCommands != null) {
                    for (@SuppressWarnings("deprecation")
                    com.datatorrent.api.StatsListener.OperatorCommand cmd : rsp.operatorCommands) {
                        StramToNodeRequest request = new StramToNodeRequest();
                        request.operatorId = oper.getId();
                        request.requestType = StramToNodeRequest.RequestType.CUSTOM;
                        OperatorCommandConverter converter = new OperatorCommandConverter();
                        converter.cmd = cmd;
                        request.cmd = converter;
                        ctx.addOperatorRequest(oper, request);
                    }
                }
            }
        }
    }
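
    // Sketch of a StatsListener that would exercise the repartitioning path above
    // (class name and threshold are illustrative assumptions):
    //
    //   public class LoadBasedListener implements StatsListener {
    //     @Override
    //     public Response processStats(BatchedOperatorStats stats) {
    //       Response rsp = new Response();
    //       rsp.loadIndicator = stats.getTuplesProcessedPSMA() > 10000 ? 1 : 0;
    //       rsp.repartitionRequired = rsp.loadIndicator > 0;
    //       return rsp;
    //     }
    //   }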

    /**
     * Read available checkpoints from storage agent for all operators.
     * @param startTime
     * @param currentTime
     * @throws IOException
     */
    public void syncCheckpoints(long startTime, long currentTime) throws IOException {
        for (PTOperator oper : getAllOperators().values()) {
            StorageAgent sa = oper.operatorMeta.getValue(OperatorContext.STORAGE_AGENT);
            long[] windowIds = sa.getWindowIds(oper.getId());
            Arrays.sort(windowIds);
            oper.checkpoints.clear();
            for (long wid : windowIds) {
                if (wid != Stateless.WINDOW_ID) {
                    oper.addCheckpoint(wid, startTime);
                }
            }
        }
    }

    public Integer getStreamCodecIdentifier(StreamCodec<?> streamCodecInfo) {
        Integer id;
        synchronized (streamCodecIdentifiers) {
            id = streamCodecIdentifiers.get(streamCodecInfo);
            if (id == null) {
                id = strCodecIdSequence.incrementAndGet();
                streamCodecIdentifiers.put(streamCodecInfo, id);
            }
        }
        return id;
    }
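
    // Identifier assignment sketch: ids are handed out once per distinct codec instance
    // and then reused, so repeated calls with the same codec yield the same value
    // ("codec" is an assumed StreamCodec instance):
    //
    //   Integer first = plan.getStreamCodecIdentifier(codec);
    //   Integer second = plan.getStreamCodecIdentifier(codec);
    //   // first.equals(second) holds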

    @VisibleForTesting
    public Map<StreamCodec<?>, Integer> getStreamCodecIdentifiers() {
        return Collections.unmodifiableMap(streamCodecIdentifiers);
    }

    /**
     * Adapter that wraps the deprecated {@code OperatorCommand} as an {@code OperatorRequest}
     * for backward compatibility.
     */
    public static class OperatorCommandConverter implements OperatorRequest, Serializable {
        private static final long serialVersionUID = 1L;
        @SuppressWarnings("deprecation")
        public com.datatorrent.api.StatsListener.OperatorCommand cmd;

        @SuppressWarnings("deprecation")
        @Override
        public StatsListener.OperatorResponse execute(Operator operator, int operatorId, long windowId)
                throws IOException {
            cmd.execute(operator, operatorId, windowId);
            return null;
        }
    }

}