org.apache.hadoop.corona.NodeManager.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.corona.NodeManager.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.corona;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.net.TopologyCache;
import org.apache.hadoop.util.CoronaSerializer;
import org.apache.hadoop.util.HostsFileReader;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.JsonToken;

/**
 * Manages all the nodes known in the cluster.
 */
public class NodeManager implements Configurable {
    /** Class logger */
    public static final Log LOG = LogFactory.getLog(NodeManager.class);

    /** Configuration. */
    protected CoronaConf conf;
    /** The Cluster Manager. */
    protected ClusterManager clusterManager;

    /**
     * Secondary index on nodes. This is an index of runnable nodes for a resource
     * type. There is one instance of this for each resource type.
     */
    public class RunnableIndices {
        /**
         * Shuffle period for rack containers: getRunnableNodeForRack shuffles the
         * rack's node container whenever its call counter is a multiple of this
         * value.
         */
        private static final int RACK_SHUFFLE_PERIOD = 100;

        /** Cache of RequestedNode objects, keyed by host name. */
        protected ConcurrentMap<String, RequestedNode> hostToRequestedNode = new ConcurrentHashMap<String, RequestedNode>();

        /**
         * Runnable nodes grouped by host name. Containers are created lazily and
         * are never removed from the map, because callers synchronize on the
         * container instance.
         */
        protected ConcurrentMap<String, NodeContainer> hostToRunnableNodes = new ConcurrentHashMap<String, NodeContainer>();

        /**
         * Runnable nodes grouped by rack. Containers are created lazily and are
         * never removed from the map, because callers synchronize on the
         * container instance.
         */
        protected ConcurrentMap<Node, NodeContainer> rackToRunnableNodes = new ConcurrentHashMap<Node, NodeContainer>();

        /**
         * Runnable-node counter: incremented on every addRunnable call and
         * decremented whenever deleteRunnable actually removes a node from a
         * host container. existRunnableNodes reports true while it is positive.
         */
        private AtomicInteger hostsWithRunnableNodes = new AtomicInteger(0);

        /** The type of resource this RunnableIndices is tracking */
        private final ResourceType type;

        /**
         * Number of calls made to getRunnableNodeForRack so far; compared against
         * RACK_SHUFFLE_PERIOD to decide when to shuffle a rack container.
         * NOTE(review): this plain int is incremented without holding any lock,
         * so the count is presumably only approximate under concurrent callers.
         */
        private int getRunnableNodeForRackCounter = 0;

        /**
         * Create the runnable indices for a given resource type.
         * @param type the type of resource tracked by this index
         */
        public RunnableIndices(ResourceType type) {
            this.type = type;
        }

        /**
         * Pick a runnable node from any host, skipping hosts in the excluded set.
         * The host containers are scanned twice: the first scan only accepts a
         * node whose grant count for this type is below the average load, the
         * second scan accepts the first node with enough resources.
         * @param excluded host names to ignore, may be null for "no exclusions"
         * @return a runnable node, or null if none qualifies
         */
        public ClusterNode getRunnableNodeForAny(Set<String> excluded) {
            final double averageLoad = loadManager.getAverageLoad(type);
            for (int pass = 0; pass < 2; pass++) {
                // Only the first pass tries to even out the load across nodes.
                final boolean balanceLoad = (pass == 0);
                for (NodeContainer candidates : hostToRunnableNodes.values()) {
                    if (candidates == null) {
                        continue;
                    }
                    synchronized (candidates) {
                        if (candidates.isEmpty()) {
                            continue;
                        }
                        for (ClusterNode candidate : candidates) {
                            if (excluded != null && excluded.contains(candidate.getHost())) {
                                continue;
                            }
                            if (!resourceLimit.hasEnoughResource(candidate)) {
                                continue;
                            }
                            if (!balanceLoad || candidate.getGrantCount(type) < averageLoad) {
                                return candidate;
                            }
                        }
                    }
                }
            }
            return null;
        }

        /**
         * Find a runnable node on the requested host itself.
         * @param requestedNode the requested node that needs local scheduling
         * @return the first node in the host's container that has enough
         * resources, or null when the container is missing, empty, or holds no
         * such node
         */
        public ClusterNode getRunnableNodeForHost(RequestedNode requestedNode) {
            // In the common case a host carries exactly one node.
            NodeContainer hostNodes = requestedNode.getHostNodes();
            if (hostNodes != null) {
                synchronized (hostNodes) {
                    if (!hostNodes.isEmpty()) {
                        for (ClusterNode candidate : hostNodes) {
                            if (resourceLimit.hasEnoughResource(candidate)) {
                                return candidate;
                            }
                        }
                    }
                }
            }
            return null;
        }

        /**
         * Find a runnable node in the rack of the requested node, skipping hosts
         * in the excluded set. Every RACK_SHUFFLE_PERIOD-th call that reaches a
         * non-empty rack container shuffles that container first.
         * @param requestedNode the node to look up rack locality for
         * @param excluded host names to ignore, may be null for "no exclusions"
         * @return a runnable node from the rack, or null if none qualifies
         */
        public ClusterNode getRunnableNodeForRack(RequestedNode requestedNode, Set<String> excluded) {
            NodeContainer rackNodes = requestedNode.getRackNodes();
            // Every call is counted, including those that find no container.
            getRunnableNodeForRackCounter++;
            if (rackNodes == null) {
                return null;
            }
            synchronized (rackNodes) {
                if (rackNodes.isEmpty()) {
                    return null;
                }
                boolean timeToShuffle = getRunnableNodeForRackCounter % RACK_SHUFFLE_PERIOD == 0;
                if (timeToShuffle) {
                    // Shuffling balances grants more evenly across nodes in a rack.
                    rackNodes.shuffle();
                }
                for (ClusterNode candidate : rackNodes) {
                    boolean isExcluded = excluded != null && excluded.contains(candidate.getHost());
                    if (!isExcluded && resourceLimit.hasEnoughResource(candidate)) {
                        return candidate;
                    }
                }
            }
            return null;
        }

        /**
         * Report whether this index currently tracks any runnable node.
         * @return true if the runnable-node counter is positive, false otherwise
         */
        public boolean existRunnableNodes() {
            int runnableCount = hostsWithRunnableNodes.get();
            return runnableCount > 0;
        }

        /**
         * Look up the NodeContainer of a host, registering a fresh empty one if
         * the host has none yet.
         *
         * @param host the host to get the node container for
         * @return the container registered for this host
         */
        private NodeContainer getOrCreateHostRunnableNode(String host) {
            NodeContainer existing = hostToRunnableNodes.get(host);
            if (existing != null) {
                return existing;
            }
            NodeContainer created = new NodeContainer();
            // Another thread may have registered a container in the meantime;
            // in that case its container wins.
            NodeContainer raced = hostToRunnableNodes.putIfAbsent(host, created);
            return (raced != null) ? raced : created;
        }

        /**
         * Look up the NodeContainer of a rack, registering a fresh empty one if
         * the rack has none yet.
         *
         * @param rack the rack to return the node container for
         * @return the container registered for this rack
         */
        private NodeContainer getOrCreateRackRunnableNode(Node rack) {
            NodeContainer existing = rackToRunnableNodes.get(rack);
            if (existing != null) {
                return existing;
            }
            NodeContainer created = new NodeContainer();
            // Another thread may have registered a container in the meantime;
            // in that case its container wins.
            NodeContainer raced = rackToRunnableNodes.putIfAbsent(rack, created);
            return (raced != null) ? raced : created;
        }

        /**
         * Return the RequestedNode for a given host, either by fetching the
         * cached instance or by building and caching a new one that is wired to
         * the host's and the rack's runnable containers.
         *
         * @param host the host to get the RequestedNode for
         * @return the RequestedNode object representing the host
         */
        private RequestedNode getOrCreateRequestedNode(String host) {
            RequestedNode cached = hostToRequestedNode.get(host);
            if (cached != null) {
                return cached;
            }
            NodeContainer hostRunnables = getOrCreateHostRunnableNode(host);
            // The rack is the parent of the host in the topology.
            Node rack = topologyCache.getNode(host).getParent();
            NodeContainer rackRunnables = getOrCreateRackRunnableNode(rack);
            RequestedNode created = new RequestedNode(type, host, rack, hostRunnables, rackRunnables);
            RequestedNode raced = hostToRequestedNode.putIfAbsent(host, created);
            return (raced != null) ? raced : created;
        }

        /**
         * Register a node as runnable in both the host index and the rack index,
         * and bump the runnable-node counter.
         * @param clusterNode the node to add
         */
        public void addRunnable(ClusterNode clusterNode) {
            final String host = clusterNode.getHost();

            if (LOG.isDebugEnabled()) {
                LOG.debug(clusterNode.getName() + " added to runnable list for type: " + type);
            }

            // Host-level index; the counter is updated under the container lock.
            final NodeContainer hostNodes = getOrCreateHostRunnableNode(host);
            synchronized (hostNodes) {
                hostNodes.addNode(clusterNode);
                hostsWithRunnableNodes.incrementAndGet();
            }

            // Rack-level index; the rack is the parent of the host node.
            final Node rack = clusterNode.hostNode.getParent();
            final NodeContainer rackNodes = getOrCreateRackRunnableNode(rack);
            synchronized (rackNodes) {
                rackNodes.addNode(clusterNode);
            }
        }

        /**
         * Unregister a node from both the host index and the rack index. The
         * containers themselves stay in their maps even when they become empty,
         * because other operations synchronize on the container instances.
         * @param node node to remove
         */
        public void deleteRunnable(ClusterNode node) {
            final String host = node.getHost();

            if (LOG.isDebugEnabled()) {
                LOG.debug(node.getName() + " deleted from runnable list for type: " + type);
            }

            final NodeContainer hostNodes = hostToRunnableNodes.get(host);
            if (hostNodes != null) {
                synchronized (hostNodes) {
                    boolean removed = hostNodes.removeNode(node);
                    if (removed) {
                        // Only count down when the node was really present.
                        hostsWithRunnableNodes.decrementAndGet();
                    }
                }
            }

            final Node rack = node.hostNode.getParent();
            final NodeContainer rackNodes = rackToRunnableNodes.get(rack);
            if (rackNodes != null) {
                synchronized (rackNodes) {
                    rackNodes.removeNode(node);
                }
            }
        }

        /**
         * Checks whether the host of the given node has any runnable node in
         * this index. Note that the check is made on the host's container as a
         * whole, not on the specific node. Should be called while holding the
         * node lock.
         * @param clusterNode The node.
         * @return true if the container for the node's host exists and is not
         * empty, false otherwise.
         */
        public boolean hasRunnable(ClusterNode clusterNode) {
            NodeContainer hostNodes = hostToRunnableNodes.get(clusterNode.getHost());
            if (hostNodes == null) {
                return false;
            }
            return !hostNodes.isEmpty();
        }

        /**
         * Build a snapshot of the runnable nodes: copies of every non-empty host
         * container and every non-empty rack container, plus the total number of
         * nodes found in the host containers.
         * @return The snapshot.
         */
        public NodeSnapshot getNodeSnapshot() {
            final Map<String, NodeContainer> hostCopies = new HashMap<String, NodeContainer>();
            int totalNodes = 0;
            for (Map.Entry<String, NodeContainer> hostEntry : hostToRunnableNodes.entrySet()) {
                final NodeContainer hostNodes = hostEntry.getValue();
                // Copy under the container lock so the copy is consistent.
                synchronized (hostNodes) {
                    if (hostNodes.isEmpty()) {
                        continue;
                    }
                    hostCopies.put(hostEntry.getKey(), hostNodes.copy());
                    totalNodes += hostNodes.size();
                }
            }

            final Map<Node, NodeContainer> rackCopies = new HashMap<Node, NodeContainer>();
            for (Map.Entry<Node, NodeContainer> rackEntry : rackToRunnableNodes.entrySet()) {
                final NodeContainer rackNodes = rackEntry.getValue();
                synchronized (rackNodes) {
                    if (rackNodes.isEmpty()) {
                        continue;
                    }
                    rackCopies.put(rackEntry.getKey(), rackNodes.copy());
                }
            }
            return new NodeSnapshot(topologyCache, hostCopies, rackCopies, totalNodes);
        }
    }

    /**
     * Primary data structure: maps the unique name of a node to the node
     * object.
     */
    protected ConcurrentMap<String, ClusterNode> nameToNode = new ConcurrentHashMap<String, ClusterNode>();

    /**
     * The registry of sessions running on the nodes: maps each node to the ids
     * of the sessions that were granted resources on it.
     */
    protected ConcurrentMap<ClusterNode, Set<String>> hostsToSessions = new ConcurrentHashMap<ClusterNode, Set<String>>();

    /**
     * Tracks the applications active on the node: maps the node name to the
     * per-resource-type application info strings.
     */
    protected ConcurrentMap<String, Map<ResourceType, String>> nameToApps = new ConcurrentHashMap<String, Map<ResourceType, String>>();

    /** Fault manager for the nodes */
    protected final FaultManager faultManager;

    /** Secondary indices of runnable nodes, one per resource type. */
    protected Map<ResourceType, RunnableIndices> typeToIndices = new EnumMap<ResourceType, RunnableIndices>(
            ResourceType.class);

    /** Tracks the load on nodes (grant counts per resource type). */
    protected LoadManager loadManager;

    /** The cache for local node lookups */
    protected TopologyCache topologyCache;
    /** The configuration of resources based on the CPUs */
    protected Map<Integer, Map<ResourceType, Integer>> cpuToResourcePartitioning;
    /** Shutdown flag */
    protected volatile boolean shutdown = false;

    /** The time before the node is declared dead if it doesn't heartbeat */
    protected int nodeExpiryInterval;
    /** A thread running expireNodes; created and started by the constructor. */
    protected Thread expireNodesThread = null;
    /** A runnable that is responsible for expiring nodes that don't heartbeat */
    private ExpireNodes expireNodes = new ExpireNodes();

    /** Resource limits checked before a node is considered runnable. */
    private final ResourceLimit resourceLimit = new ResourceLimit();

    /** Reader for the hosts includes/excludes lists. */
    private final HostsFileReader hostsReader;

    /**
     * NodeManager constructor given a cluster manager and a
     * {@link HostsFileReader} for includes/excludes lists.
     * All fields are assigned before the node-expiry daemon thread is started,
     * so that thread never observes a partially constructed NodeManager.
     * @param clusterManager the cluster manager
     * @param hostsReader the host reader for includes/excludes
     */
    public NodeManager(ClusterManager clusterManager, HostsFileReader hostsReader) {
        this.hostsReader = hostsReader;
        LOG.info("Included hosts: " + hostsReader.getHostNames().size() + " Excluded hosts: "
                + hostsReader.getExcludedHosts().size());
        this.clusterManager = clusterManager;
        // The final faultManager field must be set before the expiry thread
        // runs: Thread.start() publishes only what was written before it.
        this.faultManager = new FaultManager(this);
        this.expireNodesThread = new Thread(this.expireNodes, "expireNodes");
        this.expireNodesThread.setDaemon(true);
        this.expireNodesThread.start();
    }

    /**
     * Constructor for the NodeManager, used when reading back the state of
     * NodeManager from disk. Delegates to the two-argument constructor and then
     * restores nameToNode, hostsToSessions and nameToApps, in that order, from
     * the "nodeManager" JSON object.
     * @param clusterManager The ClusterManager instance
     * @param hostsReader The HostsReader instance
     * @param coronaSerializer The CoronaSerializer instance, which will be used
     *                         to read JSON from disk
     * @throws IOException if the serializer fails while reading the JSON
     */
    public NodeManager(ClusterManager clusterManager, HostsFileReader hostsReader,
            CoronaSerializer coronaSerializer) throws IOException {
        this(clusterManager, hostsReader);

        // Expecting the START_OBJECT token for nodeManager
        coronaSerializer.readStartObjectToken("nodeManager");
        // The three maps must be read in the order shown here.
        readNameToNode(coronaSerializer);
        readHostsToSessions(coronaSerializer);
        readNameToApps(coronaSerializer);
        // Expecting the END_OBJECT token for nodeManager
        coronaSerializer.readEndObjectToken("nodeManager");

        // topologyCache need not be serialized, it will eventually be rebuilt.
        // cpuToResourcePartitioning and resourceLimit need not be serialized,
        // they can be read from the conf.
    }

    /**
     * Restores the nameToNode map from the JSON stream. Each field of the
     * "nameToNode" object is a node name whose value is deserialized into a
     * ClusterNode; a name that is already present in the map is left untouched.
     * @param coronaSerializer The CoronaSerializer instance to be used to
     *                         read the JSON
     * @throws IOException
     */
    private void readNameToNode(CoronaSerializer coronaSerializer) throws IOException {
        coronaSerializer.readField("nameToNode");
        // Expecting the START_OBJECT token for nameToNode
        coronaSerializer.readStartObjectToken("nameToNode");
        for (JsonToken token = coronaSerializer.nextToken();
                token != JsonToken.END_OBJECT;
                token = coronaSerializer.nextToken()) {
            // The field name is the node name, the field value is the node.
            String nodeName = coronaSerializer.getFieldName();
            ClusterNode restoredNode = new ClusterNode(coronaSerializer);
            nameToNode.putIfAbsent(nodeName, restoredNode);
        }
        // The loop has consumed the END_OBJECT token for nameToNode
    }

    /**
     * Reads the hostsToSessions map from the JSON stream. Each field of the
     * "hostsToSessions" object is a node name whose value is the set of session
     * ids on that node. Entries whose node name is not present in nameToNode
     * are skipped with a warning, since the map is keyed by ClusterNode and
     * cannot hold a null key. A null session set is replaced by an empty set.
     * @param coronaSerializer The CoronaSerializer instance to be used to
     *                         read the JSON
     * @throws java.io.IOException
     */
    private void readHostsToSessions(CoronaSerializer coronaSerializer) throws IOException {
        coronaSerializer.readField("hostsToSessions");
        // Expecting the START_OBJECT token for hostsToSessions
        coronaSerializer.readStartObjectToken("hostsToSessions");
        JsonToken current = coronaSerializer.nextToken();

        while (current != JsonToken.END_OBJECT) {
            String host = coronaSerializer.getFieldName();
            // Always consume the value, even for an entry that is skipped, so
            // the stream stays positioned on the next field.
            Set<String> sessionsSet = coronaSerializer.readValueAs(Set.class);
            ClusterNode node = nameToNode.get(host);
            if (node == null) {
                // ConcurrentHashMap rejects null keys; skip instead of failing.
                LOG.warn("Skipping the sessions of unknown node " + host
                        + " while reading hostsToSessions");
            } else {
                // ConcurrentHashMap rejects null values as well.
                hostsToSessions.put(node, sessionsSet != null ? sessionsSet : new HashSet<String>());
            }
            current = coronaSerializer.nextToken();
        }
    }

    /**
     * Restores the nameToApps map from the JSON stream. Each field of the
     * "nameToApps" object is a node name whose value is a map from resource
     * type name to application info; the type names are converted back to
     * ResourceType constants.
     * @param coronaSerializer The CoronaSerializer instance to be used to
     *                         read the JSON
     * @throws IOException
     */
    private void readNameToApps(CoronaSerializer coronaSerializer) throws IOException {
        coronaSerializer.readField("nameToApps");
        // Expecting the START_OBJECT token for nameToApps
        coronaSerializer.readStartObjectToken("nameToApps");

        for (JsonToken token = coronaSerializer.nextToken();
                token != JsonToken.END_OBJECT;
                token = coronaSerializer.nextToken()) {
            String nodeName = coronaSerializer.getFieldName();
            // Expecting the START_OBJECT token for the Apps
            coronaSerializer.readStartObjectToken(nodeName);
            Map<String, String> rawApps = coronaSerializer.readValueAs(Map.class);

            // Re-key the apps by the ResourceType constant.
            Map<ResourceType, String> typedApps = new HashMap<ResourceType, String>();
            for (Map.Entry<String, String> rawApp : rawApps.entrySet()) {
                ResourceType appType = ResourceType.valueOf(rawApp.getKey());
                typedApps.put(appType, rawApp.getValue());
            }

            nameToApps.put(nodeName, typedApps);
        }
    }

    /**
     * See if there are any runnable nodes of a given type.
     * @param type the type to look for
     * @return true if there are runnable nodes for this type, false otherwise
     */
    public boolean existRunnableNodes(ResourceType type) {
        return typeToIndices.get(type).existRunnableNodes();
    }

    /**
     * Create a snapshot of the runnable nodes of a certain type by delegating
     * to the runnable indices of that type.
     * @param type The resource type
     * @return The snapshot
     */
    public NodeSnapshot getNodeSnapshot(ResourceType type) {
        RunnableIndices indices = typeToIndices.get(type);
        return indices.getNodeSnapshot();
    }

    /**
     * Find the best matching node for this host subject to the maxLevel
     * constraint. Without a host there is no locality to honor, so any runnable
     * node of the type is returned.
     * @param host the host of the request, may be null
     * @param maxLevel the max locality level to consider
     * @param type the type of resource needed on the node
     * @param excluded the list of nodes to exclude from consideration
     * @return the runnable node satisfying the constraints
     */
    public ClusterNode getRunnableNode(String host, LocalityLevel maxLevel, ResourceType type,
            Set<String> excluded) {
        if (host != null) {
            RequestedNode requested = resolve(host, type);
            return getRunnableNode(requested, maxLevel, type, excluded);
        }
        return typeToIndices.get(type).getRunnableNodeForAny(excluded);
    }

    /**
     * Get a runnable node, widening the search from the requested host to its
     * rack and finally to any node, but never beyond maxLevel.
     * @param requestedNode The request information.
     * @param maxLevel The maximum locality level that we can go to.
     * @param type The type of resource.
     * @param excluded The excluded nodes.
     * @return The runnable node that can be used, or null if none was found
     * within the allowed locality level.
     */
    public ClusterNode getRunnableNode(RequestedNode requestedNode, LocalityLevel maxLevel, ResourceType type,
            Set<String> excluded) {
        final RunnableIndices indices = typeToIndices.get(type);

        // Level 1: the requested host itself.
        ClusterNode hostLocal = indices.getRunnableNodeForHost(requestedNode);
        if (hostLocal != null || maxLevel == LocalityLevel.NODE) {
            return hostLocal;
        }

        // Level 2: the rack of the requested host.
        ClusterNode rackLocal = indices.getRunnableNodeForRack(requestedNode, excluded);
        if (rackLocal != null || maxLevel == LocalityLevel.RACK) {
            return rackLocal;
        }

        // Level 3: anywhere in the cluster.
        return indices.getRunnableNodeForAny(excluded);
    }

    /**
     * Add a node to be managed. The node is registered in the primary maps and
     * with the fault manager, and is then added to the runnable indices of
     * every resource type it offers and can currently accept a grant for.
     *
     * @param node Node to be managed
     * @param resourceInfos Mapping of the resource types available on the node
     *                      to their application info strings
     */
    protected void addNode(ClusterNode node, Map<ResourceType, String> resourceInfos) {
        synchronized (node) {
            // Step 1: primary structures and metrics.
            final String nodeName = node.getName();
            nameToNode.put(nodeName, node);
            faultManager.addNode(nodeName, resourceInfos.keySet());
            nameToApps.put(nodeName, resourceInfos);
            hostsToSessions.put(node, new HashSet<String>());
            clusterManager.getMetrics().restartTaskTracker(1);
            setAliveDeadMetrics();

            // Step 2: secondary (runnable) indices.
            for (Map.Entry<ResourceType, RunnableIndices> indexEntry : typeToIndices.entrySet()) {
                final ResourceType indexType = indexEntry.getKey();
                if (!resourceInfos.containsKey(indexType)) {
                    continue;
                }
                if (node.checkForGrant(Utilities.getUnitResourceRequest(indexType), resourceLimit)) {
                    indexEntry.getValue().addRunnable(node);
                }
            }
        }
    }

    /**
     * Bring the runnable indices of every resource type in line with what the
     * node can currently accept: the node is removed from an index when it can
     * no longer take a unit grant of that type and added when it can again.
     * @param node The node
     */
    private void updateRunnability(ClusterNode node) {
        synchronized (node) {
            for (Map.Entry<ResourceType, RunnableIndices> indexEntry : typeToIndices.entrySet()) {
                final ResourceType indexType = indexEntry.getKey();
                final RunnableIndices indices = indexEntry.getValue();
                final ResourceRequest unitRequest = Utilities.getUnitResourceRequest(indexType);
                final boolean isIndexed = indices.hasRunnable(node);
                final boolean canTakeGrant = node.checkForGrant(unitRequest, resourceLimit);
                if (isIndexed == canTakeGrant) {
                    // Index already reflects reality.
                    continue;
                }
                if (canTakeGrant) {
                    LOG.info("Node " + node.getName() + " is now " + indexType + " runnable");
                    indices.addRunnable(node);
                } else {
                    LOG.info("Node " + node.getName() + " is no longer " + indexType + " runnable");
                    indices.deleteRunnable(node);
                }
            }
        }
    }

    /**
     * Register a new application on the node. A node that has already been
     * deleted is ignored. If the node currently has no application map (for
     * example because it was dropped from nameToApps), a new one is created
     * rather than failing with a NullPointerException.
     * @param node the node to register on
     * @param type the type of an application
     * @param appInfo the appInfo string for the application
     */
    protected void addAppToNode(ClusterNode node, ResourceType type, String appInfo) {
        synchronized (node) {
            // Same guard as the other node-mutating methods: never resurrect
            // bookkeeping for a node that is gone.
            if (node.deleted) {
                LOG.warn("Trying to add type " + type + " to deleted node: " + node.getName());
                return;
            }

            // Update primary index.
            Map<ResourceType, String> apps = nameToApps.get(node.getName());
            if (apps == null) {
                apps = new HashMap<ResourceType, String>();
                nameToApps.put(node.getName(), apps);
            }
            apps.put(type, appInfo);

            // Update the runnable index of this resource type, if one exists.
            RunnableIndices r = typeToIndices.get(type);
            if (r != null && node.checkForGrant(Utilities.getUnitResourceRequest(type), resourceLimit)) {
                r.addRunnable(node);
            }
        }
    }

    /**
     * Get all the sessions that have grants on the node.
     * @param nodeName the name of the node
     * @return a fresh copy of the set of session ids that are running on the
     * node; an empty set if the node is unknown or has just been deleted
     */
    public Set<String> getNodeSessions(String nodeName) {
        ClusterNode node = nameToNode.get(nodeName);
        if (node == null) {
            LOG.warn("Trying to get the sessions for a non-existent node " + nodeName);
            return new HashSet<String>();
        }
        synchronized (node) {
            Set<String> sessions = hostsToSessions.get(node);
            if (sessions == null) {
                // The node was deleted between the lookup above and taking the
                // node lock, so its session registry is already gone.
                LOG.warn("Trying to get the sessions for a non-existent node " + nodeName);
                return new HashSet<String>();
            }
            return new HashSet<String>(sessions);
        }
    }

    /**
     * Remove the references to the session from the session registry of every
     * node.
     * @param session the session to be deleted
     */
    public void deleteSession(String session) {
        for (Map.Entry<ClusterNode, Set<String>> nodeSessions : hostsToSessions.entrySet()) {
            nodeSessions.getValue().remove(session);
        }
    }

    /**
     * Delete the node with the given name from the cluster. This happens when
     * the node times out or is being decommissioned.
     * @param nodeName the name of the node to remove
     * @return the list of grants that are running on the node, or null if no
     * node with that name is known
     */
    public Set<ClusterNode.GrantId> deleteNode(String nodeName) {
        ClusterNode node = nameToNode.get(nodeName);
        if (node != null) {
            return deleteNode(node);
        }
        LOG.warn("Trying to delete non-existent node: " + nodeName);
        return null;
    }

    /**
     * Delete the node from the cluster. This happens when the node times out
     * or is being decommissioned. The node is flagged as deleted, dropped from
     * the primary maps and the fault manager, and removed from the runnable
     * indices of every resource type.
     * @param node the node to remove
     * @return the list of grants that are running on the node, or null if the
     * node had already been deleted
     */
    protected Set<ClusterNode.GrantId> deleteNode(ClusterNode node) {
        synchronized (node) {
            if (node.deleted) {
                // Somebody else already removed this node.
                return null;
            }
            node.deleted = true;

            // Step 1: primary structures and metrics.
            final String nodeName = node.getName();
            nameToNode.remove(nodeName);
            faultManager.deleteNode(nodeName);
            nameToApps.remove(nodeName);
            hostsToSessions.remove(node);
            setAliveDeadMetrics();

            // Step 2: secondary (runnable) indices.
            for (RunnableIndices indices : typeToIndices.values()) {
                indices.deleteRunnable(node);
            }
            return node.getGrants();
        }
    }

    /**
     * Remove one application type from the node with the given name. Happens
     * when the daemon responsible for handling this application type on the
     * node goes down.
     * @param nodeName the name of the node
     * @param type the type of the resource
     * @return the list of grants that belonged to the application on this
     * node, or null if no node with that name is known
     */
    public Set<ClusterNode.GrantId> deleteAppFromNode(String nodeName, ResourceType type) {
        ClusterNode node = nameToNode.get(nodeName);
        if (node != null) {
            return deleteAppFromNode(node, type);
        }
        LOG.warn("Trying to delete type " + type + " from non-existent node: " + nodeName);
        return null;
    }

    /**
     * Remove one application type from the node. Happens when the daemon
     * responsible for handling this application type on the node goes down.
     * Only the entry for the given type is removed from the node's application
     * map; the applications of other types stay registered.
     * @param node the node
     * @param type the type of the resource
     * @return the list of grants that belonged to the application on this
     * node, or null if the node has already been deleted
     */
    protected Set<ClusterNode.GrantId> deleteAppFromNode(ClusterNode node, ResourceType type) {
        synchronized (node) {
            if (node.deleted) {
                return null;
            }

            // Drop just this type. Removing the node's whole entry would
            // discard the other application types on the node and leave later
            // lookups of nameToApps for this node with nothing to update.
            Map<ResourceType, String> apps = nameToApps.get(node.getName());
            if (apps != null) {
                apps.remove(type);
            }

            RunnableIndices r = typeToIndices.get(type);
            r.deleteRunnable(node);

            return node.getGrants(type);
        }
    }

    /**
     * Cancel a grant on a node. Nothing happens when the node is unknown,
     * deleted, or not allowed, or when no request matches the grant. Otherwise
     * the grant is cancelled, the load for its type is decremented, and the
     * node is put back into the runnable index of that type if the cancellation
     * made it runnable again and it is not blacklisted.
     * @param nodeName the node the grant is on
     * @param sessionId the session the grant was given to
     * @param requestId the request this grant satisfied
     */
    public void cancelGrant(String nodeName, String sessionId, int requestId) {
        final ClusterNode node = nameToNode.get(nodeName);
        if (node == null) {
            LOG.warn("Canceling grant for non-existent node: " + nodeName);
            return;
        }
        synchronized (node) {
            if (node.deleted) {
                LOG.warn("Canceling grant for deleted node: " + nodeName);
                return;
            }
            final String hostName = node.getClusterNodeInfo().getAddress().getHost();
            if (!canAllowNode(hostName)) {
                LOG.warn("Canceling grant for excluded node: " + hostName);
                return;
            }
            final ResourceRequestInfo grantedRequest = node.getRequestForGrant(sessionId, requestId);
            if (grantedRequest == null) {
                return;
            }

            final ResourceRequest unitRequest = Utilities.getUnitResourceRequest(grantedRequest.getType());
            // Remember whether the node could take a grant before the cancel.
            final boolean wasRunnable = node.checkForGrant(unitRequest, resourceLimit);
            node.cancelGrant(sessionId, requestId);
            loadManager.decrementLoad(grantedRequest.getType());

            final boolean becameRunnable = !wasRunnable && node.checkForGrant(unitRequest, resourceLimit);
            if (becameRunnable) {
                final RunnableIndices indices = typeToIndices.get(grantedRequest.getType());
                final boolean blacklisted = faultManager.isBlacklisted(node.getName(), grantedRequest.getType());
                if (!blacklisted) {
                    indices.addRunnable(node);
                }
            }
        }
    }

    /**
     * Record a grant on a node, provided the node is not deleted and can still
     * take a unit request of the grant's type. When the node can no longer
     * take another unit afterwards, it is removed from the runnable indices
     * of that type.
     * @param node the node the grant is on
     * @param sessionId the session the grant is given to
     * @param req the request this grant satisfies
     * @return true if the grant can be added to the node, false otherwise
     */
    public boolean addGrant(ClusterNode node, String sessionId, ResourceRequestInfo req) {
        synchronized (node) {
            if (node.deleted) {
                return false;
            }

            ResourceType grantType = req.getType();
            ResourceRequest unit = Utilities.getUnitResourceRequest(grantType);
            boolean fits = node.checkForGrant(unit, resourceLimit);
            if (!fits) {
                return false;
            }

            node.addGrant(sessionId, req);
            loadManager.incrementLoad(grantType);
            hostsToSessions.get(node).add(sessionId);

            // Re-check after the grant: the node may have just been filled up.
            boolean stillRunnable = node.checkForGrant(unit, resourceLimit);
            if (!stillRunnable) {
                typeToIndices.get(grantType).deleteRunnable(node);
            }
            return true;
        }
    }

    /**
     * Install a new configuration. The configuration must be a CoronaConf.
     * Re-reads the node expiry interval, interrupts the expiry thread if one
     * exists, rebuilds the load manager and topology cache, makes sure every
     * resource type named in the cpu-to-resource partitioning has runnable
     * indices, and passes the configuration on to the resource limit and the
     * fault manager.
     * @param newConf the new configuration
     */
    @Override
    public void setConf(Configuration newConf) {
        this.conf = (CoronaConf) newConf;
        nodeExpiryInterval = conf.getNodeExpiryInterval();
        if (this.expireNodesThread != null) {
            this.expireNodesThread.interrupt();
        }

        loadManager = new LoadManager(this);
        topologyCache = new TopologyCache(conf);
        cpuToResourcePartitioning = conf.getCpuToResourcePartitioning();

        // Only the resource types matter here, not the cpu counts keying them.
        for (Map<ResourceType, Integer> partitioning : cpuToResourcePartitioning.values()) {
            for (ResourceType type : partitioning.keySet()) {
                if (typeToIndices.containsKey(type)) {
                    continue;
                }
                typeToIndices.put(type, new RunnableIndices(type));
            }
        }

        resourceLimit.setConf(conf);
        faultManager.setConf(conf);
    }

    /**
     * Rebuilds the NodeManager state that is derived rather than persisted.
     * First every known ClusterNode is restored, then every grant held by
     * those nodes has its requested nodes rebuilt and is counted in the load
     * manager.
     * @throws IOException if the Cluster Manager is not in Safe Mode
     */
    public void restoreAfterSafeModeRestart() throws IOException {
        if (!clusterManager.safeMode) {
            throw new IOException(
                    "restoreAfterSafeModeRestart() called while the " + "Cluster Manager was not in Safe Mode");
        }

        Collection<ClusterNode> knownNodes = nameToNode.values();

        // Pass 1: bring every ClusterNode back to a usable state.
        for (ClusterNode restored : knownNodes) {
            restoreClusterNode(restored);
        }

        // Pass 2: fix up the grants of each node and re-account their load.
        for (ClusterNode owner : knownNodes) {
            for (ResourceRequestInfo grant : owner.grants.values()) {
                restoreResourceRequestInfo(grant);
                loadManager.incrementLoad(grant.getType());
            }
        }
    }

    /**
     * Rebuilds the requested-node list of a ResourceRequestInfo from its host
     * names. The list is set to null when the request names no hosts.
     * @param resourceRequestInfo The ResourceRequestInfo instance to be restored
     */
    public void restoreResourceRequestInfo(ResourceRequestInfo resourceRequestInfo) {
        List<String> hostNames = resourceRequestInfo.getHosts();
        if (hostNames == null || hostNames.isEmpty()) {
            resourceRequestInfo.nodes = null;
            return;
        }

        List<RequestedNode> resolved = new ArrayList<RequestedNode>(hostNames.size());
        for (String hostName : hostNames) {
            resolved.add(resolve(hostName, resourceRequestInfo.getType()));
        }
        resourceRequestInfo.nodes = resolved;
    }

    /**
     * Rebuilds the members of a ClusterNode that are derived rather than
     * restored directly: its topology node, its heartbeat state, its
     * resource-type-to-max-cpu map and its runnability.
     * @param clusterNode the node to restore
     */
    private void restoreClusterNode(ClusterNode clusterNode) {
        // Look the host up in the topology cache again.
        clusterNode.hostNode = topologyCache.getNode(clusterNode.getHost());
        // This will reset the lastHeartbeatTime
        clusterNode.heartbeat(clusterNode.getClusterNodeInfo());
        // Recompute the per-type cpu limits from the configured partitioning.
        clusterNode.initResourceTypeToMaxCpuMap(cpuToResourcePartitioning);
        updateRunnability(clusterNode);
    }

    /**
     * @return the configuration currently held in the {@code conf} field
     */
    @Override
    public Configuration getConf() {
        return conf;
    }

    /**
     * Process a heartbeat from a node. An unknown, allowed node is registered;
     * the node's heartbeat state is refreshed; the applications it reports are
     * compared with the ones recorded for it, and every difference is applied
     * (removals through the cluster manager, additions directly) before the
     * node's runnability is updated.
     * @param clusterNodeInfo the node that is heartbeating
     * @return true if a new node was added or the node's applications changed,
     *         false otherwise (including heartbeats from disallowed hosts)
     * @throws DisallowedNode if the host is not allowed and the node is not
     *         already known
     */
    public boolean heartbeat(ClusterNodeInfo clusterNodeInfo) throws DisallowedNode {
        ClusterNode node = nameToNode.get(clusterNodeInfo.name);
        if (!canAllowNode(clusterNodeInfo.getAddress().getHost())) {
            if (node == null) {
                throw new DisallowedNode(clusterNodeInfo.getAddress().getHost());
            }
            // Already-known node on a disallowed host: only record the heartbeat.
            node.heartbeat(clusterNodeInfo);
            return false;
        }

        Map<ResourceType, String> reportedApps = clusterNodeInfo.getResourceInfos();
        if (reportedApps == null) {
            reportedApps = new EnumMap<ResourceType, String>(ResourceType.class);
        }

        boolean isNewNode = (node == null);
        if (isNewNode) {
            LOG.info("Adding node with heartbeat: " + clusterNodeInfo.toString());
            node = new ClusterNode(clusterNodeInfo, topologyCache.getNode(clusterNodeInfo.address.host),
                    cpuToResourcePartitioning);
            addNode(node, reportedApps);
        }

        node.heartbeat(clusterNodeInfo);

        // NOTE(review): assumes nameToApps holds an entry for every registered
        // node (presumably installed by addNode) -- confirm; a missing entry
        // fails in the loop below.
        Map<ResourceType, String> recordedApps = nameToApps.get(clusterNodeInfo.name);

        // Types that were recorded but are now missing or carry different info.
        Set<ResourceType> removedTypes = EnumSet.noneOf(ResourceType.class);
        for (Map.Entry<ResourceType, String> recorded : recordedApps.entrySet()) {
            String reportedInfo = reportedApps.get(recorded.getKey());
            if (reportedInfo == null || !reportedInfo.equals(recorded.getValue())) {
                removedTypes.add(recorded.getKey());
            }
        }

        // Types that are reported but were not recorded, or carry different info.
        Map<ResourceType, String> addedApps = new EnumMap<ResourceType, String>(ResourceType.class);
        for (Map.Entry<ResourceType, String> reported : reportedApps.entrySet()) {
            String recordedInfo = recordedApps.get(reported.getKey());
            if (recordedInfo == null || !recordedInfo.equals(reported.getValue())) {
                addedApps.put(reported.getKey(), reported.getValue());
            }
        }

        // Apply the differences only after both comparisons are complete.
        for (ResourceType removed : removedTypes) {
            clusterManager.nodeAppRemoved(clusterNodeInfo.name, removed);
        }
        for (Map.Entry<ResourceType, String> added : addedApps.entrySet()) {
            addAppToNode(node, added.getKey(), added.getValue());
        }

        updateRunnability(node);
        boolean appsChanged = !removedTypes.isEmpty() || !addedApps.isEmpty();
        return isNewNode || appsChanged;
    }

    /**
     * Look up the application-specific information recorded for one resource
     * type on a node.
     * @param node The node.
     * @param type The type of resources.
     * @return The application-specific information, or null when nothing is
     *         recorded for the node or for the type
     */
    public String getAppInfo(ClusterNode node, ResourceType type) {
        Map<ResourceType, String> appsOnNode = nameToApps.get(node.getName());
        return (appsOnNode != null) ? appsOnNode.get(type) : null;
    }

    /**
     * Check if a node has enough resources. The decision is delegated
     * entirely to the resource limit.
     * @param node The node
     * @return A boolean indicating if it has enough resources.
     */
    public boolean hasEnoughResource(ClusterNode node) {
        return resourceLimit.hasEnoughResource(node);
    }

    /**
     * Periodic task that times out nodes whose last heartbeat is older than
     * the node expiry interval. It wakes up every half interval, does nothing
     * while the cluster manager is in safe mode, and stops once the shutdown
     * flag is set.
     */
    class ExpireNodes implements Runnable {

        @Override
        public void run() {
            while (!shutdown) {
                try {
                    Thread.sleep(nodeExpiryInterval / 2);

                    // In safe mode the pass is skipped entirely.
                    if (!clusterManager.safeMode) {
                        long now = ClusterManager.clock.getTime();
                        for (ClusterNode candidate : nameToNode.values()) {
                            long silentFor = now - candidate.lastHeartbeatTime;
                            if (silentFor > nodeExpiryInterval) {
                                LOG.warn("Timing out node: " + candidate.getName());
                                clusterManager.nodeTimeout(candidate.getName());
                            }
                        }
                    }
                } catch (InterruptedException iex) {
                    // An interrupt only ends the current wait early; the loop
                    // condition re-checks the shutdown flag.
                }
            }
        }

    }

    /**
     * Used by the cm.jsp to get the list of resource types.
     * The returned collection is the key set of the type-to-indices map
     * itself, not a copy.
     *
     * @return Collection of resource types
     */
    public Collection<ResourceType> getResourceTypes() {
        return typeToIndices.keySet();
    }

    /**
     * Sum the maximum cpu available for a resource type over all nodes that
     * are not deleted.
     * @param type The resource type.
     * @return The capacity.
     */
    public int getMaxCpuForType(ResourceType type) {
        int capacity = 0;
        for (ClusterNode candidate : nameToNode.values()) {
            synchronized (candidate) {
                if (!candidate.deleted) {
                    capacity += candidate.getMaxCpuForType(type);
                }
            }
        }
        return capacity;
    }

    /**
     * Sum the cpu currently allocated for a resource type over all nodes that
     * are not deleted.
     * @param type The resource type.
     * @return The allocation.
     */
    public int getAllocatedCpuForType(ResourceType type) {
        int allocated = 0;
        for (ClusterNode candidate : nameToNode.values()) {
            synchronized (candidate) {
                if (!candidate.deleted) {
                    allocated += candidate.getAllocatedCpuForType(type);
                }
            }
        }
        return allocated;
    }

    /**
     * List the nodes that still have free cpu for a resource type.
     * @param type The resource type.
     * @return One string per such node, of the form "name: free", where
     *         "free" is the string form of the node's free resources
     */
    public List<String> getFreeNodesForType(ResourceType type) {
        List<String> described = new ArrayList<String>();
        for (Map.Entry<String, ClusterNode> named : nameToNode.entrySet()) {
            ClusterNode candidate = named.getValue();
            synchronized (candidate) {
                if (candidate.deleted) {
                    continue;
                }
                if (candidate.getMaxCpuForType(type) > candidate.getAllocatedCpuForType(type)) {
                    described.add(named.getKey() + ": " + candidate.getFree().toString());
                }
            }
        }
        return described;
    }

    /**
     * Counts the hosts returned by the hosts reader's {@code getHosts()}.
     * @return The total number of configured hosts.
     */
    public int getTotalNodeCount() {
        return hostsReader.getHosts().size();
    }

    /**
     * Returns whatever the hosts reader's {@code getHostNames()} returns,
     * without copying it here.
     * @return All the configured hosts.
     */
    public Set<String> getAllNodes() {
        return hostsReader.getHostNames();
    }

    /**
     * Counts the hosts returned by the hosts reader's
     * {@code getExcludedHosts()}.
     * @return The number of excluded hosts.
     */
    public int getExcludedNodeCount() {
        return hostsReader.getExcludedHosts().size();
    }

    /**
     * Returns whatever the hosts reader's {@code getExcludedHosts()} returns,
     * without copying it here.
     * @return The excluded hosts.
     */
    public Set<String> getExcludedNodes() {
        return hostsReader.getExcludedHosts();
    }

    /**
     * A node counts as alive while it has an entry in the name-to-node map.
     * @return The number of alive nodes.
     */
    public int getAliveNodeCount() {
        return nameToNode.size();
    }

    /**
     * Copies the keys of the name-to-node map into a new list, so the result
     * does not change when nodes are added or removed afterwards.
     * @return The names of the alive nodes.
     */
    public List<String> getAliveNodes() {
        return new ArrayList<String>(nameToNode.keySet());
    }

    /**
     * Copies the values of the name-to-node map into a new list. The list is
     * a snapshot, but its elements are the live ClusterNode objects.
     * @return The alive nodes as ClusterNode objects.
     */
    public List<ClusterNode> getAliveClusterNodes() {
        return new ArrayList<ClusterNode>(nameToNode.values());
    }

    /**
     * @return The fault manager used by this node manager.
     */
    public FaultManager getFaultManager() {
        return faultManager;
    }

    /**
     * Re-read the includes/excludes information and stop scheduling on nodes
     * that are no longer allowed. Every known node whose host is missing from
     * the included hosts, or present in the excluded hosts, is removed from
     * the runnable indices of every resource type. The nodes themselves are
     * not deleted here.
     * @throws IOException declared for the hosts reader refresh
     */
    public synchronized void refreshNodes() throws IOException {
        hostsReader.refresh();
        LOG.info("After refresh Included hosts: " + hostsReader.getHostNames().size() + " Excluded hosts: "
                + hostsReader.getExcludedHosts().size());
        Set<String> included = hostsReader.getHostNames();
        Set<String> excluded = hostsReader.getExcludedHosts();

        // Phase 1: collect the nodes that are not allowed any more.
        Set<ClusterNode> disallowed = new HashSet<ClusterNode>();
        for (ClusterNode known : nameToNode.values()) {
            String host = known.getHost();
            boolean allowed = included.contains(host) && !excluded.contains(host);
            if (!allowed) {
                disallowed.add(known);
            }
        }

        // Phase 2: take each of them out of every runnable index it is in.
        for (ClusterNode banned : disallowed) {
            synchronized (banned) {
                for (Map.Entry<ResourceType, RunnableIndices> indexed : typeToIndices.entrySet()) {
                    RunnableIndices indices = indexed.getValue();
                    if (!indices.hasRunnable(banned)) {
                        continue;
                    }
                    LOG.info("Node " + banned.getName() + " is no longer " + indexed.getKey()
                            + " runnable because it is excluded");
                    indices.deleteRunnable(banned);
                }
            }
        }
    }

    /**
     * Forward feedback about nodes to the fault manager, one report at a time.
     * @param handle The session handle (not used by this method).
     * @param resourceTypes The types of resource this feedback is about.
     * @param reportList The list of reports.
     */
    public void nodeFeedback(String handle, List<ResourceType> resourceTypes, List<NodeUsageReport> reportList) {
        for (NodeUsageReport report : reportList) {
            String reportedNode = report.getNodeName();
            faultManager.nodeFeedback(reportedNode, resourceTypes, report);
        }
    }

    /**
     * Blacklist a resource on a node. Logs the event, publishes the fault
     * manager's current blacklisted-node count to the metrics, and removes
     * the application type from the node. The grants returned by that removal
     * are not used here.
     * @param nodeName The node name
     * @param resourceType The resource type.
     */
    void blacklistNode(String nodeName, ResourceType resourceType) {
        LOG.info("Node " + nodeName + " has been blacklisted for resource " + resourceType);
        clusterManager.getMetrics().setBlacklistedNodes(faultManager.getBlacklistedNodeCount());
        deleteAppFromNode(nodeName, resourceType);
    }

    /**
     * Checks if a host is allowed to communicate with the cluster manager.
     * The decision is delegated to the hosts reader.
     *
     * @param host
     *          The host
     * @return a boolean indicating if the host is allowed.
     */
    private boolean canAllowNode(String host) {
        return hostsReader.isAllowedHost(host);
    }

    /**
     * Publish the alive and dead node counts to the metrics. The dead count
     * is the number of configured hosts minus the number of alive nodes, and
     * is only published when at least one host is configured.
     */
    private void setAliveDeadMetrics() {
        clusterManager.getMetrics().setAliveNodes(nameToNode.size());

        int configuredHosts = hostsReader.getHosts().size();
        if (configuredHosts <= 0) {
            // Without configured hosts there is no total to subtract from.
            return;
        }
        clusterManager.getMetrics().setDeadNodes(configuredHosts - nameToNode.size());
    }

    /**
     * Resolve a host name into the RequestedNode kept by the runnable indices
     * of the given resource type, creating it there if needed.
     * @param host The host.
     * @param type The resource type.
     * @return The resolved form.
     */
    public RequestedNode resolve(String host, ResourceType type) {
        return typeToIndices.get(type).getOrCreateRequestedNode(host);
    }

    /**
     * @return The resource limit used by this node manager.
     */
    public ResourceLimit getResourceLimit() {
        return resourceLimit;
    }

    /**
     * Stamp every known node with the current clock time as its last
     * heartbeat time. This is required when we come out of safe mode.
     */
    public void resetNodesLastHeartbeatTime() {
        final long resetTime = ClusterManager.clock.getTime();
        for (ClusterNode known : nameToNode.values()) {
            known.lastHeartbeatTime = resetTime;
        }
    }

    /**
     * Writes the state of the NodeManager as one JSON object with the fields
     * "nameToNode", "hostsToSessions" and "nameToApps", in that order.
     * @param jsonGenerator The instance of JsonGenerator, which will be used to
     *                      write the JSON
     * @throws IOException declared for the JsonGenerator write calls
     */
    public void write(JsonGenerator jsonGenerator) throws IOException {
        jsonGenerator.writeStartObject();

        // "nameToNode": one nested object, keyed by node name, in which each
        // node serializes itself.
        jsonGenerator.writeFieldName("nameToNode");
        jsonGenerator.writeStartObject();
        for (Map.Entry<String, ClusterNode> named : nameToNode.entrySet()) {
            jsonGenerator.writeFieldName(named.getKey());
            named.getValue().write(jsonGenerator);
        }
        jsonGenerator.writeEndObject();

        // "hostsToSessions": re-keyed by node name, because the nodes
        // themselves have already been written under "nameToNode".
        Map<String, Set<String>> sessionsByNodeName = new HashMap<String, Set<String>>();
        for (Map.Entry<ClusterNode, Set<String>> sessions : hostsToSessions.entrySet()) {
            sessionsByNodeName.put(sessions.getKey().getName(), sessions.getValue());
        }
        jsonGenerator.writeObjectField("hostsToSessions", sessionsByNodeName);

        jsonGenerator.writeObjectField("nameToApps", nameToApps);

        // The faultManager is not persisted, and the loadManager can be
        // rebuilt, so neither is written.
        jsonGenerator.writeEndObject();
    }
}