Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentSkipListMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.UpdateContainerRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeResourceUpdateEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.utils.Lock; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; import com.google.common.annotations.VisibleForTesting; @LimitedPrivate("yarn") @Evolving @SuppressWarnings("unchecked") public class FifoScheduler extends AbstractYarnScheduler<FiCaSchedulerApp, FiCaSchedulerNode> implements Configurable { private static final Log LOG = LogFactory.getLog(FifoScheduler.class); private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); Configuration conf; private boolean usePortForNodeName; private ActiveUsersManager activeUsersManager; private static final String DEFAULT_QUEUE_NAME = "default"; private QueueMetrics metrics; private final ResourceCalculator resourceCalculator = new DefaultResourceCalculator(); private final Queue DEFAULT_QUEUE = new Queue() { @Override public String getQueueName() { return DEFAULT_QUEUE_NAME; } @Override public QueueMetrics getMetrics() { return metrics; } @Override public QueueInfo getQueueInfo(boolean includeChildQueues, boolean recursive) { QueueInfo queueInfo = recordFactory.newRecordInstance(QueueInfo.class); queueInfo.setQueueName(DEFAULT_QUEUE.getQueueName()); queueInfo.setCapacity(1.0f); if (clusterResource.getMemorySize() == 0) { queueInfo.setCurrentCapacity(0.0f); } else { queueInfo .setCurrentCapacity((float) usedResource.getMemorySize() / clusterResource.getMemorySize()); } queueInfo.setMaximumCapacity(1.0f); queueInfo.setChildQueues(new ArrayList<QueueInfo>()); queueInfo.setQueueState(QueueState.RUNNING); return queueInfo; } public Map<QueueACL, AccessControlList> getQueueAcls() { Map<QueueACL, AccessControlList> acls = new HashMap<QueueACL, AccessControlList>(); for (QueueACL acl : QueueACL.values()) { acls.put(acl, new AccessControlList("*")); } return acls; } @Override public List<QueueUserACLInfo> getQueueUserAclInfo(UserGroupInformation unused) { QueueUserACLInfo queueUserAclInfo = recordFactory.newRecordInstance(QueueUserACLInfo.class); queueUserAclInfo.setQueueName(DEFAULT_QUEUE_NAME); queueUserAclInfo.setUserAcls(Arrays.asList(QueueACL.values())); return Collections.singletonList(queueUserAclInfo); } @Override public boolean hasAccess(QueueACL acl, UserGroupInformation user) { return getQueueAcls().get(acl).isUserAllowed(user); } @Override public ActiveUsersManager getActiveUsersManager() { return activeUsersManager; } @Override public void recoverContainer(Resource clusterResource, SchedulerApplicationAttempt schedulerAttempt, RMContainer rmContainer) { if (rmContainer.getState().equals(RMContainerState.COMPLETED)) { return; } increaseUsedResources(rmContainer); updateAppHeadRoom(schedulerAttempt); updateAvailableResourcesMetrics(); } @Override public Set<String> getAccessibleNodeLabels() { // TODO add implementation for FIFO scheduler return null; } @Override public String getDefaultNodeLabelExpression() { // TODO add implementation for FIFO scheduler return null; } @Override public void incPendingResource(String nodeLabel, Resource resourceToInc) { } @Override public void decPendingResource(String nodeLabel, Resource resourceToDec) { } @Override public Priority getDefaultApplicationPriority() { // TODO add implementation for FIFO scheduler return null; } @Override public void incReservedResource(String partition, Resource reservedRes) { // TODO add implementation for FIFO scheduler } @Override public void decReservedResource(String partition, Resource reservedRes) { // TODO add implementation for FIFO scheduler } }; public FifoScheduler() { super(FifoScheduler.class.getName()); } private synchronized void initScheduler(Configuration conf) { validateConf(conf); //Use ConcurrentSkipListMap because applications need to be ordered this.applications = new ConcurrentSkipListMap<ApplicationId, SchedulerApplication<FiCaSchedulerApp>>(); this.minimumAllocation = Resources .createResource(conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB)); initMaximumResourceCapability(Resources.createResource( conf.getInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB), conf.getInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES))); this.usePortForNodeName = conf.getBoolean(YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME, YarnConfiguration.DEFAULT_RM_SCHEDULER_USE_PORT_FOR_NODE_NAME); this.metrics = QueueMetrics.forQueue(DEFAULT_QUEUE_NAME, null, false, conf); this.activeUsersManager = new ActiveUsersManager(metrics); } @Override public void serviceInit(Configuration conf) throws Exception { initScheduler(conf); super.serviceInit(conf); } @Override public void serviceStart() throws Exception { super.serviceStart(); } @Override public void serviceStop() throws Exception { super.serviceStop(); } @Override public synchronized void setConf(Configuration conf) { this.conf = conf; } private void validateConf(Configuration conf) { // validate scheduler memory allocation setting int minMem = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); int maxMem = conf.getInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB); if (minMem <= 0 || minMem > maxMem) { throw new YarnRuntimeException("Invalid resource scheduler memory" + " allocation configuration" + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB + "=" + minMem + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB + "=" + maxMem + ", min and max should be greater than 0" + ", max should be no smaller than min."); } } @Override public synchronized Configuration getConf() { return conf; } @Override public int getNumClusterNodes() { return nodes.size(); } @Override public synchronized void setRMContext(RMContext rmContext) { this.rmContext = rmContext; } @Override public synchronized void reinitialize(Configuration conf, RMContext rmContext) throws IOException { setConf(conf); } @Override public Allocation allocate(ApplicationAttemptId applicationAttemptId, List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals, List<UpdateContainerRequest> increaseRequests, List<UpdateContainerRequest> decreaseRequests) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { LOG.error("Calling allocate on removed " + "or non existant application " + applicationAttemptId); return EMPTY_ALLOCATION; } // Sanity check SchedulerUtils.normalizeRequests(ask, resourceCalculator, clusterResource, minimumAllocation, getMaximumResourceCapability()); // Release containers releaseContainers(release, application); synchronized (application) { // make sure we aren't stopping/removing the application // when the allocate comes in if (application.isStopped()) { LOG.info("Calling allocate on a stopped " + "application " + applicationAttemptId); return EMPTY_ALLOCATION; } if (!ask.isEmpty()) { LOG.debug("allocate: pre-update" + " applicationId=" + applicationAttemptId + " application=" + application); application.showRequests(); // Update application requests application.updateResourceRequests(ask); LOG.debug("allocate: post-update" + " applicationId=" + applicationAttemptId + " application=" + application); application.showRequests(); LOG.debug("allocate:" + " applicationId=" + applicationAttemptId + " #ask=" + ask.size()); } application.updateBlacklist(blacklistAdditions, blacklistRemovals); Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); return new Allocation(application.pullNewlyAllocatedContainers(), headroom, null, null, null, application.pullUpdatedNMTokens()); } } private FiCaSchedulerNode getNode(NodeId nodeId) { return nodes.get(nodeId); } @VisibleForTesting public synchronized void addApplication(ApplicationId applicationId, String queue, String user, boolean isAppRecovering) { SchedulerApplication<FiCaSchedulerApp> application = new SchedulerApplication<FiCaSchedulerApp>( DEFAULT_QUEUE, user); applications.put(applicationId, application); metrics.submitApp(user); LOG.info("Accepted application " + applicationId + " from user: " + user + ", currently num of applications: " + applications.size()); if (isAppRecovering) { if (LOG.isDebugEnabled()) { LOG.debug(applicationId + " is recovering. Skip notifying APP_ACCEPTED"); } } else { rmContext.getDispatcher().getEventHandler() .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); } } @VisibleForTesting public synchronized void addApplicationAttempt(ApplicationAttemptId appAttemptId, boolean transferStateFromPreviousAttempt, boolean isAttemptRecovering) { SchedulerApplication<FiCaSchedulerApp> application = applications.get(appAttemptId.getApplicationId()); String user = application.getUser(); // TODO: Fix store FiCaSchedulerApp schedulerApp = new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager, this.rmContext); if (transferStateFromPreviousAttempt) { schedulerApp.transferStateFromPreviousAttempt(application.getCurrentAppAttempt()); } application.setCurrentAppAttempt(schedulerApp); metrics.submitAppAttempt(user); LOG.info("Added Application Attempt " + appAttemptId + " to scheduler from user " + application.getUser()); if (isAttemptRecovering) { if (LOG.isDebugEnabled()) { LOG.debug(appAttemptId + " is recovering. Skipping notifying ATTEMPT_ADDED"); } } else { rmContext.getDispatcher().getEventHandler() .handle(new RMAppAttemptEvent(appAttemptId, RMAppAttemptEventType.ATTEMPT_ADDED)); } } private synchronized void doneApplication(ApplicationId applicationId, RMAppState finalState) { SchedulerApplication<FiCaSchedulerApp> application = applications.get(applicationId); if (application == null) { LOG.warn("Couldn't find application " + applicationId); return; } // Inform the activeUsersManager activeUsersManager.deactivateApplication(application.getUser(), applicationId); application.stop(finalState); applications.remove(applicationId); } private synchronized void doneApplicationAttempt(ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) throws IOException { FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId); SchedulerApplication<FiCaSchedulerApp> application = applications .get(applicationAttemptId.getApplicationId()); if (application == null || attempt == null) { throw new IOException("Unknown application " + applicationAttemptId + " has completed!"); } // Kill all 'live' containers for (RMContainer container : attempt.getLiveContainers()) { if (keepContainers && container.getState().equals(RMContainerState.RUNNING)) { // do not kill the running container in the case of work-preserving AM // restart. LOG.info("Skip killing " + container.getContainerId()); continue; } super.completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION), RMContainerEventType.KILL); } // Clean up pending requests, metrics etc. attempt.stop(rmAppAttemptFinalState); } /** * Heart of the scheduler... * * @param node node on which resources are available to be allocated */ private void assignContainers(FiCaSchedulerNode node) { LOG.debug("assignContainers:" + " node=" + node.getRMNode().getNodeAddress() + " #applications=" + applications.size()); // Try to assign containers to applications in fifo order for (Map.Entry<ApplicationId, SchedulerApplication<FiCaSchedulerApp>> e : applications.entrySet()) { FiCaSchedulerApp application = e.getValue().getCurrentAppAttempt(); if (application == null) { continue; } LOG.debug("pre-assignContainers"); application.showRequests(); synchronized (application) { // Check if this resource is on the blacklist if (SchedulerAppUtils.isPlaceBlacklisted(application, node, LOG)) { continue; } for (Priority priority : application.getPriorities()) { int maxContainers = getMaxAllocatableContainers(application, priority, node, NodeType.OFF_SWITCH); // Ensure the application needs containers of this priority if (maxContainers > 0) { int assignedContainers = assignContainersOnNode(node, application, priority); // Do not assign out of order w.r.t priorities if (assignedContainers == 0) { break; } } } } LOG.debug("post-assignContainers"); application.showRequests(); // Done if (Resources.lessThan(resourceCalculator, clusterResource, node.getAvailableResource(), minimumAllocation)) { break; } } // Update the applications' headroom to correctly take into // account the containers assigned in this update. for (SchedulerApplication<FiCaSchedulerApp> application : applications.values()) { FiCaSchedulerApp attempt = (FiCaSchedulerApp) application.getCurrentAppAttempt(); if (attempt == null) { continue; } updateAppHeadRoom(attempt); } } private int getMaxAllocatableContainers(FiCaSchedulerApp application, Priority priority, FiCaSchedulerNode node, NodeType type) { int maxContainers = 0; ResourceRequest offSwitchRequest = application.getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest != null) { maxContainers = offSwitchRequest.getNumContainers(); } if (type == NodeType.OFF_SWITCH) { return maxContainers; } if (type == NodeType.RACK_LOCAL) { ResourceRequest rackLocalRequest = application.getResourceRequest(priority, node.getRMNode().getRackName()); if (rackLocalRequest == null) { return maxContainers; } maxContainers = Math.min(maxContainers, rackLocalRequest.getNumContainers()); } if (type == NodeType.NODE_LOCAL) { ResourceRequest nodeLocalRequest = application.getResourceRequest(priority, node.getRMNode().getNodeAddress()); if (nodeLocalRequest != null) { maxContainers = Math.min(maxContainers, nodeLocalRequest.getNumContainers()); } } return maxContainers; } private int assignContainersOnNode(FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority) { // Data-local int nodeLocalContainers = assignNodeLocalContainers(node, application, priority); // Rack-local int rackLocalContainers = assignRackLocalContainers(node, application, priority); // Off-switch int offSwitchContainers = assignOffSwitchContainers(node, application, priority); LOG.debug("assignContainersOnNode:" + " node=" + node.getRMNode().getNodeAddress() + " application=" + application.getApplicationId().getId() + " priority=" + priority.getPriority() + " #assigned=" + (nodeLocalContainers + rackLocalContainers + offSwitchContainers)); return (nodeLocalContainers + rackLocalContainers + offSwitchContainers); } private int assignNodeLocalContainers(FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority) { int assignedContainers = 0; ResourceRequest request = application.getResourceRequest(priority, node.getNodeName()); if (request != null) { // Don't allocate on this node if we don't need containers on this rack ResourceRequest rackRequest = application.getResourceRequest(priority, node.getRMNode().getRackName()); if (rackRequest == null || rackRequest.getNumContainers() <= 0) { return 0; } int assignableContainers = Math.min( getMaxAllocatableContainers(application, priority, node, NodeType.NODE_LOCAL), request.getNumContainers()); assignedContainers = assignContainer(node, application, priority, assignableContainers, request, NodeType.NODE_LOCAL); } return assignedContainers; } private int assignRackLocalContainers(FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority) { int assignedContainers = 0; ResourceRequest request = application.getResourceRequest(priority, node.getRMNode().getRackName()); if (request != null) { // Don't allocate on this rack if the application doens't need containers ResourceRequest offSwitchRequest = application.getResourceRequest(priority, ResourceRequest.ANY); if (offSwitchRequest.getNumContainers() <= 0) { return 0; } int assignableContainers = Math.min( getMaxAllocatableContainers(application, priority, node, NodeType.RACK_LOCAL), request.getNumContainers()); assignedContainers = assignContainer(node, application, priority, assignableContainers, request, NodeType.RACK_LOCAL); } return assignedContainers; } private int assignOffSwitchContainers(FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority) { int assignedContainers = 0; ResourceRequest request = application.getResourceRequest(priority, ResourceRequest.ANY); if (request != null) { assignedContainers = assignContainer(node, application, priority, request.getNumContainers(), request, NodeType.OFF_SWITCH); } return assignedContainers; } private int assignContainer(FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority, int assignableContainers, ResourceRequest request, NodeType type) { LOG.debug("assignContainers:" + " node=" + node.getRMNode().getNodeAddress() + " application=" + application.getApplicationId().getId() + " priority=" + priority.getPriority() + " assignableContainers=" + assignableContainers + " request=" + request + " type=" + type); Resource capability = request.getCapability(); int availableContainers = (int) (node.getAvailableResource().getMemorySize() / capability.getMemorySize()); // TODO: A buggy application with this zero would crash the scheduler. int assignedContainers = Math.min(assignableContainers, availableContainers); if (assignedContainers > 0) { for (int i = 0; i < assignedContainers; ++i) { NodeId nodeId = node.getRMNode().getNodeID(); ContainerId containerId = BuilderUtils.newContainerId(application.getApplicationAttemptId(), application.getNewContainerId()); // Create the container Container container = BuilderUtils.newContainer(containerId, nodeId, node.getRMNode().getHttpAddress(), capability, priority, null); // Allocate! // Inform the application RMContainer rmContainer = application.allocate(type, node, priority, request, container); // Inform the node node.allocateContainer(rmContainer); // Update usage for this container increaseUsedResources(rmContainer); } } return assignedContainers; } private synchronized void nodeUpdate(RMNode rmNode) { FiCaSchedulerNode node = getNode(rmNode.getNodeID()); List<UpdatedContainerInfo> containerInfoList = rmNode.pullContainerUpdates(); List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>(); List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>(); for (UpdatedContainerInfo containerInfo : containerInfoList) { newlyLaunchedContainers.addAll(containerInfo.getNewlyLaunchedContainers()); completedContainers.addAll(containerInfo.getCompletedContainers()); } // Processing the newly launched containers for (ContainerStatus launchedContainer : newlyLaunchedContainers) { containerLaunchedOnNode(launchedContainer.getContainerId(), node); } // Process completed containers for (ContainerStatus completedContainer : completedContainers) { ContainerId containerId = completedContainer.getContainerId(); LOG.debug("Container FINISHED: " + containerId); super.completedContainer(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); node.releaseContainer(containerId, true); } // Updating node resource utilization node.setAggregatedContainersUtilization(rmNode.getAggregatedContainersUtilization()); node.setNodeUtilization(rmNode.getNodeUtilization()); // If the node is decommissioning, send an update to have the total // resource equal to the used resource, so no available resource to // schedule. if (rmNode.getState() == NodeState.DECOMMISSIONING) { this.rmContext.getDispatcher().getEventHandler() .handle(new RMNodeResourceUpdateEvent(rmNode.getNodeID(), ResourceOption.newInstance(getSchedulerNode(rmNode.getNodeID()).getUsedResource(), 0))); } if (rmContext.isWorkPreservingRecoveryEnabled() && !rmContext.isSchedulerReadyForAllocatingContainers()) { return; } if (Resources.greaterThanOrEqual(resourceCalculator, clusterResource, node.getAvailableResource(), minimumAllocation)) { LOG.debug("Node heartbeat " + rmNode.getNodeID() + " available resource = " + node.getAvailableResource()); assignContainers(node); LOG.debug("Node after allocation " + rmNode.getNodeID() + " resource = " + node.getAvailableResource()); } updateAvailableResourcesMetrics(); } private void increaseUsedResources(RMContainer rmContainer) { Resources.addTo(usedResource, rmContainer.getAllocatedResource()); } private void updateAppHeadRoom(SchedulerApplicationAttempt schedulerAttempt) { schedulerAttempt.setHeadroom(Resources.subtract(clusterResource, usedResource)); } private void updateAvailableResourcesMetrics() { metrics.setAvailableResourcesToQueue(Resources.subtract(clusterResource, usedResource)); } @Override public void handle(SchedulerEvent event) { switch (event.getType()) { case NODE_ADDED: { NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent) event; addNode(nodeAddedEvent.getAddedRMNode()); recoverContainersOnNode(nodeAddedEvent.getContainerReports(), nodeAddedEvent.getAddedRMNode()); } break; case NODE_REMOVED: { NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent) event; removeNode(nodeRemovedEvent.getRemovedRMNode()); } break; case NODE_RESOURCE_UPDATE: { NodeResourceUpdateSchedulerEvent nodeResourceUpdatedEvent = (NodeResourceUpdateSchedulerEvent) event; updateNodeResource(nodeResourceUpdatedEvent.getRMNode(), nodeResourceUpdatedEvent.getResourceOption()); } break; case NODE_UPDATE: { NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent) event; nodeUpdate(nodeUpdatedEvent.getRMNode()); } break; case APP_ADDED: { AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; addApplication(appAddedEvent.getApplicationId(), appAddedEvent.getQueue(), appAddedEvent.getUser(), appAddedEvent.getIsAppRecovering()); } break; case APP_REMOVED: { AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent) event; doneApplication(appRemovedEvent.getApplicationID(), appRemovedEvent.getFinalState()); } break; case APP_ATTEMPT_ADDED: { AppAttemptAddedSchedulerEvent appAttemptAddedEvent = (AppAttemptAddedSchedulerEvent) event; addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), appAttemptAddedEvent.getTransferStateFromPreviousAttempt(), appAttemptAddedEvent.getIsAttemptRecovering()); } break; case APP_ATTEMPT_REMOVED: { AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent = (AppAttemptRemovedSchedulerEvent) event; try { doneApplicationAttempt(appAttemptRemovedEvent.getApplicationAttemptID(), appAttemptRemovedEvent.getFinalAttemptState(), appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts()); } catch (IOException ie) { LOG.error("Unable to remove application " + appAttemptRemovedEvent.getApplicationAttemptID(), ie); } } break; case CONTAINER_EXPIRED: { ContainerExpiredSchedulerEvent containerExpiredEvent = (ContainerExpiredSchedulerEvent) event; ContainerId containerid = containerExpiredEvent.getContainerId(); super.completedContainer(getRMContainer(containerid), SchedulerUtils.createAbnormalContainerStatus(containerid, SchedulerUtils.EXPIRED_CONTAINER), RMContainerEventType.EXPIRE); } break; default: LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!"); } } @Lock(FifoScheduler.class) @Override protected synchronized void completedContainerInternal(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { // Get the application for the finished container Container container = rmContainer.getContainer(); FiCaSchedulerApp application = getCurrentAttemptForContainer(container.getId()); ApplicationId appId = container.getId().getApplicationAttemptId().getApplicationId(); // Get the node on which the container was allocated FiCaSchedulerNode node = getNode(container.getNodeId()); if (application == null) { LOG.info("Unknown application: " + appId + " released container " + container.getId() + " on node: " + node + " with event: " + event); return; } // Inform the application application.containerCompleted(rmContainer, containerStatus, event, RMNodeLabelsManager.NO_LABEL); // Inform the node node.releaseContainer(rmContainer.getContainerId(), false); // Update total usage Resources.subtractFrom(usedResource, container.getResource()); LOG.info("Application attempt " + application.getApplicationAttemptId() + " released container " + container.getId() + " on node: " + node + " with event: " + event); } private Resource usedResource = recordFactory.newRecordInstance(Resource.class); private synchronized void removeNode(RMNode nodeInfo) { FiCaSchedulerNode node = getNode(nodeInfo.getNodeID()); if (node == null) { return; } // Kill running containers for (RMContainer container : node.getCopiedListOfRunningContainers()) { super.completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } //Remove the node this.nodes.remove(nodeInfo.getNodeID()); updateMaximumAllocation(node, false); // Update cluster metrics Resources.subtractFrom(clusterResource, node.getTotalResource()); } @Override public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues, boolean recursive) { return DEFAULT_QUEUE.getQueueInfo(false, false); } @Override public List<QueueUserACLInfo> getQueueUserAclInfo() { return DEFAULT_QUEUE.getQueueUserAclInfo(null); } @Override public ResourceCalculator getResourceCalculator() { return resourceCalculator; } private synchronized void addNode(RMNode nodeManager) { FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager, usePortForNodeName); this.nodes.put(nodeManager.getNodeID(), schedulerNode); Resources.addTo(clusterResource, schedulerNode.getTotalResource()); updateMaximumAllocation(schedulerNode, true); } @Override public void recover(RMState state) { // NOT IMPLEMENTED } @Override public RMContainer getRMContainer(ContainerId containerId) { FiCaSchedulerApp attempt = getCurrentAttemptForContainer(containerId); return (attempt == null) ? null : attempt.getRMContainer(containerId); } @Override public QueueMetrics getRootQueueMetrics() { return DEFAULT_QUEUE.getMetrics(); } @Override public synchronized boolean checkAccess(UserGroupInformation callerUGI, QueueACL acl, String queueName) { return DEFAULT_QUEUE.hasAccess(acl, callerUGI); } @Override public synchronized List<ApplicationAttemptId> getAppsInQueue(String queueName) { if (queueName.equals(DEFAULT_QUEUE.getQueueName())) { List<ApplicationAttemptId> attempts = new ArrayList<ApplicationAttemptId>(applications.size()); for (SchedulerApplication<FiCaSchedulerApp> app : applications.values()) { attempts.add(app.getCurrentAppAttempt().getApplicationAttemptId()); } return attempts; } else { return null; } } public Resource getUsedResource() { return usedResource; } @Override protected void decreaseContainer(SchedContainerChangeRequest decreaseRequest, SchedulerApplicationAttempt attempt) { // TODO Auto-generated method stub } }