yarnkit.appmaster.ApplicationMasterService.java Source code

Java tutorial

Introduction

Here is the source code for yarnkit.appmaster.ApplicationMasterService.java

Source

/*
 * Hivemall: Hive scalable Machine Learning Library
 *
 * Copyright (C) 2015 Makoto YUI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package yarnkit.appmaster;

import java.nio.ByteBuffer;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;

import yarnkit.container.ContainerLaunchContextFactory;
import yarnkit.container.ContainerLaunchParameters;
import yarnkit.container.ContainerTracker;
import yarnkit.utils.YarnUtils;

import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.AbstractScheduledService;

/**
 * A service that handles the server-side invocation of {@code ApplicationMaster}.
 *
 * <p>
 * The service registers itself to the Resource Manager on start up, delegates container
 * requests/launches to a {@link ContainerTracker}, and periodically checks whether all
 * requested containers have completed or too many of them have failed. On shut down it
 * kills the tracked containers, unregisters from the Resource Manager and stops the
 * Resource Manager client.
 *
 * <p>
 * Life-cycle hooks run on the service's own executor while the
 * {@link AMRMClientAsync.CallbackHandler} callbacks are invoked by the Resource Manager
 * client; state shared between them is therefore kept in {@code volatile} fields or atomic
 * counters.
 */
public final class ApplicationMasterService extends AbstractScheduledService
        implements AMRMClientAsync.CallbackHandler {
    private static final Log LOG = LogFactory.getLog(ApplicationMasterService.class);

    /** Interval in milliseconds passed to the asynchronous Resource Manager client. */
    private static final int RM_HEARTBEAT_INTERVAL_MS = 1000;

    @Nonnull
    private final ApplicationMasterParameters parameters;
    @Nonnull
    private final YarnConfiguration yarnConf;

    // Assigned in startUp(); read from callbacks and shutDown().
    @Nullable
    private volatile AMRMClientAsync<ContainerRequest> resourceManager;
    private volatile boolean hasRunningContainers = false;
    // First error that caused this service to stop, if any.
    @Nullable
    private volatile Throwable throwable;

    // application master status
    @Nullable
    private volatile ContainerTracker tracker;
    private final AtomicInteger totalRequested = new AtomicInteger(0);
    private final AtomicInteger totalCompleted = new AtomicInteger(0);
    private final AtomicInteger totalFailures = new AtomicInteger(0);

    /**
     * Creates a service for the given application master parameters.
     *
     * @param parameters settings of the application master; must not be null
     * @throws NullPointerException if {@code parameters} is null
     */
    public ApplicationMasterService(@CheckForNull ApplicationMasterParameters parameters) {
        super();
        this.parameters = Preconditions.checkNotNull(parameters);
        this.yarnConf = new YarnConfiguration(parameters.getConfiguration());
    }

    /**
     * Returns true if there are containers still running.
     *
     * <p>
     * The flag is raised at the end of a successful {@link #startUp()} and cleared in
     * {@link #shutDown()}.
     */
    public boolean hasRunningContainers() {
        return hasRunningContainers;
    }

    //-----------------------------------------------------------------
    // AbstractScheduledService implementations

    /**
     * Stops the service once the number of failed containers exceeds the allowed failures
     * or once every requested container has completed.
     */
    @Override
    protected void runOneIteration() throws Exception {
        final boolean tooManyFailures = totalFailures.get() > parameters.getAllowedFailures();
        final boolean allCompleted = totalCompleted.get() == totalRequested.get();
        if (tooManyFailures || allCompleted) {
            stop();
        }
    }

    /**
     * Starts the Resource Manager client, registers this application master and
     * initializes the {@link ContainerTracker}.
     *
     * <p>
     * If the registration fails, the error is recorded, the service is asked to stop and
     * no tracker is created.
     */
    @Override
    protected void startUp() throws Exception {
        LOG.info("Starting Application Master");

        // create security tokens
        // NOTE(review): the tokens are captured BEFORE the AM->RM token is removed below;
        // if YarnUtils.getSecurityToken serializes the credentials eagerly, containers may
        // still receive the AM->RM token -- TODO confirm against YarnUtils.
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        ByteBuffer securityTokens = YarnUtils.getSecurityToken(credentials);

        // Create appSubmitterUgi and add original tokens to it
        String userName = System.getenv(ApplicationConstants.Environment.USER.name());
        UserGroupInformation appSubmitterUgi = UserGroupInformation.createRemoteUser(userName);
        // remove the AM->RM token so that containers cannot access it.
        YarnUtils.removeToken(credentials, AMRMTokenIdentifier.KIND_NAME);
        appSubmitterUgi.addCredentials(credentials);

        // start a resource manager (RM) client
        final AMRMClientAsync<ContainerRequest> rmClient = AMRMClientAsync.createAMRMClientAsync(
            RM_HEARTBEAT_INTERVAL_MS, this);
        this.resourceManager = rmClient;
        rmClient.init(yarnConf);
        rmClient.start();

        // register an application master (AM) to resource manager (RM)
        final RegisterApplicationMasterResponse registration;
        try {
            registration = rmClient.registerApplicationMaster(parameters.getHostname(),
                    parameters.getClientPort(), parameters.getTrackingUrl());
            LOG.info("Registered Application Master: " + registration);
        } catch (Exception e) {
            LOG.error("Exception thrown registering Application Master", e);
            // Remember the cause so that shutDown() does not report SUCCEEDED.
            this.throwable = e;
            stop();
            return;
        }

        // assign containers
        ContainerLaunchContextFactory factory = new ContainerLaunchContextFactory(
                registration.getMaximumResourceCapability(), securityTokens);
        ContainerLaunchParameters containerLaunchParams = parameters.getContainerLaunchParameters();
        final ContainerTracker containerTracker = new ContainerTracker(this, containerLaunchParams);
        // Publish the tracker before init() because init() may call back into this service.
        this.tracker = containerTracker;
        containerTracker.init(factory, yarnConf);
        this.hasRunningContainers = true;
    }

    /**
     * Kills the tracked containers, reports the final application status to the Resource
     * Manager and stops the Resource Manager client.
     *
     * <p>
     * The application is reported as FAILED when an error was recorded, when the service is
     * in the FAILED state, or when the number of failed containers exceeds the allowed
     * failures; otherwise it is reported as SUCCEEDED.
     */
    @Override
    protected void shutDown() throws Exception {
        LOG.info("Stopping Containers");

        // stop containers (the tracker is absent when start up did not complete)
        final ContainerTracker containerTracker = this.tracker;
        if (containerTracker != null) {
            containerTracker.kill();
        }
        this.hasRunningContainers = false;

        // decide the final status
        final Throwable cause = this.throwable;
        final boolean failed = cause != null || state() == State.FAILED
                || totalFailures.get() > parameters.getAllowedFailures();
        final FinalApplicationStatus status = failed ? FinalApplicationStatus.FAILED
                : FinalApplicationStatus.SUCCEEDED;
        final String message = (failed && cause != null) ? cause.getLocalizedMessage() : null;

        final AMRMClientAsync<ContainerRequest> rmClient = this.resourceManager;
        if (rmClient == null) {
            // The RM client was never created; there is nothing to unregister or stop.
            return;
        }

        // unregister application master
        LOG.info("Sending a finish request to Resource Manager: " + status);
        try {
            rmClient.unregisterApplicationMaster(status, message, null);
        } catch (Exception e) {
            LOG.error("Error finishing Application Master", e);
        } finally {
            // Release the client's background threads regardless of the unregister outcome.
            try {
                rmClient.stop();
            } catch (Exception e) {
                LOG.error("Error stopping Resource Manager client", e);
            }
        }
    }

    /**
     * Runs {@link #runOneIteration()} every second, starting immediately.
     */
    @Override
    protected Scheduler scheduler() {
        return Scheduler.newFixedRateSchedule(0, 1, TimeUnit.SECONDS);
    }

    //-----------------------------------------------------------------
    // Callback from ContainerTracker

    /**
     * Forwards a container request to the Resource Manager and counts it as requested.
     *
     * @param containerRequest the request to submit
     */
    public void onContainerRequest(@Nonnull ContainerRequest containerRequest) {
        resourceManager.addContainerRequest(containerRequest);
        totalRequested.incrementAndGet();
    }

    //-----------------------------------------------------------------
    // AMRMClientAsync.CallbackHandler implementations

    /**
     * Launches each allocated container through the tracker while it still needs
     * containers; stops the service if more containers were allocated than needed.
     */
    @Override
    public void onContainersAllocated(@Nonnull List<Container> allocatedContainers) {
        final int numAllocated = allocatedContainers.size();
        LOG.info("Allocating " + numAllocated + " container(s)");

        final ContainerTracker containerTracker = this.tracker;
        int assigned = 0;
        for (Container allocated : allocatedContainers) {
            if (containerTracker.needsContainers()) {
                containerTracker.launchContainer(allocated);
                ++assigned;
            }
        }

        if (assigned < numAllocated) {
            LOG.error(String.format("Not all containers were allocated (%d out of %d)", assigned,
                    numAllocated));
            stop();
        }
    }

    /**
     * Updates the completion/failure counters from the reported container statuses.
     *
     * <p>
     * Successful containers count as completed; containers that exited with any status other
     * than SUCCESS or ABORTED count as both completed and failed. ABORTED containers are
     * not counted.
     */
    @Override
    public void onContainersCompleted(@Nonnull List<ContainerStatus> containerStatuses) {
        LOG.info(containerStatuses.size() + " container(s) have completed");

        for (ContainerStatus status : containerStatuses) {
            LOG.info(YarnUtils.getContainerExitStatusMessage(status));

            final int exitStatus = status.getExitStatus();
            if (exitStatus == ContainerExitStatus.ABORTED) {
                // Containers killed by the framework, either due to being released by
                // the application or being 'lost' due to node failures etc.
                continue;
            }
            totalCompleted.incrementAndGet();
            if (exitStatus != ContainerExitStatus.SUCCESS) {
                totalFailures.incrementAndGet();
            }
        }
    }

    /**
     * Stops the service when the Resource Manager requests a shutdown.
     */
    @Override
    public void onShutdownRequest() {
        stop();
    }

    /**
     * Logs updated node reports at debug level.
     */
    @Override
    public void onNodesUpdated(@Nonnull List<NodeReport> updatedNodes) {
        if (LOG.isDebugEnabled() && !updatedNodes.isEmpty()) {
            LOG.debug("Nodes updated:" + updatedNodes);
        }
    }

    /**
     * Records the reported error and stops the service; the recorded error makes
     * {@link #shutDown()} report the application as FAILED.
     */
    @Override
    public void onError(@Nonnull Throwable e) {
        this.throwable = e;
        stop();
    }

    /**
     * Returns the ratio of completed containers to the tracker's total number of containers,
     * or 0 when no tracker exists yet or the total is zero.
     */
    @Override
    public float getProgress() {
        final ContainerTracker containerTracker = this.tracker;
        if (containerTracker == null) {
            return 0.f;
        }

        final int num = containerTracker.getCompleted();
        final int den = containerTracker.getNumContainers();
        if (den == 0) {
            return 0.0f;
        }
        return ((float) num) / den;
    }

}