com.yahoo.storm.yarn.StormOnYarn.java Source code

Java tutorial

Introduction

Here is the source code for com.yahoo.storm.yarn.StormOnYarn.java

Source

/*
 * Copyright (c) 2013 Yahoo! Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */

package com.yahoo.storm.yarn;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.YarnClient;
import org.apache.hadoop.yarn.client.YarnClientImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.yahoo.storm.yarn.Config;
import com.yahoo.storm.yarn.generated.StormMaster;

public class StormOnYarn {
    private static final Logger LOG = LoggerFactory.getLogger(StormOnYarn.class);

    private YarnClient _yarn;
    private YarnConfiguration _hadoopConf;
    private ApplicationId _appId;
    @SuppressWarnings("rawtypes")
    private Map _stormConf;
    private MasterClient _client = null;
    private InetSocketAddress _yarnRMaddr;

    private StormOnYarn(@SuppressWarnings("rawtypes") Map stormConf) {
        this(null, stormConf);
    }

    private StormOnYarn(ApplicationId appId, @SuppressWarnings("rawtypes") Map stormConf) {
        _hadoopConf = new YarnConfiguration();
        _yarnRMaddr = _hadoopConf.getSocketAddr(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS,
                YarnConfiguration.DEFAULT_RM_PORT);
        _yarn = new YarnClientImpl(_yarnRMaddr);
        _stormConf = stormConf;
        _appId = appId;
        _yarn.init(_hadoopConf);
        _yarn.start();
    }

    /**
     * Releases the resources held by this instance: closes the master client if one
     * has been created by {@link #getClient()}, then stops the YARN client.
     * <p>
     * The YARN client is stopped even when closing the master client throws, so a
     * failing close cannot leave the YARN client running.
     */
    public void stop() {
        try {
            if (_client != null) {
                _client.close();
            }
        } finally {
            // Must run regardless of what close() did, otherwise the YARN client leaks.
            _yarn.stop();
        }
    }

    /**
     * Returns the YARN application id currently held by this instance.
     *
     * @return the stored application id
     */
    public ApplicationId getAppId() {
        //TODO make this immutable
        return this._appId;
    }

    /**
     * Returns the Thrift client for the application master, connecting on first use.
     * <p>
     * On the first call the application report is fetched from YARN, the master's
     * host and RPC port are written into the Storm configuration (which is created
     * if it was {@code null}), a timeout of 10 is stored under
     * {@code Config.MASTER_TIMEOUT_SECS}, and a {@link MasterClient} is built from
     * that configuration. Later calls reuse the same {@link MasterClient}.
     *
     * @return the client obtained from the cached {@link MasterClient}
     * @throws YarnRemoteException if the application report cannot be retrieved
     * @throws RuntimeException    if the application report carries no host
     */
    @SuppressWarnings("unchecked")
    public synchronized StormMaster.Client getClient() throws YarnRemoteException {
        if (_client != null) {
            return _client.getClient();
        }

        //TODO need a way to force this to reconnect in case of an error
        final ApplicationReport appReport = _yarn.getApplicationReport(_appId);
        final String masterHost = appReport.getHost();
        final int masterPort = appReport.getRpcPort();
        LOG.info("application report for " + _appId + " :" + masterHost + ":" + masterPort);
        if (masterHost == null) {
            throw new RuntimeException("No host returned for Application Master " + _appId);
        }

        if (_stormConf == null) {
            _stormConf = new HashMap<Object, Object>();
        }
        _stormConf.put(Config.MASTER_HOST, masterHost);
        _stormConf.put(Config.MASTER_THRIFT_PORT, masterPort);
        LOG.info("Attaching to " + masterHost + ":" + masterPort + " to talk to app master " + _appId);
        //TODO need a better work around to the config not being set.
        _stormConf.put(Config.MASTER_TIMEOUT_SECS, 10);

        _client = MasterClient.getConfiguredClient(_stormConf);
        return _client.getClient();
    }

    /**
     * Submits a new YARN application whose application master runs
     * {@code com.yahoo.storm.yarn.MasterServer}.
     * <p>
     * Steps, in order: obtain a new application id from YARN (stored in
     * {@code _appId}); copy the jar containing {@code MasterServer} into the
     * application's home directory on the file system; register that jar, the Storm
     * zip archive and the generated configuration as local resources; build the
     * CLASSPATH and other environment variables; build the shell command line; and
     * submit the application.
     *
     * @param appName            application name, set on the submission context and
     *                           exported to the container as {@code appName}
     * @param queue              YARN queue to submit to
     * @param amMB               memory to request for the application master container;
     *                           capped at the maximum capability reported by YARN
     * @param storm_zip_location path of the Storm zip archive, or {@code null} to use
     *                           {@code /lib/storm/<stormVersion>/storm.zip}
     * @throws Exception on any failure while preparing or submitting the application
     */
    private void launchApp(String appName, String queue, int amMB, String storm_zip_location) throws Exception {
        LOG.debug("StormOnYarn:launchApp() ...");
        GetNewApplicationResponse app = _yarn.getNewApplication();
        _appId = app.getApplicationId();
        LOG.debug("_appId:" + _appId);

        // Never request more memory than YARN reports as its maximum capability.
        //TODO need some sanity checks
        amMB = Math.min(amMB, app.getMaximumResourceCapability().getMemory());

        ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
        appContext.setApplicationId(app.getApplicationId());
        appContext.setApplicationName(appName);
        appContext.setQueue(queue);

        // Set up the container launch context for the application master
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

        // set local resources for the application master
        // local files or archives as needed
        // In this scenario, the jar file for the application master is part of the
        // local resources
        LOG.info("Copy App Master jar from local filesystem and add to local environment");
        // Copy the application master jar to the filesystem
        // Create a local resource to point to the destination jar path
        String appMasterJar = findContainingJar(MasterServer.class);
        FileSystem fs = FileSystem.get(_hadoopConf);
        Path src = new Path(appMasterJar);
        String appHome = Util.getApplicationHomeForId(_appId.toString());
        Path dst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + "AppMaster.jar");
        fs.copyFromLocalFile(false, true, src, dst);
        localResources.put("AppMaster.jar", Util.newYarnAppResource(fs, dst));

        String stormVersion = Util.getStormVersion(_stormConf);
        Path zip;
        if (storm_zip_location != null) {
            zip = new Path(storm_zip_location);
        } else {
            zip = new Path("/lib/storm/" + stormVersion + "/storm.zip");
        }
        _stormConf.put("storm.zip.path", zip.makeQualified(fs).toUri().getPath());
        localResources.put("storm",
                Util.newYarnAppResource(fs, zip, LocalResourceType.ARCHIVE, LocalResourceVisibility.PUBLIC));

        Path dirDst = Util.createConfigurationFileInFs(fs, appHome, _stormConf, _hadoopConf);
        // establish a symbolic link to conf directory
        localResources.put("conf", Util.newYarnAppResource(fs, dirDst));

        // Set local resource info into app master container launch context
        amContainer.setLocalResources(localResources);

        // Set the env variables to be setup in the env where the application master
        // will be run
        LOG.info("Set the environment for the application master");
        Map<String, String> env = new HashMap<String, String>();
        final String classpathVar = Environment.CLASSPATH.name();
        // add the runtime classpath needed for tests to work
        Apps.addToEnvironment(env, classpathVar, "./conf");
        Apps.addToEnvironment(env, classpathVar, "./AppMaster.jar");
        //TODO need a better way to get the storm .zip created and put where it needs to go.

        String stormHomeInZip = Util.getStormHomeInZip(fs, zip, stormVersion);
        Apps.addToEnvironment(env, classpathVar, "./storm/" + stormHomeInZip + "/*");
        Apps.addToEnvironment(env, classpathVar, "./storm/" + stormHomeInZip + "/lib/*");

        for (String c : _hadoopConf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
            Apps.addToEnvironment(env, classpathVar, c.trim());
        }

        //For tests purpose, add maven generated classpath
        // One representative class per dependency jar; the jar that provides each class
        // is appended to CLASSPATH in exactly this order.
        final String[] dependencyProbeClasses = {
            "org.apache.commons.configuration.Configuration",
            "org.apache.commons.cli.Options",
            "org.apache.hadoop.net.NetUtils",
            "org.apache.hadoop.conf.Configuration",
            "org.apache.hadoop.security.authentication.client.AuthenticationException",
            "org.apache.hadoop.yarn.YarnException",
            "org.apache.hadoop.yarn.api.ApplicationConstants",
            "org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse",
            "org.apache.hadoop.yarn.api.records.ApplicationAttemptId",
            "org.apache.hadoop.yarn.conf.YarnConfiguration",
            "org.apache.hadoop.yarn.service.Service",
            "org.apache.hadoop.yarn.util.ConverterUtils",
            "org.apache.hadoop.yarn.client.AMRMClientImpl",
            "com.google.protobuf.MessageOrBuilder",
        };
        for (String probeClass : dependencyProbeClasses) {
            Apps.addToEnvironment(env, classpathVar, findContainingJar(Class.forName(probeClass)));
        }

        env.put("appJar", appMasterJar);
        env.put("appName", appName);
        env.put("appId", Integer.toString(_appId.getId()));
        amContainer.setEnvironment(env);

        // Set the necessary command to execute the application master.
        // The entries form one shell command line; "|", "&&" and the "1>"/"2>"
        // redirections are shell syntax, not arguments.
        final String logDir = ApplicationConstants.LOG_DIR_EXPANSION_VAR;
        Vector<String> vargs = new Vector<String>();

        // TODO need a better way to do debugging
        // Debug aid: list every file visible in the container working directory.
        vargs.add("find");
        vargs.add(".");
        vargs.add("-follow");
        vargs.add("|");
        vargs.add("xargs");
        vargs.add("ls");
        vargs.add("-ld");
        vargs.add("1>" + logDir + "/files");
        vargs.add("&&");
        // Debug aid: record the effective classpath.
        vargs.add("echo");
        vargs.add("$CLASSPATH");
        vargs.add("1>" + logDir + "/classpath");
        vargs.add("&&");
        // Debug aid: record the working directory.
        vargs.add("echo");
        vargs.add("$PWD");
        vargs.add("1>" + logDir + "/pwd");
        vargs.add("&&");
        // The actual application master process.
        vargs.add("java");
        vargs.add("-Dstorm.home=./storm/" + stormHomeInZip + "/");
        //vargs.add("-verbose:class");
        vargs.add("com.yahoo.storm.yarn.MasterServer");
        vargs.add("1>" + logDir + "/stdout");
        vargs.add("2>" + logDir + "/stderr");
        // Set java executable command
        LOG.info("Setting up app master command:" + vargs);

        amContainer.setCommands(vargs);

        // Set up resource type requirements
        // For now, only memory is supported so we set memory requirements
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(amMB);
        amContainer.setResource(capability);
        appContext.setAMContainerSpec(amContainer);

        _yarn.submitApplication(appContext);
    }

    /**
     * Polls the application report once per second until the application reaches a
     * decisive state.
     * <p>
     * If the calling thread is interrupted while waiting, polling continues, and the
     * thread's interrupt status is restored before this method returns so callers can
     * still observe the interruption.
     *
     * @return {@code true} once the application is {@code RUNNING}, or {@code FINISHED}
     *         with final status {@code SUCCEEDED}; {@code false} if it is {@code FINISHED}
     *         with any other final status, {@code KILLED} or {@code FAILED}
     * @throws YarnRemoteException if the application report cannot be retrieved
     */
    public boolean waitUntilLaunched() throws YarnRemoteException {
        boolean interrupted = false;
        try {
            while (true) {

                // Check app status every 1 second.
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Remember the interrupt instead of re-asserting it right away:
                    // an asserted flag would make every later sleep() return immediately.
                    interrupted = true;
                    LOG.debug("Thread sleep in monitoring loop interrupted");
                }

                // Get application report for the appId we are interested in
                ApplicationReport report = _yarn.getApplicationReport(_appId);
                YarnApplicationState state = report.getYarnApplicationState();
                FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
                if (YarnApplicationState.FINISHED == state) {
                    if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
                        LOG.info("Application has completed successfully. Breaking monitoring loop");
                        return true;
                    } else {
                        // Plain concatenation: a null final status must not raise an NPE here.
                        LOG.info("Application did finished unsuccessfully." + " YarnState=" + state
                                + ", DSFinalStatus=" + dsStatus + ". Breaking monitoring loop");
                        return false;
                    }
                } else if (YarnApplicationState.KILLED == state || YarnApplicationState.FAILED == state) {
                    LOG.info("Application did not finish." + " YarnState=" + state + ", DSFinalStatus="
                            + dsStatus + ". Breaking monitoring loop");
                    return false;
                }

                //announce application master's host and port
                if (state == YarnApplicationState.RUNNING) {
                    return true;
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Find a jar that contains a class of the same name, if any.
     * It will return a jar file, even if that is not the first thing
     * on the class path that has a class with the same name.
     * <p>
     * The lookup uses the class's own class loader. Classes defined by the bootstrap
     * loader report a {@code null} class loader; for those the system class loader is
     * used instead.
     *
     * @param my_class the class to find.
     * @return the file system path of a jar file that contains the class; never {@code null}.
     * @throws IOException if the resource lookup fails, or if no {@code jar:} resource
     *         provides the class (for example when it is loaded from a directory)
     */
    public static String findContainingJar(Class<?> my_class) throws IOException {
        ClassLoader loader = my_class.getClassLoader();
        if (loader == null) {
            // getClassLoader() returns null for bootstrap-loaded classes such as java.lang.String.
            loader = ClassLoader.getSystemClassLoader();
        }
        String class_file = my_class.getName().replace('.', '/') + ".class";
        for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
            URL url = itr.nextElement();
            if (!"jar".equals(url.getProtocol())) {
                continue;
            }
            // A jar URL path looks like "file:/path/to/x.jar!/pkg/Name.class".
            String toReturn = url.getPath();
            if (toReturn.startsWith("file:")) {
                toReturn = toReturn.substring("file:".length());
            }
            // URLDecoder is a misnamed class, since it actually decodes
            // x-www-form-urlencoded MIME type rather than actual
            // URL encoding (which the file path has). Therefore it would
            // decode +s to ' 's which is incorrect (spaces are actually
            // either unencoded or encoded as "%20"). Replace +s first, so
            // that they are kept sacred during the decoding process.
            toReturn = toReturn.replace("+", "%2B");
            toReturn = URLDecoder.decode(toReturn, "UTF-8");
            // Drop the "!/entry" suffix so only the jar file path remains.
            return toReturn.replaceAll("!.*$", "");
        }

        throw new IOException("Fail to locat a JAR for class: " + my_class.getName());
    }

    /**
     * Creates a new instance and uses it to launch a Storm application master on YARN.
     * <p>
     * Creating the instance starts a YARN client. If the launch then fails, that
     * client is stopped before the failure is propagated, so it does not leak.
     *
     * @param appName            name of the YARN application
     * @param queue              YARN queue to submit to
     * @param amMB               memory to request for the application master container
     * @param stormConf          the Storm configuration map; stored by reference
     * @param storm_zip_location path of the Storm zip archive, passed through to the launch
     * @return the instance that launched the application
     * @throws Exception if the launch fails
     */
    public static StormOnYarn launchApplication(String appName, String queue, int amMB,
            @SuppressWarnings("rawtypes") Map stormConf, String storm_zip_location) throws Exception {
        StormOnYarn storm = new StormOnYarn(stormConf);
        boolean launched = false;
        try {
            storm.launchApp(appName, queue, amMB, storm_zip_location);
            launched = true;
            return storm;
        } finally {
            if (!launched) {
                try {
                    storm.stop();
                } catch (RuntimeException stopFailure) {
                    // Only log: the launch failure already in flight is the one callers need to see.
                    LOG.warn("Failed to stop YARN client after unsuccessful launch", stopFailure);
                }
            }
        }
    }

    /**
     * Creates an instance bound to an already submitted application.
     *
     * @param appId     textual application id, converted with
     *                  {@code ConverterUtils.toApplicationId}
     * @param stormConf the Storm configuration map; stored by reference
     * @return a new instance referring to the given application
     */
    public static StormOnYarn attachToApp(String appId, @SuppressWarnings("rawtypes") Map stormConf) {
        final ApplicationId parsedId = ConverterUtils.toApplicationId(appId);
        return new StormOnYarn(parsedId, stormConf);
    }
}