husky.client.HuskyYarnClient.java Source code

Java tutorial

Introduction

Here is the source code for husky.client.HuskyYarnClient.java

Source

/* Copyright 2016 Husky Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package husky.client;

import husky.server.HuskyApplicationMaster;
import org.apache.commons.cli.*;
import org.apache.commons.cli.Options;
import org.apache.commons.math3.util.Pair;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import static org.apache.hadoop.yarn.api.ApplicationConstants.Environment.JAVA_HOME;
import static org.apache.hadoop.yarn.api.records.FinalApplicationStatus.SUCCEEDED;
import static org.apache.hadoop.yarn.api.records.LocalResourceType.ARCHIVE;
import static org.apache.hadoop.yarn.api.records.LocalResourceType.FILE;
import static org.apache.hadoop.yarn.api.records.YarnApplicationState.FAILED;
import static org.apache.hadoop.yarn.api.records.YarnApplicationState.FINISHED;
import static org.apache.hadoop.yarn.api.records.YarnApplicationState.KILLED;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH;
import static org.apache.hadoop.yarn.conf.YarnConfiguration.YARN_APPLICATION_CLASSPATH;

// TODO(zzxx): Archive files not working
public class HuskyYarnClient {
    private static final Logger LOG = Logger.getLogger(HuskyYarnClient.class.getName());
    // path on hdfs where local resources store
    private static final String LOCAL_RESOURCES_HDFS_ROOT = "hdfs:///husky-yarn/";
    // name of application
    private static final String DEFAULT_APP_NAME = "Husky-on-Yarn-App";

    // Yarn
    private FileSystem mFileSystem = null;
    private YarnClient mYarnClient = null;
    private YarnConfiguration mYarnConf = null;
    // Yarn application
    private ApplicationId mAppId = null;
    private String mAppName;
    private String mAppMasterJar = ""; // Jar that contains Application class
    private String mLocalResourceHDFSPaths; // Paths to resources that need to download to working environment
    private String mLocalFiles = ""; // Paths to resources that either locate in client machine or on HDFS
    private String mLocalArchives = ""; // Paths to archives that either locate in client machine or on HDFS
    private ArrayList<Pair<String, Integer>> mWorkerInfos = new ArrayList<Pair<String, Integer>>();
    // container resources
    private int mAppMasterMemory = 0;
    private int mContainerMemory = 0; // Memory that can be used by a container
    private int mNumVirtualCores = 0;
    private int mAppPriority = 0;
    // husky application
    private String mMasterExec = "";
    private String mAppExec = "";
    private String mConfigFile = "";
    private String mLdLibraryPath = "";
    // options
    // 1. kerberos
    private String mUserName = "";
    private String mKeyTabFile = "";
    // 2. upload log files to hdfs
    private String mLogPathToHDFS = "";

    public HuskyYarnClient() throws IOException {
        mYarnConf = new YarnConfiguration();
        mFileSystem = FileSystem.get(mYarnConf);
        mYarnClient = YarnClient.createYarnClient();
        mYarnClient.init(mYarnConf);
    }

    private Options createClientOptions() {
        Options opts = new Options();
        // set up options
        opts.addOption("help", false, "Print Usage");

        opts.addOption("app_name", true, "The name of the application");
        opts.addOption("jar", true, "Local path to the jar file of application master.");
        opts.addOption("local_resrcrt", true, "The root directory on hdfs where local resources store");
        opts.addOption("local_files", true,
                "Files that need to pass to working environment. Use comma(,) to split different files.");
        opts.addOption("local_archives", true,
                "Archives that need to pass to and be unarchived in working environment. Use comma(,) to split different archives.");

        opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run application master");
        opts.addOption("container_memory", true,
                "Amount of memory in MB to be requested to run container. Each container is a worker node.");
        opts.addOption("container_vcores", true, "Number of virtual cores that a container can use");
        opts.addOption("app_priority", true, "A number to indicate the priority of the husky application");

        opts.addOption("master", true, "Executable for c++ husky master (on local file system or HDFS)");
        opts.addOption("application", true, "Executable for c++ husky worker (on local file system or HDFS)");
        opts.addOption("config", true,
                "Configuration file for c++ husky master and application (on local file system or HDFS)");
        opts.addOption("worker_infos", true,
                "Specified hosts that husky application will run on. Use comma(,) to split different archives.");
        opts.addOption("ld_library_path", true,
                "Path on datanodes where c++ husky master and application looks for their libraries");

        opts.addOption("user_name", true, "Username used for kinit");
        opts.addOption("key_tab", true,
                "Path on DATANODEs to keytab file which is required by kinit to avoid security issues of HDFS");

        opts.addOption("log_to_hdfs", true,
                "Path on HDFS where to upload logs of application master and worker containers");
        return opts;
    }

    private void printUsage() {
        new HelpFormatter().printHelp("HuskyYarnClient", createClientOptions());
    }

    private boolean init(String[] args) throws ParseException, IOException {
        // parse options
        CommandLine cliParser = new GnuParser().parse(createClientOptions(), args);

        // analyze options
        if (args.length == 0 || cliParser.hasOption("help")) {
            printUsage();
            return false;
        }

        mAppName = cliParser.getOptionValue("app_name", cliParser.getOptionValue("app_name", DEFAULT_APP_NAME));
        mLocalResourceHDFSPaths = cliParser.getOptionValue("local_resrcrt", LOCAL_RESOURCES_HDFS_ROOT);
        mLocalFiles = cliParser.getOptionValue("local_files", "");
        mLocalArchives = cliParser.getOptionValue("local_archives", "");

        if (cliParser.hasOption("jar")) {
            mAppMasterJar = cliParser.getOptionValue("jar");
        } else {
            mAppMasterJar = JobConf.findContainingJar(HuskyApplicationMaster.class);
            if (mAppMasterJar == null) {
                throw new IllegalArgumentException("No jar specified for husky application master");
            }
        }
        LOG.info("Husky Application Master's jar is " + mAppMasterJar);

        if (cliParser.hasOption("worker_infos")) {
            for (String i : cliParser.getOptionValue("worker_infos").split(",")) {
                String[] pair = i.trim().split(":");
                if (pair.length != 2) {
                    throw new IllegalArgumentException("Invalid worker info: " + i.trim());
                }
                try {
                    Pair<String, Integer> p = new Pair<String, Integer>(pair[0], Integer.parseInt(pair[1]));
                    if (p.getSecond() <= 0) {
                        throw new IllegalArgumentException(
                                "Invalid worker info, number of worker should be large than 0: " + i.trim());
                    }
                    mWorkerInfos.add(p);
                } catch (NumberFormatException e) {
                    LOG.log(Level.SEVERE, "Invalid number of worker given in worker_infos: " + i.trim());
                    throw e;
                }
            }
            if (mWorkerInfos.isEmpty()) {
                throw new IllegalArgumentException("Parameter `worker_infos` is empty.");
            }
        } else {
            throw new IllegalArgumentException(
                    "No worker information is provided. Parameter `worker_infos` is not set.");
        }

        mAppMasterMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "2048"));
        if (mAppMasterMemory <= 0) {
            throw new IllegalArgumentException(
                    "Illegal memory specified for application master. Specified memory: " + mAppMasterMemory);
        }

        mContainerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "512"));
        if (mContainerMemory <= 0) {
            throw new IllegalArgumentException(
                    "Illegal memory specified for container. Specified memory: " + mContainerMemory);
        }

        mNumVirtualCores = Integer.parseInt(cliParser.getOptionValue("container_vcores", "1"));
        if (mNumVirtualCores <= 0) {
            throw new IllegalArgumentException(
                    "Illegal number of virtual cores specified for container. Specified number of vcores: "
                            + mNumVirtualCores);
        }

        mAppPriority = Integer.parseInt(cliParser.getOptionValue("app_priority", "1"));
        if (mAppPriority <= 0) {
            throw new IllegalArgumentException(
                    "Illegal priority for husky application. Specified priority: " + mAppPriority);
        }

        if (!cliParser.hasOption("master")) {
            throw new IllegalArgumentException("No executable specified for c++ husky master");
        }
        mMasterExec = cliParser.getOptionValue("master");

        if (!cliParser.hasOption("application")) {
            throw new IllegalArgumentException("No application specified for c++ husky workers");
        }
        mAppExec = cliParser.getOptionValue("application");

        if (!cliParser.hasOption("config")) {
            throw new IllegalArgumentException("No config file given for c++ husky master and application");
        }
        mConfigFile = cliParser.getOptionValue("config");
        mLdLibraryPath = cliParser.getOptionValue("ld_library_path", "");

        mKeyTabFile = cliParser.getOptionValue("key_tab", "");
        mUserName = cliParser.getOptionValue("user_name", "");
        mLogPathToHDFS = cliParser.getOptionValue("log_to_hdfs", "");
        if (!mLogPathToHDFS.isEmpty()) {
            if (!mFileSystem.isDirectory(new Path(mLogPathToHDFS))) {
                throw new IllegalArgumentException(
                        "The given log path is not a directory on HDFS: " + mLogPathToHDFS);
            }
        }

        return true;
    }

    // should access through getLocalResources()
    private HashMap<String, LocalResource> localResources = null;

    private Pair<String, LocalResource> constructLocalResource(String name, String path, LocalResourceType type)
            throws IOException {
        LOG.info("To copy " + name + "(" + path + ") from local file system");

        Path resourcePath = new Path(path);
        if (path.startsWith("hdfs://")) {
            FileStatus fileStatus = mFileSystem.getFileStatus(resourcePath);
            if (!fileStatus.isFile()) {
                throw new RuntimeException("Only files can be provided as local resources.");
            }
        } else {
            File file = new File(path);
            if (!file.exists()) {
                throw new RuntimeException("File not exist: " + path);
            }
            if (!file.isFile()) {
                throw new RuntimeException("Only files can be provided as local resources.");
            }
        }

        // if the file is not on hdfs, upload it to hdfs first.
        if (!path.startsWith("hdfs://")) {
            Path src = resourcePath;
            String newPath = mLocalResourceHDFSPaths + '/' + mAppName + '/' + mAppId + '/' + name;
            resourcePath = new Path(newPath);
            mFileSystem.copyFromLocalFile(false, true, src, resourcePath);
            LOG.info("Upload " + path + " to " + newPath);
            path = newPath;
        }

        FileStatus fileStatus = mFileSystem.getFileStatus(resourcePath);

        LocalResource resource = Records.newRecord(LocalResource.class);
        resource.setType(type);
        resource.setVisibility(LocalResourceVisibility.APPLICATION);
        resource.setResource(ConverterUtils.getYarnUrlFromPath(resourcePath));
        resource.setTimestamp(fileStatus.getModificationTime());
        resource.setSize(fileStatus.getLen());

        return new Pair<String, LocalResource>(path, resource);
    }

    private Map<String, LocalResource> getLocalResources() throws IOException {
        if (localResources == null) {
            localResources = new HashMap<String, LocalResource>();

            Pair<String, LocalResource> resource = constructLocalResource("HuskyAppMaster.jar", mAppMasterJar,
                    FILE);
            mAppMasterJar = resource.getFirst();
            localResources.put("HuskyAppMaster.jar", resource.getSecond());

            resource = constructLocalResource("HuskyMasterExec", mMasterExec, FILE);
            mMasterExec = resource.getFirst();
            localResources.put("HuskyMasterExec", resource.getSecond());

            resource = constructLocalResource("HuskyWorkerExec", mAppExec, FILE);
            mAppExec = resource.getFirst();
            localResources.put("HuskyAppExec", resource.getSecond());

            resource = constructLocalResource("HuskyConfigFile", mConfigFile, FILE);
            mConfigFile = resource.getFirst();
            localResources.put("HuskyConfigFile", resource.getSecond());

            StringBuilder builder = new StringBuilder();
            for (String i : mLocalFiles.split(",")) { // single file
                i = i.trim();
                if (!i.isEmpty()) {
                    Path path = new Path(i);
                    RemoteIterator<LocatedFileStatus> fileIter = mFileSystem.listFiles(path, true);
                    while (fileIter.hasNext()) {
                        LocatedFileStatus s = fileIter.next();
                        String name = s.getPath().getName();
                        resource = constructLocalResource(name, s.getPath().toString(), FILE);
                        localResources.put(name, resource.getSecond());
                        builder.append(resource.getFirst()).append(',');
                    }
                }
            }
            builder.setLength(Math.max(0, builder.length() - 1));
            mLocalFiles = builder.toString();

            builder.setLength(0);
            for (String i : mLocalArchives.split(",")) { // archive file
                i = i.trim();
                if (!i.isEmpty()) {
                    Path path = new Path(i);
                    RemoteIterator<LocatedFileStatus> fileIter = mFileSystem.listFiles(path, true);
                    while (fileIter.hasNext()) {
                        LocatedFileStatus s = fileIter.next();
                        String name = s.getPath().getName();
                        resource = constructLocalResource(name, s.getPath().toString(), ARCHIVE);
                        localResources.put(name, resource.getSecond());
                        builder.append(resource.getFirst()).append(',');
                    }
                }
            }
            builder.setLength(Math.max(0, builder.length() - 1));
            mLocalArchives = builder.toString();

            // delete the directory recursively on exit
            mFileSystem.deleteOnExit(new Path(mLocalResourceHDFSPaths + '/' + mAppName + '/' + mAppId));
        }
        return localResources;
    }

    private Map<String, String> getEnvironment() {
        String[] paths = mYarnConf.getStrings(YARN_APPLICATION_CLASSPATH, DEFAULT_YARN_APPLICATION_CLASSPATH);
        StringBuilder classpath = new StringBuilder();
        classpath.append("./*");
        for (String s : paths) {
            classpath.append(":").append(s);
        }
        return Collections.singletonMap("CLASSPATH", classpath.toString());
    }

    private boolean monitorApp() throws YarnException, IOException {
        while (true) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException ignore) {
            }

            ApplicationReport report = mYarnClient.getApplicationReport(mAppId);

            YarnApplicationState yarnState = report.getYarnApplicationState();
            FinalApplicationStatus appState = report.getFinalApplicationStatus();
            LOG.info("YarnState = " + yarnState + ", AppState = " + appState + ", Progress = "
                    + report.getProgress());

            if (yarnState == FINISHED) {
                if (appState == SUCCEEDED) {
                    LOG.info("Application completed successfully.");
                    return true;
                } else {
                    LOG.info("Application completed unsuccessfully. YarnState: " + yarnState.toString()
                            + ", ApplicationState: " + appState.toString());
                    return false;
                }
            } else if (yarnState == KILLED || yarnState == FAILED) {
                LOG.info("Application did not complete. YarnState: " + yarnState.toString() + ", ApplicationState: "
                        + appState.toString());
                return false;
            }
        }
    }

    private boolean run() throws YarnException, IOException {
        mYarnClient.start();

        YarnClientApplication app = mYarnClient.createApplication();

        ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
        appContext.setApplicationName(mAppName);

        Resource resource = Records.newRecord(Resource.class);
        resource.setMemory(mAppMasterMemory);
        resource.setVirtualCores(mNumVirtualCores);
        appContext.setResource(resource);

        mAppId = appContext.getApplicationId();

        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
        amContainer.setLocalResources(getLocalResources());
        amContainer.setEnvironment(getEnvironment());

        StringBuilder cmdBuilder = new StringBuilder();
        if (!mKeyTabFile.isEmpty()) {
            if (mUserName.isEmpty()) {
                throw new RuntimeException("Username is not given but is required by kinit");
            }
            cmdBuilder.append("kinit -kt ").append(mKeyTabFile).append(" -V ").append(mUserName).append(" && ");
        }
        cmdBuilder.append(JAVA_HOME.$()).append("/bin/java").append(" -Xmx").append(mAppMasterMemory).append("m ")
                .append(HuskyApplicationMaster.class.getName()).append(" --container_memory ")
                .append(mContainerMemory).append(" --container_vcores ").append(mNumVirtualCores)
                .append(" --app_priority ").append(mAppPriority).append(" --app_master_log_dir <LOG_DIR>")
                .append(" --master ").append(mMasterExec).append(" --application ").append(mAppExec)
                .append(" --config ").append(mConfigFile);
        if (!mLdLibraryPath.isEmpty()) {
            cmdBuilder.append(" --ld_library_path \"").append(mLdLibraryPath).append("\" ");
        }
        cmdBuilder.append(" --worker_infos ").append(mWorkerInfos.get(0).getFirst()).append(":")
                .append(mWorkerInfos.get(0).getSecond());
        for (int i = 1; i < mWorkerInfos.size(); i++) {
            cmdBuilder.append(',').append(mWorkerInfos.get(i).getFirst()).append(":")
                    .append(mWorkerInfos.get(i).getSecond());
        }
        if (!mLocalFiles.isEmpty()) {
            cmdBuilder.append(" --local_files \"").append(mLocalFiles).append("\"");
        }
        if (!mLocalArchives.isEmpty()) {
            cmdBuilder.append(" --local_archives \"").append(mLocalArchives).append("\"");
        }
        if (!mLogPathToHDFS.isEmpty()) {
            cmdBuilder.append(" --log_to_hdfs \"").append(mLogPathToHDFS).append("\"");
        }
        cmdBuilder.append(" 1>").append("<LOG_DIR>/HuskyAppMaster.stdout").append(" 2>")
                .append("<LOG_DIR>/HuskyAppMaster.stderr");
        if (!mLogPathToHDFS.isEmpty()) {
            cmdBuilder.append("; am_exit_code=$?").append("; hadoop fs -put -f <LOG_DIR>/HuskyAppMaster.stdout ")
                    .append(mLogPathToHDFS).append("; hadoop fs -put -f <LOG_DIR>/HuskyAppMaster.stderr ")
                    .append(mLogPathToHDFS).append("; exit \"$am_exit_code\"");
        }

        amContainer.setCommands(Collections.singletonList(cmdBuilder.toString()));

        LOG.info("Command: " + amContainer.getCommands().get(0));

        appContext.setAMContainerSpec(amContainer);

        mYarnClient.submitApplication(appContext);

        return monitorApp();
    }

    public static void main(String[] args) {
        LOG.info("Start running HuskyYarnClient");
        boolean result = false;
        try {
            HuskyYarnClient client = new HuskyYarnClient();
            try {
                // argument initialization errors
                if (!client.init(args)) {
                    System.exit(0);
                }
            } catch (Exception e) {
                LOG.log(Level.SEVERE, "Exception on parsing arguments", e);
                client.printUsage();
                System.exit(-1);
            }
            // runtime exceptions
            result = client.run();
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Exception on running application master", e);
            e.printStackTrace();
            System.exit(-1);
        }
        if (!result) {
            System.exit(-1);
        }
        System.exit(0);
    }
}