com.linkedin.cubert.ScriptExecutor.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.cubert.ScriptExecutor.java

Source

/* (c) 2014 LinkedIn Corp. All rights reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert;

import com.linkedin.cubert.app.CmrExecutor;
import com.linkedin.cubert.app.ExecutionSpec;
import com.linkedin.cubert.utils.FileSystemUtils;
import org.apache.commons.cli.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Compiles and executes cubert scripts.
 *
 * <p>Two entry points are provided: {@link #run()}, which reads the script name and
 * arguments from the {@link Properties} supplied at construction time, and
 * {@link #main(String[])}, which reads them from the command line.
 *
 * @author Maneesh Varshney
 */
public class ScriptExecutor {
    /** Prefix of executor property names that are forwarded (prefix stripped) as cubert params. */
    private static final String CUBERT_PROP_IDENTIFIER = "param.";

    private final String azkabanJobName;
    private final Properties azkabanProperties;

    /**
     * @param name  name of the job as known to the executor
     * @param props executor properties; {@code cubert.script} is required and
     *              {@code cubert.args} is optional (both are read by {@link #run()})
     */
    public ScriptExecutor(String name, Properties props) {
        this.azkabanJobName = name;
        this.azkabanProperties = props;
    }

    /**
     * Called from Azkaban executor to launch job(s).
     *
     * <p>The command line is assembled from the {@code cubert.script} property followed by
     * the optional {@code cubert.args} property, split on whitespace. Nothing is executed
     * if help was requested or if no script file is present on the assembled command line.
     *
     * @throws IllegalArgumentException if {@code cubert.script} is missing, or if
     *                                  {@code cubert.args} contains a quote character
     * @throws Exception                if parsing the options, reading the script or
     *                                  running the job(s) fails
     */
    public void run() throws Exception {
        String scriptName = azkabanProperties.getProperty("cubert.script");
        if (scriptName == null) {
            throw new IllegalArgumentException("Cubert script name is not provided.");
        }

        String argsStr = azkabanProperties.getProperty("cubert.args");
        if (argsStr != null) {
            // TODO: fix this
            // The args are tokenized by a plain whitespace split below, so quoted values
            // cannot be represented; reject them rather than mis-parse them.
            if (argsStr.contains("\"") || argsStr.contains("'")) {
                throw new IllegalArgumentException(
                        "Oops! The parser does not support quotes in the args. Please use -f <params file> for now.");
            }

            scriptName = scriptName + " " + argsStr;
        }

        // Trim first: leading whitespace would otherwise produce an empty first token,
        // which would then be taken for the script file name.
        String[] args = scriptName.trim().split("\\s+");

        CommandLine cmdLine = getCommandLine(args);
        if (cmdLine == null) {
            // Help was requested and has already been printed.
            return;
        }

        ExecutionSpec spec = getExecutionSpec(cmdLine);
        if (spec == null) {
            return;
        }

        // NOTE(review): unlike main(), this path hands the executor properties over as-is
        // and does not merge -f / -D options via getProperties() -- confirm this is intended.
        CmrExecutor cmrExecutor = new CmrExecutor();
        cmrExecutor.run(spec, azkabanProperties);
    }

    /**
     * Called from Azkaban executor to terminate job(s)
     */
    public void cancel() {
        // TODO: kill launched jobs.
    }

    /**
     * Command line entry point: parses the options, builds the execution spec and the
     * properties (from {@code -f} files and {@code -D} options) and runs the script.
     *
     * @param args command line arguments; see {@code -h} for the supported options
     * @throws Exception if parsing, reading the script or running the job(s) fails
     */
    public static void main(String[] args) throws Exception {
        CommandLine cmdLine = getCommandLine(args);
        if (cmdLine == null) {
            return;
        }

        ExecutionSpec spec = getExecutionSpec(cmdLine);
        if (spec == null) {
            return;
        }

        Properties prop = getProperties(cmdLine, null);

        CmrExecutor cmrExecutor = new CmrExecutor();
        cmrExecutor.run(spec, prop);
    }

    /**
     * Builds the execution spec from the parsed command line.
     *
     * <p>The first non-option argument is read as the script file. Every entry of the
     * colon-separated {@code -P} value is expanded on the local file system and added as
     * a jar, every {@code -x} value is added as a job, and the boolean flags are copied
     * onto the spec.
     *
     * @param cmdLine the parsed command line (must not be {@code null})
     * @return the spec, or {@code null} (after printing a message to stderr) if no script
     *         file was given
     * @throws IOException if the script file or the {@code -P} paths cannot be read
     */
    private static ExecutionSpec getExecutionSpec(CommandLine cmdLine) throws IOException {
        String[] remainingArgs = cmdLine.getArgs();

        if (remainingArgs.length == 0) {
            System.err.println("Cubert script file not specified");
            return null;
        }

        String program = readFile(new File(remainingArgs[0]));

        ExecutionSpec spec = new ExecutionSpec(program);

        if (cmdLine.hasOption('P')) {
            String value = cmdLine.getOptionValue('P');

            FileSystem localFs = FileSystem.getLocal(new Configuration());

            for (String pathStr : value.split(":")) {
                List<Path> paths = FileSystemUtils.getPaths(localFs, new Path(pathStr));

                for (Path p : paths) {
                    spec.addJar(p);
                }
            }
        }

        if (cmdLine.hasOption("x")) {
            for (String job : cmdLine.getOptionValues("x")) {
                spec.addJobs(job);
            }
        }

        spec.setParallel(cmdLine.hasOption("parallel")).setDebugMode(cmdLine.hasOption("d"))
                .setProfileMode(cmdLine.hasOption("perf")).setPrintJson(cmdLine.hasOption("j"))
                .setDescribe(cmdLine.hasOption("describe")).setPreprocessOnly(cmdLine.hasOption("s"))
                .setParseOnly(cmdLine.hasOption("p")).setCompileOnly(cmdLine.hasOption("c"));

        return spec;
    }

    /**
     * Collects the properties for a run. Sources are applied in the following order, so a
     * later source overrides an earlier one for the same key:
     * <ol>
     * <li>executor params (names starting with {@code param.}, with the prefix stripped)</li>
     * <li>properties files passed through {@code -f} (multiple files in CLI order)</li>
     * <li>properties passed as {@code -D} arguments directly on the CLI</li>
     * </ol>
     *
     * @param cmdLine       the parsed command line
     * @param executorProps properties supplied by the executor, or {@code null} if none
     * @return the merged properties (never {@code null})
     * @throws URISyntaxException if a {@code -f} value is not a valid URI
     * @throws IOException        if a {@code -f} file cannot be read
     */
    private static Properties getProperties(CommandLine cmdLine, Properties executorProps)
            throws URISyntaxException, IOException {
        Properties props = new Properties();

        // 1. Substitute executor params
        if (executorProps != null) {
            props.putAll(extractCubertParams(executorProps));
        }

        // 2. -f properties
        String[] propFiles = cmdLine.getOptionValues("f");
        if (propFiles != null) {
            for (String propFile : propFiles) {
                loadPropertiesFile(props, propFile);
            }
        }

        // 3. -D properties
        props.putAll(cmdLine.getOptionProperties("D"));

        return props;
    }

    /**
     * Loads one properties file into {@code props}. A location whose URI scheme is
     * {@code hdfs} is opened through the Hadoop file system; anything else is opened as a
     * local file. The reader is always closed, even if loading fails.
     */
    private static void loadPropertiesFile(Properties props, String propFile)
            throws URISyntaxException, IOException {
        URI uri = new URI(propFile);
        String scheme = uri.getScheme();
        String path = uri.getPath();

        Reader reader;
        if (scheme != null && scheme.equalsIgnoreCase("hdfs")) {
            reader = new BufferedReader(
                    new InputStreamReader(FileSystem.get(new JobConf()).open(new Path(path))));
        } else {
            reader = new BufferedReader(new FileReader(path));
        }

        try {
            props.load(reader);
        } finally {
            reader.close();
        }
    }

    /**
     * Returns the executor properties whose name starts with {@code param.}, keyed by the
     * remainder of the name (e.g. {@code param.date=x} becomes {@code date=x}).
     *
     * @param executorProps properties supplied by the executor
     * @return a new map holding the extracted params
     */
    private static Map<String, String> extractCubertParams(Properties executorProps) {
        Map<String, String> cubertParams = new HashMap<String, String>();

        int stripLen = CUBERT_PROP_IDENTIFIER.length();

        for (String p : executorProps.stringPropertyNames()) {
            // Literal prefix test. A regex built from the prefix would treat the '.' as a
            // wildcard and accept names such as "param" or "params.x".
            if (!p.startsWith(CUBERT_PROP_IDENTIFIER)) {
                continue;
            }

            cubertParams.put(p.substring(stripLen), executorProps.getProperty(p));
        }

        return cubertParams;
    }

    /**
     * Reads a whole text file into a string. Every line, including the last one, is
     * terminated with {@code "\n"} in the result.
     *
     * @param file the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFile(File file) throws IOException {
        BufferedReader breader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            StringBuilder strBuilder = new StringBuilder();
            String line;
            while ((line = breader.readLine()) != null) {
                strBuilder.append(line);
                strBuilder.append("\n");
            }
            return strBuilder.toString();
        } finally {
            // Closing the outermost reader also closes the underlying file stream.
            breader.close();
        }
    }

    /**
     * Declares the supported options and parses the given arguments.
     *
     * @param args the raw arguments
     * @return the parsed command line, or {@code null} if {@code -h} was given (in which
     *         case the usage message has been printed)
     * @throws ParseException if the arguments do not match the declared options
     */
    private static CommandLine getCommandLine(String[] args) throws ParseException {
        Options options = new Options();

        options.addOption("s", "preprocess", false, "show the script after preprocessing");
        options.addOption("j", "json", false, "show the plan in JSON");
        options.addOption("p", "parse", false, "stop after parsing");
        options.addOption("c", "compile", false, "stop after compilation");
        options.addOption("d", "debug", false, "print debuging information");
        options.addOption("perf", false, "enable performance profiling");
        options.addOption("h", "help", false, "shows this message");
        options.addOption(new Option("describe", "describe the schemas of output datasets"));

        options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("use given parameter file")
                .withLongOpt("param_file").create("f"));

        options.addOption(OptionBuilder.withArgName("lib path").hasArg()
                .withDescription("classpath to be uploaded to distributed cache").withLongOpt("cache_path")
                .create("P"));

        // -Dproperty=value: two args split on the value separator
        options.addOption(OptionBuilder.withArgName("property=value").hasArgs(2).withValueSeparator()
                .withDescription("use value for given property").create("D"));

        options.addOption(OptionBuilder.withArgName("job id/name").hasArgs()
                .withDescription("execute this job only").create("x"));

        options.addOption(new Option("parallel", "run independent jobs in parallel"));

        // create the parser
        CommandLineParser parser = new PosixParser();

        // parse the command line arguments
        CommandLine line = parser.parse(options, args);

        if (line.hasOption("h")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("ScriptExecutor <cubert script file> [options]", options);
            return null;
        }

        return line;
    }
}