org.springframework.data.hadoop.impala.mapreduce.MapReduceCommands.java Source code

Java tutorial

Introduction

Here is the source code for org.springframework.data.hadoop.impala.mapreduce.MapReduceCommands.java

Source

/*
 * Copyright 2011-2012 the original author or authors.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.data.hadoop.impala.mapreduce;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.hadoop.impala.common.ConfigurationAware;
import org.springframework.data.hadoop.impala.common.util.SecurityUtil;
import org.springframework.data.hadoop.impala.common.util.SecurityUtil.ExitTrappedException;
import org.springframework.shell.core.ExecutionProcessor;
import org.springframework.shell.core.annotation.CliCommand;
import org.springframework.shell.core.annotation.CliOption;
import org.springframework.shell.event.ParseResult;
import org.springframework.stereotype.Component;

/**
 * Spring Shell commands to submit and interact with MapReduce jobs.
 * <p>
 * The <code>mr job ...</code> and <code>mr task ...</code> commands translate their options into
 * the argument vector handed to {@link JobClient#run(String[])}. The <code>mr jar</code> command
 * unpacks a job jar into a temporary directory and invokes its main class inside this JVM.
 * 
 * @author Jarred Li
 * @author Author of <code>org.apache.hadoop.util.RunJar</code>
 */
@Component
public class MapReduceCommands extends ConfigurationAware implements ExecutionProcessor {

    /** Common prefix of all job related shell commands. */
    private static final String PREFIX = "mr job ";

    /** Client used by the job/task commands; created lazily by {@link #init()}. */
    private JobClient jobClient;

    @Autowired
    private SecurityUtil securityUtil;

    /**
     * Verifies that a job tracker is configured and makes sure a job client exists before a
     * command is executed.
     * 
     * @param invocationContext the parsed command about to be invoked
     * @return the invocation context returned by the super class
     * @throws RuntimeException if <code>mapred.job.tracker</code> is not set
     */
    @Override
    public ParseResult beforeInvocation(ParseResult invocationContext) {
        invocationContext = super.beforeInvocation(invocationContext);
        String jobTracker = getHadoopConfiguration().get("mapred.job.tracker");
        if (jobTracker == null || jobTracker.length() == 0) {
            LOG.severe("You must set Job Tracker URL before run Map Reduce commands");
            throw new RuntimeException("You must set Job Tracker URL before run Map Reduce commands");
        }
        if (jobClient == null) {
            init();
        }
        if (isWindows()) {
            // NOTE(review): presumably a workaround for job submission from Windows clients;
            // the effect depends on the Hadoop version's FsPermission#fromShort -- confirm.
            org.apache.hadoop.mapreduce.JobSubmissionFiles.JOB_DIR_PERMISSION.fromShort((short) 0700);
            org.apache.hadoop.mapreduce.JobSubmissionFiles.JOB_FILE_PERMISSION.fromShort((short) 0644);
        }
        return invocationContext;
    }

    /**
     * Creates the job client from the current Hadoop configuration. A failure is logged and
     * leaves the previously held client (possibly <code>null</code>) in place.
     */
    public void init() {
        try {
            jobClient = new JobClient(new JobConf(getHadoopConfiguration()));
        } catch (IOException ex) {
            LOG.severe("Cannot create job client: " + ex.getMessage());
        }
    }

    @Override
    protected String failedComponentName() {
        return "Map/Reduce";
    }

    /**
     * Re-creates the job client after the Hadoop configuration changed.
     * 
     * @return always <code>true</code>
     */
    @Override
    protected boolean configurationChanged() throws Exception {
        if (jobClient != null) {
            LOG.info("Hadoop configuration changed, re-initializing MR...");
        }
        init();
        return true;
    }

    /**
     * Runs the job client with <code>-submit &lt;jobFile&gt;</code>.
     * 
     * @param jobFile the configuration file for the MR job
     */
    @CliCommand(value = PREFIX + "submit", help = "Submit a Map Reduce job defined in the job file")
    public void submit(@CliOption(key = {
            "jobfile" }, mandatory = true, help = "the configuration file for MR job") final String jobFile) {
        run(new String[] { "-submit", jobFile });
    }

    /**
     * Runs the job client with <code>-status &lt;jobid&gt;</code>.
     * 
     * @param jobid the job Id
     */
    @CliCommand(value = PREFIX + "status", help = "Query Map Reduce job status.")
    public void status(@CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid) {
        run(new String[] { "-status", jobid });
    }

    /**
     * Runs the job client with <code>-counter &lt;jobid&gt; &lt;groupName&gt; &lt;counterName&gt;</code>.
     * 
     * @param jobid the job Id
     * @param groupName the counter group name
     * @param counterName the counter name
     */
    @CliCommand(value = PREFIX + "counter", help = "Print the counter value of the MR job")
    public void counter(@CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid,
            @CliOption(key = {
                    "groupname" }, mandatory = true, help = "the counter group name") final String groupName,
            @CliOption(key = {
                    "countername" }, mandatory = true, help = "the counter name") final String counterName) {
        run(new String[] { "-counter", jobid, groupName, counterName });
    }

    /**
     * Runs the job client with <code>-kill &lt;jobid&gt;</code>.
     * 
     * @param jobid the job Id
     */
    @CliCommand(value = PREFIX + "kill", help = "Kill the Map Reduce job")
    public void kill(@CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid) {
        run(new String[] { "-kill", jobid });
    }

    /**
     * Runs the job client with <code>-events &lt;jobid&gt; &lt;from&gt; &lt;number&gt;</code>.
     * 
     * @param jobid the job Id
     * @param from from event number
     * @param number total number of events
     */
    @CliCommand(value = PREFIX
            + "events", help = "Print the events' detail received by jobtracker for the given range")
    public void events(@CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid,
            @CliOption(key = { "from" }, mandatory = true, help = "from event number") final String from,
            @CliOption(key = { "number" }, mandatory = true, help = "total number of events") final String number) {
        run(new String[] { "-events", jobid, from, number });
    }

    /**
     * Runs the job client with <code>-history [all] &lt;outputDir&gt;</code>.
     * 
     * @param all whether to pass the <code>all</code> flag
     * @param outputDir job output directory
     */
    @CliCommand(value = PREFIX + "history", help = "Print job details, failed and killed job details")
    public void history(@CliOption(key = {
            "all" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "Whether print all information") final boolean all,
            @CliOption(key = { "" }, mandatory = true, help = "job output directory") final String outputDir) {
        if (all) {
            run(new String[] { "-history", "all", outputDir });
        } else {
            run(new String[] { "-history", outputDir });
        }
    }

    /**
     * Runs the job client with <code>-list [all]</code>.
     * 
     * @param all whether to pass the <code>all</code> flag
     */
    @CliCommand(value = PREFIX + "list", help = "List the Map Reduce jobs")
    public void list(@CliOption(key = {
            "all" }, mandatory = false, specifiedDefaultValue = "true", unspecifiedDefaultValue = "false", help = "Whether list all jobs") final boolean all) {
        if (all) {
            run(new String[] { "-list", "all" });
        } else {
            run(new String[] { "-list" });
        }
    }

    /**
     * Runs the job client with <code>-kill-task &lt;taskid&gt;</code>.
     * 
     * @param taskid the task Id
     */
    @CliCommand(value = "mr task kill", help = "Kill the Map Reduce task")
    public void killTask(
            @CliOption(key = { "taskid" }, mandatory = true, help = "the task Id") final String taskid) {
        run(new String[] { "-kill-task", taskid });
    }

    /**
     * Runs the job client with <code>-fail-task &lt;taskid&gt;</code>.
     * 
     * @param taskid the task Id
     */
    @CliCommand(value = "mr task fail", help = "Fail the Map Reduce task")
    public void failTask(
            @CliOption(key = { "taskid" }, mandatory = true, help = "the task Id") final String taskid) {
        run(new String[] { "-fail-task", taskid });
    }

    /**
     * Runs the job client with <code>-set-priority &lt;jobid&gt; &lt;priority&gt;</code>.
     * 
     * @param jobid the job Id
     * @param priority the job priority
     */
    @CliCommand(value = PREFIX + "set priority", help = "Change the priority of the job")
    public void setPriority(@CliOption(key = { "jobid" }, mandatory = true, help = "the job Id") final String jobid,
            @CliOption(key = {
                    "priority" }, mandatory = true, help = "the job priority") final JobPriority priority) {
        run(new String[] { "-set-priority", jobid, priority.getValue() });
    }

    /**
     * Job priorities accepted by <code>mr job set priority</code>; {@link #getValue()} is the
     * string passed on to the job client.
     */
    public enum JobPriority {
        VERY_HIGH("VERY_HIGH"), HIGH("HIGH"), NORMAL("NORMAL"), LOW("LOW"), VERY_LOW("VERY_LOW"),

        /**
         * Misspelled alias of {@link #NORMAL}, kept so existing callers and scripts keep working.
         * 
         * @deprecated use {@link #NORMAL}
         */
        @Deprecated
        NORML("NORMAL");

        private final String val;

        JobPriority(String v) {
            this.val = v;
        }

        public String getValue() {
            return val;
        }
    }

    /**
     * Runs the main class of a job jar inside this JVM. <code>System.exit</code> calls made by
     * the job are trapped for the duration of the call; any other failure is logged.
     * 
     * @param jarFileName jar file name
     * @param mainClassName main class name
     * @param args whitespace separated arguments for the main class, may be <code>null</code>
     */
    @CliCommand(value = "mr jar", help = "Run Map Reduce job in the jar")
    public void jar(
            @CliOption(key = { "jarfile" }, mandatory = true, help = "jar file name") final String jarFileName,
            @CliOption(key = "mainclass", mandatory = true, help = "main class name") final String mainClassName,
            @CliOption(key = "args", mandatory = false, help = "input path") final String args) {
        securityUtil.forbidSystemExitCall();
        try {
            runJar(jarFileName, mainClassName, args);
        } catch (ExitTrappedException e) {
            // The job called System.exit; the call was trapped on purpose, nothing to report.
        } catch (Throwable t) {
            LOG.severe("run MR job failed. Failed Message:" + t.getMessage());
        } finally {
            securityUtil.enableSystemExitCall();
        }
    }

    /**
     * Unpacks the jar into a fresh work directory, builds a class loader over the unpacked
     * content (plus a directory holding the current Hadoop configuration) and invokes the
     * <code>main(String[])</code> method of the given class.
     * <p>
     * The thread context class loader is switched to the job's loader for the duration of the
     * call and restored afterwards. The work directory is removed by a JVM shutdown hook.
     * 
     * @param jarFileName path of the job jar
     * @param mainClassName fully qualified name of the class whose <code>main</code> is invoked
     * @param args whitespace separated arguments; <code>null</code> or blank means no arguments
     * @throws Throwable whatever the job's <code>main</code> throws (unwrapped from the
     *         reflective call), or any failure while preparing the class path
     */
    public void runJar(final String jarFileName, final String mainClassName, final String args) throws Throwable {
        File jarFile = new File(jarFileName);

        File tmpDir;
        if (isWindows()) {
            tmpDir = new File(System.getProperty("java.io.tmpdir"), "impala");
        } else {
            String hadoopTmpDir = new Configuration().get("hadoop.tmp.dir");
            if (hadoopTmpDir == null) {
                throw new IOException("hadoop.tmp.dir is not set");
            }
            tmpDir = new File(hadoopTmpDir);
        }
        tmpDir.mkdirs();
        if (!tmpDir.isDirectory()) {
            LOG.severe("Mkdirs failed to create " + tmpDir);
            return;
        }

        final ClassLoader previousLoader = Thread.currentThread().getContextClassLoader();
        try {
            // createTempFile is only used to obtain a unique name; the file is replaced by a directory.
            final File workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
            workDir.delete();
            workDir.mkdirs();
            if (!workDir.isDirectory()) {
                LOG.severe("Mkdirs failed to create " + workDir);
                return;
            }

            Runtime.getRuntime().addShutdownHook(new Thread() {
                @Override
                public void run() {
                    try {
                        FileUtil.fullyDelete(workDir);
                    } catch (IOException ignored) {
                        // best-effort cleanup while the JVM is shutting down
                    }
                }
            });

            unJar(jarFile, workDir);

            List<URL> classPath = new ArrayList<URL>();

            // Put the current Hadoop configuration on the class path so that the
            // user's settings are visible to the job when it runs.
            File hadoopConfigurationDir = new File(workDir, "impala-hadoop-configuration");
            writeHadoopConfiguration(hadoopConfigurationDir, this.getHadoopConfiguration());
            classPath.add(hadoopConfigurationDir.toURI().toURL());

            classPath.add(workDir.toURI().toURL());
            classPath.add(jarFile.toURI().toURL());
            classPath.add(new File(workDir, "classes").toURI().toURL());
            File[] libs = new File(workDir, "lib").listFiles();
            if (libs != null) {
                for (File lib : libs) {
                    classPath.add(lib.toURI().toURL());
                }
            }

            ClassLoader loader = new URLClassLoader(classPath.toArray(new URL[0]),
                    this.getClass().getClassLoader());
            Thread.currentThread().setContextClassLoader(loader);
            Class<?> mainClass = Class.forName(mainClassName, true, loader);
            Method main = mainClass.getMethod("main", String[].class);
            main.invoke(null, new Object[] { splitArguments(args) });
        } catch (InvocationTargetException e) {
            // Surface what the job's main actually threw (including a trapped System.exit)
            // instead of the reflective wrapper.
            Throwable cause = e.getCause();
            throw cause != null ? cause : e;
        } finally {
            Thread.currentThread().setContextClassLoader(previousLoader);
        }
    }

    /**
     * Writes the Hadoop configuration to one directory; the same content is stored as
     * "core-site.xml", "hdfs-site.xml" and "mapred-site.xml". Failures are logged, not thrown.
     * 
     * @param configDir the directory the files are written to (created if missing)
     * @param config Hadoop configuration
     */
    public void writeHadoopConfiguration(File configDir, Configuration config) {
        configDir.mkdirs();
        String[] fileNames = { "core-site.xml", "hdfs-site.xml", "mapred-site.xml" };
        try {
            for (String fileName : fileNames) {
                FileOutputStream fos = new FileOutputStream(new File(configDir, fileName));
                try {
                    config.writeXml(fos);
                } finally {
                    fos.close();
                }
            }
        } catch (Exception e) {
            LOG.severe("Save user's configuration failed. Message:" + e.getMessage());
        }
    }

    /**
     * Extracts every non-directory entry of the jar below the given directory.
     * 
     * @param jarFile the jar to unpack
     * @param toDir the directory receiving the content
     * @throws IOException if the jar cannot be read, a file cannot be written, or an entry
     *         name would resolve outside of <code>toDir</code>
     */
    private void unJar(File jarFile, File toDir) throws IOException {
        JarFile jar = new JarFile(jarFile);
        try {
            String rootPrefix = toDir.getCanonicalPath() + File.separator;
            Enumeration<JarEntry> entries = jar.entries();
            while (entries.hasMoreElements()) {
                JarEntry entry = entries.nextElement();
                if (entry.isDirectory()) {
                    continue;
                }
                File target = new File(toDir, entry.getName());
                // Reject names such as "../x" that would escape the extraction directory.
                if (!target.getCanonicalPath().startsWith(rootPrefix)) {
                    throw new IOException("Jar entry is outside of the target directory: " + entry.getName());
                }
                File parent = target.getParentFile();
                if (!parent.mkdirs() && !parent.isDirectory()) {
                    throw new IOException("Mkdirs failed to create " + parent.toString());
                }
                InputStream in = jar.getInputStream(entry);
                try {
                    OutputStream out = new FileOutputStream(target);
                    try {
                        byte[] buffer = new byte[8192];
                        int read;
                        while ((read = in.read(buffer)) != -1) {
                            out.write(buffer, 0, read);
                        }
                    } finally {
                        out.close();
                    }
                } finally {
                    in.close();
                }
            }
        } finally {
            jar.close();
        }
    }

    /**
     * Passes the argument vector to the job client and logs any failure.
     * 
     * @param argv arguments in the form expected by {@link JobClient#run(String[])}
     */
    private void run(String[] argv) {
        if (jobClient == null) {
            // init() failed earlier; report that instead of a NullPointerException with a null message.
            LOG.severe("run MR job failed. Failed Message:Job client is not initialized");
            return;
        }
        try {
            jobClient.run(argv);
        } catch (Throwable t) {
            LOG.severe("run MR job failed. Failed Message:" + t.getMessage());
        }
    }

    /**
     * Splits the raw argument string on runs of whitespace. Quoting is not interpreted.
     * 
     * @param args raw argument string, may be <code>null</code>
     * @return the arguments; empty when <code>args</code> is <code>null</code> or blank
     */
    private static String[] splitArguments(String args) {
        if (args == null) {
            return new String[0];
        }
        String trimmed = args.trim();
        if (trimmed.length() == 0) {
            return new String[0];
        }
        return trimmed.split("\\s+");
    }

    /**
     * @return <code>true</code> if the lower-cased <code>os.name</code> system property contains "win"
     */
    private static boolean isWindows() {
        return System.getProperty("os.name").toLowerCase().contains("win");
    }

}