org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java Source code

Introduction

Here is the source code for org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java, the Pentaho Big Data job entry that executes MapReduce jobs on a Hadoop cluster.

Source

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2012 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.hadoopjobexecutor;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.annotations.JobEntry;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.logging.Log4jFileAppender;
import org.pentaho.di.core.logging.LogWriter;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.entry.JobEntryBase;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.di.ui.job.entries.hadoopjobexecutor.UserDefinedItem;
import org.pentaho.hadoop.jobconf.HadoopConfigurer;
import org.pentaho.hadoop.jobconf.HadoopConfigurerFactory;
import org.w3c.dom.Node;

@JobEntry(id = "HadoopJobExecutorPlugin", name = "Hadoop Job Executor", categoryDescription = "Big Data", description = "Execute MapReduce jobs in Hadoop", image = "HDE.png")
public class JobEntryHadoopJobExecutor extends JobEntryBase implements Cloneable, JobEntryInterface {

    private static Class<?> PKG = JobEntryHadoopJobExecutor.class; // for i18n purposes, needed by Translator2!! $NON-NLS-1$

    private String hadoopJobName;

    private String jarUrl = "";

    private boolean isSimple = true;

    private String cmdLineArgs;

    private String outputKeyClass;
    private String outputValueClass;
    private String mapperClass;
    private String combinerClass;
    private String reducerClass;
    private String inputFormatClass;
    private String outputFormatClass;

    private String workingDirectory;

    private String hdfsHostname;
    private String hdfsPort;

    private String jobTrackerHostname;
    private String jobTrackerPort;

    private String inputPath;
    private String outputPath;

    private boolean blocking;
    private String loggingInterval = "60"; // 60 seconds default

    private String numMapTasks = "1";
    private String numReduceTasks = "1";

    private List<UserDefinedItem> userDefined = new ArrayList<UserDefinedItem>();

    private String hadoopDistribution = "generic";

    public String getHadoopJobName() {
        return hadoopJobName;
    }

    public void setHadoopJobName(String hadoopJobName) {
        this.hadoopJobName = hadoopJobName;
    }

    public String getJarUrl() {
        return jarUrl;
    }

    public void setJarUrl(String jarUrl) {
        this.jarUrl = jarUrl;
    }

    public boolean isSimple() {
        return isSimple;
    }

    public void setSimple(boolean isSimple) {
        this.isSimple = isSimple;
    }

    public String getCmdLineArgs() {
        return cmdLineArgs;
    }

    public void setCmdLineArgs(String cmdLineArgs) {
        this.cmdLineArgs = cmdLineArgs;
    }

    public String getOutputKeyClass() {
        return outputKeyClass;
    }

    public void setOutputKeyClass(String outputKeyClass) {
        this.outputKeyClass = outputKeyClass;
    }

    public String getOutputValueClass() {
        return outputValueClass;
    }

    public void setOutputValueClass(String outputValueClass) {
        this.outputValueClass = outputValueClass;
    }

    public String getMapperClass() {
        return mapperClass;
    }

    public void setMapperClass(String mapperClass) {
        this.mapperClass = mapperClass;
    }

    public String getCombinerClass() {
        return combinerClass;
    }

    public void setCombinerClass(String combinerClass) {
        this.combinerClass = combinerClass;
    }

    public String getReducerClass() {
        return reducerClass;
    }

    public void setReducerClass(String reducerClass) {
        this.reducerClass = reducerClass;
    }

    public String getInputFormatClass() {
        return inputFormatClass;
    }

    public void setInputFormatClass(String inputFormatClass) {
        this.inputFormatClass = inputFormatClass;
    }

    public String getOutputFormatClass() {
        return outputFormatClass;
    }

    public void setOutputFormatClass(String outputFormatClass) {
        this.outputFormatClass = outputFormatClass;
    }

    public String getWorkingDirectory() {
        return workingDirectory;
    }

    public void setWorkingDirectory(String workingDirectory) {
        this.workingDirectory = workingDirectory;
    }

    public String getHdfsHostname() {
        return hdfsHostname;
    }

    public void setHdfsHostname(String hdfsHostname) {
        this.hdfsHostname = hdfsHostname;
    }

    public String getHdfsPort() {
        return hdfsPort;
    }

    public void setHdfsPort(String hdfsPort) {
        this.hdfsPort = hdfsPort;
    }

    public String getJobTrackerHostname() {
        return jobTrackerHostname;
    }

    public void setJobTrackerHostname(String jobTrackerHostname) {
        this.jobTrackerHostname = jobTrackerHostname;
    }

    public String getJobTrackerPort() {
        return jobTrackerPort;
    }

    public void setJobTrackerPort(String jobTrackerPort) {
        this.jobTrackerPort = jobTrackerPort;
    }

    public String getInputPath() {
        return inputPath;
    }

    public void setInputPath(String inputPath) {
        this.inputPath = inputPath;
    }

    public String getOutputPath() {
        return outputPath;
    }

    public void setOutputPath(String outputPath) {
        this.outputPath = outputPath;
    }

    public boolean isBlocking() {
        return blocking;
    }

    public void setBlocking(boolean blocking) {
        this.blocking = blocking;
    }

    public String getLoggingInterval() {
        return loggingInterval;
    }

    public void setLoggingInterval(String loggingInterval) {
        this.loggingInterval = loggingInterval;
    }

    public List<UserDefinedItem> getUserDefined() {
        return userDefined;
    }

    public void setUserDefined(List<UserDefinedItem> userDefined) {
        this.userDefined = userDefined;
    }

    public String getNumMapTasks() {
        return numMapTasks;
    }

    public void setNumMapTasks(String numMapTasks) {
        this.numMapTasks = numMapTasks;
    }

    public String getNumReduceTasks() {
        return numReduceTasks;
    }

    public void setNumReduceTasks(String numReduceTasks) {
        this.numReduceTasks = numReduceTasks;
    }

    public void setHadoopDistribution(String hadoopDistro) {
        hadoopDistribution = hadoopDistro;
    }

    public String getHadoopDistribution() {
        return hadoopDistribution;
    }

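    /**
     * Executes the configured Hadoop job. In simple mode, every class in the user-supplied jar
     * that declares a main() method is invoked via reflection on its own thread. In advanced
     * mode, a JobConf is assembled from the entry's settings, the cluster connection is applied
     * through a HadoopConfigurer, and the job is submitted with JobClient, optionally blocking
     * and polling for status until it completes.
     *
     * @param result the result of the previous entry in the job
     * @param arg1   the entry number (unused)
     * @return the updated result
     * @throws KettleException if an unrecoverable error occurs
     */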
    public Result execute(Result result, int arg1) throws KettleException {
        result.setNrErrors(0);

        Log4jFileAppender appender = null;
        String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

        String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
        hadoopDistro = environmentSubstitute(hadoopDistro);
        if (Const.isEmpty(hadoopDistro)) {
            hadoopDistro = "generic";
        }

        try {
            appender = LogWriter.createFileAppender(logFileName, true, false);
            LogWriter.getInstance().addAppender(appender);
            log.setLogLevel(parentJob.getLogLevel());
        } catch (Exception e) {
            logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                    e.toString()));
            logError(Const.getStackTracker(e));
        }

        try {
            URL resolvedJarUrl = null;
            String jarUrlS = environmentSubstitute(jarUrl);
            if (jarUrlS.indexOf("://") == -1) {
                // default to file://
                File jarFile = new File(jarUrlS);
                resolvedJarUrl = jarFile.toURI().toURL();
            } else {
                resolvedJarUrl = new URL(jarUrlS);
            }

            final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                        resolvedJarUrl.toExternalForm()));

            if (isSimple) {
                /*      final AtomicInteger taskCount = new AtomicInteger(0);
                      final AtomicInteger successCount = new AtomicInteger(0);
                      final AtomicInteger failedCount = new AtomicInteger(0); */

                if (log.isDetailed())
                    logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));
                List<Class<?>> classesWithMains = JarUtility
                        .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
                for (final Class<?> clazz : classesWithMains) {
                    Runnable r = new Runnable() {
                        public void run() {
                            try {
                                final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                                try {
                                    //                  taskCount.incrementAndGet();
                                    Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                    Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                    // main(String[]) always takes exactly one String[] argument,
                                    // so pass an empty array when no command-line args are set.
                                    Object[] args = (cmdLineArgsS != null)
                                            ? new Object[] { cmdLineArgsS.split(" ") }
                                            : new Object[] { new String[0] };
                                    mainMethod.invoke(null, args);
                                } finally {
                                    Thread.currentThread().setContextClassLoader(cl);
                                    //                  successCount.incrementAndGet();
                                    //                  taskCount.decrementAndGet();
                                }
                            } catch (Throwable ignored) {
                                // skip, try the next one
                                //                logError(ignored.getMessage());
                                //                failedCount.incrementAndGet();
                                ignored.printStackTrace();
                            }
                        }
                    };
                    Thread t = new Thread(r);
                    t.start();
                }

                // uncomment to implement blocking
                /* if (blocking) {
                  while (taskCount.get() > 0 && !parentJob.isStopped()) {
                    Thread.sleep(1000);
                  }
                    
                  if (!parentJob.isStopped()) {
                    result.setResult(successCount.get() > 0);
                    result.setNrErrors((successCount.get() > 0) ? 0 : 1);
                  } else {
                    // we can't really know at this stage if 
                    // the hadoop job will finish successfully 
                    // because we have to stop now
                    result.setResult(true); // look on the bright side of life :-)...
                    result.setNrErrors(0);
                  }
                } else { */
                // non-blocking - just set success equal to no failures arising
                // from invocation
                //          result.setResult(failedCount.get() == 0);
                //          result.setNrErrors(failedCount.get());
                result.setResult(true);
                result.setNrErrors(0);
                /* } */
            } else {
                if (log.isDetailed())
                    logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

                URL[] urls = new URL[] { resolvedJarUrl };
                URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

                JobConf conf = new JobConf();
                String hadoopJobNameS = environmentSubstitute(hadoopJobName);
                conf.setJobName(hadoopJobNameS);

                String outputKeyClassS = environmentSubstitute(outputKeyClass);
                conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
                String outputValueClassS = environmentSubstitute(outputValueClass);
                conf.setOutputValueClass(loader.loadClass(outputValueClassS));

                if (mapperClass != null) {
                    String mapperClassS = environmentSubstitute(mapperClass);
                    Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                    conf.setMapperClass(mapper);
                }
                if (combinerClass != null) {
                    String combinerClassS = environmentSubstitute(combinerClass);
                    Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                    conf.setCombinerClass(combiner);
                }
                if (reducerClass != null) {
                    String reducerClassS = environmentSubstitute(reducerClass);
                    Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                    conf.setReducerClass(reducer);
                }

                if (inputFormatClass != null) {
                    String inputFormatClassS = environmentSubstitute(inputFormatClass);
                    Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                            .loadClass(inputFormatClassS);
                    conf.setInputFormat(inputFormat);
                }
                if (outputFormatClass != null) {
                    String outputFormatClassS = environmentSubstitute(outputFormatClass);
                    Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                            .loadClass(outputFormatClassS);
                    conf.setOutputFormat(outputFormat);
                }

                String hdfsHostnameS = environmentSubstitute(hdfsHostname);
                String hdfsPortS = environmentSubstitute(hdfsPort);
                String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
                String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

                // See if we can auto detect the distribution first
                HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

                if (configurer == null) {
                    // go with what has been selected by the user
                    configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                    // if the user-specified distribution is detectable, make sure it is still
                    // the current distribution!
                    if (configurer != null && configurer.isDetectable()) {
                        if (!configurer.isAvailable()) {
                            throw new KettleException(BaseMessages.getString(PKG,
                                    "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                    configurer.distributionName()));
                        }
                    }
                }
                if (configurer == null) {
                    throw new KettleException(BaseMessages.getString(PKG,
                            "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
                }
                logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                        configurer.distributionName()));

                List<String> configMessages = new ArrayList<String>();
                configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                        configMessages);
                for (String m : configMessages) {
                    logBasic(m);
                }

                String inputPathS = environmentSubstitute(inputPath);
                String[] inputPathParts = inputPathS.split(",");
                List<Path> paths = new ArrayList<Path>();
                for (String path : inputPathParts) {
                    paths.add(new Path(configurer.getFilesystemURL() + path));
                }
                Path[] finalPaths = paths.toArray(new Path[paths.size()]);

                //FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
                FileInputFormat.setInputPaths(conf, finalPaths);
                String outputPathS = environmentSubstitute(outputPath);
                FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

                // process user defined values
                for (UserDefinedItem item : userDefined) {
                    if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                            && !"".equals(item.getValue())) {
                        String nameS = environmentSubstitute(item.getName());
                        String valueS = environmentSubstitute(item.getValue());
                        conf.set(nameS, valueS);
                    }
                }

                String workingDirectoryS = environmentSubstitute(workingDirectory);
                conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
                conf.setJar(jarUrl);

                String numMapTasksS = environmentSubstitute(numMapTasks);
                String numReduceTasksS = environmentSubstitute(numReduceTasks);
                int numM = 1;
                try {
                    numM = Integer.parseInt(numMapTasksS);
                } catch (NumberFormatException e) {
                    logError("Can't parse number of map tasks '" + numMapTasksS + "'. Setting num"
                            + "map tasks to 1");
                }
                int numR = 1;
                try {
                    numR = Integer.parseInt(numReduceTasksS);
                } catch (NumberFormatException e) {
                    logError("Can't parse number of reduce tasks '" + numReduceTasksS + "'. Setting num"
                            + "reduce tasks to 1");
                }

                conf.setNumMapTasks(numM);
                conf.setNumReduceTasks(numR);

                JobClient jobClient = new JobClient(conf);
                RunningJob runningJob = jobClient.submitJob(conf);

                String loggingIntervalS = environmentSubstitute(loggingInterval);
                int logIntv = 60;
                try {
                    logIntv = Integer.parseInt(loggingIntervalS);
                } catch (NumberFormatException e) {
                    logError("Can't parse logging interval '" + loggingIntervalS + "'. Setting "
                            + "logging interval to 60");
                }
                if (blocking) {
                    try {
                        int taskCompletionEventIndex = 0;
                        while (!parentJob.isStopped() && !runningJob.isComplete()) {
                            if (logIntv >= 1) {
                                printJobStatus(runningJob);
                                taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                                Thread.sleep(logIntv * 1000);
                            } else {
                                Thread.sleep(60000);
                            }
                        }

                        if (parentJob.isStopped() && !runningJob.isComplete()) {
                            // We must stop the job running on Hadoop
                            runningJob.killJob();
                            // Indicate this job entry did not complete
                            result.setResult(false);
                        }

                        printJobStatus(runningJob);
                        // Log any messages we may have missed while polling
                        logTaskMessages(runningJob, taskCompletionEventIndex);
                    } catch (InterruptedException ie) {
                        logError(ie.getMessage(), ie);
                    }

                    // Entry is successful if the MR job is successful overall
                    result.setResult(runningJob.isSuccessful());
                }

            }
        } catch (Throwable t) {
            t.printStackTrace();
            result.setStopped(true);
            result.setNrErrors(1);
            result.setResult(false);
            logError(t.getMessage(), t);
        }

        if (appender != null) {
            LogWriter.getInstance().removeAppender(appender);
            appender.close();

            ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                    parentJob.getJobname(), getName());
            result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
        }

        return result;
    }

    /**
     * Log messages indicating completion (success/failure) of component tasks for the provided running job.
     *
     * @param runningJob Running job to poll for completion events
     * @param startIndex Start at this event index to poll from
     * @return Total events consumed
     * @throws IOException Error fetching events
     */
    private int logTaskMessages(RunningJob runningJob, int startIndex) throws IOException {
        TaskCompletionEvent[] tcEvents = runningJob.getTaskCompletionEvents(startIndex);
        for (int i = 0; i < tcEvents.length; i++) {
            String[] diags = runningJob.getTaskDiagnostics(tcEvents[i].getTaskAttemptId());
            StringBuilder diagsOutput = new StringBuilder();

            if (diags != null && diags.length > 0) {
                diagsOutput.append(Const.CR);
                for (String s : diags) {
                    diagsOutput.append(s);
                    diagsOutput.append(Const.CR);
                }
            }

            switch (tcEvents[i].getTaskStatus()) {
            case KILLED: {
                logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
                        TaskCompletionEvent.Status.KILLED, tcEvents[i].getTaskAttemptId().getTaskID().getId(),
                        tcEvents[i].getTaskAttemptId().getId(), tcEvents[i].getEventId(), diagsOutput));
            }
                break;
            case FAILED: {
                logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
                        TaskCompletionEvent.Status.FAILED, tcEvents[i].getTaskAttemptId().getTaskID().getId(),
                        tcEvents[i].getTaskAttemptId().getId(), tcEvents[i].getEventId(), diagsOutput));
            }
                break;
            case SUCCEEDED: {
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
                        TaskCompletionEvent.Status.SUCCEEDED, tcEvents[i].getTaskAttemptId().getTaskID().getId(),
                        tcEvents[i].getTaskAttemptId().getId(), tcEvents[i].getEventId(), diagsOutput));
            }
                break;
            }
        }
        return tcEvents.length;
    }

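    /**
     * Logs the current setup, map and reduce progress (as percentages) of the given running job
     * at the basic log level.
     *
     * @param runningJob the Hadoop job to report on
     * @throws IOException if the job status cannot be retrieved
     */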
    public void printJobStatus(RunningJob runningJob) throws IOException {
        if (log.isBasic()) {
            float setupPercent = runningJob.setupProgress() * 100f;
            float mapPercent = runningJob.mapProgress() * 100f;
            float reducePercent = runningJob.reduceProgress() * 100f;
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.RunningPercent", setupPercent,
                    mapPercent, reducePercent));
        }
    }

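    /**
     * Restores the job entry settings from the XML fragment produced by getXML().
     */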
    public void loadXML(Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers,
            Repository rep) throws KettleXMLException {
        super.loadXML(entrynode, databases, slaveServers);
        hadoopJobName = XMLHandler.getTagValue(entrynode, "hadoop_job_name");
        if (!Const.isEmpty(XMLHandler.getTagValue(entrynode, "hadoop_distribution"))) {
            hadoopDistribution = XMLHandler.getTagValue(entrynode, "hadoop_distribution");
        }

        isSimple = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "simple"));
        jarUrl = XMLHandler.getTagValue(entrynode, "jar_url");
        cmdLineArgs = XMLHandler.getTagValue(entrynode, "command_line_args");
        blocking = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "blocking"));
        /*try {
          loggingInterval = Integer.parseInt(XMLHandler.getTagValue(entrynode, "logging_interval"));
        } catch (NumberFormatException nfe) {
        } */
        loggingInterval = XMLHandler.getTagValue(entrynode, "logging_interval");

        mapperClass = XMLHandler.getTagValue(entrynode, "mapper_class");
        combinerClass = XMLHandler.getTagValue(entrynode, "combiner_class");
        reducerClass = XMLHandler.getTagValue(entrynode, "reducer_class");
        inputPath = XMLHandler.getTagValue(entrynode, "input_path");
        inputFormatClass = XMLHandler.getTagValue(entrynode, "input_format_class");
        outputPath = XMLHandler.getTagValue(entrynode, "output_path");
        outputKeyClass = XMLHandler.getTagValue(entrynode, "output_key_class");
        outputValueClass = XMLHandler.getTagValue(entrynode, "output_value_class");
        outputFormatClass = XMLHandler.getTagValue(entrynode, "output_format_class");

        hdfsHostname = XMLHandler.getTagValue(entrynode, "hdfs_hostname");
        hdfsPort = XMLHandler.getTagValue(entrynode, "hdfs_port");
        jobTrackerHostname = XMLHandler.getTagValue(entrynode, "job_tracker_hostname");
        jobTrackerPort = XMLHandler.getTagValue(entrynode, "job_tracker_port");
        //numMapTasks = Integer.parseInt(XMLHandler.getTagValue(entrynode, "num_map_tasks"));
        numMapTasks = XMLHandler.getTagValue(entrynode, "num_map_tasks");
        //numReduceTasks = Integer.parseInt(XMLHandler.getTagValue(entrynode, "num_reduce_tasks"));
        numReduceTasks = XMLHandler.getTagValue(entrynode, "num_reduce_tasks");
        workingDirectory = XMLHandler.getTagValue(entrynode, "working_dir");

        // How many user defined elements?
        userDefined = new ArrayList<UserDefinedItem>();
        Node userDefinedList = XMLHandler.getSubNode(entrynode, "user_defined_list");
        int nrUserDefined = XMLHandler.countNodes(userDefinedList, "user_defined");
        for (int i = 0; i < nrUserDefined; i++) {
            Node userDefinedNode = XMLHandler.getSubNodeByNr(userDefinedList, "user_defined", i);
            String name = XMLHandler.getTagValue(userDefinedNode, "name");
            String value = XMLHandler.getTagValue(userDefinedNode, "value");
            UserDefinedItem item = new UserDefinedItem();
            item.setName(name);
            item.setValue(value);
            userDefined.add(item);
        }
    }

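    /**
     * Serializes the job entry settings to an XML fragment, the counterpart of loadXML().
     */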
    public String getXML() {
        StringBuffer retval = new StringBuffer(1024);
        retval.append(super.getXML());
        retval.append("      ").append(XMLHandler.addTagValue("hadoop_job_name", hadoopJobName));
        retval.append("      ").append(XMLHandler.addTagValue("hadoop_distribution", hadoopDistribution));

        retval.append("      ").append(XMLHandler.addTagValue("simple", isSimple));
        retval.append("      ").append(XMLHandler.addTagValue("jar_url", jarUrl));
        retval.append("      ").append(XMLHandler.addTagValue("command_line_args", cmdLineArgs));
        retval.append("      ").append(XMLHandler.addTagValue("blocking", blocking));
        retval.append("      ").append(XMLHandler.addTagValue("logging_interval", loggingInterval));
        retval.append("      ").append(XMLHandler.addTagValue("hadoop_job_name", hadoopJobName));

        retval.append("      ").append(XMLHandler.addTagValue("mapper_class", mapperClass));
        retval.append("      ").append(XMLHandler.addTagValue("combiner_class", combinerClass));
        retval.append("      ").append(XMLHandler.addTagValue("reducer_class", reducerClass));
        retval.append("      ").append(XMLHandler.addTagValue("input_path", inputPath));
        retval.append("      ").append(XMLHandler.addTagValue("input_format_class", inputFormatClass));
        retval.append("      ").append(XMLHandler.addTagValue("output_path", outputPath));
        retval.append("      ").append(XMLHandler.addTagValue("output_key_class", outputKeyClass));
        retval.append("      ").append(XMLHandler.addTagValue("output_value_class", outputValueClass));
        retval.append("      ").append(XMLHandler.addTagValue("output_format_class", outputFormatClass));

        retval.append("      ").append(XMLHandler.addTagValue("hdfs_hostname", hdfsHostname));
        retval.append("      ").append(XMLHandler.addTagValue("hdfs_port", hdfsPort));
        retval.append("      ").append(XMLHandler.addTagValue("job_tracker_hostname", jobTrackerHostname));
        retval.append("      ").append(XMLHandler.addTagValue("job_tracker_port", jobTrackerPort));
        retval.append("      ").append(XMLHandler.addTagValue("num_map_tasks", numMapTasks));
        retval.append("      ").append(XMLHandler.addTagValue("num_reduce_tasks", numReduceTasks));
        retval.append("      ").append(XMLHandler.addTagValue("working_dir", workingDirectory));

        retval.append("      <user_defined_list>").append(Const.CR);
        if (userDefined != null) {
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    retval.append("        <user_defined>").append(Const.CR);
                    retval.append("          ").append(XMLHandler.addTagValue("name", item.getName()));
                    retval.append("          ").append(XMLHandler.addTagValue("value", item.getValue()));
                    retval.append("        </user_defined>").append(Const.CR);
                }
            }
        }
        retval.append("      </user_defined_list>").append(Const.CR);
        return retval.toString();
    }

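    /**
     * Restores the job entry settings from a PDI repository.
     */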
    public void loadRep(Repository rep, ObjectId id_jobentry, List<DatabaseMeta> databases,
            List<SlaveServer> slaveServers) throws KettleException {
        if (rep != null) {
            super.loadRep(rep, id_jobentry, databases, slaveServers);

            setHadoopJobName(rep.getJobEntryAttributeString(id_jobentry, "hadoop_job_name"));

            if (!Const.isEmpty(rep.getJobEntryAttributeString(id_jobentry, "hadoop_distribution"))) {
                setHadoopDistribution(rep.getJobEntryAttributeString(id_jobentry, "hadoop_distribution"));
            }

            setSimple(rep.getJobEntryAttributeBoolean(id_jobentry, "simple"));

            setJarUrl(rep.getJobEntryAttributeString(id_jobentry, "jar_url"));
            setCmdLineArgs(rep.getJobEntryAttributeString(id_jobentry, "command_line_args"));
            setBlocking(rep.getJobEntryAttributeBoolean(id_jobentry, "blocking"));
            //setLoggingInterval(new Long(rep.getJobEntryAttributeInteger(id_jobentry, "logging_interval")).intValue());
            setLoggingInterval(rep.getJobEntryAttributeString(id_jobentry, "logging_interval"));

            setMapperClass(rep.getJobEntryAttributeString(id_jobentry, "mapper_class"));
            setCombinerClass(rep.getJobEntryAttributeString(id_jobentry, "combiner_class"));
            setReducerClass(rep.getJobEntryAttributeString(id_jobentry, "reducer_class"));
            setInputPath(rep.getJobEntryAttributeString(id_jobentry, "input_path"));
            setInputFormatClass(rep.getJobEntryAttributeString(id_jobentry, "input_format_class"));
            setOutputPath(rep.getJobEntryAttributeString(id_jobentry, "output_path"));
            setOutputKeyClass(rep.getJobEntryAttributeString(id_jobentry, "output_key_class"));
            setOutputValueClass(rep.getJobEntryAttributeString(id_jobentry, "output_value_class"));
            setOutputFormatClass(rep.getJobEntryAttributeString(id_jobentry, "output_format_class"));

            setHdfsHostname(rep.getJobEntryAttributeString(id_jobentry, "hdfs_hostname"));
            setHdfsPort(rep.getJobEntryAttributeString(id_jobentry, "hdfs_port"));
            setJobTrackerHostname(rep.getJobEntryAttributeString(id_jobentry, "job_tracker_hostname"));
            setJobTrackerPort(rep.getJobEntryAttributeString(id_jobentry, "job_tracker_port"));
            //setNumMapTasks(new Long(rep.getJobEntryAttributeInteger(id_jobentry, "num_map_tasks")).intValue());
            setNumMapTasks(rep.getJobEntryAttributeString(id_jobentry, "num_map_tasks"));
            //      setNumReduceTasks(new Long(rep.getJobEntryAttributeInteger(id_jobentry, "num_reduce_tasks")).intValue());
            setNumReduceTasks(rep.getJobEntryAttributeString(id_jobentry, "num_reduce_tasks"));
            setWorkingDirectory(rep.getJobEntryAttributeString(id_jobentry, "working_dir"));

            int argnr = rep.countNrJobEntryAttributes(id_jobentry, "user_defined_name");//$NON-NLS-1$
            if (argnr > 0) {
                userDefined = new ArrayList<UserDefinedItem>();

                UserDefinedItem item = null;
                for (int i = 0; i < argnr; i++) {
                    item = new UserDefinedItem();
                    item.setName(rep.getJobEntryAttributeString(id_jobentry, i, "user_defined_name")); //$NON-NLS-1$
                    item.setValue(rep.getJobEntryAttributeString(id_jobentry, i, "user_defined_value")); //$NON-NLS-1$
                    userDefined.add(item);
                }
            }
        } else {
            throw new KettleException("Unable to save to a repository. The repository is null."); //$NON-NLS-1$
        }
    }

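    /**
     * Saves the job entry settings to a PDI repository.
     */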
    public void saveRep(Repository rep, ObjectId id_job) throws KettleException {
        if (rep != null) {
            super.saveRep(rep, id_job);

            rep.saveJobEntryAttribute(id_job, getObjectId(), "hadoop_job_name", hadoopJobName); //$NON-NLS-1$

            rep.saveJobEntryAttribute(id_job, getObjectId(), "hadoop_distribution", hadoopDistribution); //$NON-NLS-1$

            rep.saveJobEntryAttribute(id_job, getObjectId(), "simple", isSimple); //$NON-NLS-1$

            rep.saveJobEntryAttribute(id_job, getObjectId(), "jar_url", jarUrl); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "command_line_args", cmdLineArgs); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "blocking", blocking); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "logging_interval", loggingInterval); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "hadoop_job_name", hadoopJobName); //$NON-NLS-1$

            rep.saveJobEntryAttribute(id_job, getObjectId(), "mapper_class", mapperClass); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "combiner_class", combinerClass); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "reducer_class", reducerClass); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "input_path", inputPath); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "input_format_class", inputFormatClass); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "output_path", outputPath); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "output_key_class", outputKeyClass); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "output_value_class", outputValueClass); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "output_format_class", outputFormatClass); //$NON-NLS-1$

            rep.saveJobEntryAttribute(id_job, getObjectId(), "hdfs_hostname", hdfsHostname); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "hdfs_port", hdfsPort); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "job_tracker_hostname", jobTrackerHostname); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "job_tracker_port", jobTrackerPort); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "num_map_tasks", numMapTasks); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "num_reduce_tasks", numReduceTasks); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), "working_dir", workingDirectory); //$NON-NLS-1$

            if (userDefined != null) {
                for (int i = 0; i < userDefined.size(); i++) {
                    UserDefinedItem item = userDefined.get(i);
                    if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null //$NON-NLS-1$
                            && !"".equals(item.getValue())) { //$NON-NLS-1$
                        rep.saveJobEntryAttribute(id_job, getObjectId(), i, "user_defined_name", item.getName()); //$NON-NLS-1$
                        rep.saveJobEntryAttribute(id_job, getObjectId(), i, "user_defined_value", item.getValue()); //$NON-NLS-1$
                    }
                }
            }

        } else {
            throw new KettleException("Unable to save to a repository. The repository is null."); //$NON-NLS-1$
        }
    }

    public boolean evaluates() {
        return true;
    }

    public boolean isUnconditional() {
        return true;
    }

}
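
Example

The two execution paths above can be exercised outside of PDI. The sketches below are illustrative only: the jar path, driver class name, host names, ports, and HDFS paths are assumptions, not values taken from the class above.

First, a minimal sketch of the "simple mode" technique: load a jar, locate a driver class, and invoke its main() method via reflection while temporarily swapping the thread's context class loader.

import java.io.File;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;

public class MainClassInvoker {
    public static void main(String[] args) throws Exception {
        // Hypothetical jar and driver class; replace with your own.
        File jar = new File("/tmp/wordcount.jar");
        URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() },
                MainClassInvoker.class.getClassLoader());
        Class<?> driver = loader.loadClass("com.example.WordCountDriver");

        ClassLoader previous = Thread.currentThread().getContextClassLoader();
        try {
            // Many Hadoop drivers resolve resources through the context class loader.
            Thread.currentThread().setContextClassLoader(loader);
            Method mainMethod = driver.getMethod("main", String[].class);
            // Cast to Object so the String[] is passed as one argument, not expanded as varargs.
            mainMethod.invoke(null, (Object) new String[] { "hdfs:///demo/input", "hdfs:///demo/output" });
        } finally {
            Thread.currentThread().setContextClassLoader(previous);
        }
    }
}

Second, a minimal sketch of the "advanced mode" technique: build a classic-API JobConf, submit it with JobClient, and poll the RunningJob, roughly mirroring what execute() does after the HadoopConfigurer has filled in the cluster settings. IdentityMapper and IdentityReducer stand in for real user classes.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class ClassicApiJobSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setJobName("identity-copy");

        // Cluster endpoints are placeholders; the job entry derives these via its HadoopConfigurer.
        conf.set("fs.default.name", "hdfs://namenode:9000");
        conf.set("mapred.job.tracker", "jobtracker:9001");

        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(conf, new Path("hdfs://namenode:9000/user/demo/input"));
        FileOutputFormat.setOutputPath(conf, new Path("hdfs://namenode:9000/user/demo/output"));

        // submitJob() returns immediately; poll the RunningJob much like the blocking branch above.
        RunningJob runningJob = new JobClient(conf).submitJob(conf);
        while (!runningJob.isComplete()) {
            Thread.sleep(10000);
            System.out.printf("setup %.1f%% map %.1f%% reduce %.1f%%%n",
                    runningJob.setupProgress() * 100f,
                    runningJob.mapProgress() * 100f,
                    runningJob.reduceProgress() * 100f);
        }
        System.out.println("successful = " + runningJob.isSuccessful());
    }
}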