Java tutorial: Pentaho's JobEntryHadoopJobExecutor, a Kettle job entry that runs Hadoop MapReduce jobs (full source listing)

/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.di.job.entries.hadoopjobexecutor;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.lang.StringUtils;
import org.pentaho.di.cluster.SlaveServer;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.ResultFile;
import org.pentaho.di.core.annotations.JobEntry;
import org.pentaho.di.core.database.DatabaseMeta;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleXMLException;
import org.pentaho.di.core.hadoop.HadoopConfigurationBootstrap;
import org.pentaho.di.core.logging.Log4jFileAppender;
import org.pentaho.di.core.logging.LogWriter;
import org.pentaho.di.core.namedcluster.NamedClusterManager;
import org.pentaho.di.core.namedcluster.model.NamedCluster;
import org.pentaho.di.core.xml.XMLHandler;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.job.entry.JobEntryBase;
import org.pentaho.di.job.entry.JobEntryInterface;
import org.pentaho.di.repository.ObjectId;
import org.pentaho.di.repository.Repository;
import org.pentaho.di.ui.job.entries.hadoopjobexecutor.UserDefinedItem;
import org.pentaho.hadoop.shim.ConfigurationException;
import org.pentaho.hadoop.shim.HadoopConfiguration;
import org.pentaho.hadoop.shim.api.Configuration;
import org.pentaho.hadoop.shim.api.fs.FileSystem;
import org.pentaho.hadoop.shim.api.fs.Path;
import org.pentaho.hadoop.shim.api.mapred.RunningJob;
import org.pentaho.hadoop.shim.api.mapred.TaskCompletionEvent;
import org.pentaho.hadoop.shim.spi.HadoopShim;
import org.pentaho.metastore.api.exceptions.MetaStoreException;
import org.w3c.dom.Node;

@JobEntry(id = "HadoopJobExecutorPlugin", image = "HDE.svg", name = "HadoopJobExecutorPlugin.Name",
    description = "HadoopJobExecutorPlugin.Description",
    categoryDescription = "i18n:org.pentaho.di.job:JobCategory.Category.BigData",
    i18nPackageName = "org.pentaho.di.job.entries.hadoopjobexecutor")
public class JobEntryHadoopJobExecutor extends JobEntryBase implements Cloneable, JobEntryInterface {

  private static SecurityManagerStack smStack = new SecurityManagerStack();

  private static final String DEFAULT_LOGGING_INTERVAL = "60";

  private static Class<?> PKG = JobEntryHadoopJobExecutor.class; // for i18n purposes, needed by Translator2!!
  // $NON-NLS-1$

  private JarUtility util = new JarUtility();

  private String hadoopJobName;
  private String jarUrl = "";
  private String driverClass = "";
  private boolean isSimple = true;
  private String cmdLineArgs;
  private String outputKeyClass;
  private String outputValueClass;
  private String mapperClass;
  private String combinerClass;
  private String reducerClass;
  private String inputFormatClass;
  private String outputFormatClass;
  private String clusterName;
  private String hdfsHostname;
  private String hdfsPort;
  private String jobTrackerHostname;
  private String jobTrackerPort;
  private String inputPath;
  private String outputPath;
  private boolean blocking;
  private String loggingInterval = DEFAULT_LOGGING_INTERVAL; // 60 seconds default
  private boolean simpleBlocking;
  private String simpleLoggingInterval = loggingInterval;
  private String numMapTasks = "1";
  private String numReduceTasks = "1";
  private List<UserDefinedItem> userDefined = new ArrayList<UserDefinedItem>();

  public String getHadoopJobName() { return hadoopJobName; }
  public void setHadoopJobName(String hadoopJobName) { this.hadoopJobName = hadoopJobName; }

  public String getJarUrl() { return jarUrl; }
  public void setJarUrl(String jarUrl) { this.jarUrl = jarUrl; }

  public String getDriverClass() { return driverClass; }
  public void setDriverClass(String driverClass) { this.driverClass = driverClass; }

  public boolean isSimple() { return isSimple; }
  public void setSimple(boolean isSimple) { this.isSimple = isSimple; }

  public String getCmdLineArgs() { return cmdLineArgs; }
  public void setCmdLineArgs(String cmdLineArgs) { this.cmdLineArgs = cmdLineArgs; }

  public String getOutputKeyClass() { return outputKeyClass; }
  public void setOutputKeyClass(String outputKeyClass) { this.outputKeyClass = outputKeyClass; }

  public String getOutputValueClass() { return outputValueClass; }
  public void setOutputValueClass(String outputValueClass) { this.outputValueClass = outputValueClass; }

  public String getMapperClass() { return mapperClass; }
  public void setMapperClass(String mapperClass) { this.mapperClass = mapperClass; }

  public String getCombinerClass() { return combinerClass; }
  public void setCombinerClass(String combinerClass) { this.combinerClass = combinerClass; }

  public String getReducerClass() { return reducerClass; }
  public void setReducerClass(String reducerClass) { this.reducerClass = reducerClass; }

  public String getInputFormatClass() { return inputFormatClass; }
  public void setInputFormatClass(String inputFormatClass) { this.inputFormatClass = inputFormatClass; }

  public String getOutputFormatClass() { return outputFormatClass; }
  public void setOutputFormatClass(String outputFormatClass) { this.outputFormatClass = outputFormatClass; }

  public String getClusterName() { return clusterName; }
  public void setClusterName(String clusterName) { this.clusterName = clusterName; }

  public String getHdfsHostname() { return hdfsHostname; }
  public void setHdfsHostname(String hdfsHostname) { this.hdfsHostname = hdfsHostname; }

  public String getHdfsPort() { return hdfsPort; }
  public void setHdfsPort(String hdfsPort) { this.hdfsPort = hdfsPort; }

  public String getJobTrackerHostname() { return jobTrackerHostname; }
  public void setJobTrackerHostname(String jobTrackerHostname) { this.jobTrackerHostname = jobTrackerHostname; }

  public String getJobTrackerPort() { return jobTrackerPort; }
  public void setJobTrackerPort(String jobTrackerPort) { this.jobTrackerPort = jobTrackerPort; }

  public String getInputPath() { return inputPath; }
  public void setInputPath(String inputPath)
  { this.inputPath = inputPath; }

  public String getOutputPath() { return outputPath; }
  public void setOutputPath(String outputPath) { this.outputPath = outputPath; }

  public boolean isBlocking() { return blocking; }
  public void setBlocking(boolean blocking) { this.blocking = blocking; }

  public String getLoggingInterval() {
    return loggingInterval == null ? DEFAULT_LOGGING_INTERVAL : loggingInterval;
  }

  public void setLoggingInterval(String loggingInterval) { this.loggingInterval = loggingInterval; }

  public List<UserDefinedItem> getUserDefined() { return userDefined; }
  public void setUserDefined(List<UserDefinedItem> userDefined) { this.userDefined = userDefined; }

  public String getNumMapTasks() { return numMapTasks; }
  public void setNumMapTasks(String numMapTasks) { this.numMapTasks = numMapTasks; }

  public String getNumReduceTasks() { return numReduceTasks; }
  public void setNumReduceTasks(String numReduceTasks) { this.numReduceTasks = numReduceTasks; }

  /**
   * Restore the security manager if we're done executing all our threads.
   *
   * @param counter
   *          Thread counter
   * @param nesm
   *          Security Manager we set
   */
  private void restoreSecurityManager(AtomicInteger counter, NoExitSecurityManager nesm) {
    if (counter.decrementAndGet() == 0) {
      // Restore the cached security manager after all threads have completed
      smStack.removeSecurityManager(nesm);
    }
  }

  public Result execute(final Result result, int arg1) throws KettleException {
    result.setNrErrors(0);

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    try {
      appender = LogWriter.createFileAppender(logFileName, true, false);
      LogWriter.getInstance().addAppender(appender);
      log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
      logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
          e.toString()));
      logError(Const.getStackTracker(e));
    }

    try {
      URL resolvedJarUrl = resolveJarUrl(jarUrl);
      if (log.isDetailed()) {
        logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
            resolvedJarUrl.toExternalForm()));
      }

      HadoopShim shim = getHadoopConfiguration().getHadoopShim();

      if (isSimple) {
        String simpleLoggingIntervalS = environmentSubstitute(getSimpleLoggingInterval());
        int simpleLogInt = 60;
        try {
          simpleLogInt = Integer.parseInt(simpleLoggingIntervalS, 10);
        } catch (NumberFormatException e) {
          logError(BaseMessages.getString(PKG, "ErrorParsingLogInterval", simpleLoggingIntervalS, simpleLogInt));
        }

        final Class<?> mainClass = locateDriverClass(resolvedJarUrl, shim);

        if (log.isDetailed()) {
          logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.UsingDriverClass", mainClass == null ?
"null" : mainClass.getName())); logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode")); } final AtomicInteger threads = new AtomicInteger(1); final NoExitSecurityManager nesm = new NoExitSecurityManager(System.getSecurityManager()); smStack.setSecurityManager(nesm); try { Runnable r = new Runnable() { public void run() { try { try { executeMainMethod(mainClass); } finally { restoreSecurityManager(threads, nesm); } } catch (NoExitSecurityManager.NoExitSecurityException ex) { // Only log if we're blocking and waiting for this to complete if (simpleBlocking) { logExitStatus(result, mainClass, ex); } } catch (InvocationTargetException ex) { if (ex.getTargetException() instanceof NoExitSecurityManager.NoExitSecurityException) { // Only log if we're blocking and waiting for this to complete if (simpleBlocking) { logExitStatus(result, mainClass, (NoExitSecurityManager.NoExitSecurityException) ex .getTargetException()); } } else { throw new RuntimeException(ex); } } catch (Exception ex) { throw new RuntimeException(ex); } } }; Thread t = new Thread(r); t.setDaemon(true); t.setUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { @Override public void uncaughtException(Thread t, Throwable e) { restoreSecurityManager(threads, nesm); if (simpleBlocking) { // Only log if we're blocking and waiting for this to complete logError(BaseMessages.getString(JobEntryHadoopJobExecutor.class, "JobEntryHadoopJobExecutor.ErrorExecutingClass", mainClass.getName()), e); result.setResult(false); } } }); nesm.addBlockedThread(t); t.start(); if (simpleBlocking) { // wait until the thread is done do { logDetailed(BaseMessages.getString(JobEntryHadoopJobExecutor.class, "JobEntryHadoopJobExecutor.Blocking", mainClass.getName())); t.join(simpleLogInt * 1000); } while (!parentJob.isStopped() && t.isAlive()); if (t.isAlive()) { // Kill thread if it's still running. The job must have been stopped. 
              t.interrupt();
            }
          }
        } finally {
          // If we're not performing simple blocking spawn a watchdog thread to restore the security manager when all
          // threads are complete
          if (!simpleBlocking) {
            Runnable threadWatchdog = new Runnable() {
              @Override
              public void run() {
                while (threads.get() > 0) {
                  try {
                    Thread.sleep(100);
                  } catch (InterruptedException e) {
                    /* ignore */
                  }
                }
                restoreSecurityManager(threads, nesm);
              }
            };
            Thread watchdog = new Thread(threadWatchdog);
            watchdog.setDaemon(true);
            watchdog.start();
          }
        }
      } else {
        if (log.isDetailed()) {
          logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));
        }

        Configuration conf = shim.createConfiguration();
        FileSystem fs = shim.getFileSystem(conf);
        URL[] urls = new URL[] { resolvedJarUrl };
        URLClassLoader loader = new URLClassLoader(urls, shim.getClass().getClassLoader());
        String hadoopJobNameS = environmentSubstitute(hadoopJobName);
        conf.setJobName(hadoopJobNameS);

        String outputKeyClassS = environmentSubstitute(outputKeyClass);
        conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
        String outputValueClassS = environmentSubstitute(outputValueClass);
        conf.setOutputValueClass(loader.loadClass(outputValueClassS));

        if (mapperClass != null) {
          String mapperClassS = environmentSubstitute(mapperClass);
          Class<?> mapper = loader.loadClass(mapperClassS);
          conf.setMapperClass(mapper);
        }
        if (combinerClass != null) {
          String combinerClassS = environmentSubstitute(combinerClass);
          Class<?> combiner = loader.loadClass(combinerClassS);
          conf.setCombinerClass(combiner);
        }
        if (reducerClass != null) {
          String reducerClassS = environmentSubstitute(reducerClass);
          Class<?> reducer = loader.loadClass(reducerClassS);
          conf.setReducerClass(reducer);
        }
        if (inputFormatClass != null) {
          String inputFormatClassS = environmentSubstitute(inputFormatClass);
          Class<?> inputFormat = loader.loadClass(inputFormatClassS);
          conf.setInputFormat(inputFormat);
        }
        if (outputFormatClass != null) {
          String outputFormatClassS = environmentSubstitute(outputFormatClass);
          Class<?> outputFormat = loader.loadClass(outputFormatClassS);
          conf.setOutputFormat(outputFormat);
        }

        String hdfsHostnameS = environmentSubstitute(hdfsHostname);
        String hdfsPortS = environmentSubstitute(hdfsPort);
        String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
        String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

        List<String> configMessages = new ArrayList<String>();
        shim.configureConnectionInformation(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
            configMessages);
        for (String m : configMessages) {
          logBasic(m);
        }

        String inputPathS = environmentSubstitute(inputPath);
        String[] inputPathParts = inputPathS.split(",");
        List<Path> paths = new ArrayList<Path>();
        for (String path : inputPathParts) {
          paths.add(fs.asPath(conf.getDefaultFileSystemURL(), path));
        }
        Path[] finalPaths = paths.toArray(new Path[paths.size()]);
        conf.setInputPaths(finalPaths);

        String outputPathS = environmentSubstitute(outputPath);
        conf.setOutputPath(fs.asPath(conf.getDefaultFileSystemURL(), outputPathS));

        // process user defined values
        for (UserDefinedItem item : userDefined) {
          if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
              && !"".equals(item.getValue())) {
            String nameS = environmentSubstitute(item.getName());
            String valueS = environmentSubstitute(item.getValue());
            conf.set(nameS, valueS);
          }
        }

        conf.setJar(environmentSubstitute(jarUrl));

        String numMapTasksS = environmentSubstitute(numMapTasks);
        String numReduceTasksS = environmentSubstitute(numReduceTasks);
        int numM
            = 1;
        try {
          numM = Integer.parseInt(numMapTasksS);
        } catch (NumberFormatException e) {
          logError("Can't parse number of map tasks '" + numMapTasksS + "'. Setting number of map tasks to 1");
        }
        int numR = 1;
        try {
          numR = Integer.parseInt(numReduceTasksS);
        } catch (NumberFormatException e) {
          logError("Can't parse number of reduce tasks '" + numReduceTasksS + "'. Setting number of reduce tasks to 1");
        }

        conf.setNumMapTasks(numM);
        conf.setNumReduceTasks(numR);

        RunningJob runningJob = shim.submitJob(conf);

        String loggingIntervalS = environmentSubstitute(getLoggingInterval());
        int logIntv = 60;
        try {
          logIntv = Integer.parseInt(loggingIntervalS);
        } catch (NumberFormatException e) {
          logError(BaseMessages.getString(PKG, "ErrorParsingLogInterval", loggingIntervalS, logIntv));
        }

        if (blocking) {
          try {
            int taskCompletionEventIndex = 0;
            while (!parentJob.isStopped() && !runningJob.isComplete()) {
              if (logIntv >= 1) {
                printJobStatus(runningJob);
                taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                Thread.sleep(logIntv * 1000);
              } else {
                Thread.sleep(60000);
              }
            }

            if (parentJob.isStopped() && !runningJob.isComplete()) {
              // We must stop the job running on Hadoop
              runningJob.killJob();
              // Indicate this job entry did not complete
              result.setResult(false);
            }

            printJobStatus(runningJob);
            // Log any messages we may have missed while polling
            logTaskMessages(runningJob, taskCompletionEventIndex);
          } catch (InterruptedException ie) {
            logError(ie.getMessage(), ie);
          }

          // Entry is successful if the MR job is successful overall
          result.setResult(runningJob.isSuccessful());
        }
      }
    } catch (Throwable t) {
      t.printStackTrace();
      result.setStopped(true);
      result.setNrErrors(1);
      result.setResult(false);
      logError(t.getMessage(), t);
    }

    if (appender != null) {
      LogWriter.getInstance().removeAppender(appender);
      appender.close();

      ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(), parentJob.getJobname(),
          getName());
      result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
  }

  private Class<?> locateDriverClass(final URL resolvedJarUrl, final HadoopShim shim) throws IOException,
      ClassNotFoundException {
    if (Const.isEmpty(driverClass)) {
      Class<?> mainClass = util.getMainClassFromManifest(resolvedJarUrl, shim.getClass().getClassLoader());
      if (mainClass == null) {
        List<Class<?>> mainClasses =
            util.getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), shim.getClass().getClassLoader());
        if (mainClasses.size() == 1) {
          return mainClasses.get(0);
        } else if (mainClasses.isEmpty()) {
          throw new RuntimeException(BaseMessages.getString(PKG, "ErrorDriverClassNotSpecified"));
        } else {
          throw new RuntimeException(BaseMessages.getString(PKG, "ErrorMultipleDriverClasses"));
        }
      }
      return mainClass;
    } else {
      return util.getClassByName(environmentSubstitute(getDriverClass()), resolvedJarUrl,
          shim.getClass().getClassLoader());
    }
  }

  public URL resolveJarUrl(final String jarUrl) throws MalformedURLException {
    String jarUrlS = environmentSubstitute(jarUrl);
    if (jarUrlS.indexOf("://") == -1) {
      // default to file://
      File jarFile = new File(jarUrlS);
      return jarFile.toURI().toURL();
    } else {
      return new URL(jarUrlS);
    }
  }

  /**
   * Log messages indicating completion (success/failure) of component tasks for the provided running job.
   *
   * @param runningJob
   *          Running job to poll for completion events
   * @param startIndex
   *          Start at this event index to poll from
   * @return Total events consumed
   * @throws IOException
   *           Error fetching events
   */
  private int logTaskMessages(RunningJob runningJob, int startIndex) throws IOException {
    TaskCompletionEvent[] tcEvents = runningJob.getTaskCompletionEvents(startIndex);
    for (int i = 0; i < tcEvents.length; i++) {
      String[] diags = runningJob.getTaskDiagnostics(tcEvents[i].getTaskAttemptId());
      StringBuilder diagsOutput = new StringBuilder();

      if (diags != null && diags.length > 0) {
        diagsOutput.append(Const.CR);
        for (String s : diags) {
          diagsOutput.append(s);
          diagsOutput.append(Const.CR);
        }
      }

      switch (tcEvents[i].getTaskStatus()) {
        case KILLED:
          logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
              TaskCompletionEvent.Status.KILLED, tcEvents[i].getTaskAttemptId(), tcEvents[i].getTaskAttemptId(),
              tcEvents[i].getEventId(), diagsOutput));
          break;
        case FAILED:
          logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
              TaskCompletionEvent.Status.FAILED, tcEvents[i].getTaskAttemptId(), tcEvents[i].getTaskAttemptId(),
              tcEvents[i].getEventId(), diagsOutput));
          break;
        case SUCCEEDED:
          logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.TaskDetails", //$NON-NLS-1$
              TaskCompletionEvent.Status.SUCCEEDED, tcEvents[i].getTaskAttemptId(), tcEvents[i].getTaskAttemptId(),
              tcEvents[i].getEventId(), diagsOutput));
          break;
      }
    }
    return tcEvents.length;
  }

  /**
   * Log the status of an attempt to exit the JVM while executing the provided class' main method.
   *
   * @param result
   *          Result to update with failure condition if exit status code was not 0
   * @param mainClass
   *          Main class we were executing
   * @param ex
   *          Exception caught while executing the class provided
   */
  private void logExitStatus(Result result, Class<?> mainClass, NoExitSecurityManager.NoExitSecurityException ex) {
    // Only error if exit code is not 0
    if (ex.getStatus() != 0) {
      result.setStopped(true);
      result.setNrErrors(1);
      result.setResult(false);
      logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToExecuteClass", mainClass.getName(),
          ex.getStatus()));
    }
  }

  /**
   * Execute the main method of the provided class with the current command line arguments.
   *
   * @param clazz
   *          Class with main method to execute
   * @throws NoSuchMethodException
   * @throws IllegalAccessException
   * @throws InvocationTargetException
   */
  protected void executeMainMethod(Class<?> clazz) throws NoSuchMethodException, IllegalAccessException,
      InvocationTargetException {
    final ClassLoader cl = Thread.currentThread().getContextClassLoader();
    try {
      Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
      Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
      String commandLineArgs = environmentSubstitute(cmdLineArgs);
      Object[] args = (commandLineArgs != null) ?
          new Object[] { commandLineArgs.split(" ") } : new Object[0];
      mainMethod.invoke(null, args);
    } finally {
      Thread.currentThread().setContextClassLoader(cl);
    }
  }

  public void printJobStatus(RunningJob runningJob) throws IOException {
    if (log.isBasic()) {
      float setupPercent = runningJob.setupProgress() * 100f;
      float mapPercent = runningJob.mapProgress() * 100f;
      float reducePercent = runningJob.reduceProgress() * 100f;
      logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.RunningPercent", setupPercent, mapPercent,
          reducePercent));
    }
  }

  public void loadXML(Node entrynode, List<DatabaseMeta> databases, List<SlaveServer> slaveServers, Repository rep)
      throws KettleXMLException {
    super.loadXML(entrynode, databases, slaveServers);
    hadoopJobName = XMLHandler.getTagValue(entrynode, "hadoop_job_name");
    isSimple = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "simple"));

    jarUrl = XMLHandler.getTagValue(entrynode, "jar_url");
    driverClass = XMLHandler.getTagValue(entrynode, "driver_class");
    cmdLineArgs = XMLHandler.getTagValue(entrynode, "command_line_args");
    simpleBlocking = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "simple_blocking"));
    blocking = "Y".equalsIgnoreCase(XMLHandler.getTagValue(entrynode, "blocking"));
    simpleLoggingInterval = XMLHandler.getTagValue(entrynode, "simple_logging_interval");
    loggingInterval = XMLHandler.getTagValue(entrynode, "logging_interval");
    mapperClass = XMLHandler.getTagValue(entrynode, "mapper_class");
    combinerClass = XMLHandler.getTagValue(entrynode, "combiner_class");
    reducerClass = XMLHandler.getTagValue(entrynode, "reducer_class");
    inputPath = XMLHandler.getTagValue(entrynode, "input_path");
    inputFormatClass = XMLHandler.getTagValue(entrynode, "input_format_class");
    outputPath = XMLHandler.getTagValue(entrynode, "output_path");
    outputKeyClass = XMLHandler.getTagValue(entrynode, "output_key_class");
    outputValueClass = XMLHandler.getTagValue(entrynode, "output_value_class");
    outputFormatClass = XMLHandler.getTagValue(entrynode, "output_format_class");

    loadClusterConfig(null, rep, entrynode);
    setRepository(rep);

    // numMapTasks = Integer.parseInt(XMLHandler.getTagValue(entrynode, "num_map_tasks"));
    numMapTasks = XMLHandler.getTagValue(entrynode, "num_map_tasks");
    // numReduceTasks = Integer.parseInt(XMLHandler.getTagValue(entrynode, "num_reduce_tasks"));
    numReduceTasks = XMLHandler.getTagValue(entrynode, "num_reduce_tasks");

    // How many user defined elements?
    userDefined = new ArrayList<UserDefinedItem>();
    Node userDefinedList = XMLHandler.getSubNode(entrynode, "user_defined_list");
    int nrUserDefined = XMLHandler.countNodes(userDefinedList, "user_defined");
    for (int i = 0; i < nrUserDefined; i++) {
      Node userDefinedNode = XMLHandler.getSubNodeByNr(userDefinedList, "user_defined", i);
      String name = XMLHandler.getTagValue(userDefinedNode, "name");
      String value = XMLHandler.getTagValue(userDefinedNode, "value");
      UserDefinedItem item = new UserDefinedItem();
      item.setName(name);
      item.setValue(value);
      userDefined.add(item);
    }
  }

  private void loadClusterConfig(ObjectId id_jobentry, Repository rep, Node entrynode) {
    boolean configLoaded = false;
    try {
      // attempt to load from named cluster
      if (entrynode != null) {
        setClusterName(XMLHandler.getTagValue(entrynode, "cluster_name")); //$NON-NLS-1$
      } else if (rep != null) {
        setClusterName(rep.getJobEntryAttributeString(id_jobentry, "cluster_name")); //$NON-NLS-1$ //$NON-NLS-2$
      }

      // load from system first, then fall back to copy stored with job (AbstractMeta)
      NamedCluster nc = null;
      if (rep != null && !StringUtils.isEmpty(getClusterName())
          && NamedClusterManager.getInstance().contains(getClusterName(), rep.getMetaStore())) {
        // pull config from NamedCluster
        nc = NamedClusterManager.getInstance().read(getClusterName(), rep.getMetaStore());
      }

      if (nc != null) {
        setJobTrackerHostname(nc.getJobTrackerHost());
        setJobTrackerPort(nc.getJobTrackerPort());
        setHdfsHostname(nc.getHdfsHost());
        setHdfsPort(nc.getHdfsPort());
        configLoaded = true;
      }
    } catch (Throwable t) {
      logDebug(t.getMessage(), t);
    }

    if (!configLoaded) {
      if (entrynode != null) {
        // load default values for cluster & legacy fallback
        setHdfsHostname(XMLHandler.getTagValue(entrynode, "hdfs_hostname")); //$NON-NLS-1$
        setHdfsPort(XMLHandler.getTagValue(entrynode, "hdfs_port")); //$NON-NLS-1$
        setJobTrackerHostname(XMLHandler.getTagValue(entrynode, "job_tracker_hostname")); //$NON-NLS-1$
        setJobTrackerPort(XMLHandler.getTagValue(entrynode, "job_tracker_port")); //$NON-NLS-1$
      } else if (rep != null) {
        // load default values for cluster & legacy fallback
        try {
          setHdfsHostname(rep.getJobEntryAttributeString(id_jobentry, "hdfs_hostname"));
          setHdfsPort(rep.getJobEntryAttributeString(id_jobentry, "hdfs_port")); //$NON-NLS-1$
          setJobTrackerHostname(rep.getJobEntryAttributeString(id_jobentry, "job_tracker_hostname")); //$NON-NLS-1$
          setJobTrackerPort(rep.getJobEntryAttributeString(id_jobentry, "job_tracker_port")); //$NON-NLS-1$
        } catch (KettleException ke) {
          logError(ke.getMessage(), ke);
        }
      }
    }
  }

  public String getXML() {
    StringBuffer retval = new StringBuffer(1024);
    retval.append(super.getXML());

    retval.append(" ").append(XMLHandler.addTagValue("hadoop_job_name", hadoopJobName));
    retval.append(" ").append(XMLHandler.addTagValue("simple", isSimple));
    retval.append(" ").append(XMLHandler.addTagValue("jar_url", jarUrl));
    retval.append(" ").append(XMLHandler.addTagValue("driver_class", driverClass));
    retval.append(" ").append(XMLHandler.addTagValue("command_line_args", cmdLineArgs));
    retval.append(" ").append(XMLHandler.addTagValue("simple_blocking", simpleBlocking));
    retval.append(" ").append(XMLHandler.addTagValue("blocking", blocking));
    retval.append(" ").append(XMLHandler.addTagValue("logging_interval", loggingInterval));
    retval.append(" ").append(XMLHandler.addTagValue("simple_logging_interval", simpleLoggingInterval));
    retval.append(" ").append(XMLHandler.addTagValue("hadoop_job_name", hadoopJobName));
    retval.append(" ").append(XMLHandler.addTagValue("mapper_class",
        mapperClass));
    retval.append(" ").append(XMLHandler.addTagValue("combiner_class", combinerClass));
    retval.append(" ").append(XMLHandler.addTagValue("reducer_class", reducerClass));
    retval.append(" ").append(XMLHandler.addTagValue("input_path", inputPath));
    retval.append(" ").append(XMLHandler.addTagValue("input_format_class", inputFormatClass));
    retval.append(" ").append(XMLHandler.addTagValue("output_path", outputPath));
    retval.append(" ").append(XMLHandler.addTagValue("output_key_class", outputKeyClass));
    retval.append(" ").append(XMLHandler.addTagValue("output_value_class", outputValueClass));
    retval.append(" ").append(XMLHandler.addTagValue("output_format_class", outputFormatClass));

    retval.append(" ").append(XMLHandler.addTagValue("cluster_name", clusterName)); //$NON-NLS-1$ //$NON-NLS-2$
    try {
      if (rep != null && !StringUtils.isEmpty(getClusterName())
          && NamedClusterManager.getInstance().contains(getClusterName(), rep.getMetaStore())) {
        // pull config from NamedCluster
        NamedCluster nc = NamedClusterManager.getInstance().read(getClusterName(), rep.getMetaStore());
        setJobTrackerHostname(nc.getJobTrackerHost());
        setJobTrackerPort(nc.getJobTrackerPort());
        setHdfsHostname(nc.getHdfsHost());
        setHdfsPort(nc.getHdfsPort());
      }
    } catch (MetaStoreException e) {
      logDebug(e.getMessage(), e);
    }
    retval.append(" ").append(XMLHandler.addTagValue("hdfs_hostname", hdfsHostname)); //$NON-NLS-1$ //$NON-NLS-2$
    retval.append(" ").append(XMLHandler.addTagValue("hdfs_port", hdfsPort)); //$NON-NLS-1$ //$NON-NLS-2$
    retval.append(" ").append(XMLHandler.addTagValue("job_tracker_hostname", jobTrackerHostname)); //$NON-NLS-1$ //$NON-NLS-2$
    retval.append(" ").append(XMLHandler.addTagValue("job_tracker_port", jobTrackerPort)); //$NON-NLS-1$ //$NON-NLS-2$

    retval.append(" ").append(XMLHandler.addTagValue("num_map_tasks", numMapTasks));
    retval.append(" ").append(XMLHandler.addTagValue("num_reduce_tasks", numReduceTasks));

    retval.append(" <user_defined_list>").append(Const.CR);
    if (userDefined != null) {
      for (UserDefinedItem item : userDefined) {
        if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
            && !"".equals(item.getValue())) {
          retval.append(" <user_defined>").append(Const.CR);
          retval.append(" ").append(XMLHandler.addTagValue("name", item.getName()));
          retval.append(" ").append(XMLHandler.addTagValue("value", item.getValue()));
          retval.append(" </user_defined>").append(Const.CR);
        }
      }
    }
    retval.append(" </user_defined_list>").append(Const.CR);

    return retval.toString();
  }

  public void loadRep(Repository rep, ObjectId id_jobentry, List<DatabaseMeta> databases,
      List<SlaveServer> slaveServers) throws KettleException {
    if (rep != null) {
      super.loadRep(rep, id_jobentry, databases, slaveServers);

      setHadoopJobName(rep.getJobEntryAttributeString(id_jobentry, "hadoop_job_name"));
      setSimple(rep.getJobEntryAttributeBoolean(id_jobentry, "simple"));

      setJarUrl(rep.getJobEntryAttributeString(id_jobentry, "jar_url"));
      setDriverClass(rep.getJobEntryAttributeString(id_jobentry, "driver_class"));
      setCmdLineArgs(rep.getJobEntryAttributeString(id_jobentry, "command_line_args"));
      setSimpleBlocking(rep.getJobEntryAttributeBoolean(id_jobentry, "simple_blocking"));
      setBlocking(rep.getJobEntryAttributeBoolean(id_jobentry, "blocking"));
      setSimpleLoggingInterval(rep.getJobEntryAttributeString(id_jobentry, "simple_logging_interval"));
      setLoggingInterval(rep.getJobEntryAttributeString(id_jobentry, "logging_interval"));
      setMapperClass(rep.getJobEntryAttributeString(id_jobentry, "mapper_class"));
      setCombinerClass(rep.getJobEntryAttributeString(id_jobentry, "combiner_class"));
      setReducerClass(rep.getJobEntryAttributeString(id_jobentry, "reducer_class"));
      setInputPath(rep.getJobEntryAttributeString(id_jobentry, "input_path"));
      setInputFormatClass(rep.getJobEntryAttributeString(id_jobentry, "input_format_class"));
      setOutputPath(rep.getJobEntryAttributeString(id_jobentry, "output_path"));
      setOutputKeyClass(rep.getJobEntryAttributeString(id_jobentry, "output_key_class"));
      setOutputValueClass(rep.getJobEntryAttributeString(id_jobentry, "output_value_class"));
      setOutputFormatClass(rep.getJobEntryAttributeString(id_jobentry, "output_format_class"));

      loadClusterConfig(id_jobentry, rep, null);
      setRepository(rep);

      // setNumMapTasks(new Long(rep.getJobEntryAttributeInteger(id_jobentry, "num_map_tasks")).intValue());
      setNumMapTasks(rep.getJobEntryAttributeString(id_jobentry, "num_map_tasks"));
      // setNumReduceTasks(new Long(rep.getJobEntryAttributeInteger(id_jobentry, "num_reduce_tasks")).intValue());
      setNumReduceTasks(rep.getJobEntryAttributeString(id_jobentry, "num_reduce_tasks"));

      int argnr = rep.countNrJobEntryAttributes(id_jobentry, "user_defined_name"); //$NON-NLS-1$
      if (argnr > 0) {
        userDefined = new ArrayList<UserDefinedItem>();

        UserDefinedItem item = null;
        for (int i = 0; i < argnr; i++) {
          item = new UserDefinedItem();
          item.setName(rep.getJobEntryAttributeString(id_jobentry, i, "user_defined_name")); //$NON-NLS-1$
          item.setValue(rep.getJobEntryAttributeString(id_jobentry, i, "user_defined_value")); //$NON-NLS-1$
          userDefined.add(item);
        }
      }
    } else {
      throw new KettleException("Unable to load from a repository. The repository is null."); //$NON-NLS-1$
    }
  }

  public void saveRep(Repository rep, ObjectId id_job) throws KettleException {
    if (rep != null) {
      super.saveRep(rep, id_job);

      rep.saveJobEntryAttribute(id_job, getObjectId(), "hadoop_job_name", hadoopJobName); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "simple", isSimple); //$NON-NLS-1$

      rep.saveJobEntryAttribute(id_job, getObjectId(), "jar_url", jarUrl); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "driver_class", driverClass); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "command_line_args", cmdLineArgs); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "simple_blocking", simpleBlocking); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "blocking", blocking); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "simple_logging_interval", simpleLoggingInterval); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "logging_interval", loggingInterval); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "hadoop_job_name", hadoopJobName); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "mapper_class", mapperClass); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "combiner_class", combinerClass); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "reducer_class", reducerClass); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "input_path", inputPath); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "input_format_class", inputFormatClass); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "output_path", outputPath); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "output_key_class", outputKeyClass); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "output_value_class", outputValueClass); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "output_format_class", outputFormatClass); //$NON-NLS-1$

      rep.saveJobEntryAttribute(id_job, getObjectId(), "cluster_name", clusterName); //$NON-NLS-1$
      try {
        if (!StringUtils.isEmpty(getClusterName())
            && NamedClusterManager.getInstance().contains(getClusterName(), rep.getMetaStore())) {
          // pull config from NamedCluster
          NamedCluster nc = NamedClusterManager.getInstance().read(getClusterName(), rep.getMetaStore());
          setJobTrackerHostname(nc.getJobTrackerHost());
          setJobTrackerPort(nc.getJobTrackerPort());
          setHdfsHostname(nc.getHdfsHost());
          setHdfsPort(nc.getHdfsPort());
        }
      } catch (MetaStoreException e) {
        logDebug(e.getMessage(), e);
      }
      rep.saveJobEntryAttribute(id_job, getObjectId(), "hdfs_hostname", hdfsHostname); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "hdfs_port", hdfsPort); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "job_tracker_hostname", jobTrackerHostname); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "job_tracker_port", jobTrackerPort); //$NON-NLS-1$

      rep.saveJobEntryAttribute(id_job, getObjectId(), "num_map_tasks", numMapTasks); //$NON-NLS-1$
      rep.saveJobEntryAttribute(id_job, getObjectId(), "num_reduce_tasks", numReduceTasks); //$NON-NLS-1$

      if (userDefined != null) {
        for (int i = 0; i < userDefined.size(); i++) {
          UserDefinedItem item = userDefined.get(i);
          if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null //$NON-NLS-1$
              && !"".equals(item.getValue())) { //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), i, "user_defined_name", item.getName()); //$NON-NLS-1$
            rep.saveJobEntryAttribute(id_job, getObjectId(), i, "user_defined_value", item.getValue()); //$NON-NLS-1$
          }
        }
      }
    } else {
      throw new KettleException("Unable to save to a repository. The repository is null."); //$NON-NLS-1$
    }
  }

  public boolean evaluates() {
    return true;
  }

  public boolean isUnconditional() {
    return true;
  }

  public String getSimpleLoggingInterval() {
    return simpleLoggingInterval == null ? DEFAULT_LOGGING_INTERVAL : simpleLoggingInterval;
  }

  public void setSimpleLoggingInterval(String simpleLoggingInterval) {
    this.simpleLoggingInterval = simpleLoggingInterval;
  }

  public boolean isSimpleBlocking() {
    return simpleBlocking;
  }

  public void setSimpleBlocking(boolean simpleBlocking) {
    this.simpleBlocking = simpleBlocking;
  }

  /**
   * Get the {@link org.pentaho.hadoop.shim.HadoopConfiguration} to use when executing. This is by default loaded from
   * {@link HadoopConfigurationRegistry}.
   *
   * @return a valid Hadoop configuration
   * @throws org.pentaho.hadoop.shim.ConfigurationException
   *           Error locating a valid hadoop configuration
   */
  protected HadoopConfiguration getHadoopConfiguration() throws ConfigurationException {
    return HadoopConfigurationBootstrap.getHadoopConfigurationProvider().getActiveConfiguration();
  }
}
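
For readers who want to see how the entry above is driven, here is a minimal, illustrative sketch of configuring the job entry in "simple" mode and serializing it with getXML(). It only calls setters and methods defined in the listing (plus setName() inherited from JobEntryBase). The jar path and driver class name are made-up placeholders, and actually invoking execute() would additionally need a running Kettle environment (parent job, logging, an active Hadoop configuration) and the companion classes (JarUtility, NoExitSecurityManager, SecurityManagerStack) on the classpath, which this sketch does not attempt.

import org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor;

public class HadoopJobExecutorConfigSketch {
  public static void main(String[] args) {
    // Configure the entry in "simple" mode: point it at a driver jar and pass arguments.
    JobEntryHadoopJobExecutor entry = new JobEntryHadoopJobExecutor();
    entry.setName("Word count");                          // entry name, inherited from JobEntryBase
    entry.setHadoopJobName("wordcount");
    entry.setSimple(true);
    entry.setJarUrl("/opt/jobs/wordcount.jar");           // placeholder path; resolveJarUrl() turns it into a file:// URL
    entry.setDriverClass("org.example.WordCountDriver");  // placeholder driver class with a main() method
    entry.setCmdLineArgs("/input/books /output/counts");  // split on spaces and passed to main()
    entry.setSimpleBlocking(true);                        // wait for the driver thread to finish
    entry.setSimpleLoggingInterval("30");                 // log a "blocking" message every 30 seconds

    // Print the XML fragment the entry produces; loadXML() parses the same tags back.
    System.out.println(entry.getXML());
  }
}

The same setters mirror the fields that loadXML()/saveRep() persist, so inspecting the printed fragment is a quick way to check which tag names (jar_url, driver_class, simple_blocking, and so on) a job file will contain.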