List of usage examples for org.apache.hadoop.mapreduce.Job.setJar
public void setJar(String jar)
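Before the collected sources, a minimal self-contained sketch of the call (the jar path below is a placeholder, not taken from any of the examples that follow): setJar points the job at an explicit, pre-built jar that Hadoop ships to the cluster, instead of the more common setJarByClass lookup of the class's containing jar.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class SetJarSketch {
        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration(), "setJar-sketch");
            // Ship a pre-built jar explicitly instead of letting Hadoop locate it
            // via setJarByClass(); this path is a placeholder.
            job.setJar("/path/to/prebuilt-job.jar");
            // Identity mapper, no reducer: input records are copied to the output.
            job.setNumReduceTasks(0);
            job.setOutputKeyClass(LongWritable.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }

Several of the examples below follow the same pattern of preferring an explicit jar path when the class-based lookup is unreliable, falling back to setJarByClass only when the jar file is absent (e.g. Kylin's setJobClasspath).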
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);
    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                context.getProgram().getClassLoader(),
                context.getPlugins(), context.getPluginInstantiator());
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence no need to create a new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // replace the user's Mapper & Reducer with our wrappers in the job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // package the job jar, which includes CDAP classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce container processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar(
                    Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
                    tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar.
            // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and launch
            // the actual MapReduce AM/Task from that.
            // We explicitly localize the mr-framework, but don't use it on the classpath.
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);

        // We start a long-running tx to be used by the mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember the tx, so that we can re-use it in mapreduce tasks
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submits the job and returns immediately. Shouldn't need to set the context ClassLoader.
            job.submit();

            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        cleanupTask.run();
        throw t;
    }
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.VerySimpleLocalExample.java
License:Apache License
@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
@Test
public void test_localHadoopLaunch()
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {

    // 0) Setup the temp dir
    final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;
    //final Path tmp_path = FileContext.getLocalFSFileContext().makeQualified(new Path(temp_dir));
    final Path tmp_path2 = FileContext.getLocalFSFileContext()
            .makeQualified(new Path(temp_dir + "/tmp_output"));
    try {
        FileContext.getLocalFSFileContext().delete(tmp_path2, true);
    } catch (Exception e) {
    } // (just doesn't exist yet)

    // 1) Setup config with local mode
    final Configuration config = new Configuration();
    config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)
    config.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    config.set("mapred.job.tracker", "local");
    config.set("fs.defaultFS", "local");
    config.unset("mapreduce.framework.name");
    // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place
    config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");

    // 2) Build job and do more setup using the Job API
    //TODO: not sure why this is deprecated, it doesn't seem to be in v1? We do need to move to JobConf at some point, but I ran into some
    // issues when trying to do everything I needed for V1, so it seems expedient to start here and migrate away
    final Job hj = new Job(config); // (NOTE: from here, changes to config are ignored)

    // Input format:
    //TODO: fails because of a guava issue, looks like we'll need to move to 2.7 and check it works with a 2.5.x server?
    //TextInputFormat.addInputPath(hj, tmp_path);
    //hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName("org.apache.hadoop.mapreduce.lib.input.TextInputFormat"));
    hj.setInputFormatClass(TestInputFormat.class);

    // Output format:
    hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
            .forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"));
    TextOutputFormat.setOutputPath(hj, tmp_path2);

    // Mapper etc (combiner/reducer are similar)
    hj.setMapperClass(TestMapper.class);
    hj.setOutputKeyClass(Text.class);
    hj.setOutputValueClass(Text.class);
    hj.setNumReduceTasks(0); // (disable reducer for now)

    hj.setJar("test");

    try {
        hj.submit();
    } catch (UnsatisfiedLinkError e) {
        throw new RuntimeException(
                "This is a windows/hadoop compatibility problem - adding the hadoop-commons in the misc_test_assets subdirectory to the top of the classpath should resolve it (and does in V1), though I haven't yet made that work with Aleph2",
                e);
    }
    //hj.getJobID().toString();
    while (!hj.isComplete()) {
        Thread.sleep(1000);
    }
    assertTrue("Finished successfully", hj.isSuccessful());
}
From source file:com.jeffy.mr.WordCount.java
License:Apache License
/**
 * @param args
 */
public static void main(String[] args) {
    String input = "hdfs://master:8020/tmp/jeffy/input/wordcount.txt";
    String output = "hdfs://master:8020/tmp/jeffy/output";
    Configuration config = new Configuration();
    /**
     * Submitting from a Windows client fails with a "no job control" error unless
     * cross-platform submission is enabled.
     * http://stackoverflow.com/questions/24075669/mapreduce-job-fail-when-submitted-from-windows-machine
     */
    config.set("mapreduce.app-submission.cross-platform", "true");
    config.set("mapred.remote.os", "Linux");
    try {
        Job job = Job.getInstance(config);
        // Needed when submitting from Windows
        job.setJarByClass(WordCount.class);
        // Point the job at the locally built jar
        job.setJar("D:\\bigdata\\mapreduce-demo\\src\\main\\java\\WordCount.jar");
        job.setJobName("Wordcount job");
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        TextInputFormat.setInputPaths(job, new Path(input));
        TextOutputFormat.setOutputPath(job, new Path(output));
        // Submit the job, then poll for progress until the job is complete
        try {
            job.waitForCompletion(true);
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:io.aos.mapreduce.count.WordCountTool.java
License:Apache License
public int run(String[] args) throws Exception {

    if (!((args.length > 0) && (args.length < 3))) {
        System.out.println("WordCount <inDir> <outDir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName("WordCount_" + inPath.getName());
    job.setJar("./target/datalayer-hadoop-mapreduce-1.0.0-SNAPSHOT.jar");
    // job.setJarByClass(WordCountTool.class);

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java
License:Apache License
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }
        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
From source file:org.apache.kylin.engine.mr.common.AbstractHadoopJob.java
License:Apache License
protected void setJobClasspath(Job job, KylinConfig kylinConf) {
    String jarPath = kylinConf.getKylinJobJarPath();
    File jarFile = new File(jarPath);

    if (jarFile.exists()) {
        job.setJar(jarPath);
        logger.info("append job jar: " + jarPath);
    } else {
        job.setJarByClass(this.getClass());
    }

    String kylinHiveDependency = System.getProperty("kylin.hive.dependency");
    String kylinHBaseDependency = System.getProperty("kylin.hbase.dependency");
    String kylinKafkaDependency = System.getProperty("kylin.kafka.dependency");
    logger.info("append kylin.hbase.dependency: " + kylinHBaseDependency + " to " + MAP_REDUCE_CLASSPATH);

    Configuration jobConf = job.getConfiguration();
    String classpath = jobConf.get(MAP_REDUCE_CLASSPATH);
    if (classpath == null || classpath.length() == 0) {
        logger.info("Didn't find " + MAP_REDUCE_CLASSPATH
                + " in job configuration, will run 'mapred classpath' to get the default value.");
        classpath = getDefaultMapRedClasspath();
        logger.info("The default mapred classpath is: " + classpath);
    }

    if (kylinHBaseDependency != null) {
        // yarn classpath is comma separated
        kylinHBaseDependency = kylinHBaseDependency.replace(":", ",");
        classpath = classpath + "," + kylinHBaseDependency;
    }

    jobConf.set(MAP_REDUCE_CLASSPATH, classpath);
    logger.info("Hadoop job classpath is: " + job.getConfiguration().get(MAP_REDUCE_CLASSPATH));

    /*
     * set extra dependencies as tmpjars & tmpfiles if configured
     */
    StringBuilder kylinDependency = new StringBuilder();

    // for hive dependencies
    if (kylinHiveDependency != null) {
        // yarn classpath is comma separated
        kylinHiveDependency = kylinHiveDependency.replace(":", ",");
        logger.info("Hive Dependencies Before Filtered: " + kylinHiveDependency);
        String filteredHive = filterKylinHiveDependency(kylinHiveDependency, kylinConf);
        logger.info("Hive Dependencies After Filtered: " + filteredHive);
        StringUtil.appendWithSeparator(kylinDependency, filteredHive);
    } else {
        logger.info("No hive dependency jars set in the environment, will find them from classpath:");
        try {
            String hiveExecJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.hadoop.hive.ql.Driver"));
            StringUtil.appendWithSeparator(kylinDependency, hiveExecJarPath);
            logger.info("hive-exec jar file: " + hiveExecJarPath);

            String hiveHCatJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.hive.hcatalog.mapreduce.HCatInputFormat"));
            StringUtil.appendWithSeparator(kylinDependency, hiveHCatJarPath);
            logger.info("hive-catalog jar file: " + hiveHCatJarPath);

            String hiveMetaStoreJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.hadoop.hive.metastore.api.Table"));
            StringUtil.appendWithSeparator(kylinDependency, hiveMetaStoreJarPath);
            logger.info("hive-metastore jar file: " + hiveMetaStoreJarPath);
        } catch (ClassNotFoundException e) {
            logger.error("Cannot found hive dependency jars: " + e);
        }
    }

    // for kafka dependencies
    if (kylinKafkaDependency != null) {
        kylinKafkaDependency = kylinKafkaDependency.replace(":", ",");
        logger.info("Kafka Dependencies: " + kylinKafkaDependency);
        StringUtil.appendWithSeparator(kylinDependency, kylinKafkaDependency);
    } else {
        logger.info("No Kafka dependency jar set in the environment, will find them from classpath:");
        try {
            String kafkaClientJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.kafka.clients.consumer.KafkaConsumer"));
            StringUtil.appendWithSeparator(kylinDependency, kafkaClientJarPath);
            logger.info("kafka jar file: " + kafkaClientJarPath);
        } catch (ClassNotFoundException e) {
            logger.debug("Not found kafka client jar from classpath, it is optional for normal build: " + e);
        }
    }

    // for KylinJobMRLibDir
    String mrLibDir = kylinConf.getKylinJobMRLibDir();
    StringUtil.appendWithSeparator(kylinDependency, mrLibDir);

    setJobTmpJarsAndFiles(job, kylinDependency.toString());

    overrideJobConfig(job.getConfiguration(), kylinConf.getMRConfigOverride());
}
From source file:org.apache.kylin.job.hadoop.AbstractHadoopJob.java
License:Apache License
protected void setJobClasspath(Job job) {
    String jarPath = KylinConfig.getInstanceFromEnv().getKylinJobJarPath();
    File jarFile = new File(jarPath);
    if (jarFile.exists()) {
        job.setJar(jarPath);
        logger.info("append job jar: " + jarPath);
    } else {
        job.setJarByClass(this.getClass());
    }

    String kylinHiveDependency = System.getProperty("kylin.hive.dependency");
    logger.info("append kylin.hive.dependency: " + kylinHiveDependency + " to " + MAP_REDUCE_CLASSPATH);
    if (kylinHiveDependency != null) {
        // yarn classpath is comma separated
        kylinHiveDependency = kylinHiveDependency.replace(":", ",");

        Configuration jobConf = job.getConfiguration();
        String classpath = jobConf.get(MAP_REDUCE_CLASSPATH);
        if (classpath == null || classpath.length() == 0) {
            logger.info("Didn't find " + MAP_REDUCE_CLASSPATH
                    + " in job configuration, will run 'mapred classpath' to get the default value.");
            classpath = getDefaultMapRedClasspath();
            logger.info("The default mapred classpath is: " + classpath);
        }
        jobConf.set(MAP_REDUCE_CLASSPATH, classpath + "," + kylinHiveDependency);
    }

    logger.info("Hadoop job classpath is: " + job.getConfiguration().get(MAP_REDUCE_CLASSPATH));
}
From source file:org.janusgraph.hadoop.compat.h2.DistCacheConfigurer.java
License:Apache License
@Override
public void configure(Job job) throws IOException {

    Configuration conf = job.getConfiguration();
    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem jobFS = FileSystem.get(conf);

    for (Path p : getLocalPaths()) {
        Path stagedPath = uploadFileIfNecessary(localFS, p, jobFS);
        // Calling this method decompresses the archive and makes Hadoop
        // handle its class files individually. This leads to crippling
        // overhead times (10+ seconds) even with the LocalJobRunner,
        // courtesy of o.a.h.yarn.util.FSDownload.changePermissions
        // copying and changing the mode of each classfile copy individually.
        //job.addArchiveToClassPath(p);
        // Just add the compressed archive instead:
        job.addFileToClassPath(stagedPath);
    }

    // We don't really need to set a map reduce job jar here,
    // but doing so suppresses a warning
    String mj = getMapredJar();
    if (null != mj)
        job.setJar(mj);
}
From source file:org.janusgraph.hadoop.compat.h2.MapredJarConfigurer.java
License:Apache License
@Override
public void configure(Job job) throws IOException {
    job.setJar(mapredJar);
}