Example usage for org.apache.hadoop.mapreduce Job setJar

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job setJar.

Prototype

public void setJar(String jar) 

Document

Set the job jar
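
Before the longer examples from real source files below, here is a minimal, self-contained sketch of a driver that calls setJar directly. The jar path, job name, and the identity map-only setup are illustrative placeholders, not taken from any of the source files that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetJarExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("setJar-example");

        // Point the job at a pre-built jar (placeholder path; use the jar
        // produced by your own build) instead of letting Hadoop infer it.
        job.setJar("/path/to/my-mapreduce-job.jar");

        // Identity map-only job, just to keep the sketch self-contained.
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.setInputPaths(job, new Path(args[0]));
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

In contrast to setJarByClass, which asks Hadoop to locate the jar containing a given class, setJar takes the jar location verbatim; several of the examples below use setJarByClass as a fallback when no explicit jar is configured.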

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);

    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                context.getProgram().getClassLoader(), context.getPlugins(), context.getPluginInstantiator());
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence no need to create new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // replace user's Mapper & Reducer's with our wrappers in job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // packaging job jar which includes cdap classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce container processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar(
                    Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
                    tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar
            // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and launch
            // the actual MapReduce AM/Task from it.
            // We explicitly localize the mr-framework, but don't add it to the classpath
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start long-running tx to be used by mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember tx, so that we can re-use it in mapreduce tasks
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submits job and returns immediately. Shouldn't need to set context ClassLoader.
            job.submit();

            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        cleanupTask.run();
        throw t;
    }
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.VerySimpleLocalExample.java

License:Apache License

@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
@Test
public void test_localHadoopLaunch()
        throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {

    // 0) Setup the temp dir 
    final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;
    //final Path tmp_path = FileContext.getLocalFSFileContext().makeQualified(new Path(temp_dir));
    final Path tmp_path2 = FileContext.getLocalFSFileContext()
            .makeQualified(new Path(temp_dir + "/tmp_output"));
    try {
        FileContext.getLocalFSFileContext().delete(tmp_path2, true);
    } catch (Exception e) {
    } // (just doesn't exist yet)

    // 1) Setup config with local mode
    final Configuration config = new Configuration();
    config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)
    config.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
    config.set("mapred.job.tracker", "local");
    config.set("fs.defaultFS", "local");
    config.unset("mapreduce.framework.name");

    // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place
    config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");

    // 2) Build job and do more setup using the Job API
    //TODO: not sure why this is deprecated, it doesn't seem to be in v1? We do need to move to JobConf at some point, but I ran into some 
    // issues when trying to do everything I needed to for V1, so seems expedient to start here and migrate away
    final Job hj = new Job(config); // (NOTE: from here, changes to config are ignored)

    // Input format:
    //TODO: fails because of guava issue, looks like we'll need to move to 2.7 and check it works with 2.5.x server?
    //TextInputFormat.addInputPath(hj, tmp_path);
    //hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName ("org.apache.hadoop.mapreduce.lib.input.TextInputFormat"));
    hj.setInputFormatClass(TestInputFormat.class);

    // Output format:
    hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
            .forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"));
    TextOutputFormat.setOutputPath(hj, tmp_path2);

    // Mapper etc (combiner/reducer are similar)
    hj.setMapperClass(TestMapper.class);
    hj.setOutputKeyClass(Text.class);
    hj.setOutputValueClass(Text.class);
    hj.setNumReduceTasks(0); // (disable reducer for now)

    hj.setJar("test");

    try {
        hj.submit();
    } catch (UnsatisfiedLinkError e) {
        throw new RuntimeException(
                "This is a windows/hadoop compatibility problem - adding the hadoop-commons in the misc_test_assets subdirectory to the top of the classpath should resolve it (and does in V1), though I haven't yet made that work with Aleph2",
                e);
    }
    //hj.getJobID().toString();
    while (!hj.isComplete()) {
        Thread.sleep(1000);
    }
    assertTrue("Finished successfully", hj.isSuccessful());
}

From source file:com.jeffy.mr.WordCount.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) {

    String input = "hdfs://master:8020/tmp/jeffy/input/wordcount.txt";
    String output = "hdfs://master:8020/tmp/jeffy/output";
    Configuration config = new Configuration();
    /**
     * Needed when submitting the job from a Windows machine (otherwise submission fails); see:
     * http://stackoverflow.com/questions/24075669/mapreduce-job-fail-when-submitted-from-windows-machine
     */
    config.set("mapreduce.app-submission.cross-platform", "true");
    config.set("mapred.remote.os", "Linux");
    try {
        Job job = Job.getInstance(config);
        // Needed when submitting from Windows
        job.setJarByClass(WordCount.class);
        // Set the path of the job jar explicitly
        job.setJar("D:\\bigdata\\mapreduce-demo\\src\\main\\java\\WordCount.jar");
        job.setJobName("Wordcount job");
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        TextInputFormat.setInputPaths(job, new Path(input));
        TextOutputFormat.setOutputPath(job, new Path(output));
        // Submit the job, then poll for progress until the job is complete
        try {
            job.waitForCompletion(true);
        } catch (ClassNotFoundException | InterruptedException e) {
            e.printStackTrace();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:io.aos.mapreduce.count.WordCountTool.java

License:Apache License

public int run(String[] args) throws Exception {

    if (!((args.length > 0) && (args.length < 3))) {
        System.out.println("WordCount <inDir> <outDir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Path inPath = new Path(args[0]);
    Path outPath = new Path(args[1]);

    Configuration conf = getConf();

    Job job = Job.getInstance(conf);

    job.setJobName("WordCount_" + inPath.getName());
    job.setJar("./target/datalayer-hadoop-mapreduce-1.0.0-SNAPSHOT.jar");
    //        job.setJarByClass(WordCountTool.class);

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(job, inPath);

    FileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;

}

From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License:Apache License

public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }

        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}

From source file:org.apache.kylin.engine.mr.common.AbstractHadoopJob.java

License:Apache License

protected void setJobClasspath(Job job, KylinConfig kylinConf) {
    String jarPath = kylinConf.getKylinJobJarPath();
    File jarFile = new File(jarPath);
    if (jarFile.exists()) {
        job.setJar(jarPath);
        logger.info("append job jar: " + jarPath);
    } else {
        job.setJarByClass(this.getClass());
    }

    String kylinHiveDependency = System.getProperty("kylin.hive.dependency");
    String kylinHBaseDependency = System.getProperty("kylin.hbase.dependency");
    String kylinKafkaDependency = System.getProperty("kylin.kafka.dependency");
    logger.info("append kylin.hbase.dependency: " + kylinHBaseDependency + " to " + MAP_REDUCE_CLASSPATH);

    Configuration jobConf = job.getConfiguration();
    String classpath = jobConf.get(MAP_REDUCE_CLASSPATH);
    if (classpath == null || classpath.length() == 0) {
        logger.info("Didn't find " + MAP_REDUCE_CLASSPATH
                + " in job configuration, will run 'mapred classpath' to get the default value.");
        classpath = getDefaultMapRedClasspath();
        logger.info("The default mapred classpath is: " + classpath);
    }

    if (kylinHBaseDependency != null) {
        // yarn classpath is comma separated
        kylinHBaseDependency = kylinHBaseDependency.replace(":", ",");
        classpath = classpath + "," + kylinHBaseDependency;
    }

    jobConf.set(MAP_REDUCE_CLASSPATH, classpath);
    logger.info("Hadoop job classpath is: " + job.getConfiguration().get(MAP_REDUCE_CLASSPATH));

    /*
     *  set extra dependencies as tmpjars & tmpfiles if configured
     */
    StringBuilder kylinDependency = new StringBuilder();

    // for hive dependencies
    if (kylinHiveDependency != null) {
        // yarn classpath is comma separated
        kylinHiveDependency = kylinHiveDependency.replace(":", ",");

        logger.info("Hive Dependencies Before Filtered: " + kylinHiveDependency);
        String filteredHive = filterKylinHiveDependency(kylinHiveDependency, kylinConf);
        logger.info("Hive Dependencies After Filtered: " + filteredHive);

        StringUtil.appendWithSeparator(kylinDependency, filteredHive);
    } else {

        logger.info("No hive dependency jars set in the environment, will find them from classpath:");

        try {
            String hiveExecJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.hadoop.hive.ql.Driver"));

            StringUtil.appendWithSeparator(kylinDependency, hiveExecJarPath);
            logger.info("hive-exec jar file: " + hiveExecJarPath);

            String hiveHCatJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.hive.hcatalog.mapreduce.HCatInputFormat"));
            StringUtil.appendWithSeparator(kylinDependency, hiveHCatJarPath);
            logger.info("hive-catalog jar file: " + hiveHCatJarPath);

            String hiveMetaStoreJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.hadoop.hive.metastore.api.Table"));
            StringUtil.appendWithSeparator(kylinDependency, hiveMetaStoreJarPath);
            logger.info("hive-metastore jar file: " + hiveMetaStoreJarPath);
        } catch (ClassNotFoundException e) {
            logger.error("Cannot found hive dependency jars: " + e);
        }
    }

    // for kafka dependencies
    if (kylinKafkaDependency != null) {
        kylinKafkaDependency = kylinKafkaDependency.replace(":", ",");
        logger.info("Kafka Dependencies: " + kylinKafkaDependency);
        StringUtil.appendWithSeparator(kylinDependency, kylinKafkaDependency);
    } else {
        logger.info("No Kafka dependency jar set in the environment, will find them from classpath:");
        try {
            String kafkaClientJarPath = ClassUtil
                    .findContainingJar(Class.forName("org.apache.kafka.clients.consumer.KafkaConsumer"));
            StringUtil.appendWithSeparator(kylinDependency, kafkaClientJarPath);
            logger.info("kafka jar file: " + kafkaClientJarPath);

        } catch (ClassNotFoundException e) {
            logger.debug("Not found kafka client jar from classpath, it is optional for normal build: " + e);
        }
    }

    // for KylinJobMRLibDir
    String mrLibDir = kylinConf.getKylinJobMRLibDir();
    StringUtil.appendWithSeparator(kylinDependency, mrLibDir);

    setJobTmpJarsAndFiles(job, kylinDependency.toString());

    overrideJobConfig(job.getConfiguration(), kylinConf.getMRConfigOverride());
}

From source file:org.apache.kylin.job.hadoop.AbstractHadoopJob.java

License:Apache License

protected void setJobClasspath(Job job) {
    String jarPath = KylinConfig.getInstanceFromEnv().getKylinJobJarPath();
    File jarFile = new File(jarPath);
    if (jarFile.exists()) {
        job.setJar(jarPath);
        logger.info("append job jar: " + jarPath);
    } else {
        job.setJarByClass(this.getClass());
    }

    String kylinHiveDependency = System.getProperty("kylin.hive.dependency");
    logger.info("append kylin.hive.dependency: " + kylinHiveDependency + " to " + MAP_REDUCE_CLASSPATH);
    if (kylinHiveDependency != null) {
        // yarn classpath is comma separated
        kylinHiveDependency = kylinHiveDependency.replace(":", ",");
        Configuration jobConf = job.getConfiguration();
        String classpath = jobConf.get(MAP_REDUCE_CLASSPATH);
        if (classpath == null || classpath.length() == 0) {
            logger.info("Didn't find " + MAP_REDUCE_CLASSPATH
                    + " in job configuration, will run 'mapred classpath' to get the default value.");
            classpath = getDefaultMapRedClasspath();
            logger.info("The default mapred classpath is: " + classpath);
        }

        jobConf.set(MAP_REDUCE_CLASSPATH, classpath + "," + kylinHiveDependency);

    }
    logger.info("Hadoop job classpath is: " + job.getConfiguration().get(MAP_REDUCE_CLASSPATH));
}
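
The two Kylin setJobClasspath methods above show a common pattern: use an explicitly configured job jar when it exists on disk, and otherwise fall back to setJarByClass so Hadoop locates the jar containing the driver class. A minimal sketch of that pattern follows; the jarPath argument is a placeholder for however your application resolves the configured location:

protected void configureJobJar(Job job, String jarPath) {
    if (jarPath != null && new java.io.File(jarPath).exists()) {
        // A pre-built jar was configured; hand its path straight to the job.
        job.setJar(jarPath);
    } else {
        // No explicit jar; let Hadoop find the jar that contains this class.
        job.setJarByClass(getClass());
    }
}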

From source file:org.janusgraph.hadoop.compat.h2.DistCacheConfigurer.java

License:Apache License

@Override
public void configure(Job job) throws IOException {

    Configuration conf = job.getConfiguration();
    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem jobFS = FileSystem.get(conf);

    for (Path p : getLocalPaths()) {
        Path stagedPath = uploadFileIfNecessary(localFS, p, jobFS);
        // Calling this method decompresses the archive and makes Hadoop
        // handle its class files individually.  This leads to crippling
        // overhead times (10+ seconds) even with the LocalJobRunner
        // courtesy of o.a.h.yarn.util.FSDownload.changePermissions
        // copying and changing the mode of each extracted class file individually.
        //job.addArchiveToClassPath(p);
        // Just add the compressed archive instead:
        job.addFileToClassPath(stagedPath);
    }

    // We don't really need to set a map reduce job jar here,
    // but doing so suppresses a warning
    String mj = getMapredJar();
    if (null != mj)
        job.setJar(mj);
}

From source file:org.janusgraph.hadoop.compat.h2.MapredJarConfigurer.java

License:Apache License

@Override
public void configure(Job job) throws IOException {
    job.setJar(mapredJar);
}