Example usage for org.apache.hadoop.mapreduce Job getJar

List of usage examples for org.apache.hadoop.mapreduce Job getJar

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job.getJar().

Prototype

public String getJar() 

Document

Get the pathname of the job's jar.
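
Before the full examples below, here is a minimal, hypothetical sketch of the most common idiom around getJar(): check whether a jar has already been configured for the job (for example via a -D property at submission time) and fall back to setJarByClass() otherwise. The class and job names are placeholders, not taken from any of the sources below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetJarExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "getJar-example");

        // getJar() returns the pathname of the jar configured for this job,
        // or null if no jar has been set yet.
        if (job.getJar() == null) {
            // Fall back to the jar that contains this (placeholder) class,
            // mirroring the pattern used in the examples below.
            job.setJarByClass(GetJarExample.class);
        }

        // May still print null when run outside a packaged jar (e.g. from an IDE).
        System.out.println("Job jar: " + job.getJar());
    }
}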

Usage

From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    HBaseConfiguration.addHbaseResources(getConf());
    Configuration conf = getConf();
    String quorum = conf.get("hbase.zookeeper.quorum");
    String clientPort = conf.get("hbase.zookeeper.property.clientPort");
    LOG.info("hbase.zookeeper.quorum=" + quorum);
    LOG.info("hbase.zookeeper.property.clientPort=" + clientPort);
    LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat"));

    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
        LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort));
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort));
    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String qualifiedTableName = getQualifiedTableName(schemaName, tableName);
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);

    LOG.info("tableName=" + tableName);
    LOG.info("schemaName=" + schemaName);
    LOG.info("qualifiedTableName=" + qualifiedTableName);

    configureOptions(cmdLine, importColumns, getConf());

    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }

    Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt()));
    Path outputPath = null;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    LOG.info("Configuring HFile output path to {}", outputPath);

    Job job = new Job(getConf(),
            "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY));

    // Allow overriding the job jar setting by using a -D system property at startup
    if (job.getJar() == null) {
        job.setJarByClass(PhoenixCsvToKeyValueMapper.class);
    }
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);

    FileSystem.get(getConf());
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(PhoenixCsvToKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable htable = new HTable(getConf(), qualifiedTableName);

    // Auto configure partitioner and reducer according to the Main Data table
    HFileOutputFormat.configureIncrementalLoad(job, htable);

    LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        LOG.error("Import job failed, check JobTracker for details");
        return 1;
    }

    LOG.info("Loading HFiles from {}", outputPath);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf());
    loader.doBulkLoad(outputPath, htable);
    htable.close();

    LOG.info("Incremental load complete");

    LOG.info("Removing output directory {}", outputPath);
    if (!FileSystem.get(getConf()).delete(outputPath, true)) {
        LOG.error("Removing output directory {} failed", outputPath);
    }

    return 0;
}

From source file:com.example.Driver.java

License:Open Source License

public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    //When you are not running any Reducer
    //OR    job.setNumReduceTasks(0);
    //      logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}

From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    handleCommandLine(args, conf);
    HadoopUtils.setMapAttempts(conf, 1);
    adjustConfigurationForHive(conf);
    HiveTools.setupJob(conf);

    Job job = new Job(conf, "hive-io-writing");
    if (job.getJar() == null) {
        job.setJarByClass(getClass());
    }
    job.setMapperClass(SampleMapper.class);
    job.setInputFormatClass(SampleInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(HiveWritableRecord.class);
    job.setOutputFormatClass(SampleOutputFormat.class);

    job.setNumReduceTasks(0);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.nnapz.hbaseexplorer.mr.TableStats.java

License:Apache License

/**
 * M/R Job setup. No reduce.
 *
 * @param conf      a suitable hadoop+hbase configuration
 * @param tableName the table we want to get stats from
 * @return the Job object, to be started
 * @throws java.io.IOException any hadoop IO problem
 */
public static Job createSubmittableJob(Configuration conf, String tableName) throws IOException {

    Job job = new Job(conf, NAME + "_" + tableName);
    if (job.getJar() == null) {
        job.setJarByClass(TableStats.class); // otherwise set in conf already
    }
    Scan scan = new Scan();
    scan.setMaxVersions(10000); // todo fixme
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCountMapper.class, Text.class, Result.class, job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    return job;
}

From source file:de.bankmark.bigbench.queries.q18.MRlinearRegression.java

License:Apache License

private void usage(Job job) {
    String jar = job.getJar() != null ? job.getJar() : " jarName.jar ";
    System.err.println("Usage:\n hadoop jar " + jar + " " + this.getClass().getName()
            + " <inDir> <outDir> \n<inDir> file format: <double x> <double y> \n<outDir> file format: <double intercept> <double slope>");
}

From source file:de.bankmark.bigbench.queries.q28.ToSequenceFile.java

License:Apache License

private void usage(Job job) {
    String jar = job.getJar() != null ? job.getJar() : " jarName.jar ";
    System.err.println("Usage:\n hadoop jar " + jar + " " + this.getClass().getName()
            + " <inDir> <outDir> \n<inDir> file format: tab separated csv: <documentID> <text> \n<outDir> file format Sequencefile: <Text documentID> <Text content>");
}

From source file:io.apigee.lembos.mapreduce.LembosMapReduceRunner.java

License:Apache License

/**
 * Returns a properly configured, ready to run Hadoop {@link Job}.
 *
 * @param args the command line arguments as supported by {@link GenericOptionsParser}
 *
 * @return the configured job
 *
 * @throws IOException if there is a problem creating the job
 * @throws ExecutionException if there is an issue running the Node.js module
 * @throws InterruptedException if the execution of the Node.js module gets interrupted
 * @throws NodeException if there is an issue with the Node.js module
 */
public Job initJob(final String[] args)
        throws ExecutionException, InterruptedException, IOException, NodeException {
    final GenericOptionsParser gop = new GenericOptionsParser(args);

    // If ran from ToolRunner, conf should already be set but if not, set it manually
    if (conf == null) {
        setConf(gop.getConfiguration());
    }

    // Load the Hadoop FS URL handler
    RunnerUtils.loadFsUrlStreamHandler(getConf());

    // Persist the non-Runner CLI arguments
    conf.setStrings(LembosConstants.MR_MODULE_ARGS, gop.getRemainingArgs());

    // Package the Node.js module and prepare it to be submitted with the Job
    RunnerUtils.prepareModuleForJob(conf);

    // Add "-libjars" to the current ClassLoader if necessary
    RunnerUtils.addLibJarsToClassLoader(conf);

    // Create Node.js environment for local use
    mrEnv = LembosMapReduceEnvironment.fromConf(conf);

    if (JavaScriptUtils.isDefined(mrEnv.getConfiguration())) {
        for (final Map.Entry<Object, Object> propertyEntry : mrEnv.getConfiguration().entrySet()) {
            final String key = propertyEntry.getKey().toString();
            final Writable value = ConversionUtils.jsToWritable(propertyEntry.getValue(), mrEnv.getModule());

            // Do not set these as we'll be setting them later from values we were passed from the CLI
            if (key.equals(LembosConstants.MR_MODULE_NAME)) {
                continue;
            }

            if (value instanceof BooleanWritable) {
                conf.setBoolean(key, ((BooleanWritable) value).get());
            } else if (value instanceof DoubleWritable || value instanceof FloatWritable) {
                conf.setFloat(key, Float.valueOf(value.toString()));
            } else if (value instanceof IntWritable) {
                conf.setInt(key, ((IntWritable) value).get());
            } else if (value instanceof LongWritable) {
                conf.setLong(key, ((LongWritable) value).get());
            } else if (value instanceof Text) {
                conf.set(key, value.toString());
            } else {
                System.err.println("Cannot convert JavaScript (" + value.getClass().getName()
                        + ") to Configuration, using String");
                conf.set(key, value.toString());
            }
        }
    }

    // Create Job
    final String jobName = "LembosMapReduceJob-" + mrEnv.getModuleName();
    final Job job = new Job(conf, jobName);

    jobWrapper = JobWrap.getInstance(mrEnv.getRuntime(), job);

    if (JavaScriptUtils.isDefined(mrEnv.getJobSetupFunction())) {
        mrEnv.callFunctionSync(mrEnv.getJobSetupFunction(), new Object[] { jobWrapper });
    }

    // Always set the mapper
    job.setMapperClass(LembosMapper.class);

    // Conditionally set the combiner
    if (JavaScriptUtils.isDefined(mrEnv.getCombineFunction())) {
        job.setCombinerClass(LembosCombiner.class);
    }

    // Conditionally set the group comparator
    if (JavaScriptUtils.isDefined(mrEnv.getGroupFunction())) {
        job.setGroupingComparatorClass(LembosGroupComparator.class);
    }

    // Conditionally set the partitioner
    if (JavaScriptUtils.isDefined(mrEnv.getPartitionFunction())) {
        job.setPartitionerClass(LembosPartitioner.class);
    }

    // Conditionally set the reducer
    if (JavaScriptUtils.isDefined(mrEnv.getReduceFunction())) {
        job.setReducerClass(LembosReducer.class);
    } else {
        job.setNumReduceTasks(0);
    }

    // Conditionally set the sort comparator
    if (JavaScriptUtils.isDefined(mrEnv.getSortFunction())) {
        job.setSortComparatorClass(LembosSortComparator.class);
    }

    // This could potentially be unsafe but for testing, we need to set this based on the path to the built JAR
    if (job.getJar() == null) {
        job.setJarByClass(LembosMapReduceRunner.class);
    }

    // MapReduce configuration reference:
    //
    // http://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml
    // org.apache.hadoop.mapreduce.MRConfig
    // org.apache.hadoop.mapreduce.MRJobConfig

    return job;
}

From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License:Apache License

public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    try {
        Job job = new Job(new Configuration());
        job.setJobName(getClass().getName());
        Configuration conf = job.getConfiguration();
        CommandLine results = cli.parse(conf, args);
        if (results.hasOption("input")) {
            Path path = new Path(results.getOptionValue("input"));
            FileInputFormat.setInputPaths(job, path);
        }
        if (results.hasOption("output")) {
            Path path = new Path(results.getOptionValue("output"));
            FileOutputFormat.setOutputPath(job, path);
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            explicitInputFormat = true;
            setIsJavaRecordReader(conf, true);
            job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(conf, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(conf, true);
            job.setMapperClass(getClass(results, "map", conf, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(conf, true);
            job.setReducerClass(getClass(results, "reduce", conf, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            explicitOutputFormat = true;
            setIsJavaRecordWriter(conf, true);
            job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass());
            }
        }
        if (results.hasOption("avroInput")) {
            avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase());
        }
        if (results.hasOption("avroOutput")) {
            avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase());
        }

        if (results.hasOption("program")) {
            setExecutable(conf, results.getOptionValue("program"));
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            conf.setClassLoader(loader);
        }
        setupPipesJob(job);
        return job.waitForCompletion(true) ? 0 : 1;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}

From source file:org.apache.accumulo.server.test.randomwalk.multitable.CopyTool.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());

    if (job.getJar() == null) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[0], args[1].getBytes(), args[2],
            new Authorizations());
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[0], args[1].getBytes(), true, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}

From source file:org.apache.accumulo.server.test.randomwalk.sequential.MapRedVerifyTool.java

License:Apache License

public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());

    if (job.getJar() == null) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[0], args[1].getBytes(), args[2],
            new Authorizations());
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.setMapperClass(SeqMapClass.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(SeqReduceClass.class);
    job.setNumReduceTasks(1);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[0], args[1].getBytes(), true, args[5]);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[3], args[4]);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}