Example usage for org.apache.hadoop.mapreduce Job Job

List of usage examples for org.apache.hadoop.mapreduce Job Job

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job Job.

Prototype

Job(JobStatus status, JobConf conf) throws IOException 

Source Link

Usage

From source file:com.asp.tranlog.ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.//from  w  ww .  j  a  v a  2  s  .  co m
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, new String(Base64.encodeBytes(actualSeparator.getBytes())));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);

    String inputCodec = conf.get(INPUT_LZO_KEY);
    if (inputCodec == null) {
        FileInputFormat.setMaxInputSplitSize(job, 67108864l); // max split
        // size =
        // 64m
        job.setInputFormatClass(TextInputFormat.class);
    } else {
        if (inputCodec.equalsIgnoreCase("lzo"))
            job.setInputFormatClass(LzoTextInputFormat.class);
        else {
            usage("not supported compression codec!");
            System.exit(-1);
        }
    }

    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call
        // initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /*
                                                                                                       * Guava used by TsvParser
                                                                                                       */);
    return job;
}

From source file:com.bah.applefox.main.plugins.fulltextindex.FTLoader.java

License:Apache License

/**
 * run takes the comandline args as arguments (in this case from a
 * configuration file), creates a new job, configures it, initiates it,
 * waits for completion, and returns 0 if it is successful (1 if it is not)
 * /*from   w  w w  .ja v  a 2  s . c o  m*/
 * @param args
 *            the commandline arguments (in this case from a configuration
 *            file)
 * 
 * @return 0 if the job ran successfully and 1 if it isn't
 */
public int run(String[] args) throws Exception {
    try {
        // Initialize variables
        FTLoader.articleFile = args[8];
        FTLoader.maxNGrams = Integer.parseInt(args[9]);
        FTLoader.stopWords = getStopWords();
        FTLoader.dTable = args[10];
        FTLoader.urlCheckedTable = args[11];
        FTLoader.divsFile = args[20];
        FTLoader.exDivs = getExDivs();

        // Give the job a name
        String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

        // Create job and set the jar
        Job job = new Job(getConf(), jobName);
        job.setJarByClass(this.getClass());

        String urlTable = args[5];

        job.setInputFormatClass(AccumuloInputFormat.class);
        InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable,
                new Authorizations());

        job.setMapperClass(MapperClass.class);
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);

        job.setReducerClass(ReducerClass.class);
        job.setNumReduceTasks(Integer.parseInt(args[4]));

        job.setOutputFormatClass(AccumuloOutputFormat.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);

        AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable);

        job.waitForCompletion(true);

        return job.isSuccessful() ? 0 : 1;
    } catch (IOException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (InterruptedException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (ClassNotFoundException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    }
    return 1;
}

From source file:com.bah.applefox.main.plugins.imageindex.ImageLoader.java

License:Apache License

/**
 * run takes the comandline args as arguments (in this case from a
 * configuration file), creates a new job, configures it, initiates it,
 * waits for completion, and returns 0 if it is successful (1 if it is not)
 * // w  ww . j  av a  2s .  c  o  m
 * @param args
 *            the commandline arguments (in this case from a configuration
 *            file)
 * 
 * @return 0 if the job ran successfully and 1 if it isn't
 */
public int run(String[] args) throws Exception {

    checkedImages = args[18];
    hashTable = args[17];
    tagTable = args[19];
    divsFile = args[20];
    UserAgent = args[6];

    // Create the table
    AccumuloUtils.setSplitSize(args[23]);
    AccumuloUtils.connectBatchWrite(checkedImages).close();

    // Give the job a name
    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    // Create the job and set its jar
    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    // Set the url table to read from
    String urlTable = args[5];

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable,
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setNumReduceTasks(Integer.parseInt(args[4]));

    job.setReducerClass(ReducerClass.class);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable);

    AccumuloUtils.setSplitSize(args[22]);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.CountURLs.java

License:Apache License

public int run(String[] args) throws Exception {

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    mappedInput = args[12] + "From";

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), mappedInput,
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, args[15]);

    job.waitForCompletion(true);/*w w w  .j  av  a 2s.com*/

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.DampenTable.java

License:Apache License

public int run(String[] args) throws Exception {

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    tablePrefix = args[13];/*ww w. jav  a  2  s. co  m*/
    dampeningFactor = Double.parseDouble(args[14]);

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), tablePrefix + "New",
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "New");

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.InitializePRTables.java

License:Apache License

public int run(String[] args) throws Exception {

    tablePrefix = args[13];//  w  w w . j ava2 s .  c o m

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[12] + "To",
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "Old");

    AccumuloUtils.connectBatchWrite(tablePrefix + "New");

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.pageranking.utilities.MRPageRanking.java

License:Apache License

public int run(String[] args) throws Exception {

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    tablePrefix = args[13];/*from   ww  w .  j  a v a  2s.c  o m*/
    outboundLinks = args[15];

    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);

    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[12] + "To",
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setReducerClass(ReducerClass.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "New");

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.webcrawler.WebCrawler.java

License:Apache License

/**
 * run takes the comandline args as arguments (in this case from a
 * configuration file), creates a new job, configures it, initiates it,
 * waits for completion, and returns 0 if it is successful (1 if it is not)
 * //from w  ww.j a  v a2 s  . c  o  m
 * @param args
 *            the commandline arguments (in this case from a configuration
 *            file)
 * 
 * @return 0 if the job ran successfully and 1 if it isn't
 */
public int run(String[] args) throws Exception {

    userAgent = args[6];

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = new Job(getConf(), jobName);
    job.setJarByClass(this.getClass());

    String clone = args[5];
    String clone2 = args[12];
    table = clone;

    AccumuloUtils.setSplitSize(args[24]);
    table2 = clone2 + "From";
    table3 = clone2 + "To";

    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), clone,
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, clone);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}

From source file:com.baidu.cloud.bmr.mapreduce.AccessLogAnalyzer.java

License:Open Source License

public static void main(String[] args) {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: AccessLogAnalyzer <input path> <output path>");
        System.exit(-1);/*  w  w w.j  av a  2 s.c  o m*/
    }
    String inputPath = args[0];
    String outputPath = args[1];
    try {
        Job job = new Job(conf, "AccessLogAnalyzer");
        job.setJarByClass(AccessLogAnalyzer.class);
        job.setMapperClass(AccessLogAnalyzerMapper.class);
        job.setReducerClass(AccessLogAnalyzerReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | ClassNotFoundException | InterruptedException e) {
    }
}

From source file:com.basho.riak.hadoop.RiakWordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    String[] keys = new String[10000];

    for (int i = 0; i < 10000; i++) {
        keys[i] = String.valueOf(i + 1000);
    }//from   w  w  w . ja v a 2s  .  co  m
    Configuration conf = getConf();
    conf = RiakConfig.setKeyLister(conf, new BucketKeyLister("wordcount"));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 11087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 12087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 13087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 14087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 15087));
    conf = RiakConfig.setOutputBucket(conf, "wordcount_out");
    conf = RiakConfig.setHadoopClusterSize(conf, 4);

    Job job = new Job(conf, "Riak-WordCount");

    job.setJarByClass(RiakWordCount.class);

    job.setInputFormatClass(RiakInputFormat.class);
    job.setMapperClass(TokenCounterMapper.class);

    job.setReducerClass(TokenCounterReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(RiakOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(WordCountResult.class);

    job.setNumReduceTasks(4);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}