List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
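Every example below builds a Job directly from a Configuration and a job name. As a minimal, self-contained sketch of that pattern (the class name MinimalWordCount and the argument handling are illustrative assumptions, not taken from any example on this page; newer code would call Job.getInstance(conf, name) instead of the deprecated constructor):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class MinimalWordCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Deprecated constructor used throughout this page; Job.getInstance(conf, name)
        // is the modern equivalent.
        Job job = new Job(conf, "minimal word count");
        job.setJarByClass(MinimalWordCount.class);
        job.setMapperClass(TokenCounterMapper.class); // built-in mapper: emits (token, 1)
        job.setCombinerClass(IntSumReducer.class);    // built-in reducer: sums the counts
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}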
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License:Apache License
/**
 * Run the Parallel FPGrowth Map/Reduce Job to calculate the Top K features of group dependent shards
 */
public static void startParallelFPGrowth(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    Path input = new Path(params.get(INPUT));
    Job job = new Job(conf, "PFP Growth Driver running over input" + input);
    job.setJarByClass(PFPGrowth.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(TransactionTree.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TopKStringPatterns.class);

    FileInputFormat.addInputPath(job, input);
    Path outPath = new Path(params.get(OUTPUT), FPGROWTH);
    FileOutputFormat.setOutputPath(job, outPath);

    HadoopUtil.delete(conf, outPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelFPGrowthMapper.class);
    job.setCombinerClass(ParallelFPGrowthCombiner.class);
    job.setReducerClass(ParallelFPGrowthReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Count the frequencies of various features in parallel using Map/Reduce
 */
public static void startParallelCounting(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");

    String input = params.get(INPUT);
    Job job = new Job(conf, "Parallel Counting Driver running over input: " + input);
    job.setJarByClass(PFPGrowth.class);
    // Job job = initJob(conf);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(input));
    Path outPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);
    FileOutputFormat.setOutputPath(job, outPath);

    HadoopUtil.delete(conf, outPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelCountingMapper.class);
    job.setCombinerClass(ParallelCountingReducer.class);
    job.setReducerClass(ParallelCountingReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Run the Parallel FPGrowth Map/Reduce Job to calculate the Top K features of group dependent shards
 */
public static void startParallelFPGrowth(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());
    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");
    Path input = new Path(params.get(INPUT));
    Job job = new Job(conf, "PFP Growth Driver running over input" + input);
    job.setJarByClass(PFPGrowth.class);
    // Job job = initJob(conf);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ArrayList.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, input);
    Path outPath = new Path(params.get(OUTPUT), FPGROWTH);
    FileOutputFormat.setOutputPath(job, outPath);

    HadoopUtil.delete(conf, outPath);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ParallelFPGrowthMapper.class);
    //job.setCombinerClass(ParallelFPGrowthCombiner.class);
    job.setReducerClass(ParallelFPGrowthReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
private static Job initJob(Configuration conf) {
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/core-site.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/hdfs-default.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/hdfs-site.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/yarn-default.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/yarn-site.xml"));
    conf.addResource(new Path("D:/program/hadoop-2.6.0/etc/hadoop/mapred-site.xml"));
    conf.set("HADOOP_USER_NAME", "hadoop");
    conf.set("mapred.reduce.tasks", "3");
    Job job = null;
    try {
        File jarFile = EJob.createTempJar("bin");
        EJob.addClasspath("D:/program/hadoop-2.6.0/etc/hadoop/");
        ClassLoader classLoader = EJob.getClassLoader();
        Thread.currentThread().setContextClassLoader(classLoader);
        job = new Job(conf, "PFP");
        ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}
From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityLoad.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    final Job job = new Job(getConf(), jobName);
    if (-1 == options.test.numRows) {
        options.test.numRows = job.getConfiguration().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
    }
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(DataLoadInputFormat.class);

    DataLoadInputFormat.setTabletServers(job,
            options.connection.getConnector().instanceOperations().getTabletServers());
    DataLoadInputFormat.setNumRows(job, options.test.numRows);
    DataLoadInputFormat.setNumQualifiersPerFamily(job, options.test.qualifiers);

    job.getConfiguration().set(VISIBILITY, new String(options.visibility.visibility.getExpression(), "UTF-8"));

    final TableOperations ops = options.connection.getConnector().tableOperations();
    final List<String> names = options.test.getTableNamesAndConfigureThem(ops);
    for (String name : names) {
        final int numSplits = ops.getSplits(name, options.test.numRows).size();
        if (options.test.numRows > numSplits) {
            log.info("adding splits to table '" + name + "', to bring it from " + numSplits + " to "
                    + options.test.numRows + ".");
            final SortedSet<Text> splits = new TreeSet<Text>();
            // for cases where we're adding way more splits than there are currently possible servers
            // to handle them, do a pre-pre-split
            // N.B. If we've just created this table, there will be 0 splits because we'll just have
            // the initial tablet.
            if (0 == numSplits || options.test.numRows / numSplits > 10) {
                log.info("splitting in two waves due to the number of splits we need to add.");
                // TODO turtles all the way down.
                final int prepre = options.test.numRows / (0 == numSplits ? 10 : numSplits * 10);
                for (int i = 0; i < prepre; i++) {
                    splits.add(new Text(new StringBuilder(Long.toString(i)).reverse().toString()));
                }
                ops.addSplits(name, splits);
                log.debug("delay 30s for splits to get assigned off host.");
                try {
                    Thread.currentThread().sleep(30 * 1000);
                } catch (InterruptedException exception) {
                    log.warn("interrupted from sleep early.");
                }
                splits.clear();
            }
            for (int i = 0; i < options.test.numRows; i++) {
                splits.add(new Text(new StringBuilder(Long.toString(i)).reverse().toString()));
            }
            ops.addSplits(name, splits);
        }
    }
    log.debug("delay 30s for splits to get assigned off host.");
    try {
        Thread.currentThread().sleep(30 * 1000);
    } catch (InterruptedException exception) {
        log.warn("interrupted from sleep early.");
    }

    job.getConfiguration().setStrings(OUTPUT_TABLE_NAMES, names.toArray(new String[0]));

    job.setMapperClass(DataLoadMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    log.info("launching map-only job to insert " + options.test.numRows + " rows of "
            + (FAMILIES.length * options.test.qualifiers) + " cells each into each of the tables " + names);
    options.output.useAccumuloOutputFormat(job);

    job.waitForCompletion(true);

    return job.isSuccessful() ? 0 : 1;
}
From source file:com.cloudera.accumulo.upgrade.compatibility.DataCompatibilityVerify.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    final String jobName = this.getClass().getName();
    options.parseArgs(jobName, args);
    try {
        final int totalMapSlots = getConf().getInt("mapred.map.tasks",
                DataCompatibilityTestCli.DEFAULT_NUM_ROWS);
        if (-1 == options.test.numRows) {
            options.test.numRows = totalMapSlots;
        }
        final TableOperations ops = options.connection.getConnector().tableOperations();
        final List<String> names = options.test.getTableNames(ops);
        int totalReduceSlots = getConf().getInt("mapred.reduce.tasks", 0);
        if (-1 != options.test.numReduceSlots) {
            totalReduceSlots = options.test.numReduceSlots;
        }
        if (0 == totalReduceSlots) {
            totalReduceSlots = names.size();
        }
        final int reducesPerJob = Math.max(1, totalReduceSlots / names.size());

        final List<Job> jobs = new ArrayList();
        for (String name : names) {
            final Job job = new Job(getConf(), jobName + " " + name);
            job.setJarByClass(this.getClass());
            options.input.useAccumuloInputFormat(job, name);
            job.setMapperClass(DataVerifyMapper.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            job.setReducerClass(LongSumReducer.class);
            job.setCombinerClass(LongSumReducer.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            TextOutputFormat.setOutputPath(job, new Path(options.test.output, name));
            job.setNumReduceTasks(reducesPerJob);
            job.submit();
            jobs.add(job);
        }

        boolean success = true;
        final long numCellsPerRow = options.test.qualifiers * DataCompatibilityLoad.FAMILIES.length;
        final long numCellsPerFamily = options.test.qualifiers * options.test.numRows;
        for (Job job : jobs) {
            success &= job.waitForCompletion(true);
            final CounterGroup group = job.getCounters().getGroup(DataVerifyMapper.class.getName());
            if (null == group) {
                log.error("Job '" + job.getJobName() + "' doesn't have counters for the verification mapper.");
                success = false;
            } else {
                final Counter badCounter = group.findCounter(BAD_COUNTER);
                if (null != badCounter && 0 < badCounter.getValue()) {
                    log.error("Job '" + job.getJobName() + "' has " + badCounter.getValue()
                            + " entries with bad checksums.");
                    success = false;
                }
                int numRows = 0;
                int numFamilies = 0;
                for (Counter counter : group) {
                    if (counter.getName().startsWith(ROW_COUNTER_PREFIX)) {
                        numRows++;
                        if (numCellsPerRow != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerRow + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    } else if (counter.getName().startsWith(FAMILY_COUNTER_PREFIX)) {
                        numFamilies++;
                        if (numCellsPerFamily != counter.getValue()) {
                            log.error("Job '" + job.getJobName() + "', counter '" + counter.getName()
                                    + "' should have " + numCellsPerFamily + " cells, but instead has "
                                    + counter.getValue());
                            success = false;
                        }
                    }
                }
                if (options.test.numRows != numRows) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have " + options.test.numRows
                            + " rows, but has " + numRows);
                    success = false;
                }
                if (DataCompatibilityLoad.FAMILIES.length != numFamilies) {
                    log.error("Job '" + job.getJobName() + "' is supposed to have "
                            + DataCompatibilityLoad.FAMILIES.length + " families, but has " + numFamilies);
                    success = false;
                }
            }
        }
        if (success) {
            log.info("All internal checks passed.");
        } else {
            log.info("Some checks failed. see log.");
        }
        return success ? 0 : 1;
    } finally {
        options.input.close();
    }
}
From source file:com.cloudera.hbase.WordCount.java
License:Open Source License
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 2;
    }
    Configuration conf = getConf();
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
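As the use of getConf() suggests, this WordCount is written in the Hadoop Tool style; such classes are normally launched through ToolRunner, which applies the generic options (-D, -conf, -libjars, ...) to the Configuration before run() is invoked. A sketch of such a main method, assuming WordCount extends Configured and implements Tool (the driver class name is a hypothetical placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses the generic Hadoop options into the Configuration
        // and then calls WordCount.run() with the remaining arguments.
        int exitCode = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(exitCode);
    }
}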
From source file:com.cloudera.test.UseHCat.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Get the input and output table names as arguments
    String inputTableName = args[0];
    String outputTableName = args[1];
    // Assume the default database
    String dbName = null;

    Job job = new Job(conf, "UseHCat");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setJarByClass(UseHCat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // An HCatalog record as input
    job.setInputFormatClass(HCatInputFormat.class);

    // Mapper emits a string as key and an integer as value
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Ignore the key for the reducer output; emitting an HCatalog record as value
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setOutputFormatClass(HCatOutputFormat.class);

    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatOutputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing:" + s);
    HCatOutputFormat.setSchema(job, s);
    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.cqx.mr.MRSearchAuto.java
public void searchHBase(int numOfDays) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime;
    long endTime;

    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node2,node3,node4");
    conf.set("fs.default.name", "hdfs://node1");
    conf.set("mapred.job.tracker", "node1:54311");

    // Search criteria passed through the configuration to the mapper
    conf.set("search.license", "C87310");
    conf.set("search.color", "10");
    conf.set("search.direction", "2");

    Job job = new Job(conf, "MRSearchHBase");
    System.out.println("search.license: " + conf.get("search.license"));
    job.setNumReduceTasks(0);
    job.setJarByClass(MRSearchAuto.class);
    Scan scan = new Scan();
    scan.addFamily(FAMILY_NAME);
    byte[] startRow = Bytes.toBytes("2011010100000");
    byte[] stopRow;
    switch (numOfDays) {
    case 1:
        stopRow = Bytes.toBytes("2011010200000");
        break;
    case 10:
        stopRow = Bytes.toBytes("2011011100000");
        break;
    case 30:
        stopRow = Bytes.toBytes("2011020100000");
        break;
    case 365:
        stopRow = Bytes.toBytes("2012010100000");
        break;
    default:
        stopRow = Bytes.toBytes("2011010101000");
    }
    // Restrict the scan to the selected row key range
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, SearchMapper.class, ImmutableBytesWritable.class,
            Text.class, job);
    Path outPath = new Path("searchresult");
    HDFS_File file = new HDFS_File();
    file.DelFile(conf, outPath.getName(), true); // delete the output directory if it already exists
    FileOutputFormat.setOutputPath(job, outPath);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    endTime = System.currentTimeMillis();
    System.out.println("Time used: " + (endTime - startTime));
    System.out.println("startRow:" + Text.decode(startRow));
    System.out.println("stopRow: " + Text.decode(stopRow));
}
From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairs.java
License:Apache License
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityPairs.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(ProbDistPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(5);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}