Example usage for org.apache.hadoop.mapreduce Job Job

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job Job.

Prototype

Job(JobStatus status, JobConf conf) throws IOException

Source Link

Usage

From source file:com.datasalt.utils.mapred.counter.MapRedCounter.java

License:Apache License

protected static Job buildMapRedCounterJobWithoutCombiner(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> outputFormat, String outPath,
        Configuration conf) throws IOException {

    Job job = new Job(conf, name);

    Path output = new Path(outPath);
    HadoopUtils.deleteIfExists(FileSystem.get(conf), output);
    job.setJarByClass(MapRedCounter.class);

    job.setReducerClass(MapRedCountReducer.class);
    job.setMapOutputKeyClass(CounterKey.class);
    job.setMapOutputValueClass(CounterValue.class);
    job.setOutputFormatClass(outputFormat);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Secondary sorting configuration.
    job.setGroupingComparatorClass(CounterKey.IdGroupComparator.class);
    job.setPartitionerClass(CounterKey.IdGroupPartitioner.class);

    FileOutputFormat.setOutputPath(job, output);

    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {/*from  w w w.  j a v a  2  s. c  om*/
        DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
        job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
        job.setOutputFormatClass(ProxyOutputFormat.class);
        // Multioutput configuration
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterKey.class, LongWritable.class);
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTDISTINCTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterDistinctKey.class,
                LongPairWritable.class);
    } catch (URISyntaxException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
    return job;
}

From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java

License:Apache License

public Job getJob() throws IOException {
    if (job == null) {
        job = new Job(conf, name);

        HadoopUtils.deleteIfExists(FileSystem.get(conf), outputPath);
        job.setJarByClass((jarByClass != null) ? jarByClass : reducer);

        job.setReducerClass(reducer);/*from  w  ww.  j a  v  a2 s. co  m*/
        job.setMapOutputValueClass(MultiJoinDatum.class);
        job.setMapOutputKeyClass(MultiJoinPair.class);
        job.setOutputFormatClass(outputFormat);
        job.setOutputKeyClass(outputKeyClass);
        job.setOutputValueClass(outputValueClass);

        job.setGroupingComparatorClass(MultiJoinPair.GroupComparator.class);
        job.setPartitionerClass(MultiJoinPair.GroupPartitioner.class);
        FileOutputFormat.setOutputPath(job, outputPath);

        setMultiJoinPairClass(MultiJoinPair.class);
    }

    return job;
}

From source file:com.dipwater.accountAnalyze.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "192.168.1.51:9001");
    conf.set("fs.default.name", "hdfs://192.168.1.51:9000");

    String[] ars = new String[] { "input", "newout" };
    String[] otherArgs = new GenericOptionsParser(conf, ars).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);//from w  w  w. j  a  v  a2s . c o m
    }
    Job job = new Job(conf, "word count");

    File jarFile = EJob.createTempJar("bin");
    EJob.addClasspath("/home/hadoop/hadoop-1.2.1/conf");
    ClassLoader classLoader = EJob.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);
    ((JobConf) job.getConfiguration()).setJar(jarFile.toString());

    //job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.elephantscale.hbase.book.chapter1.SimpleMR.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: SimpleMR <in> <out>");
        return;//w  ww .  ja  v a  2  s  . c  om
    }
    Job job = new Job(conf, "SimpleMR");
    job.setJarByClass(SimpleMR.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "Calculating perplexity for " + modelPath;
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CachingCVB0PerplexityMapper.class);
    job.setMapperClass(CachingCVB0PerplexityMapper.class);
    job.setCombinerClass(DualDoubleSumReducer.class);
    job.setReducerClass(DualDoubleSumReducer.class);
    job.setNumReduceTasks(1);// w  ww . j a va 2s . c om
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, corpusPath);
    Path outputPath = perplexityPath(modelPath.getParent(), iteration);
    FileOutputFormat.setOutputPath(job, outputPath);
    setModelPaths(job, modelPath);
    HadoopUtil.delete(conf, outputPath);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
    }
    return readPerplexity(conf, modelPath.getParent(), iteration);
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CVB0Driver.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CVB0TopicTermVectorNormalizerMapper.class);
    job.setNumReduceTasks(0);//w  w w. j  a  v a 2s .c o m
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, modelInput);
    FileOutputFormat.setOutputPath(job, output);
    job.submit();
    return job;
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setMapperClass(CVB0DocInferenceMapper.class);
    job.setNumReduceTasks(0);//from  w w  w.  ja va2 s  .  co  m
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    FileSystem fs = FileSystem.get(corpus.toUri(), conf);
    if (modelInput != null && fs.exists(modelInput)) {
        FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
        URI[] modelUris = new URI[statuses.length];
        for (int i = 0; i < statuses.length; i++) {
            modelUris[i] = statuses[i].getPath().toUri();
        }
        DistributedCache.setCacheFiles(modelUris, conf);
    }
    setModelPaths(job, modelInput);//bug:mahout-1147
    FileInputFormat.addInputPath(job, corpus);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(CVB0Driver.class);
    job.submit();
    return job;
}

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

public static void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
        int iterationNumber, int maxIterations, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = String.format("Iteration %d of %d, input path: %s", iterationNumber, maxIterations,
            modelInput);//ww w.j  a v a2  s .c o  m
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CVB0Driver.class);
    job.setMapperClass(CachingCVB0Mapper.class);
    job.setCombinerClass(VectorSumReducer.class);
    job.setReducerClass(VectorSumReducer.class);
    job.setNumReduceTasks(numReduceTasks);
    job.setOutputKeyClass(Text.class);//0.7IntWritable
    job.setOutputValueClass(VectorWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, corpusInput);
    FileOutputFormat.setOutputPath(job, modelOutput);
    setModelPaths(job, modelInput);
    HadoopUtil.delete(conf, modelOutput);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException(
                String.format("Failed to complete iteration %d stage 1", iterationNumber));
    }
}

From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    handleCommandLine(args, conf);//w  w w. ja va2  s  .  com
    HadoopUtils.setMapAttempts(conf, 1);
    adjustConfigurationForHive(conf);
    HiveTools.setupJob(conf);

    Job job = new Job(conf, "hive-io-writing");
    if (job.getJar() == null) {
        job.setJarByClass(getClass());
    }
    job.setMapperClass(SampleMapper.class);
    job.setInputFormatClass(SampleInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(HiveWritableRecord.class);
    job.setOutputFormatClass(SampleOutputFormat.class);

    job.setNumReduceTasks(0);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.frdeso.app.Sleepy.java

License:Apache License

/**
 * Performs integer summation of all the values for each key.
 *//* www .  j  a va2s .  com*/

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: wordmean <in> <out> <number second>");
        return 0;
    }

    Configuration conf = getConf();
    conf.set("mapSleepTime", args[2]);
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "joba");
    job.setJarByClass(Sleepy.class);
    job.setMapperClass(SleepyMapper.class);
    job.setCombinerClass(Reducer.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);

    return (result ? 0 : 1);
}