Example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setMapOutputValueClass.

Prototype

public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException 

Source Link

Document

Set the value class for the map output data.

Usage

From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairWordExtractor.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityPairWordExtractor.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;/*from  www  .j a  va2  s  .  c  o  m*/

}

From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityStripes.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityStripes.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(ProbDistPartitioner.class);
    job.setReducerClass(IntSumReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(5);/* w w  w  .  j  a v a 2s.c  o  m*/
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}

From source file:com.daleway.training.hadoop.pagerank.PageRankAdjList.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankAdjList.class);
    job.setMapperClass(PageRankMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;/*from   ww w . j  av a 2s  .c o  m*/

}

From source file:com.daleway.training.hadoop.pagerank.PageRankCalcDanglingNodeMass.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankCalcDanglingNodeMass.class);
    job.setMapperClass(PageRankMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;//  w ww. j  a v a2  s.  com

}

From source file:com.daleway.training.hadoop.pagerank.PageRankComplete.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankComplete.class);
    job.setMapperClass(PageRankMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;/*from  w w w.  j a va2 s . c  o  m*/

}

From source file:com.daleway.training.hadoop.pagerank.PageRankSecondarySort.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankSecondarySort.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    job.setMaxReduceAttempts(1);//from w w w.j a v a  2s . co  m
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}

From source file:com.daleway.training.hadoop.pagerank.PageRankSimple.java

License:Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankSimple.class);
    job.setMapperClass(PageRankMapper.class);
    //job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;/*  w ww .j a v a2 s .  c o m*/

}

From source file:com.datasalt.pangool.benchmark.secondarysort.HadoopSecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);/*w  w  w  .jav a  2s  .c o m*/
    }
    Job job = new Job(conf, "Hadoop Secondary Sort");
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[1]), true);

    job.setJarByClass(HadoopSecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(GroupingComparator.class);

    job.setMapOutputKeyClass(ComplexType.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}

From source file:com.datasalt.pangool.tuplemr.TupleMRBuilder.java

License:Apache License

public Job createJob() throws IOException, TupleMRException {

    failIfNull(tupleReducer, "Need to set a group handler");
    failIfEmpty(multipleInputs.getMultiInputs(), "Need to add at least one input");
    failIfNull(outputFormat, "Need to set output format");
    failIfNull(outputKeyClass, "Need to set outputKeyClass");
    failIfNull(outputValueClass, "Need to set outputValueClass");
    failIfNull(outputPath, "Need to set outputPath");

    // perform a deep copy of the Configuration
    this.conf = new Configuration(this.conf);

    TupleMRConfig tupleMRConf = buildConf();
    // Serialize PangoolConf in Hadoop Configuration
    instanceFilesCreated.addAll(TupleMRConfig.set(tupleMRConf, conf));
    Job job = (jobName == null) ? new Job(conf) : new Job(conf, jobName);
    if (tupleMRConf.getRollupFrom() != null) {
        job.setReducerClass(RollupReducer.class);
    } else {/*w w w. jav a  2  s. c  o m*/
        job.setReducerClass(SimpleReducer.class);
    }

    if (tupleCombiner != null) {
        job.setCombinerClass(SimpleCombiner.class); // not rollup by now
        // Set Combiner Handler
        String uniqueName = UUID.randomUUID().toString() + '.' + "combiner-handler.dat";
        try {
            InstancesDistributor.distribute(tupleCombiner, uniqueName, job.getConfiguration());
            instanceFilesCreated.add(uniqueName);
            job.getConfiguration().set(SimpleCombiner.CONF_COMBINER_HANDLER, uniqueName);
        } catch (URISyntaxException e1) {
            throw new TupleMRException(e1);
        }
    }

    // Set Tuple Reducer
    try {
        String uniqueName = UUID.randomUUID().toString() + '.' + "group-handler.dat";
        InstancesDistributor.distribute(tupleReducer, uniqueName, job.getConfiguration());
        instanceFilesCreated.add(uniqueName);
        job.getConfiguration().set(SimpleReducer.CONF_REDUCER_HANDLER, uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }

    // Enabling serialization
    TupleSerialization.enableSerialization(job.getConfiguration());

    job.setJarByClass((jarByClass != null) ? jarByClass : tupleReducer.getClass());
    job.setMapOutputKeyClass(DatumWrapper.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setPartitionerClass(TupleHashPartitioner.class);
    job.setGroupingComparatorClass(GroupComparator.class);
    job.setSortComparatorClass(SortComparator.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    FileOutputFormat.setOutputPath(job, outputPath);
    instanceFilesCreated.addAll(multipleInputs.configureJob(job));
    instanceFilesCreated.addAll(namedOutputs.configureJob(job));
    // Configure a {@link ProxyOutputFormat} for Pangool's Multiple Outputs to
    // work: {@link PangoolMultipleOutput}
    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        InstancesDistributor.distribute(outputFormat, uniqueName, conf);
        instanceFilesCreated.add(uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }
    job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
    job.setOutputFormatClass(ProxyOutputFormat.class);

    return job;
}

From source file:com.datasalt.utils.mapred.counter.MapRedCounter.java

License:Apache License

protected static Job buildMapRedCounterJobWithoutCombiner(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> outputFormat, String outPath,
        Configuration conf) throws IOException {

    Job job = new Job(conf, name);

    Path output = new Path(outPath);
    HadoopUtils.deleteIfExists(FileSystem.get(conf), output);
    job.setJarByClass(MapRedCounter.class);

    job.setReducerClass(MapRedCountReducer.class);
    job.setMapOutputKeyClass(CounterKey.class);
    job.setMapOutputValueClass(CounterValue.class);
    job.setOutputFormatClass(outputFormat);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Secondary sorting configuration.
    job.setGroupingComparatorClass(CounterKey.IdGroupComparator.class);
    job.setPartitionerClass(CounterKey.IdGroupPartitioner.class);

    FileOutputFormat.setOutputPath(job, output);

    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {//w w  w  .  j av a 2s.  c  o  m
        DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
        job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
        job.setOutputFormatClass(ProxyOutputFormat.class);
        // Multioutput configuration
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterKey.class, LongWritable.class);
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTDISTINCTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterDistinctKey.class,
                LongPairWritable.class);
    } catch (URISyntaxException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
    return job;
}