List of usage examples for org.apache.hadoop.mapreduce.Job.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
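The method sets the value class for the map output records, allowing it to differ from the final reduce output value class; it throws IllegalStateException if called after the job has been submitted. Before the collected examples, here is a minimal self-contained sketch (not taken from any of the source files below; class and path names are illustrative) of the typical reason to call it: the mapper emits IntWritable counts while the reducer writes LongWritable totals, so the intermediate value class must be declared explicitly.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapOutputValueClassExample {

    // Emits <Text, IntWritable>: one count per token.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Emits <Text, LongWritable>: totals, a wider type than the map output.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new LongWritable(sum));
        }
    }

    public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
        Job job = new Job(conf, "map output value class example");
        job.setJarByClass(MapOutputValueClassExample.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);
        // Intermediate (map-side) types: must match what the mapper emits.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Final (reduce-side) output types: what the reducer writes.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        return job;
    }
}

If setMapOutputValueClass were omitted here, the framework would assume the map output value class equals the final output value class (LongWritable) and the job would fail at runtime with a type-mismatch error as soon as the mapper emitted an IntWritable.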
From source file: com.daleway.training.hadoop.condprob.ConditionalProbabilityPairWordExtractor.java
License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityPairWordExtractor.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}
From source file: com.daleway.training.hadoop.condprob.ConditionalProbabilityStripes.java
License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(ConditionalProbabilityStripes.class);
    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setPartitionerClass(ProbDistPartitioner.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(5);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}
From source file: com.daleway.training.hadoop.pagerank.PageRankAdjList.java
License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankAdjList.class);
    job.setMapperClass(PageRankMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}
From source file: com.daleway.training.hadoop.pagerank.PageRankCalcDanglingNodeMass.java
License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankCalcDanglingNodeMass.class);
    job.setMapperClass(PageRankMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}
From source file: com.daleway.training.hadoop.pagerank.PageRankComplete.java
License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankComplete.class);
    job.setMapperClass(PageRankMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}
From source file: com.daleway.training.hadoop.pagerank.PageRankSecondarySort.java
License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankSecondarySort.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    // Sort the LongWritable map output keys in descending order.
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    job.setMaxReduceAttempts(1);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}
From source file: com.daleway.training.hadoop.pagerank.PageRankSimple.java
License: Apache License

public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    Job job = new Job(conf, "pair wise count");
    job.setJarByClass(PageRankSimple.class);
    job.setMapperClass(PageRankMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job;
}
From source file: com.datasalt.pangool.benchmark.secondarysort.HadoopSecondarySort.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Hadoop Secondary Sort");
    FileSystem fS = FileSystem.get(conf);
    fS.delete(new Path(otherArgs[1]), true);
    job.setJarByClass(HadoopSecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);
    // Secondary sort: partition and group on part of the composite key.
    job.setPartitionerClass(KeyPartitioner.class);
    job.setGroupingComparatorClass(GroupingComparator.class);
    job.setMapOutputKeyClass(ComplexType.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}
From source file: com.datasalt.pangool.tuplemr.TupleMRBuilder.java
License: Apache License

public Job createJob() throws IOException, TupleMRException {
    failIfNull(tupleReducer, "Need to set a group handler");
    failIfEmpty(multipleInputs.getMultiInputs(), "Need to add at least one input");
    failIfNull(outputFormat, "Need to set output format");
    failIfNull(outputKeyClass, "Need to set outputKeyClass");
    failIfNull(outputValueClass, "Need to set outputValueClass");
    failIfNull(outputPath, "Need to set outputPath");

    // perform a deep copy of the Configuration
    this.conf = new Configuration(this.conf);
    TupleMRConfig tupleMRConf = buildConf();
    // Serialize PangoolConf in Hadoop Configuration
    instanceFilesCreated.addAll(TupleMRConfig.set(tupleMRConf, conf));
    Job job = (jobName == null) ? new Job(conf) : new Job(conf, jobName);
    if (tupleMRConf.getRollupFrom() != null) {
        job.setReducerClass(RollupReducer.class);
    } else {
        job.setReducerClass(SimpleReducer.class);
    }

    if (tupleCombiner != null) {
        job.setCombinerClass(SimpleCombiner.class); // not rollup by now
        // Set Combiner Handler
        String uniqueName = UUID.randomUUID().toString() + '.' + "combiner-handler.dat";
        try {
            InstancesDistributor.distribute(tupleCombiner, uniqueName, job.getConfiguration());
            instanceFilesCreated.add(uniqueName);
            job.getConfiguration().set(SimpleCombiner.CONF_COMBINER_HANDLER, uniqueName);
        } catch (URISyntaxException e1) {
            throw new TupleMRException(e1);
        }
    }

    // Set Tuple Reducer
    try {
        String uniqueName = UUID.randomUUID().toString() + '.' + "group-handler.dat";
        InstancesDistributor.distribute(tupleReducer, uniqueName, job.getConfiguration());
        instanceFilesCreated.add(uniqueName);
        job.getConfiguration().set(SimpleReducer.CONF_REDUCER_HANDLER, uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }

    // Enabling serialization
    TupleSerialization.enableSerialization(job.getConfiguration());

    job.setJarByClass((jarByClass != null) ? jarByClass : tupleReducer.getClass());
    job.setMapOutputKeyClass(DatumWrapper.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setPartitionerClass(TupleHashPartitioner.class);
    job.setGroupingComparatorClass(GroupComparator.class);
    job.setSortComparatorClass(SortComparator.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    FileOutputFormat.setOutputPath(job, outputPath);
    instanceFilesCreated.addAll(multipleInputs.configureJob(job));
    instanceFilesCreated.addAll(namedOutputs.configureJob(job));

    // Configure a {@link ProxyOutputFormat} for Pangool's Multiple Outputs to
    // work: {@link PangoolMultipleOutput}
    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        InstancesDistributor.distribute(outputFormat, uniqueName, conf);
        instanceFilesCreated.add(uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }
    job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
    job.setOutputFormatClass(ProxyOutputFormat.class);
    return job;
}
From source file: com.datasalt.utils.mapred.counter.MapRedCounter.java
License: Apache License

protected static Job buildMapRedCounterJobWithoutCombiner(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> outputFormat, String outPath,
        Configuration conf) throws IOException {

    Job job = new Job(conf, name);
    Path output = new Path(outPath);
    HadoopUtils.deleteIfExists(FileSystem.get(conf), output);
    job.setJarByClass(MapRedCounter.class);
    job.setReducerClass(MapRedCountReducer.class);
    job.setMapOutputKeyClass(CounterKey.class);
    job.setMapOutputValueClass(CounterValue.class);
    job.setOutputFormatClass(outputFormat);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Secondary sorting configuration.
    job.setGroupingComparatorClass(CounterKey.IdGroupComparator.class);
    job.setPartitionerClass(CounterKey.IdGroupPartitioner.class);

    FileOutputFormat.setOutputPath(job, output);

    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
        job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
        job.setOutputFormatClass(ProxyOutputFormat.class);

        // Multioutput configuration
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterKey.class, LongWritable.class);
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTDISTINCTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterDistinctKey.class,
                LongPairWritable.class);
    } catch (URISyntaxException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
    return job;
}