Usage examples for org.apache.hadoop.mapreduce.Job#setOutputFormatClass

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException

Sets the OutputFormat for the job. Throws IllegalStateException once the job has been submitted.
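For orientation, here is a minimal self-contained driver that exercises the method; it is a sketch, and the class name and path arguments are hypothetical rather than taken from the examples below. With no mapper or reducer set, Hadoop falls back to the identity Mapper and Reducer, so with the default TextInputFormat the final output key/value types are LongWritable/Text.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MinimalOutputFormatDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setOutputFormatClass example");
        job.setJarByClass(MinimalOutputFormatDriver.class);
        // Must be called before submission; once the job is running,
        // setOutputFormatClass throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}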
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }
    // Spells out the framework defaults explicitly: identity Mapper/Reducer,
    // text input/output formats, hash partitioning, and a single reducer.
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Mapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(HashPartitioner.class);
    job.setNumReduceTasks(1);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }
    job.setInputFormatClass(WholeFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setMapperClass(SequenceFileMapper.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    return job.waitForCompletion(true) ? 0 : 1;
}
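Because MapFileOutputFormat writes indexed MapFiles, the output of a job like the one above can be probed by key afterwards. A minimal read-back sketch, assuming the IntWritable/Text key-value types of the example and a hypothetical "output" directory; getReaders and getEntry are the static lookup helpers on org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat.

// Sketch: look up one key in the MapFile output of the job above.
Configuration conf = new Configuration();
MapFile.Reader[] readers = MapFileOutputFormat.getReaders(new Path("output"), conf);
IntWritable key = new IntWritable(1950);  // hypothetical key to probe
Text value = new Text();
// Route the lookup to the same reader that the job's partitioner wrote to.
Writable entry = MapFileOutputFormat.getEntry(readers,
        new HashPartitioner<IntWritable, Text>(), key, value);
System.out.println(entry == null ? "key not found" : value.toString());
for (MapFile.Reader reader : readers) {
    reader.close();
}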
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    InputSampler.Sampler<IntWritable, Text> sampler =
            new InputSampler.RandomSampler<IntWritable, Text>(0.1, 10000, 10);
    InputSampler.writePartitionFile(job, sampler);

    // Add the partition file to the DistributedCache
    Configuration conf = job.getConfiguration();
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, conf);
    DistributedCache.createSymlink(conf);

    return job.waitForCompletion(true) ? 0 : 1;
}
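A side note, not from the source: the DistributedCache class used above is deprecated in Hadoop 2, where the same effect is available through the Job API. A sketch of the equivalent calls, assuming the same job object:

// Equivalent cache setup on Hadoop 2+; on YARN the symlink named by the
// "#" fragment is created automatically, so no createSymlink call is needed.
String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
job.addCacheFile(partitionUri);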
From source file: crunch.MaxTemperature.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
        return -1;
    }
    job.setMapperClass(CleanerMapper.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: csc555.ebratt.depaul.edu.AverageScoreDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths from the first two arguments. Allows for an
 * optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use AverageScoreReducer.class as the
 *            combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("average score by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), AverageScoreDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer classes to use
    job.setMapperClass(AverageScoreMapper.class);
    job.setReducerClass(AverageScoreReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(AverageScoreReducer.class);
    }

    // The jar file to run
    job.setJarByClass(AverageScoreDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
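The run(String[])/getConf() pattern in these csc555 drivers implies the Hadoop Tool interface, so a typical entry point would hand the raw arguments to ToolRunner, which strips generic options such as -DgroupBy=... before run() sees them. A sketch under that assumption (the main method is not shown in the source):

public static void main(String[] args) throws Exception {
    // args: <input> <output> <useCombiner: yes|no>; generic options like
    // -DgroupBy=subreddit are parsed by ToolRunner into the Configuration.
    int res = ToolRunner.run(new Configuration(), new AverageScoreDriver(), args);
    System.exit(res);
}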
From source file: csc555.ebratt.depaul.edu.AverageScoreRankerDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths from the first two arguments. Allows for an
 * optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use AverageScoreRankerReducer.class as the
 *            combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("average score ranked");
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(1);

    // Mapper and Reducer classes to use
    job.setMapperClass(AverageScoreRankerMapper.class);
    job.setReducerClass(AverageScoreRankerReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(DoubleWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(AverageScoreRankerReducer.class);
    }

    // Sort in descending order
    job.setSortComparatorClass(DoubleWritableDescendingComparator.class);

    // The jar file to run
    job.setJarByClass(AverageScoreRankerDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
From source file: csc555.ebratt.depaul.edu.GildedCountDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths from the first two arguments. Allows for an
 * optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use GildedCountReducer.class as the
 *            combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String groupBy = getConf().get("groupBy");
    StringBuffer sb = new StringBuffer();
    sb.append("count of gilded comments grouped by: ");
    sb.append(groupBy);
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // testing -- ensure each node gets 2 reducers
    JobConf jobConf = new JobConf(getConf(), GildedCountDriver.class);
    JobClient jobClient = new JobClient(jobConf);
    ClusterStatus cluster = jobClient.getClusterStatus();
    job.setNumReduceTasks(cluster.getTaskTrackers() * 2);

    // Mapper and Reducer classes to use
    job.setMapperClass(GildedCountMapper.class);
    job.setReducerClass(LongSumReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(LongSumReducer.class);
    }

    // The jar file to run
    job.setJarByClass(GildedCountDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}
From source file: csc555.ebratt.depaul.edu.GildedSorterDriver.java
License: Open Source License

/**
 * Runs the driver by creating a new Hadoop Job based on the configuration.
 * Defines the input/output paths from the first two arguments. Allows for an
 * optional combiner based on the third argument.
 *
 * @param args
 *            [0] the input directory on HDFS
 * @param args
 *            [1] the output directory on HDFS
 * @param args
 *            [2] tells the system whether or not to use a combiner ("yes")
 *            and, if so, it will use GildedSorterReducer.class as the
 *            combiner
 * @throws Exception
 *             if there is an issue with any of the arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    StringBuffer sb = new StringBuffer();
    sb.append("sorted gild counts");
    job.setJobName(sb.toString());
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    // to ensure output is sorted
    job.setNumReduceTasks(1);

    // Mapper and Reducer classes to use
    job.setMapperClass(GildedSorterMapper.class);
    job.setReducerClass(GildedSorterReducer.class);

    // Mapper output classes
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input format class
    job.setInputFormatClass(TextInputFormat.class);

    // Reducer output classes
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Output format class
    job.setOutputFormatClass(TextOutputFormat.class);

    // Combiner
    if (args[2].equals("yes")) {
        job.setCombinerClass(GildedSorterReducer.class);
    }

    // Sort in descending order
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // The jar file to run
    job.setJarByClass(GildedSorterDriver.class);

    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
    return 0;
}