// Java tutorial example: Hadoop MapReduce word count demonstrating a Combiner.
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package com.bigdog.hadoop.mapreduce.combine; import com.bigdog.hadoop.Constants; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /** * * @author jw362j */ public class WordCountCombineApp { static final String INPUT_PATH = Constants.input; static final String OUT_PATH = Constants.output; static { Configuration.addDefaultResource("hdfs-site.xml"); Configuration.addDefaultResource("core-site.xml"); Configuration.addDefaultResource("mapred-site.xml"); } public void combine() throws Exception { Configuration conf = new Configuration(); final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf); final Path outPath = new Path(OUT_PATH); if (fileSystem.exists(outPath)) { fileSystem.delete(outPath, true); } final Job job = new Job(conf, WordCountCombineApp.class.getSimpleName()); //1.1?? FileInputFormat.setInputPaths(job, INPUT_PATH); //???? //job.setInputFormatClass(TextInputFormat.class); //1.2 map job.setMapperClass(MyMapper.class); //map<k,v><k3,v3><k2,v2>?? //job.setMapOutputKeyClass(Text.class); //job.setMapOutputValueClass(LongWritable.class); //1.3 //job.setPartitionerClass(HashPartitioner.class); //reduce? //job.setNumReduceTasks(1); //1.4 TODO ?? 
//1.5 job.setCombinerClass(MyCombiner.class); //2.2 reduce job.setReducerClass(MyReducer.class); //reduce job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); //2.3 FileOutputFormat.setOutputPath(job, outPath); //? //job.setOutputFormatClass(TextOutputFormat.class); //job??JobTracker? job.waitForCompletion(true); } /** * KEYIN ?k1 ??? * VALUEIN ?v1 * KEYOUT ?k2 ?? * VALUEOUT ?v2 ??1 */ static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> { protected void map(LongWritable k1, Text v1, Context context) throws java.io.IOException, InterruptedException { final String[] splited = v1.toString().split(" "); for (String word : splited) { context.write(new Text(word), new LongWritable(1)); System.out.println("Mapper<" + word + "," + 1 + ">"); } }; } /** * KEYIN ?k2 ?? * VALUEIN ?v2 ?? * KEYOUT ?k3 ???? * VALUEOUT ?v3 ???? * */ static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> { protected void reduce(Text k2, java.lang.Iterable<LongWritable> v2s, Context ctx) throws java.io.IOException, InterruptedException { //redcuek2 System.out.println("MyReducer<" + k2.toString() + ",...>"); long times = 0L; for (LongWritable count : v2s) { times += count.get(); //k2,v2? System.out.println("MyReducer<" + k2.toString() + "," + count.get() + ">"); } ctx.write(k2, new LongWritable(times)); }; } static class MyCombiner extends Reducer<Text, LongWritable, Text, LongWritable> { protected void reduce(Text k2, java.lang.Iterable<LongWritable> v2s, Context ctx) throws java.io.IOException, InterruptedException { //redcuek2 System.out.println("Combiner<" + k2.toString() + ",...>"); long times = 0L; for (LongWritable count : v2s) { times += count.get(); //k2,v2? System.out.println("Combiner<" + k2.toString() + "," + count.get() + ">"); } ctx.write(k2, new LongWritable(times)); //k2,v2? System.out.println("Combiner<" + k2.toString() + "," + times + ">"); }; } }