Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package com.bigdog.hadoop.mapreduce.group; import com.bigdog.hadoop.Constants; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; /** * * @author jw362j */ public class GroupApp { static final String INPUT_PATH = Constants.input; static final String OUT_PATH = Constants.output; static { Configuration.addDefaultResource("hdfs-site.xml"); Configuration.addDefaultResource("core-site.xml"); Configuration.addDefaultResource("mapred-site.xml"); } public void group() throws Exception { final Configuration configuration = new Configuration(); final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), configuration); if (fileSystem.exists(new Path(OUT_PATH))) { fileSystem.delete(new Path(OUT_PATH), true); } final Job job = new Job(configuration, GroupApp.class.getSimpleName()); //1.1 FileInputFormat.setInputPaths(job, INPUT_PATH); //?? job.setInputFormatClass(TextInputFormat.class); //1.2Mapper job.setMapperClass(MyMapper.class); //<k2,v2> job.setMapOutputKeyClass(NewK2.class); job.setMapOutputValueClass(LongWritable.class); //1.3 job.setPartitionerClass(HashPartitioner.class); job.setNumReduceTasks(1); //1.4 TODO ?? job.setGroupingComparatorClass(MyGroupingComparator.class); //1.5 TODO ?? //2.2 reduce job.setReducerClass(MyReducer.class); //<k3,v3> job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(LongWritable.class); //2.3 FileOutputFormat.setOutputPath(job, new Path(OUT_PATH)); //? job.setOutputFormatClass(TextOutputFormat.class); //???JobTracker job.waitForCompletion(true); } static class MyMapper extends Mapper<LongWritable, Text, NewK2, LongWritable> { protected void map(LongWritable key, Text value, org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, NewK2, LongWritable>.Context context) throws java.io.IOException, InterruptedException { final String[] splited = value.toString().split("\t"); final NewK2 k2 = new NewK2(Long.parseLong(splited[0]), Long.parseLong(splited[1])); final LongWritable v2 = new LongWritable(Long.parseLong(splited[1])); context.write(k2, v2); }; } static class MyReducer extends Reducer<NewK2, LongWritable, LongWritable, LongWritable> { protected void reduce(NewK2 k2, java.lang.Iterable<LongWritable> v2s, org.apache.hadoop.mapreduce.Reducer<NewK2, LongWritable, LongWritable, LongWritable>.Context context) throws java.io.IOException, InterruptedException { long min = Long.MAX_VALUE; for (LongWritable v2 : v2s) { if (v2.get() < min) { min = v2.get(); } } context.write(new LongWritable(k2.first), new LongWritable(min)); }; } /** * * ?v2????k2v2?k2 * */ static class NewK2 implements WritableComparable<NewK2> { Long first; Long second; public NewK2() { } public NewK2(long first, long second) { this.first = first; this.second = second; } @Override public void readFields(DataInput in) throws IOException { this.first = in.readLong(); this.second = in.readLong(); } @Override public void write(DataOutput out) throws IOException { out.writeLong(first); out.writeLong(second); } /** * k2?. ??????? */ @Override public int compareTo(NewK2 o) { final long minus = this.first - o.first; if (minus != 0) { return (int) minus; } return (int) (this.second - o.second); } @Override public int hashCode() { return this.first.hashCode() + this.second.hashCode(); } @Override public boolean equals(Object obj) { if (!(obj instanceof NewK2)) { return false; } NewK2 oK2 = (NewK2) obj; return (this.first == oK2.first) && (this.second == oK2.second); } } /** * * ?NewK2?? */ static class MyGroupingComparator implements RawComparator<NewK2> { @Override public int compare(NewK2 o1, NewK2 o2) { return (int) (o1.first - o2.first); } /** * @param arg0 ? * @param arg1 ?? * @param arg2 ???? * * @param arg3 ? * @param arg4 ?? * @param arg5 ???? */ @Override public int compare(byte[] arg0, int arg1, int arg2, byte[] arg3, int arg4, int arg5) { return WritableComparator.compareBytes(arg0, arg1, 8, arg3, arg4, 8); } } }