Java examples for Big Data: Hadoop
Apache Hadoop MapReduce template
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; public class DataDeduplication { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String jobName = DataDeduplication.class.getSimpleName(); Job job = Job.getInstance(conf, jobName); job.setJarByClass(DataDeduplication.class); FileInputFormat.setInputPaths(job, new Path(args[0])); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(MyMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true);//from w w w .ja va2 s.c o m } private static class MyMapper extends Mapper<Object, Text, Text, Text> { private static Text line = new Text(); @Override protected void map(Object k1, Text v1, Mapper<Object, Text, Text, Text>.Context context) throws IOException, InterruptedException { line = v1; context.write(line, new Text("")); } } private static class MyReducer extends Reducer<Text, Text, Text, Text> { @Override protected void reduce(Text k2, Iterable<Text> v2s, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { context.write(k2, new Text("")); } } }