Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.jbw.mutioutputformat;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
// NOTE(review): no longer referenced by this class (it is an MRUnit test double);
// kept only so the file's import set is unchanged -- safe to delete.
import org.apache.hadoop.mrunit.internal.output.MockMultipleOutputs;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce driver that groups input records by the station reported by
 * {@code YearTempParser} and writes each station's records through
 * {@link MultipleOutputs}, using the station as the base output path.
 *
 * <p>The input and output locations are read from the job configuration
 * properties {@code input} and {@code output} (for example
 * {@code -D input=... -D output=...} when launched through {@link ToolRunner}).
 *
 * @author alvin
 */
public class PatitionByStation extends Configured implements Tool {

    /**
     * Emits every valid input record keyed by its station. Records the parser
     * reports as invalid are dropped.
     */
    static class StationMapper extends Mapper<LongWritable, Text, Text, Text> {

        private final YearTempParser parser = new YearTempParser();

        /**
         * Parses one input line and, if valid, writes it out unchanged under its station key.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   the raw input record
         * @param context task context used to emit the (station, record) pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            parser.parse(value);
            if (parser.isValid()) {
                context.write(new Text(parser.getStation()), value);
            }
        }
    }

    /**
     * Writes all records of one station through {@link MultipleOutputs}, using the
     * station key as the base output path so each station gets its own output file.
     */
    static class StationReducer extends Reducer<Text, Text, NullWritable, Text> {

        private MultipleOutputs<NullWritable, Text> multipleOutputs;

        /**
         * Creates the {@link MultipleOutputs} writer bound to this task's context.
         *
         * @param context the reduce task context
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Must be the real MultipleOutputs: the MRUnit mock only records writes
            // in memory and never produces job output.
            multipleOutputs = new MultipleOutputs<NullWritable, Text>(context);
        }

        /**
         * Writes every record for {@code key} to the output named after the station.
         *
         * @param key     the station shared by all {@code values}
         * @param values  the raw records emitted by the mapper for this station
         * @param context the reduce task context (output goes through MultipleOutputs instead)
         * @throws IOException          if writing a record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // The base output path decides the file name; using the station key is
            // what actually partitions the output by station.
            final String baseOutputPath = key.toString();
            for (Text record : values) {
                multipleOutputs.write(NullWritable.get(), record, baseOutputPath);
            }
        }

        /**
         * Closes the {@link MultipleOutputs} writer so all opened outputs are flushed.
         *
         * @param context the reduce task context
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            if (multipleOutputs != null) {
                multipleOutputs.close();
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param strings remaining command-line arguments (not used; paths come from
     *                the {@code input} and {@code output} configuration properties)
     * @return {@code 0} if the job succeeded, {@code 1} if it failed, {@code 2} if
     *         a required configuration property is missing
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        String inputDir = conf.get("input");
        String outputDir = conf.get("output");
        if (inputDir == null || outputDir == null) {
            System.err.println(
                    "Usage: PatitionByStation -D input=<input path> -D output=<output path>");
            return 2;
        }

        // Build the job from the Tool's configuration so that generic options
        // parsed by ToolRunner are actually applied to the job.
        Job job = Job.getInstance(conf, "papapa");
        job.setJarByClass(PatitionByStation.class);

        job.setMapperClass(StationMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(StationReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(inputDir));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; delegates to {@link ToolRunner} and exits with the job's status.
     *
     * @param args command-line arguments, including Hadoop generic options
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new PatitionByStation(), args));
    }
}