Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package avro.mr; import java.io.IOException; import org.apache.avro.Schema; import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapred.AvroValue; import org.apache.avro.mapreduce.AvroJob; import org.apache.avro.mapreduce.AvroSequenceFileInputFormat; import org.apache.avro.mapreduce.AvroSequenceFileOutputFormat; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import example.avro.User; /** * ColorCount reads in data files containing User records, defined in * examples/user.avsc, and counts the number of instances of each favorite * color. (This example draws inspiration from the canonical WordCount MapReduce * application.) * * @link http://avro.apache.org/docs/current/mr.html */ public class MapReduceColorCountSeqFile extends Configured implements Tool { public static class ColorCountMapper extends Mapper<AvroKey<Long>, AvroValue<User>, Text, IntWritable> { @Override public void map(AvroKey<Long> key, AvroValue<User> value, Context context) throws IOException, InterruptedException { CharSequence color = value.datum().getFavoriteColor(); if (color == null) { color = "none"; } context.write(new Text(color.toString()), new IntWritable(1)); } } public static class ColorCountReducer extends Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> { @Override public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable value : values) { sum += value.get(); } context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum)); } } public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: MapReduceColorCount <input path> <output path>"); return -1; } Job job = Job.getInstance(getConf()); job.setJarByClass(MapReduceColorCountSeqFile.class); job.setJobName("Color Count 2"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(AvroSequenceFileInputFormat.class); AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG)); AvroJob.setInputValueSchema(job, User.getClassSchema()); job.setMapperClass(ColorCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(AvroSequenceFileOutputFormat.class); job.setReducerClass(ColorCountReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); return (job.waitForCompletion(true) ? 0 : 1); } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new MapReduceColorCountSeqFile(), args); System.exit(res); } }