Java tutorial
/* * Copyright (c) 2013 DataTorrent, Inc. ALL Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.datatorrent.demos.mroperator; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.KeyValueTextInputFormat; import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import java.io.IOException; import java.util.Calendar; import java.util.Iterator; /** * <p>LogCountsPerHour class.</p> * * @since 0.9.0 */ public class LogCountsPerHour extends Configured implements Tool { public static class LogMapClass extends MapReduceBase implements Mapper<LongWritable, Text, DateWritable, IntWritable> { private DateWritable date = new DateWritable(); private final static IntWritable one = new IntWritable(1); public void map(LongWritable key, // Offset into the file Text value, OutputCollector<DateWritable, IntWritable> output, Reporter reporter) throws IOException { // Get the value as a String; it is of the format: // 111.111.111.111 - - [16/Dec/2012:05:32:50 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" String text = value.toString(); // Get the date and time int openBracket = text.indexOf('['); int closeBracket = text.indexOf(']'); if (openBracket != -1 && closeBracket != -1) { // Read the date String dateString = text.substring(text.indexOf('[') + 1, text.indexOf(']')); // Build a date object from a string of the form: 16/Dec/2012:05:32:50 -0500 int index = 0; int nextIndex = dateString.indexOf('/'); int day = Integer.parseInt(dateString.substring(index, nextIndex)); index = nextIndex; nextIndex = dateString.indexOf('/', index + 1); String month = dateString.substring(index + 1, nextIndex); index = nextIndex; nextIndex = dateString.indexOf(':', index); int year = Integer.parseInt(dateString.substring(index + 1, nextIndex)); index = nextIndex; nextIndex = dateString.indexOf(':', index + 1); int hour = Integer.parseInt(dateString.substring(index + 1, nextIndex)); // Build a calendar object for this date Calendar calendar = Calendar.getInstance(); calendar.set(Calendar.DATE, day); calendar.set(Calendar.YEAR, year); calendar.set(Calendar.HOUR, hour); calendar.set(Calendar.MINUTE, 0); calendar.set(Calendar.SECOND, 0); calendar.set(Calendar.MILLISECOND, 0); if (month.equalsIgnoreCase("dec")) { calendar.set(Calendar.MONTH, Calendar.DECEMBER); } else if (month.equalsIgnoreCase("nov")) { calendar.set(Calendar.MONTH, Calendar.NOVEMBER); } else if (month.equalsIgnoreCase("oct")) { calendar.set(Calendar.MONTH, Calendar.OCTOBER); } else if (month.equalsIgnoreCase("sep")) { calendar.set(Calendar.MONTH, Calendar.SEPTEMBER); } else if (month.equalsIgnoreCase("aug")) { calendar.set(Calendar.MONTH, Calendar.AUGUST); } else if (month.equalsIgnoreCase("jul")) { calendar.set(Calendar.MONTH, Calendar.JULY); } else if (month.equalsIgnoreCase("jun")) { calendar.set(Calendar.MONTH, Calendar.JUNE); } else if (month.equalsIgnoreCase("may")) { calendar.set(Calendar.MONTH, Calendar.MAY); } else if (month.equalsIgnoreCase("apr")) { calendar.set(Calendar.MONTH, Calendar.APRIL); } else if (month.equalsIgnoreCase("mar")) { calendar.set(Calendar.MONTH, Calendar.MARCH); } else if (month.equalsIgnoreCase("feb")) { calendar.set(Calendar.MONTH, Calendar.FEBRUARY); } else if (month.equalsIgnoreCase("jan")) { calendar.set(Calendar.MONTH, Calendar.JANUARY); } // Output the date as the key and 1 as the value date.setDate(calendar.getTime()); output.collect(date, one); } } } public static class LogReduce extends MapReduceBase implements Reducer<DateWritable, IntWritable, DateWritable, IntWritable> { public void reduce(DateWritable key, Iterator<IntWritable> values, OutputCollector<DateWritable, IntWritable> output, Reporter reporter) throws IOException { // Iterate over all of the values (counts of occurrences of this word) int count = 0; while (values.hasNext()) { // Add the value to our count count += values.next().get(); } // Output the word with its count (wrapped in an IntWritable) output.collect(key, new IntWritable(count)); } } public int run(String[] args) throws Exception { // Create a configuration Configuration conf = getConf(); // Create a job from the default configuration that will use the WordCount class JobConf job = new JobConf(conf, LogCountsPerHour.class); // Define our input path as the first command line argument and our output path as the second Path in = new Path(args[0]); Path out = new Path(args[1]); // Create File Input/Output formats for these paths (in the job) FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // Configure the job: name, mapper, reducer, and combiner job.setJobName("LogAveragePerHour"); job.setMapperClass(LogMapClass.class); job.setReducerClass(LogReduce.class); job.setCombinerClass(LogReduce.class); // Configure the output job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(DateWritable.class); job.setOutputValueClass(IntWritable.class); // Run the job JobClient.runJob(job); return 0; } public static void main(String[] args) throws Exception { // Start the LogCountsPerHour MapReduce application int res = ToolRunner.run(new Configuration(), new LogCountsPerHour(), args); System.exit(res); } }