Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package com.mycompany.keywordsearch; /** * * @author ril */ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class KeywordSearch { public static class TokenizerMapper extends Mapper<Text, Text, Text, IntWritable> { private final static IntWritable num = new IntWritable(); private String keyword; @Override protected void setup(Context context) throws IOException, InterruptedException { keyword = context.getConfiguration().get(KEYWORD); } @Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { if (keyword == null) { return; } int index = value.find(keyword), counter = 0; while (index < value.getLength() && index != -1) { ++counter; index = value.find(keyword, index + keyword.length()); } num.set(counter); context.write(key, num); } } public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private final IntWritable result = new IntWritable(); @Override public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); } if (sum < 3) return; result.set(sum); context.write(key, result); } } private static void clearOutput(Configuration conf, Path path) throws IOException { FileSystem fs = FileSystem.get(conf); if (fs.exists(path)) { fs.delete(path, true); } } private static final String KEYWORD = "course.keyword"; public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); conf.set(FileInputFormat.INPUT_DIR_RECURSIVE, String.valueOf(true)); Path input = new Path(args[0]); Path output = new Path(args[1]); BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); System.out.print("Keyword:\t"); conf.set(KEYWORD, in.readLine()); Job job = Job.getInstance(conf, "word count"); job.setJarByClass(KeywordSearch.class); job.setInputFormatClass(TextInputFormatV2.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); clearOutput(conf, output); FileInputFormat.addInputPath(job, input); FileOutputFormat.setOutputPath(job, output); System.exit(job.waitForCompletion(true) ? 0 : 1); } }