Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package nl.utwente.bigdata; import java.net.URI; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.avro.generic.GenericData; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.StringUtils; /** * * @author ime */ public class GoalScorerDefiner { private static String normalizeCharacters(String text) { text = text.replace("", "ue"); text = text.replace("", "oe"); text = text.replace("?", "ae"); text = text.replace("", "e"); text = text.replace("", "a"); text = text.replace("", "e"); text = text.replace("", "c"); text = text.replace("", "a"); text = text.replace("", "e"); text = text.replace("", "u"); return text; } private static boolean containsPlayerName(String name, String text) { String normalizedString = normalizeCharacters(text); return (name != null && normalizedString.matches("^(.*?(\\b(" + name + ")\\b)[^$]*)$")); } public static class ScoreMapper extends Mapper<Object, Text, Text, Text> { private List<String[]> playerNames = new ArrayList<>(); public ScoreMapper() { this.playerNames = importCSVFile("player.csv"); }; public ScoreMapper(String path) { this.playerNames = importCSVFile(path); } private final Text goalId = new Text(); private final Text player = new Text(); @Override public void map(Object key, Text value, Mapper.Context context) throws IOException, InterruptedException { String[] split = value.toString().split("\t"); if (split.length <= 1) return; goalId.set(split[0]); boolean playerFound = false; String tweet = String.valueOf(split[1]).toLowerCase(); for (String[] playerName : playerNames) { //Surename playerFound = containsPlayerName(getSurname(playerName), tweet); if (playerFound) { player.set(StringUtils.join(" ", playerName)); break; } } if (!playerFound) { for (String[] playerName : playerNames) { playerFound = containsPlayerName(getFirstName(playerName), tweet); if (playerFound) { player.set(StringUtils.join(" ", playerName)); break; } } } if (playerFound) { context.write(goalId, player); } } } public static class ScoreReducer extends Reducer<Text, Text, Text, Text> { private Text playerResult = new Text(); public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { HashMap<Text, Integer> playerCountMap = new HashMap<>(); for (Text val : values) { if (playerCountMap.get(val) == null) { playerCountMap.put(val, 1); } else { playerCountMap.put(val, playerCountMap.get(val) + 1); } } int maxValueInMap = (Collections.max(playerCountMap.values())); for (Entry<Text, Integer> entry : playerCountMap.entrySet()) { if (entry.getValue() == maxValueInMap) { playerResult.set(entry.getKey()); } } if (maxValueInMap < 3) { playerResult.set("NO REAL GOAL"); } context.write(key, playerResult); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: userCount <in> [<in>...] <out>"); System.exit(2); } Job job = new Job(conf, "GoalScorerDefiner"); job.setJarByClass(GoalScorerDefiner.class); job.setMapperClass(GoalScorerDefiner.ScoreMapper.class); job.setCombinerClass(GoalScorerDefiner.ScoreReducer.class); job.setReducerClass(GoalScorerDefiner.ScoreReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } public static List<String[]> importCSVFile(String csvFile) { List<String[]> playerNames = new ArrayList<>(); try { Path file_path = new Path(csvFile); FileSystem filesystem = FileSystem.get(URI.create("files/player.csv"), new Configuration()); BufferedReader br = null; String line; br = new BufferedReader(new InputStreamReader(filesystem.open(file_path))); while ((line = br.readLine()) != null) { String[] csvLine = line.split(";"); String fullPlayerName = csvLine[0]; playerNames.add(fullPlayerName.split(" ")); } br.close(); } catch (FileNotFoundException ex) { Logger.getLogger(GoalScorerDefiner.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(GoalScorerDefiner.class.getName()).log(Level.SEVERE, null, ex); } return playerNames; } public static String getFirstName(String[] playerNames) { if (playerNames.length <= 1) return null; // Has no firstname return playerNames[0].toLowerCase(); } public static String getSurname(String[] playerNames) { if (playerNames.length < 1) return null; // Name not correct return playerNames[playerNames.length - 1].toLowerCase(); } }