Java tutorial
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.jbw.recommendsystem.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Example invocation:
// yarn jar target/RecommendSystem-0.jar com.jbw.recommendsystem.filter.FilterMRD -Drin=/user/jiabw/out/rs/relation_1/part-r-00000 -Dain=/user/jiabw/out/rs/add_1/part-r-00000 -Dout=/user/jiabw/out/rs/out_1

/**
 * MapReduce job that joins two tab-separated text inputs on their first
 * column and, per key, drops every {@code item:value} entry whose item also
 * appears in a plain comma-separated list for the same key.
 *
 * <p>Configuration options read by {@link #run(String[])}:
 * <ul>
 *   <li>{@code rin} - path of the input handled by {@link RelationMapper}</li>
 *   <li>{@code ain} - path of the input handled by {@link AddMapper}</li>
 *   <li>{@code out} - output directory</li>
 * </ul>
 *
 * @author alvin
 */
public class FilterMRD extends Configured implements Tool {

    /**
     * Emits {@code (column0, column1)} for every input line that consists of
     * exactly two tab-separated columns; all other lines are ignored.
     */
    static class RelationMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] columns = value.toString().split("\t");
            if (columns.length == 2) {
                context.write(new Text(columns[0]), new Text(columns[1]));
            }
        }
    }

    /**
     * Emits {@code (column0, column1)} for every input line that consists of
     * exactly two tab-separated columns; all other lines are ignored.
     *
     * <p>The logic is the same as {@link RelationMapper}; the reducer tells the
     * two kinds of value apart by whether the value starts with {@code "["}.
     */
    static class AddMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] columns = value.toString().split("\t");
            if (columns.length == 2) {
                context.write(new Text(columns[0]), new Text(columns[1]));
            }
        }
    }

    /**
     * For each key, collects the {@code item:value} entries from bracketed
     * values (those starting with {@code "["}), removes every item named in the
     * remaining comma-separated values, and writes one {@code item:value}
     * record per surviving entry.
     */
    static class FilterReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            Map<String, String> candidates = new HashMap<>();
            List<String> exclusions = new ArrayList<>();

            // Values arrive in no particular order, so exclusions are buffered
            // and applied only after every value for the key has been seen.
            for (Text value : values) {
                String text = value.toString().trim();
                if (text.startsWith("[")) {
                    collectCandidates(text, candidates);
                } else {
                    exclusions.addAll(Arrays.asList(text.split(",")));
                }
            }

            for (String excluded : exclusions) {
                candidates.remove(excluded.trim());
            }

            for (Map.Entry<String, String> entry : candidates.entrySet()) {
                context.write(key, new Text(entry.getKey() + ":" + entry.getValue()));
            }
        }

        /**
         * Parses a value of the form {@code [item:value, item:value, ...]} and
         * stores each pair in {@code target}, keyed by the trimmed item.
         *
         * <p>Entries without a {@code ':'} separator (including the single
         * empty entry produced by {@code "[]"}) are skipped rather than
         * failing the whole reduce call.
         *
         * @param bracketed trimmed value that starts with {@code "["}
         * @param target    map that receives the parsed entries
         */
        private static void collectCandidates(String bracketed, Map<String, String> target) {
            // Strip the closing bracket only when it is really there, so a
            // truncated value neither throws nor loses its last character.
            int end = bracketed.endsWith("]") ? bracketed.length() - 1 : bracketed.length();
            String body = bracketed.substring(1, Math.max(1, end));

            for (String entry : body.split(",")) {
                String[] parts = entry.split(":");
                if (parts.length < 2) {
                    continue;
                }
                target.put(parts[0].trim(), parts[1]);
            }
        }
    }

    /**
     * Configures and runs the filter job.
     *
     * @param strings remaining command-line arguments (unused; the paths are
     *                taken from the {@code rin}, {@code ain} and {@code out}
     *                configuration options)
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws IllegalArgumentException if one of the required options is missing
     * @throws Exception                if job setup or execution fails
     */
    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Path rPath = new Path(requireOption(conf, "rin"));
        Path aPath = new Path(requireOption(conf, "ain"));
        Path out = new Path(requireOption(conf, "out"));

        Job job = Job.getInstance(conf);
        job.setJobName("666");
        job.setJarByClass(FilterMRD.class);

        MultipleInputs.addInputPath(job, rPath, TextInputFormat.class, RelationMapper.class);
        MultipleInputs.addInputPath(job, aPath, TextInputFormat.class, AddMapper.class);
        job.setMapOutputKeyClass(Text.class);
        // Stated explicitly so the job does not depend on framework defaults.
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(FilterReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, out);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Returns the value of a mandatory configuration option.
     *
     * @param conf configuration to read from
     * @param name option name
     * @return the non-empty option value
     * @throws IllegalArgumentException if the option is unset or empty
     */
    private static String requireOption(Configuration conf, String name) {
        String value = conf.get(name);
        if (value == null || value.isEmpty()) {
            throw new IllegalArgumentException(
                    "Missing required option -D" + name + "=<path>");
        }
        return value;
    }

    /**
     * Command-line entry point; exits with the status returned by
     * {@link #run(String[])}.
     *
     * @param args command-line arguments, parsed by {@link ToolRunner}
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new FilterMRD(), args));
    }
}