Use Hadoop to do map and reduce on data - Java Big Data

Java examples for Big Data: Hadoop

Description

Use Hadoop to run map and reduce jobs on data.

Demo Code

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.List;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.*;
import java.util.Iterator;

public class Paretto extends Configured implements Tool {

    /**
     * Command-line entry point: hands the arguments to {@link ToolRunner},
     * which invokes {@link #run(String[])}, and exits the JVM with the
     * status code that call returns.
     *
     * @param args command-line arguments forwarded to the tool
     * @throws Exception whatever {@code ToolRunner.run} propagates
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Paretto tool = new Paretto();
        int exitCode = ToolRunner.run(configuration, tool, args);
        System.exit(exitCode);
    }

    /**
     * Configures and runs two chained MapReduce jobs.
     *
     * <p>Job A ("Airport Count") reads {@code args[0]} and writes per-key
     * counts to the fixed intermediate directory {@code /aviation/tmp}.
     * Job B ("Top Airports") reads that directory with a single reducer and
     * writes its summary to {@code args[1]}.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the
     *             output path
     * @return 0 when both jobs succeed, 1 when either job fails, 2 when the
     *         required arguments are missing
     * @throws Exception if the file system or job submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: Paretto <input path> <output path>");
            return 2;
        }

        Configuration conf = this.getConf();
        FileSystem fs = FileSystem.get(conf);
        Path tmpPath = new Path("/aviation/tmp");
        // Clear any intermediate output left behind by a previous run.
        fs.delete(tmpPath, true);

        Job jobA = Job.getInstance(conf, "Airport Count");
        jobA.setOutputKeyClass(Text.class);
        jobA.setOutputValueClass(IntWritable.class);

        jobA.setMapperClass(AirportCountMap.class);
        jobA.setReducerClass(AirportCountReduce.class);

        FileInputFormat.setInputPaths(jobA, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobA, tmpPath);

        jobA.setJarByClass(Paretto.class);
        // Job B consumes job A's output, so there is nothing meaningful to
        // run if job A did not complete successfully.
        if (!jobA.waitForCompletion(true)) {
            return 1;
        }

        Job jobB = Job.getInstance(conf, "Top Airports");
        jobB.setOutputKeyClass(Text.class);
        jobB.setOutputValueClass(Text.class);

        jobB.setMapOutputKeyClass(NullWritable.class);
        jobB.setMapOutputValueClass(TextArrayWritable.class);

        jobB.setMapperClass(ProbabilityMap.class);
        jobB.setReducerClass(ProbabilityReduce.class);
        // One reducer so that a single task sees every record.
        jobB.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(jobB, tmpPath);
        FileOutputFormat.setOutputPath(jobB, new Path(args[1]));

        jobB.setInputFormatClass(KeyValueTextInputFormat.class);
        jobB.setOutputFormatClass(TextOutputFormat.class);

        jobB.setJarByClass(Paretto.class);
        return jobB.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * An {@link ArrayWritable} whose element class is fixed to {@link Text}.
     */
    public static class TextArrayWritable extends ArrayWritable {

        /** Creates an empty instance whose element class is {@code Text}. */
        public TextArrayWritable() {
            super(Text.class);
        }

        /**
         * Creates an instance holding one {@code Text} per input string, in
         * the same order.
         *
         * @param strings the values to wrap
         */
        public TextArrayWritable(String[] strings) {
            super(Text.class);
            final int size = strings.length;
            Text[] wrapped = new Text[size];
            int position = 0;
            while (position < size) {
                wrapped[position] = new Text(strings[position]);
                position++;
            }
            set(wrapped);
        }
    }

    /**
     * Mapper that splits each input line on commas and emits the trimmed
     * values of fields 4 and 6 (zero-based), each paired with a count of 1.
     *
     * <p>Lines with fewer than seven fields are skipped and tallied in the
     * {@code Paretto / Malformed records} counter. They are not emitted under
     * a placeholder key, because such a key would be counted downstream as
     * if it were a real airport. I/O failures from {@code context.write} are
     * propagated rather than swallowed.
     */
    public static class AirportCountMap extends
            Mapper<Object, Text, Text, IntWritable> {
        // Not used by this mapper; retained so the class's fields are unchanged.
        List<String> stopWords;
        String delimiters;

        private final static IntWritable one = new IntWritable(1);
        // Reused for every emitted key to avoid allocating a Text per record.
        private Text airport = new Text();

        /**
         * Emits {@code (field 4, 1)} and {@code (field 6, 1)} for one line.
         *
         * @param key     input key; ignored
         * @param value   one comma-separated input line
         * @param context destination for the emitted pairs and the counter
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {

            String[] tokens = value.toString().split(",");
            if (tokens.length <= 6) {
                // Skip the whole record so that a half-parsed line does not
                // contribute one field but not the other.
                context.getCounter("Paretto", "Malformed records").increment(1);
                return;
            }

            airport.set(tokens[4].trim());
            context.write(airport, one);
            airport.set(tokens[6].trim());
            context.write(airport, one);
        }

    }

    /**
     * Reducer that adds up all the integer values received for a key and
     * emits the key together with that total.
     */
    public static class AirportCountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Writes {@code (key, sum of values)}.
         *
         * @param key     the key being reduced
         * @param values  the partial counts for that key
         * @param context destination for the summed pair
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int total = 0;
            Iterator<IntWritable> cursor = values.iterator();
            while (cursor.hasNext()) {
                total += cursor.next().get();
            }
            IntWritable result = new IntWritable(total);
            context.write(key, result);
        }
    }

    /**
     * Mapper that forwards every {@code (name, count)} input pair as a
     * two-element {@link TextArrayWritable} under the {@link NullWritable}
     * key.
     *
     * <p>Records are emitted directly from {@code map}. They are not buffered
     * in a map keyed by count, because such a map keeps only one record per
     * distinct count and silently drops every other record with that count.
     */
    public static class ProbabilityMap extends
            Mapper<Text, Text, NullWritable, TextArrayWritable> {
        // Loaded from the "N" configuration entry (default 100) in setup();
        // this mapper does not currently use it to limit its output.
        Integer N;

        /**
         * Reads the {@code N} setting from the job configuration.
         *
         * @param context task context supplying the configuration
         */
        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            Configuration conf = context.getConfiguration();
            this.N = conf.getInt("N", 100);
        }

        /**
         * Emits {@code (NullWritable, [key, value])} for one input pair.
         *
         * @param key     the record's name field
         * @param value   the record's count, as decimal text
         * @param context destination for the emitted pair
         * @throws NumberFormatException if {@code value} is not an integer
         * @throws IOException           if writing the output fails
         * @throws InterruptedException  if the task is interrupted while writing
         */
        @Override
        public void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {

            String name = key.toString();
            String count = value.toString();
            // Validation only: reject a non-numeric count here, at the record
            // that caused it, instead of later in the pipeline.
            Integer.parseInt(count);

            context.write(NullWritable.get(),
                    new TextArrayWritable(new String[] { name, count }));
        }
    }

    /**
     * Reducer that receives {@code [name, count]} tuples and reports how many
     * tuples there were and what fraction of the summed counts belongs to the
     * lower-ranked 80% of them.
     *
     * <p>Tuples are ranked by count, largest first. The first
     * {@code (int) (total * 0.2)} of them form the "top 20%" group and the
     * rest form the "bottom 80%" group. Every tuple is kept even when several
     * share the same count.
     */
    public static class ProbabilityReduce extends
            Reducer<NullWritable, TextArrayWritable, Text, Text> {
        // Never assigned or read in this class; retained so its fields are unchanged.
        Integer N;

        /**
         * Writes two lines: {@code "Total airports: "} with the number of
         * tuples, and {@code "Bottom 80% airports share: "} with
         * {@code bottomSum / (topSum + bottomSum)} as a double ({@code NaN}
         * when there are no tuples or all counts are zero).
         *
         * @param key     the shared {@code NullWritable} key
         * @param values  tuples whose element 1 is a count in decimal text
         * @param context destination for the two summary lines
         * @throws NumberFormatException if a count is not an integer
         * @throws IOException           if writing the output fails
         * @throws InterruptedException  if the task is interrupted while writing
         */
        @Override
        public void reduce(NullWritable key,
                Iterable<TextArrayWritable> values, Context context)
                throws IOException, InterruptedException {
            // Only the counts matter for the summary; copy each one out of
            // the incoming value before moving to the next.
            List<Integer> counts = new ArrayList<Integer>();
            for (TextArrayWritable value : values) {
                Text[] tuple = (Text[]) value.toArray();
                counts.add(Integer.parseInt(tuple[1].toString()));
            }
            Collections.sort(counts, Collections.reverseOrder());

            int airportCount = counts.size();
            int top20Size = (int) (airportCount * 0.2);

            // long accumulators: exact for any realistic total, unlike float.
            long sumTop20 = 0L;
            long sumBottom80 = 0L;
            for (int rank = 0; rank < airportCount; rank++) {
                if (rank < top20Size) {
                    sumTop20 += counts.get(rank);
                } else {
                    sumBottom80 += counts.get(rank);
                }
            }

            double bottomShare = (double) sumBottom80
                    / (sumTop20 + sumBottom80);

            context.write(new Text("Total airports: "),
                    new Text(String.valueOf(airportCount)));
            context.write(
                    new Text("Bottom 80% airports share: "),
                    new Text(String.valueOf(bottomShare)));

        }
    }

}

/**
 * An immutable two-element tuple ordered by its first component and then by
 * its second.
 *
 * @param <A> type of the first component
 * @param <B> type of the second component
 */
class Pair<A extends Comparable<? super A>, B extends Comparable<? super B>>
        implements Comparable<Pair<A, B>> {

    public final A first;
    public final B second;

    /**
     * Creates a pair from the two given components.
     *
     * @param first  the first component
     * @param second the second component
     */
    public Pair(A first, B second) {
        this.first = first;
        this.second = second;
    }

    /**
     * Factory equivalent of the constructor.
     *
     * @param first  the first component
     * @param second the second component
     * @return a new pair holding both components
     */
    public static <A extends Comparable<? super A>, B extends Comparable<? super B>> Pair<A, B> of(
            A first, B second) {
        return new Pair<A, B>(first, second);
    }

    /**
     * Compares by {@code first}, falling back to {@code second} on a tie.
     * A {@code null} argument sorts before this pair (the result is 1).
     */
    @Override
    public int compareTo(Pair<A, B> o) {
        if (o == null) {
            return 1;
        }
        int byFirst = first.compareTo(o.first);
        if (byFirst != 0) {
            return byFirst;
        }
        return second.compareTo(o.second);
    }

    /** Combines the null-tolerant hashes of both components. */
    @Override
    public int hashCode() {
        int firstHash = nullSafeHash(first);
        int secondHash = nullSafeHash(second);
        return 31 * firstHash + secondHash;
    }

    /** Returns 0 for {@code null}, otherwise the object's own hash code. */
    private static int nullSafeHash(Object o) {
        if (o == null) {
            return 0;
        }
        return o.hashCode();
    }

    /** Two pairs are equal when both components are equal (nulls allowed). */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Pair)) {
            return false;
        }
        Pair<?, ?> other = (Pair<?, ?>) obj;
        return sameValue(first, other.first)
                && sameValue(second, other.second);
    }

    /** Null-tolerant equality: identical references or {@code equals}. */
    private boolean sameValue(Object left, Object right) {
        if (left == right) {
            return true;
        }
        return left != null && left.equals(right);
    }

    /** Renders the pair as {@code (first, second)}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("(").append(first).append(", ").append(second).append(')');
        return text.toString();
    }
}

Related Tutorials