top10_categories.Top10_Categories.java Source code

Java tutorial

Introduction

Here is the source code for top10_categories.Top10_Categories.java. The program chains two Hadoop MapReduce jobs: the first computes the average rating for each video, and the second sorts the results by rating and emits the highest-rated entries.

Source

package top10_categories;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 *
 * @author Rajat
 */
public class Top10_Categories {

    public static class Map1 extends Mapper<Object, Text, Text, FloatWritable> {

        private final FloatWritable video_rating = new FloatWritable();
        private final Text video_id = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {

            // Input rows are comma-separated; field 0 is the video id and
            // field 6 is the rating.
            String[] fields = value.toString().split(",");
            if (fields.length < 7 || fields[6].isEmpty()) {
                // Skip rows with no rating instead of re-emitting the value
                // left over from the previous record.
                return;
            }
            video_id.set(fields[0]);
            video_rating.set(Float.parseFloat(fields[6]));
            context.write(video_id, video_rating);
        }

    }

    public static class Reduce1 extends Reducer<Text, FloatWritable, Text, FloatWritable> {

        private final FloatWritable result = new FloatWritable();

        @Override
        protected void reduce(Text key, Iterable<FloatWritable> values, Context context)
                throws IOException, InterruptedException {

            // Average all ratings observed for this video id.
            int count = 0;
            float sum = 0;

            for (FloatWritable val : values) {
                sum += val.get();
                ++count;
            }

            result.set(sum / count);
            context.write(key, result);
        }

    }

    public static class Map2 extends Mapper<Object, Text, FloatWritable, Text> {

        private final FloatWritable rating = new FloatWritable();
        private final Text video_id = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {

            // Job 1 wrote tab-separated lines: video_id <TAB> average rating.
            // Swap the fields so that job 2 sorts on the rating.
            String[] row = value.toString().split("\t");

            try {
                rating.set(Float.parseFloat(row[1]));
                video_id.set(row[0]);
                context.write(rating, video_id);
            } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
                // Skip malformed lines rather than failing the task.
            }

        }

    }

    public static class Reduce2 extends Reducer<FloatWritable, Text, Text, FloatWritable> {

        // Total number of records to emit; the listing uses 25 even though
        // the class is named Top10_Categories. A static counter works here
        // because job 2 runs with a single reducer.
        private static int count = 25;

        @Override
        protected void reduce(FloatWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {

            // Keys arrive in descending rating order, so the first `count`
            // records written are the highest-rated videos.
            for (Text val : values) {
                if (count > 0) {
                    context.write(val, key);
                    --count;
                } else {
                    break;
                }
            }

        }

    }
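
    // SortKeyComparator is referenced in main() but was not part of the
    // original listing; without it the file does not compile. A minimal
    // sketch, assuming it orders the FloatWritable rating keys in
    // descending order so that Reduce2 sees the highest-rated videos first.
    public static class SortKeyComparator extends WritableComparator {

        protected SortKeyComparator() {
            super(FloatWritable.class, true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            FloatWritable first = (FloatWritable) a;
            FloatWritable second = (FloatWritable) b;
            // Negate the natural ordering to sort from highest to lowest.
            return -first.compareTo(second);
        }
    }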

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

        Configuration conf = new Configuration();
        Path inputDir = new Path(args[0]);
        Path hdfsFile = new Path(args[1]);
        FileSystem hdfs = FileSystem.get(conf);
        FileSystem local = FileSystem.getLocal(conf);

        // Merge every file in the local input directory into a single HDFS
        // file so that job 1 reads one consolidated input.
        try {
            FileStatus[] inputFiles = local.listStatus(inputDir);
            FSDataOutputStream out = hdfs.create(hdfsFile);

            for (int i = 0; i < inputFiles.length; i++) {
                System.out.println(inputFiles[i].getPath().getName());
                FSDataInputStream in = local.open(inputFiles[i].getPath());
                byte[] buffer = new byte[256];
                int bytesRead = 0;
                while ((bytesRead = in.read(buffer)) > 0) {
                    out.write(buffer, 0, bytesRead);
                }
                in.close();
            }
            out.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Job 1: average the rating for each video id.
        Job job1 = Job.getInstance(conf, "Chaining");
        job1.setJarByClass(Top10_Categories.class);

        job1.setMapperClass(Map1.class);
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(FloatWritable.class);

        job1.setReducerClass(Reduce1.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(FloatWritable.class);
        // No combiner: averaging is not associative, so reusing Reduce1 as a
        // combiner would average partial averages and skew the results.

        FileInputFormat.addInputPath(job1, hdfsFile);
        FileOutputFormat.setOutputPath(job1, new Path(args[2]));

        boolean complete = job1.waitForCompletion(true);

        if (!complete) {
            // Do not start job 2 if job 1 failed.
            System.exit(1);
        }

        // Job 2: sort by rating (descending, via SortKeyComparator) and emit
        // the highest-rated videos.
        Configuration conf2 = new Configuration();
        Job job2 = Job.getInstance(conf2, "Chaining");

        job2.setJarByClass(Top10_Categories.class);
        job2.setMapperClass(Map2.class);
        job2.setMapOutputKeyClass(FloatWritable.class);
        job2.setMapOutputValueClass(Text.class);

        job2.setReducerClass(Reduce2.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(FloatWritable.class);

        job2.setSortComparatorClass(SortKeyComparator.class);

        // A single reducer yields one globally sorted output file.
        job2.setNumReduceTasks(1);

        FileInputFormat.addInputPath(job2, new Path(args[2]));
        FileOutputFormat.setOutputPath(job2, new Path(args[3]));

        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }

}
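
Usage

A sample invocation, assuming the class has been packaged into a jar named top10_categories.jar (the jar name and all paths below are illustrative). The four arguments are the local input directory, the merged HDFS input file, the intermediate output directory written by job 1, and the final output directory written by job 2:

hadoop jar top10_categories.jar top10_categories.Top10_Categories ./input /user/hadoop/merged_input /user/hadoop/out1 /user/hadoop/out2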