MapReduce.SentimentsPerTimezone.java Source code

Java tutorial

Introduction

Here is the source code for MapReduce.SentimentsPerTimezone.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package MapReduce;

import CSVUtils.CSVUtils;
import Helper.KeyValueTuple;
import Utils.TweetUtils;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 *
 * @author rishikaidnani
 */
public class SentimentsPerTimezone {

    private static class MapClass extends TableMapper<Text, LongWritable> {

        private final static LongWritable one = new LongWritable(1);
        private KeyValueTuple kvt = new KeyValueTuple();

        private static Configuration config = HBaseConfiguration.create();
        private static HTable table;

        static {
            try {
                table = new HTable(config, "tweetdata");
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        public void map(ImmutableBytesWritable rowKey, Result result, Mapper.Context context)
                throws UnsupportedEncodingException, IOException, InterruptedException {
            boolean isQualifierTweetId = true;
            String tweetid;
            String temp;
            //get column qualifier for tweet column family, give that qualifier to get command and get the sentiment
            for (KeyValue kv : result.raw()) {
                isQualifierTweetId = true;
                try {
                    tweetid = Bytes.toStringBinary(kv.getQualifier());
                    temp = tweetid;
                    Long.parseLong(temp);
                } catch (NumberFormatException e) {
                    isQualifierTweetId = false;
                }
                if (isQualifierTweetId) {
                    tweetid = Bytes.toStringBinary(kv.getQualifier());
                } else {
                    continue;
                }
                Get g = new Get(Bytes.toBytes(tweetid));
                Result resultOfTweetDataTable = table.get(g);
                String sentiment = Bytes.toStringBinary(
                        resultOfTweetDataTable.getValue(Bytes.toBytes("tweet"), Bytes.toBytes("sentiment")));
                if (!(sentiment.equals("null"))) {
                    if (!(sentiment.trim().equals(""))) {
                        String timeZone = Bytes
                                .toStringBinary(result.getValue(Bytes.toBytes("geo"), Bytes.toBytes("timezone")));
                        String tweetsAndTimezone = timeZone + "-" + sentiment;
                        if (!(timeZone.equals("null"))) {
                            context.write(new Text(tweetsAndTimezone), one);
                        }
                    }
                }
            }
        }
    }

    /**
     * Sums the counts emitted for each key and, when the task finishes, hands the
     * first entries of the collected totals to the dashboard exporter.
     */
    private static class ReducerClass extends Reducer<Text, LongWritable, Text, LongWritable> {

        /** Upper bound on the number of entries passed to the dashboard exporter. */
        private static final int MAX_DASHBOARD_ENTRIES = 18;

        /**
         * Totals seen by this reducer, kept in KeyValueTuple's natural ordering
         * (the TreeSet is created without a comparator).
         * NOTE(review): tuples that compare as equal are stored only once - check
         * KeyValueTuple.compareTo if distinct keys can share a count.
         */
        private SortedSet<KeyValueTuple> tweetsTimezone = new TreeSet<>();

        /**
         * Writes the summed count for {@code key} and remembers it for the export
         * done in {@link #cleanup}.
         *
         * @param key     the mapper's output key
         * @param values  the counts emitted for that key
         * @param context task context the total is written to
         * @throws IOException          if the write fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable count : values) {
                sum = sum + count.get();
            }
            context.write(key, new LongWritable(sum));

            KeyValueTuple entry = new KeyValueTuple();
            entry.setKey(key.toString());
            entry.setValue(sum);
            tweetsTimezone.add(entry);
        }

        /**
         * Exports at most {@link #MAX_DASHBOARD_ENTRIES} totals, taken in the set's
         * iteration order, to {@code sentimentTimezoneDashboard.js} under the
         * {@code SentimentsPerTimezone} output folder.
         *
         * <p>A plain CSV export (CSVUtils.createCSVForSentimentTimezone) used to be
         * produced here as well and is currently disabled.
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            String dashboardFile = TweetUtils.OUTPUT_PREFIX
                    + "SentimentsPerTimezone/sentimentTimezoneDashboard.js";

            ArrayList<KeyValueTuple> selected = new ArrayList<>();
            for (KeyValueTuple entry : tweetsTimezone) {
                if (selected.size() >= MAX_DASHBOARD_ENTRIES) {
                    break;
                }
                selected.add(entry);
            }

            CSVUtils.createCSVForSentimentTimezoneDashboard(selected, dashboardFile);
        }
    }

    /**
     * Configures and runs the sentiments-per-timezone job.
     *
     * <p>The job scans the {@code twitteruser} table with {@link MapClass}, sums the
     * emitted counts with {@link ReducerClass} and writes the totals to the output
     * directory. The JVM exits with status 0 when the job succeeds and 1 otherwise.
     *
     * @param args optional; {@code args[0]} overrides the default output directory
     *             ({@code TweetUtils.OUTPUT_PREFIX + "SentimentsPerTimezone"})
     * @throws IOException            if the job cannot be set up or submitted
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be resolved
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Factory method instead of the deprecated HBaseConfiguration constructor.
        Configuration conf = HBaseConfiguration.create();
        conf.addResource(TweetUtils.HBASE_CONF);

        Job job = Job.getInstance(conf, "Sentiments per timezone");
        job.setJarByClass(SentimentsPerTimezone.class);

        Scan sc = new Scan();
        sc.setCaching(500);       // fetch 500 rows per scanner round trip
        sc.setCacheBlocks(false); // a full scan should not fill the block cache

        TableMapReduceUtil.initTableMapperJob("twitteruser", // input table
                sc, // Scan instance to control CF and attribute selection
                SentimentsPerTimezone.MapClass.class, // mapper class
                Text.class, // mapper output key
                LongWritable.class, // mapper output value
                job);

        job.setMapperClass(SentimentsPerTimezone.MapClass.class);
        job.setReducerClass(SentimentsPerTimezone.ReducerClass.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        String dest = TweetUtils.OUTPUT_PREFIX + "SentimentsPerTimezone";
        if (args.length > 0) {
            dest = args[0];
        }

        // Remove output left over from a previous run before the job writes to it.
        // NOTE(review): this deletes through java.io.File, i.e. on the local file
        // system only - confirm the job output is never written to another file system.
        File destination = new File(dest);
        FileUtil.fullyDelete(destination);
        FileOutputFormat.setOutputPath(job, new Path(dest));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}