com.pivotal.gfxd.demo.mapreduce.LoadAverage.java Source code

Introduction

Here is the source code for com.pivotal.gfxd.demo.mapreduce.LoadAverage.java. The class defines a Hadoop MapReduce job that reads raw sensor readings from the GemFire XD RAW_SENSOR table persisted to HDFS, aggregates the load values per weekday, time slice, and plug, and writes the aggregated rows back into the LOAD_AVERAGES_SHADOW table over JDBC.

Source

/*==========================================================================
 * Copyright (c) 2014 Pivotal Software Inc. All Rights Reserved.
 *
 * This product is licensed to you under the Apache License, Version 2.0 (the "License").
 * You may not use this product except in compliance with the License.
 *
 * This product may include a number of subcomponents with separate copyright
 * notices and license terms. Your use of these subcomponents is subject to
 * the terms and conditions of the subcomponent's license, as noted in the
 * LICENSE file.
 *==========================================================================
 */

package com.pivotal.gfxd.demo.mapreduce;

import com.pivotal.gemfirexd.hadoop.mapreduce.Key;
import com.pivotal.gemfirexd.hadoop.mapreduce.Row;
import com.pivotal.gemfirexd.hadoop.mapreduce.RowInputFormat;
import com.pivotal.gemfirexd.hadoop.mapreduce.RowOutputFormat;
import com.pivotal.gemfirexd.internal.engine.GfxdDataSerializable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * MapReduce job that aggregates raw sensor readings from the RAW_SENSOR table
 * and writes the results into the LOAD_AVERAGES_SHADOW table.
 *
 * @author William Markito
 */
public class LoadAverage extends Configured implements Tool {

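    /**
     * Mapper: reads each RAW_SENSOR row as a JDBC-style ResultSet, copies its
     * columns into a LoadKey value and emits it under a composite text key of
     * the form "weekday-time_slice-plug_id".
     */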
    public static class LoadAverageMapper extends Mapper<Object, Row, Text, LoadKey> {

        @Override
        public void map(Object key, Row row, Context context) throws IOException, InterruptedException {

            try {

                // Each input Row wraps one RAW_SENSOR record and exposes it as a JDBC ResultSet
                ResultSet rs = row.getRowAsResultSet();

                LoadKey out = new LoadKey();

                out.setHousehold_id(rs.getInt("household_id"));
                out.setHouse_id(rs.getInt("house_id"));
                out.setWeekday(rs.getInt("weekday"));
                out.setPlug_id(rs.getInt("plug_id"));
                out.setTime_slice(rs.getInt("time_slice"));
                out.setValue(rs.getDouble("value"));
                out.setEvent_count(1);

                // Composite map output key: "weekday-time_slice-plug_id"
                StringBuilder sb = new StringBuilder();
                sb.append(out.getWeekday());
                sb.append("-");
                sb.append(out.getTime_slice());
                sb.append("-");
                sb.append(out.getPlug_id());

                Text outKey = new Text();
                outKey.set(sb.toString());
                context.write(outKey, out);

            } catch (SQLException sqex) {
                // A row that cannot be read is logged and skipped rather than failing the job
                sqex.printStackTrace();
            }

        }
    }

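    /**
     * Reducer: sums the load values and event counts for each
     * "weekday-time_slice-plug_id" group and writes one aggregated
     * LoadAverageModel row per group to the output table.
     */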
    public static class LoadAverageReducer extends Reducer<Text, LoadKey, Key, LoadAverageModel> {

        @Override
        public void reduce(Text key, Iterable<LoadKey> values, Context context)
                throws IOException, InterruptedException {

            double valueSum = 0;
            int numEvents = 0;
            LoadKey loadKey = null;
            for (LoadKey model : values) {
                valueSum = model.getValue() + valueSum;
                numEvents = model.getEvent_count() + numEvents;

                // Remember one of the grouped values so its id fields can be copied into the output
                if (loadKey == null) {
                    loadKey = model;
                }
            }

            // Emit one aggregated row per group, carrying the summed value and event count
            LoadAverageModel result = new LoadAverageModel(loadKey.getHouse_id(), loadKey.getHousehold_id(),
                    loadKey.getPlug_id(), loadKey.getWeekday(), loadKey.getTime_slice(), valueSum, numEvents);

            context.write(new Key(), result);
        }
    }

    /**
     * Configures and submits the job: rows written to RAW_SENSOR between the
     * last recorded run and now are read from HDFS, aggregated, and written to
     * LOAD_AVERAGES_SHADOW. The HDFS URL ("fs.defaultFS") and the GemFire XD
     * JDBC URL ("gemfirexd.url") are taken from the Hadoop configuration.
     *
     * @param args command-line arguments (not used by this implementation)
     * @return 0 if the job completed successfully, 1 otherwise
     * @throws Exception if the job cannot be configured or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        System.out.println("Starting MapReduce Job");
        GfxdDataSerializable.initTypes();
        // The demo creates a fresh Configuration; a ToolRunner-driven job would normally use getConf()
        Configuration conf = new Configuration();
        //Configuration conf = getConf();

        Path outputPath = new Path("/output");
        String hdfsHomeDir = "/sensorStore"; //args[1];
        String tableName = "RAW_SENSOR";
        String outTableName = "LOAD_AVERAGES_SHADOW";
        String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

        // conf.set("fs.default.name", args[0]);
        String hdfsUrl = conf.get("fs.defaultFS");

        FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

        // Retrieve last run timestamp
        long now = System.currentTimeMillis();
        long lastStart = getLastStart(hdfs);

        // Remove output from any previous run so the job can write to the same path
        outputPath.getFileSystem(conf).delete(outputPath, true);

        // Read rows from the RAW_SENSOR HDFS store, restricted to events written since the last run
        conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
        conf.set(RowInputFormat.INPUT_TABLE, tableName);
        conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
        conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
        conf.setLong(RowInputFormat.END_TIME_MILLIS, now);

        // Write the aggregated rows back to GemFire XD over JDBC
        conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
        conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

        // print config to troubleshoot possible issues
        // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

        Job job = Job.getInstance(conf, "LoadAverage");

        job.setNumReduceTasks(1);

        job.setInputFormatClass(RowInputFormat.class);

        // configure mapper and reducer
        job.setJarByClass(LoadAverage.class);
        job.setMapperClass(LoadAverageMapper.class);
        job.setReducerClass(LoadAverageReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LoadKey.class);

        TextOutputFormat.setOutputPath(job, outputPath);
        job.setOutputFormatClass(RowOutputFormat.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(LoadAverageModel.class);

        boolean jobSuccess = job.waitForCompletion(true);
        if (jobSuccess) {
            // Only advance the timestamp on success so that failed runs are retried
            writeLastStart(hdfs, now);
        }

        return jobSuccess ? 0 : 1;
    }

    /**
     * Reads the timestamp of the last successful run from HDFS, or returns 0
     * if the marker file does not exist yet (so the first run processes everything).
     */
    private long getLastStart(FileSystem hdfs) throws IOException {
        long lastStart = 0;
        Path file = new Path("/sensorStore/last_mapreduce_timestamp");
        if (hdfs.exists(file)) {
            try (BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(file)))) {
                String line = br.readLine();
                if (line != null && !line.isEmpty()) {
                    lastStart = Long.parseLong(line);
                }
            }
        }
        return lastStart;
    }

    /**
     * Persists the start timestamp of the current run to HDFS so that the next
     * run only picks up rows written after this one started.
     */
    private void writeLastStart(FileSystem hdfs, long timestamp) throws IOException {
        Path file = new Path("/sensorStore/last_mapreduce_timestamp");
        try (OutputStream os = hdfs.create(file, true);
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os))) {
            writer.write(Long.toString(timestamp));
        }
    }

    public static void main(String[] args) throws Exception {
        // only for testing
        int rc = ToolRunner.run(new LoadAverage(), args);

        System.out.println("Job completed. Return code:" + rc);
        System.exit(rc);
    }

}
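
The listing above references two demo classes that are not shown on this page: LoadKey, the map output value, and LoadAverageModel, the reduce output value. The sketch below is a hypothetical reconstruction of LoadKey only. It assumes the class lives in the same package, is a plain Hadoop Writable, and has exactly the fields and accessors that LoadAverage.java calls; the real class in the Pivotal demo may differ.

package com.pivotal.gfxd.demo.mapreduce;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Hypothetical sketch only: the real LoadKey from the Pivotal demo is not shown on this page.
public class LoadKey implements Writable {

    private int household_id;
    private int house_id;
    private int weekday;
    private int plug_id;
    private int time_slice;
    private double value;
    private int event_count;

    // Accessors matching the calls made by LoadAverageMapper and LoadAverageReducer
    public int getHousehold_id() { return household_id; }
    public void setHousehold_id(int household_id) { this.household_id = household_id; }
    public int getHouse_id() { return house_id; }
    public void setHouse_id(int house_id) { this.house_id = house_id; }
    public int getWeekday() { return weekday; }
    public void setWeekday(int weekday) { this.weekday = weekday; }
    public int getPlug_id() { return plug_id; }
    public void setPlug_id(int plug_id) { this.plug_id = plug_id; }
    public int getTime_slice() { return time_slice; }
    public void setTime_slice(int time_slice) { this.time_slice = time_slice; }
    public double getValue() { return value; }
    public void setValue(double value) { this.value = value; }
    public int getEvent_count() { return event_count; }
    public void setEvent_count(int event_count) { this.event_count = event_count; }

    // Hadoop serializes map output values with these two methods; the field
    // order must match between write() and readFields().
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(household_id);
        out.writeInt(house_id);
        out.writeInt(weekday);
        out.writeInt(plug_id);
        out.writeInt(time_slice);
        out.writeDouble(value);
        out.writeInt(event_count);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        household_id = in.readInt();
        house_id = in.readInt();
        weekday = in.readInt();
        plug_id = in.readInt();
        time_slice = in.readInt();
        value = in.readDouble();
        event_count = in.readInt();
    }
}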