ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain.java Source code

Java tutorial

Introduction

Here is the source code for ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain.java

Source

package ca.uwaterloo.iss4e.hadoop.meterperfile;

import ca.uwaterloo.iss4e.algorithm.Threelines;
import ca.uwaterloo.iss4e.hadoop.io.UnsplitableTextInputFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.*;

/**
 * Copyright (c) 2014 Xiufeng Liu ( xiufeng.liu@uwaterloo.ca )
 * <p/>
 * This file is free software: you may copy, redistribute and/or modify it
 * under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 * <p/>
 * This file is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * <p/>
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see http://www.gnu.org/licenses.
 */

/**
 * Map-only Hadoop job that runs {@link Threelines#threel} over the samples of
 * each meter found in the input and writes one line of result points per meter.
 * <p/>
 * Usage: {@code ThreelMain <input> <output>}. The input path is scanned
 * recursively and read through {@link UnsplitableTextInputFormat}.
 */
public class ThreelMain extends Configured implements Tool {
    public static final String DESCRIPTION = "Threeline regression program";

    private static final Log LOG = LogFactory.getLog(ThreelMain.class);

    /**
     * Buffers the samples of the current meter and emits the regression result
     * whenever the meter ID changes and once more when the task finishes.
     * <p/>
     * Each input line is comma-separated: field 0 is the integer meter ID,
     * field 2 is parsed as the reading and field 3 as the temperature (field 1
     * is not used). Records of one meter must be contiguous in the input,
     * because a change of meter ID triggers the flush of the buffered samples.
     */
    public static class MyMapper extends Mapper<Object, Text, LongWritable, Text> {

        final List<Double> readings = new ArrayList<Double>();
        final List<Double> temperatures = new ArrayList<Double>();
        // Meter ID of the samples currently buffered; only meaningful while the buffers are non-empty.
        private int preMeterID = -1;
        // Scratch buffer reused across flushes to build the output value.
        final StringBuilder ret = new StringBuilder();

        /**
         * Parses one input record and appends its sample to the buffers,
         * flushing the previous meter first when the meter ID changes.
         *
         * @param offset  input key supplied by the input format (unused)
         * @param line    one comma-separated input record
         * @param context task context used to emit results
         * @throws NumberFormatException          if a numeric field cannot be parsed
         * @throws ArrayIndexOutOfBoundsException if the record has fewer than four fields
         */
        @Override
        protected void map(Object offset, Text line, final Context context)
                throws IOException, InterruptedException {
            String record = line.toString();
            // A blank line carries no sample; skip it instead of failing the task in parseInt.
            if (record.trim().isEmpty()) {
                return;
            }
            String[] fieldValues = record.split(",");
            // Parse everything before touching the buffers so a bad record cannot
            // leave the two lists with different lengths.
            int curMeterID = Integer.parseInt(fieldValues[0]);
            double reading = Double.parseDouble(fieldValues[2]);
            double temperature = Double.parseDouble(fieldValues[3]);

            // Flush on meter change. Testing the buffer (rather than comparing
            // preMeterID with a -1 sentinel) keeps a meter whose real ID is -1 working.
            if (!readings.isEmpty() && preMeterID != curMeterID) {
                computerAndWriteOutput(context);
            }
            preMeterID = curMeterID;
            readings.add(reading);
            temperatures.add(temperature);
        }

        /**
         * Flushes the samples of the last meter seen by this task.
         */
        @Override
        protected void cleanup(Context context) throws java.io.IOException, java.lang.InterruptedException {
            computerAndWriteOutput(context);
        }

        /**
         * Runs {@link Threelines#threel} on the buffered samples and, when it
         * returns a non-null result, writes it keyed by the buffered meter ID
         * in the form {@code [[a,b],[c,d],...]} (the first two values of each
         * returned row). The buffers are cleared afterwards either way. Does
         * nothing when no samples are buffered.
         *
         * @param context task context used to emit the result
         */
        protected void computerAndWriteOutput(Context context) throws IOException, InterruptedException {
            if (temperatures.isEmpty() || readings.isEmpty()) {
                return;
            }
            double[][] points = Threelines.threel(temperatures, readings);
            if (points != null) {
                ret.setLength(0);
                ret.append("[");
                for (int i = 0; i < points.length; ++i) {
                    double[] point = points[i];
                    if (i > 0) {
                        ret.append(",");
                    }
                    ret.append("[").append(point[0]).append(",").append(point[1]).append("]");
                }
                ret.append("]");
                context.write(new LongWritable(preMeterID), new Text(ret.toString()));
            }
            temperatures.clear();
            readings.clear();
        }
    }

    /**
     * Configures and runs the map-only job, blocking until it completes.
     *
     * @param args command-line arguments; after generic Hadoop options are
     *             stripped, exactly {@code <input> <output>} must remain
     * @return 0 on success, 1 if the job failed, 2 on wrong usage
     * @throws IOException      if the job cannot be set up
     * @throws RuntimeException wrapping any exception thrown while waiting for the job
     */
    @Override
    public int run(String[] args) throws IOException {
        Configuration conf = getConf();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain <input> <output>");
            // Return the status instead of calling System.exit so that callers
            // embedding this Tool keep control; main() turns it into the exit code.
            return 2;
        }

        // NOTE(review): a 100-byte max split size presumably has no effect if
        // UnsplitableTextInputFormat really disables splitting - confirm.
        conf.set("mapreduce.input.fileinputformat.split.maxsize", "100");
        Job job = Job.getInstance(conf, "ThreelMain");
        job.setJarByClass(ThreelMain.class);

        job.setInputFormatClass(UnsplitableTextInputFormat.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Map-only job: mapper output is written directly to the output path.
        job.setNumReduceTasks(0);
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.out.println("\nStarting Job ...");
        final long startTime = System.currentTimeMillis();
        try {
            if (!job.waitForCompletion(true)) {
                System.out.println("Job failed.");
                // Returning (not exiting) lets the finally block report the duration.
                return 1;
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag before propagating.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
            System.out.println("Duration is " + duration + " seconds.");
        }
        return 0;
    }

    /**
     * Command-line entry point; exits the JVM with the status returned by {@link #run}.
     */
    public static void main(String[] argv) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new ThreelMain(), argv));
    }
}