edu.indiana.soic.ts.mapreduce.VectorCalculator.java Source code

Java tutorial

Introduction

Here is the source code for edu.indiana.soic.ts.mapreduce.VectorCalculator.java

Source

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

package edu.indiana.soic.ts.mapreduce;

import edu.indiana.soic.ts.utils.TSConfiguration;
import edu.indiana.soic.ts.utils.TableUtils;
import edu.indiana.soic.ts.utils.Constants;
import edu.indiana.soic.ts.utils.Utils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.text.ParseException;
import java.util.*;
import java.util.concurrent.TimeUnit;

public class VectorCalculator {
    private static final Logger LOG = LoggerFactory.getLogger(VectorCalculator.class);
    private String startDate;
    private String endDate;
    private int window;
    private int headShift;
    private int tailShift;
    private TSConfiguration tsConfiguration;

    public void configure(TSConfiguration tsConfiguration) {
        Map conf = tsConfiguration.getConf();
        this.tsConfiguration = tsConfiguration;
        startDate = (String) conf.get(TSConfiguration.START_DATE);
        endDate = (String) conf.get(TSConfiguration.END_DATE);
        this.window = (int) conf.get(TSConfiguration.TIME_WINDOW);
        this.headShift = (int) conf.get(TSConfiguration.TIME_SHIFT_HEAD);
        this.tailShift = (int) conf.get(TSConfiguration.TIME_SHIFT_TAIL);
        if (startDate == null || startDate.isEmpty()) {
            throw new RuntimeException("Start date should be specified");
        }
        if (endDate == null || endDate.isEmpty()) {
            throw new RuntimeException("End date should be specified");
        }
    }

    public void submitJob() {
        try {
            Configuration config = HBaseConfiguration.create();
            config.set("mapreduce.output.textoutputformat.separator", ",");
            TreeMap<String, List<Date>> genDates = TableUtils.genDates(TableUtils.getDate(startDate),
                    TableUtils.getDate(endDate), this.window, TimeUnit.DAYS, this.headShift, this.tailShift,
                    TimeUnit.DAYS);
            LOG.info("Start Date : {} End Date : {}, Gen dates size: {}", startDate, endDate, genDates.size());
            for (String id : genDates.keySet()) {
                LOG.info("Vector calculation for: {}", id);
                Scan scan = new Scan();
                scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
                scan.setCacheBlocks(false); // don't set to true for MR jobs
                List<Date> dates = genDates.get(id);
                String start = TableUtils.convertDateToString(dates.get(0));
                String end = TableUtils.convertDateToString(dates.get(1));
                List<String> suitableDateList = TableUtils.getDates(start, end);
                config.set(Constants.Job.NO_OF_DAYS, String.valueOf(suitableDateList.size()));
                LOG.info("Vector calculator for start: {}, end: {} time window: {}, shift: {}, days: {}", startDate,
                        endDate, window, headShift, suitableDateList.size());
                for (String date : suitableDateList) {
                    scan.addColumn(Constants.STOCK_TABLE_CF_BYTES, date.getBytes());
                }
                Job job = new Job(config, "Vector calculation: " + id);
                job.setJarByClass(VectorCalculator.class);
                TableMapReduceUtil.initTableMapperJob(Constants.STOCK_TABLE_NAME, // input HBase table name
                        scan, // Scan instance to control CF and attribute selection
                        VectorCalculatorMapper.class, // mapper
                        IntWritable.class, // mapper output key
                        Text.class, // mapper output value
                        job);
                // adjust directories as required
                String outPutDir = tsConfiguration.getInterMediateVectorDir() + "/" + id;
                FileOutputFormat.setOutputPath(job, new Path(outPutDir));
                boolean b = job.waitForCompletion(true);
                if (!b) {
                    LOG.error("Error with job for vector calculation");
                    throw new RuntimeException("Error with job for vector calculation");
                }
                Utils.concatOutput(config, id, outPutDir, tsConfiguration.getVectorDir());
            }
        } catch (ParseException e) {
            LOG.error("Error while parsing date", e);
            throw new RuntimeException("Error while parsing date", e);
        } catch (InterruptedException | ClassNotFoundException | IOException e) {
            LOG.error("Error while creating the job", e);
            throw new RuntimeException("Error while creating the job", e);
        }
    }

    public static void main(String[] args) {
        String configFile = Utils.getConfigurationFile(args);
        TSConfiguration tsConfiguration = new TSConfiguration(configFile);
        VectorCalculator vectorCalculator = new VectorCalculator();
        vectorCalculator.configure(tsConfiguration);
        vectorCalculator.submitJob();
    }
}