edu.iu.daal_linreg.LinRegDaalLauncher.java Source code

Java tutorial

Introduction

Here is the source code for edu.iu.daal_linreg.LinRegDaalLauncher.java

Source

/*
 * Copyright 2013-2016 Indiana University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package edu.iu.daal_linreg;

import java.io.IOException;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.concurrent.ExecutionException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.filecache.DistributedCache;
import java.net.URI;

import edu.iu.fileformat.MultiFileInputFormat;

/**
 * Command-line entry point that configures and submits the linear regression
 * map-collective job.
 *
 * <p>Expected arguments, in order: training input directory, test input
 * directory, test ground-truth directory, work directory, memory per mapper
 * (MB), batch size, number of mappers, and threads per worker.
 *
 * <p>Exit codes returned by {@link #run(String[])}: {@code 0} when the job
 * completes successfully, {@code -1} when the arguments are missing or
 * invalid, and {@code 1} when the job was submitted but did not succeed.
 */
public class LinRegDaalLauncher extends Configured implements Tool {

    /** Number of positional arguments required by {@link #run(String[])}. */
    private static final int REQUIRED_ARG_COUNT = 8;

    /** Directory holding the shared native libraries shipped to every task. */
    private static final String NATIVE_LIBRARY_DIR = "/Hadoop/Libraries/";

    /** Shared libraries registered in the distributed cache, in registration order. */
    private static final String[] NATIVE_LIBRARIES = {
        "libJavaAPI.so",
        "libtbb.so.2",
        "libtbb.so",
        "libtbbmalloc.so.2",
        "libtbbmalloc.so"
    };

    /**
     * Amount (MB) subtracted from the mapper memory before the JVM heap is
     * sized. The heap is half of what remains, so the mapper memory must be
     * strictly larger than this value for the generated -Xmx/-Xms/-Xmn options
     * to be positive.
     * NOTE(review): presumably headroom for native allocations - confirm.
     */
    private static final int HEAP_OFFSET_MB = 2000;

    /**
     * Runs the launcher through {@link ToolRunner} and exits the JVM with the
     * code returned by {@link #run(String[])}.
     *
     * @param argv command-line arguments (generic Hadoop options followed by
     *             the eight positional arguments)
     * @throws Exception if the tool fails with an unhandled error
     */
    public static void main(String[] argv) throws Exception {
        int res = ToolRunner.run(new Configuration(), new LinRegDaalLauncher(), argv);
        System.exit(res);
    }

    /**
     * Registers the native libraries in the distributed cache, validates the
     * command-line arguments and launches the job.
     *
     * @param args the eight positional arguments described on the class
     * @return {@code 0} on success, {@code -1} on invalid arguments,
     *         {@code 1} if the job did not complete successfully
     * @throws Exception if cache registration, file-system access or job
     *                   submission fails
     */
    @Override
    public int run(String[] args) throws Exception {

        /* Put shared libraries into the distributed cache */
        Configuration conf = this.getConf();

        DistributedCache.createSymlink(conf);
        for (String library : NATIVE_LIBRARIES) {
            // The "#name" fragment is the symlink name created in the task's working directory.
            DistributedCache.addCacheFile(new URI(NATIVE_LIBRARY_DIR + library + "#" + library), conf);
        }

        if (args.length < REQUIRED_ARG_COUNT) {
            printUsage();
            return -1;
        }
        String inputDirPath = args[0];
        String testDirPath = args[1];
        String testGroundTruthDirPath = args[2];
        String workDirPath = args[3];

        int mem;
        int batchSize;
        int numMapTasks;
        int numThreadsPerWorker;
        try {
            mem = Integer.parseInt(args[4]);
            batchSize = Integer.parseInt(args[5]);
            numMapTasks = Integer.parseInt(args[6]);
            numThreadsPerWorker = Integer.parseInt(args[7]);
        } catch (NumberFormatException e) {
            // Report a malformed number as a usage error instead of a raw stack trace.
            System.err.println("Invalid numeric argument: " + e.getMessage());
            printUsage();
            return -1;
        }

        System.out.println("Number of Map Tasks = " + numMapTasks);
        System.out.println("Number of Threads per Worker = " + numThreadsPerWorker);

        // The heap is derived from (mem - HEAP_OFFSET_MB); anything at or below the
        // offset would yield a zero or negative -Xmx value and an unstartable mapper JVM.
        if (mem <= HEAP_OFFSET_MB) {
            System.err.println("Memory per mapper must be greater than " + HEAP_OFFSET_MB + " MB, got " + mem);
            return -1;
        }

        boolean jobSuccess = launch(inputDirPath, testDirPath, testGroundTruthDirPath, workDirPath, mem,
                batchSize, numMapTasks, numThreadsPerWorker);
        return jobSuccess ? 0 : 1;
    }

    /** Prints the expected command line followed by the generic Hadoop options. */
    private static void printUsage() {
        System.err.println("Usage: edu.iu.daal_linreg.LinRegDaalLauncher "
                + "<input train dir> <input test dir> <input ground truth dir> <workDirPath> "
                + "<mem per mapper> <batch size> <num mappers> <thread per worker>");
        ToolRunner.printGenericCommandUsage(System.err);
    }

    /** Returns the current wall-clock time formatted as {@code HH:mm:ss.SSS}. */
    private static String timestamp() {
        return new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime());
    }

    /**
     * Prepares the work directory, runs the job and reports the total elapsed
     * time.
     *
     * <p>An existing work directory is deleted recursively before the run. The
     * {@code model} sub-directory is created; the {@code output} sub-directory
     * is deliberately left for the job to create.
     *
     * @return {@code true} if the job completed successfully
     */
    private boolean launch(String inputDirPath, String testDirPath, String testGroundTruthDirPath,
            String workDirPath, int mem, int batchSize, int numMapTasks, int numThreadsPerWorker)
            throws IOException, URISyntaxException, InterruptedException, ExecutionException,
            ClassNotFoundException {

        Configuration configuration = getConf();
        // Not closed here: FileSystem.get may hand back a cached instance that is
        // shared with the job client.
        FileSystem fs = FileSystem.get(configuration);
        Path inputDir = new Path(inputDirPath);
        Path workDir = new Path(workDirPath);
        if (fs.exists(workDir)) {
            fs.delete(workDir, true);
            fs.mkdirs(workDir);
        }
        Path modelDir = new Path(workDirPath, "model");
        fs.mkdirs(modelDir);
        // Do not make output dir
        Path outputDir = new Path(workDirPath, "output");
        long startTime = System.currentTimeMillis();

        boolean jobSuccess = runLinReg(inputDir, testDirPath, testGroundTruthDirPath, mem, batchSize,
                numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration);

        long endTime = System.currentTimeMillis();
        System.out.println("Total Linear Regression Execution Time: " + (endTime - startTime));
        return jobSuccess;
    }

    /**
     * Configures the job, submits it, waits for completion and logs start/end
     * timestamps plus the elapsed time.
     *
     * @return {@code true} if {@code waitForCompletion} reported success;
     *         on failure the job is killed and {@code false} is returned
     */
    private boolean runLinReg(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem,
            int batchSize, int numMapTasks, int numThreadsPerWorker, Path modelDir, Path outputDir,
            Configuration configuration)
            throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {

        System.out.println("Starting Job");

        long perJobSubmitTime = System.currentTimeMillis();
        System.out.println("Start Job# " + timestamp());

        Job linRegJob = configureLinRegJob(inputDir, testDirPath, testGroundTruthDirPath, mem, batchSize,
                numMapTasks, numThreadsPerWorker, modelDir, outputDir, configuration);

        boolean jobSuccess = linRegJob.waitForCompletion(true);

        System.out.println("End Job# " + timestamp());

        System.out.println(
                "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");
        // ----------------------------------------
        if (!jobSuccess) {
            linRegJob.killJob();
            System.out.println("LinReg Job failed");
        }
        return jobSuccess;
    }

    /**
     * Builds the job: stores the test paths and tuning parameters in the
     * configuration, selects the map-collective framework, sizes the mapper
     * JVM from {@code mem}, and wires the input/output paths and mapper class.
     *
     * <p>{@code modelDir} is accepted for signature symmetry but is not read
     * by this method.
     *
     * @param mem memory per mapper in MB; must be greater than
     *            {@link #HEAP_OFFSET_MB} for the heap options to be valid
     * @return the configured, not yet submitted, job
     */
    private Job configureLinRegJob(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem,
            int batchSize, int numMapTasks, int numThreadsPerWorker, Path modelDir, Path outputDir,
            Configuration configuration) throws IOException, URISyntaxException {

        // These must be set before Job.getInstance so they are part of the job's configuration.
        configuration.set(Constants.TEST_FILE_PATH, testDirPath);
        configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
        configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
        configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
        configuration.setInt(Constants.BATCH_SIZE, batchSize);

        Job job = Job.getInstance(configuration, "linreg_job");
        JobConf jobConf = (JobConf) job.getConfiguration();

        jobConf.set("mapreduce.framework.name", "map-collective");

        jobConf.setInt("mapreduce.job.max.split.locations", 10000);

        // mapreduce.map.collective.memory.mb
        // 125000
        jobConf.setInt("mapreduce.map.collective.memory.mb", mem);

        // Heap (-Xmx and -Xms) is half of the memory left after the offset;
        // the young generation (-Xmn) is a quarter of the heap.
        int xmx = (int) Math.ceil((mem - HEAP_OFFSET_MB) * 0.5);
        int xmn = (int) Math.ceil(0.25 * xmx);
        jobConf.set("mapreduce.map.collective.java.opts",
                "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");

        jobConf.setNumMapTasks(numMapTasks);

        FileInputFormat.setInputPaths(job, inputDir);
        FileOutputFormat.setOutputPath(job, outputDir);

        job.setInputFormatClass(MultiFileInputFormat.class);
        job.setJarByClass(LinRegDaalLauncher.class);
        job.setMapperClass(LinRegDaalCollectiveMapper.class);
        // Map-only job: no reduce phase.
        job.setNumReduceTasks(0);

        System.out.println("Launcher launched");
        return job;
    }
}