at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBenchmark.java Source code

Java tutorial

Introduction

Here is the source code for at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBenchmark.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.illecker.hama.hybrid.examples.kmeans;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hama.bsp.BSPJob;

import com.google.caliper.Benchmark;
import com.google.caliper.Param;
import com.google.caliper.api.Macrobenchmark;
import com.google.caliper.runner.CaliperMain;

public class KMeansHybridBenchmark extends Benchmark {

    // Plot 1
    // @Param({ "250000", "500000", "750000", "1000000", "1250000", "1500000",
    // "1750000", "2000000" })
    // private long n = 2000000;

    // Plot 1 and Plot 2 support 2 threads only,
    // because more threads would consume more than 16G RAM
    private long n = 1000000;

    // Plot 2
    // @Param({ "50", "100", "150", "200", "250", "300", "350", "400", "450",
    // "500" })
    private int k = 500;

    // @Param
    // CalcType type;

    public enum CalcType {
        CPU, GPU
    };

    // Plot 3
    // maximal 4 CPU tasks and 1 GPU task
    @Param({ "1", "2", "3", "4", "5" })
    private int bspTaskNum; // = 5; // = 2;
    private final int maxTaskNum = 5;

    // GPU percentage of the input data
    // @Param({ "20", "30", "40", "50", "60", "70", "75", "80", "90" })
    private int GPUWorkload = 0;

    private int vectorDimension = 3;
    private int maxIteration = 10;

    private static final Path CONF_TMP_DIR = new Path(
            "output/hama/hybrid/examples/kmeans/bench-" + System.currentTimeMillis());
    private static final Path CONF_INPUT_DIR = new Path(CONF_TMP_DIR, "input");
    private static final Path CONF_OUTPUT_DIR = new Path(CONF_TMP_DIR, "output");
    private static final Path CONF_CENTER_DIR = new Path(CONF_TMP_DIR, "centers");

    private Configuration m_conf = null;
    private boolean m_runLocally = false;

    // gridSize = amount of blocks and multiprocessors
    public static final int GRID_SIZE = 14;
    // blockSize = amount of threads
    public static final int BLOCK_SIZE = 384; // 1024;

    @Override
    protected void setUp() throws Exception {
        m_conf = new Configuration();

        // Try to load Hadoop configuration
        String HADOOP_HOME = System.getenv("HADOOP_HOME");
        String HADOOP_INSTALL = System.getenv("HADOOP_INSTALL");
        if ((HADOOP_HOME != null) || (HADOOP_INSTALL != null) && (!m_runLocally)) {
            String HADOOP = ((HADOOP_HOME != null) ? HADOOP_HOME : HADOOP_INSTALL);

            m_conf.addResource(new Path(HADOOP, "src/core/core-default.xml"));
            m_conf.addResource(new Path(HADOOP, "src/hdfs/hdfs-default.xml"));
            m_conf.addResource(new Path(HADOOP, "src/mapred/mapred-default.xml"));
            m_conf.addResource(new Path(HADOOP, "conf/core-site.xml"));
            m_conf.addResource(new Path(HADOOP, "conf/hdfs-site.xml"));
            m_conf.addResource(new Path(HADOOP, "conf/mapred-site.xml"));
            // System.out.println("Loaded Hadoop configuration from " + HADOOP);

            try {
                // Connect to HDFS Filesystem
                FileSystem.get(m_conf);
            } catch (Exception e) {
                // HDFS not reachable run Benchmark locally
                m_conf = new Configuration();
                m_runLocally = true;
            }
            // System.out.println("Run Benchmark local: " + m_runLocally);
        }

        // Try to load Hama configuration
        String HAMA_HOME = System.getenv("HAMA_HOME");
        String HAMA_INSTALL = System.getenv("HAMA_INSTALL");
        if ((HAMA_HOME != null) || (HAMA_INSTALL != null) && (!m_runLocally)) {
            String HAMA = ((HAMA_HOME != null) ? HAMA_HOME : HAMA_INSTALL);

            m_conf.addResource(new Path(HAMA, "conf/hama-default.xml"));
            m_conf.addResource(new Path(HAMA, "conf/hama-site.xml"));
            // System.out.println("Loaded Hama configuration from " + HAMA);
        }

        // Setup KMeans config variables
        m_conf.setBoolean(KMeansHybridBSP.CONF_DEBUG, false);
        m_conf.setBoolean("hama.pipes.logging", false);
        m_conf.setBoolean(KMeansHybridBSP.CONF_TIME, false);

        // Set GPU blockSize and gridSize
        m_conf.set(KMeansHybridBSP.CONF_BLOCKSIZE, "" + BLOCK_SIZE);
        m_conf.set(KMeansHybridBSP.CONF_GRIDSIZE, "" + GRID_SIZE);
        // Set maxIterations for KMeans
        m_conf.setInt(KMeansHybridBSP.CONF_MAX_ITERATIONS, maxIteration);
        // Set n for KMeans
        m_conf.setLong(KMeansHybridBSP.CONF_N, n);
        // Set GPUPercentage
        m_conf.setInt(KMeansHybridBSP.CONF_GPU_PERCENTAGE, GPUWorkload);

        Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
        Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
        m_conf.set(KMeansHybridBSP.CONF_CENTER_IN_PATH, centerIn.toString());
        m_conf.set(KMeansHybridBSP.CONF_CENTER_OUT_PATH, centerOut.toString());

        // CPU vs GPU benchmark
        // Plot 1 and 2
        int numGpuBspTask = 0;
        // if (type == CalcType.GPU) {
        // bspTaskNum = 1;
        // numGpuBspTask = 1;
        // GPUWorkload = 100;
        // }

        // CPU + GPU Hybrid benchmark
        // Plot 3
        if (bspTaskNum == maxTaskNum) {
            numGpuBspTask = 1;
            GPUWorkload = 75;
        } else {
            numGpuBspTask = 0;
        }

        // Set CPU tasks
        m_conf.setInt("bsp.peers.num", bspTaskNum);
        // Set GPU tasks
        m_conf.setInt("bsp.peers.gpu.num", numGpuBspTask);

        // Generate input data
        KMeansHybridBSP.prepareInputData(m_conf, FileSystem.get(m_conf), CONF_INPUT_DIR, centerIn, bspTaskNum,
                numGpuBspTask, n, k, vectorDimension, null, GPUWorkload);

        // Debug output
        // System.out.println("CalcType: " + type);
        System.out.println("CONF_TMP_DIR: " + CONF_TMP_DIR.toString());
        System.out.println("NumBspTask: " + m_conf.getInt("bsp.peers.num", 0) + " NumGpuBspTask: "
                + m_conf.getInt("bsp.peers.gpu.num", 0));
        System.out.println("n: " + n + " k: " + k + " vectorDimension: " + vectorDimension + " maxIteration: "
                + maxIteration + " GPUWorkload: " + GPUWorkload + "%");
    }

    @Override
    protected void tearDown() throws Exception {
        FileSystem fs = FileSystem.get(m_conf);
        fs.delete(CONF_TMP_DIR, true);
    }

    @Macrobenchmark
    public void timeCalculate() {
        doBenchmark();
    }

    public void doBenchmark() {
        try {
            ToolRunner.run(new KMeans(), null);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private class KMeans extends Configured implements Tool {
        public KMeans() {
        }

        @Override
        public int run(String[] arg0) throws Exception {
            BSPJob job = KMeansHybridBSP.createKMeansHybridBSPConf(m_conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

            long startTime = System.currentTimeMillis();
            if (job.waitForCompletion(true)) {
                System.out.println(
                        "Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
            }

            return 0;
        }
    }

    public static void main(String[] args) {
        CaliperMain.main(KMeansHybridBenchmark.class, args);
    }
}