at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java Source code

Java tutorial

Introduction

Here is the source code for at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.illecker.hama.hybrid.examples.kmeans;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Writable;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.bsp.BSPPeer;
import org.apache.hama.bsp.FileOutputFormat;
import org.apache.hama.bsp.SequenceFileInputFormat;
import org.apache.hama.bsp.SequenceFileOutputFormat;
import org.apache.hama.bsp.gpu.HybridBSP;
import org.apache.hama.bsp.sync.SyncException;
import org.apache.hama.commons.io.PipesVectorWritable;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleVector;
import org.trifort.rootbeer.runtime.Context;
import org.trifort.rootbeer.runtime.Rootbeer;
import org.trifort.rootbeer.runtime.StatsRow;
import org.trifort.rootbeer.runtime.ThreadConfig;
import org.trifort.rootbeer.runtime.util.Stopwatch;

import com.google.common.base.Preconditions;

public class KMeansHybridBSP
        extends HybridBSP<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> {

    private static final Log LOG = LogFactory.getLog(KMeansHybridBSP.class);

    public static final String CONF_DEBUG = "kmeans.hybrid.debug";
    public static final String CONF_TIME = "piestimator.hybrid.time";
    public static final String CONF_MAX_ITERATIONS = "kmeans.hybrid.max.iterations";
    public static final String CONF_N = "kmeans.hybrid.n";
    public static final String CONF_CENTER_IN_PATH = "kmeans.hybrid.center.in.path";
    public static final String CONF_CENTER_OUT_PATH = "kmeans.hybrid.center.out.path";

    private static final Path CONF_TMP_DIR = new Path(
            "output/hama/hybrid/examples/kmeans/hybrid-" + System.currentTimeMillis());
    private static final Path CONF_INPUT_DIR = new Path(CONF_TMP_DIR, "input");
    private static final Path CONF_OUTPUT_DIR = new Path(CONF_TMP_DIR, "output");
    private static final Path CONF_CENTER_DIR = new Path(CONF_TMP_DIR, "centers");

    public static final String CONF_GPU_PERCENTAGE = "kmeans.hybrid.percentage";
    public static final String CONF_BLOCKSIZE = "kmeans.hybrid.blockSize";
    public static final String CONF_GRIDSIZE = "kmeans.hybrid.gridSize";

    // gridSize = amount of blocks and multiprocessors
    public static final int GRID_SIZE = 14;
    // blockSize = amount of threads
    public static final int BLOCK_SIZE = 384; // 1024;

    private boolean m_isDebuggingEnabled;
    private boolean m_timeMeasurement = false;
    private FSDataOutputStream m_logger;

    // a task local copy of our cluster centers
    private DoubleVector[] m_centers_cpu = null;
    private double[][] m_centers_gpu = null;

    // simple cache to speed up computation, because the algorithm is disk based
    // normally we want to rely on OS caching, but if not, we can cache in heap
    private List<DoubleVector> m_cache = new ArrayList<DoubleVector>();
    // numbers of maximum iterations to do
    private int m_maxIterations;

    private Configuration m_conf;

    private int m_gridSize;
    private int m_blockSize;

    /********************************* CPU *********************************/
    @Override
    public void setup(
            BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
            throws IOException {

        this.m_conf = peer.getConfiguration();
        this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false);
        this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);
        this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1);

        // Init logging
        if (m_isDebuggingEnabled) {
            try {
                FileSystem fs = FileSystem.get(m_conf);
                m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                        + "/BSP_" + peer.getTaskId() + ".log"));

            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        long startTime = 0;
        if (m_timeMeasurement) {
            startTime = System.currentTimeMillis();
        }

        // Init center vectors
        Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH));
        FileSystem fs = FileSystem.get(m_conf);

        final ArrayList<DoubleVector> centers = new ArrayList<DoubleVector>();
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, centroids, m_conf);
            PipesVectorWritable key = new PipesVectorWritable();
            NullWritable value = NullWritable.get();
            while (reader.next(key, value)) {
                DoubleVector center = key.getVector();
                centers.add(center);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }

        Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!");

        this.m_centers_cpu = centers.toArray(new DoubleVector[centers.size()]);

        long stopTime = 0;
        if (m_timeMeasurement) {
            stopTime = System.currentTimeMillis();
            LOG.info("# setupTime: " + ((stopTime - startTime) / 1000.0) + " sec");
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("PiEstimatorHybrid,setupTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
            }
        }
    }

    @Override
    public void bsp(
            BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
            throws IOException, SyncException, InterruptedException {

        long startTime = 0;
        long totalSyncTime = 0;
        if (m_timeMeasurement) {
            startTime = System.currentTimeMillis();
        }

        long converged;
        while (true) {
            assignCenters(peer);

            long syncTime = 0;
            if (m_timeMeasurement) {
                syncTime = System.currentTimeMillis();
            }

            peer.sync();

            if (m_timeMeasurement) {
                totalSyncTime += System.currentTimeMillis() - syncTime;
            }

            converged = updateCenters(peer);

            // Logging
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("bsp,converged: " + converged + "\n");
                m_logger.flush();
            }

            peer.reopenInput();

            if (converged == 0) {
                break;
            }
            if ((m_maxIterations > 0) && (m_maxIterations < peer.getSuperstepCount())) {
                break;
            }
        }

        recalculateAssignmentsAndWrite(peer);

        long stopTime = System.currentTimeMillis();
        if (m_timeMeasurement) {
            LOG.info("# bspGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec");
            LOG.info("# syncTime: " + (totalSyncTime / 1000.0) + " sec");

            if (m_isDebuggingEnabled) {
                m_logger.writeChars("PiEstimatorHybrid,bspTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
                m_logger.writeChars("PiEstimatorHybrid,syncTime: " + (totalSyncTime / 1000.0) + " sec\n\n");
            }
        }

        // Logging
        if (m_isDebuggingEnabled) {
            m_logger.close();
        }
    }

    private void assignCenters(
            BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
            throws IOException {

        // each task has all the centers, if a center has been updated it
        // needs to be broadcasted.
        final DoubleVector[] newCenterArray = new DoubleVector[m_centers_cpu.length];
        final int[] summationCount = new int[m_centers_cpu.length];

        // if our cache is empty, we have to read it from disk first
        if (m_cache.isEmpty()) {
            final PipesVectorWritable key = new PipesVectorWritable();
            final NullWritable value = NullWritable.get();
            while (peer.readNext(key, value)) {
                DoubleVector deepCopy = key.getVector().deepCopy();
                m_cache.add(deepCopy);
                // but do the assignment directly
                assignCentersInternal(newCenterArray, summationCount, deepCopy);
            }
        } else {
            // now we can iterate in memory and check against the centers
            for (DoubleVector v : m_cache) {
                assignCentersInternal(newCenterArray, summationCount, v);
            }
        }

        // now send messages about the local updates to each other peer
        for (int i = 0; i < newCenterArray.length; i++) {
            if (newCenterArray[i] != null) {
                for (String peerName : peer.getAllPeerNames()) {
                    peer.send(peerName, new CenterMessage(i, summationCount[i], newCenterArray[i]));
                    // Logging
                    // if (m_isDebuggingEnabled) {
                    // m_logger.writeChars("assignCenters,sent,peerName=" + peerName
                    // + ",CenterMessage=" + i + "," + summationCount[i] + ","
                    // + Arrays.toString(newCenterArray[i].toArray()) + "\n");
                    // m_logger.flush();
                    // }
                }
            }
        }

    }

    private void assignCentersInternal(final DoubleVector[] newCenterArray, final int[] summationCount,
            final DoubleVector key) throws IOException {

        final int lowestDistantCenter = getNearestCenter(key);
        final DoubleVector clusterCenter = newCenterArray[lowestDistantCenter];

        if (clusterCenter == null) {
            newCenterArray[lowestDistantCenter] = key;
        } else {
            // add the vector to the center
            newCenterArray[lowestDistantCenter] = newCenterArray[lowestDistantCenter].addUnsafe(key);
        }
        summationCount[lowestDistantCenter]++;
    }

    private int getNearestCenter(DoubleVector key) throws IOException {
        int lowestDistantCenter = 0;
        double lowestDistance = Double.MAX_VALUE;

        for (int i = 0; i < m_centers_cpu.length; i++) {
            final double estimatedDistance = measureEuclidianDistance(m_centers_cpu[i], key);

            // Logging
            // if (m_isDebuggingEnabled) {
            // m_logger.writeChars("getNearestCenter,estimatedDistance: "
            // + estimatedDistance + "\n");
            // m_logger.flush();
            // }

            // check if we have a can assign a new center, because we
            // got a lower distance
            if (estimatedDistance < lowestDistance) {
                lowestDistance = estimatedDistance;
                lowestDistantCenter = i;
            }
        }
        // Logging
        // if (m_isDebuggingEnabled) {
        // m_logger.writeChars("getNearestCenter,lowestDistantCenter: "
        // + lowestDistantCenter + "\n");
        // m_logger.flush();
        // }
        return lowestDistantCenter;
    }

    private double measureEuclidianDistance(DoubleVector vec1, DoubleVector vec2) {
        return Math.sqrt(vec2.subtractUnsafe(vec1).pow(2).sum());
    }

    private long updateCenters(
            BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
            throws IOException {

        // this is the update step
        DoubleVector[] msgCenters = new DoubleVector[m_centers_cpu.length];
        int[] incrementSum = new int[m_centers_cpu.length];

        CenterMessage msg;
        // basically just summing incoming vectors
        while ((msg = peer.getCurrentMessage()) != null) {

            // Logging
            if (m_isDebuggingEnabled) {
                m_logger.writeChars("updateCenters,receive,CenterMessage=" + msg.getCenterIndex() + ","
                        + msg.getIncrementCounter() + "," + Arrays.toString(msg.getData().toArray()) + "\n");
                m_logger.flush();
            }

            DoubleVector oldCenter = msgCenters[msg.getCenterIndex()];
            DoubleVector newCenter = msg.getData();
            incrementSum[msg.getCenterIndex()] += msg.getIncrementCounter();

            if (oldCenter == null) {
                msgCenters[msg.getCenterIndex()] = newCenter;
            } else {
                msgCenters[msg.getCenterIndex()] = oldCenter.addUnsafe(newCenter);
            }
        }
        // divide by how often we globally summed vectors
        for (int i = 0; i < msgCenters.length; i++) {
            // and only if we really have an update for c
            if (msgCenters[i] != null) {
                msgCenters[i] = msgCenters[i].divide(incrementSum[i]);
            }
        }

        // finally check for convergence by the absolute difference
        long convergedCounter = 0L;
        for (int i = 0; i < msgCenters.length; i++) {
            final DoubleVector oldCenter = m_centers_cpu[i];
            if (msgCenters[i] != null) {

                double calculateError = oldCenter.subtractUnsafe(msgCenters[i]).abs().sum();

                // Logging
                // if (m_isDebuggingEnabled) {
                // m_logger.writeChars("updateCenters,i: " + i + "\n");
                // m_logger.writeChars("updateCenters,oldCenter: "
                // + Arrays.toString(oldCenter.toArray()) + "\n");
                // m_logger.writeChars("updateCenters,msgCenters[i]: "
                // + Arrays.toString(msgCenters[i].toArray()) + "\n");
                // m_logger.writeChars("updateCenters,calculateError: " + calculateError
                // + "\n");
                // m_logger.flush();
                // }

                if (calculateError > 0.0d) {
                    m_centers_cpu[i] = msgCenters[i];
                    convergedCounter++;

                    // Logging
                    if (m_isDebuggingEnabled) {
                        m_logger.writeChars(
                                "updateCenters,m_centers_cpu: " + Arrays.toString(msgCenters[i].toArray()) + "\n");
                        m_logger.flush();
                    }
                }

            }
        }
        return convergedCounter;
    }

    private void recalculateAssignmentsAndWrite(
            BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
            throws IOException {

        IntWritable keyWrite = new IntWritable();
        for (DoubleVector v : m_cache) {
            final int lowestDistantCenter = getNearestCenter(v);
            keyWrite.set(lowestDistantCenter);
            peer.write(keyWrite, new PipesVectorWritable(v));
        }

        // just on the first task write the centers to filesystem to prevent
        // collisions
        if (peer.getPeerName().equals(peer.getPeerName(0))) {
            String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
            if (pathString != null) {
                final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf), m_conf,
                        new Path(pathString), PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);
                final NullWritable value = NullWritable.get();

                for (DoubleVector center : m_centers_cpu) {
                    dataWriter.append(new PipesVectorWritable(center), value);
                }
                dataWriter.close();
            }
        }
    }

    /********************************* GPU *********************************/
    @Override
    public void setupGpu(
            BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer)
            throws IOException, SyncException, InterruptedException {

        this.m_conf = peer.getConfiguration();
        this.m_timeMeasurement = m_conf.getBoolean(CONF_TIME, false);
        this.m_isDebuggingEnabled = m_conf.getBoolean(CONF_DEBUG, false);
        this.m_maxIterations = m_conf.getInt(CONF_MAX_ITERATIONS, -1);
        this.m_blockSize = Integer.parseInt(this.m_conf.get(CONF_BLOCKSIZE));
        this.m_gridSize = Integer.parseInt(this.m_conf.get(CONF_GRIDSIZE));

        // Init logging
        if (m_isDebuggingEnabled) {
            try {
                FileSystem fs = FileSystem.get(m_conf);
                m_logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) m_conf))
                        + "/BSP_" + peer.getTaskId() + ".log"));

            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        long startTime = 0;
        if (m_timeMeasurement) {
            startTime = System.currentTimeMillis();
        }

        // Init center vectors
        Path centroids = new Path(m_conf.get(CONF_CENTER_IN_PATH));
        FileSystem fs = FileSystem.get(m_conf);

        final List<double[]> centers = new ArrayList<double[]>();
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, centroids, m_conf);
            PipesVectorWritable key = new PipesVectorWritable();
            NullWritable value = NullWritable.get();
            while (reader.next(key, value)) {
                centers.add(key.getVector().toArray());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }

        Preconditions.checkArgument(centers.size() > 0, "Centers file must contain at least a single center!");

        // build centers_gpu double[][]
        this.m_centers_gpu = new double[centers.size()][centers.get(0).length];
        for (int i = 0; i < centers.size(); i++) {
            double[] vector = centers.get(i);
            for (int j = 0; j < vector.length; j++) {
                this.m_centers_gpu[i][j] = vector[j];
            }
        }

        long stopTime = 0;
        if (m_timeMeasurement) {
            stopTime = System.currentTimeMillis();
            LOG.info("# setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec");
            if (m_isDebuggingEnabled) {
                m_logger.writeChars(
                        "PiEstimatorHybrid,setupGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
            }
        }
    }

    @Override
    public void bspGpu(
            BSPPeer<PipesVectorWritable, NullWritable, IntWritable, PipesVectorWritable, CenterMessage> peer,
            Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

        long startTime = 0;
        if (m_timeMeasurement) {
            startTime = System.currentTimeMillis();
        }

        // Fetch inputs
        final List<DoubleVector> inputs = new ArrayList<DoubleVector>();
        final PipesVectorWritable key = new PipesVectorWritable();
        final NullWritable nullValue = NullWritable.get();
        while (peer.readNext(key, nullValue)) {
            inputs.add(key.getVector());
        }
        // Convert inputs to double[][]
        double[][] inputsArr = new double[inputs.size()][inputs.get(0).getLength()];
        for (int i = 0; i < inputs.size(); i++) {
            double[] vector = inputs.get(i).toArray();
            for (int j = 0; j < vector.length; j++) {
                inputsArr[i][j] = vector[j];
            }
        }

        // Logging
        if (m_isDebuggingEnabled) {
            m_logger.writeChars("KMeansHybrid.bspGpu executed on GPU!\n");
            m_logger.writeChars(
                    "KMeansHybrid.bspGpu blockSize: " + m_blockSize + " gridSize: " + m_gridSize + "\n");
            m_logger.writeChars("KMeansHybrid.bspGpu inputSize: " + inputs.size() + "\n");
        }

        KMeansHybridKernel kernel = new KMeansHybridKernel(inputsArr, m_centers_gpu,
                m_conf.getInt(CONF_MAX_ITERATIONS, 0), peer.getAllPeerNames());

        // Run GPU Kernels
        Context context = rootbeer.createDefaultContext();
        Stopwatch watch = new Stopwatch();
        watch.start();
        rootbeer.run(kernel, new ThreadConfig(m_blockSize, m_gridSize, m_blockSize * m_gridSize), context);
        watch.stop();

        // Output inputs with corresponding new center id
        for (int i = 0; i < inputs.size(); i++) {
            peer.write(new IntWritable(kernel.m_input_centers[i]), new PipesVectorWritable(inputs.get(i)));
        }

        // Output new Centers only on first task
        // to prevent collisions
        if (peer.getPeerName().equals(peer.getPeerName(0))) {
            String pathString = m_conf.get(CONF_CENTER_OUT_PATH);
            if (pathString != null) {
                final SequenceFile.Writer dataWriter = SequenceFile.createWriter(FileSystem.get(m_conf), m_conf,
                        new Path(pathString), PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

                for (int i = 0; i < kernel.m_centers.length; i++) {
                    dataWriter.append(new PipesVectorWritable(new DenseDoubleVector(kernel.m_centers[i])),
                            nullValue);
                }
                dataWriter.close();
            }
        }

        long stopTime = System.currentTimeMillis();
        if (m_timeMeasurement) {
            LOG.info("# bspGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec");

            if (m_isDebuggingEnabled) {
                m_logger.writeChars(
                        "PiEstimatorHybrid,bspGpuTime: " + ((stopTime - startTime) / 1000.0) + " sec\n");
            }
        }

        // Logging
        if (m_isDebuggingEnabled) {
            List<StatsRow> stats = context.getStats();
            for (StatsRow row : stats) {
                m_logger.writeChars("  StatsRow:\n");
                m_logger.writeChars("    serial time: " + row.getSerializationTime() + "\n");
                m_logger.writeChars("    exec time: " + row.getExecutionTime() + "\n");
                m_logger.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
                m_logger.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
                m_logger.writeChars("    num threads: " + row.getNumThreads() + "\n");
                m_logger.writeChars("GPUTime: " + watch.elapsedTimeMillis() + " ms" + "\n");
            }

            m_logger.close();
        }
    }

    public static BSPJob createKMeansHybridBSPConf(Path inPath, Path outPath) throws IOException {
        return createKMeansHybridBSPConf(new HamaConfiguration(), inPath, outPath);
    }

    public static BSPJob createKMeansHybridBSPConf(Configuration conf, Path inPath, Path outPath)
            throws IOException {

        BSPJob job = new BSPJob(new HamaConfiguration(conf), KMeansHybridBSP.class);
        // Set the job name
        job.setJobName("KMeansHybrid Clustering");
        // set the BSP class which shall be executed
        job.setBspClass(KMeansHybridBSP.class);
        // help Hama to locale the jar to be distributed
        job.setJarByClass(KMeansHybridBSP.class);

        job.setInputFormat(SequenceFileInputFormat.class);
        job.setInputKeyClass(PipesVectorWritable.class);
        job.setInputValueClass(NullWritable.class);
        job.setInputPath(inPath);

        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(PipesVectorWritable.class);
        job.setOutputPath(outPath);

        job.setMessageClass(CenterMessage.class);

        job.set("bsp.child.java.opts", "-Xmx16G");

        return job;
    }

    public static void main(String[] args) throws Exception {

        // Defaults
        int numBspTask = 1;
        int numGpuBspTask = 1;
        int blockSize = BLOCK_SIZE;
        int gridSize = GRID_SIZE;
        long n = 10; // input vectors
        int k = 3; // start vectors
        int vectorDimension = 2;
        int maxIteration = 10;
        boolean useTestExampleInput = false;
        boolean isDebugging = false;
        boolean timeMeasurement = false;
        int GPUPercentage = 80;

        Configuration conf = new HamaConfiguration();
        FileSystem fs = FileSystem.get(conf);

        // Set numBspTask to maxTasks
        // BSPJobClient jobClient = new BSPJobClient(conf);
        // ClusterStatus cluster = jobClient.getClusterStatus(true);
        // numBspTask = cluster.getMaxTasks();

        if (args.length > 0) {
            if (args.length == 12) {
                numBspTask = Integer.parseInt(args[0]);
                numGpuBspTask = Integer.parseInt(args[1]);
                blockSize = Integer.parseInt(args[2]);
                gridSize = Integer.parseInt(args[3]);
                n = Long.parseLong(args[4]);
                k = Integer.parseInt(args[5]);
                vectorDimension = Integer.parseInt(args[6]);
                maxIteration = Integer.parseInt(args[7]);
                useTestExampleInput = Boolean.parseBoolean(args[8]);
                GPUPercentage = Integer.parseInt(args[9]);
                isDebugging = Boolean.parseBoolean(args[10]);
                timeMeasurement = Boolean.parseBoolean(args[11]);

            } else {
                System.out.println("Wrong argument size!");
                System.out.println("    Argument1=numBspTask");
                System.out.println("    Argument2=numGpuBspTask");
                System.out.println("    Argument3=blockSize");
                System.out.println("    Argument4=gridSize");
                System.out.println("    Argument5=n | Number of input vectors (" + n + ")");
                System.out.println("    Argument6=k | Number of start vectors (" + k + ")");
                System.out.println(
                        "    Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
                System.out.println(
                        "    Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
                System.out.println("    Argument9=testExample | Use testExample input (true|false=default)");
                System.out.println("    Argument10=GPUPercentage (percentage of input)");
                System.out.println("    Argument11=isDebugging (true|false=defaul)");
                System.out.println("    Argument12=timeMeasurement (true|false=defaul)");
                return;
            }
        }

        // Set config variables
        conf.setBoolean(CONF_DEBUG, isDebugging);
        conf.setBoolean("hama.pipes.logging", false);
        conf.setBoolean(CONF_TIME, timeMeasurement);

        // Set CPU tasks
        conf.setInt("bsp.peers.num", numBspTask);
        // Set GPU tasks
        conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
        // Set GPU blockSize and gridSize
        conf.set(CONF_BLOCKSIZE, "" + blockSize);
        conf.set(CONF_GRIDSIZE, "" + gridSize);
        // Set maxIterations for KMeans
        conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
        // Set n for KMeans
        conf.setLong(CONF_N, n);
        // Set GPU workload
        conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

        LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
        LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
        LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
        LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
        LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
        LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
        LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
        LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
        LOG.info("useTestExampleInput: " + useTestExampleInput);
        LOG.info("inputPath: " + CONF_INPUT_DIR);
        LOG.info("centersPath: " + CONF_CENTER_DIR);
        LOG.info("outputPath: " + CONF_OUTPUT_DIR);
        LOG.info("n: " + n);
        LOG.info("k: " + k);
        LOG.info("vectorDimension: " + vectorDimension);
        LOG.info("maxIteration: " + maxIteration);

        Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
        Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
        conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
        conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

        // prepare Input
        if (useTestExampleInput) {
            // prepareTestInput(conf, fs, input, centerIn);
            prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                    null, GPUPercentage);
        } else {
            prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k, vectorDimension,
                    new Random(3337L), GPUPercentage);
        }

        BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

        long startTime = System.currentTimeMillis();
        if (job.waitForCompletion(true)) {
            LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

            if (isDebugging) {
                printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
                printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
            }

            if (k < 50) {
                printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            }
        }
    }

    /**
     * prepareInputData
     * 
     */
    public static void prepareInputData(Configuration conf, FileSystem fs, Path in, Path centerIn, int numBspTask,
            int numGPUBspTask, long n, int k, int vectorDimension, Random rand, int GPUPercentage)
            throws IOException {

        // Delete input files if already exist
        if (fs.exists(in)) {
            fs.delete(in, true);
        }
        if (fs.exists(centerIn)) {
            fs.delete(centerIn, true);
        }

        final NullWritable nullValue = NullWritable.get();
        final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn,
                PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

        // Compute work distributions
        int cpuTaskNum = numBspTask - numGPUBspTask;
        long inputVectorsPerGPUTask = 0;
        long inputVectorsPerCPU = 0;
        long inputVectorsPerCPUTask = 0;
        if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
            inputVectorsPerGPUTask = (n * GPUPercentage) / 100;
            inputVectorsPerCPU = n - inputVectorsPerGPUTask;
        } else {
            inputVectorsPerCPU = n;
        }
        if (cpuTaskNum > 0) {
            inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
        }

        // long interval = totalNumberOfPoints / numBspTask;
        long centers = 0;

        for (int part = 0; part < numBspTask; part++) {
            Path partIn = new Path(in, "part" + part + ".seq");
            final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn,
                    PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

            long interval = 0;
            if (part > cpuTaskNum) {
                interval = inputVectorsPerGPUTask;
            } else {
                interval = inputVectorsPerCPUTask;
            }
            long start = interval * part;
            long end = start + interval - 1;
            if ((numBspTask - 1) == part) {
                end = n; // set to totalNumberOfPoints
            }
            LOG.info("Partition " + part + ": from " + start + " to " + end);

            for (long i = start; i <= end; i++) {
                double[] arr = new double[vectorDimension];
                for (int j = 0; j < vectorDimension; j++) {
                    if (rand != null) {
                        arr[j] = rand.nextInt((int) n);
                    } else {
                        arr[j] = i;
                    }
                }
                PipesVectorWritable vector = new PipesVectorWritable(new DenseDoubleVector(arr));

                // LOG.info("input[" + i + "]: " + Arrays.toString(arr));
                dataWriter.append(vector, nullValue);

                if (k > centers) {
                    // LOG.info("center[" + i + "]: " + Arrays.toString(arr));
                    centerWriter.append(vector, nullValue);
                    centers++;
                } else {
                    centerWriter.close();
                }

            }
            dataWriter.close();
        }

    }

    /**
     * Create testExample vectors and centers as input from
     * http://www.maplesoft.com/support/help/Maple/view.aspx?path=NAG/g03efc
     * 
     * n := 20: vectorDimension := 5: k := 3: maxIterations := 10:
     * 
     * x := Matrix([ [77.3, 13, 9.699999999999999, 1.5, 6.4], [82.5, 10, 7.5, 1.5,
     * 6.5], [66.90000000000001, 20.6, 12.5, 2.3, 7], [47.2, 33.8, 19, 2.8, 5.8],
     * [65.3, 20.5, 14.2, 1.9, 6.9], [83.3, 10, 6.7, 2.2, 7], [81.59999999999999,
     * 12.7, 5.7, 2.9, 6.7], [47.8, 36.5, 15.7, 2.3, 7.2], [48.6, 37.1, 14.3, 2.1,
     * 7.2], [61.6, 25.5, 12.9, 1.9, 7.3], [58.6, 26.5, 14.9, 2.4, 6.7], [69.3,
     * 22.3, 8.4, 4, 7], [61.8, 30.8, 7.4, 2.7, 6.4], [67.7, 25.3, 7, 4.8, 7.3],
     * [57.2, 31.2, 11.6, 2.4, 6.5], [67.2, 22.7, 10.1, 3.3, 6.2], [59.2, 31.2,
     * 9.6, 2.4, 6], [80.2, 13.2, 6.6, 2, 5.8], [82.2, 11.1, 6.7, 2.2, 7.2],
     * [69.7, 20.7, 9.6, 3.1, 5.9]], datatype=float[8], order='C_order'):
     * 
     * cmeans := Matrix( [[82.5, 10, 7.5, 1.5, 6.5], [47.8, 36.5, 15.7, 2.3, 7.2],
     * [67.2, 22.7, 10.1, 3.3, 6.2]], datatype=float[8], order='C_order'):
     * 
     * 
     * Results
     * 
     * cmeans := Matrix([ [81.1833333333333371, 11.6666666666666661,
     * 7.1499999999999947, 2.0500000000000027, 6.6000000000000052],
     * [47.8666666666666671, 35.8000000000000043, 16.3333333333333321,
     * 2.3999999999999992, 6.7333333333333340], [64.0454545454545610,
     * 25.2090909090909037, 10.7454545454545425, 2.83636363636363642,
     * 6.65454545454545521]]):
     * 
     * inc := Vector([0, 0, 2, 1, 2, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
     * 2]):
     * 
     * nic := Vector([6, 3, 11]):
     * 
     * css := Vector([46.5716666666666583, 20.3800000000000097,
     * 468.896363636363503]):
     * 
     */
    public static void prepareTestInput(Configuration conf, FileSystem fs, Path in, Path centerIn)
            throws IOException {

        // Delete input files if already exist
        if (fs.exists(in)) {
            fs.delete(in, true);
        }
        if (fs.exists(centerIn)) {
            fs.delete(centerIn, true);
        }

        double[][] input = { { 77.3, 13, 9.699999999999999, 1.5, 6.4 }, { 82.5, 10, 7.5, 1.5, 6.5 },
                { 66.90000000000001, 20.6, 12.5, 2.3, 7 }, { 47.2, 33.8, 19, 2.8, 5.8 },
                { 65.3, 20.5, 14.2, 1.9, 6.9 }, { 83.3, 10, 6.7, 2.2, 7 },
                { 81.59999999999999, 12.7, 5.7, 2.9, 6.7 }, { 47.8, 36.5, 15.7, 2.3, 7.2 },
                { 48.6, 37.1, 14.3, 2.1, 7.2 }, { 61.6, 25.5, 12.9, 1.9, 7.3 }, { 58.6, 26.5, 14.9, 2.4, 6.7 },
                { 69.3, 22.3, 8.4, 4, 7 }, { 61.8, 30.8, 7.4, 2.7, 6.4 }, { 67.7, 25.3, 7, 4.8, 7.3 },
                { 57.2, 31.2, 11.6, 2.4, 6.5 }, { 67.2, 22.7, 10.1, 3.3, 6.2 }, { 59.2, 31.2, 9.6, 2.4, 6 },
                { 80.2, 13.2, 6.6, 2, 5.8 }, { 82.2, 11.1, 6.7, 2.2, 7.2 }, { 69.7, 20.7, 9.6, 3.1, 5.9 } };
        double[][] centers = { { 82.5, 10, 7.5, 1.5, 6.5 }, { 47.8, 36.5, 15.7, 2.3, 7.2 },
                { 67.2, 22.7, 10.1, 3.3, 6.2 } };

        final NullWritable nullValue = NullWritable.get();

        // Write inputs
        LOG.info("inputs: ");
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, in, PipesVectorWritable.class,
                NullWritable.class, CompressionType.NONE);

        for (int i = 0; i < input.length; i++) {
            dataWriter.append(new PipesVectorWritable(new DenseDoubleVector(input[i])), nullValue);
            LOG.info("input[" + i + "]: " + Arrays.toString(input[i]));
        }

        dataWriter.close();

        // Write centers
        LOG.info("centers: ");
        final SequenceFile.Writer centerWriter = SequenceFile.createWriter(fs, conf, centerIn,
                PipesVectorWritable.class, NullWritable.class, CompressionType.NONE);

        for (int i = 0; i < centers.length; i++) {
            centerWriter.append(new PipesVectorWritable(new DenseDoubleVector(centers[i])), nullValue);
            LOG.info("center[" + i + "]: " + Arrays.toString(centers[i]));
        }

        centerWriter.close();
    }

    static void printOutput(Configuration conf, FileSystem fs, String extensionFilter, Writable key, Writable value)
            throws IOException {
        FileStatus[] files = fs.listStatus(CONF_OUTPUT_DIR);
        for (int i = 0; i < files.length; i++) {
            if ((files[i].getLen() > 0) && (files[i].getPath().getName().endsWith(extensionFilter))) {
                printFile(conf, fs, files[i].getPath(), key, value);
            }
        }
        // fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    static void printFile(Configuration conf, FileSystem fs, Path file, Writable key, Writable value)
            throws IOException {
        System.out.println("File " + file.toString());
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, file, conf);

            while (reader.next(key, value)) {
                System.out.println("key: '" + key.toString() + "' value: '" + value.toString() + "'\n");
            }
        } catch (IOException e) {
            FSDataInputStream in = fs.open(file);
            IOUtils.copyBytes(in, System.out, conf, false);
            in.close();
        } catch (NullPointerException e) {
            LOG.error(e);
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
}