at.illecker.hama.hybrid.examples.testglobalgpusync.TestGlobalGpuSyncHybridBSP.java Source code

Java tutorial

Introduction

Here is the source code for at.illecker.hama.hybrid.examples.testglobalgpusync.TestGlobalGpuSyncHybridBSP.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.illecker.hama.hybrid.examples.testglobalgpusync;

import java.io.IOException;
import java.util.List;

import junit.framework.Assert;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.bsp.BSPPeer;
import org.apache.hama.bsp.NullInputFormat;
import org.apache.hama.bsp.NullOutputFormat;
import org.apache.hama.bsp.gpu.HybridBSP;
import org.apache.hama.bsp.sync.SyncException;
import org.trifort.rootbeer.runtime.Context;
import org.trifort.rootbeer.runtime.Rootbeer;
import org.trifort.rootbeer.runtime.StatsRow;
import org.trifort.rootbeer.runtime.ThreadConfig;
import org.trifort.rootbeer.runtime.util.Stopwatch;

public class TestGlobalGpuSyncHybridBSP
        extends HybridBSP<NullWritable, NullWritable, NullWritable, NullWritable, IntWritable> {

    /** Logger used by {@code main} to report the effective job settings and run time. */
    private static final Log LOG = LogFactory.getLog(TestGlobalGpuSyncHybridBSP.class);

    /** Configuration key for the GPU block size; written by {@code main}, read by {@code bspGpu}. */
    public static final String CONF_BLOCK_SIZE = "testglobalgpusync.hybrid.blockSize";
    /** Configuration key for the GPU grid size; written by {@code main}, read by {@code bspGpu}. */
    public static final String CONF_GRID_SIZE = "testglobalgpusync.hybrid.gridSize";
    /** Configuration key for the directory in which {@code bsp} and {@code bspGpu} create their per-task log files. */
    public static final String CONF_TMP_DIR = "testglobalgpusync.tmp.path";
    /**
     * Directory that {@code main} stores under {@link #CONF_TMP_DIR}. The numeric suffix is the
     * time (in milliseconds) at which this class was initialized.
     */
    private static final Path TMP_DIR = new Path(
            "output/hama/hybrid/examples/testglobalgpusync/hybrid-" + System.currentTimeMillis());

    // Hardware notes kept from the original author:
    // GridSize = max 14 Multiprocessors (192 CUDA Cores/MP = 2688 CUDA Cores)
    // BlockSize = max 1024
    // 40 registers -> max blockSize 768
    // 45 registers -> max blockSize 640
    // 48 registers -> max blockSize 640
    /** Block size {@code main} uses when no command-line arguments are given. */
    public static final int BLOCK_SIZE = 14;
    /** Grid size {@code main} uses when no command-line arguments are given. */
    public static final int GRID_SIZE = 14;

    /** Peer name of the master task; assigned in {@code setup}. */
    private String m_masterTask;

    /**
     * Picks the master task. {@code bsp} sends every peer's index to this task and
     * {@code bspGpu} hands its name to the GPU kernel.
     *
     * @param peer the BSP peer used to resolve the master's peer name
     * @throws IOException declared by the overridden method
     */
    @Override
    public void setup(BSPPeer<NullWritable, NullWritable, NullWritable, NullWritable, IntWritable> peer)
            throws IOException {

        // The peer at index 0 is always the master. Using the middle peer
        // (peer.getNumPeers() / 2) is disabled:
        // TODO task must be 0 otherwise write out does NOT work!
        m_masterTask = peer.getPeerName(0);
    }

    /**
     * CPU variant of the test. Sends this peer's index to the master task, synchronizes,
     * and, if this peer is the master, writes every received index to the task's log file.
     *
     * <p>The log file is {@code <CONF_TMP_DIR>/<taskId>.log}. Text is written with
     * {@code writeChars}, i.e. two bytes per character.
     *
     * @param peer the BSP peer this task runs as
     * @throws IOException if the log file cannot be created, written or closed, or messaging fails
     * @throws SyncException if the barrier synchronization fails
     * @throws InterruptedException if the task is interrupted while synchronizing
     */
    @Override
    public void bsp(BSPPeer<NullWritable, NullWritable, NullWritable, NullWritable, IntWritable> peer)
            throws IOException, SyncException, InterruptedException {

        // Debug output
        HamaConfiguration conf = peer.getConfiguration();
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream outStream = fs.create(new Path(conf.get(CONF_TMP_DIR), peer.getTaskId() + ".log"));
        try {
            outStream.writeChars("TestGlobalGpuSycHybridBSP.bsp executed on CPU!\n");

            peer.send(m_masterTask, new IntWritable(peer.getPeerIndex()));

            peer.sync();

            // If master, fetch messages
            if (peer.getPeerName().equals(m_masterTask)) {
                int msgCount = peer.getNumCurrentMessages();
                for (int i = 0; i < msgCount; i++) {
                    int id = peer.getCurrentMessage().get();
                    outStream.writeChars(id + "\n");
                }
            }
        } finally {
            // Close the log even when send/sync/write fails so the stream is not leaked.
            outStream.close();
        }
    }

    /**
     * GPU-side setup; performs exactly the same initialization as {@code setup}.
     *
     * @param peer the BSP peer forwarded to {@code setup}
     * @throws IOException if {@code setup} fails
     */
    @Override
    public void setupGpu(BSPPeer<NullWritable, NullWritable, NullWritable, NullWritable, IntWritable> peer)
            throws IOException {
        // Nothing GPU specific to prepare: reuse the CPU setup.
        setup(peer);
    }

    /**
     * GPU variant of the test. Runs a {@code TestGlobalGpuSyncKernel} through Rootbeer, writes
     * the run statistics to the task's log file and verifies the kernel's message counters.
     *
     * <p>Block and grid size are read from {@link #CONF_BLOCK_SIZE} and {@link #CONF_GRID_SIZE};
     * when a key is not set, {@link #BLOCK_SIZE} / {@link #GRID_SIZE} are used. The verification
     * asserts that {@code kernel.messageCount} equals {@code blockSize * gridSize} and that
     * {@code kernel.messageSum} equals {@code 0 + 1 + ... + (blockSize * gridSize - 1)}.
     *
     * @param peer the BSP peer this task runs as
     * @param rootbeer the Rootbeer runtime used to launch the kernel
     * @throws IOException if the log file cannot be created, written or closed
     * @throws SyncException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void bspGpu(BSPPeer<NullWritable, NullWritable, NullWritable, NullWritable, IntWritable> peer,
            Rootbeer rootbeer) throws IOException, SyncException, InterruptedException {

        HamaConfiguration conf = peer.getConfiguration();
        // getInt falls back to the class defaults instead of failing on a missing key.
        int blockSize = conf.getInt(CONF_BLOCK_SIZE, BLOCK_SIZE);
        int gridSize = conf.getInt(CONF_GRID_SIZE, GRID_SIZE);
        int totalThreads = blockSize * gridSize;

        TestGlobalGpuSyncKernel kernel = new TestGlobalGpuSyncKernel(m_masterTask);

        // Run GPU Kernels
        Context context = rootbeer.createDefaultContext();
        Stopwatch watch = new Stopwatch();
        watch.start();
        rootbeer.run(kernel, new ThreadConfig(blockSize, gridSize, totalThreads), context);
        watch.stop();

        // Debug output
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream outStream = fs.create(new Path(conf.get(CONF_TMP_DIR), peer.getTaskId() + ".log"));
        try {
            outStream.writeChars("TestGlobalGpuSycHybridBSP.bspGpu executed on GPU!\n");
            List<StatsRow> stats = context.getStats();
            for (StatsRow row : stats) {
                outStream.writeChars("  StatsRow:\n");
                outStream.writeChars("    serial time: " + row.getSerializationTime() + "\n");
                outStream.writeChars("    exec time: " + row.getExecutionTime() + "\n");
                outStream.writeChars("    deserial time: " + row.getDeserializationTime() + "\n");
                outStream.writeChars("    num blocks: " + row.getNumBlocks() + "\n");
                outStream.writeChars("    num threads: " + row.getNumThreads() + "\n");
            }

            outStream.writeChars("TestGlobalGpuSycHybridBSP,GPUTime=" + watch.elapsedTimeMillis() + " ms\n");
            outStream.writeChars("TestGlobalGpuSycHybridBSP,BlockSize=" + blockSize + "\n");
            outStream.writeChars("TestGlobalGpuSycHybridBSP,GridSize=" + gridSize + "\n");
            outStream.writeChars("TestGlobalGpuSycHybridBSP,TotalThreads=" + totalThreads + "\n");
            outStream.writeChars("TestGlobalGpuSycHybridBSP,MessageCount=" + kernel.messageCount + "\n");
            outStream.writeChars("TestGlobalGpuSycHybridBSP,MessageSum=" + kernel.messageSum + "\n");

            Assert.assertEquals(totalThreads, kernel.messageCount);
            // Expected sum of the values 0..n (Gauss formula).
            int n = totalThreads - 1;
            Assert.assertEquals((n * (n + 1)) / 2, kernel.messageSum);
            outStream.writeChars("TestGlobalGpuSycHybridBSP.bspGpu: messages verified!'\n");
        } finally {
            // A failed assertion or write must not leak the stream; closing here also
            // keeps everything logged before the failure.
            outStream.close();
        }
    }

    /**
     * Creates the example job on top of a fresh {@code HamaConfiguration}.
     *
     * @return the configured job
     * @throws IOException if the job cannot be created
     */
    public static BSPJob createTestGlobalGpuSyncHybridBSPConf() throws IOException {
        HamaConfiguration freshConf = new HamaConfiguration();
        return createTestGlobalGpuSyncHybridBSPConf(freshConf);
    }

    /**
     * Creates the example job from the given configuration: no input, no output,
     * {@code IntWritable} messages and a child JVM option of {@code -Xmx4G}.
     *
     * @param conf base configuration; it is wrapped in a new {@code HamaConfiguration}
     * @return the configured job
     * @throws IOException if the job cannot be created
     */
    public static BSPJob createTestGlobalGpuSyncHybridBSPConf(Configuration conf) throws IOException {

        final Class<TestGlobalGpuSyncHybridBSP> bspClass = TestGlobalGpuSyncHybridBSP.class;
        final HamaConfiguration jobConf = new HamaConfiguration(conf);

        BSPJob bspJob = new BSPJob(jobConf, bspClass);
        bspJob.setJobName("TestGlobalGpuSyncHybridBSP Example");
        // The BSP implementation to execute ...
        bspJob.setBspClass(bspClass);
        // ... which also lets Hama locate the jar to be distributed.
        bspJob.setJarByClass(bspClass);

        // The job neither reads input nor produces output records.
        bspJob.setInputFormat(NullInputFormat.class);
        bspJob.setOutputFormat(NullOutputFormat.class);

        bspJob.setMessageClass(IntWritable.class);

        bspJob.set("bsp.child.java.opts", "-Xmx4G");

        return bspJob;
    }

    /**
     * Copies every non-empty file directly under {@code path} to {@code System.out},
     * preceded by a line naming the file. The files are left in place.
     *
     * @param job job whose configuration is passed to the copy
     * @param fs file system that holds {@code path}
     * @param path directory to list
     * @throws IOException if listing, opening, copying or closing a file fails
     */
    static void printOutput(BSPJob job, FileSystem fs, Path path) throws IOException {
        FileStatus[] files = fs.listStatus(path);
        if (files == null) {
            // NOTE(review): some FileSystem implementations appear to return null
            // rather than throw for a missing path; nothing to print then.
            return;
        }
        for (FileStatus file : files) {
            if (file.getLen() > 0) {
                System.out.println("File " + file.getPath());
                FSDataInputStream in = fs.open(file.getPath());
                try {
                    // close=false: System.out must stay open; the input is closed below.
                    IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                } finally {
                    in.close();
                }
            }
        }
    }

    /**
     * Command-line entry point. Accepts either no arguments (defaults are used) or exactly
     * five: numBspTask, numGpuBspTask, blockSize, gridSize and debug. Any other argument
     * count prints a usage message and returns without running the job.
     *
     * <p>On successful completion the elapsed time is logged and the files in the
     * temporary directory are printed via {@code printOutput}.
     *
     * @param args command-line arguments as described above
     * @throws InterruptedException if waiting for the job is interrupted
     * @throws IOException if job setup or output printing fails
     * @throws ClassNotFoundException declared by the job submission
     */
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {

        // Defaults
        int bspTaskCount = 1;
        int gpuBspTaskCount = 1;
        int blockSizeArg = BLOCK_SIZE;
        int gridSizeArg = GRID_SIZE;
        boolean debugEnabled = false;

        Configuration conf = new HamaConfiguration();

        if (args.length == 5) {
            bspTaskCount = Integer.parseInt(args[0]);
            gpuBspTaskCount = Integer.parseInt(args[1]);
            blockSizeArg = Integer.parseInt(args[2]);
            gridSizeArg = Integer.parseInt(args[3]);
            debugEnabled = Boolean.parseBoolean(args[4]);
        } else if (args.length > 0) {
            String[] usage = {
                "Wrong argument size!",
                "    Argument1=numBspTask",
                "    Argument2=numGpuBspTask",
                "    Argument3=blockSize",
                "    Argument4=gridSize",
                "    Argument5=debug | Enable debugging (true|false=default)"
            };
            for (String line : usage) {
                System.out.println(line);
            }
            return;
        }

        // Publish the settings through the configuration
        conf.setBoolean("hama.pipes.logging", debugEnabled);
        conf.setInt("bsp.peers.num", bspTaskCount);      // CPU tasks
        conf.setInt("bsp.peers.gpu.num", gpuBspTaskCount); // GPU tasks
        conf.set(CONF_BLOCK_SIZE, String.valueOf(blockSizeArg));
        conf.set(CONF_GRID_SIZE, String.valueOf(gridSizeArg));
        conf.set(CONF_TMP_DIR, TMP_DIR.toString());

        // Log what was actually stored
        LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
        LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
        LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
        LOG.info("BlockSize: " + conf.get(CONF_BLOCK_SIZE));
        LOG.info("GridSize: " + conf.get(CONF_GRID_SIZE));
        LOG.info("TempDir: " + conf.get(CONF_TMP_DIR));
        LOG.info("isDebugging: " + conf.getBoolean("hama.pipes.logging", false));

        BSPJob job = createTestGlobalGpuSyncHybridBSPConf(conf);

        long startTime = System.currentTimeMillis();
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            return;
        }

        double elapsedSeconds = (System.currentTimeMillis() - startTime) / 1000.0;
        LOG.info("Job Finished in " + elapsedSeconds + " seconds");
        Path tmpDir = new Path(conf.get(CONF_TMP_DIR));
        printOutput(job, FileSystem.get(conf), tmpDir);
    }

}