Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.gpu; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Random; import java.util.SortedMap; import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IntWritable; import org.apache.hama.HamaConfiguration; import org.apache.hama.bsp.BSP; import org.apache.hama.bsp.BSPJob; import org.apache.hama.bsp.BSPJobClient; import org.apache.hama.bsp.BSPPeer; import org.apache.hama.bsp.ClusterStatus; import org.apache.hama.bsp.FileOutputFormat; import org.apache.hama.bsp.SequenceFileInputFormat; import org.apache.hama.bsp.SequenceFileOutputFormat; import org.apache.hama.bsp.join.CompositeInputFormat; import org.apache.hama.bsp.join.TupleWritable; import org.apache.hama.bsp.sync.SyncException; import org.apache.mahout.math.CardinalityException; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; import org.apache.mahout.math.function.Functions; import org.trifort.rootbeer.runtime.Context; import org.trifort.rootbeer.runtime.Kernel; import org.trifort.rootbeer.runtime.Rootbeer; import org.trifort.rootbeer.runtime.StatsRow; import org.trifort.rootbeer.runtime.ThreadConfig; import org.trifort.rootbeer.runtime.util.Stopwatch; import at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.DistributedRowMatrix; import at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.util.MatrixRowMessage; public class MatrixMultiplicationBSPGpu extends BSP<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> { private static final Log LOG = LogFactory.getLog(MatrixMultiplicationBSPGpu.class); private static final String OUT_CARD = "output.vector.cardinality"; private static final String DEBUG = "matrixmultiplication.bsp.gpu.debug"; private static final Path OUTPUT_DIR = new Path( "output/hama/rootbeer/examples/matrixmultiplication/GPU-" + System.currentTimeMillis()); private static final Path MATRIX_A_PATH = new Path("input/hama/rootbeer/examples/MatrixA.seq"); private static final Path MATRIX_B_PATH = new Path("input/hama/rootbeer/examples/MatrixB.seq"); private static final Path MATRIX_C_PATH = new Path(OUTPUT_DIR + "/MatrixC.seq"); private static final Path MATRIX_D_PATH = new Path(OUTPUT_DIR + "/MatrixD.seq"); private int outCardinality; private boolean isDebuggingEnabled; private FSDataOutputStream logger; private String masterTask; private List<Kernel> kernels = new ArrayList<Kernel>(); int blockSize = 0; int gridSize = 0; @Override public void setup(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer) throws IOException { Configuration conf = peer.getConfiguration(); outCardinality = conf.getInt(OUT_CARD, Integer.MAX_VALUE); isDebuggingEnabled = conf.getBoolean(DEBUG, false); // Choose one as a master, who sorts the matrix rows at the end this.masterTask = peer.getPeerName(peer.getNumPeers() / 2); // Init logging if (isDebuggingEnabled) { try { FileSystem fs = FileSystem.get(conf); logger = fs.create(new Path(FileOutputFormat.getOutputPath(new BSPJob((HamaConfiguration) conf)) + "/BSP_" + peer.getTaskId() + ".log")); logger.writeChars("bsp,setup,outCardinality=" + outCardinality + "\n"); } catch (IOException e) { e.printStackTrace(); } } } @Override public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer) throws IOException, SyncException, InterruptedException { IntWritable key = new IntWritable(); TupleWritable value = new TupleWritable(); while (peer.readNext(key, value)) { // Logging if (isDebuggingEnabled) { for (int i = 0; i < value.size(); i++) { Vector vector = ((VectorWritable) value.get(i)).get(); logger.writeChars("bsp,input,key=" + key + ",value=" + vector.toString() + "\n"); } } Vector firstVector = ((VectorWritable) value.get(0)).get(); Vector secondVector = ((VectorWritable) value.get(1)).get(); // outCardinality is resulting column size n // (l x m) * (m x n) = (l x n) boolean firstIsOutFrag = secondVector.size() == outCardinality; // outFrag is Matrix which has the resulting column cardinality // (matrixB) Vector outFrag = firstIsOutFrag ? secondVector : firstVector; // multiplier is Matrix which has the resulting row count // (transposed matrixA) Vector multiplier = firstIsOutFrag ? firstVector : secondVector; if (isDebuggingEnabled) { logger.writeChars("bsp,firstIsOutFrag=" + firstIsOutFrag + "\n"); logger.writeChars("bsp,outFrag=" + outFrag + "\n"); logger.writeChars("bsp,multiplier=" + multiplier + "\n"); } // outFrag to double[] double[] outFragArray = new double[outFrag.size()]; int i = 0; for (Vector.Element e : outFrag.all()) { outFragArray[i] = e.get(); i++; } // One map task consists of multiple kernels within one block // Each kernel computes a scalar multiplication blockSize = multiplier.size(); gridSize++; for (int j = 0; j < blockSize; j++) { kernels.add(new MatrixMultiplicationBSPKernel(j, multiplier.get(j), outFragArray)); } // Run GPU Kernels Rootbeer rootbeer = new Rootbeer(); Context context = rootbeer.createDefaultContext(); Stopwatch watch = new Stopwatch(); watch.start(); // blockSize = rows of Matrix A (multiplier) // gridSize = cols of Matrix B (for each row a scalar multiplication // has to be made) rootbeer.run(kernels, new ThreadConfig(blockSize, gridSize, kernels.size()), context); watch.stop(); List<StatsRow> stats = context.getStats(); for (StatsRow row : stats) { System.out.println(" StatsRow:\n"); System.out.println(" serial time: " + row.getSerializationTime() + "\n"); System.out.println(" exec time: " + row.getExecutionTime() + "\n"); System.out.println(" deserial time: " + row.getDeserializationTime() + "\n"); System.out.println(" num blocks: " + row.getNumBlocks() + "\n"); System.out.println(" num threads: " + row.getNumThreads() + "\n"); } if (isDebuggingEnabled) { logger.writeChars( "bsp,KernelCount=" + kernels.size() + ",GPUTime=" + watch.elapsedTimeMillis() + "ms\n"); logger.writeChars("bps,blockSize=" + blockSize + ",gridSize=" + gridSize + "\n"); logger.flush(); } // Collect results of GPU kernels for (Kernel kernel : kernels) { MatrixMultiplicationBSPKernel bspKernel = (MatrixMultiplicationBSPKernel) kernel; if (isDebuggingEnabled) { logger.writeChars("bsp,thread_idxx=" + bspKernel.thread_idxx + ",multiplier=" + bspKernel.multiplierVal + ",vector=" + Arrays.toString(bspKernel.vectorVal) + "\n"); } peer.send(masterTask, new MatrixRowMessage(bspKernel.row, new VectorWritable(new DenseVector(bspKernel.results)))); if (isDebuggingEnabled) { logger.writeChars("bsp,send,key=" + bspKernel.row + ",value=" + Arrays.toString(bspKernel.results) + "\n"); } } } peer.sync(); } @Override public void cleanup(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer) throws IOException { // MasterTask accumulates result if (peer.getPeerName().equals(masterTask)) { // SortedMap because the final matrix rows should be in order SortedMap<Integer, Vector> accumlatedRows = new TreeMap<Integer, Vector>(); MatrixRowMessage currentMatrixRowMessage = null; // Collect messages while ((currentMatrixRowMessage = peer.getCurrentMessage()) != null) { int rowIndex = currentMatrixRowMessage.getRowIndex(); Vector rowValues = currentMatrixRowMessage.getRowValues().get(); if (isDebuggingEnabled) { logger.writeChars("bsp,gotMsg,key=" + rowIndex + ",value=" + rowValues.toString() + "\n"); } if (accumlatedRows.containsKey(rowIndex)) { accumlatedRows.get(rowIndex).assign(rowValues, Functions.PLUS); } else { accumlatedRows.put(rowIndex, new RandomAccessSparseVector(rowValues)); } } // Write accumulated results for (Map.Entry<Integer, Vector> row : accumlatedRows.entrySet()) { if (isDebuggingEnabled) { logger.writeChars( "bsp,write,key=" + row.getKey() + ",value=" + row.getValue().toString() + "\n"); } peer.write(new IntWritable(row.getKey()), new VectorWritable(row.getValue())); } } } static void printOutput(Configuration conf) throws IOException { FileSystem fs = OUTPUT_DIR.getFileSystem(conf); FileStatus[] files = fs.listStatus(OUTPUT_DIR); for (int i = 0; i < files.length; i++) { if (files[i].getLen() > 0) { if (files[i].getPath().getName().endsWith(".log")) { System.out.println("File " + files[i].getPath()); FSDataInputStream in = fs.open(files[i].getPath()); IOUtils.copyBytes(in, System.out, conf, false); in.close(); } } } // fs.delete(FileOutputFormat.getOutputPath(job), true); } public static BSPJob createMatrixMultiplicationBSPGpuConf(Path aPath, Path bPath, Path outPath, int outCardinality) throws IOException { return createMatrixMultiplicationBSPGpuConf(new HamaConfiguration(), aPath, bPath, outPath, outCardinality); } public static BSPJob createMatrixMultiplicationBSPGpuConf(Configuration conf, Path aPath, Path bPath, Path outPath, int outCardinality) throws IOException { BSPJob job = new BSPJob(new HamaConfiguration(conf)); // Set the job name job.setJobName("MatrixMultiplicationBSP GPU"); // set the BSP class which shall be executed job.setBspClass(MatrixMultiplicationBSPGpu.class); // help Hama to locale the jar to be distributed job.setJarByClass(MatrixMultiplicationBSPGpu.class); job.setInputFormat(CompositeInputFormat.class); job.set("bsp.join.expr", CompositeInputFormat.compose("inner", SequenceFileInputFormat.class, aPath, bPath)); job.setOutputFormat(SequenceFileOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(VectorWritable.class); FileOutputFormat.setOutputPath(job, outPath); job.set(OUT_CARD, "" + outCardinality); job.set("bsp.child.java.opts", "-Xmx4G"); return job; } public static void main(String[] args) throws Exception { // Defaults int numRowsA = 10; int numColsA = 10; int numRowsB = 10; int numColsB = 10; boolean isDebugging = false; Configuration conf = new HamaConfiguration(); BSPJobClient jobClient = new BSPJobClient(conf); ClusterStatus cluster = jobClient.getClusterStatus(true); if (args.length > 0) { if (args.length == 6) { conf.setInt("bsp.peers.num", Integer.parseInt(args[0])); numRowsA = Integer.parseInt(args[1]); numColsA = Integer.parseInt(args[2]); numRowsB = Integer.parseInt(args[3]); numColsB = Integer.parseInt(args[4]); isDebugging = Boolean.parseBoolean(args[5]); } else { System.out.println("Wrong argument size!"); System.out.println(" Argument1=numBspTask"); System.out.println(" Argument2=numRowsA | Number of rows of the first input matrix"); System.out.println(" Argument3=numColsA | Number of columns of the first input matrix"); System.out.println(" Argument4=numRowsB | Number of rows of the second input matrix"); System.out.println(" Argument5=numColsB | Number of columns of the second input matrix"); System.out.println(" Argument6=debug | Enable debugging (true|false)"); return; } } else { conf.setInt("bsp.peers.num", cluster.getMaxTasks()); } conf.setInt("matrixmultiplication.bsp.gpu.numRowsA", numRowsA); conf.setInt("matrixmultiplication.bsp.gpu.numColsA", numColsA); conf.setInt("matrixmultiplication.bsp.gpu.numRowsB", numRowsB); conf.setInt("matrixmultiplication.bsp.gpu.numColsB", numRowsB); conf.setBoolean(DEBUG, isDebugging); LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0)); LOG.info("numRowsA: " + numRowsA); LOG.info("numColsA: " + numColsA); LOG.info("numRowsB: " + numRowsB); LOG.info("numColsB: " + numColsB); LOG.info("isDebugging: " + isDebugging); LOG.info("outputPath: " + OUTPUT_DIR); if (numColsA != numRowsB) { throw new CardinalityException(numColsA, numRowsB); } // Create random DistributedRowMatrix // use constant seeds to get reproducable results // Matrix A is stored transposed DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, true); DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB, new Random(1337L), MATRIX_B_PATH, false); // Load DistributedRowMatrix a and b DistributedRowMatrix a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix b = new DistributedRowMatrix(MATRIX_B_PATH, OUTPUT_DIR, numRowsB, numColsB); b.setConf(conf); // MatrixMultiply all within a new BSP job long startTime = System.currentTimeMillis(); DistributedRowMatrix c = a.multiplyBSP(b, MATRIX_C_PATH, true, false); System.out.println("MatrixMultiplicationCpu using Hama finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); // Verification // Overwrite matrix A, NOT transposed for verification check DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L), MATRIX_A_PATH, false); a = new DistributedRowMatrix(MATRIX_A_PATH, OUTPUT_DIR, numRowsA, numColsA); a.setConf(conf); DistributedRowMatrix d = a.multiplyJava(b, MATRIX_D_PATH); if (c.verify(d)) { System.out.println("Verify PASSED!"); } else { System.out.println("Verify FAILED!"); } if (isDebugging) { System.out.println("Matrix A:"); a.printDistributedRowMatrix(); System.out.println("Matrix B:"); b.printDistributedRowMatrix(); System.out.println("Matrix C:"); c.printDistributedRowMatrix(); System.out.println("Matrix D:"); d.printDistributedRowMatrix(); printOutput(conf); } } }