org.apache.hama.pipes.TestPipes.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hama.pipes.TestPipes.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hama.pipes;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.math.BigDecimal;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hama.Constants;
import org.apache.hama.HamaCluster;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.bsp.BSPJobClient;
import org.apache.hama.bsp.ClusterStatus;
import org.apache.hama.bsp.FileInputFormat;
import org.apache.hama.bsp.FileOutputFormat;
import org.apache.hama.bsp.KeyValueTextInputFormat;
import org.apache.hama.bsp.NullInputFormat;
import org.apache.hama.bsp.SequenceFileInputFormat;
import org.apache.hama.bsp.SequenceFileOutputFormat;
import org.apache.hama.bsp.message.MessageManager;
import org.apache.hama.commons.io.PipesKeyValueWritable;
import org.apache.hama.commons.io.PipesVectorWritable;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleVector;

/**
 * Test case for {@link PipesBSP}
 * 
 */
public class TestPipes extends HamaCluster {
    private static final Log LOG = LogFactory.getLog(TestPipes.class);

    public static final String EXAMPLES_INSTALL_PROPERTY = "hama.pipes.examples.install";
    public static final String EXAMPLE_SUMMATION_EXEC = "/examples/summation";
    public static final String EXAMPLE_PIESTIMATOR_EXEC = "/examples/piestimator";
    public static final String EXAMPLE_MATRIXMULTIPLICATION_EXEC = "/examples/matrixmultiplication";
    public static final String EXAMPLE_TMP_OUTPUT = "/tmp/test-example/";
    public static final String HAMA_TMP_OUTPUT = "/tmp/hama-pipes/";
    public static final String HAMA_TMP_DISK_QUEUE_OUTPUT = "/tmp/messageQueue";
    public static final int DOUBLE_PRECISION = 6;

    private HamaConfiguration configuration;
    private static FileSystem fs = null;
    private String examplesInstallPath;

    /**
     * Creates the test case.
     * <p>
     * Removes the temporary Hama, disk-queue and example directories that an
     * earlier run may have left behind, then fills the configuration used by
     * the submitted jobs with a localhost master address, a localhost
     * ZooKeeper quorum on port 21810 and the ZooKeeper sync client.
     */
    public TestPipes() {
        configuration = new HamaConfiguration();

        try {
            // Cleanup temp Hama locations
            fs = FileSystem.get(configuration);
            cleanup(fs, new Path(HAMA_TMP_OUTPUT));
            cleanup(fs, new Path(HAMA_TMP_DISK_QUEUE_OUTPUT));
            // Remove local temp folder
            cleanup(fs, new Path(EXAMPLE_TMP_OUTPUT));
        } catch (IOException e) {
            // Cleanup stays best effort, but the failure is reported through
            // the class logger so it ends up next to the other test output.
            LOG.error("Could not clean up temporary test directories", e);
        }

        configuration.set("bsp.master.address", "localhost");
        configuration.set("hama.child.redirect.log.console", "true");
        assertEquals("Make sure master addr is set to localhost:", "localhost",
                configuration.get("bsp.master.address"));
        configuration.set("bsp.local.dir", HAMA_TMP_OUTPUT);
        configuration.set(Constants.ZOOKEEPER_QUORUM, "localhost");
        configuration.setInt(Constants.ZOOKEEPER_CLIENT_PORT, 21810);
        configuration.set("hama.sync.client.class",
                org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.class.getCanonicalName());
    }

    /**
     * Delegates to the {@link HamaCluster} set-up; this test adds no
     * preparation of its own.
     *
     * @throws Exception if the parent set-up fails
     */
    @Override
    public void setUp() throws Exception {
        super.setUp();
    }

    /**
     * Delegates to the {@link HamaCluster} tear-down; this test adds no
     * clean-up of its own here.
     *
     * @throws Exception if the parent tear-down fails
     */
    @Override
    public void tearDown() throws Exception {
        super.tearDown();
    }

    /**
     * Runs the Summation, PiEstimator and MatrixMultiplication examples in
     * that order and finally removes the example temp folder.
     * <p>
     * The install location of the example binaries is read from the system
     * property {@link #EXAMPLES_INSTALL_PROPERTY}. The property must be
     * defined; when it is defined but empty the test logs an error and
     * returns without running any example.
     *
     * @throws Exception if any of the examples fails
     */
    public void testPipes() throws Exception {
        final String installPath = System.getProperty(EXAMPLES_INSTALL_PROPERTY);
        assertNotNull("System property " + EXAMPLES_INSTALL_PROPERTY + " is not defined!", installPath);

        if (installPath.isEmpty()) {
            LOG.error("System property " + EXAMPLES_INSTALL_PROPERTY + " is empty! Skipping TestPipes!");
            return;
        }
        this.examplesInstallPath = installPath;

        summation();      // *** Summation Test ***
        piestimation();   // *** PiEstimator Test ***
        matrixMult();     // *** MatrixMultiplication Test ***

        // Remove local temp folder
        final Path exampleTmpDir = new Path(EXAMPLE_TMP_OUTPUT);
        cleanup(fs, exampleTmpDir);
    }

    /**
     * Runs the summation example with one BSP task on a freshly generated
     * input file and checks that the job output equals the sum of the
     * generated values within {@code 10^-DOUBLE_PRECISION}.
     * The input and output folders are removed afterwards.
     *
     * @throws Exception if the job or the verification fails
     */
    private void summation() throws Exception {
        final String baseDir = EXAMPLE_TMP_OUTPUT + "summation/";
        final Path executable = new Path(this.examplesInstallPath + EXAMPLE_SUMMATION_EXEC);
        final Path in = new Path(baseDir + "in");
        final Path out = new Path(baseDir + "out");

        // Create the input and remember the expected total
        final BigDecimal expectedSum = writeSummationInputFile(fs, in);

        // Execute the example
        final BSPJob job = getSummationJob(configuration);
        runProgram(job, executable, in, out, 1, this.numOfGroom);

        // Compare the job result with the expected total
        final double tolerance = Math.pow(10, -DOUBLE_PRECISION);
        verifyOutput(configuration, out, expectedSum.doubleValue(), tolerance);

        // Drop input and output folder
        cleanup(fs, in);
        cleanup(fs, out);
    }

    /**
     * Runs the PiEstimator example with three BSP tasks and checks that the
     * job output is within {@code 10^-2} of {@link Math#PI}.
     * The input and output folders are removed afterwards.
     *
     * @throws Exception if the job or the verification fails
     */
    private void piestimation() throws Exception {
        final String baseDir = EXAMPLE_TMP_OUTPUT + "piestimator/";
        final Path executable = new Path(this.examplesInstallPath + EXAMPLE_PIESTIMATOR_EXEC);
        final Path in = new Path(baseDir + "in");
        final Path out = new Path(baseDir + "out");

        // Execute the example
        final BSPJob job = getPiestimatorJob(configuration);
        runProgram(job, executable, in, out, 3, this.numOfGroom);

        // Compare the estimate with PI
        final double tolerance = Math.pow(10, -2);
        verifyOutput(configuration, out, Math.PI, tolerance);

        // Drop input and output folder
        cleanup(fs, in);
        cleanup(fs, out);
    }

    /**
     * Runs the matrix multiplication example with two BSP tasks.
     * <p>
     * Two random matrices are generated: A with {@code rows x cols} and B
     * with {@code cols x rows} entries, where both dimensions are drawn from
     * 11 to 30. A is written as is, B is written transposed, and the job
     * output is compared with a locally computed product of A and B.
     * The input and output folders are removed afterwards.
     *
     * @throws Exception if the job or the verification fails
     */
    private void matrixMult() throws Exception {
        final Path executable = new Path(this.examplesInstallPath + EXAMPLE_MATRIXMULTIPLICATION_EXEC);
        final Path in = new Path(EXAMPLE_TMP_OUTPUT + "matmult/in");
        final Path out = new Path(EXAMPLE_TMP_OUTPUT + "matmult/out");

        // Dimensions: nextInt(20) yields 0-19, shifted by 11 -> 11-30
        final Random random = new Random();
        final int rowCount = random.nextInt(20) + 11;
        final int columnCount = random.nextInt(20) + 11;

        // Input matrices
        final double[][] a = createRandomMatrix(rowCount, columnCount, random);
        final double[][] b = createRandomMatrix(columnCount, rowCount, random);

        final Path aFile = writeMatrix(configuration, a, new Path(in, "matrixA.seq"), false);
        final Path transposedBFile = writeMatrix(configuration, b, new Path(in, "transposedMatrixB.seq"), true);

        // Execute the example
        final BSPJob job = getMatrixMultiplicationJob(configuration, transposedBFile);
        runProgram(job, executable, aFile, out, 2, this.numOfGroom);

        // Compare the job result with the locally computed product
        final double[][] expectedProduct = multiplyMatrix(a, b);
        verifyMatrixMultiplicationOutput(configuration, out, expectedProduct);

        cleanup(fs, in);
        cleanup(fs, out);
    }

    /**
     * Builds the job description for the summation example.
     *
     * @param conf configuration the job is created from
     * @return a job reading {@link Text}/{@link Text} pairs through
     *         {@link KeyValueTextInputFormat}, writing
     *         {@link NullWritable}/{@link DoubleWritable} pairs through
     *         {@link SequenceFileOutputFormat} and exchanging
     *         {@link DoubleWritable} messages
     * @throws IOException if the job cannot be created
     */
    static BSPJob getSummationJob(HamaConfiguration conf) throws IOException {
        final BSPJob job = new BSPJob(conf);

        // Input side
        job.setInputFormat(KeyValueTextInputFormat.class);
        job.setInputKeyClass(Text.class);
        job.setInputValueClass(Text.class);

        // Output side
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(DoubleWritable.class);

        // Messages
        job.setMessageClass(DoubleWritable.class);

        return job;
    }

    /**
     * Builds the job description for the PiEstimator example.
     *
     * @param conf configuration the job is created from
     * @return a job using {@link NullInputFormat}, writing
     *         {@link NullWritable}/{@link DoubleWritable} pairs through
     *         {@link SequenceFileOutputFormat} and exchanging
     *         {@link IntWritable} messages
     * @throws IOException if the job cannot be created
     */
    static BSPJob getPiestimatorJob(HamaConfiguration conf) throws IOException {
        final BSPJob job = new BSPJob(conf);

        // Input side
        job.setInputFormat(NullInputFormat.class);

        // Output side
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(DoubleWritable.class);

        // Messages
        job.setMessageClass(IntWritable.class);

        return job;
    }

    /**
     * Builds the job description for the matrix multiplication example.
     * <p>
     * Input and output are sequence files of
     * {@link IntWritable}/{@link PipesVectorWritable} pairs, messages are
     * {@link PipesKeyValueWritable}s. Runtime partitioning with
     * {@link PipesPartitioner} is switched on, received messages are kept in
     * a sorted memory queue, and the location of the transposed matrix B is
     * handed to the job via the {@code hama.mat.mult.B.path} property.
     *
     * @param conf configuration the job is created from
     * @param transposedMatrixB path of the transposed matrix B
     * @return the configured job
     * @throws IOException if the job cannot be created
     */
    static BSPJob getMatrixMultiplicationJob(HamaConfiguration conf, Path transposedMatrixB) throws IOException {
        final BSPJob job = new BSPJob(conf);

        // Input side
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setInputKeyClass(IntWritable.class);
        job.setInputValueClass(PipesVectorWritable.class);

        // Output side
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(PipesVectorWritable.class);

        // Messages
        job.setMessageClass(PipesKeyValueWritable.class);

        // Runtime partitioning
        final String partitionDir = HAMA_TMP_OUTPUT + "/parts";
        job.set(Constants.RUNTIME_PARTITIONING_DIR, partitionDir);
        job.setBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, true);
        job.setPartitioner(PipesPartitioner.class);

        // sort sent messages
        job.set(MessageManager.RECEIVE_QUEUE_TYPE_CLASS, "org.apache.hama.bsp.message.queue.SortedMemoryQueue");

        // Tell the job where matrix B lives
        final String matrixBLocation = transposedMatrixB.toString();
        job.set("hama.mat.mult.B.path", matrixBLocation);

        return job;
    }

    /**
     * Writes the input file {@code part0} for the summation example into the
     * given directory and returns the sum of all values written.
     * <p>
     * The file holds between 50 and 149 lines of the form
     * {@code key<n>\t<value>\n}. Every value is a random number in
     * {@code [0, 100)} truncated to {@link #DOUBLE_PRECISION} decimal places,
     * and the returned sum is built from exactly those truncated values.
     *
     * @param fs file system to write to
     * @param dir directory that receives the {@code part0} file
     * @return exact sum of the written values
     * @throws IOException if the file cannot be created or written
     */
    static BigDecimal writeSummationInputFile(FileSystem fs, Path dir) throws IOException {
        final double rangeMin = 0;
        final double rangeMax = 100;
        final Random rand = new Random();

        // Draw the line count ONCE. Keeping rand.nextInt(...) in the loop
        // condition would pick a new bound on every iteration and skew the
        // number of lines away from the intended 50-149 range.
        final int lineCount = rand.nextInt(100) + 50;

        BigDecimal sum = BigDecimal.ZERO;
        DataOutputStream out = fs.create(new Path(dir, "part0"));
        try {
            for (int i = 0; i < lineCount; i++) {
                // generate key value pair inputs
                double randomValue = rangeMin + (rangeMax - rangeMin) * rand.nextDouble();
                BigDecimal truncatedValue = new BigDecimal(randomValue).setScale(DOUBLE_PRECISION,
                        BigDecimal.ROUND_DOWN);

                String line = "key" + (i + 1) + "\t" + truncatedValue.toString() + "\n";
                out.writeBytes(line);

                // Sum the truncated value so the expectation matches the file content
                sum = sum.add(truncatedValue);
            }
        } finally {
            // Release the stream even when a write fails
            out.close();
        }
        return sum;
    }

    /**
     * Creates a matrix filled with random values from {@code [0, 100)}, each
     * truncated to {@link #DOUBLE_PRECISION} decimal places. Cells are filled
     * row by row, so the sequence drawn from {@code rand} is row-major.
     *
     * @param rows number of rows
     * @param columns number of columns
     * @param rand source of randomness
     * @return the generated {@code rows x columns} matrix
     */
    static double[][] createRandomMatrix(int rows, int columns, Random rand) {
        final double lowerBound = 0;
        final double upperBound = 100;
        final double[][] result = new double[rows][columns];

        for (double[] row : result) {
            for (int col = 0; col < columns; col++) {
                final double sample = lowerBound + (upperBound - lowerBound) * rand.nextDouble();
                final BigDecimal truncated = new BigDecimal(sample).setScale(DOUBLE_PRECISION,
                        BigDecimal.ROUND_DOWN);
                row[col] = truncated.doubleValue();
            }
        }
        return result;
    }

    /**
     * Writes a matrix as a sequence file of
     * {@link IntWritable}/{@link PipesVectorWritable} pairs, one pair per
     * row with the row index as key.
     * <p>
     * I/O failures are logged and do not propagate; the path is returned in
     * every case, so a failed write only shows up in the log.
     *
     * @param conf configuration used to create the writer
     * @param matrix matrix to store; the array itself is not modified
     * @param path destination file
     * @param saveTransposed when {@code true} the transposed matrix is stored
     * @return {@code path}
     */
    static Path writeMatrix(Configuration conf, double[][] matrix, Path path, boolean saveTransposed) {
        // Keep the parameter untouched and work on a local reference instead
        double[][] rowsToWrite = matrix;

        // Transpose Matrix before saving. An empty matrix has no first row to
        // take the column count from, so it is written as is (no records).
        if (saveTransposed && matrix.length > 0) {
            int rows = matrix.length;
            int columns = matrix[0].length;
            double[][] transposed = new double[columns][rows];
            for (int i = 0; i < rows; i++) {
                for (int j = 0; j < columns; j++) {
                    transposed[j][i] = matrix[i][j];
                }
            }
            rowsToWrite = transposed;
        }

        // Write matrix to DFS
        SequenceFile.Writer writer = null;
        try {
            writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, PipesVectorWritable.class);

            LOG.info("writeRandomDistributedRowMatrix path: " + path + " saveTransposed: " + saveTransposed);
            for (int i = 0; i < rowsToWrite.length; i++) {
                DenseDoubleVector rowVector = new DenseDoubleVector(rowsToWrite[i]);
                writer.append(new IntWritable(i), new PipesVectorWritable(rowVector));
                LOG.info("IntWritable: " + i + " PipesVectorWritable: " + rowVector.toString());
            }

        } catch (IOException e) {
            // Report through the class logger instead of printing to stderr
            LOG.error("Could not write matrix to " + path, e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    LOG.error("Could not close writer for " + path, e);
                }
            }
        }

        return path;
    }

    /**
     * Computes the matrix product {@code matrixA * matrixB}.
     * <p>
     * The result has as many rows as {@code matrixA} and as many columns as
     * the first row of {@code matrixB}; the inner dimension is taken from the
     * first row of {@code matrixA}. Each result cell accumulates its products
     * in increasing order of the inner index.
     *
     * @param matrixA left operand
     * @param matrixB right operand
     * @return the product as a newly allocated matrix
     */
    static double[][] multiplyMatrix(double[][] matrixA, double[][] matrixB) {
        final double[][] product = new double[matrixA.length][matrixB[0].length];
        final int rowCount = matrixA.length;
        final int innerCount = matrixA[0].length;
        final int columnCount = matrixB[0].length;

        for (int row = 0; row < rowCount; row++) {
            final double[] targetRow = product[row];
            final double[] leftRow = matrixA[row];
            for (int inner = 0; inner < innerCount; inner++) {
                final double[] rightRow = matrixB[inner];
                for (int col = 0; col < columnCount; col++) {
                    targetRow[col] = targetRow[col] + leftRow[inner] * rightRow[col];
                }
            }
        }
        return product;
    }

    /**
     * Compares the text output of a job line by line with the expected
     * results.
     * <p>
     * Every non-empty regular file below {@code outputPath} is read; line
     * {@code i} of each file must equal {@code expectedResults[i]}. A file
     * with more lines than there are expected results fails the test with an
     * assertion message.
     *
     * @param conf job configuration (not used by this check)
     * @param outputPath directory holding the job output
     * @param expectedResults expected lines, in file order
     * @throws IOException if the output cannot be listed or read
     */
    static void verifyOutput(HamaConfiguration conf, Path outputPath, String[] expectedResults) throws IOException {
        FileStatus[] listStatus = fs.listStatus(outputPath);
        for (FileStatus status : listStatus) {
            // Only non-empty regular files carry results
            if (status.isDir() || status.getLen() <= 0) {
                continue;
            }

            LOG.info("Output File: " + status.getPath());
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
            try {
                String line;
                int i = 0;
                while ((line = br.readLine()) != null) {
                    LOG.info("output[" + i + "]: '" + line + "'");
                    // Fail with a readable message instead of running past
                    // the end of the expectation array
                    assertFalse("Unexpected output line " + i + ": '" + line + "' (only "
                            + expectedResults.length + " lines expected)", i >= expectedResults.length);
                    LOG.info("expected[" + i + "]: '" + expectedResults[i] + "'");
                    assertEquals("'" + expectedResults[i] + "' != '" + line + "'", expectedResults[i],
                            line);
                    i++;
                }
            } finally {
                br.close();
            }
        }
    }

    /**
     * Checks the numeric result of a job.
     * <p>
     * For every regular file below {@code outputPath} the first
     * {@link NullWritable}/{@link DoubleWritable} record is read and, when
     * present, must equal {@code expectedResult} within {@code delta}. Files
     * without any record are skipped.
     *
     * @param conf configuration used to open the sequence files
     * @param outputPath directory holding the job output
     * @param expectedResult value the job is expected to produce
     * @param delta maximum tolerated absolute difference
     * @throws IOException if the output cannot be listed or read
     */
    static void verifyOutput(HamaConfiguration conf, Path outputPath, double expectedResult, double delta)
            throws IOException {
        FileStatus[] listStatus = fs.listStatus(outputPath);
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                continue;
            }

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
            try {
                NullWritable key = NullWritable.get();
                DoubleWritable value = new DoubleWritable();
                if (reader.next(key, value)) {
                    LOG.info("Output File: " + status.getPath());
                    LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + expectedResult + "'");
                    assertEquals("Expected value: '" + expectedResult + "' != '" + value + "'", expectedResult,
                            value.get(), delta);
                }
            } finally {
                // Close the reader even when the assertion or the read fails
                reader.close();
            }
        }
    }

    /**
     * Compares the output of the matrix multiplication job with the expected
     * product.
     * <p>
     * Every regular file below {@code outputPath} is read as a sequence of
     * {@link IntWritable}/{@link PipesVectorWritable} records. Within a file
     * the keys must count up from 0, and each vector element must equal the
     * matching cell of {@code matrix} within {@code 10^-DOUBLE_PRECISION}.
     *
     * @param conf configuration used to open the sequence files
     * @param outputPath directory holding the job output
     * @param matrix expected product, indexed {@code [row][column]}
     * @throws IOException if the output cannot be listed or read
     */
    static void verifyMatrixMultiplicationOutput(HamaConfiguration conf, Path outputPath, double[][] matrix)
            throws IOException {
        // Loop-invariant tolerance for the cell comparison
        final double delta = Math.pow(10, (DOUBLE_PRECISION * -1));

        FileStatus[] listStatus = fs.listStatus(outputPath);
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                continue;
            }

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
            try {
                IntWritable key = new IntWritable();
                PipesVectorWritable value = new PipesVectorWritable();
                int rowIdx = 0;
                while (reader.next(key, value)) {
                    assertEquals("Expected rowIdx: '" + rowIdx + "' != '" + key.get() + "'", rowIdx, key.get());

                    DoubleVector rowVector = value.getVector();

                    for (int colIdx = 0; colIdx < rowVector.getLength(); colIdx++) {
                        double colValue = rowVector.get(colIdx);
                        assertEquals(
                                "Expected colValue: '" + matrix[rowIdx][colIdx] + "' != '" + colValue + "' in row: "
                                        + rowIdx + " values: " + rowVector.toString(),
                                matrix[rowIdx][colIdx], colValue, delta);
                    }
                    rowIdx++;
                }
            } finally {
                // Close the reader even when an assertion or the read fails
                reader.close();
            }
        }
    }

    /**
     * Recursively deletes the given path and asserts that it no longer
     * exists afterwards.
     *
     * @param fs file system holding the path
     * @param p path to remove
     * @throws IOException if deleting or the existence check fails
     */
    static void cleanup(FileSystem fs, Path p) throws IOException {
        final boolean recursive = true;
        fs.delete(p, recursive);

        final String failureMessage = p.getName() + " not cleaned up";
        final boolean stillPresent = fs.exists(p);
        assertFalse(failureMessage, stillPresent);
    }

    /**
     * Submits the given job with a native pipes executable and waits for it
     * through {@link Submitter#runJob}.
     * <p>
     * The job is named after the program, uses {@link PipesBSP} as BSP class
     * and reads from / writes to the given paths. Before submission the
     * number of groom servers reported by the cluster is compared with
     * {@code numOfGroom}, and the executable is copied from the local file
     * system to {@code EXAMPLE_TMP_OUTPUT + "testing/bin/application"}.
     *
     * @param bsp pre-configured job
     * @param program local path of the native executable
     * @param inputPath job input
     * @param outputPath job output
     * @param numBspTasks number of BSP tasks to request
     * @param numOfGroom expected number of groom servers in the cluster
     * @throws IOException if file system access or job submission fails
     * @throws ClassNotFoundException declared for the job submission
     * @throws InterruptedException declared for the job submission
     */
    static void runProgram(BSPJob bsp, Path program, Path inputPath, Path outputPath, int numBspTasks,
            int numOfGroom) throws IOException, ClassNotFoundException, InterruptedException {
        HamaConfiguration conf = (HamaConfiguration) bsp.getConfiguration();
        bsp.setJobName("Test Hama Pipes " + program.getName());
        bsp.setBspClass(PipesBSP.class);

        FileInputFormat.setInputPaths(bsp, inputPath);
        FileOutputFormat.setOutputPath(bsp, outputPath);

        BSPJobClient jobClient = new BSPJobClient(conf);

        // Set bspTaskNum
        ClusterStatus cluster = jobClient.getClusterStatus(false);
        assertEquals(numOfGroom, cluster.getGroomServers());
        bsp.setNumBspTask(numBspTasks);

        // Copy binary to DFS
        Path testExec = new Path(EXAMPLE_TMP_OUTPUT + "testing/bin/application");
        fs.delete(testExec.getParent(), true);
        fs.copyFromLocalFile(program, testExec);

        // Set Executable
        Submitter.setExecutable(conf, fs.makeQualified(testExec).toString());

        // Run bspJob
        Submitter.runJob(bsp);

        LOG.info("Client finishes execution job");

        // check output
        FileStatus[] listStatus = fs.listStatus(outputPath);
        // NOTE(review): some FileSystem versions may return null for a missing
        // path instead of throwing; guard so the report itself cannot fail.
        int outputEntries = (listStatus == null) ? 0 : listStatus.length;
        // TODO check whether you can force the number of tasks.
        LOG.info("Output entries: " + outputEntries + ", requested BSP tasks: " + numBspTasks);
    }
}