Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

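Filter files/directories in the given list of paths using the default path filter. Note that the examples below call the single-path overload, listStatus(Path), which lists the statuses of the files/directories in the given path if that path is a directory.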

Usage

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java

License:Apache License

/**
 * Computes the matrix product {@code this * other} by running a
 * {@code MatrixMultiplicationHybridBSP} job and wrapping its result file.
 *
 * <p>The job is configured to write into {@code outPath.getParent()}. After it
 * completes, the first {@code part-*} file listed in that directory whose length
 * exceeds 97 bytes is renamed to {@code outPath}.
 *
 * @param other the right-hand operand; its row count must equal this matrix's
 *        column count
 * @param outPath path the result file is renamed to
 * @return a DistributedRowMatrix over {@code outPath} with {@code this.numRows}
 *         rows and {@code other.numCols()} columns, or {@code null} if the job did
 *         not complete successfully
 * @throws IOException if the dimensions do not match or the result file cannot be
 *         renamed to {@code outPath}
 * @throws ClassNotFoundException propagated from building or running the BSP job
 * @throws InterruptedException propagated from building or running the BSP job
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath)
        throws IOException, ClassNotFoundException, InterruptedException {
    // (l x m) * (m x n) = (l x n): columns of A must equal rows of B
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration currentConf = getConf();
    Configuration initialConf = (currentConf == null) ? new HamaConfiguration() : currentConf;

    // Build MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent());

    // Multiply Matrix
    if (!job.waitForCompletion(true)) {
        return null;
    }

    // Rename result file to output path
    Configuration conf = job.getConfiguration();
    FileSystem fs = outPath.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(outPath.getParent());
    // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
    if (files != null) {
        for (FileStatus file : files) {
            // NOTE(review): 97 presumably is the size of an empty output file header - confirm
            if (file.getPath().getName().startsWith("part-") && file.getLen() > 97) {
                // A false return would otherwise leave outPath missing or stale
                if (!fs.rename(file.getPath(), outPath)) {
                    throw new IOException("Could not rename " + file.getPath() + " to " + outPath);
                }
                break;
            }
        }
    }

    // Read resulting Matrix from HDFS
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
            other.numCols());
    out.setConf(conf);

    return out;
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java

License:Apache License

/**
 * Computes the matrix product {@code this * other} by running a
 * {@code MatrixMultiplicationHybridBSP} job and wrapping its result file.
 *
 * <p>The job is configured to write into {@code outPath.getParent()}. After it
 * completes, the first {@code part-*} file listed in that directory whose length
 * exceeds 97 bytes is renamed to {@code outPath}.
 *
 * @param other the right-hand operand; its row count must equal this matrix's
 *        column count
 * @param outPath path the result file is renamed to
 * @param tileWidth tile width forwarded unchanged to the job configuration
 * @param isDebugging debug flag forwarded unchanged to the job configuration
 * @return a DistributedRowMatrix over {@code outPath} with {@code this.numRows}
 *         rows and {@code other.numCols()} columns, or {@code null} if the job did
 *         not complete successfully
 * @throws IOException if the dimensions do not match or the result file cannot be
 *         renamed to {@code outPath}
 * @throws ClassNotFoundException propagated from building or running the BSP job
 * @throws InterruptedException propagated from building or running the BSP job
 */
public DistributedRowMatrix multiplyBSP(DistributedRowMatrix other, Path outPath, int tileWidth,
        boolean isDebugging) throws IOException, ClassNotFoundException, InterruptedException {

    // (l x m) * (m x n) = (l x n): columns of A must equal rows of B
    if (numCols != other.numRows()) {
        throw new IOException("Cols of MatrixA != rows of MatrixB! (" + numCols + "!=" + other.numRows() + ")");
    }

    Configuration currentConf = getConf();
    Configuration initialConf = (currentConf == null) ? new HamaConfiguration() : currentConf;

    // Build MatrixMultiplication job configuration
    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(initialConf,
            this.rowPath, other.rowPath, outPath.getParent(), tileWidth, isDebugging);

    // Multiply Matrix
    if (!job.waitForCompletion(true)) {
        return null;
    }

    // Rename result file to output path
    Configuration conf = job.getConfiguration();
    FileSystem fs = outPath.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(outPath.getParent());
    // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
    if (files != null) {
        for (FileStatus file : files) {
            // NOTE(review): 97 presumably is the size of an empty output file header - confirm
            if (file.getPath().getName().startsWith("part-") && file.getLen() > 97) {
                // A false return would otherwise leave outPath missing or stale
                if (!fs.rename(file.getPath(), outPath)) {
                    throw new IOException("Could not rename " + file.getPath() + " to " + outPath);
                }
                break;
            }
        }
    }

    // Read resulting Matrix from HDFS
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
            other.numCols());
    out.setConf(conf);

    return out;
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBenchmark.java

License:Apache License

/**
 * Copies every non-empty entry listed directly under {@code CONF_OUTPUT_DIR} to
 * standard output, each preceded by a {@code "File <path>"} line.
 *
 * @param conf configuration used to resolve the file system and passed to the copy
 * @throws IOException if listing, opening or copying a file fails
 */
static void printOutput(Configuration conf) throws IOException {
    FileSystem fs = CONF_OUTPUT_DIR.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(CONF_OUTPUT_DIR);
    if (files == null) {
        // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
        return;
    }
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        System.out.println("File " + file.getPath());
        FSDataInputStream in = fs.open(file.getPath());
        try {
            // close=false so that System.out is not closed by the copy
            IOUtils.copyBytes(in, System.out, conf, false);
        } finally {
            // Release the stream even when the copy fails
            in.close();
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBSP.java

License:Apache License

/**
 * Command-line entry point: creates two random matrices, multiplies them with the
 * MatrixMultiplicationHybridBSP job and verifies the product against the result of
 * {@code multiplyJava}.
 *
 * <p>Accepts either no arguments (built-in defaults are used) or exactly nine:
 * numBspTask, numGpuBspTask, numRowsA, numColsA, numRowsB, numColsB, tileWidth,
 * GPUPercentage and debug. Any other argument count prints the usage text and
 * returns without running the job.
 *
 * @param args command-line arguments as described above
 * @throws Exception if the matrix dimensions are incompatible, the BSP job fails,
 *         the result file cannot be renamed, or an underlying call fails
 */
public static void main(String[] args) throws Exception {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int numRowsA = 4;// 1024;
    int numColsA = 4;// 1024;
    int numRowsB = 4;// 1024;
    int numColsB = 4;// 1024;
    // 32 * 32 = 1024 threads; presumably chosen to match the GPU block size - TODO confirm
    int tileWidth = 32;
    int gpuPercentage = 100;
    boolean isDebugging = true;

    Configuration conf = new HamaConfiguration();

    if (args.length > 0) {
        if (args.length == 9) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            numRowsA = Integer.parseInt(args[2]);
            numColsA = Integer.parseInt(args[3]);
            numRowsB = Integer.parseInt(args[4]);
            numColsB = Integer.parseInt(args[5]);
            tileWidth = Integer.parseInt(args[6]);
            gpuPercentage = Integer.parseInt(args[7]);
            isDebugging = Boolean.parseBoolean(args[8]);

        } else {
            System.out.println("Wrong argument size!");
            System.out.println("    Argument1=numBspTask");
            System.out.println("    Argument2=numGpuBspTask");
            System.out.println("    Argument3=numRowsA | Number of rows of the first input matrix");
            System.out.println("    Argument4=numColsA | Number of columns of the first input matrix");
            System.out.println("    Argument5=numRowsB | Number of rows of the second input matrix");
            System.out.println("    Argument6=numColsB | Number of columns of the second input matrix");
            System.out.println("    Argument7=tileWidth | TileWidth denotes the size of a submatrix");
            System.out.println("    Argument8=GPUPercentage (percentage of input)");
            System.out.println("    Argument9=debug | Enable debugging (true|false)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean("hama.pipes.logging", false);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("numRowsA: " + numRowsA);
    LOG.info("numColsA: " + numColsA);
    LOG.info("numRowsB: " + numRowsB);
    LOG.info("numColsB: " + numColsB);
    LOG.info("isDebugging: " + isDebugging);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);

    if (numColsA != numRowsB) {
        throw new IllegalArgumentException(
                "Cols of MatrixA != rows of MatrixB! (" + numColsA + "!=" + numRowsB + ")");
    }

    // Create random DistributedRowMatrix
    // use constant seeds to get reproducible results
    // Matrix A
    DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA, new Random(42L),
            MATRIX_A_SPLITS_PATH, false, numBspTask, numGpuBspTask, gpuPercentage);

    // Matrix B is stored in transposed order
    List<Path> transposedMatrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB,
            numColsB, new Random(1337L), MATRIX_B_TRANSPOSED_PATH, true);

    // Execute MatrixMultiplication BSP Job
    long startTime = System.currentTimeMillis();

    BSPJob job = MatrixMultiplicationHybridBSP.createMatrixMultiplicationHybridBSPConf(conf,
            MATRIX_A_SPLITS_PATH, transposedMatrixBPaths.get(0), MATRIX_C_PATH, tileWidth, isDebugging);

    // Multiply Matrix. Without a result there is nothing to verify or print, so fail
    // here with a clear message instead of a NullPointerException further down.
    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("MatrixMultiplicationHybridBSP job failed; no result matrix to verify");
    }

    // Rename result file to output path
    Path matrixCOutPath = new Path(MATRIX_C_PATH + "/part0.seq");

    FileSystem fs = MATRIX_C_PATH.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(MATRIX_C_PATH);
    // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
    if (files != null) {
        for (FileStatus file : files) {
            // NOTE(review): 97 presumably is the size of an empty output file header - confirm
            if (file.getPath().getName().startsWith("part-") && file.getLen() > 97) {
                // A false return would otherwise leave matrixCOutPath missing or stale
                if (!fs.rename(file.getPath(), matrixCOutPath)) {
                    throw new IOException("Could not rename " + file.getPath() + " to " + matrixCOutPath);
                }
                break;
            }
        }
    }

    // Read resulting Matrix from HDFS
    DistributedRowMatrix matrixC = new DistributedRowMatrix(matrixCOutPath, MATRIX_C_PATH, numRowsA, numColsB);
    matrixC.setConf(conf);

    LOG.info("MatrixMultiplicationHybrid using Hama finished in "
            + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // Create matrix A in one file for verification
    List<Path> matrixAPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsA, numColsA,
            new Random(42L), MATRIX_A_PATH, false);
    DistributedRowMatrix matrixA = new DistributedRowMatrix(matrixAPaths.get(0), CONF_INPUT_DIR, numRowsA,
            numColsA);
    matrixA.setConf(conf);

    // Create matrix B, NOT transposed for verification
    List<Path> matrixBPaths = DistributedRowMatrix.createRandomDistributedRowMatrix(conf, numRowsB, numColsB,
            new Random(1337L), MATRIX_B_PATH, false);
    DistributedRowMatrix matrixB = new DistributedRowMatrix(matrixBPaths.get(0), CONF_INPUT_DIR, numRowsB,
            numColsB);
    matrixB.setConf(conf);

    // Verification
    DistributedRowMatrix matrixD = matrixA.multiplyJava(matrixB, MATRIX_D_PATH);
    if (matrixC.verify(matrixD)) {
        System.out.println("Verify PASSED!");
    } else {
        System.out.println("Verify FAILED!");
    }

    if (isDebugging) {
        System.out.println("\nMatrix A:");
        matrixA.printDistributedRowMatrix();
        System.out.println("\nMatrix B:");
        matrixB.printDistributedRowMatrix();

        System.out.println("\nTransposedMatrix B:");
        // Load DistributedRowMatrix transposedMatrixB
        DistributedRowMatrix transposedMatrixB = new DistributedRowMatrix(transposedMatrixBPaths.get(0),
                CONF_INPUT_DIR, numColsB, numRowsB);
        transposedMatrixB.setConf(conf);
        transposedMatrixB.printDistributedRowMatrix();

        System.out.println("\nMatrix C:");
        matrixC.printDistributedRowMatrix();
        System.out.println("\nMatrix D:");
        matrixD.printDistributedRowMatrix();

        // Print out log files
        printOutput(conf);
    }
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.MatrixMultiplicationHybridBSP.java

License:Apache License

/**
 * Copies every non-empty entry under {@code CONF_OUTPUT_DIR} whose name ends with
 * {@code ".log"} to standard output, each preceded by a {@code "File <path>"} line.
 *
 * @param conf configuration used to resolve the file system and passed to the copy
 * @throws IOException if listing, opening or copying a file fails
 */
static void printOutput(Configuration conf) throws IOException {
    FileSystem fs = CONF_OUTPUT_DIR.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(CONF_OUTPUT_DIR);
    if (files == null) {
        // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
        return;
    }
    for (FileStatus file : files) {
        boolean isNonEmptyLog = file.getLen() > 0 && file.getPath().getName().endsWith(".log");
        if (!isNonEmptyLog) {
            continue;
        }
        System.out.println("File " + file.getPath());
        FSDataInputStream in = fs.open(file.getPath());
        try {
            // close=false so that System.out is not closed by the copy
            IOUtils.copyBytes(in, System.out, conf, false);
        } finally {
            // Release the stream even when the copy fails
            in.close();
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.piestimator.PiEstimatorHybridBenchmark.java

License:Apache License

/**
 * Copies every non-empty entry listed directly under {@code path} to standard
 * output, each preceded by a {@code "File <path>"} line. Prints a notice instead
 * when the listing is {@code null}.
 *
 * @param conf configuration used to resolve the file system and passed to the copy
 * @param path directory whose entries are printed
 * @throws IOException if listing, opening or copying a file fails
 */
static void printOutput(Configuration conf, Path path) throws IOException {
    System.out.println("printOutput: " + path);
    // Resolve the file system from the path itself so a path on a non-default
    // file system is handled too; scheme-less paths still use the default one.
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(path);
    if (files == null) {
        System.out.println("No directory available: " + path);
        return;
    }
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        System.out.println("File " + file.getPath());
        FSDataInputStream in = fs.open(file.getPath());
        try {
            // close=false so that System.out is not closed by the copy
            IOUtils.copyBytes(in, System.out, conf, false);
        } finally {
            // Release the stream even when the copy fails
            in.close();
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.piestimator.PiEstimatorHybridBSP.java

License:Apache License

/**
 * Copies every non-empty entry in the job's output path to standard output, each
 * preceded by a {@code "File <path>"} line.
 *
 * @param job job whose configuration and {@code FileOutputFormat} output path are used
 * @throws IOException if listing, opening or copying a file fails
 */
static void printOutput(BSPJob job) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] files = fs.listStatus(FileOutputFormat.getOutputPath(job));
    if (files == null) {
        // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
        return;
    }
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        System.out.println("File " + file.getPath());
        FSDataInputStream in = fs.open(file.getPath());
        try {
            // close=false so that System.out is not closed by the copy
            IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
        } finally {
            // Release the stream even when the copy fails
            in.close();
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.summation.SummationBSP.java

License:Apache License

/**
 * Reads the first record of every non-directory entry in the job's output path as
 * a {@code (Text, DoubleWritable)} sequence file, logs it and asserts that the
 * value equals {@code sum} within {@code 10^-DOUBLE_PRECISION}.
 *
 * <p>If reading an entry throws an {@link IOException}, the entry is copied
 * verbatim to standard output instead (when it is non-empty).
 *
 * @param job job whose configuration and {@code FileOutputFormat} output path are used
 * @param sum expected value of the first record
 * @throws IOException if listing the output path or the verbatim fallback copy fails
 */
static void printOutput(BSPJob job, BigDecimal sum) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    FileStatus[] listStatus = fs.listStatus(FileOutputFormat.getOutputPath(job));
    if (listStatus == null) {
        // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
        return;
    }
    for (FileStatus status : listStatus) {
        if (status.isDir()) {
            continue;
        }
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(),
                    job.getConfiguration());
            try {
                Text key = new Text();
                DoubleWritable value = new DoubleWritable();

                if (reader.next(key, value)) {
                    LOG.info("Output File: " + status.getPath());
                    LOG.info("key: '" + key + "' value: '" + value + "' expected: '" + sum.doubleValue() + "'");
                    Assert.assertEquals("Expected value: '" + sum.doubleValue() + "' != '" + value + "'",
                            sum.doubleValue(), value.get(), Math.pow(10, (DOUBLE_PRECISION * -1)));
                }
            } finally {
                // Close the reader even when reading or the assertion fails
                reader.close();
            }
        } catch (IOException e) {
            // Reading as a sequence file failed: fall back to dumping the raw contents
            if (status.getLen() > 0) {
                System.out.println("Output File " + status.getPath());
                FSDataInputStream in = fs.open(status.getPath());
                try {
                    // close=false so that System.out is not closed by the copy
                    IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                } finally {
                    in.close();
                }
            }
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.testglobalgpusync.TestGlobalGpuSyncHybridBSP.java

License:Apache License

/**
 * Copies every non-empty entry listed directly under {@code path} to standard
 * output, each preceded by a {@code "File <path>"} line.
 *
 * @param job job whose configuration is passed to the copy
 * @param fs file system used to list and open the entries
 * @param path directory whose entries are printed
 * @throws IOException if listing, opening or copying a file fails
 */
static void printOutput(BSPJob job, FileSystem fs, Path path) throws IOException {
    FileStatus[] files = fs.listStatus(path);
    if (files == null) {
        // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
        return;
    }
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        System.out.println("File " + file.getPath());
        FSDataInputStream in = fs.open(file.getPath());
        try {
            // close=false so that System.out is not closed by the copy
            IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
        } finally {
            // Release the stream even when the copy fails
            in.close();
        }
    }
}

From source file:at.illecker.hama.hybrid.examples.testrootbeer.TestRootbeerHybridBSP.java

License:Apache License

/**
 * Prints every record of each non-empty entry under {@code path}, reading it as a
 * {@code (NullWritable, IntWritable)} sequence file.
 *
 * <p>If reading an entry throws an {@link IOException}, the entry is copied
 * verbatim to standard output instead.
 *
 * @param job job whose configuration is used to resolve the file system and read
 *        the entries
 * @param path directory whose entries are printed
 * @throws IOException if listing the directory, the verbatim fallback copy or
 *         closing the reader fails
 */
static void printOutput(BSPJob job, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    FileStatus[] files = fs.listStatus(path);
    if (files == null) {
        // NOTE(review): some Hadoop releases may return null for a missing directory - confirm
        return;
    }
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        System.out.println("File " + file.getPath());
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, file.getPath(), job.getConfiguration());

            NullWritable key = NullWritable.get();
            IntWritable value = new IntWritable();
            while (reader.next(key, value)) {
                System.out.println("key: '" + key + "' value: '" + value.get() + "'\n");
            }
        } catch (IOException e) {
            // Reading as a sequence file failed: fall back to dumping the raw contents
            FSDataInputStream in = fs.open(file.getPath());
            try {
                // close=false so that System.out is not closed by the copy
                IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
            } finally {
                // Release the fallback stream even when the copy fails
                in.close();
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
}