Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
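
A minimal, self-contained sketch of typical usage follows before the project-specific examples; the class name and the path "/tmp/example.txt" are placeholders, and the default Configuration is assumed to resolve to the intended file system.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // placeholder path; getFileStatus throws FileNotFoundException
        // if the path does not exist
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        System.out.println("length " + status.getLen());
        System.out.println("isDirectory " + status.isDirectory());
        System.out.println("modificationTime " + status.getModificationTime());
        fs.close();
    }
}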

Usage

From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java

License: Open Source License

public static boolean isHDFSDirectory(String dir) throws IOException {
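    // note: getFileStatus throws FileNotFoundException if dir does not exist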
    FileSystem fs = FileSystem.get(_rJob);
    Path pth = new Path(dir);
    FileStatus fstat = fs.getFileStatus(pth);
    return fstat.isDirectory();
}

From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java

License: Open Source License

public static boolean isFileEmpty(FileSystem fs, String dir) throws IOException {
    Path pth = new Path(dir);
    FileStatus fstat = fs.getFileStatus(pth);

    if (fstat.isDirectory()) {
        // it is a directory
        FileStatus[] stats = fs.listStatus(pth);
        if (stats != null) {
            for (FileStatus stat : stats) {
                if (stat.getLen() > 0)
                    return false;
            }
            return true;
        } else {
            return true;
        }
    } else {
        // it is a regular file
        return fstat.getLen() == 0;
    }
}

From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java

License: Open Source License

/**
 * Returns the size of a file or directory on HDFS in bytes.
 * 
 * @param path the HDFS path of the file or directory
 * @return the size in bytes
 * @throws IOException if the path cannot be accessed
 */
public static long getFilesizeOnHDFS(Path path) throws IOException {
    FileSystem fs = FileSystem.get(_rJob);
    long ret = 0; //in bytes
    if (fs.isDirectory(path))
        ret = fs.getContentSummary(path).getLength();
    else
        ret = fs.getFileStatus(path).getLen();
    // note: getFileStatus().getLen() would return 0 for a directory, hence getContentSummary() above

    return ret;
}

From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java

License: Open Source License

public static MatrixCharacteristics[] processDimsFiles(String dir, MatrixCharacteristics[] stats)
        throws IOException {
    Path pt = new Path(dir);
    FileSystem fs = FileSystem.get(_rJob);

    if (!fs.exists(pt))
        return stats;

    FileStatus fstat = fs.getFileStatus(pt);

    if (fstat.isDirectory()) {
        FileStatus[] files = fs.listStatus(pt);
        for (int i = 0; i < files.length; i++) {
            Path filePath = files[i].getPath();
            //System.out.println("Processing dims file: " + filePath.toString());
            BufferedReader br = setupInputFile(filePath.toString());

            String line = "";
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(" ");
                int resultIndex = Integer.parseInt(parts[0]);
                long maxRows = Long.parseLong(parts[1]);
                long maxCols = Long.parseLong(parts[2]);

                stats[resultIndex].setDimension(
                        (stats[resultIndex].getRows() < maxRows ? maxRows : stats[resultIndex].getRows()),
                        (stats[resultIndex].getCols() < maxCols ? maxCols : stats[resultIndex].getCols()));
            }

            br.close();
        }
    } else {
        throw new IOException(dir + " is expected to be a folder!");
    }

    return stats;
}

From source file: com.ibm.bi.dml.test.utils.TestUtils.java

License: Open Source License

/**
 * <p>
 * Checks a matrix read from a file in text format against a number of
 * specifications.
 * </p>
 * 
 * @param outDir
 *            directory containing the matrix
 * @param rows
 *            number of rows
 * @param cols
 *            number of columns
 * @param min
 *            minimum value
 * @param max
 *            maximum value
 */
public static void checkMatrix(String outDir, long rows, long cols, double min, double max) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(outDir);
        assertTrue(outDir + " does not exist", fs.exists(outDirectory));

        if (fs.getFileStatus(outDirectory).isDirectory()) {
            FileStatus[] outFiles = fs.listStatus(outDirectory);
            for (FileStatus file : outFiles) {
                FSDataInputStream fsout = fs.open(file.getPath());
                BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

                String line;
                while ((line = outIn.readLine()) != null) {
                    String[] rcv = line.split(" ");
                    long row = Long.parseLong(rcv[0]);
                    long col = Long.parseLong(rcv[1]);
                    double value = Double.parseDouble(rcv[2]);
                    assertTrue("invalid row index", (row > 0 && row <= rows));
                    assertTrue("invlaid column index", (col > 0 && col <= cols));
                    assertTrue("invalid value", ((value >= min && value <= max) || value == 0));
                }
                outIn.close();
            }
        } else {
            FSDataInputStream fsout = fs.open(outDirectory);
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

            String line;
            while ((line = outIn.readLine()) != null) {
                String[] rcv = line.split(" ");
                long row = Long.parseLong(rcv[0]);
                long col = Long.parseLong(rcv[1]);
                double value = Double.parseDouble(rcv[2]);
                assertTrue("invalid row index", (row > 0 && row <= rows));
                assertTrue("invlaid column index", (col > 0 && col <= cols));
                assertTrue("invalid value", ((value >= min && value <= max) || value == 0));
            }
            outIn.close();
        }
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}

From source file: com.ibm.bi.dml.test.utils.TestUtils.java

License: Open Source License

/**
 * <p>
 * Removes all the directories specified in the array from HDFS.
 * </p>
 * 
 * @param directories
 *            directories array
 */
public static void removeHDFSDirectories(String[] directories) {
    try {
        FileSystem fs = FileSystem.get(conf);
        for (String directory : directories) {
            Path dir = new Path(directory);
            if (fs.exists(dir) && fs.getFileStatus(dir).isDirectory()) {
                fs.delete(dir, true);
            }
        }
    } catch (IOException e) {
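        // best-effort cleanup: deletion failures are deliberately ignored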
    }
}

From source file: com.ibm.bi.dml.test.utils.TestUtils.java

License: Open Source License

/**
 * <p>
 * Removes all the files specified in the array from HDFS.
 * </p>
 * 
 * @param files
 *            files array
 */
public static void removeHDFSFiles(String[] files) {
    try {
        FileSystem fs = FileSystem.get(conf);
        for (String filename : files) {
            Path file = new Path(filename);
            if (fs.exists(file) && !fs.getFileStatus(file).isDirectory()) {
                fs.delete(file, false);
            }
        }
    } catch (IOException e) {
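        // best-effort cleanup: deletion failures are deliberately ignored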
    }
}

From source file: com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License: Apache License

public void readSequentialDirect() throws Exception {
    System.out.println("reading sequential file in direct mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();
    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}

From source file: com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License: Apache License

public void readSequentialHeap() throws Exception {
    System.out.println("reading sequential file in heap mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        double ret = (double) this.read(instream, buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}

From source file: com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License: Apache License

public void readRandomDirect() throws Exception {
    System.out.println("reading random file in direct mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();
    double sumbytes = 0;
    double ops = 0;
    long _range = status.getLen() - ((long) buf.capacity());
    double range = (double) _range;
    Random random = new Random();

    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);
    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double _offset = range * random.nextDouble();
        long offset = (long) _offset;
        instream.seek(offset);
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            break;
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }

    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}