List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
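Before the project examples, a minimal sketch of the call itself; the path "/tmp/example.txt" and the default Configuration are illustrative assumptions, not taken from any example below:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path p = new Path("/tmp/example.txt"); // hypothetical path
        // getFileStatus returns the metadata for the path, or throws
        // FileNotFoundException (a subclass of IOException) if it does not exist.
        FileStatus status = fs.getFileStatus(p);
        System.out.println("length: " + status.getLen());
        System.out.println("isDirectory: " + status.isDirectory());
        System.out.println("modificationTime: " + status.getModificationTime());
        fs.close();
    }
}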
From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java
License: Open Source License

public static boolean isHDFSDirectory(String dir) throws IOException {
    FileSystem fs = FileSystem.get(_rJob);
    Path pth = new Path(dir);
    FileStatus fstat = fs.getFileStatus(pth);
    return fstat.isDirectory();
}
From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java
License: Open Source License

public static boolean isFileEmpty(FileSystem fs, String dir) throws IOException {
    Path pth = new Path(dir);
    FileStatus fstat = fs.getFileStatus(pth);
    if (fstat.isDirectory()) {
        // it is a directory
        FileStatus[] stats = fs.listStatus(pth);
        if (stats != null) {
            for (FileStatus stat : stats) {
                if (stat.getLen() > 0)
                    return false;
            }
        }
        return true;
    } else {
        // it is a regular file
        return fstat.getLen() == 0;
    }
}
From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java
License: Open Source License

/**
 * Returns the size of a file or directory on HDFS in bytes.
 *
 * @param path the HDFS path to measure
 * @return the size in bytes
 * @throws IOException if the file system cannot be accessed
 */
public static long getFilesizeOnHDFS(Path path) throws IOException {
    FileSystem fs = FileSystem.get(_rJob);
    long ret = 0; // in bytes
    if (fs.isDirectory(path))
        ret = fs.getContentSummary(path).getLength();
    else
        ret = fs.getFileStatus(path).getLen(); // note: getFileStatus().getLen() would return 0 on directories
    return ret;
}
From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java
License: Open Source License

public static MatrixCharacteristics[] processDimsFiles(String dir, MatrixCharacteristics[] stats)
        throws IOException {
    Path pt = new Path(dir);
    FileSystem fs = FileSystem.get(_rJob);

    if (!fs.exists(pt))
        return stats;

    FileStatus fstat = fs.getFileStatus(pt);
    if (fstat.isDirectory()) {
        FileStatus[] files = fs.listStatus(pt);
        for (int i = 0; i < files.length; i++) {
            Path filePath = files[i].getPath();
            //System.out.println("Processing dims file: " + filePath.toString());
            BufferedReader br = setupInputFile(filePath.toString());

            String line = "";
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(" ");
                int resultIndex = Integer.parseInt(parts[0]);
                long maxRows = Long.parseLong(parts[1]);
                long maxCols = Long.parseLong(parts[2]);

                stats[resultIndex].setDimension(
                        (stats[resultIndex].getRows() < maxRows ? maxRows : stats[resultIndex].getRows()),
                        (stats[resultIndex].getCols() < maxCols ? maxCols : stats[resultIndex].getCols()));
            }
            br.close();
        }
    } else {
        throw new IOException(dir + " is expected to be a folder!");
    }

    return stats;
}
From source file: com.ibm.bi.dml.test.utils.TestUtils.java
License: Open Source License

/**
 * <p>
 * Checks a matrix read from a file in text format against a number of
 * specifications.
 * </p>
 *
 * @param outDir directory containing the matrix
 * @param rows   number of rows
 * @param cols   number of columns
 * @param min    minimum value
 * @param max    maximum value
 */
public static void checkMatrix(String outDir, long rows, long cols, double min, double max) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path outDirectory = new Path(outDir);
        assertTrue(outDir + " does not exist", fs.exists(outDirectory));

        if (fs.getFileStatus(outDirectory).isDirectory()) {
            FileStatus[] outFiles = fs.listStatus(outDirectory);
            for (FileStatus file : outFiles) {
                FSDataInputStream fsout = fs.open(file.getPath());
                BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

                String line;
                while ((line = outIn.readLine()) != null) {
                    String[] rcv = line.split(" ");
                    long row = Long.parseLong(rcv[0]);
                    long col = Long.parseLong(rcv[1]);
                    double value = Double.parseDouble(rcv[2]);
                    assertTrue("invalid row index", (row > 0 && row <= rows));
                    assertTrue("invalid column index", (col > 0 && col <= cols));
                    assertTrue("invalid value", ((value >= min && value <= max) || value == 0));
                }
                outIn.close();
            }
        } else {
            FSDataInputStream fsout = fs.open(outDirectory);
            BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout));

            String line;
            while ((line = outIn.readLine()) != null) {
                String[] rcv = line.split(" ");
                long row = Long.parseLong(rcv[0]);
                long col = Long.parseLong(rcv[1]);
                double value = Double.parseDouble(rcv[2]);
                assertTrue("invalid row index", (row > 0 && row <= rows));
                assertTrue("invalid column index", (col > 0 && col <= cols));
                assertTrue("invalid value", ((value >= min && value <= max) || value == 0));
            }
            outIn.close();
        }
    } catch (IOException e) {
        fail("unable to read file: " + e.getMessage());
    }
}
From source file: com.ibm.bi.dml.test.utils.TestUtils.java
License: Open Source License

/**
 * <p>
 * Removes all the directories specified in the array in HDFS
 * </p>
 *
 * @param directories directories array
 */
public static void removeHDFSDirectories(String[] directories) {
    try {
        FileSystem fs = FileSystem.get(conf);
        for (String directory : directories) {
            Path dir = new Path(directory);
            if (fs.exists(dir) && fs.getFileStatus(dir).isDirectory()) {
                fs.delete(dir, true);
            }
        }
    } catch (IOException e) {
        // ignored: deletion is best-effort test cleanup
    }
}
From source file: com.ibm.bi.dml.test.utils.TestUtils.java
License: Open Source License

/**
 * <p>
 * Removes all the files specified in the array in HDFS
 * </p>
 *
 * @param files files array
 */
public static void removeHDFSFiles(String[] files) {
    try {
        FileSystem fs = FileSystem.get(conf);
        for (String file : files) {
            Path path = new Path(file);
            if (fs.exists(path) && !fs.getFileStatus(path).isDirectory()) {
                fs.delete(path, false);
            }
        }
    } catch (IOException e) {
        // ignored: deletion is best-effort test cleanup
    }
}
From source file: com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License: Apache License

public void readSequentialDirect() throws Exception {
    System.out.println("reading sequential file in direct mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();

    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file: com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License: Apache License

public void readSequentialHeap() throws Exception {
    System.out.println("reading sequential file in heap mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    byte[] buf = new byte[size];

    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        double ret = (double) this.read(instream, buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file: com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License: Apache License

public void readRandomDirect() throws Exception {
    System.out.println("reading random file in direct mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();

    double sumbytes = 0;
    double ops = 0;
    long _range = status.getLen() - ((long) buf.capacity());
    double range = (double) _range;
    Random random = new Random();

    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double _offset = range * random.nextDouble();
        long offset = (long) _offset;
        instream.seek(offset);
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            break;
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}