List of usage examples for org.apache.hadoop.fs.FileSystem.close()
@Override public void close() throws IOException
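All of the examples below obtain a FileSystem with FileSystem.get(conf), use it, and call close() explicitly when done. As a point of comparison, here is a minimal sketch of the same pattern using try-with-resources (FileSystem implements Closeable); the class name and path are placeholders, not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // try-with-resources calls fs.close() automatically, even on exceptions.
        // Note: FileSystem.get(conf) may return a cached, shared instance; closing it
        // also closes it for other users of the same cache entry.
        try (FileSystem fs = FileSystem.get(conf)) {
            Path path = new Path("/tmp/example.txt"); // hypothetical path
            try (FSDataOutputStream out = fs.create(path)) {
                out.writeUTF("hello");
            }
        }
    }
}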
From source file:com.ibm.bi.dml.runtime.transform.MVImputeAgent.java
License:Open Source License
/**
 * Method to load transform metadata for all attributes.
 *
 * @param job
 * @throws IOException
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path tfMtdDir, TfUtils agents) throws IOException {
    if (fs.isDirectory(tfMtdDir)) {
        // Load information about missing value imputation
        if (_mvList != null)
            for (int i = 0; i < _mvList.length; i++) {
                int colID = _mvList[i];

                if (_mvMethodList[i] == 1 || _mvMethodList[i] == 2)
                    // global_mean or global_mode
                    _replacementList[i] = readReplacement(colID, fs, tfMtdDir, agents);
                else if (_mvMethodList[i] == 3) {
                    // constant: replace a missing value by a given constant
                    // nothing to do. The constant values are loaded already during configure
                } else
                    throw new RuntimeException("Invalid Missing Value Imputation methods: " + _mvMethodList[i]);
            }

        // Load scaling information
        if (_mvList != null)
            for (int i = 0; i < _mvList.length; i++)
                if (_isMVScaled.get(i))
                    processScalingFile(i, _mvList, _meanList, _varList, fs, tfMtdDir, agents);

        if (_scnomvList != null)
            for (int i = 0; i < _scnomvList.length; i++)
                processScalingFile(i, _scnomvList, _scnomvMeanList, _scnomvVarList, fs, tfMtdDir, agents);
    } else {
        fs.close();
        throw new RuntimeException("Path to recode maps must be a directory: " + tfMtdDir);
    }
}
From source file:com.ibm.bi.dml.runtime.transform.RecodeAgent.java
License:Open Source License
/**
 * Method to load recode maps of all attributes, at once.
 *
 * @param job
 * @throws IOException
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
    if (_rcdList == null)
        return;

    _finalMaps = new HashMap<Integer, HashMap<String, String>>();

    if (fs.isDirectory(txMtdDir)) {
        for (int i = 0; i < _rcdList.length; i++) {
            int colID = _rcdList[i];

            Path path = new Path(txMtdDir + "/Recode/" + agents.getName(colID) + RCD_MAP_FILE_SUFFIX);
            TfUtils.checkValidInputFile(fs, path, true);

            HashMap<String, String> map = new HashMap<String, String>();

            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
            String line = null, word = null;
            String rcdIndex = null;

            // Example line to parse: "WN (1)67492",1,61975
            while ((line = br.readLine()) != null) {
                // last occurrence of quotation mark
                int idxQuote = line.lastIndexOf('"');
                word = UtilFunctions.unquote(line.substring(0, idxQuote + 1));

                int idx = idxQuote + 2;
                while (line.charAt(idx) != TXMTD_SEP.charAt(0))
                    idx++;
                rcdIndex = line.substring(idxQuote + 2, idx);

                map.put(word, rcdIndex);
            }
            br.close();
            _finalMaps.put(colID, map);
        }
    } else {
        fs.close();
        throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir);
    }
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
public void writeSequentialHeap() throws Exception {
    System.out.println("writing sequential file in heap mode " + path);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream instream = fs.create(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        // System.out.println("writing data, len " + buf.length);
        instream.write(buf, 0, buf.length);
        sumbytes = sumbytes + buf.length;
        ops = ops + 1.0;
    }
    instream.flush();
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;

    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);

    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
public void readSequentialDirect() throws Exception {
    System.out.println("reading sequential file in direct mode " + path);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();
    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;

    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);

    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
public void readSequentialHeap() throws Exception {
    System.out.println("reading sequential file in heap mode " + path);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        double ret = (double) this.read(instream, buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;

    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);

    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
public void readRandomDirect() throws Exception {
    System.out.println("reading random file in direct mode " + path);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();
    double sumbytes = 0;
    double ops = 0;
    long _range = status.getLen() - ((long) buf.capacity());
    double range = (double) _range;
    Random random = new Random();

    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double _offset = range * random.nextDouble();
        long offset = (long) _offset;
        instream.seek(offset);
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            break;
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;

    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);

    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
public void readRandomHeap() throws Exception {
    System.out.println("reading random file in heap mode " + path);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    long _range = status.getLen() - ((long) buf.length);
    double range = (double) _range;
    Random random = new Random();

    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        double _offset = range * random.nextDouble();
        long offset = (long) _offset;
        instream.seek(offset);
        double ret = (double) this.read(instream, buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            break;
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;

    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);

    System.out.println("closing stream");
    instream.close();
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void getFile() throws Exception, InterruptedException {
    System.out.println("get file, path " + path + ", outstanding " + size + ", loop " + loop);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path paths[] = new Path[loop];
    for (int j = 0; j < loop; j++) {
        paths[j] = new Path(path.toString() + "/" + j);
    }

    int repfactor = 4;
    for (int k = 0; k < repfactor; k++) {
        long start = System.currentTimeMillis();
        for (int i = 0; i < size; i++) {
            // single operation == loop
            for (int j = 0; j < loop; j++) {
                fs.listStatus(paths[j]);
            }
        }
        long end = System.currentTimeMillis();
        double executionTime = ((double) (end - start));
        double latency = executionTime * 1000.0 / ((double) size);
        System.out.println("execution time [ms] " + executionTime);
        System.out.println("latency [us] " + latency);
    }
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void createFile() throws Exception, InterruptedException {
    System.out.println("create file async hdfs, path " + path + ", size " + size + ", loop " + loop);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    int repfactor = 4;
    for (int k = 0; k < repfactor; k++) {
        LinkedBlockingQueue<Path> pathQueue = new LinkedBlockingQueue<Path>();
        fs.mkdirs(path);
        for (int i = 0; i < loop * size; i++) {
            String name = "" + i;
            Path f = new Path(path, name);
            pathQueue.add(f);
        }

        LinkedBlockingQueue<FSDataOutputStream> streamQueue = new LinkedBlockingQueue<FSDataOutputStream>();
        long start = System.currentTimeMillis();
        for (int i = 0; i < size; i++) {
            // single operation == loop
            for (int j = 0; j < loop; j++) {
                Path path = pathQueue.poll();
                fs.create(path).close();
            }
        }
        long end = System.currentTimeMillis();
        double executionTime = ((double) (end - start));
        double latency = executionTime * 1000.0 / ((double) size);
        System.out.println("execution time [ms] " + executionTime);
        System.out.println("latency [us] " + latency);

        while (!streamQueue.isEmpty()) {
            FSDataOutputStream stream = streamQueue.poll();
            stream.close();
        }

        if (k < repfactor - 1) {
            fs.delete(path, true);
            Thread.sleep(2000);
        }
    }
    fs.close();
}
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void enumerateDir() throws Exception {
    System.out.println("enumarate dir, path " + path);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    int repfactor = 4;
    for (int k = 0; k < repfactor; k++) {
        long start = System.currentTimeMillis();
        for (int i = 0; i < size; i++) {
            // single operation == loop
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
            while (iter.hasNext()) {
                iter.next();
            }
        }
        long end = System.currentTimeMillis();
        double executionTime = ((double) (end - start));
        double latency = executionTime * 1000.0 / ((double) size);
        System.out.println("execution time [ms] " + executionTime);
        System.out.println("latency [us] " + latency);
    }
    fs.close();
}