Example usage for org.apache.hadoop.fs FileSystem close

List of usage examples for org.apache.hadoop.fs FileSystem close

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem close.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Close this FileSystem instance.
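
Before the extracted examples below, here is a minimal sketch of the typical call pattern: obtain a FileSystem from a Configuration, use it, and close it when done. The class name and path are illustrative only, not taken from the examples. Note that FileSystem.get(conf) may return a cached instance shared within the JVM, so closing it also affects other callers holding the same instance.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCloseExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf); // may return a cached, shared instance
        try {
            // use the filesystem; the path below is illustrative
            boolean exists = fs.exists(new Path("/tmp/example"));
            System.out.println("path exists: " + exists);
        } finally {
            // release the FileSystem instance once it is no longer needed
            fs.close();
        }
    }
}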

Usage

From source file:com.ibm.bi.dml.runtime.transform.MVImputeAgent.java

License:Open Source License

/**
 * Method to load transform metadata for all attributes
 * @param job
 * @throws IOException
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path tfMtdDir, TfUtils agents) throws IOException {

    if (fs.isDirectory(tfMtdDir)) {

        // Load information about missing value imputation
        if (_mvList != null)
            for (int i = 0; i < _mvList.length; i++) {
                int colID = _mvList[i];

                if (_mvMethodList[i] == 1 || _mvMethodList[i] == 2)
                    // global_mean or global_mode
                    _replacementList[i] = readReplacement(colID, fs, tfMtdDir, agents);
                else if (_mvMethodList[i] == 3) {
                    // constant: replace a missing value by a given constant
                    // nothing to do. The constant values are loaded already during configure 
                } else
                    throw new RuntimeException("Invalid Missing Value Imputation methods: " + _mvMethodList[i]);
            }

        // Load scaling information
        if (_mvList != null)
            for (int i = 0; i < _mvList.length; i++)
                if (_isMVScaled.get(i))
                    processScalingFile(i, _mvList, _meanList, _varList, fs, tfMtdDir, agents);

        if (_scnomvList != null)
            for (int i = 0; i < _scnomvList.length; i++)
                processScalingFile(i, _scnomvList, _scnomvMeanList, _scnomvVarList, fs, tfMtdDir, agents);
    } else {
        fs.close();
        throw new RuntimeException("Path to recode maps must be a directory: " + tfMtdDir);
    }
}

From source file:com.ibm.bi.dml.runtime.transform.RecodeAgent.java

License:Open Source License

/**
 * Method to load recode maps of all attributes, at once.
 * @param job
 * @throws IOException
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
    if (_rcdList == null)
        return;

    _finalMaps = new HashMap<Integer, HashMap<String, String>>();

    if (fs.isDirectory(txMtdDir)) {
        for (int i = 0; i < _rcdList.length; i++) {
            int colID = _rcdList[i];

            Path path = new Path(txMtdDir + "/Recode/" + agents.getName(colID) + RCD_MAP_FILE_SUFFIX);
            TfUtils.checkValidInputFile(fs, path, true);

            HashMap<String, String> map = new HashMap<String, String>();

            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
            String line = null, word = null;
            String rcdIndex = null;

            // Example line to parse: "WN (1)67492",1,61975
            while ((line = br.readLine()) != null) {

                // last occurrence of quotation mark
                int idxQuote = line.lastIndexOf('"');
                word = UtilFunctions.unquote(line.substring(0, idxQuote + 1));

                int idx = idxQuote + 2;
                while (line.charAt(idx) != TXMTD_SEP.charAt(0))
                    idx++;
                rcdIndex = line.substring(idxQuote + 2, idx);

                map.put(word, rcdIndex);
            }
            br.close();
            _finalMaps.put(colID, map);
        }
    } else {
        fs.close();
        throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir);
    }
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

public void writeSequentialHeap() throws Exception {
    System.out.println("writing sequential file in heap mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FSDataOutputStream instream = fs.create(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        //         System.out.println("writing data, len " + buf.length);
        instream.write(buf, 0, buf.length);
        sumbytes = sumbytes + buf.length;
        ops = ops + 1.0;
    }
    instream.flush();
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }

    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

public void readSequentialDirect() throws Exception {
    System.out.println("reading sequential file in direct mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();
    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

public void readSequentialHeap() throws Exception {
    System.out.println("reading sequential file in heap mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);

    long start = System.currentTimeMillis();
    while (ops < loop) {
        double ret = (double) this.read(instream, buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            ops = ops + 1.0;
            if (instream.getPos() == 0) {
                break;
            } else {
                instream.seek(0);
            }
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }
    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

public void readRandomDirect() throws Exception {
    System.out.println("reading random file in direct mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    ByteBuffer buf = ByteBuffer.allocateDirect(size);
    buf.clear();
    double sumbytes = 0;
    double ops = 0;
    long _range = status.getLen() - ((long) buf.capacity());
    double range = (double) _range;
    Random random = new Random();

    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);
    long start = System.currentTimeMillis();
    while (ops < loop) {
        buf.clear();
        double _offset = range * random.nextDouble();
        long offset = (long) _offset;
        instream.seek(offset);
        double ret = (double) instream.read(buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            break;
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }

    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

public void readRandomHeap() throws Exception {
    System.out.println("reading random file in heap mode " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus status = fs.getFileStatus(path);
    FSDataInputStream instream = fs.open(path);
    byte[] buf = new byte[size];
    double sumbytes = 0;
    double ops = 0;
    long _range = status.getLen() - ((long) buf.length);
    double range = (double) _range;
    Random random = new Random();

    System.out.println("file capacity " + status.getLen());
    System.out.println("read size " + size);
    System.out.println("operations " + loop);
    long start = System.currentTimeMillis();
    while (ops < loop) {
        double _offset = range * random.nextDouble();
        long offset = (long) _offset;
        instream.seek(offset);
        double ret = (double) this.read(instream, buf);
        if (ret > 0) {
            sumbytes = sumbytes + ret;
            ops = ops + 1.0;
        } else {
            break;
        }
    }
    long end = System.currentTimeMillis();
    double executionTime = ((double) (end - start)) / 1000.0;
    double throughput = 0.0;
    double latency = 0.0;
    double sumbits = sumbytes * 8.0;
    if (executionTime > 0) {
        throughput = sumbits / executionTime / 1024.0 / 1024.0;
        latency = 1000000.0 * executionTime / ops;
    }

    System.out.println("execution time " + executionTime);
    System.out.println("ops " + ops);
    System.out.println("sumbytes " + sumbytes);
    System.out.println("throughput " + throughput);
    System.out.println("latency " + latency);
    System.out.println("closing stream");
    instream.close();
    fs.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

void getFile() throws Exception, InterruptedException {
    System.out.println("get file, path " + path + ", outstanding " + size + ", loop " + loop);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path paths[] = new Path[loop];
    for (int j = 0; j < loop; j++) {
        paths[j] = new Path(path.toString() + "/" + j);
    }
    int repfactor = 4;
    for (int k = 0; k < repfactor; k++) {
        long start = System.currentTimeMillis();
        for (int i = 0; i < size; i++) {
            //single operation == loop
            for (int j = 0; j < loop; j++) {
                fs.listStatus(paths[j]);
            }
        }
        long end = System.currentTimeMillis();
        double executionTime = ((double) (end - start));
        double latency = executionTime * 1000.0 / ((double) size);
        System.out.println("execution time [ms] " + executionTime);
        System.out.println("latency [us] " + latency);
    }
    fs.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

void createFile() throws Exception, InterruptedException {
    System.out.println("create file async hdfs, path " + path + ", size " + size + ", loop " + loop);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    int repfactor = 4;
    for (int k = 0; k < repfactor; k++) {
        LinkedBlockingQueue<Path> pathQueue = new LinkedBlockingQueue<Path>();
        fs.mkdirs(path);
        for (int i = 0; i < loop * size; i++) {
            String name = "" + i;
            Path f = new Path(path, name);
            pathQueue.add(f);
        }

        LinkedBlockingQueue<FSDataOutputStream> streamQueue = new LinkedBlockingQueue<FSDataOutputStream>();
        long start = System.currentTimeMillis();
        for (int i = 0; i < size; i++) {
            //single operation == loop
            for (int j = 0; j < loop; j++) {
                Path path = pathQueue.poll();
                fs.create(path).close();
            }
        }
        long end = System.currentTimeMillis();
        double executionTime = ((double) (end - start));
        double latency = executionTime * 1000.0 / ((double) size);
        System.out.println("execution time [ms] " + executionTime);
        System.out.println("latency [us] " + latency);

        while (!streamQueue.isEmpty()) {
            FSDataOutputStream stream = streamQueue.poll();
            stream.close();
        }

        if (k < repfactor - 1) {
            fs.delete(path, true);
            Thread.sleep(2000);
        }
    }
    fs.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

void enumerateDir() throws Exception {
    System.out.println("enumarate dir, path " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    int repfactor = 4;
    for (int k = 0; k < repfactor; k++) {
        long start = System.currentTimeMillis();
        for (int i = 0; i < size; i++) {
            // single operation == loop
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
            while (iter.hasNext()) {
                iter.next();
            }
        }
        long end = System.currentTimeMillis();
        double executionTime = ((double) (end - start));
        double latency = executionTime * 1000.0 / ((double) size);
        System.out.println("execution time [ms] " + executionTime);
        System.out.println("latency [us] " + latency);
    }
    fs.close();
}