Example usage for org.apache.hadoop.fs FileSystem isDirectory

List of usage examples for org.apache.hadoop.fs FileSystem isDirectory

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.FileSystem method isDirectory.

Prototype

@Deprecated
public boolean isDirectory(Path f) throws IOException 

Source Link

Document

Returns true if and only if the named path is a directory.

Usage

From source file:org.apache.sysml.runtime.io.FrameReaderTextCell.java

License:Apache License

/**
 * Reads a text-cell frame from {@code path} into {@code dest}.
 * A directory is registered as job input and read split by split through a
 * {@code TextInputFormat}; any other path is handed to
 * {@code readRawTextCellFrameFromHDFS}.
 *
 * @param path   input file or directory
 * @param job    job configuration used to configure the input format
 * @param fs     file system the path lives on
 * @param dest   frame block receiving the cells
 * @param schema forwarded to the raw reader only
 * @param names  forwarded to the raw reader only
 * @param rlen   forwarded to the raw reader only
 * @param clen   forwarded to the raw reader only
 * @throws IOException if a file system or read operation fails
 */
protected void readTextCellFrameFromHDFS(Path path, JobConf job, FileSystem fs, FrameBlock dest,
        ValueType[] schema, String[] names, long rlen, long clen) throws IOException {
    if (!fs.isDirectory(path)) {
        // not a directory: delegate to the raw reader
        readRawTextCellFrameFromHDFS(path, job, fs, dest, schema, names, rlen, clen);
        return;
    }

    // directory: register it as input and consume every split
    FileInputFormat.addInputPath(job, path);
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    for (InputSplit part : inputFormat.getSplits(job, 1)) {
        readTextCellFrameFromInputSplit(part, inputFormat, job, dest);
    }
}

From source file:org.apache.sysml.runtime.io.IOUtilFunctions.java

License:Apache License

/**
 * Resolves the paths that make up a (possibly multi-part) sequence file.
 *
 * @param fs   file system used for the directory check and listing
 * @param file file or directory to resolve
 * @return the listed entries of {@code file} minus internal ("_"-prefixed) files
 *         and the ".mtd" file, or a single-element array holding {@code file}
 *         when it is neither a directory nor on an object store scheme
 * @throws IOException if the directory check or listing fails
 */
public static Path[] getSequenceFilePaths(FileSystem fs, Path file) throws IOException {
    //Note on object stores: Since the object store file system implementations
    //only emulate a file system, the directory of a multi-part file does not
    //exist physically and hence the isDirectory call returns false. Furthermore,
    //listStatus call returns all files with the given directory as prefix, which
    //includes the mtd file which needs to be ignored accordingly.
    boolean multiPart = fs.isDirectory(file) || IOUtilFunctions.isObjectStoreFileScheme(file);
    if (!multiPart) {
        return new Path[] { file };
    }

    String mtdName = file.toString() + ".mtd";
    LinkedList<Path> parts = new LinkedList<>();
    for (FileStatus entry : fs.listStatus(file)) {
        Path candidate = entry.getPath();
        if (candidate.getName().startsWith("_")) {
            continue; //skip internal files
        }
        if (candidate.toString().equals(mtdName)) {
            continue; //skip mtd file
        }
        parts.add(candidate);
    }
    return parts.toArray(new Path[0]);
}

From source file:org.apache.sysml.runtime.io.ReaderTextCell.java

License:Apache License

/**
 * Reads a text-cell matrix from {@code fname} into a newly allocated block.
 * The output block is allocated first, the input is then validated, and the
 * read is dispatched on whether the path is a directory.
 *
 * @param fname  input file or directory name
 * @param rlen   number of rows
 * @param clen   number of columns
 * @param brlen  forwarded to the read helpers
 * @param bclen  forwarded to the read helpers
 * @param estnnz estimated number of non-zeros, used for the output allocation
 * @return the populated matrix block
 * @throws IOException if a file system or read operation fails
 * @throws DMLRuntimeException declared by the original signature
 */
@Override
public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz)
        throws IOException, DMLRuntimeException {
    //output allocation happens before any file access
    MatrixBlock result = createOutputMatrixBlock(rlen, clen, (int) rlen, (int) clen, estnnz, true, false);

    //file access handles
    JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    Path input = new Path(fname);
    FileSystem fileSystem = IOUtilFunctions.getFileSystem(input, conf);

    //existence / non-empty validation
    checkValidInputFile(fileSystem, input);

    //core read: directory of part files vs. single raw file
    if (fileSystem.isDirectory(input)) {
        readTextCellMatrixFromHDFS(input, conf, result, rlen, clen, brlen, bclen);
    } else {
        readRawTextCellMatrixFromHDFS(input, conf, fileSystem, result, rlen, clen, brlen, bclen, _isMMFile);
    }

    //non-zeros are recomputed for dense results only; then re-examine the representation
    if (!result.isInSparseFormat()) {
        result.recomputeNonZeros();
    }
    result.examSparsity();
    return result;
}

From source file:org.apache.sysml.runtime.io.ReaderTextCSV.java

License:Apache License

/**
 * Reads a CSV matrix from a single file or from all non-hidden part files of a
 * directory (in sorted path order) into {@code dest}.
 *
 * @param path      input file or directory
 * @param job       job configuration, used when the size has to be computed
 * @param fs        file system the path lives on
 * @param dest      target block; when {@code null} it is obtained from {@code computeCSVSize}
 * @param rlen      number of rows
 * @param clen      number of columns; replaced by the computed value when {@code dest} was {@code null}
 * @param brlen     forwarded to the stream reader
 * @param bclen     forwarded to the stream reader
 * @param hasHeader forwarded to the size computation and the stream reader
 * @param delim     forwarded to the size computation and the stream reader
 * @param fill      forwarded to the size computation and the stream reader
 * @param fillValue forwarded to the size computation and the stream reader
 * @return the populated block with its non-zero count set
 * @throws IOException if a file system or read operation fails
 * @throws DMLRuntimeException declared by the original signature
 */
@SuppressWarnings("unchecked")
private static MatrixBlock readCSVMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest,
        long rlen, long clen, int brlen, int bclen, boolean hasHeader, String delim, boolean fill,
        double fillValue) throws IOException, DMLRuntimeException {
    //collect the input files; directory listings are sorted
    ArrayList<Path> files = new ArrayList<>();
    if (!fs.isDirectory(path)) {
        files.add(path);
    } else {
        FileStatus[] listing = fs.listStatus(path, CSVReblockMR.hiddenFileFilter);
        for (FileStatus entry : listing) {
            files.add(entry.getPath());
        }
        Collections.sort(files);
    }

    //an extra pass determines the matrix size when no target block was supplied
    if (dest == null) {
        dest = computeCSVSize(files, job, fs, hasHeader, delim, fill, fillValue);
        clen = dest.getNumColumns();
    }

    //read file by file; the row counter is shared across files
    //NOTE(review): the stream opened here is not closed in this method - presumably the callee closes it; confirm
    long nnzTotal = 0;
    MutableInt row = new MutableInt(0);
    boolean first = true;
    for (Path part : files) {
        nnzTotal += readCSVMatrixFromInputStream(fs.open(part), path.toString(), dest, row, rlen, clen,
                brlen, bclen, hasHeader, delim, fill, fillValue, first);
        first = false;
    }

    //post processing
    dest.setNonZeros(nnzTotal);
    return dest;
}

From source file:org.apache.sysml.runtime.io.WriterTextCSV.java

License:Apache License

/**
 * Moves or copies CSV output from {@code srcFileName} to {@code destFileName}, prepending a
 * generated header line ("C1", "C2", ..., joined by the configured delimiter) when
 * {@code _props.hasHeader()} is set.
 * <ul>
 * <li>No header: the source is moved to the destination.</li>
 * <li>Source is a directory: the header is prepended to the first part file (sorted order,
 *     files rejected by {@code CSVReblockMR.hiddenFileFilter} excluded) and the directory
 *     is then moved to the destination.</li>
 * <li>Source is a file: header and file contents are written to a newly created
 *     destination; the source is left in place.</li>
 * </ul>
 *
 * @param srcFileName  path of the existing CSV file or directory of part files
 * @param destFileName path of the resulting CSV file or directory
 * @param rlen         number of rows (not used by this method)
 * @param clen         number of columns, i.e. number of generated header names
 * @throws IOException if the source is neither a file nor a directory, the source
 *                     directory contains no part files, or a file system operation fails
 */
@SuppressWarnings("unchecked")
public final void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen)
        throws IOException {
    Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf());

    Path srcFilePath = new Path(srcFileName);
    Path destFilePath = new Path(destFileName);
    FileSystem fs = IOUtilFunctions.getFileSystem(srcFilePath, conf);

    if (!_props.hasHeader()) {
        // no header requested: simply move srcFile to destFile
        moveCreatingParentDirs(fs, srcFilePath, destFilePath);
        return;
    }

    // construct the header line
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < clen; i++) {
        sb.append('C').append(i + 1);
        if (i < clen - 1)
            sb.append(_props.getDelim());
    }
    sb.append('\n');
    String header = sb.toString();

    if (fs.isDirectory(srcFilePath)) {
        // compute sorted order among part files
        ArrayList<Path> files = new ArrayList<>();
        for (FileStatus stat : fs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter))
            files.add(stat.getPath());
        if (files.isEmpty()) {
            // previously surfaced as an IndexOutOfBoundsException from files.get(0)
            throw new IOException(srcFilePath.toString() + ": No part files found in directory");
        }
        Collections.sort(files);

        // write header + contents of the first part into a temp file next to it
        Path firstpart = files.get(0);
        Path tmp = new Path(firstpart.toString() + ".tmp");
        writeHeaderAndCopy(fs, conf, header, firstpart, tmp);

        // replace the first part by the temp file
        fs.delete(firstpart, true);
        fs.rename(tmp, firstpart);

        // move the whole directory to the destination
        moveCreatingParentDirs(fs, srcFilePath, destFilePath);
    } else if (fs.isFile(srcFilePath)) {
        // single file: destination = header + source contents
        writeHeaderAndCopy(fs, conf, header, srcFilePath, destFilePath);
    } else {
        throw new IOException(srcFilePath.toString() + ": No such file or directory");
    }
}

/**
 * Moves {@code src} to {@code dest}, making sure the parent directory structure of
 * {@code dest} exists first.
 *
 * TODO: Remove this roundabout way!
 * For example: dest = /user/biadmin/csv/temp/out/file.csv
 *              & the only path that exists already on HDFS is /user/biadmin/csv/.
 * In this case: the directory structure /user/biadmin/csv/temp/out must be created.
 * Simple hdfs.rename() does not seem to create this directory structure.
 */
private static void moveCreatingParentDirs(FileSystem fs, Path src, Path dest) throws IOException {
    // delete the destination, if it exists already
    fs.delete(dest, true);
    // create the destination as an empty file so that its parent directories are created
    fs.createNewFile(dest);
    // delete that file again but preserve the directory structure
    fs.delete(dest, true);
    // finally, move the data
    fs.rename(src, dest);
}

/**
 * Creates (overwriting) {@code dest}, writes {@code header} and then appends the bytes of
 * {@code src}. Both streams are closed on every path, including a failing header write.
 */
private static void writeHeaderAndCopy(FileSystem fs, Configuration conf, String header, Path src, Path dest)
        throws IOException {
    OutputStream out = fs.create(dest, true);
    InputStream in = null;
    try {
        // header write sits inside the try so that 'out' is released if it fails
        out.write(header.getBytes());
        in = fs.open(src);
        // copyBytes is asked to close both streams when it is done
        IOUtils.copyBytes(in, out, conf, true);
    } finally {
        IOUtilFunctions.closeSilently(in);
        IOUtilFunctions.closeSilently(out);
    }
}

From source file:org.apache.sysml.runtime.transform.BinAgent.java

License:Apache License

/**
 * Method to load transform metadata for all attributes.
 * For every column in {@code _colList} the first line of
 * {@code <txMtdDir>/Bin/<column name><TXMTD_BIN_FILE_SUFFIX>} is parsed into
 * {@code _min}, {@code _binWidths} and {@code _numBins}. Does nothing when
 * {@code isApplicable()} is false.
 *
 * @param job      job configuration (not used by this method)
 * @param fs       file system holding the metadata
 * @param txMtdDir transform metadata directory
 * @param agents   provides the column names used to build the file paths
 * @throws IOException if a metadata file cannot be read, is empty, or has too few fields
 * @throws RuntimeException if {@code txMtdDir} is not a directory
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
    if (!isApplicable())
        return;

    if (!fs.isDirectory(txMtdDir))
        throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir);

    for (int i = 0; i < _colList.length; i++) {
        int colID = _colList[i];

        Path path = new Path(txMtdDir + "/Bin/" + agents.getName(colID) + TfUtils.TXMTD_BIN_FILE_SUFFIX);
        TfUtils.checkValidInputFile(fs, path, true);

        // only the first line of the file is consumed
        String line;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)))) {
            line = br.readLine();
        }
        if (line == null)
            throw new IOException("Empty binning metadata file: " + path);

        // fields read below: [1]=min, [3]=bin width, [4]=number of bins
        // (presumably the layout is colID,min,max,binwidth,nbins - confirm against the writer)
        String[] fields = line.split(TfUtils.TXMTD_SEP);
        if (fields.length < 5)
            throw new IOException("Malformed binning metadata in " + path + ": expected at least 5 fields but found "
                    + fields.length);

        double min = UtilFunctions.parseToDouble(fields[1]);
        double binwidth = UtilFunctions.parseToDouble(fields[3]);
        int nbins = UtilFunctions.parseToInt(fields[4]);

        _numBins[i] = nbins;
        _min[i] = min;
        _binWidths[i] = binwidth; // (max-min)/nbins;
    }
}

From source file:org.apache.sysml.runtime.transform.MVImputeAgent.java

License:Apache License

/**
 * Method to load transform metadata for all attributes.
 * Loads the missing-value replacements first and the scaling information afterwards.
 *
 * @param job      job configuration (not used by this method)
 * @param fs       file system holding the metadata
 * @param tfMtdDir transform metadata directory
 * @param agents   forwarded to the read helpers
 * @throws IOException if a metadata file cannot be read
 * @throws RuntimeException if {@code tfMtdDir} is not a directory or an unknown
 *         imputation method is configured
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path tfMtdDir, TfUtils agents) throws IOException {
    if (!fs.isDirectory(tfMtdDir)) {
        throw new RuntimeException("Path to recode maps must be a directory: " + tfMtdDir);
    }

    // missing value imputation
    if (_colList != null) {
        for (int idx = 0; idx < _colList.length; idx++) {
            if (_mvMethodList[idx] == MVMethod.CONSTANT) {
                // constant replacement: nothing to load, the values are set during configure
                continue;
            }
            boolean global = _mvMethodList[idx] == MVMethod.GLOBAL_MEAN
                    || _mvMethodList[idx] == MVMethod.GLOBAL_MODE;
            if (!global) {
                throw new RuntimeException("Invalid Missing Value Imputation methods: " + _mvMethodList[idx]);
            }
            // global_mean or global_mode: replacement comes from the metadata
            _replacementList[idx] = readReplacement(_colList[idx], fs, tfMtdDir, agents);
        }
    }

    // scaling information for imputed columns
    if (_colList != null) {
        for (int idx = 0; idx < _colList.length; idx++) {
            if (_isMVScaled.get(idx)) {
                processScalingFile(idx, _colList, _meanList, _varList, fs, tfMtdDir, agents);
            }
        }
    }

    // scaling information for the columns in _scnomvList
    if (_scnomvList != null) {
        for (int idx = 0; idx < _scnomvList.length; idx++) {
            processScalingFile(idx, _scnomvList, _scnomvMeanList, _scnomvVarList, fs, tfMtdDir, agents);
        }
    }
}

From source file:org.apache.sysml.runtime.transform.RecodeAgent.java

License:Apache License

/**
 * Method to load recode maps of all attributes, at once.
 * Each column's map is read line by line from
 * {@code <txMtdDir>/Recode/<column name><TXMTD_RCD_MAP_SUFFIX>} and stored in
 * {@code _finalMaps} under the column id. Does nothing when
 * {@code isApplicable()} is false.
 *
 * @param job      job configuration (not used by this method)
 * @param fs       file system holding the metadata
 * @param txMtdDir transform metadata directory
 * @param agents   provides the column names used to build the file paths
 * @throws IOException if a recode map file cannot be read
 * @throws RuntimeException if {@code txMtdDir} is not a directory
 */
@Override
public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException {
    if (!isApplicable()) {
        return;
    }

    // the map container is reset before the directory check
    _finalMaps = new HashMap<Integer, HashMap<String, String>>();

    if (!fs.isDirectory(txMtdDir)) {
        throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir);
    }

    for (int colID : _colList) {
        Path mapFile = new Path(txMtdDir + "/Recode/" + agents.getName(colID) + TfUtils.TXMTD_RCD_MAP_SUFFIX);
        TfUtils.checkValidInputFile(fs, mapFile, true);

        HashMap<String, String> recodeMap = new HashMap<String, String>();
        // one pair instance is reused as the parse target for every line of this file
        Pair<String, String> entry = new Pair<String, String>();

        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(mapFile)))) {
            // Example line to parse: "WN (1)67492",1,61975
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                DecoderRecode.parseRecodeMapEntry(line, entry);
                recodeMap.put(entry.getKey(), entry.getValue());
            }
        }
        _finalMaps.put(colID, recodeMap);
    }
}

From source file:org.apache.sysml.runtime.util.MapReduceTool.java

License:Apache License

/**
 * Returns the size of a file or directory on hdfs in bytes.
 * /*from  w  w  w . ja  v a  2s .c o m*/
 * @param path file system path
 * @return file size
 * @throws IOException if IOException occurs
 */
public static long getFilesizeOnHDFS(Path path) throws IOException {
    FileSystem fs = IOUtilFunctions.getFileSystem(path);
    long ret = 0; //in bytes
    if (fs.isDirectory(path))
        ret = fs.getContentSummary(path).getLength();
    else
        ret = fs.getFileStatus(path).getLen();
    //note: filestatus would return 0 on directories

    return ret;
}

From source file:org.apache.tajo.engine.function.FunctionLoader.java

License:Apache License

/**
 * Load functions that are defined by users.
 *
 * @param conf/*from   ww w .  j  a va  2 s .c  om*/
 * @param functionMap
 * @return
 * @throws IOException
 */
public static Map<FunctionSignature, FunctionDesc> loadUserDefinedFunctions(TajoConf conf,
        Map<FunctionSignature, FunctionDesc> functionMap) throws IOException {

    String[] codePaths = conf.getStrings(TajoConf.ConfVars.PYTHON_CODE_DIR.varname);
    if (codePaths != null) {
        FileSystem localFS = FileSystem.getLocal(conf);
        for (String codePathStr : codePaths) {
            Path codePath = new Path(codePathStr);
            List<Path> filePaths = TUtil.newList();
            if (localFS.isDirectory(codePath)) {
                for (FileStatus file : localFS.listStatus(codePath, new PathFilter() {
                    @Override
                    public boolean accept(Path path) {
                        return path.getName().endsWith(PythonScriptEngine.FILE_EXTENSION);
                    }
                })) {
                    filePaths.add(file.getPath());
                }
            } else {
                filePaths.add(codePath);
            }
            for (Path filePath : filePaths) {
                for (FunctionDesc f : PythonScriptEngine.registerFunctions(filePath.toUri(),
                        FunctionLoader.PYTHON_FUNCTION_NAMESPACE)) {
                    functionMap.put(f.getSignature(), f);
                }
            }
        }
    }
    return functionMap;
}