List of usage examples for org.apache.hadoop.fs FileSystem isDirectory
@Deprecated public boolean isDirectory(Path f) throws IOException
From source file:org.apache.sysml.runtime.io.FrameReaderTextCell.java
License:Apache License
/**
 * Reads a text-cell frame from {@code path} into {@code dest}.
 *
 * When the path is a directory, it is registered as the job's input path and every
 * input split produced by a {@code TextInputFormat} is read in turn. Otherwise the
 * read is delegated to {@code readRawTextCellFrameFromHDFS}.
 *
 * @param path   location of the input
 * @param job    job configuration; the input path is added to it in the directory case
 * @param fs     file system used to test whether {@code path} is a directory
 * @param dest   frame block that receives the data
 * @param schema passed through to the raw reader
 * @param names  passed through to the raw reader
 * @param rlen   passed through to the raw reader
 * @param clen   passed through to the raw reader
 * @throws IOException if the underlying file system or reader fails
 */
protected void readTextCellFrameFromHDFS(Path path, JobConf job, FileSystem fs, FrameBlock dest,
        ValueType[] schema, String[] names, long rlen, long clen) throws IOException {
    // anything that is not a directory goes through the raw reader
    if (!fs.isDirectory(path)) {
        readRawTextCellFrameFromHDFS(path, job, fs, dest, schema, names, rlen, clen);
        return;
    }

    // directory: read it split by split
    FileInputFormat.addInputPath(job, path);
    TextInputFormat informat = new TextInputFormat();
    informat.configure(job);
    InputSplit[] splits = informat.getSplits(job, 1);
    for (int k = 0; k < splits.length; k++) {
        readTextCellFrameFromInputSplit(splits[k], informat, job, dest);
    }
}
From source file:org.apache.sysml.runtime.io.IOUtilFunctions.java
License:Apache License
public static Path[] getSequenceFilePaths(FileSystem fs, Path file) throws IOException { Path[] ret = null;//from w ww . j av a 2s . c o m //Note on object stores: Since the object store file system implementations //only emulate a file system, the directory of a multi-part file does not //exist physically and hence the isDirectory call returns false. Furthermore, //listStatus call returns all files with the given directory as prefix, which //includes the mtd file which needs to be ignored accordingly. if (fs.isDirectory(file) || IOUtilFunctions.isObjectStoreFileScheme(file)) { LinkedList<Path> tmp = new LinkedList<>(); FileStatus[] dStatus = fs.listStatus(file); for (FileStatus fdStatus : dStatus) if (!fdStatus.getPath().getName().startsWith("_") //skip internal files && !fdStatus.getPath().toString().equals(file.toString() + ".mtd")) //mtd file tmp.add(fdStatus.getPath()); ret = tmp.toArray(new Path[0]); } else { ret = new Path[] { file }; } return ret; }
From source file:org.apache.sysml.runtime.io.ReaderTextCell.java
License:Apache License
@Override public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int brlen, int bclen, long estnnz) throws IOException, DMLRuntimeException { //allocate output matrix block MatrixBlock ret = createOutputMatrixBlock(rlen, clen, (int) rlen, (int) clen, estnnz, true, false); //prepare file access JobConf job = new JobConf(ConfigurationManager.getCachedJobConf()); Path path = new Path(fname); FileSystem fs = IOUtilFunctions.getFileSystem(path, job); //check existence and non-empty file checkValidInputFile(fs, path);/*from ww w.ja v a 2 s . com*/ //core read if (fs.isDirectory(path)) readTextCellMatrixFromHDFS(path, job, ret, rlen, clen, brlen, bclen); else readRawTextCellMatrixFromHDFS(path, job, fs, ret, rlen, clen, brlen, bclen, _isMMFile); //finally check if change of sparse/dense block representation required if (!ret.isInSparseFormat()) ret.recomputeNonZeros(); ret.examSparsity(); return ret; }
From source file:org.apache.sysml.runtime.io.ReaderTextCSV.java
License:Apache License
@SuppressWarnings("unchecked") private static MatrixBlock readCSVMatrixFromHDFS(Path path, JobConf job, FileSystem fs, MatrixBlock dest, long rlen, long clen, int brlen, int bclen, boolean hasHeader, String delim, boolean fill, double fillValue) throws IOException, DMLRuntimeException { //prepare file paths in alphanumeric order ArrayList<Path> files = new ArrayList<>(); if (fs.isDirectory(path)) { for (FileStatus stat : fs.listStatus(path, CSVReblockMR.hiddenFileFilter)) files.add(stat.getPath());/* w w w . ja v a 2 s. c o m*/ Collections.sort(files); } else files.add(path); //determine matrix size via additional pass if required if (dest == null) { dest = computeCSVSize(files, job, fs, hasHeader, delim, fill, fillValue); clen = dest.getNumColumns(); } //actual read of individual files long lnnz = 0; MutableInt row = new MutableInt(0); for (int fileNo = 0; fileNo < files.size(); fileNo++) { lnnz += readCSVMatrixFromInputStream(fs.open(files.get(fileNo)), path.toString(), dest, row, rlen, clen, brlen, bclen, hasHeader, delim, fill, fillValue, fileNo == 0); } //post processing dest.setNonZeros(lnnz); return dest; }
From source file:org.apache.sysml.runtime.io.WriterTextCSV.java
License:Apache License
@SuppressWarnings("unchecked") public final void addHeaderToCSV(String srcFileName, String destFileName, long rlen, long clen) throws IOException { Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf()); Path srcFilePath = new Path(srcFileName); Path destFilePath = new Path(destFileName); FileSystem fs = IOUtilFunctions.getFileSystem(srcFilePath, conf); if (!_props.hasHeader()) { // simply move srcFile to destFile /*//from w w w .j a v a 2 s. c om * TODO: Remove this roundabout way! * For example: destFilePath = /user/biadmin/csv/temp/out/file.csv * & the only path that exists already on HDFS is /user/biadmin/csv/. * In this case: the directory structure /user/biadmin/csv/temp/out must be created. * Simple hdfs.rename() does not seem to create this directory structure. */ // delete the destination file, if exists already fs.delete(destFilePath, true); // Create /user/biadmin/csv/temp/out/file.csv so that ..../temp/out/ is created. fs.createNewFile(destFilePath); // delete the file "file.csv" but preserve the directory structure /user/biadmin/csv/temp/out/ fs.delete(destFilePath, true); // finally, move the data to destFilePath = /user/biadmin/csv/temp/out/file.csv fs.rename(srcFilePath, destFilePath); return; } // construct the header line StringBuilder sb = new StringBuilder(); for (int i = 0; i < clen; i++) { sb.append("C" + (i + 1)); if (i < clen - 1) sb.append(_props.getDelim()); } sb.append('\n'); if (fs.isDirectory(srcFilePath)) { // compute sorted order among part files ArrayList<Path> files = new ArrayList<>(); for (FileStatus stat : fs.listStatus(srcFilePath, CSVReblockMR.hiddenFileFilter)) files.add(stat.getPath()); Collections.sort(files); // first part file path Path firstpart = files.get(0); // create a temp file, and add header and contents of first part Path tmp = new Path(firstpart.toString() + ".tmp"); OutputStream out = fs.create(tmp, true); out.write(sb.toString().getBytes()); sb.setLength(0); // copy rest of the data 
from firstpart InputStream in = null; try { in = fs.open(firstpart); IOUtils.copyBytes(in, out, conf, true); } finally { IOUtilFunctions.closeSilently(in); IOUtilFunctions.closeSilently(out); } // rename tmp to firstpart fs.delete(firstpart, true); fs.rename(tmp, firstpart); // rename srcfile to destFile fs.delete(destFilePath, true); fs.createNewFile(destFilePath); // force the creation of directory structure fs.delete(destFilePath, true); // delete the file, but preserve the directory structure fs.rename(srcFilePath, destFilePath); // move the data } else if (fs.isFile(srcFilePath)) { // create destination file OutputStream out = fs.create(destFilePath, true); // write header out.write(sb.toString().getBytes()); sb.setLength(0); // copy the data from srcFile InputStream in = null; try { in = fs.open(srcFilePath); IOUtils.copyBytes(in, out, conf, true); } finally { IOUtilFunctions.closeSilently(in); IOUtilFunctions.closeSilently(out); } } else { throw new IOException(srcFilePath.toString() + ": No such file or directory"); } }
From source file:org.apache.sysml.runtime.transform.BinAgent.java
License:Apache License
/** * Method to load transform metadata for all attributes *//*from ww w. ja va2 s . c om*/ @Override public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException { if (!isApplicable()) return; if (fs.isDirectory(txMtdDir)) { for (int i = 0; i < _colList.length; i++) { int colID = _colList[i]; Path path = new Path(txMtdDir + "/Bin/" + agents.getName(colID) + TfUtils.TXMTD_BIN_FILE_SUFFIX); TfUtils.checkValidInputFile(fs, path, true); BufferedReader br = null; try { br = new BufferedReader(new InputStreamReader(fs.open(path))); // format: colID,min,max,nbins String[] fields = br.readLine().split(TfUtils.TXMTD_SEP); double min = UtilFunctions.parseToDouble(fields[1]); //double max = UtilFunctions.parseToDouble(fields[2]); double binwidth = UtilFunctions.parseToDouble(fields[3]); int nbins = UtilFunctions.parseToInt(fields[4]); _numBins[i] = nbins; _min[i] = min; _binWidths[i] = binwidth; // (max-min)/nbins; } finally { IOUtilFunctions.closeSilently(br); } } } else { throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir); } }
From source file:org.apache.sysml.runtime.transform.MVImputeAgent.java
License:Apache License
/** * Method to load transform metadata for all attributes *///from w w w .j a v a 2s.c om @Override public void loadTxMtd(JobConf job, FileSystem fs, Path tfMtdDir, TfUtils agents) throws IOException { if (fs.isDirectory(tfMtdDir)) { // Load information about missing value imputation if (_colList != null) for (int i = 0; i < _colList.length; i++) { int colID = _colList[i]; if (_mvMethodList[i] == MVMethod.GLOBAL_MEAN || _mvMethodList[i] == MVMethod.GLOBAL_MODE) // global_mean or global_mode _replacementList[i] = readReplacement(colID, fs, tfMtdDir, agents); else if (_mvMethodList[i] == MVMethod.CONSTANT) { // constant: replace a missing value by a given constant // nothing to do. The constant values are loaded already during configure } else throw new RuntimeException("Invalid Missing Value Imputation methods: " + _mvMethodList[i]); } // Load scaling information if (_colList != null) for (int i = 0; i < _colList.length; i++) if (_isMVScaled.get(i)) processScalingFile(i, _colList, _meanList, _varList, fs, tfMtdDir, agents); if (_scnomvList != null) for (int i = 0; i < _scnomvList.length; i++) processScalingFile(i, _scnomvList, _scnomvMeanList, _scnomvVarList, fs, tfMtdDir, agents); } else { throw new RuntimeException("Path to recode maps must be a directory: " + tfMtdDir); } }
From source file:org.apache.sysml.runtime.transform.RecodeAgent.java
License:Apache License
/** * Method to load recode maps of all attributes, at once. *//*from w w w . j av a2 s .com*/ @Override public void loadTxMtd(JobConf job, FileSystem fs, Path txMtdDir, TfUtils agents) throws IOException { if (!isApplicable()) return; _finalMaps = new HashMap<Integer, HashMap<String, String>>(); if (fs.isDirectory(txMtdDir)) { for (int i = 0; i < _colList.length; i++) { int colID = _colList[i]; Path path = new Path(txMtdDir + "/Recode/" + agents.getName(colID) + TfUtils.TXMTD_RCD_MAP_SUFFIX); TfUtils.checkValidInputFile(fs, path, true); HashMap<String, String> map = new HashMap<String, String>(); Pair<String, String> pair = new Pair<String, String>(); String line = null; try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)))) { // Example line to parse: "WN (1)67492",1,61975 while ((line = br.readLine()) != null) { DecoderRecode.parseRecodeMapEntry(line, pair); map.put(pair.getKey(), pair.getValue()); } } _finalMaps.put(colID, map); } } else { throw new RuntimeException("Path to recode maps must be a directory: " + txMtdDir); } }
From source file:org.apache.sysml.runtime.util.MapReduceTool.java
License:Apache License
/** * Returns the size of a file or directory on hdfs in bytes. * /*from w w w . ja v a 2s .c o m*/ * @param path file system path * @return file size * @throws IOException if IOException occurs */ public static long getFilesizeOnHDFS(Path path) throws IOException { FileSystem fs = IOUtilFunctions.getFileSystem(path); long ret = 0; //in bytes if (fs.isDirectory(path)) ret = fs.getContentSummary(path).getLength(); else ret = fs.getFileStatus(path).getLen(); //note: filestatus would return 0 on directories return ret; }
From source file:org.apache.tajo.engine.function.FunctionLoader.java
License:Apache License
/** * Load functions that are defined by users. * * @param conf/*from ww w . j a va 2 s .c om*/ * @param functionMap * @return * @throws IOException */ public static Map<FunctionSignature, FunctionDesc> loadUserDefinedFunctions(TajoConf conf, Map<FunctionSignature, FunctionDesc> functionMap) throws IOException { String[] codePaths = conf.getStrings(TajoConf.ConfVars.PYTHON_CODE_DIR.varname); if (codePaths != null) { FileSystem localFS = FileSystem.getLocal(conf); for (String codePathStr : codePaths) { Path codePath = new Path(codePathStr); List<Path> filePaths = TUtil.newList(); if (localFS.isDirectory(codePath)) { for (FileStatus file : localFS.listStatus(codePath, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().endsWith(PythonScriptEngine.FILE_EXTENSION); } })) { filePaths.add(file.getPath()); } } else { filePaths.add(codePath); } for (Path filePath : filePaths) { for (FunctionDesc f : PythonScriptEngine.registerFunctions(filePath.toUri(), FunctionLoader.PYTHON_FUNCTION_NAMESPACE)) { functionMap.put(f.getSignature(), f); } } } } return functionMap; }