List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java
License:Apache License
protected static void handleDirectory(String relpath, FileSystem fs, Path path, ZipOutputStream zout, byte[] buf) throws IOException { // NB: dirs must end with '/'. If we don't add dirs, then only // dirs which also have files in them will be created when the // archive is unzipped. final ZipEntry entry = new ZipEntry(relpath + '/'); zout.putNextEntry(entry);// w w w. jav a2 s.c o m zout.closeEntry(); for (FileStatus stat : fs.listStatus(path)) { traverse(fs, stat.getPath(), zout, buf); } }
From source file:com.linkedin.cubert.examples.ListFiles.java
License:Open Source License
private void listFiles(FileSystem fs, Path path, List<String> files) throws IOException { FileStatus[] allStatus = fs.listStatus(path); if (allStatus == null) return;// ww w.j ava 2 s . com for (FileStatus status : allStatus) { if (status.isDir()) { listFiles(fs, status.getPath(), files); } else { files.add(status.getPath().toUri().getPath()); } } }
From source file:com.linkedin.cubert.utils.FileSystemUtils.java
License:Open Source License
public static List<Path> getDailyDurationPaths(FileSystem fs, Path root, long startDate, long endDate) throws IOException { List<Path> paths = new ArrayList<Path>(); for (FileStatus years : fs.listStatus(root)) { int year = Integer.parseInt(years.getPath().getName()); for (FileStatus months : fs.listStatus(years.getPath())) { int month = Integer.parseInt(months.getPath().getName()); for (FileStatus days : fs.listStatus(months.getPath())) { int day = Integer.parseInt(days.getPath().getName()); long timestamp = 10000L * year + 100L * month + day; if (timestamp >= startDate && timestamp <= endDate) { paths.add(days.getPath()); }//w w w . j a v a2s . com } } } return paths; }
From source file:com.linkedin.cubert.utils.FileSystemUtils.java
License:Open Source License
public static List<Path> getHourlyDurationPaths(FileSystem fs, Path root, long startDateHour, long endDateHour) throws IOException { List<Path> paths = new ArrayList<Path>(); for (FileStatus years : fs.listStatus(root)) { int year = Integer.parseInt(years.getPath().getName()); for (FileStatus months : fs.listStatus(years.getPath())) { int month = Integer.parseInt(months.getPath().getName()); for (FileStatus days : fs.listStatus(months.getPath())) { int day = Integer.parseInt(days.getPath().getName()); for (FileStatus hours : fs.listStatus(days.getPath())) { int hour = Integer.parseInt(hours.getPath().getName()); long timestamp = 1000000L * year + 10000L * month + 100L * day + hour; if (timestamp >= startDateHour && timestamp <= endDateHour) { paths.add(hours.getPath()); }//w ww. j a va 2 s .com } } } } return paths; }
From source file:com.linkedin.hadoop.jobs.HdfsWaitJob.java
License:Apache License
/** * Method checkDirectory loops through the folders pointed to by dirPath, and will * cause the job to succeed if any of the folders are fresh enough. However, if the * parameter checkExactPath is true, this method only checks for the existence of * dirPath in HDFS.//from w ww .j a v a2 s.co m * * @param dirPath The path to the directory we are searching for fresh folders * @param freshness The timeframe in which the folder has to have been modified by * @param checkExactPath The boolean that decides if we only check for the existence of dirPath in HDFS * @throws IOException If there is an HDFS exception * @return A boolean value corresponding to whether a fresh folder was found */ public boolean checkDirectory(String dirPath, long freshness, boolean checkExactPath) throws IOException, NullPointerException { FileSystem fileSys = FileSystem.get(getConf()); if (fileSys == null) { String errMessage = "ERROR: The file system trying to be accessed does not exist. JOB TERMINATED."; log.info(errMessage); throw new NullPointerException(errMessage); } if (checkExactPath) { if (fileSys.exists(new Path(dirPath))) { log.info("SUCCESS: The exact path: " + dirPath + " was found in HDFS. Program now quitting."); return true; } log.info("STATUS: The exact path: " + dirPath + " was not found during latest polling."); return false; } FileStatus[] status = fileSys.listStatus(new Path(dirPath)); if (status == null) { String errMessage = "ERROR: dirPath -> " + dirPath + " is empty or does not exist. 
JOB TERMINATED."; log.info(errMessage); throw new IOException(errMessage); } for (FileStatus file : status) { if (file.isDirectory()) { long timeModified = file.getModificationTime(); if ((System.currentTimeMillis() - timeModified) <= freshness) { String fileName = file.getPath().toString(); log.info("We found this fresh folder in the filePath: " + fileName.substring(fileName.lastIndexOf("/") + 1)); log.info("SUCCESS: Program now quitting after successfully finding a fresh folder."); return true; } } } log.info("STATUS: No fresh folders found during latest polling."); return false; }
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
/**
 * Recursively gathers the file statuses found under {@code filterMemberPath},
 * skipping any entry whose name starts with an underscore.
 *
 * <p>If the path is a directory, each non-underscore child is descended
 * into. Otherwise the first element of the listing is added, provided the
 * listing is non-empty and that element's name does not start with an
 * underscore.
 *
 * @param jobContext       supplies the configuration used to resolve the file system
 * @param filterMemberPath file or directory to expand
 * @return the collected statuses; empty if the listing is {@code null}
 * @throws IOException if the file system cannot be resolved or queried
 */
private List<FileStatus> getAllSubFileStatus(JobContext jobContext, Path filterMemberPath)
    throws IOException {
  final List<FileStatus> collected = new ArrayList<FileStatus>();
  final FileSystem fs = filterMemberPath.getFileSystem(jobContext.getConfiguration());

  final FileStatus[] listing = fs.listStatus(filterMemberPath);
  if (listing == null) {
    return collected;
  }

  if (!fs.getFileStatus(filterMemberPath).isDir()) {
    final boolean keep = listing.length > 0 && !listing[0].getPath().getName().startsWith("_");
    if (keep) {
      collected.add(listing[0]);
    }
    return collected;
  }

  for (int i = 0; i < listing.length; i++) {
    final Path childPath = listing[i].getPath();
    if (childPath.getName().startsWith("_")) {
      continue;
    }
    collected.addAll(getAllSubFileStatus(jobContext, childPath));
  }
  return collected;
}
From source file:com.linkedin.mapred.AvroHdfsFileReader.java
License:Open Source License
@Override protected List<Path> getPaths(String filePath) throws IOException { Path path = new Path(filePath); FileSystem fs = path.getFileSystem(getConf()); List<Path> paths = new ArrayList<Path>(); for (FileStatus status : fs.listStatus(path)) { if (status.isDir() && !AvroUtils.shouldPathBeIgnored(status.getPath())) { paths.addAll(getPaths(status.getPath().toString())); } else if (isAvro(status.getPath())) { paths.add(status.getPath()); }//from w ww. j a v a2 s . c o m } return paths; }
From source file:com.linkedin.mapred.AvroUtils.java
License:Open Source License
/** * Adds all subdirectories under a root path to the input format. * /*from w w w. j a va 2s . com*/ * @param conf The JobConf. * @param path The root path. * @throws IOException */ public static void addAllSubPaths(JobConf conf, Path path) throws IOException { if (shouldPathBeIgnored(path)) { throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", path)); } final FileSystem fs = path.getFileSystem(conf); if (fs.exists(path)) { for (FileStatus status : fs.listStatus(path)) { if (!shouldPathBeIgnored(status.getPath())) { if (status.isDir()) { addAllSubPaths(conf, status.getPath()); } else { AvroInputFormat.addInputPath(conf, status.getPath()); } } } } }
From source file:com.linkedin.mapred.AvroUtils.java
License:Open Source License
/** * Enumerates all the files under a given path. * /* ww w . j av a2 s .c o m*/ * @param conf The JobConf. * @param basePath The base path. * @return A list of files found under the base path. * @throws IOException */ public static List<Path> enumerateFiles(JobConf conf, Path basePath) throws IOException { if (shouldPathBeIgnored(basePath)) { throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", basePath)); } List<Path> paths = new ArrayList<Path>(); FileSystem fs = basePath.getFileSystem(conf); if (!fs.exists(basePath)) { return Collections.emptyList(); } for (FileStatus s : fs.listStatus(basePath)) { if (!shouldPathBeIgnored(s.getPath())) { if (s.isDir()) { paths.addAll(enumerateFiles(conf, s.getPath())); } else { paths.add(s.getPath()); } } } return paths; }
From source file:com.linkedin.mlease.utils.Util.java
License:Open Source License
/**
 * Recursively collects every non-directory entry found beneath {@code root}.
 *
 * <p>The file system is resolved from the supplied {@code conf}, so the
 * caller's configuration is honoured. A {@code null} configuration falls
 * back to a default {@code JobConf}, which keeps callers that passed
 * {@code null} working.
 *
 * @param conf job configuration used to resolve the file system; may be {@code null}
 * @param root directory to start from
 * @return the paths of all files under {@code root}, in listing order
 * @throws IOException if the file system cannot be resolved or listed
 */
public static List<Path> findPartFiles(JobConf conf, Path root) throws IOException {
  // Resolve the configuration once and reuse it for the whole traversal
  // instead of building a fresh JobConf for every directory visited.
  final JobConf effectiveConf = (conf != null) ? conf : new JobConf();
  final FileSystem fs = root.getFileSystem(effectiveConf);

  final List<Path> files = new ArrayList<Path>();
  for (FileStatus status : fs.listStatus(root)) {
    if (status.isDir()) {
      files.addAll(findPartFiles(effectiveConf, status.getPath()));
    } else {
      files.add(status.getPath());
    }
  }
  return files;
}