List of usage examples for org.apache.hadoop.fs.FileSystem listStatus

public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException

listStatus returns the FileStatus of each entry directly under the given path(s); it does not recurse into sub-directories, which is why most of the examples below walk directory trees themselves. All of the examples call the single-Path overload.
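For orientation, here is a minimal sketch of a basic call before the collected examples. The directory /tmp/data and the default Configuration are assumptions made purely for illustration; they do not come from any of the source files listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical directory; replace with a path that exists on your file system.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listStatus returns one FileStatus per entry directly under the directory.
        FileStatus[] entries = fs.listStatus(dir);
        for (FileStatus entry : entries) {
            // Distinguish files from sub-directories; recurse manually if needed.
            if (entry.isDirectory()) {
                System.out.println("dir:  " + entry.getPath());
            } else {
                System.out.println("file: " + entry.getPath() + " (" + entry.getLen() + " bytes)");
            }
        }
    }
}

Note that listStatus(Path) throws FileNotFoundException when the path does not exist, so callers that cannot guarantee existence typically check fs.exists(path) first or catch the exception, as some of the examples below do.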
From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory,
        List<String> filePrefixes, boolean deleteEmptyDirectories) {
    FileStatus[] allFiles;
    try {
        allFiles = fileSystem.listStatus(directory);
    }
    catch (IOException e) {
        ImmutableList.Builder<String> notDeletedItems = ImmutableList.builder();
        notDeletedItems.add(directory.toString() + "/**");
        return new RecursiveDeleteResult(false, notDeletedItems.build());
    }

    boolean allDescendentsDeleted = true;
    ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder();
    for (FileStatus fileStatus : allFiles) {
        if (HadoopFileStatus.isFile(fileStatus)) {
            Path filePath = fileStatus.getPath();
            String fileName = filePath.getName();
            boolean eligible = false;
            for (String filePrefix : filePrefixes) {
                if (fileName.startsWith(filePrefix)) {
                    eligible = true;
                    break;
                }
            }
            if (eligible) {
                if (!deleteIfExists(fileSystem, filePath, false)) {
                    allDescendentsDeleted = false;
                    notDeletedEligibleItems.add(filePath.toString());
                }
            }
            else {
                allDescendentsDeleted = false;
            }
        }
        else if (HadoopFileStatus.isDirectory(fileStatus)) {
            RecursiveDeleteResult subResult = doRecursiveDeleteFiles(fileSystem, fileStatus.getPath(),
                    filePrefixes, deleteEmptyDirectories);
            if (!subResult.isDirectoryNoLongerExists()) {
                allDescendentsDeleted = false;
            }
            if (!subResult.getNotDeletedEligibleItems().isEmpty()) {
                notDeletedEligibleItems.addAll(subResult.getNotDeletedEligibleItems());
            }
        }
        else {
            allDescendentsDeleted = false;
            notDeletedEligibleItems.add(fileStatus.getPath().toString());
        }
    }
    if (allDescendentsDeleted && deleteEmptyDirectories) {
        verify(notDeletedEligibleItems.build().isEmpty());
        if (!deleteIfExists(fileSystem, directory, false)) {
            return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/"));
        }
        return new RecursiveDeleteResult(true, ImmutableList.of());
    }
    return new RecursiveDeleteResult(false, notDeletedEligibleItems.build());
}
From source file:com.flipkart.fdp.migration.distcp.utils.FileCountDriver.java
License:Apache License
public List<String> getFileStatusRecursive(Path path, FileSystem fs, String destBasePath) throws IOException {
    List<String> response = new ArrayList<String>();
    FileStatus file = fs.getFileStatus(path);
    if (file != null && file.isFile()) {
        response.add(trimExtension(file.getPath().toUri().getPath(), destBasePath));
        return response;
    }

    FileStatus[] fstats = fs.listStatus(path);
    if (fstats != null && fstats.length > 0) {
        for (FileStatus fstat : fstats) {
            if (fstat.isDirectory()) {
                response.addAll(getFileStatusRecursive(fstat.getPath(), fs, destBasePath));
            } else {
                response.add(trimExtension(fstat.getPath().toUri().getPath(), destBasePath));
            }
        }
    }
    return response;
}
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License:Apache License
/**
 * If we have a directory, recursively gather the files we care about for this job.
 *
 * @param file Root file/directory.
 * @param job Job context.
 * @return All files we care about.
 * @throws IOException
 */
private Collection<FileStatus> handleFile(final FileStatus file, final JobContext job) throws IOException {
    final List<FileStatus> results = Lists.newArrayList();

    if (file.isDir()) {
        final Path p = file.getPath();
        LOG.debug("Expanding {}", p);
        final FileSystem fs = p.getFileSystem(job.getConfiguration());
        final FileStatus[] children = fs.listStatus(p);
        for (FileStatus child : children) {
            results.addAll(handleFile(child, job));
        }
    } else {
        results.add(file);
    }

    return results;
}
From source file:com.fullcontact.sstable.index.SSTableIndexIndexer.java
License:Apache License
public void index(final Path sstablePath) throws IOException {
    final FileSystem fileSystem = FileSystem.get(URI.create(sstablePath.toString()), configuration);
    final FileStatus fileStatus = fileSystem.getFileStatus(sstablePath);

    if (fileStatus.isDir()) {
        LOG.info("SSTable Indexing directory {}", sstablePath);
        final FileStatus[] statuses = fileSystem.listStatus(sstablePath);
        for (final FileStatus childStatus : statuses) {
            index(childStatus.getPath());
        }
    } else if (sstablePath.toString().endsWith(SST_EXTENSION)) {
        final Path sstableIndexPath = new Path(sstablePath.toString() + SSTableIndexIndex.SSTABLE_INDEX_SUFFIX);
        if (fileSystem.exists(sstableIndexPath)) {
            LOG.info("Skipping as SSTable index file already exists for {}", sstablePath);
        } else {
            // Kick a thread for the index.
            final ListenableFuture<IndexRequest> indexFuture = service.submit(new Callable<IndexRequest>() {
                @Override
                public IndexRequest call() throws Exception {
                    final long startTime = System.currentTimeMillis();
                    final long fileSize = fileStatus.getLen();

                    LOG.info("Indexing SSTABLE Indexing file {}, size {} GB...", sstablePath,
                            decimalFormat.format(fileSize / (1024.0 * 1024.0 * 1024.0)));
                    indexSingleFile(fileSystem, sstablePath);

                    return new IndexRequest(sstableIndexPath, startTime, fileSize);
                }
            });

            Futures.addCallback(indexFuture, new FutureCallback<IndexRequest>() {
                public void onSuccess(final IndexRequest indexRequest) {
                    long indexSize = 0;
                    try {
                        indexSize = fileSystem.getFileStatus(indexRequest.getIndexPath()).getLen();
                    } catch (IOException e) {
                        LOG.error("Error getting file status for index path: {}", indexRequest.getIndexPath());
                    }

                    final double elapsed = (System.currentTimeMillis() - indexRequest.getStartTime()) / 1000.0;

                    LOG.info("Completed SSTABLE Indexing in {} seconds ({} MB/s). Index size is {} KB.",
                            decimalFormat.format(elapsed),
                            decimalFormat.format(indexRequest.getFileSize() / (1024.0 * 1024.0 * elapsed)),
                            decimalFormat.format(indexSize / 1024.0));
                }

                public void onFailure(Throwable e) {
                    LOG.error("Failed to index.", e);
                }
            });
        }
    }
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java
License:Apache License
/**
 * Returns a list of hoplogs present in the bucket's directory, expected to be called during
 * hoplog set initialization
 */
List<Hoplog> identifyAndLoadSortedOplogs(boolean countSize) throws IOException {
    FileSystem fs = store.getFileSystem();
    if (!fs.exists(bucketPath)) {
        return new ArrayList<Hoplog>();
    }

    FileStatus allFiles[] = fs.listStatus(bucketPath);
    ArrayList<FileStatus> validFiles = new ArrayList<FileStatus>();
    for (FileStatus file : allFiles) {
        // All hoplog files contribute to disk usage
        Matcher matcher = HOPLOG_NAME_PATTERN.matcher(file.getPath().getName());
        if (!matcher.matches()) {
            // not a hoplog
            continue;
        }

        // account for the disk used by this file
        if (countSize) {
            incrementDiskUsage(file.getLen());
        }

        // All valid hoplog files must match the regex
        matcher = SORTED_HOPLOG_PATTERN.matcher(file.getPath().getName());
        if (matcher.matches()) {
            validFiles.add(file);
        }
    }

    FileStatus[] markers = getExpiryMarkers();
    FileStatus[] validHoplogs = filterValidHoplogs(validFiles.toArray(new FileStatus[validFiles.size()]), markers);

    ArrayList<Hoplog> results = new ArrayList<Hoplog>();
    if (validHoplogs == null || validHoplogs.length == 0) {
        return results;
    }

    for (int i = 0; i < validHoplogs.length; i++) {
        // Skip directories
        if (validHoplogs[i].isDirectory()) {
            continue;
        }

        final Path p = validHoplogs[i].getPath();
        // skip empty file
        if (fs.getFileStatus(p).getLen() <= 0) {
            continue;
        }

        Hoplog hoplog = new HFileSortedOplog(store, p, store.getBlockCache(), stats, store.getStats());
        results.add(hoplog);
    }
    return results;
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
public static Collection<Collection<FileStatus>> getBucketHoplogs(Path regionPath, FileSystem fs, String type,
        long start, long end) throws IOException {
    Collection<Collection<FileStatus>> allBuckets = new ArrayList<Collection<FileStatus>>();

    // hoplog file names follow this pattern
    String HOPLOG_NAME_REGEX = AbstractHoplogOrganizer.HOPLOG_NAME_REGEX + type;
    String EXPIRED_HOPLOG_NAME_REGEX = HOPLOG_NAME_REGEX + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
    final Pattern pattern = Pattern.compile(HOPLOG_NAME_REGEX);
    final Pattern expiredPattern = Pattern.compile(EXPIRED_HOPLOG_NAME_REGEX);

    Path cleanUpIntervalPath = new Path(regionPath.getParent(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME);
    long intervalDurationMillis = readCleanUpIntervalMillis(fs, cleanUpIntervalPath);

    // a region directory contains directories for individual buckets. A bucket
    // has an integer name.
    FileStatus[] bucketDirs = fs.listStatus(regionPath);
    for (FileStatus bucket : bucketDirs) {
        if (!bucket.isDirectory()) {
            continue;
        }
        try {
            Integer.valueOf(bucket.getPath().getName());
        } catch (NumberFormatException e) {
            continue;
        }

        ArrayList<FileStatus> bucketHoplogs = new ArrayList<FileStatus>();

        // identify all the flush hoplogs and seq hoplogs by visiting all the
        // bucket directories
        FileStatus[] bucketFiles = fs.listStatus(bucket.getPath());

        Map<String, Long> expiredHoplogs = getExpiredHoplogs(fs, bucketFiles, expiredPattern);

        FileStatus oldestHopAfterEndTS = null;
        long oldestHopTS = Long.MAX_VALUE;
        long currentTimeStamp = System.currentTimeMillis();
        for (FileStatus file : bucketFiles) {
            if (!file.isFile()) {
                continue;
            }

            Matcher match = pattern.matcher(file.getPath().getName());
            if (!match.matches()) {
                continue;
            }

            long timeStamp = AbstractHoplogOrganizer.getHoplogTimestamp(match);
            if (start > 0 && timeStamp < start) {
                // this hoplog contains records less than the start time stamp
                continue;
            }

            if (end > 0 && timeStamp > end) {
                // this hoplog contains records mutated after end time stamp. Ignore
                // this hoplog if it is not the oldest.
                if (oldestHopTS > timeStamp) {
                    oldestHopTS = timeStamp;
                    oldestHopAfterEndTS = file;
                }
                continue;
            }

            long expiredTimeStamp = expiredTime(file, expiredHoplogs);
            if (expiredTimeStamp > 0 && intervalDurationMillis > 0) {
                if ((currentTimeStamp - expiredTimeStamp) > 0.8 * intervalDurationMillis) {
                    continue;
                }
            }
            bucketHoplogs.add(file);
        }

        if (oldestHopAfterEndTS != null) {
            long expiredTimeStamp = expiredTime(oldestHopAfterEndTS, expiredHoplogs);
            if (expiredTimeStamp <= 0 || intervalDurationMillis <= 0
                    || (currentTimeStamp - expiredTimeStamp) <= 0.8 * intervalDurationMillis) {
                bucketHoplogs.add(oldestHopAfterEndTS);
            }
        }

        if (bucketHoplogs.size() > 0) {
            allBuckets.add(bucketHoplogs);
        }
    }

    return allBuckets;
}
From source file:com.github.joshelser.accumulo.DelimitedIngest.java
License:Apache License
private List<Path> convertInputToPaths() throws IOException {
    List<String> inputs = args.getInput();
    List<Path> paths = new ArrayList<>(inputs.size());
    for (String input : inputs) {
        Path p = new Path(input);
        FileSystem fs = p.getFileSystem(conf);
        FileStatus fstat = fs.getFileStatus(p);
        if (fstat.isFile()) {
            paths.add(p);
        } else if (fstat.isDirectory()) {
            for (FileStatus child : fs.listStatus(p)) {
                if (child.isFile()) {
                    paths.add(child.getPath());
                }
            }
        } else {
            throw new IllegalStateException("Unable to handle that which is not file nor directory: " + p);
        }
    }
    return paths;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            FSDataInputStream streaming = fs.open(status.getPath());
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(streaming));
            re.append(bufferedReader.readLine() + System.lineSeparator());
        }
    }
    return re.toString();
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/** helper returns listStatus() */
private static FileStatus[] shellListStatus(String cmd, FileSystem srcFs, FileStatus src) {
    if (!src.isDir()) {
        FileStatus[] files = { src };
        return files;
    }
    Path path = src.getPath();
    try {
        FileStatus[] files = srcFs.listStatus(path);
        if (files == null) {
            System.err.println(cmd + ": could not get listing for '" + path + "'");
        }
        return files;
    } catch (IOException e) {
        System.err.println(
                cmd + ": could not get listing for '" + path + "' : " + e.getMessage().split("\n")[0]);
    }
    return null;
}
From source file:com.hadoop.compression.lzo.LzoIndexer.java
License:Open Source License
/**
 * Lzo index a given path, calling recursively to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();

            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s). Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}