List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
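Before the project examples, here is a minimal, hedged sketch of calling this single-Path overload; the configuration and the directory "/tmp/data" are illustrative assumptions, not taken from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative only: default configuration and an assumed directory "/tmp/data"
        FileSystem fs = FileSystem.get(new Configuration());
        // listStatus is non-recursive: one FileStatus per direct child of the directory
        FileStatus[] entries = fs.listStatus(new Path("/tmp/data"));
        for (FileStatus entry : entries) {
            System.out.println(entry.getPath() + "\t" + entry.getLen() + " bytes");
        }
    }
}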
From source file:com.turn.camino.render.functions.FileSystemFunctionsTest.java
License:Open Source License
/**
 * Set up environment
 */
@BeforeClass
public void setUp() throws IOException {
    // mock environment
    FileSystem fileSystem = mock(FileSystem.class);
    FileStatus[] fss = new FileStatus[] {
            new FileStatus(1200000L, false, 3, 1000L, 1409302856296L, new org.apache.hadoop.fs.Path("/a/b/1.dat")),
            new FileStatus(1400000L, false, 3, 1000L, 1409302867303L, new org.apache.hadoop.fs.Path("/a/b/2.dat")),
            new FileStatus(1060000L, false, 3, 1000L, 1409302844187L, new org.apache.hadoop.fs.Path("/a/b/3.dat")) };
    org.apache.hadoop.fs.Path dir = new org.apache.hadoop.fs.Path("/a/b");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(true);
    when(fileSystem.listStatus(dir)).thenReturn(fss);
    when(fileSystem.exists(new org.apache.hadoop.fs.Path("/x/y"))).thenReturn(false);
    dir = new org.apache.hadoop.fs.Path("/u/v");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(false);
    doThrow(new IOException()).when(fileSystem).listStatus(new org.apache.hadoop.fs.Path("/foo"));
    context = mock(Context.class);
    Env env = mock(Env.class);
    when(context.getEnv()).thenReturn(env);
    when(env.getCurrentTime()).thenReturn(1409389256296L);
    when(env.getTimeZone()).thenReturn(TimeZone.getTimeZone("GMT"));
    when(env.getFileSystem()).thenReturn(fileSystem);
}
From source file:com.twitter.elephanttwin.util.HdfsFsWalker.java
License:Apache License
private void walkInternal(FileStatus fileStatus, FileSystem fs,
        Functional.F2<Boolean, FileStatus, FileSystem> evalFunc, int nestingLevel) throws IOException {
    if (pathFilter != null && !pathFilter.accept(fileStatus.getPath())) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Path Filter did not accept " + fileStatus.getPath() + ", skipping.");
        }
        return;
    }

    // Recursively walk subdirectories
    if (fileStatus.isDir()) {
        FileStatus[] statuses = fs.listStatus(fileStatus.getPath());
        if (statuses != null) {
            for (FileStatus childStatus : statuses) {
                walkInternal(childStatus, fs, evalFunc, nestingLevel + 1);
            }
        }
    }

    // Finally, evaluate the current directory.
    try {
        evalFunc.eval(fileStatus, fs);
    } catch (RuntimeException e) {
        throw new IOException(e);
    }
}
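The walker above recurses manually by calling listStatus at every directory level. On recent Hadoop versions a recursive file listing can also be obtained with FileSystem.listFiles; a minimal sketch, assuming an illustrative base path "/a/b":

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class RecursiveListingSketch {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // "/a/b" is an assumed path; listFiles(path, true) iterates files (not directories) recursively
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/a/b"), true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath());
        }
    }
}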
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
public static boolean isValidFile(final FileSystem hdfs, final String path) throws IOException {
    FileStatus[] statuses = hdfs.listStatus(new Path(path));
    return (statuses.length == 1 && !statuses[0].isDir() && statuses[0].getBlockSize() > 0L);
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
/**
 * @param result contains the list of FileStatus that passed the filtering conditions
 * @param fs
 * @param path
 * @param dirFilter : filter applied to directories only
 * @param fileFilter : filter applied to files only
 * @throws IOException
 */
public static void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter dirFilter, PathFilter fileFilter) throws IOException {
    FileStatus[] stats = fs.listStatus(path);
    if (stats != null) {
        for (FileStatus stat : stats) {
            if (stat.isDir() && dirFilter.accept(stat.getPath())) {
                addInputPathRecursively(result, fs, stat.getPath(), dirFilter, fileFilter);
            } else {
                if (fileFilter.accept(stat.getPath())) {
                    result.add(stat);
                }
            }
        }
    }
}
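A hedged usage sketch of the helper above; the input path and both filters are illustrative assumptions (PathFilter has a single accept(Path) method, so Java 8 lambdas can be used):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import com.twitter.elephanttwin.util.HdfsUtils;

public class AddInputPathRecursivelySketch {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        List<FileStatus> result = new ArrayList<>();
        PathFilter everyDir = path -> true; // descend into all directories
        // skip underscore- and dot-prefixed files such as _SUCCESS (illustrative convention)
        PathFilter visibleFiles = path -> !path.getName().startsWith("_") && !path.getName().startsWith(".");
        // "/data/input" is an assumed path
        HdfsUtils.addInputPathRecursively(result, fs, new Path("/data/input"), everyDir, visibleFiles);
        System.out.println("Matched files: " + result.size());
    }
}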
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {
    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }
    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }
    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);
    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }
    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}
From source file:com.twitter.hraven.etl.FileLister.java
License:Apache License
/**
 * Recursively traverses the dirs to get the list of
 * files for a given path, filtered as per the input path range filter
 */
private static void traverseDirs(List<FileStatus> fileStatusesList, FileSystem hdfs, Path inputPath,
        JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter) throws IOException {
    // get all the files and dirs in the current dir
    FileStatus allFiles[] = hdfs.listStatus(inputPath);
    for (FileStatus aFile : allFiles) {
        if (aFile.isDir()) {
            // recurse here
            traverseDirs(fileStatusesList, hdfs, aFile.getPath(), jobFileModifiedRangePathFilter);
        } else {
            // check whether the path filter accepts this file
            if (jobFileModifiedRangePathFilter.accept(aFile.getPath())) {
                fileStatusesList.add(aFile);
            }
        }
    }
}
From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java
License:Apache License
/**
 * Walks the temporary directory structure to move (rename) files
 * to their final location.
 */
private void moveResults(Path p, String rem, FileSystem fs) throws IOException {
    for (FileStatus fstat : fs.listStatus(p)) {
        Path src = fstat.getPath();
        if (fstat.isDir()) {
            log.info("mkdir: " + src);
            fs.mkdirs(removePart(src, rem));
            moveResults(fstat.getPath(), rem, fs);
        } else {
            Path dst = removePart(src, rem);
            log.info("mv: " + src + " " + dst);
            fs.rename(src, dst);
        }
    }
}
From source file:com.uber.hoodie.cli.commands.HoodieLogFileCommand.java
License:Apache License
@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files") public String showLogFileCommits( @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final String logFilePathPattern, @CliOption(key = {// w ww. j ava 2s . c o m "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = { "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = { "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException { FileSystem fs = HoodieCLI.tableMetadata.getFs(); List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern))) .map(status -> status.getPath().toString()).collect(Collectors.toList()); Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = Maps .newHashMap(); int totalEntries = 0; int numCorruptBlocks = 0; int dummyInstantTimeCount = 0; for (String logFilePath : logFilePaths) { FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath)); Schema writerSchema = new AvroSchemaConverter().convert( SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath))); HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema); // read the avro blocks while (reader.hasNext()) { HoodieLogBlock n = reader.next(); String instantTime; int recordCount = 0; if (n instanceof HoodieCorruptBlock) { try { instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME); if (instantTime == null) { throw new Exception("Invalid instant time " + instantTime); } } catch (Exception e) { numCorruptBlocks++; instantTime = "corrupt_block_" + numCorruptBlocks; // could not read metadata for corrupt block } } else { instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME); if (instantTime == null) { // This can happen when reading archived commit files since they were written without any instant time dummyInstantTimeCount++; instantTime = "dummy_instant_time_" + dummyInstantTimeCount; } if (n instanceof HoodieAvroDataBlock) { recordCount = ((HoodieAvroDataBlock) n).getRecords().size(); } } if (commitCountAndMetadata.containsKey(instantTime)) { commitCountAndMetadata.get(instantTime).add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount)); totalEntries++; } else { List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list = new ArrayList<>(); list.add(new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount)); commitCountAndMetadata.put(instantTime, list); totalEntries++; } } reader.close(); } List<Comparable[]> rows = new ArrayList<>(); int i = 0; ObjectMapper objectMapper = new ObjectMapper(); for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata .entrySet()) { String instantTime = entry.getKey().toString(); for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry .getValue()) { Comparable[] output = 
new Comparable[5]; output[0] = instantTime; output[1] = tuple3._3(); output[2] = tuple3._1().toString(); output[3] = objectMapper.writeValueAsString(tuple3._2()._1()); output[4] = objectMapper.writeValueAsString(tuple3._2()._2()); rows.add(output); i++; } } TableHeader header = new TableHeader().addTableHeaderField("InstantTime").addTableHeaderField("RecordCount") .addTableHeaderField("BlockType").addTableHeaderField("HeaderMetadata") .addTableHeaderField("FooterMetadata"); return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows); }
From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java
License:Apache License
public String getFilenameForRecord(FileSystem fs, final HoodieRecord record, String fileId) {
    try {
        FileStatus[] files = fs.listStatus(new Path(basePath, record.getPartitionPath()));
        Map<String, List<FileStatus>> fileIdToVersions = groupFilesByFileId(files, commits.lastCommit());
        // If the record is not found
        if (!fileIdToVersions.containsKey(fileId)) {
            throw new FileNotFoundException("Cannot find valid versions for fileId " + fileId);
        }
        List<FileStatus> statuses = fileIdToVersions.get(fileId);
        return statuses.get(0).getPath().getName();
    } catch (IOException e) {
        throw new HoodieIOException("Could not get Filename for record " + record, e);
    }
}
From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java
License:Apache License
/**
 * Get only the latest file in the partition with precondition commitTime(file) lt maxCommitTime
 *
 * @param fs
 * @param partitionPathStr
 * @param maxCommitTime
 * @return
 */
public FileStatus[] getLatestVersionInPartition(FileSystem fs, String partitionPathStr, String maxCommitTime) {
    try {
        Path partitionPath = new Path(basePath, partitionPathStr);
        if (!fs.exists(partitionPath)) {
            return new FileStatus[0];
        }
        FileStatus[] files = fs.listStatus(partitionPath);
        Map<String, List<FileStatus>> fileIdToVersions = groupFilesByFileId(files, commits.lastCommit());
        HashMap<String, FileStatus> validFiles = new HashMap<>();
        for (String fileId : fileIdToVersions.keySet()) {
            List<FileStatus> versions = fileIdToVersions.get(fileId);
            for (FileStatus file : versions) {
                String filename = file.getPath().getName();
                String commitTime = FSUtils.getCommitTime(filename);
                if (HoodieCommits.isCommit1BeforeOrOn(commitTime, maxCommitTime)) {
                    validFiles.put(fileId, file);
                    break;
                }
            }
        }
        return validFiles.values().toArray(new FileStatus[validFiles.size()]);
    } catch (IOException e) {
        throw new HoodieIOException("Could not get latest versions in Partition " + partitionPathStr, e);
    }
}