List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
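The overload above lists several directories in one call and concatenates their entries into a single FileStatus array. None of the examples below call the Path[] form directly, so here is a minimal sketch of it; the directory paths are hypothetical placeholders, not taken from any of the source files below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical input directories; substitute real paths.
        Path[] inputDirs = new Path[] { new Path("/data/2016/05/01"), new Path("/data/2016/05/02") };
        // The Path[] overload returns the concatenated listings of all given paths.
        FileStatus[] statuses = fs.listStatus(inputDirs);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}

The single-Path overload, fs.listStatus(new Path(dir)), used throughout the examples below, behaves the same way for a single directory.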
From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java
License:Apache License
/**
 * Get ALL the data files in the partition, grouped by fileId and sorted by commitTime.
 * Given a partition path, provide all the files with a list of their commits, sorted by commit time.
 */
public Map<String, List<FileStatus>> getAllVersionsInPartition(FileSystem fs, String partitionPath) {
    try {
        FileStatus[] files = fs.listStatus(new Path(basePath, partitionPath));
        return groupFilesByFileId(files, commits.lastCommit());
    } catch (IOException e) {
        throw new HoodieIOException("Could not load all file versions in partition " + partitionPath, e);
    }
}
From source file:com.uber.hoodie.common.model.TestHoodieTableMetadata.java
License:Apache License
@Test
public void testGetOnlyLatestVersionFiles() throws Exception {
    // Put some files in the partition
    String fullPartitionPath = basePath + "/2016/05/01/";
    new File(fullPartitionPath).mkdirs();
    String commitTime1 = "20160501123032";
    String commitTime2 = "20160502123032";
    String commitTime3 = "20160503123032";
    String commitTime4 = "20160504123032";
    String fileId1 = UUID.randomUUID().toString();
    String fileId2 = UUID.randomUUID().toString();
    String fileId3 = UUID.randomUUID().toString();

    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();

    new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile();

    // Now we list the entire partition
    FileSystem fs = FSUtils.getFs();
    FileStatus[] statuses = fs.listStatus(new Path(fullPartitionPath));
    assertEquals(statuses.length, 7);

    metadata = new HoodieTableMetadata(fs, basePath, "testTable");
    FileStatus[] statuses1 = metadata.getLatestVersionInPartition(fs, "2016/05/01", commitTime4);
    assertEquals(statuses1.length, 3);
    Set<String> filenames = Sets.newHashSet();
    for (FileStatus status : statuses1) {
        filenames.add(status.getPath().getName());
    }
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3)));

    // Reset the max commit time
    FileStatus[] statuses2 = metadata.getLatestVersionInPartition(fs, "2016/05/01", commitTime3);
    assertEquals(statuses2.length, 3);
    filenames = Sets.newHashSet();
    for (FileStatus status : statuses2) {
        filenames.add(status.getPath().getName());
    }
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3)));
}
From source file:com.uber.hoodie.common.table.log.HoodieLogFormatTest.java
License:Apache License
/*
@Test
public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
    Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
        .overBaseCommit("100").withFs(fs).build();
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
    writer = writer.appendBlock(dataBlock);
    long size1 = writer.getCurrentSize();
    // Do not close this writer - this simulates a data node appending to a log and
    // dying without closing the file.
    // writer.close();
    writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
        .overBaseCommit("100").withFs(fs).build();
    records = SchemaTestUtil.generateTestRecords(0, 100);
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    dataBlock = new HoodieAvroDataBlock(records, header);
    writer = writer.appendBlock(dataBlock);
    long size2 = writer.getCurrentSize();
    assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1);
    assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match",
        size2, fs.getFileStatus(writer.getLogFile().getPath()).getLen());
    writer.close();
}
*/

@Test
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
    // Use an fs that does not support appends, such as LocalFileSystem
    Path localPartitionPath = new Path("file://" + partitionPath);
    FileSystem localFs = FSUtils.getFs(localPartitionPath.toString(), HoodieTestUtils.getDefaultHadoopConf());
    Path testPath = new Path(localPartitionPath, "append_test");
    localFs.mkdirs(testPath);

    // Write some data and append it twice.
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
    for (int i = 0; i < 2; i++) {
        HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
            .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
            .overBaseCommit("").withFs(localFs).build().appendBlock(dataBlock).close();
    }

    // Ensure there are two log file versions, with the same data.
    FileStatus[] statuses = localFs.listStatus(testPath);
    assertEquals(2, statuses.length);
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Recursively processes all files in the base-path. If excludeMetaFolder is set,
 * the meta-folder and all its subdirs are skipped.
 *
 * @param fs                File system
 * @param basePathStr       Base path
 * @param consumer          Callback for processing
 * @param excludeMetaFolder Exclude the .hoodie folder
 * @throws IOException
 */
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
        boolean excludeMetaFolder) throws IOException {
    PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
    FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
    for (int i = 0; i < topLevelStatuses.length; i++) {
        FileStatus child = topLevelStatuses[i];
        if (child.isFile()) {
            boolean success = consumer.apply(child);
            if (!success) {
                throw new HoodieException("Failed to process file-status=" + child);
            }
        } else if (pathFilter.accept(child.getPath())) {
            RemoteIterator<LocatedFileStatus> itr = fs.listFiles(child.getPath(), true);
            while (itr.hasNext()) {
                FileStatus status = itr.next();
                boolean success = consumer.apply(status);
                if (!success) {
                    throw new HoodieException("Failed to process file-status=" + status);
                }
            }
        }
    }
}
From source file:com.uber.hoodie.utilities.HoodieDeltaStreamer.java
License:Apache License
private String findLastCommitPulled(FileSystem fs, String dataPath) throws IOException {
    FileStatus[] commitTimePaths = fs.listStatus(new Path(dataPath));
    List<String> commitTimes = new ArrayList<>(commitTimePaths.length);
    for (FileStatus commitTimePath : commitTimePaths) {
        // The commit time is the last path component.
        String[] splits = commitTimePath.getPath().toString().split("/");
        commitTimes.add(splits[splits.length - 1]);
    }
    Collections.sort(commitTimes);
    Collections.reverse(commitTimes);
    log.info("Retrieved commit times " + commitTimes);
    return commitTimes.get(0);
}
From source file:com.yolodata.tbana.testutils.HadoopFileTestUtils.java
License:Open Source License
public static String readMapReduceOutputFile(FileSystem fs, Path outputPath)
        throws IOException, IllegalAccessException, InstantiationException {
    FileStatus[] fileStatuses = fs.listStatus(outputPath);
    StringBuilder sb = new StringBuilder();
    for (FileStatus f : fileStatuses) {
        if (f.getPath().toString().endsWith("_SUCCESS"))
            continue; // skip the _SUCCESS marker file
        String partFileContent = readContentFromLocalFile(f.getPath().toUri());
        if (partFileContent != null)
            sb.append(partFileContent);
    }
    return sb.toString();
}
From source file:com.zhangyue.zeus.service.impl.ExportDataServiceImpl.java
License:Open Source License
@Override
public List<String> readResultData(String hdfsResultLocation, boolean isDownload) {
    List<String> resultList = new ArrayList<String>();
    int readedLines = 0;
    String temp = null;
    try {
        Path hdfsPath = new Path(hdfsResultLocation);
        HiveConf hiveConf = new HiveConf(SessionState.class);
        FileSystem fs = hdfsPath.getFileSystem(hiveConf);
        FileStatus[] fss = fs.listStatus(hdfsPath);
        // Row limits: download - 60000, normal view - 200
        int numberLimit = isDownload ? Constants.HIVE_RESULT_DOWNLOAD_LIMIT : Constants.HIVE_RESULT_MAX_LIMIT;
        for (FileStatus fileStatus : fss) {
            Path fsPath = fileStatus.getPath();
            if (readedLines >= numberLimit || fs.getFileStatus(fsPath).isDir()) {
                break;
            }
            BufferedReader bf = new BufferedReader(
                    new InputStreamReader(fs.open(fsPath), Constants.UTF_ENCODING));
            while ((temp = bf.readLine()) != null) {
                if (readedLines >= numberLimit) {
                    break;
                }
                resultList.add(temp);
                readedLines++;
            }
            bf.close();
        }
        FileSystem.closeAll();
    } catch (Exception e) {
        LOG.error("could not read your result", e);
    }
    return resultList;
}
From source file:contrail.stages.GraphStats.java
License:Open Source License
/**
 * Create an iterator to iterate over the output of the MR job.
 * @return An iterator over the Avro part files produced by the job.
 */
protected AvroFileContentsIterator<GraphStatsData> createOutputIterator() {
    String outputDir = (String) stage_options.get("outputpath");
    ArrayList<String> files = new ArrayList<String>();
    FileSystem fs = null;
    try {
        Path outputPath = new Path(outputDir);
        fs = FileSystem.get(this.getConf());
        for (FileStatus status : fs.listStatus(outputPath)) {
            String fileName = status.getPath().getName();
            if (fileName.startsWith("part-") && fileName.endsWith("avro")) {
                files.add(status.getPath().toString());
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Can't get filesystem: " + e.getMessage());
    }
    return new AvroFileContentsIterator<GraphStatsData>(files, getConf());
}
From source file:contrail.util.FileHelper.java
License:Open Source License
/**
 * Moves the contents of oldPath into newPath. This is used to save the final graph.
 * @param oldPath
 * @param newPath
 */
static public void moveDirectoryContents(Configuration conf, String oldPath, String newPath) {
    // We can't invoke rename directly on the old path because it ends up
    // making old_path a subdirectory of new_path.
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        throw new RuntimeException("Can't get filesystem: " + e.getMessage());
    }
    try {
        Path oldPathObject = new Path(oldPath);
        for (FileStatus status : fs.listStatus(oldPathObject)) {
            Path oldFile = status.getPath();
            Path newFile = new Path(newPath, oldFile.getName());
            fs.rename(oldFile, newFile);
        }
    } catch (IOException e) {
        throw new RuntimeException("Problem moving the files: " + e.getMessage());
    }
}
From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java
License:Apache License
/**
 * @see corner.hadoop.services.impl.AccessorProxy#list(java.lang.String)
 */
@Override
public List<FileDesc> list(final String path) throws IOException {
    String _path = path;
    if (path.endsWith("/")) {
        _path = path.substring(0, path.length() - 1);
    }
    Path dstPath = new Path(_path);
    FileSystem dstFs = dstPath.getFileSystem(getConf());
    FileStatus _dstStatus = dstFs.getFileStatus(dstPath);
    if (_dstStatus == null) {
        throw new IllegalArgumentException("The path [" + path + "] does not exist.");
    }
    if (!_dstStatus.isDir()) {
        throw new IllegalArgumentException("The path [" + path + "] is not a dir.");
    }
    FileStatus[] fileStatus = dstFs.listStatus(dstPath);
    if (fileStatus != null && fileStatus.length > 0) {
        List<FileDesc> ret = new LinkedList<FileDesc>();
        for (FileStatus status : fileStatus) {
            ret.add(new FileDesc(_path + "/" + status.getPath().getName(), status.isDir(),
                    new Timestamp(status.getModificationTime()), status.getLen()));
        }
        return ret;
    }
    return null;
}