Example usage for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using default path filter.
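Before the real-world examples below, here is a minimal, self-contained sketch of both the single-path and array overloads. The paths (/tmp/example, /tmp/a, /tmp/b) are hypothetical placeholders, not values from the examples.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Obtain the FileSystem backing the default configuration (fs.defaultFS).
        FileSystem fs = FileSystem.get(new Configuration());

        // Single-path form: the direct (non-recursive) children of one directory.
        // Throws FileNotFoundException if the path does not exist.
        FileStatus[] children = fs.listStatus(new Path("/tmp/example"));
        System.out.println("children of /tmp/example: " + children.length);

        // Array form (the prototype above): statuses for all given paths,
        // concatenated into one array and filtered by the default path filter.
        FileStatus[] merged = fs.listStatus(new Path[] { new Path("/tmp/a"), new Path("/tmp/b") });

        for (FileStatus status : merged) {
            System.out.println(status.getPath() + " isDirectory=" + status.isDirectory());
        }
    }
}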

Usage

From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java

License:Apache License

/**
 * Get ALL the data files in a partition, grouped by fileId and sorted by commitTime.
 * Given a partition path, provides every file with a list of its commits, sorted by commit time.
 */
public Map<String, List<FileStatus>> getAllVersionsInPartition(FileSystem fs, String partitionPath) {
    try {
        FileStatus[] files = fs.listStatus(new Path(basePath, partitionPath));
        return groupFilesByFileId(files, commits.lastCommit());
    } catch (IOException e) {
        throw new HoodieIOException("Could not load all file versions in partition " + partitionPath, e);
    }
}

From source file:com.uber.hoodie.common.model.TestHoodieTableMetadata.java

License:Apache License

@Test
public void testGetOnlyLatestVersionFiles() throws Exception {
    // Put some files in the partition
    String fullPartitionPath = basePath + "/2016/05/01/";
    new File(fullPartitionPath).mkdirs();
    String commitTime1 = "20160501123032";
    String commitTime2 = "20160502123032";
    String commitTime3 = "20160503123032";
    String commitTime4 = "20160504123032";
    String fileId1 = UUID.randomUUID().toString();
    String fileId2 = UUID.randomUUID().toString();
    String fileId3 = UUID.randomUUID().toString();

    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile();
    new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile();

    new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile();
    new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile();

    // Now we list the entire partition
    FileSystem fs = FSUtils.getFs();
    FileStatus[] statuses = fs.listStatus(new Path(fullPartitionPath));
    assertEquals(7, statuses.length);

    metadata = new HoodieTableMetadata(fs, basePath, "testTable");
    FileStatus[] statuses1 = metadata.getLatestVersionInPartition(fs, "2016/05/01", commitTime4);
    assertEquals(3, statuses1.length);
    Set<String> filenames = Sets.newHashSet();
    for (FileStatus status : statuses1) {
        filenames.add(status.getPath().getName());
    }
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3)));

    // Reset the max commit time
    FileStatus[] statuses2 = metadata.getLatestVersionInPartition(fs, "2016/05/01", commitTime3);
    assertEquals(3, statuses2.length);
    filenames = Sets.newHashSet();
    for (FileStatus status : statuses2) {
        filenames.add(status.getPath().getName());
    }
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2)));
    assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3)));
}

From source file:com.uber.hoodie.common.table.log.HoodieLogFormatTest.java

License:Apache License

/**
@Test
public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
    Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
            .overBaseCommit("100").withFs(fs).build();
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
    writer = writer.appendBlock(dataBlock);
    long size1 = writer.getCurrentSize();
    // do not close this writer - this simulates a data node appending to a log dying without closing the file
    // writer.close();

    writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
            .withFs(fs).build();
    records = SchemaTestUtil.generateTestRecords(0, 100);
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    dataBlock = new HoodieAvroDataBlock(records, header);
    writer = writer.appendBlock(dataBlock);
    long size2 = writer.getCurrentSize();
    assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1);
    assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match",
            size2, fs.getFileStatus(writer.getLogFile().getPath()).getLen());
    writer.close();
}
**/

@Test
public void testAppendNotSupported() throws IOException, URISyntaxException, InterruptedException {
    // Use some fs like LocalFileSystem, that does not support appends
    Path localPartitionPath = new Path("file://" + partitionPath);
    FileSystem localFs = FSUtils.getFs(localPartitionPath.toString(), HoodieTestUtils.getDefaultHadoopConf());
    Path testPath = new Path(localPartitionPath, "append_test");
    localFs.mkdirs(testPath);

    // Some data & append two times.
    List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);

    for (int i = 0; i < 2; i++) {
        HoodieLogFormat.newWriterBuilder().onParentPath(testPath)
                .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive")
                .overBaseCommit("").withFs(localFs).build().appendBlock(dataBlock).close();
    }

    // ensure there are two log file versions, with same data.
    FileStatus[] statuses = localFs.listStatus(testPath);
    assertEquals(2, statuses.length);
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its
 * subdirs are skipped.
 * @param fs           File System
 * @param basePathStr  Base-Path
 * @param consumer     Callback for processing
 * @param excludeMetaFolder Exclude .hoodie folder
 * @throws IOException
 */
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
        boolean excludeMetaFolder) throws IOException {
    PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
    FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
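    // Note: listStatus is not recursive - it returns only the direct children of basePathStr;
    // subdirectories are descended into below via the recursive listFiles call.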
    for (FileStatus child : topLevelStatuses) {
        if (child.isFile()) {
            boolean success = consumer.apply(child);
            if (!success) {
                throw new HoodieException("Failed to process file-status=" + child);
            }
        } else if (pathFilter.accept(child.getPath())) {
            RemoteIterator<LocatedFileStatus> itr = fs.listFiles(child.getPath(), true);
            while (itr.hasNext()) {
                FileStatus status = itr.next();
                boolean success = consumer.apply(status);
                if (!success) {
                    throw new HoodieException("Failed to process file-status=" + status);
                }
            }
        }
    }
}

From source file:com.uber.hoodie.utilities.HoodieDeltaStreamer.java

License:Apache License

private String findLastCommitPulled(FileSystem fs, String dataPath) throws IOException {
    FileStatus[] commitTimePaths = fs.listStatus(new Path(dataPath));
    List<String> commitTimes = new ArrayList<>(commitTimePaths.length);
    for (FileStatus commitTimePath : commitTimePaths) {
        // Path#getName() returns the final path component, i.e. the commit time folder
        commitTimes.add(commitTimePath.getPath().getName());
    }
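    // Commit times are fixed-width timestamps (e.g. 20160501123032 above), so sorting the
    // strings lexicographically and reversing leaves the most recent commit at index 0.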
    Collections.sort(commitTimes);
    Collections.reverse(commitTimes);
    log.info("Retrieved commit times " + commitTimes);
    return commitTimes.get(0);
}

From source file:com.yolodata.tbana.testutils.HadoopFileTestUtils.java

License:Open Source License

public static String readMapReduceOutputFile(FileSystem fs, Path outputPath)
        throws IOException, IllegalAccessException, InstantiationException {
    FileStatus[] fileStatuses = fs.listStatus(outputPath);
    StringBuilder sb = new StringBuilder();
    for (FileStatus f : fileStatuses) {
        if (f.getPath().toString().endsWith("_SUCCESS"))
            continue; // skip SUCCESS file
        String partFileContent = readContentFromLocalFile(f.getPath().toUri());
        if (partFileContent != null)
            sb.append(partFileContent);
    }

    return sb.toString();
}

From source file:com.zhangyue.zeus.service.impl.ExportDataServiceImpl.java

License:Open Source License

@Override
public List<String> readResultData(String hdfsResultLocation, boolean isDownload) {
    List<String> resultList = new ArrayList<String>();
    int linesRead = 0;
    String temp = null;
    try {
        Path hdfsPath = new Path(hdfsResultLocation);
        HiveConf hiveConf = new HiveConf(SessionState.class);
        FileSystem fs = hdfsPath.getFileSystem(hiveConf);
        FileStatus[] fss = fs.listStatus(hdfsPath);
        // line limit: 60000 when downloading, 200 when displaying
        int numberLimit = isDownload ? Constants.HIVE_RESULT_DOWNLOAD_LIMIT : Constants.HIVE_RESULT_MAX_LIMIT;
        for (FileStatus fileStatus : fss) {
            Path fsPath = fileStatus.getPath();
            // the FileStatus returned by listStatus already carries the type,
            // so no extra getFileStatus round-trip is needed
            if (linesRead >= numberLimit || fileStatus.isDir()) {
                break;
            }
            BufferedReader bf = new BufferedReader(
                    new InputStreamReader(fs.open(fsPath), Constants.UTF_ENCODING));
            while ((temp = bf.readLine()) != null) {
                if (linesRead >= numberLimit) {
                    break;
                }
                resultList.add(temp);
                linesRead++;
            }
            bf.close();
        }
        FileSystem.closeAll();
    } catch (Exception e) {
        LOG.error("Failed to read query result from " + hdfsResultLocation, e);
    }
    return resultList;
}

From source file:contrail.stages.GraphStats.java

License:Open Source License

/**
 * Create an iterator to iterate over the output of the MR job.
 * @return an iterator over the Avro part files produced by the job
 */
protected AvroFileContentsIterator<GraphStatsData> createOutputIterator() {
    String outputDir = (String) stage_options.get("outputpath");
    ArrayList<String> files = new ArrayList<String>();
    FileSystem fs = null;
    try {
        Path outputPath = new Path(outputDir);
        fs = FileSystem.get(this.getConf());
        for (FileStatus status : fs.listStatus(outputPath)) {
            String fileName = status.getPath().getName();
            if (fileName.startsWith("part-") && fileName.endsWith("avro")) {
                files.add(status.getPath().toString());
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Can't get filesystem: " + e.getMessage());
    }
    return new AvroFileContentsIterator<GraphStatsData>(files, getConf());
}

From source file:contrail.util.FileHelper.java

License:Open Source License

/**
 * Function moves the contents of old_path into new_path. This is used
 * to save the final graph.
 * @param oldPath
 * @param newPath
 */
static public void moveDirectoryContents(Configuration conf, String oldPath, String newPath) {
    // We can't invoke rename directly on old path because it ends up
    // making old_path a subdirectory of new_path.
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        throw new RuntimeException("Can't get filesystem: " + e.getMessage());
    }
    try {
        Path oldPathObject = new Path(oldPath);
        for (FileStatus status : fs.listStatus(oldPathObject)) {
            Path oldFile = status.getPath();
            Path newFile = new Path(newPath, oldFile.getName());
            fs.rename(oldFile, newFile);
        }
    } catch (IOException e) {
        throw new RuntimeException("Problem moving the files: " + e.getMessage());
    }
}

From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java

License:Apache License

/**
 * @see corner.hadoop.services.impl.AccessorProxy#list(java.lang.String)
 */
@Override
public List<FileDesc> list(final String path) throws IOException {
    String _path = path;
    if (path.endsWith("/")) {
        _path = path.substring(0, path.length() - 1);
    }
    Path dstPath = new Path(_path);
    FileSystem dstFs = dstPath.getFileSystem(getConf());
    FileStatus _dstStatus = dstFs.getFileStatus(dstPath);
    if (_dstStatus == null) {
        throw new IllegalArgumentException("The path [" + path + "] dose not exist.");
    }
    if (!_dstStatus.isDir()) {
        throw new IllegalArgumentException("The path [" + path + "] is not dir.");
    }
    FileStatus[] fileStatus = dstFs.listStatus(dstPath);
    if (fileStatus != null && fileStatus.length > 0) {
        List<FileDesc> ret = new LinkedList<FileDesc>();
        for (FileStatus status : fileStatus) {
            ret.add(new FileDesc(_path + "/" + status.getPath().getName(), status.isDir(),
                    new Timestamp(status.getModificationTime()), status.getLen()));
        }
        return ret;
    }
    return null;
}