List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
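A minimal sketch of the call pattern the examples below share, using the Path[] overload from the signature above. The /data/a and /data/b paths are illustrative assumptions, and error handling is omitted:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // The Path[] overload lists several directories in one call and
        // concatenates the resulting FileStatus entries into one array.
        FileStatus[] entries = fs.listStatus(new Path[] { new Path("/data/a"), new Path("/data/b") });
        for (FileStatus entry : entries) {
            if (!entry.isDir()) { // most examples below skip directories the same way
                System.out.println(entry.getPath() + " " + entry.getLen() + " bytes");
            }
        }
        fs.close();
    }
}

Most of the examples that follow use the single-Path overload, fs.listStatus(dirPath), and iterate the returned array in the same way.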
From source file:com.mozilla.grouperfish.transforms.coclustering.text.Dictionary.java
License:Apache License
public static Map<Integer, String> loadInvertedIndexWithKeys(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> index = null;
    if (dictionaryPath != null) {
        index = new HashMap<Integer, String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        String[] pair = line.split("\t");
                        index.put(Integer.parseInt(pair[0]), pair[1].trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + index.size());
    }
    return index;
}
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
/**
 * Walk recursively to get all file paths up to a max depth
 *
 * @param fs
 * @param inputPath
 * @param depth
 * @param maxDepth
 * @return
 * @throws IOException
 */
public static List<Path> getPaths(FileSystem fs, Path inputPath, int depth, int maxDepth)
        throws IOException {
    List<Path> retPaths = new ArrayList<Path>();
    for (FileStatus status : fs.listStatus(inputPath)) {
        if (status.isDir() && (maxDepth == -1 || depth < maxDepth)) {
            retPaths.addAll(getPaths(fs, status.getPath(), depth + 1, maxDepth));
        } else {
            retPaths.add(status.getPath());
        }
    }
    return retPaths;
}
From source file:com.mozilla.hadoop.UnknownPathFinder.java
License:Apache License
/**
 * Walk recursively to get all file paths up to a max depth
 *
 * @param fs
 * @param inputPath
 * @param depth
 * @param maxDepth
 * @return
 * @throws IOException
 */
public static Set<String> getAllPaths(FileSystem fs, Path inputPath, int depth, int maxDepth)
        throws IOException {
    Set<String> retPaths = new HashSet<String>();
    for (FileStatus status : fs.listStatus(inputPath)) {
        if (status.isDir() && depth < maxDepth) {
            retPaths.addAll(getAllPaths(fs, status.getPath(), depth + 1, maxDepth));
        } else {
            String p = status.getPath().toString();
            if (!p.contains("-ROOT-") && !p.contains(".META.") && !p.contains(".logs")
                    && !p.contains(".regioninfo") && !p.contains("compaction.dir")
                    && !p.contains("hbase.version")) {
                retPaths.add(p);
            }
        }
    }
    return retPaths;
}
From source file:com.mozilla.socorro.hadoop.RawDumpSize.java
License:LGPL
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }
    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            int byteSize = Integer.parseInt(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            if (hdfs != null) {
                hdfs.close();
            }
        }
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", rawStats.getMin(),
                rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d),
                rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", processedStats.getMin(),
                processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
                processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }
    return rc;
}
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
private static void buildInputPathList(FileSystem fileSystem, Path topPath, List<Path> pathList,
        String lastMergedDirName, String lastcopiedDirName) throws IOException {
    FileStatus topPathStatus = fileSystem.getFileStatus(topPath);
    if (topPathStatus.isDir() == false) {
        String topPathFullName = topPath.toString();
        String[] tokens = topPathFullName.split("/");
        String fileName = tokens[tokens.length - 1];
        if (fileName.startsWith("data-") && fileName.endsWith(".dat")) {
            String timeStamp = tokens[tokens.length - 2];
            if (timeStamp.compareTo(lastMergedDirName) > 0 && timeStamp.compareTo(lastcopiedDirName) <= 0) {
                pathList.add(topPath);
            }
        }
        return; // This is a leaf
    }
    FileStatus[] fsArray = fileSystem.listStatus(topPath);
    for (FileStatus fileStatus : fsArray) {
        Path path = fileStatus.getPath();
        buildInputPathList(fileSystem, path, pathList, lastMergedDirName, lastcopiedDirName);
    }
}
From source file:com.mycompany.app.TestStagingDirectoryPermissions.java
License:Apache License
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(MySleepMapper.class);
        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");
        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}
From source file:com.mycompany.movehdfstohbase.MoveHdfsToHbase.java
private static void putData() throws IOException {
    List<Put> putList = new LinkedList<Put>();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path("/page"));
    int counter = 0;
    Table table = connection.getTable(TableName.valueOf(TABLE_NAME));
    for (FileStatus f : status) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(f.getPath())));
        Put put = new Put(Bytes.toBytes("row" + (++counter)));
        put.addColumn(Bytes.toBytes("url"), null, Bytes.toBytes(br.readLine())); // url
        put.addColumn(Bytes.toBytes("title"), null, Bytes.toBytes(br.readLine())); // title
        put.addColumn(Bytes.toBytes("body"), null, Bytes.toBytes(br.readLine())); // body
        br.close(); // close each reader; the original leaked the stream
        putList.add(put);
    }
    table.put(putList);
    table.close();
}
From source file:com.nearinfinity.blur.mapreduce.BlurTask.java
License:Apache License
public int getNumReducers(Configuration configuration) {
    Path tablePath = new Path(_tableDescriptor.tableUri);
    try {
        int num = _tableDescriptor.shardCount;
        FileSystem fileSystem = FileSystem.get(tablePath.toUri(), configuration);
        if (!fileSystem.exists(tablePath)) {
            return num;
        }
        FileStatus[] files = fileSystem.listStatus(tablePath);
        int shardCount = 0;
        for (FileStatus fileStatus : files) {
            if (fileStatus.isDir()) {
                String name = fileStatus.getPath().getName();
                if (name.startsWith(BlurConstants.SHARD_PREFIX)) {
                    shardCount++;
                }
            }
        }
        if (shardCount == 0) {
            return num;
        }
        if (shardCount != num) {
            LOG.warn("Asked for " + num + " reducers, but existing table " + _tableDescriptor.name
                    + " has " + shardCount + " shards. Using " + shardCount + " reducers");
        }
        return shardCount;
    } catch (IOException e) {
        throw new RuntimeException("Unable to connect to filesystem", e);
    }
}
From source file:com.nearinfinity.blur.utils.BlurUtil.java
License:Apache License
public static void validateShardCount(int shardCount, FileSystem fileSystem, Path tablePath)
        throws IOException {
    FileStatus[] listStatus = fileSystem.listStatus(tablePath);
    if (listStatus.length != shardCount) {
        LOG.error("Number of directories in table path [" + tablePath
                + "] does not match definition of [" + shardCount + "] shard count.");
        throw new RuntimeException("Number of directories in table path [" + tablePath
                + "] does not match definition of [" + shardCount + "] shard count.");
    }
}
From source file:com.netflix.Aegisthus.java
License:Apache License
protected List<Path> getDataFiles(Configuration conf, String dir) throws IOException {
    Set<String> globs = Sets.newHashSet();
    List<Path> output = Lists.newArrayList();
    Path dirPath = new Path(dir);
    FileSystem fs = dirPath.getFileSystem(conf);
    List<FileStatus> input = Lists.newArrayList(fs.listStatus(dirPath));
    for (String path : DirectoryWalker.with(conf).threaded().addAllStatuses(input).pathsString()) {
        if (path.endsWith("-Data.db")) {
            globs.add(path.replaceAll("[^/]+-Data.db", "*-Data.db"));
        }
    }
    for (String path : globs) {
        output.add(new Path(path));
    }
    return output;
}