Example usage for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using the default path filter.
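
A minimal, self-contained sketch of this overload, assuming a default Configuration; the two input directories are hypothetical placeholders:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // List the combined contents of several directories in one call.
        // Both paths are hypothetical placeholders.
        Path[] dirs = { new Path("/data/in"), new Path("/data/out") };
        for (FileStatus status : fs.listStatus(dirs)) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}

Most of the examples below use the single-path overload, listStatus(Path f), which lists one file or directory with the same semantics.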

Usage

From source file:com.mozilla.grouperfish.transforms.coclustering.text.Dictionary.java

License:Apache License
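
Loads an inverted dictionary index from HDFS: every regular file under dictionaryPath is read line by line, with each tab-separated line contributing one integer-to-term entry; directories returned by listStatus are skipped.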

public static Map<Integer, String> loadInvertedIndexWithKeys(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> index = null;
    if (dictionaryPath != null) {
        index = new HashMap<Integer, String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        String[] pair = line.split("\t");
                        index.put(Integer.parseInt(pair[0]), pair[1].trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + index.size());
    }

    return index;
}

From source file:com.mozilla.hadoop.Backup.java

License:Apache License
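
Recursively collects all file paths under inputPath, descending into each directory returned by listStatus until maxDepth is reached (-1 means no limit).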

/**
 * Walk recursively to get all file paths up to a max depth
 * @param fs the file system to walk
 * @param inputPath the directory to start from
 * @param depth the current recursion depth
 * @param maxDepth the maximum depth to descend to, or -1 for no limit
 * @return all file paths found
 * @throws IOException
 */
public static List<Path> getPaths(FileSystem fs, Path inputPath, int depth, int maxDepth) throws IOException {
    List<Path> retPaths = new ArrayList<Path>();
    for (FileStatus status : fs.listStatus(inputPath)) {
        if (status.isDir() && (maxDepth == -1 || depth < maxDepth)) {
            retPaths.addAll(getPaths(fs, status.getPath(), depth + 1, maxDepth));
        } else {
            retPaths.add(status.getPath());
        }
    }

    return retPaths;
}

From source file:com.mozilla.hadoop.UnknownPathFinder.java

License:Apache License
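
The same recursive walk, but collecting path strings and filtering out HBase-internal entries such as -ROOT-, .META., .logs, .regioninfo, compaction.dir, and hbase.version.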

/**
 * Walk recursively to get all file paths up to a max depth,
 * skipping HBase-internal files and directories
 * @param fs the file system to walk
 * @param inputPath the directory to start from
 * @param depth the current recursion depth
 * @param maxDepth the maximum depth to descend to
 * @return all matching file paths found
 * @throws IOException
 */
public static Set<String> getAllPaths(FileSystem fs, Path inputPath, int depth, int maxDepth)
        throws IOException {
    Set<String> retPaths = new HashSet<String>();
    for (FileStatus status : fs.listStatus(inputPath)) {
        if (status.isDir() && depth < maxDepth) {
            retPaths.addAll(getAllPaths(fs, status.getPath(), depth + 1, maxDepth));
        } else {
            String p = status.getPath().toString();
            if (!p.contains("-ROOT-") && !p.contains(".META.") && !p.contains(".logs")
                    && !p.contains(".regioninfo") && !p.contains("compaction.dir")
                    && !p.contains("hbase.version")) {
                retPaths.add(p);
            }
        }
    }

    return retPaths;
}

From source file:com.mozilla.socorro.hadoop.RawDumpSize.java

License:LGPL
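
Runs a MapReduce job, then uses listStatus on the job's output directory to read every part file and compute size statistics for raw and processed crash dumps.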

public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }

    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            int byteSize = Integer.parseInt(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            if (hdfs != null) {
                hdfs.close();
            }
        }

        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", rawStats.getMin(),
                rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d), rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", processedStats.getMin(),
                processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
                processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }

    return rc;
}

From source file:com.mvdb.platform.action.VersionMerge.java

License:Apache License
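
Recursively builds the input path list for a merge: leaf files named data-*.dat are kept when their parent directory's timestamp falls after the last merged directory and at or before the last copied one.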

private static void buildInputPathList(FileSystem fileSystem, Path topPath, List<Path> pathList,
        String lastMergedDirName, String lastcopiedDirName) throws IOException {
    FileStatus topPathStatus = fileSystem.getFileStatus(topPath);
    if (!topPathStatus.isDir()) {
        String topPathFullName = topPath.toString();
        String[] tokens = topPathFullName.split("/");
        String fileName = tokens[tokens.length - 1];
        if (fileName.startsWith("data-") && fileName.endsWith(".dat")) {
            String timeStamp = tokens[tokens.length - 2];
            if (timeStamp.compareTo(lastMergedDirName) > 0 && timeStamp.compareTo(lastcopiedDirName) <= 0) {
                pathList.add(topPath);
            }
        }
        return; //This is a leaf
    }

    FileStatus[] fsArray = fileSystem.listStatus(topPath);
    for (FileStatus fileStatus : fsArray) {
        Path path = fileStatus.getPath();
        buildInputPathList(fileSystem, path, pathList, lastMergedDirName, lastcopiedDirName);
    }
}

From source file:com.mycompany.app.TestStagingDirectoryPermissions.java

License:Apache License
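
A test that submits a job to a MiniMRClientCluster with a restrictive umask, then uses listStatus to locate the single staging directory and assert that job.xml is still group- and world-readable.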

@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}

From source file:com.mycompany.movehdfstohbase.MoveHdfsToHbase.java
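
Lists every file under /page and loads the first three lines of each (url, title, body) into an HBase table as one Put per file.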

private static void putData() throws IOException {
    List<Put> putList = new LinkedList<Put>();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path("/page"));
    int counter = 0;
    Table table = connection.getTable(TableName.valueOf(TABLE_NAME));

    for (FileStatus f : status) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(f.getPath())));
        Put put = new Put(Bytes.toBytes("row" + (++counter)));
        put.addColumn(Bytes.toBytes("url"), null, Bytes.toBytes(br.readLine())); // url
        put.addColumn(Bytes.toBytes("title"), null, Bytes.toBytes(br.readLine())); // title
        put.addColumn(Bytes.toBytes("body"), null, Bytes.toBytes(br.readLine())); // body
        br.close();
        putList.add(put);
    }

    table.put(putList);

    table.close();

}

From source file:com.nearinfinity.blur.mapreduce.BlurTask.java

License:Apache License
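
Sizes the reduce phase from the existing table layout: listStatus on the table path counts shard directories, and that count overrides the configured shard count when they disagree.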

public int getNumReducers(Configuration configuration) {
    Path tablePath = new Path(_tableDescriptor.tableUri);
    try {
        int num = _tableDescriptor.shardCount;
        FileSystem fileSystem = FileSystem.get(tablePath.toUri(), configuration);
        if (!fileSystem.exists(tablePath)) {
            return num;
        }
        FileStatus[] files = fileSystem.listStatus(tablePath);
        int shardCount = 0;
        for (FileStatus fileStatus : files) {
            if (fileStatus.isDir()) {
                String name = fileStatus.getPath().getName();
                if (name.startsWith(BlurConstants.SHARD_PREFIX)) {
                    shardCount++;
                }
            }
        }

        if (shardCount == 0) {
            return num;
        }
        if (shardCount != num) {
            LOG.warn("Asked for " + num + " reducers, but existing table " + _tableDescriptor.name + " has "
                    + shardCount + " shards. Using " + shardCount + " reducers");
        }
        return shardCount;
    } catch (IOException e) {
        throw new RuntimeException("Unable to connect to filesystem", e);
    }
}

From source file:com.nearinfinity.blur.utils.BlurUtil.java

License:Apache License
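
Validates a table's shard count by comparing the number of entries listStatus returns under the table path against the expected count, failing fast on a mismatch.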

public static void validateShardCount(int shardCount, FileSystem fileSystem, Path tablePath)
        throws IOException {
    FileStatus[] listStatus = fileSystem.listStatus(tablePath);
    if (listStatus.length != shardCount) {
        LOG.error("Number of directories in table path [" + tablePath + "] does not match definition of ["
                + shardCount + "] shard count.");
        throw new RuntimeException("Number of directories in table path [" + tablePath
                + "] does not match definition of [" + shardCount + "] shard count.");
    }
}

From source file:com.netflix.Aegisthus.java

License:Apache License
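
Lists the input directory, walks its contents with a DirectoryWalker, and collapses every path ending in -Data.db into a per-directory *-Data.db glob, deduplicating via a Set.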

protected List<Path> getDataFiles(Configuration conf, String dir) throws IOException {
    Set<String> globs = Sets.newHashSet();
    List<Path> output = Lists.newArrayList();
    Path dirPath = new Path(dir);
    FileSystem fs = dirPath.getFileSystem(conf);
    List<FileStatus> input = Lists.newArrayList(fs.listStatus(dirPath));
    for (String path : DirectoryWalker.with(conf).threaded().addAllStatuses(input).pathsString()) {
        if (path.endsWith("-Data.db")) {
            globs.add(path.replaceAll("[^/]+-Data.db", "*-Data.db"));
        }
    }
    for (String path : globs) {
        output.add(new Path(path));
    }
    return output;
}