Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.asakusafw.cleaner.main.HDFSCleaner.java

License:Apache License

/**
 * Lists the entries directly under the given path.
 * <p>
 * A missing path (reported as {@link FileNotFoundException}) and a {@code null}
 * listing are both converted to an empty array, so the result is never {@code null}.
 * </p>
 * @param fs the file system to query
 * @param path the path to list
 * @return the listed entries, or an empty array if there is nothing to list
 * @throws IOException if listing fails for a reason other than a missing path
 */
private FileStatus[] getListStatus(FileSystem fs, Path path) throws IOException {
    try {
        FileStatus[] listed = fs.listStatus(path);
        return listed != null ? listed : new FileStatus[0];
    } catch (FileNotFoundException e) {
        // a missing path is treated the same as an empty directory
        return new FileStatus[0];
    }
}

From source file:com.asakusafw.operation.tools.hadoop.fs.Clean.java

License:Apache License

/**
 * Tries to remove the given entry, descending into directories when the context is recursive.
 * <p>
 * Symbolic links are rejected and flagged as an error on the context. A directory is only
 * removed after every one of its children has been removed; all children are attempted even
 * when an earlier one could not be removed. In dry-run mode nothing is deleted, but the entry
 * is still logged and reported as removed.
 * </p>
 * @param fs the file system that owns the entry
 * @param file the entry to remove
 * @param context the cleaning options and error state
 * @return {@code true} if the entry was removed (or would have been, in dry-run mode),
 *     {@code false} if it was skipped, kept, or could not be removed
 */
private boolean remove(FileSystem fs, FileStatus file, Context context) {
    LOG.debug("Attempt to remove {}", file.getPath()); //$NON-NLS-1$
    if (context.isSymlink(fs, file)) {
        LOG.error(MessageFormat.format("[OT-CLEAN-W01001] Symlink is currenty not supported: {0}",
                file.getPath()));
        context.setError();
        return false;
    }
    if (file.isDir()) {
        if (context.isRecursive() == false) {
            LOG.info(MessageFormat.format("[OT-CLEAN-I01003] Skipped: {0} (is directory)", file.getPath(),
                    new Date(file.getModificationTime())));
            return false;
        }
        List<FileStatus> entries;
        try {
            entries = asList(fs.listStatus(file.getPath()));
        } catch (IOException e) {
            LOG.error(
                    MessageFormat.format("[OT-CLEAN-E01003] Failed to list directory: {0}", file.getPath()),
                    e);
            context.setError();
            return false;
        }
        // every child is attempted, even after one of them has failed
        boolean allChildrenRemoved = true;
        for (FileStatus entry : entries) {
            if (remove(fs, entry, context) == false) {
                allChildrenRemoved = false;
            }
        }
        if (allChildrenRemoved == false) {
            LOG.info(MessageFormat.format("[OT-CLEAN-I01004] Skipped: {0} (is no-empty directory)",
                    file.getPath(), new Date(file.getModificationTime())));
            return false;
        }
    }
    if (context.canDelete(file) == false) {
        LOG.info(MessageFormat.format("[OT-CLEAN-I01002] Kept: {0} (timestamp={1})", file.getPath(),
                new Date(file.getModificationTime())));
        return false;
    }
    LOG.debug("Removing {}", file.getPath()); //$NON-NLS-1$
    if (context.isDryRun() == false) {
        try {
            // non-recursive delete: directories reach this point only once their children are gone
            if (fs.delete(file.getPath(), false) == false) {
                LOG.error(MessageFormat.format("[OT-CLEAN-E01004] Failed to remove: {0}", file.getPath()));
                context.setError();
                return false;
            }
        } catch (IOException e) {
            LOG.warn(MessageFormat.format("[OT-CLEAN-E01004] Failed to remove: {0}", file.getPath()), e);
            context.setError();
            return false;
        }
    }
    LOG.info(MessageFormat.format("[OT-CLEAN-I01001] Removed: {0} (timestamp={1})", file.getPath(),
            new Date(file.getModificationTime())));
    return true;
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

/**
 * Returns the all transaction info files.
 * @param conf the current configuration
 * @return target path/* www . jav  a 2 s .c o m*/
 * @throws IOException if failed to find files by I/O error
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public static Collection<FileStatus> findAllTransactionInfoFiles(Configuration conf) throws IOException {
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    Path dir = getTransactionInfoDir(conf);
    FileSystem fs = dir.getFileSystem(conf);
    FileStatus[] statusArray;
    try {
        statusArray = fs.listStatus(dir);
    } catch (FileNotFoundException e) {
        statusArray = null;
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Target file is not found: {0}", dir), e); //$NON-NLS-1$
        }
    }
    if (statusArray == null || statusArray.length == 0) {
        return Collections.emptyList();
    }
    Collection<FileStatus> results = new ArrayList<>();
    for (FileStatus stat : statusArray) {
        if (getTransactionInfoExecutionId(stat.getPath()) != null) {
            results.add(stat);
        }
    }
    return results;
}

From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java

License:Apache License

/**
 * Collects the given entries together with everything reachable beneath them.
 * <p>
 * Entries are visited in first-in, first-out order and each distinct path is reported
 * only once. A directory that has disappeared by the time it is listed contributes
 * no children.
 * </p>
 * @param fs the file system to list directories on
 * @param current the entries to start from
 * @return the starting entries and all of their descendants, without duplicate paths
 * @throws IOException if listing fails for a reason other than a missing path
 */
private static List<FileStatus> recursiveStep(FileSystem fs, List<FileStatus> current) throws IOException {
    assert fs != null;
    assert current != null;
    Set<Path> visited = new HashSet<>();
    List<FileStatus> collected = new ArrayList<>();
    LinkedList<FileStatus> pending = new LinkedList<>(current);
    while (!pending.isEmpty()) {
        FileStatus entry = pending.removeFirst();
        Path location = entry.getPath();
        // Set.add returns false when the path has already been recorded
        if (!visited.add(location)) {
            continue;
        }
        collected.add(entry);
        if (!FileSystemCompatibility.isDirectory(entry)) {
            continue;
        }
        try {
            FileStatus[] children = fs.listStatus(location);
            if (children != null) {
                Collections.addAll(pending, children);
            }
        } catch (FileNotFoundException e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("Target file is not found: {0}", location), e); //$NON-NLS-1$
            }
        }
    }
    return collected;
}

From source file:com.asakusafw.testdriver.testing.dsl.SimpleBatchAction.java

License:Apache License

/**
 * Processes every entry of the importer directory into a numbered exporter output.
 * <p>
 * The exporter directory is created first. The n-th listed input (counting from zero)
 * is processed into the path {@code SimpleExporter.OUTPUT_PREFIX + n}.
 * </p>
 * @param args the command line arguments (not read by this method)
 * @return always {@code 0}
 * @throws Exception if any step fails
 */
@Override
public int run(String[] args) throws Exception {
    MacSnappyJavaWorkaround.install();
    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(new Path(SimpleExporter.DIRECTORY));
    FileStatus[] inputs = fs.listStatus(new Path(SimpleImporter.DIRECTORY));
    for (int index = 0; index < inputs.length; index++) {
        Path output = new Path(SimpleExporter.OUTPUT_PREFIX + index);
        process(inputs[index].getPath(), output);
    }
    extra();
    return 0;
}

From source file:com.blackberry.logdriver.admin.HFind.java

License:Apache License

/**
 * Parses the command line into start paths, tests and actions, then runs the search.
 * <p>
 * Leading arguments, up to the first one starting with {@code -}, are path globs; every
 * match becomes a start path. The remaining arguments are options:
 * </p>
 * <ul>
 * <li>{@code -print} and {@code -delete} add actions;</li>
 * <li>{@code -atime}, {@code -mtime} (whole days) and {@code -amin}, {@code -mmin}
 *     (whole minutes) add age tests; the value {@code +N} means older than N,
 *     {@code -N} means newer than N, and a bare {@code N} means exactly N;</li>
 * <li>{@code -regex} adds a test that the full path matches the given pattern.</li>
 * </ul>
 * <p>
 * Unrecognized options are skipped. When no action is given, matching paths are printed.
 * </p>
 * @param args the command line arguments
 * @return always {@code 0}
 * @throws Exception if globbing or searching fails, or an option value is malformed
 */
@Override
public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();
    final long millisPerDay = 24L * 60 * 60 * 1000;
    final long millisPerMinute = 60L * 1000;

    // Everything before the first option is a path glob.
    int i = 0;
    while (i < args.length) {
        if (args[i].startsWith("-")) {
            break;
        }

        Path path = new Path(args[i]);
        FileSystem fs = path.getFileSystem(getConf());
        FileStatus[] fileStatuses = fs.globStatus(path);
        if (fileStatuses != null) {
            for (FileStatus fileStatus : fileStatuses) {
                paths.add(fileStatus.getPath());
                fileStatusCache.put(fileStatus.getPath(), fileStatus);
            }
        }

        i++;
    }

    // The rest are options; options that take a value consume the next argument.
    while (i < args.length) {
        String option = args[i];
        if ("-print".equals(option)) {
            actions.add(newPrintAction());
        } else if ("-delete".equals(option)) {
            actions.add(newDeleteAction());
        } else if ("-atime".equals(option)) {
            i++;
            tests.add(newAgeTest(requireOptionValue(args, i, option), startTime, millisPerDay, true));
        } else if ("-mtime".equals(option)) {
            i++;
            tests.add(newAgeTest(requireOptionValue(args, i, option), startTime, millisPerDay, false));
        } else if ("-amin".equals(option)) {
            i++;
            tests.add(newAgeTest(requireOptionValue(args, i, option), startTime, millisPerMinute, true));
        } else if ("-mmin".equals(option)) {
            i++;
            tests.add(newAgeTest(requireOptionValue(args, i, option), startTime, millisPerMinute, false));
        } else if ("-regex".equals(option)) {
            i++;
            final Pattern p = Pattern.compile(requireOptionValue(args, i, option));
            tests.add(new FileStatusFilter() {
                @Override
                public boolean accept(FileStatus fileStatus) {
                    return p.matcher(fileStatus.getPath().toString()).matches();
                }
            });
        }

        i++;
    }

    // Default action: print each matching path.
    if (actions.size() == 0) {
        actions.add(newPrintAction());
    }

    search();

    return 0;
}

/** Returns the value following an option, or reports the missing value and exits with status 1. */
private static String requireOptionValue(String[] args, int index, String option) {
    if (index >= args.length) {
        System.err.println("Missing arguement for " + option);
        System.exit(1);
        // only reached if the exit was prevented
        throw new IllegalArgumentException("Missing arguement for " + option);
    }
    return args[index];
}

/** Creates the action that prints the path of each entry and always succeeds. */
private FileStatusFilter newPrintAction() {
    return new FileStatusFilter() {
        @Override
        public boolean accept(FileStatus fileStatus) {
            System.out.println(fileStatus.getPath());
            return true;
        }
    };
}

/** Creates the action that deletes files and empty directories; anything else is reported as not deleted. */
private FileStatusFilter newDeleteAction() {
    return new FileStatusFilter() {
        @SuppressWarnings("deprecation")
        @Override
        public boolean accept(FileStatus fileStatus) {
            try {
                FileSystem fs = fileStatus.getPath().getFileSystem(getConf());
                if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) {
                    return fs.delete(fileStatus.getPath(), true);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            return false;
        }
    };
}

/**
 * Creates a test on the age of an entry, measured in whole units (integer division) before {@code startTime}.
 * A spec of {@code +N} accepts ages greater than N, {@code -N} accepts ages less than N,
 * and a bare {@code N} accepts ages equal to N.
 */
private static FileStatusFilter newAgeTest(String spec, final long startTime, final long unitMillis,
        final boolean useAccessTime) {
    final int direction;
    final long threshold;
    if (spec.startsWith("+")) {
        direction = 1;
        threshold = Long.parseLong(spec.substring(1));
    } else if (spec.startsWith("-")) {
        direction = -1;
        threshold = Long.parseLong(spec.substring(1));
    } else {
        direction = 0;
        threshold = Long.parseLong(spec);
    }
    return new FileStatusFilter() {
        @Override
        public boolean accept(FileStatus fileStatus) {
            long timestamp = useAccessTime ? fileStatus.getAccessTime() : fileStatus.getModificationTime();
            long age = (startTime - timestamp) / unitMillis;
            if (direction > 0) {
                return age > threshold;
            }
            if (direction < 0) {
                return age < threshold;
            }
            return age == threshold;
        }
    };
}

From source file:com.blackberry.logdriver.admin.HFind.java

License:Apache License

/**
 * Walks the pending paths, applying every test and then every action to each entry.
 * <p>
 * A directory with children is expanded the first time it reaches the head of the pending
 * list: its children are pushed in front of it, so the directory itself is processed only
 * after them. An entry that fails a test is dropped without running any action; actions
 * stop at the first one that reports {@code false}. Anything thrown by a test or an action
 * is printed and does not stop the walk.
 * </p>
 * @throws IOException if a file system cannot be obtained or a directory cannot be listed
 */
@SuppressWarnings("deprecation")
private void search() throws IOException {
    Set<Path> expanded = new HashSet<Path>();
    while (!paths.isEmpty()) {

        // Look at the head without removing it: it stays queued if it has to be expanded.
        Path current = paths.peekFirst();
        FileSystem fs = current.getFileSystem(getConf());
        FileStatus status = fileStatusCache.get(current);

        // Expand each directory at most once.
        if (status.isDir() && !expanded.contains(current)) {
            FileStatus[] children = fs.listStatus(current);
            if (children != null && children.length > 0) {
                for (FileStatus child : children) {
                    paths.addFirst(child.getPath());
                    fileStatusCache.put(child.getPath(), child);
                }
                expanded.add(current);
                continue;
            }
        }

        // The entry is a file, an empty directory, or an already expanded directory: process it.
        current = paths.removeFirst();
        // Its cached status is no longer needed once it is being processed.
        fileStatusCache.remove(current);

        boolean passedAllTests = true;
        for (FileStatusFilter test : tests) {
            try {
                if (!test.accept(status)) {
                    passedAllTests = false;
                    break;
                }
            } catch (Throwable t) {
                t.printStackTrace();
                System.err.println("path=" + current + " fileStatus=" + status);
            }
        }
        if (!passedAllTests) {
            continue;
        }

        for (FileStatusFilter action : actions) {
            try {
                if (!action.accept(status)) {
                    break;
                }
            } catch (Throwable t) {
                t.printStackTrace();
                System.err.println("path=" + current + " fileStatus=" + status);
            }
        }
    }
}

From source file:com.blackberry.logdriver.util.IndexLogs.java

License:Apache License

/**
 * Searches the given path for a component and records it, descending into
 * subdirectories when no entry at this level matches.
 * <p>
 * If any entry directly under {@code path} matches {@code dataPattern}, the component is
 * updated from the first such entry and the search stops for this path. Otherwise every
 * subdirectory is searched in turn. A subdirectory that is missing or not accessible is
 * skipped without affecting its siblings.
 * </p>
 * @param data the component index to add to or update
 * @param unmergedCSVStrings passed through to {@code updateComponent}
 * @param fs the file system to search
 * @param path the directory to search
 * @throws FileNotFoundException if {@code path} itself cannot be listed because it is missing
 * @throws IOException if listing {@code path} or updating a component fails
 * @throws ParseException if updating a component fails to parse its input
 */
private static void findComponents(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        List<String> unmergedCSVStrings, FileSystem fs, Path path)
        throws FileNotFoundException, IOException, ParseException {
    // Grab FileStatus for each file in the path
    FileStatus[] allFiles = fs.listStatus(path);
    if (allFiles == null) {
        // some file system implementations report a missing path as a null listing
        return;
    }
    try {
        // For each file, try to match a pattern that indicates we have identified a component.
        // If we find a match, add or update the component and return.
        for (FileStatus file : allFiles) {
            if (dataPattern.matcher(file.getPath().toString()).find()) {
                updateComponent(data, unmergedCSVStrings, fs, file, path);
                return;
            }
        }
        // If we got here no component was matched, so go one level deeper.
        for (FileStatus file : allFiles) {
            if (!file.isDirectory()) {
                continue;
            }
            try {
                findComponents(data, unmergedCSVStrings, fs, file.getPath());
            } catch (AccessControlException ignored) {
                // this subdirectory is not readable: skip it and keep going with its siblings
            } catch (FileNotFoundException ignored) {
                // this subdirectory vanished after it was listed: skip it and keep going
            }
        }
    } catch (AccessControlException ignored) {
        // the matched component is not accessible: nothing to record for this path
    } catch (FileNotFoundException ignored) {
        // the matched component vanished after it was listed: nothing to record for this path
    }
}

From source file:com.blackberry.logdriver.util.MultiSearch.java

License:Apache License

/**
 * Configures and launches the map-only MultiSearch job.
 * <p>
 * Expected arguments: {@code searchStringDirectory input [input ...] output}. Each input
 * is expanded as a glob. An input that matches nothing is reported and the run fails with
 * status 1 before any job is created.
 * </p>
 * @param args the command line arguments remaining after generic option parsing
 * @return {@code 0} if the job was submitted (or, when waiting, completed successfully),
 *     {@code 1} otherwise
 * @throws Exception if the file system or the job cannot be set up
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    String searchStringDir = null;
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    for (int i = 1; i < args.length - 1; i++) {
        // globStatus returns null (not an empty array) for a non-glob path that does not exist
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            LOG.error("Input path does not exist: " + args[i]);
            return 1;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }

    outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job: the mappers write the final output directly.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.QueryIndex.java

License:Apache License

/**
 * Finds the most recent index file under {@code /service/.index/}.
 * <p>
 * Every entry whose path ends in {@code .json} is inspected; the dot-separated piece just
 * before the extension is read as a timestamp in seconds. Entries whose piece is not a
 * number are ignored. The age of the newest index is printed, with a warning when it is
 * more than 24 hours old.
 * </p>
 * @param fs the file system holding the index directory
 * @return the path {@code /service/.index/logindex.<timestamp>.json} for the largest
 *     timestamp found, or {@code null} if no usable index file exists
 * @throws FileNotFoundException if the index directory does not exist
 * @throws IOException if the index directory cannot be listed
 */
private static Path getLatestIndex(FileSystem fs) throws FileNotFoundException, IOException {
    // Get path of latest index file
    FileStatus[] indexFiles = fs.listStatus(new Path("/service/.index/"));
    if (indexFiles == null) {
        // some file system implementations report a missing path as a null listing
        return null;
    }
    long latestDate = 0;
    for (FileStatus indexFile : indexFiles) {
        String name = indexFile.getPath().toString();
        if (!name.matches(".*\\.json$")) {
            continue;
        }
        try {
            String[] pathPieces = name.split("\\.");
            // parsed as long so that timestamps beyond the int range are not discarded
            long currentDate = Long.parseLong(pathPieces[pathPieces.length - 2]);
            if (currentDate > latestDate) {
                latestDate = currentDate;
            }
        } catch (NumberFormatException ignored) {
            // not a timestamped index file: skip it
        } catch (IndexOutOfBoundsException ignored) {
            // the name has no piece before the extension: skip it
        }
    }
    if (latestDate <= 0) {
        return null;
    }
    long age = (System.currentTimeMillis() - (latestDate * 1000)) / oneHour;
    if (age > 24) {
        System.out.println("\nWarning: Index is over 24 hours old!");
    }
    System.out.println("\nIndex is from " + outputFormat.format(latestDate * 1000) + " and is " + age
            + " hours old.\n");
    return new Path("/service/.index/logindex." + latestDate + ".json");
}