List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
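Before the per-project examples below, here is a minimal, self-contained sketch of the basic call pattern. It is illustrative only: the class name ListStatusExample, the fallback path /tmp/data, and the use of FileStatus.isDirectory() (available in Hadoop 2.x and later; the older snippets below use the since-deprecated isDir()) are assumptions, not taken from any of the source files.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path; substitute a real HDFS or local directory.
        Path dir = new Path(args.length > 0 ? args[0] : "/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listStatus returns one FileStatus per direct child of the directory;
        // it does not recurse into subdirectories.
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println((status.isDirectory() ? "dir  " : "file ") + status.getPath().getName());
        }
    }
}

Calling listStatus on a path that does not exist throws FileNotFoundException; calling it on a regular file rather than a directory returns a single-element array describing that file (a property the FijiBulkLoad example below relies on).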
From source file: com.moz.fiji.mapreduce.DistributedCacheJars.java
License: Apache License

/**
 * Lists all jars in the specified directory.
 *
 * @param conf Configuration to get FileSystem from
 * @param jarDirectory The directory of jars to get.
 * @return A list of qualified paths to the jars in jarDirectory.
 * @throws IOException if there's a problem.
 */
public static Collection<Path> listJarFilesFromDirectory(Configuration conf, Path jarDirectory)
        throws IOException {
    LOG.debug("Listing jar files {}/*.jar", jarDirectory);
    final FileSystem fs = jarDirectory.getFileSystem(conf);
    if (!fs.isDirectory(jarDirectory)) {
        throw new IOException("Attempted to add jars from non-directory: " + jarDirectory);
    }
    final List<Path> jarFiles = Lists.newArrayList();
    for (FileStatus status : fs.listStatus(jarDirectory)) {
        if (!status.isDir() && status.getPath().getName().endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(status.getPath()));
        }
    }
    return jarFiles;
}
From source file: com.moz.fiji.mapreduce.tools.FijiBulkLoad.java
License: Apache License

/**
 * Helper method used by recursiveGrantAllReadWritePermissions to actually grant the
 * additional read and write permissions to all. It deals with FileStatus objects
 * since that is the object that supports listStatus.
 *
 * @param hdfs The FileSystem on which the file exists.
 * @param status The status of the file whose permissions are checked and on whose children
 *     this method is called recursively.
 * @throws IOException on IOException.
 */
private void recursiveGrantAllReadWritePermissions(FileSystem hdfs, FileStatus status)
        throws IOException {
    final FsPermission currentPermissions = status.getPermission();
    if (!currentPermissions.getOtherAction().implies(FsAction.READ_WRITE)) {
        LOG.info("Adding a+rw to permissions for {}: {}", status.getPath(), currentPermissions);
        hdfs.setPermission(status.getPath(),
            new FsPermission(currentPermissions.getUserAction(),
                currentPermissions.getGroupAction().or(FsAction.READ_WRITE),
                currentPermissions.getOtherAction().or(FsAction.READ_WRITE)));
    }
    // Recurse into any files and directories in the path.
    // We must use listStatus because listFiles does not list subdirectories.
    FileStatus[] subStatuses = hdfs.listStatus(status.getPath());
    for (FileStatus subStatus : subStatuses) {
        if (!subStatus.equals(status)) {
            recursiveGrantAllReadWritePermissions(hdfs, subStatus);
        }
    }
}
From source file: com.mozilla.grouperfish.pig.eval.ml.TFIDFVectorizer.java
License: Apache License

private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();
        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}
From source file: com.mozilla.grouperfish.text.Dictionary.java
License: Apache License

public static Set<String> loadDictionary(FileSystem fs, Path dictionaryPath) throws IOException {
    Set<String> dictionary = null;
    if (dictionaryPath != null) {
        dictionary = new HashSet<String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        dictionary.add(word.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + dictionary.size());
    }
    return dictionary;
}
From source file: com.mozilla.grouperfish.text.Dictionary.java
License: Apache License

public static Map<String, Integer> loadFeatureIndex(FileSystem fs, Path dictionaryPath) throws IOException {
    Map<String, Integer> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<String, Integer>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(word.trim(), idx++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }
    return featureIndex;
}
From source file: com.mozilla.grouperfish.text.Dictionary.java
License: Apache License

public static Map<Integer, String> loadInvertedFeatureIndex(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<Integer, String>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(idx++, word.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }
    return featureIndex;
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertDocumentIDToID.java
License: Apache License

private void loadDocumentIndex(String documentIndexPath) throws IOException {
    if (documentIndex == null) {
        documentIndex = new HashMap<String, Integer>();
        Path p = new Path(documentIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        documentIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded document index with size: " + documentIndex.size());
    }
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertFeatureToID.java
License: Apache License

private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();
        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.UnigramExtractor.java
License: Apache License

private void loadStopwordDict() throws IOException {
    if (stopwordDictPath != null) {
        stopwords = new HashSet<String>();
        FileSystem hdfs = null;
        Path p = new Path(stopwordDictPath);
        hdfs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : hdfs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        stopwords.add(line.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded stopword dictionary with size: " + stopwords.size());
    }
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked", "finally" })
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    if (dimensionPath != null) {
        Path p = new Path(dimensionPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = reader.readLine();
                    this.dimensions = Integer.parseInt(line);
                } catch (NumberFormatException nfe) {
                    LOG.error("Unexpected input for dimensions", nfe);
                    throw new IOException();
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                    // TODO: SMELLY: Why loop if we always cancel after the first file?
                    break;
                }
            }
        }
    }
    this.writer = writer;
}