Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.moz.fiji.mapreduce.DistributedCacheJars.java

License:Apache License

/**
 * Lists all jars in the specified directory.
 *
 * @param conf Configuration to get FileSystem from
 * @param jarDirectory The directory of jars to get.
 * @return A list of qualified paths to the jars in jarDirectory.
 * @throws IOException if there's a problem.
 */
public static Collection<Path> listJarFilesFromDirectory(Configuration conf, Path jarDirectory)
        throws IOException {
    LOG.debug("Listing jar files {}/*.jar", jarDirectory);
    final FileSystem fs = jarDirectory.getFileSystem(conf);
    if (!fs.isDirectory(jarDirectory)) {
        throw new IOException("Attempted to add jars from non-directory: " + jarDirectory);
    }
    final List<Path> jarFiles = Lists.newArrayList();
    // Collect only plain files whose name ends in ".jar"; subdirectories
    // are not descended into.
    for (FileStatus entry : fs.listStatus(jarDirectory)) {
        final Path entryPath = entry.getPath();
        if (entry.isDir() || !entryPath.getName().endsWith(".jar")) {
            continue;
        }
        jarFiles.add(fs.makeQualified(entryPath));
    }
    return jarFiles;
}

From source file:com.moz.fiji.mapreduce.tools.FijiBulkLoad.java

License:Apache License

/**
 * Helper method used by recursiveGrantAllReadWritePermissions to actually grant the
 * additional read and write permissions to all.  It deals with FileStatus objects
 * since that is the object that supports listStatus.
 *
 * @param hdfs The FileSystem on which the file exists.
 * @param status The status of the file whose permissions are checked and on whose children
 *     this method is called recursively.
 * @throws IOException on IOException./*  w  w  w . j  a va 2s. com*/
 */
private void recursiveGrantAllReadWritePermissions(FileSystem hdfs, FileStatus status) throws IOException {
    final FsPermission currentPermissions = status.getPermission();
    if (!currentPermissions.getOtherAction().implies(FsAction.READ_WRITE)) {
        LOG.info("Adding a+rw to permissions for {}: {}", status.getPath(), currentPermissions);
        hdfs.setPermission(status.getPath(),
                new FsPermission(currentPermissions.getUserAction(),
                        currentPermissions.getGroupAction().or(FsAction.READ_WRITE),
                        currentPermissions.getOtherAction().or(FsAction.READ_WRITE)));
    }
    // Recurse into any files and directories in the path.
    // We must use listStatus because listFiles does not list subdirectories.
    FileStatus[] subStatuses = hdfs.listStatus(status.getPath());
    for (FileStatus subStatus : subStatuses) {
        if (!subStatus.equals(status)) {
            recursiveGrantAllReadWritePermissions(hdfs, subStatus);
        }
    }
}

From source file:com.mozilla.grouperfish.pig.eval.ml.TFIDFVectorizer.java

License:Apache License

/**
 * Lazily loads the feature index: reads every plain file under the given
 * path and maps each trimmed line to a sequential integer id.
 *
 * @param featureIndexPath Path (directory or file) containing the index, one feature per line.
 * @throws IOException if listing or reading the files fails.
 */
private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();

        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            if (!status.isDir()) {
                // try-with-resources replaces the manual null-check/finally close.
                // NOTE(review): the reader uses the platform default charset,
                // matching the original behavior; UTF-8 may be intended — confirm.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath())))) {
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                }
            }
        }

        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

/**
 * Loads a newline-delimited dictionary from every plain file under the given path.
 *
 * @param fs The FileSystem on which the dictionary files live.
 * @param dictionaryPath Directory (or file) to read, one word per line; may be null.
 * @return The set of trimmed words, or null if dictionaryPath is null.
 * @throws IOException if listing or reading the files fails.
 */
public static Set<String> loadDictionary(FileSystem fs, Path dictionaryPath) throws IOException {
    Set<String> dictionary = null;
    if (dictionaryPath != null) {
        dictionary = new HashSet<String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                // try-with-resources guarantees the reader is closed even on error.
                // NOTE(review): reader uses the platform default charset, as the
                // original did; UTF-8 may be intended — confirm.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath())))) {
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        dictionary.add(word.trim());
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + dictionary.size());
    }

    return dictionary;
}

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

/**
 * Loads a feature index from every plain file under the given path,
 * mapping each trimmed line to a sequential integer id.
 *
 * @param fs The FileSystem on which the dictionary files live.
 * @param dictionaryPath Directory (or file) to read, one feature per line; may be null.
 * @return Map from feature string to its index, or null if dictionaryPath is null.
 * @throws IOException if listing or reading the files fails.
 */
public static Map<String, Integer> loadFeatureIndex(FileSystem fs, Path dictionaryPath) throws IOException {
    Map<String, Integer> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<String, Integer>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                // try-with-resources guarantees the reader is closed even on error.
                // NOTE(review): reader uses the platform default charset, as the
                // original did; UTF-8 may be intended — confirm.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath())))) {
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(word.trim(), idx++);
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }

    return featureIndex;
}

From source file:com.mozilla.grouperfish.text.Dictionary.java

License:Apache License

/**
 * Loads an inverted feature index from every plain file under the given path,
 * mapping a sequential integer id to each trimmed line.
 *
 * @param fs The FileSystem on which the dictionary files live.
 * @param dictionaryPath Directory (or file) to read, one feature per line; may be null.
 * @return Map from index to feature string, or null if dictionaryPath is null.
 * @throws IOException if listing or reading the files fails.
 */
public static Map<Integer, String> loadInvertedFeatureIndex(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<Integer, String>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                // try-with-resources guarantees the reader is closed even on error.
                // NOTE(review): reader uses the platform default charset, as the
                // original did; UTF-8 may be intended — confirm.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath())))) {
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(idx++, word.trim());
                    }
                }
            }
        }

        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }

    return featureIndex;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertDocumentIDToID.java

License:Apache License

/**
 * Lazily loads the document index: reads every plain file under the given
 * path (skipping "_"-prefixed entries such as Hadoop's _SUCCESS/_logs) and
 * maps each trimmed line to a sequential integer id.
 *
 * @param documentIndexPath Path (directory or file) containing the index, one document id per line.
 * @throws IOException if listing or reading the files fails.
 */
private void loadDocumentIndex(String documentIndexPath) throws IOException {
    if (documentIndex == null) {
        documentIndex = new HashMap<String, Integer>();

        Path p = new Path(documentIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                // try-with-resources replaces the manual null-check/finally close.
                // NOTE(review): reader uses the platform default charset, as the
                // original did; UTF-8 may be intended — confirm.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(currPath)))) {
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        documentIndex.put(line.trim(), index++);
                    }
                }
            }
        }

        log.info("Loaded document index with size: " + documentIndex.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertFeatureToID.java

License:Apache License

/**
 * Lazily loads the feature index: reads every plain file under the given
 * path (skipping "_"-prefixed entries such as Hadoop's _SUCCESS/_logs) and
 * maps each trimmed line to a sequential integer id.
 *
 * @param featureIndexPath Path (directory or file) containing the index, one feature per line.
 * @throws IOException if listing or reading the files fails.
 */
private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();

        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                // try-with-resources replaces the manual null-check/finally close.
                // NOTE(review): reader uses the platform default charset, as the
                // original did; UTF-8 may be intended — confirm.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(currPath)))) {
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                }
            }
        }

        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.UnigramExtractor.java

License:Apache License

/**
 * Lazily loads the stopword dictionary: reads every plain file under
 * stopwordDictPath and collects each trimmed line into the stopwords set.
 *
 * @throws IOException if listing or reading the files fails.
 */
private void loadStopwordDict() throws IOException {
    if (stopwordDictPath != null) {
        stopwords = new HashSet<String>();

        Path p = new Path(stopwordDictPath);
        // Dropped the redundant `hdfs = null` pre-initialization.
        FileSystem hdfs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : hdfs.listStatus(p)) {
            if (!status.isDir()) {
                // try-with-resources replaces the manual null-check/finally close.
                // NOTE(review): reader uses the platform default charset, as the
                // original did; UTF-8 may be intended — confirm.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(hdfs.open(status.getPath())))) {
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        stopwords.add(line.trim());
                    }
                }
            }
        }

        log.info("Loaded stopword dictionary with size: " + stopwords.size());
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

/**
 * Reads the vector dimension count from the first plain file under
 * dimensionPath (skipping "_"-prefixed entries) and stores the writer.
 *
 * BUG FIX: the original had `break` inside a `finally` block, which silently
 * discarded the IOException thrown from the NumberFormatException handler
 * (that was why "finally" had to be suppressed). The break now happens after
 * the try block, so the exception propagates, and the cause is chained.
 *
 * @param writer The RecordWriter to use for subsequent writes.
 * @throws IOException if the dimension file cannot be read or parsed.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    if (dimensionPath != null) {
        Path p = new Path(dimensionPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(currPath)))) {
                    String line = reader.readLine();
                    this.dimensions = Integer.parseInt(line);
                } catch (NumberFormatException nfe) {
                    LOG.error("Unexpected input for dimensions", nfe);
                    // Chain the cause so callers can see what went wrong.
                    throw new IOException("Unexpected input for dimensions", nfe);
                }
                // Only the first matching file holds the dimension count.
                break;
            }
        }
    }
    this.writer = writer;
}