List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
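Before the per-project examples below, here is a minimal, self-contained sketch of the basic call pattern. It is illustrative only: the class name ListStatusExample, the fallback path /tmp/data, and the use of FileStatus.isDirectory() (available in Hadoop 2.x and later; the older snippets below use the since-deprecated isDir()) are assumptions, not taken from any of the source files.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path; substitute a real HDFS or local directory.
        Path dir = new Path(args.length > 0 ? args[0] : "/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listStatus returns one FileStatus per direct child of the directory;
        // it does not recurse into subdirectories.
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println((status.isDirectory() ? "dir  " : "file ") + status.getPath().getName());
        }
    }
}

Calling listStatus on a path that does not exist throws FileNotFoundException; calling it on a regular file rather than a directory returns a single-element array describing that file (a property the FijiBulkLoad example below relies on).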
From source file: com.moz.fiji.mapreduce.DistributedCacheJars.java
License: Apache License

/**
 * Lists all jars in the specified directory.
 *
 * @param conf Configuration to get FileSystem from
 * @param jarDirectory The directory of jars to get.
 * @return A list of qualified paths to the jars in jarDirectory.
 * @throws IOException if there's a problem.
 */
public static Collection<Path> listJarFilesFromDirectory(Configuration conf, Path jarDirectory)
        throws IOException {
    LOG.debug("Listing jar files {}/*.jar", jarDirectory);
    final FileSystem fs = jarDirectory.getFileSystem(conf);
    if (!fs.isDirectory(jarDirectory)) {
        throw new IOException("Attempted to add jars from non-directory: " + jarDirectory);
    }
    final List<Path> jarFiles = Lists.newArrayList();
    for (FileStatus status : fs.listStatus(jarDirectory)) {
        if (!status.isDir() && status.getPath().getName().endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(status.getPath()));
        }
    }
    return jarFiles;
}
From source file: com.moz.fiji.mapreduce.tools.FijiBulkLoad.java
License: Apache License

/**
 * Helper method used by recursiveGrantAllReadWritePermissions to actually grant the
 * additional read and write permissions to all. It deals with FileStatus objects
 * since that is the object that supports listStatus.
 *
 * @param hdfs The FileSystem on which the file exists.
 * @param status The status of the file whose permissions are checked and on whose children
 *     this method is called recursively.
 * @throws IOException on IOException.
 */
private void recursiveGrantAllReadWritePermissions(FileSystem hdfs, FileStatus status)
        throws IOException {
    final FsPermission currentPermissions = status.getPermission();
    if (!currentPermissions.getOtherAction().implies(FsAction.READ_WRITE)) {
        LOG.info("Adding a+rw to permissions for {}: {}", status.getPath(), currentPermissions);
        hdfs.setPermission(status.getPath(),
            new FsPermission(currentPermissions.getUserAction(),
                currentPermissions.getGroupAction().or(FsAction.READ_WRITE),
                currentPermissions.getOtherAction().or(FsAction.READ_WRITE)));
    }
    // Recurse into any files and directories in the path.
    // We must use listStatus because listFiles does not list subdirectories.
    FileStatus[] subStatuses = hdfs.listStatus(status.getPath());
    for (FileStatus subStatus : subStatuses) {
        if (!subStatus.equals(status)) {
            recursiveGrantAllReadWritePermissions(hdfs, subStatus);
        }
    }
}
From source file: com.mozilla.grouperfish.pig.eval.ml.TFIDFVectorizer.java
License: Apache License

private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();
        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}
From source file: com.mozilla.grouperfish.text.Dictionary.java
License: Apache License

public static Set<String> loadDictionary(FileSystem fs, Path dictionaryPath) throws IOException {
    Set<String> dictionary = null;
    if (dictionaryPath != null) {
        dictionary = new HashSet<String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        dictionary.add(word.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + dictionary.size());
    }
    return dictionary;
}
From source file: com.mozilla.grouperfish.text.Dictionary.java
License: Apache License

public static Map<String, Integer> loadFeatureIndex(FileSystem fs, Path dictionaryPath) throws IOException {
    Map<String, Integer> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<String, Integer>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(word.trim(), idx++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }
    return featureIndex;
}
From source file: com.mozilla.grouperfish.text.Dictionary.java
License: Apache License

public static Map<Integer, String> loadInvertedFeatureIndex(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> featureIndex = null;
    if (dictionaryPath != null) {
        featureIndex = new HashMap<Integer, String>();
        int idx = 0;
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String word = null;
                    while ((word = reader.readLine()) != null) {
                        featureIndex.put(idx++, word.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + featureIndex.size());
    }
    return featureIndex;
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertDocumentIDToID.java
License: Apache License

private void loadDocumentIndex(String documentIndexPath) throws IOException {
    if (documentIndex == null) {
        documentIndex = new HashMap<String, Integer>();
        Path p = new Path(documentIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        documentIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded document index with size: " + documentIndex.size());
    }
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.ConvertFeatureToID.java
License: Apache License

private void loadFeatureIndex(String featureIndexPath) throws IOException {
    if (featureIndex == null) {
        featureIndex = new HashMap<String, Integer>();
        Path p = new Path(featureIndexPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        int index = 0;
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        featureIndex.put(line.trim(), index++);
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded feature index with size: " + featureIndex.size());
    }
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.eval.text.UnigramExtractor.java
License: Apache License

private void loadStopwordDict() throws IOException {
    if (stopwordDictPath != null) {
        stopwords = new HashSet<String>();
        FileSystem hdfs = null;
        Path p = new Path(stopwordDictPath);
        hdfs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : hdfs.listStatus(p)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        stopwords.add(line.trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        log.info("Loaded stopword dictionary with size: " + stopwords.size());
    }
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked", "finally" })
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    if (dimensionPath != null) {
        Path p = new Path(dimensionPath);
        FileSystem fs = FileSystem.get(p.toUri(), new Configuration());
        for (FileStatus status : fs.listStatus(p)) {
            Path currPath = status.getPath();
            if (!status.isDir() && !currPath.getName().startsWith("_")) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(currPath)));
                    String line = reader.readLine();
                    this.dimensions = Integer.parseInt(line);
                } catch (NumberFormatException nfe) {
                    LOG.error("Unexpected input for dimensions", nfe);
                    throw new IOException();
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                    // TODO: SMELLY: Why loop if we always cancel after the first file?
                    break;
                }
            }
        }
    }
    this.writer = writer;
}