List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
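A minimal sketch of the call pattern the examples below share, using the Path[] overload from the signature above. The /data/a and /data/b paths are illustrative assumptions, and error handling is omitted:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // The Path[] overload lists several directories in one call and
        // concatenates the resulting FileStatus entries into one array.
        FileStatus[] entries = fs.listStatus(new Path[] { new Path("/data/a"), new Path("/data/b") });
        for (FileStatus entry : entries) {
            if (!entry.isDir()) { // most examples below skip directories the same way
                System.out.println(entry.getPath() + " " + entry.getLen() + " bytes");
            }
        }
        fs.close();
    }
}

Most of the examples that follow use the single-Path overload, fs.listStatus(dirPath), and iterate the returned array in the same way.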
From source file:com.mozilla.grouperfish.transforms.coclustering.text.Dictionary.java
License:Apache License
public static Map<Integer, String> loadInvertedIndexWithKeys(FileSystem fs, Path dictionaryPath)
        throws IOException {
    Map<Integer, String> index = null;
    if (dictionaryPath != null) {
        index = new HashMap<Integer, String>();
        for (FileStatus status : fs.listStatus(dictionaryPath)) {
            if (!status.isDir()) {
                BufferedReader reader = null;
                try {
                    reader = new BufferedReader(new InputStreamReader(fs.open(status.getPath())));
                    String line = null;
                    while ((line = reader.readLine()) != null) {
                        String[] pair = line.split("\t");
                        index.put(Integer.parseInt(pair[0]), pair[1].trim());
                    }
                } finally {
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
        }
        LOG.info("Loaded dictionary with size: " + index.size());
    }
    return index;
}
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
/**
 * Walk recursively to get all file paths up to a max depth
 *
 * @param fs
 * @param inputPath
 * @param depth
 * @param maxDepth
 * @return
 * @throws IOException
 */
public static List<Path> getPaths(FileSystem fs, Path inputPath, int depth, int maxDepth)
        throws IOException {
    List<Path> retPaths = new ArrayList<Path>();
    for (FileStatus status : fs.listStatus(inputPath)) {
        if (status.isDir() && (maxDepth == -1 || depth < maxDepth)) {
            retPaths.addAll(getPaths(fs, status.getPath(), depth + 1, maxDepth));
        } else {
            retPaths.add(status.getPath());
        }
    }
    return retPaths;
}
From source file:com.mozilla.hadoop.UnknownPathFinder.java
License:Apache License
/**
 * Walk recursively to get all file paths up to a max depth
 *
 * @param fs
 * @param inputPath
 * @param depth
 * @param maxDepth
 * @return
 * @throws IOException
 */
public static Set<String> getAllPaths(FileSystem fs, Path inputPath, int depth, int maxDepth)
        throws IOException {
    Set<String> retPaths = new HashSet<String>();
    for (FileStatus status : fs.listStatus(inputPath)) {
        if (status.isDir() && depth < maxDepth) {
            retPaths.addAll(getAllPaths(fs, status.getPath(), depth + 1, maxDepth));
        } else {
            String p = status.getPath().toString();
            if (!p.contains("-ROOT-") && !p.contains(".META.") && !p.contains(".logs")
                    && !p.contains(".regioninfo") && !p.contains("compaction.dir")
                    && !p.contains("hbase.version")) {
                retPaths.add(p);
            }
        }
    }
    return retPaths;
}
From source file:com.mozilla.socorro.hadoop.RawDumpSize.java
License:LGPL
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }
    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            int byteSize = Integer.parseInt(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            if (hdfs != null) {
                hdfs.close();
            }
        }
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", rawStats.getMin(),
                rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d),
                rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", processedStats.getMin(),
                processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
                processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }
    return rc;
}
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
private static void buildInputPathList(FileSystem fileSystem, Path topPath, List<Path> pathList,
        String lastMergedDirName, String lastcopiedDirName) throws IOException {
    FileStatus topPathStatus = fileSystem.getFileStatus(topPath);
    if (topPathStatus.isDir() == false) {
        String topPathFullName = topPath.toString();
        String[] tokens = topPathFullName.split("/");
        String fileName = tokens[tokens.length - 1];
        if (fileName.startsWith("data-") && fileName.endsWith(".dat")) {
            String timeStamp = tokens[tokens.length - 2];
            if (timeStamp.compareTo(lastMergedDirName) > 0 && timeStamp.compareTo(lastcopiedDirName) <= 0) {
                pathList.add(topPath);
            }
        }
        return; // This is a leaf
    }
    FileStatus[] fsArray = fileSystem.listStatus(topPath);
    for (FileStatus fileStatus : fsArray) {
        Path path = fileStatus.getPath();
        buildInputPathList(fileSystem, path, pathList, lastMergedDirName, lastcopiedDirName);
    }
}
From source file:com.mycompany.app.TestStagingDirectoryPermissions.java
License:Apache License
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(MySleepMapper.class);
        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");
        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}
From source file:com.mycompany.movehdfstohbase.MoveHdfsToHbase.java
private static void putData() throws IOException {
    List<Put> putList = new LinkedList<Put>();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path("/page"));
    int counter = 0;
    Table table = connection.getTable(TableName.valueOf(TABLE_NAME));
    for (FileStatus f : status) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(f.getPath())));
        Put put = new Put(Bytes.toBytes("row" + (++counter)));
        put.addColumn(Bytes.toBytes("url"), null, Bytes.toBytes(br.readLine())); // url
        put.addColumn(Bytes.toBytes("title"), null, Bytes.toBytes(br.readLine())); // title
        put.addColumn(Bytes.toBytes("body"), null, Bytes.toBytes(br.readLine())); // body
        br.close(); // close each reader; the original leaked the stream
        putList.add(put);
    }
    table.put(putList);
    table.close();
}
From source file:com.nearinfinity.blur.mapreduce.BlurTask.java
License:Apache License
public int getNumReducers(Configuration configuration) {
    Path tablePath = new Path(_tableDescriptor.tableUri);
    try {
        int num = _tableDescriptor.shardCount;
        FileSystem fileSystem = FileSystem.get(tablePath.toUri(), configuration);
        if (!fileSystem.exists(tablePath)) {
            return num;
        }
        FileStatus[] files = fileSystem.listStatus(tablePath);
        int shardCount = 0;
        for (FileStatus fileStatus : files) {
            if (fileStatus.isDir()) {
                String name = fileStatus.getPath().getName();
                if (name.startsWith(BlurConstants.SHARD_PREFIX)) {
                    shardCount++;
                }
            }
        }
        if (shardCount == 0) {
            return num;
        }
        if (shardCount != num) {
            LOG.warn("Asked for " + num + " reducers, but existing table " + _tableDescriptor.name
                    + " has " + shardCount + " shards. Using " + shardCount + " reducers");
        }
        return shardCount;
    } catch (IOException e) {
        throw new RuntimeException("Unable to connect to filesystem", e);
    }
}
From source file:com.nearinfinity.blur.utils.BlurUtil.java
License:Apache License
public static void validateShardCount(int shardCount, FileSystem fileSystem, Path tablePath)
        throws IOException {
    FileStatus[] listStatus = fileSystem.listStatus(tablePath);
    if (listStatus.length != shardCount) {
        LOG.error("Number of directories in table path [" + tablePath
                + "] does not match definition of [" + shardCount + "] shard count.");
        throw new RuntimeException("Number of directories in table path [" + tablePath
                + "] does not match definition of [" + shardCount + "] shard count.");
    }
}
From source file:com.netflix.Aegisthus.java
License:Apache License
protected List<Path> getDataFiles(Configuration conf, String dir) throws IOException {
    Set<String> globs = Sets.newHashSet();
    List<Path> output = Lists.newArrayList();
    Path dirPath = new Path(dir);
    FileSystem fs = dirPath.getFileSystem(conf);
    List<FileStatus> input = Lists.newArrayList(fs.listStatus(dirPath));
    for (String path : DirectoryWalker.with(conf).threaded().addAllStatuses(input).pathsString()) {
        if (path.endsWith("-Data.db")) {
            globs.add(path.replaceAll("[^/]+-Data.db", "*-Data.db"));
        }
    }
    for (String path : globs) {
        output.add(new Path(path));
    }
    return output;
}