List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
From source file:com.asakusafw.cleaner.main.HDFSCleaner.java
License:Apache License
/**
 * Lists the entries under the given path, treating a missing path as empty.
 *
 * @param fs the file system to query
 * @param path the path to list
 * @return the array returned by {@code fs.listStatus(path)}, or an empty array (never
 *         {@code null}) if the path was not found or the file system returned {@code null}
 * @throws IOException if listing fails for any reason other than the path being absent
 */
private FileStatus[] getListStatus(FileSystem fs, Path path) throws IOException {
    try {
        FileStatus[] listed = fs.listStatus(path);
        return listed != null ? listed : new FileStatus[0];
    } catch (FileNotFoundException e) {
        // A missing path is not an error for the caller: report "nothing to list".
        return new FileStatus[0];
    }
}
From source file:com.asakusafw.operation.tools.hadoop.fs.Clean.java
License:Apache License
private boolean remove(FileSystem fs, FileStatus file, Context context) { LOG.debug("Attempt to remove {}", file.getPath()); //$NON-NLS-1$ boolean isSymlink = context.isSymlink(fs, file); if (isSymlink) { LOG.error(MessageFormat.format("[OT-CLEAN-W01001] Symlink is currenty not supported: {0}", file.getPath()));// www. j a va2s .com context.setError(); return false; } if (file.isDir()) { if (context.isRecursive()) { List<FileStatus> children; try { children = asList(fs.listStatus(file.getPath())); } catch (IOException e) { LOG.error( MessageFormat.format("[OT-CLEAN-E01003] Failed to list directory: {0}", file.getPath()), e); context.setError(); return false; } boolean deleteChildren = true; for (FileStatus child : children) { deleteChildren &= remove(fs, child, context); } if (deleteChildren == false) { LOG.info(MessageFormat.format("[OT-CLEAN-I01004] Skipped: {0} (is no-empty directory)", file.getPath(), new Date(file.getModificationTime()))); return false; } } else { LOG.info(MessageFormat.format("[OT-CLEAN-I01003] Skipped: {0} (is directory)", file.getPath(), new Date(file.getModificationTime()))); return false; } } if (context.canDelete(file)) { LOG.debug("Removing {}", file.getPath()); //$NON-NLS-1$ if (context.isDryRun() == false) { try { boolean removed = fs.delete(file.getPath(), false); if (removed == false) { LOG.error(MessageFormat.format("[OT-CLEAN-E01004] Failed to remove: {0}", file.getPath())); context.setError(); return false; } } catch (IOException e) { LOG.warn(MessageFormat.format("[OT-CLEAN-E01004] Failed to remove: {0}", file.getPath()), e); context.setError(); return false; } } LOG.info(MessageFormat.format("[OT-CLEAN-I01001] Removed: {0} (timestamp={1})", file.getPath(), new Date(file.getModificationTime()))); } else { LOG.info(MessageFormat.format("[OT-CLEAN-I01002] Kept: {0} (timestamp={1})", file.getPath(), new Date(file.getModificationTime()))); return false; } return true; }
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
/** * Returns the all transaction info files. * @param conf the current configuration * @return target path/* www . jav a 2 s .c o m*/ * @throws IOException if failed to find files by I/O error * @throws IllegalArgumentException if some parameters were {@code null} */ public static Collection<FileStatus> findAllTransactionInfoFiles(Configuration conf) throws IOException { if (conf == null) { throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$ } Path dir = getTransactionInfoDir(conf); FileSystem fs = dir.getFileSystem(conf); FileStatus[] statusArray; try { statusArray = fs.listStatus(dir); } catch (FileNotFoundException e) { statusArray = null; if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Target file is not found: {0}", dir), e); //$NON-NLS-1$ } } if (statusArray == null || statusArray.length == 0) { return Collections.emptyList(); } Collection<FileStatus> results = new ArrayList<>(); for (FileStatus stat : statusArray) { if (getTransactionInfoExecutionId(stat.getPath()) != null) { results.add(stat); } } return results; }
From source file:com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.java
License:Apache License
private static List<FileStatus> recursiveStep(FileSystem fs, List<FileStatus> current) throws IOException { assert fs != null; assert current != null; Set<Path> paths = new HashSet<>(); List<FileStatus> results = new ArrayList<>(); LinkedList<FileStatus> work = new LinkedList<>(current); while (work.isEmpty() == false) { FileStatus next = work.removeFirst(); Path path = next.getPath(); if (paths.contains(path) == false) { paths.add(path);//w w w . j a v a2 s .c om results.add(next); if (FileSystemCompatibility.isDirectory(next)) { FileStatus[] children; try { children = fs.listStatus(path); } catch (FileNotFoundException e) { children = null; if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Target file is not found: {0}", path), e); //$NON-NLS-1$ } } if (children != null) { Collections.addAll(work, children); } } } } return results; }
From source file:com.asakusafw.testdriver.testing.dsl.SimpleBatchAction.java
License:Apache License
/**
 * Processes every entry of the importer directory into a numbered output path.
 *
 * <p>The exporter directory is created first; the entry at position {@code k} of the listing
 * is passed to {@code process} with the output path {@code SimpleExporter.OUTPUT_PREFIX + k}.
 * {@code extra()} is invoked once all entries have been processed.</p>
 *
 * @param args the command-line arguments (not used)
 * @return always {@code 0}
 * @throws Exception if any file system operation or processing step fails
 */
@Override
public int run(String[] args) throws Exception {
    MacSnappyJavaWorkaround.install();
    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(new Path(SimpleExporter.DIRECTORY));
    FileStatus[] inputs = fs.listStatus(new Path(SimpleImporter.DIRECTORY));
    for (int index = 0; index < inputs.length; index++) {
        Path output = new Path(SimpleExporter.OUTPUT_PREFIX + index);
        process(inputs[index].getPath(), output);
    }
    extra();
    return 0;
}
From source file:com.blackberry.logdriver.admin.HFind.java
License:Apache License
@Override public int run(String[] args) throws Exception { final long startTime = System.currentTimeMillis(); int i = 0;//from w w w .j a va 2 s .c om while (i < args.length) { if (args[i].startsWith("-")) { break; } Path path = new Path(args[i]); FileSystem fs = path.getFileSystem(getConf()); FileStatus[] fileStatuses = fs.globStatus(path); if (fileStatuses != null) { for (FileStatus fileStatus : fileStatuses) { paths.add(fileStatus.getPath()); fileStatusCache.put(fileStatus.getPath(), fileStatus); } } i++; } while (i < args.length) { // -print action if ("-print".equals(args[i])) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } // -delete action if ("-delete".equals(args[i])) { actions.add(new FileStatusFilter() { @SuppressWarnings("deprecation") @Override public boolean accept(FileStatus fileStatus) { try { FileSystem fs = fileStatus.getPath().getFileSystem(getConf()); if (!fileStatus.isDir() || fs.listStatus(fileStatus.getPath()).length == 0) { return fs.delete(fileStatus.getPath(), true); } } catch (IOException e) { e.printStackTrace(); } return false; } }); } // -atime test else if ("-atime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -atime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); 
tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -mtime test else if ("-mtime".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -mtime"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (24 * 60 * 60 * 1000) == time) { return true; } else { return false; } } }); } } // -amin test else if ("-amin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -amin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) < time) { return true; } else { return false; } } 
}); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getAccessTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -mmin test else if ("-mmin".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -mmin"); System.exit(1); } String t = args[i]; if (t.charAt(0) == '+') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) > time) { return true; } else { return false; } } }); } else if (t.charAt(0) == '-') { final long time = Long.parseLong(t.substring(1)); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) < time) { return true; } else { return false; } } }); } else { final long time = Long.parseLong(t); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if ((startTime - fileStatus.getModificationTime()) / (60 * 1000) == time) { return true; } else { return false; } } }); } } // -regex test else if ("-regex".equals(args[i])) { i++; if (i >= args.length) { System.err.println("Missing arguement for -regex"); System.exit(1); } final Pattern p = Pattern.compile(args[i]); tests.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { if (p.matcher(fileStatus.getPath().toString()).matches()) { return true; } else { return false; } } }); } i++; } if (actions.size() == 0) { actions.add(new FileStatusFilter() { @Override public boolean accept(FileStatus fileStatus) { System.out.println(fileStatus.getPath()); return true; } }); } search(); return 0; }
From source file:com.blackberry.logdriver.admin.HFind.java
License:Apache License
@SuppressWarnings("deprecation") private void search() throws IOException { Set<Path> seen = new HashSet<Path>(); while (paths.size() > 0) { // Check if the top of the list has any children. If so, add them to the // stack. If not, then process it. Path p = paths.peekFirst(); FileSystem fs = p.getFileSystem(getConf()); FileStatus fileStatus = fileStatusCache.get(p); // Only check if we haven't seen this before. if (fileStatus.isDir() && seen.contains(p) == false) { FileStatus[] fileStatuses = fs.listStatus(p); if (fileStatuses != null && fileStatuses.length > 0) { for (FileStatus x : fileStatuses) { paths.addFirst(x.getPath()); fileStatusCache.put(x.getPath(), x); }//from w w w .j a v a2 s . c om seen.add(p); continue; } } // If we get here, then we should be processing the path. p = paths.removeFirst(); // If we're processing it, we won't need it's status in the cache anymore. fileStatusCache.remove(p); boolean match = true; for (FileStatusFilter test : tests) { try { if (test.accept(fileStatus) == false) { match = false; break; } } catch (Throwable t) { t.printStackTrace(); System.err.println("path=" + p + " fileStatus=" + fileStatus); } } if (match == false) { continue; } for (FileStatusFilter action : actions) { try { if (action.accept(fileStatus) == false) { match = false; break; } } catch (Throwable t) { t.printStackTrace(); System.err.println("path=" + p + " fileStatus=" + fileStatus); } } } }
From source file:com.blackberry.logdriver.util.IndexLogs.java
License:Apache License
private static void findComponents(Map<String, Map<String, Map<String, Map<String, Component>>>> data, List<String> unmergedCSVStrings, FileSystem fs, Path path) throws FileNotFoundException, IOException, ParseException { // Grab FileStatus for each file in the path FileStatus[] allFiles = fs.listStatus(path); // For each file, try to match a pattern that indicates we have identified a component. // If we find a match, add or update the component and return. try {// w ww. ja v a 2 s . c om for (int i = 0; i < allFiles.length; i++) { if (dataPattern.matcher(allFiles[i].getPath().toString()).find()) { updateComponent(data, unmergedCSVStrings, fs, allFiles[i], path); return; } } // If we got here no component was matched, so go one level deeper. for (int i = 0; i < allFiles.length; i++) { if (allFiles[i].isDirectory()) { findComponents(data, unmergedCSVStrings, fs, allFiles[i].getPath()); } } } // It's possible that we don't have access to files in this path, or that the path is empty. catch (AccessControlException e) { } catch (FileNotFoundException e) { } }
From source file:com.blackberry.logdriver.util.MultiSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from w w w . j a v a2s. c o m FileSystem fs = FileSystem.get(conf); // The command line options String searchStringDir = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchStringDir = args[0]; // We are going to be reading all the files in this directory a lot. So // let's up the replication factor by a lot so that they're easy to read. for (FileStatus f : fs.listStatus(new Path(searchStringDir))) { fs.setReplication(f.getPath(), (short) 16); } for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(MultiSearch.class); jobConf.setIfUnset("mapred.job.name", "MultiSearch"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string.dir", searchStringDir); // This search is generally too fast to make good use of 128MB blocks, so // let's set the value to 256MB (if it's not set already) if (jobConf.get("mapred.max.split.size") == null) { jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024); } job.setInputFormatClass(AvroBlockInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { AvroBlockInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.QueryIndex.java
License:Apache License
private static Path getLatestIndex(FileSystem fs) throws FileNotFoundException, IOException { // Get path of latest index file FileStatus[] indexFiles = fs.listStatus(new Path("/service/.index/")); long latestDate = 0; for (int i = 0; i < indexFiles.length; i++) { try {//from w w w. j av a 2 s .c o m if (indexFiles[i].getPath().toString().matches(".*\\.json$")) { String[] pathPieces = indexFiles[i].getPath().toString().split("\\."); long currentDate = Integer.parseInt(pathPieces[pathPieces.length - 2]); if (currentDate > latestDate) { latestDate = currentDate; } } } catch (NumberFormatException e) { } catch (IndexOutOfBoundsException e) { } } if (latestDate > 0) { Date now = new Date(); Long age = (now.getTime() - (latestDate * 1000)) / oneHour; if (age > 24) { System.out.println("\nWarning: Index is over 24 hours old!"); } System.out.println("\nIndex is from " + outputFormat.format(latestDate * 1000) + " and is " + age + " hours old.\n"); return new Path("/service/.index/logindex." + latestDate + ".json"); } else { return null; } }