List of usage examples for org.apache.hadoop.fs.FileSystem.listStatus
public FileStatus[] listStatus(Path[] files, PathFilter filter) throws FileNotFoundException, IOException
From source file:com.blackberry.logtools.LogTools.java
License:Apache License
public void runPigRemote(Map<String, String> params, String out, String tmp, boolean quiet, boolean silent, Configuration conf, String queue_name, String additional_jars, File pig_tmp, ArrayList<String> D_options, String PIG_DIR, FileSystem fs) { //Set input parameter for pig job - calling Pig directly params.put("tmpdir", StringEscapeUtils.escapeJava(tmp)); //Check for an out of '-', meaning write to stdout String pigout;/* w w w . j a v a 2s.co m*/ if (out.equals("-")) { params.put("out", tmp + "/final"); pigout = tmp + "/final"; } else { params.put("out", StringEscapeUtils.escapeJava(out)); pigout = StringEscapeUtils.escapeJava(out); } try { logConsole(quiet, silent, info, "Running PIG Command"); conf.set("mapred.job.queue.name", queue_name); conf.set("pig.additional.jars", additional_jars); conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000)); conf.set("pig.logfile", pig_tmp.toString()); conf.set("hadoopversion", "23"); //PIG temp directory set to be able to delete all temp files/directories conf.set("pig.temp.dir", tmp); //Setting output separator for logdriver String DEFAULT_OUTPUT_SEPARATOR = "\t"; Charset UTF_8 = Charset.forName("UTF-8"); String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { logConsole(true, true, error, "The output separator must be a single byte in UTF-8."); System.exit(1); } conf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); dOpts(D_options, silent, out, conf); PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf); pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params); } catch (Exception e) { e.printStackTrace(); System.exit(1); } logConsole(quiet, silent, warn, "PIG Job Completed."); if (out.equals("-")) { System.out.println(";#################### DATA RESULTS ####################"); try { //Create filter to find files with the results from PIG job 
PathFilter filter = new PathFilter() { public boolean accept(Path file) { return file.getName().contains("part-"); } }; //Find the files in the directory, open and printout results FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter); for (int i = 0; i < status.length; i++) { BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(status[i].getPath()))); String line; line = br.readLine(); while (line != null) { System.out.println(line); line = br.readLine(); } } System.out.println(";#################### END OF RESULTS ####################"); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } else { System.out.println( ";#################### Done. Search results are in " + pigout + " ####################"); } }
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/** * List input directories. Subclasses may override to, e.g., select only files * matching a regular expression.//from w w w .j a va2 s . c o m * * @param job * the job to list input paths for * @return array of FileStatus objects * @throws IOException * if zero items. */ protected List<FileStatus> listStatus(BSPJob job) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(HIDDEN_FILE_FILTER); PathFilter inputFilter = new MultiPathFilter(filters); for (int i = 0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConf()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat : matches) { if (globStat.isDir()) { for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) { result.add(stat); } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }
From source file:com.cloudera.crunch.io.CompositePathIterable.java
License:Open Source License
public static <S> Iterable<S> create(FileSystem fs, Path path, FileReaderFactory<S> readerFactory) throws IOException { FileStatus[] stati;// w w w . j av a 2s. c o m try { stati = fs.listStatus(path, FILTER); } catch (FileNotFoundException fnfe) { stati = new FileStatus[0]; } if (stati.length == 0) { throw new IOException("No files found to materialize at: " + path); } return new CompositePathIterable<S>(stati, fs, readerFactory); }
From source file:com.cloudera.hoop.fs.FSListStatus.java
License:Open Source License
/** * Executes the filesystem operation./*from www . j a v a2s . c o m*/ * * @param fs filesystem instance to use. * @return a JSONArray with the file status of the directory * contents. * @throws IOException thrown if an IO error occured. */ @Override public JSONArray execute(FileSystem fs) throws IOException { FileStatus[] status = fs.listStatus(path, filter); String httpBaseUrl = HoopServer.get().getBaseUrl(); return FSUtils.fileStatusToJSON(status, httpBaseUrl); }
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
public static Path[] findVcfs(Path path, Configuration conf) throws IOException { FileSystem fs = path.getFileSystem(conf); if (fs.isDirectory(path)) { FileStatus[] fileStatuses = fs.listStatus(path, new HiddenPathFilter()); Path[] vcfs = new Path[fileStatuses.length]; int i = 0; for (FileStatus status : fileStatuses) { vcfs[i++] = status.getPath(); }// w ww. ja va2 s .c o m return vcfs; } else { return new Path[] { path }; } }
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
public static boolean sampleGroupExists(Path path, Configuration conf, String sampleGroup) throws IOException { FileSystem fs = path.getFileSystem(conf); if (!fs.exists(path)) { return false; }// w w w. ja v a2 s . c om for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) { for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) { if (fs.listStatus(posStatus.getPath(), new PartitionPathFilter("sample_group", sampleGroup)).length > 0) { return true; } } } return false; }
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException { FileSystem fs = path.getFileSystem(conf); if (!fs.exists(path)) { return;//from www . j a v a 2 s .com } for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) { for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) { for (FileStatus sampleGroupStatus : fs.listStatus(posStatus.getPath(), new PartitionPathFilter("sample_group", sampleGroup))) { fs.delete(sampleGroupStatus.getPath(), true); } } } }
From source file:com.cloudera.science.quince.SchemaUtils.java
License:Open Source License
public static Path findFile(Path path, Configuration conf) throws IOException { FileSystem fs = path.getFileSystem(conf); if (fs.isDirectory(path)) { FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() { @Override//from w w w .j a va 2 s.c o m public boolean accept(Path p) { String name = p.getName(); return !name.startsWith("_") && !name.startsWith("."); } }); return fileStatuses[0].getPath(); } else { return path; } }
From source file:com.cloudera.seismic.segy.SegyUnloader.java
License:Open Source License
@Override public int run(String[] args) throws Exception { Options options = new Options(); options.addOption("input", true, "SU sequence files to export from Hadoop"); options.addOption("output", true, "The local SU file to write"); // Parse the commandline and check for required arguments. CommandLine cmdLine = new PosixParser().parse(options, args, false); if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) { System.out.println("Mising required input/output arguments"); new HelpFormatter().printHelp("SegyUnloader", options); System.exit(1);/* w ww.jav a 2s . c o m*/ } Configuration conf = getConf(); FileSystem hdfs = FileSystem.get(conf); Path inputPath = new Path(cmdLine.getOptionValue("input")); if (!hdfs.exists(inputPath)) { System.out.println("Input path does not exist"); System.exit(1); } PathFilter pf = new PathFilter() { @Override public boolean accept(Path path) { return !path.getName().startsWith("_"); } }; DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output"))); for (FileStatus fs : hdfs.listStatus(inputPath, pf)) { write(fs.getPath(), os, conf); } os.close(); return 0; }
From source file:com.cloudera.sqoop.TestMultiMaps.java
License:Apache License
/**
 * Collects the paths under {@code getTablePath()} that are accepted by
 * {@code Utils.OutputFileUtils.OutputFilesFilter}. When not running on a physical
 * cluster, the configuration is pointed at {@code CommonArgs.LOCAL_FS} first.
 *
 * @return a list of Path objects for each data file
 * @throws IOException if the filesystem cannot be obtained or listed
 */
protected List<Path> getDataFilePaths() throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }

    FileSystem fs = FileSystem.get(conf);
    FileStatus[] outputs = fs.listStatus(getTablePath(), new Utils.OutputFileUtils.OutputFilesFilter());

    List<Path> dataFiles = new ArrayList<Path>();
    for (int i = 0; i < outputs.length; i++) {
        dataFiles.add(outputs[i].getPath());
    }
    return dataFiles;
}