Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for the listStatus method of org.apache.hadoop.fs.FileSystem.

Prototype

public FileStatus[] listStatus(Path[] files, PathFilter filter) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using a user-supplied path filter.

Usage

From source file:com.blackberry.logtools.LogTools.java

License:Apache License

/**
 * Configures and runs the {@code formatAndSort.pg} Pig script, then reports where the
 * results are (or prints them when {@code out} is {@code "-"}).
 *
 * <p>Any exception while configuring or registering the script, or while reading the
 * results back, is printed and terminates the JVM with exit status 1.
 *
 * @param params          script parameters; {@code "tmpdir"} and {@code "out"} are added here
 * @param out             output location, or {@code "-"} to print the results to stdout
 * @param tmp             temporary directory; results go to {@code tmp + "/final"} when
 *                        {@code out} is {@code "-"}
 * @param quiet           forwarded to {@code logConsole}
 * @param silent          forwarded to {@code logConsole} and {@code dOpts}
 * @param conf            configuration that is populated and handed to the {@code PigServer}
 * @param queue_name      value stored under {@code mapred.job.queue.name}
 * @param additional_jars value stored under {@code pig.additional.jars}
 * @param pig_tmp         file whose string form is stored under {@code pig.logfile}
 * @param D_options       forwarded to {@code dOpts}
 * @param PIG_DIR         directory containing {@code formatAndSort.pg}
 * @param fs              filesystem used to list and read the result files
 */
public void runPigRemote(Map<String, String> params, String out, String tmp, boolean quiet, boolean silent,
        Configuration conf, String queue_name, String additional_jars, File pig_tmp,
        ArrayList<String> D_options, String PIG_DIR, FileSystem fs) {
    //Set input parameter for pig job - calling Pig directly
    params.put("tmpdir", StringEscapeUtils.escapeJava(tmp));

    //Check for an out of '-', meaning write to stdout
    String pigout;
    if (out.equals("-")) {
        params.put("out", tmp + "/final");
        pigout = tmp + "/final";
    } else {
        params.put("out", StringEscapeUtils.escapeJava(out));
        pigout = StringEscapeUtils.escapeJava(out);
    }

    try {
        logConsole(quiet, silent, info, "Running PIG Command");
        conf.set("mapred.job.queue.name", queue_name);
        conf.set("pig.additional.jars", additional_jars);
        conf.set("pig.exec.reducers.bytes.per.reducer", Integer.toString(100 * 1000 * 1000));
        conf.set("pig.logfile", pig_tmp.toString());
        conf.set("hadoopversion", "23");
        //PIG temp directory set to be able to delete all temp files/directories
        conf.set("pig.temp.dir", tmp);

        //Setting output separator for logdriver
        String DEFAULT_OUTPUT_SEPARATOR = "\t";
        Charset UTF_8 = Charset.forName("UTF-8");
        String outputSeparator = conf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            logConsole(true, true, error, "The output separator must be a single byte in UTF-8.");
            System.exit(1);
        }
        // The separator is written back as the decimal value of its single byte.
        conf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));

        dOpts(D_options, silent, out, conf);

        PigServer pigServer = new PigServer(ExecType.MAPREDUCE, conf);
        pigServer.registerScript(PIG_DIR + "/formatAndSort.pg", params);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }

    logConsole(quiet, silent, warn, "PIG Job Completed.");
    if (out.equals("-")) {
        System.out.println(";#################### DATA RESULTS ####################");
        try {
            //Create filter to find files with the results from PIG job
            PathFilter filter = new PathFilter() {
                public boolean accept(Path file) {
                    return file.getName().contains("part-");
                }
            };

            //Find the files in the directory, open and printout results
            FileStatus[] status = fs.listStatus(new Path(tmp + "/final"), filter);
            for (FileStatus partFile : status) {
                // NOTE(review): the reader decodes with the platform default charset.
                BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(partFile.getPath())));
                try {
                    String line = br.readLine();
                    while (line != null) {
                        System.out.println(line);
                        line = br.readLine();
                    }
                } finally {
                    // Close each result file as soon as it has been printed so that
                    // streams are not leaked when there are many part files.
                    br.close();
                }
            }
            System.out.println(";#################### END OF RESULTS ####################");
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    } else {
        System.out.println(
                ";#################### Done. Search results are in " + pigout + " ####################");
    }
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * Lists the input files for a job. Subclasses may override to, e.g., select only
 * files matching a regular expression.
 *
 * <p>Each input path is expanded with {@code globStatus}; directories among the
 * matches are listed one level deep, and other matches are returned as-is. Only
 * {@code HIDDEN_FILE_FILTER} is applied here.
 *
 * @param job
 *        the job to list input paths for
 * @return the FileStatus entries found for all input paths
 * @throws IOException
 *         if the job has no input paths; problems with individual paths are
 *         collected and reported together through an InvalidInputException
 */
protected List<FileStatus> listStatus(BSPJob job) throws IOException {
    Path[] inputDirs = getInputPaths(job);
    if (inputDirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // Wrap the hidden-file filter in a MultiPathFilter used for both globbing and listing.
    List<PathFilter> filterChain = new ArrayList<PathFilter>();
    filterChain.add(HIDDEN_FILE_FILTER);
    PathFilter inputFilter = new MultiPathFilter(filterChain);

    List<FileStatus> collected = new ArrayList<FileStatus>();
    List<IOException> problems = new ArrayList<IOException>();
    for (Path dir : inputDirs) {
        FileSystem fs = dir.getFileSystem(job.getConf());
        FileStatus[] globbed = fs.globStatus(dir, inputFilter);
        if (globbed == null) {
            problems.add(new IOException("Input path does not exist: " + dir));
            continue;
        }
        if (globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + dir + " matches 0 files"));
            continue;
        }
        for (FileStatus match : globbed) {
            if (!match.isDir()) {
                collected.add(match);
                continue;
            }
            // Directories contribute their immediate children.
            FileStatus[] children = fs.listStatus(match.getPath(), inputFilter);
            for (int k = 0; k < children.length; k++) {
                collected.add(children[k]);
            }
        }
    }

    // Report every bad input path at once rather than failing on the first.
    if (!problems.isEmpty()) {
        throw new InvalidInputException(problems);
    }
    LOG.info("Total input paths to process : " + collected.size());
    return collected;
}

From source file:com.cloudera.crunch.io.CompositePathIterable.java

License:Open Source License

/**
 * Creates an iterable over the entries of {@code path} that pass {@code FILTER}.
 *
 * @param fs            filesystem used to list {@code path}
 * @param path          location to list
 * @param readerFactory passed through to the resulting CompositePathIterable
 * @return a CompositePathIterable over the listed entries
 * @throws IOException if the path does not exist or the listing is empty
 */
public static <S> Iterable<S> create(FileSystem fs, Path path, FileReaderFactory<S> readerFactory)
        throws IOException {
    FileStatus[] matches;
    try {
        matches = fs.listStatus(path, FILTER);
    } catch (FileNotFoundException missing) {
        // A missing path is handled the same way as an empty listing.
        matches = new FileStatus[0];
    }
    if (matches.length > 0) {
        return new CompositePathIterable<S>(matches, fs, readerFactory);
    }
    throw new IOException("No files found to materialize at: " + path);
}

From source file:com.cloudera.hoop.fs.FSListStatus.java

License:Open Source License

/**
 * Executes the filesystem operation./*from  www  .  j  a  v  a2s  .  c  o  m*/
 *
 * @param fs filesystem instance to use.
 * @return a JSONArray with the file status of the directory
 * contents.
 * @throws IOException thrown if an IO error occured.
 */
@Override
public JSONArray execute(FileSystem fs) throws IOException {
    FileStatus[] status = fs.listStatus(path, filter);
    String httpBaseUrl = HoopServer.get().getBaseUrl();
    return FSUtils.fileStatusToJSON(status, httpBaseUrl);
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Resolves {@code path} to an array of paths.
 *
 * <p>A directory yields the paths of its entries accepted by a
 * {@code HiddenPathFilter}; anything else yields a one-element array holding
 * {@code path} itself.
 *
 * @param path file or directory to resolve
 * @param conf configuration used to obtain the filesystem for {@code path}
 * @return the resolved paths
 * @throws IOException if the filesystem calls fail
 */
public static Path[] findVcfs(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.isDirectory(path)) {
        return new Path[] { path };
    }

    FileStatus[] children = fs.listStatus(path, new HiddenPathFilter());
    Path[] vcfs = new Path[children.length];
    for (int idx = 0; idx < children.length; idx++) {
        vcfs[idx] = children[idx].getPath();
    }
    return vcfs;
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Checks whether any {@code chr}/{@code pos} partition under {@code path}
 * contains an entry for the given sample group.
 *
 * @param path        root to search; a non-existent path yields {@code false}
 * @param conf        configuration used to obtain the filesystem for {@code path}
 * @param sampleGroup value passed to the {@code sample_group} partition filter
 * @return {@code true} as soon as one matching entry is found, otherwise {@code false}
 * @throws IOException if the filesystem calls fail
 */
public static boolean sampleGroupExists(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        return false;
    }

    FileStatus[] chrDirs = fs.listStatus(path, new PartitionPathFilter("chr"));
    for (FileStatus chrDir : chrDirs) {
        FileStatus[] posDirs = fs.listStatus(chrDir.getPath(), new PartitionPathFilter("pos"));
        for (FileStatus posDir : posDirs) {
            FileStatus[] groupDirs = fs.listStatus(posDir.getPath(),
                    new PartitionPathFilter("sample_group", sampleGroup));
            if (groupDirs.length > 0) {
                return true;
            }
        }
    }
    return false;
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Recursively deletes every entry for the given sample group found in the
 * {@code chr}/{@code pos} partitions under {@code path}.
 *
 * @param path        root to search; nothing happens if it does not exist
 * @param conf        configuration used to obtain the filesystem for {@code path}
 * @param sampleGroup value passed to the {@code sample_group} partition filter
 * @throws IOException if the filesystem calls fail
 */
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path)) {
        FileStatus[] chrDirs = fs.listStatus(path, new PartitionPathFilter("chr"));
        for (FileStatus chrDir : chrDirs) {
            FileStatus[] posDirs = fs.listStatus(chrDir.getPath(), new PartitionPathFilter("pos"));
            for (FileStatus posDir : posDirs) {
                FileStatus[] groupDirs = fs.listStatus(posDir.getPath(),
                        new PartitionPathFilter("sample_group", sampleGroup));
                for (FileStatus groupDir : groupDirs) {
                    fs.delete(groupDir.getPath(), true);
                }
            }
        }
    }
}

From source file:com.cloudera.science.quince.SchemaUtils.java

License:Open Source License

/**
 * Resolves {@code path} to a single file.
 *
 * <p>If {@code path} is a directory, returns the first listed entry whose name
 * does not start with {@code "_"} or {@code "."}; otherwise returns
 * {@code path} unchanged.
 *
 * @param path file or directory to resolve
 * @param conf configuration used to obtain the filesystem for {@code path}
 * @return the resolved file path
 * @throws IOException if the filesystem calls fail, or if {@code path} is a
 *         directory with no entry that passes the name filter
 */
public static Path findFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    if (fs.isDirectory(path)) {
        FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String name = p.getName();
                return !name.startsWith("_") && !name.startsWith(".");
            }
        });
        // Fail with a descriptive error instead of an ArrayIndexOutOfBoundsException
        // when the directory holds nothing but "_"/"." entries (or is empty).
        if (fileStatuses.length == 0) {
            throw new IOException("No visible files found in directory: " + path);
        }
        return fileStatuses[0].getPath();
    } else {
        return path;
    }
}

From source file:com.cloudera.seismic.segy.SegyUnloader.java

License:Open Source License

/**
 * Writes every entry under the {@code -input} path (skipping names that start
 * with {@code "_"}) to the local file named by {@code -output}.
 *
 * <p>Exits the JVM with status 1 if a required option is missing or the input
 * path does not exist.
 *
 * @param args command-line arguments; {@code -input} and {@code -output} are required
 * @return 0 on success
 * @throws Exception if argument parsing, listing, or writing fails
 */
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("input", true, "SU sequence files to export from Hadoop");
    options.addOption("output", true, "The local SU file to write");

    // Parse the commandline and check for required arguments.
    CommandLine cmdLine = new PosixParser().parse(options, args, false);
    if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) {
        System.out.println("Mising required input/output arguments");
        new HelpFormatter().printHelp("SegyUnloader", options);
        System.exit(1);
    }

    Configuration conf = getConf();
    FileSystem hdfs = FileSystem.get(conf);
    Path inputPath = new Path(cmdLine.getOptionValue("input"));
    if (!hdfs.exists(inputPath)) {
        System.out.println("Input path does not exist");
        System.exit(1);
    }

    // Skip entries whose names start with an underscore.
    PathFilter pf = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output")));
    try {
        for (FileStatus fs : hdfs.listStatus(inputPath, pf)) {
            write(fs.getPath(), os, conf);
        }
    } finally {
        // Always release the local output file, even when listing or writing fails.
        os.close();
    }

    return 0;
}

From source file:com.cloudera.sqoop.TestMultiMaps.java

License:Apache License

/**
 * Lists the table path with an {@code OutputFilesFilter} and collects the
 * path of every entry returned.
 *
 * @return a list of Path objects for each data file
 * @throws IOException if the filesystem cannot be obtained or listed
 */
protected List<Path> getDataFilePaths() throws IOException {
    Configuration conf = new Configuration();
    boolean onCluster = BaseSqoopTestCase.isOnPhysicalCluster();
    if (!onCluster) {
        // Off-cluster runs use the local filesystem setting from CommonArgs.
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }

    FileSystem fs = FileSystem.get(conf);
    FileStatus[] outputs = fs.listStatus(getTablePath(), new Utils.OutputFileUtils.OutputFilesFilter());

    List<Path> dataFiles = new ArrayList<Path>();
    for (int i = 0; i < outputs.length; i++) {
        dataFiles.add(outputs[i].getPath());
    }
    return dataFiles;
}