List of usage examples for org.apache.hadoop.fs.FileSystem#listStatus
public FileStatus[] listStatus(Path f, PathFilter filter) throws FileNotFoundException, IOException
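Before the full examples, here is a minimal, self-contained sketch of the call pattern they all share: list the children of a directory through a PathFilter so that bookkeeping entries such as _SUCCESS are skipped. The directory path and the filter logic are illustrative assumptions, not taken from any of the source files below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ListStatusSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path dir = new Path("/tmp/output"); // illustrative path
        // Accept only regular output files; skip hidden and bookkeeping entries.
        PathFilter filter = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                String name = path.getName();
                return !name.startsWith("_") && !name.startsWith(".");
            }
        };
        for (FileStatus status : fs.listStatus(dir, filter)) {
            System.out.println(status.getPath() + " directory? " + status.isDirectory());
        }
    }
}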
From source file:co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License:Apache License
@Test
public void testBasicTableImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();
    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver",
            "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/URLAccess",
            // "-jdbcUsername", "",
            // "-jdbcPassword", "",
            "-outputPath", "testBasicTableImport",
            "-outputStrategy", "delimited",
            "-delimiter", "DELIM",
            "-numberOfMappers", "2",
            "-inputTableName", "Pageview",
            "-inputOrderBy", "pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now
    FileSystem outputFS = getFileSystem();
    // Path outputPath = getOutputDir();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testBasicTableImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000");
    expectedOutput.add("/bDELIM2000");
    expectedOutput.add("/cDELIM3000");
    expectedOutput.add("/dDELIM4000");
    expectedOutput.add("/eDELIM5000");
    expectedOutput.add("/fDELIM6000");
    expectedOutput.add("/gDELIM7000");
    expectedOutput.add("/hDELIM8000");
    expectedOutput.add("/iDELIM9000");
    expectedOutput.add("/jDELIM10000");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
}
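The tests in this listing all filter the job output through getOutputPathFilter(), which is defined elsewhere in the test hierarchy and not shown in these excerpts. A hypothetical implementation consistent with how it is used (keeping data files, dropping entries such as _SUCCESS and _logs) might look like the sketch below; the actual filter in hiho may differ.

public static PathFilter getOutputPathFilter() {
    return new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // Hypothetical: keep real output files, skip job bookkeeping entries.
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };
}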
From source file:co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License:Apache License
@Test
public void testBasicAvroTableImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();
    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver",
            "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/URLAccess",
            "-outputPath", "testQueryBasedImport",
            "-inputQuery",
            "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
            "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
            "-outputStrategy", "AVRO",
            "-delimiter", "DELIM",
            "-numberOfMappers", "2",
            "-inputOrderBy", "Pageview.pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now
    FileSystem outputFS = getFileSystem();
    // Path outputPath = getOutputDir();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testQueryBasedImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    /*
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000");
    expectedOutput.add("/bDELIM2000");
    expectedOutput.add("/cDELIM3000");
    expectedOutput.add("/dDELIM4000");
    expectedOutput.add("/eDELIM5000");
    expectedOutput.add("/fDELIM6000");
    expectedOutput.add("/gDELIM7000");
    expectedOutput.add("/hDELIM8000");
    expectedOutput.add("/iDELIM9000");
    expectedOutput.add("/jDELIM10000");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
    */
}
From source file:co.nubetech.hiho.job.TestDBQueryInputJobWithCluster.java
License:Apache License
@Test
public void testQueryBasedImport() throws Exception {
    DBQueryInputJob job = new DBQueryInputJob();
    String[] args = new String[] { "-jdbcDriver", "org.hsqldb.jdbcDriver",
            "-jdbcUrl", "jdbc:hsqldb:hsql://localhost/URLAccess",
            "-outputPath", "testQueryBasedImport",
            "-inputQuery",
            "select url,pageview,commentCount from Pageview, PageComment where Pageview.url = PageComment.url",
            "-inputBoundingQuery", "select min(commentCount), max(commentCount) from PageComment",
            "-outputStrategy", "delimited",
            "-delimiter", "DELIM",
            "-numberOfMappers", "2",
            "-inputOrderBy", "Pageview.pageview" };
    int res = ToolRunner.run(createJobConf(), job, args);
    assertEquals(0, res);

    // let's verify the result now
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "testQueryBasedImport");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("/aDELIM1000DELIM10");
    expectedOutput.add("/bDELIM2000DELIM10");
    expectedOutput.add("/cDELIM3000DELIM10");
    expectedOutput.add("/dDELIM4000DELIM10");
    expectedOutput.add("/eDELIM5000DELIM10");
    expectedOutput.add("/fDELIM6000DELIM10");
    expectedOutput.add("/gDELIM7000DELIM10");
    expectedOutput.add("/hDELIM8000DELIM10");
    expectedOutput.add("/iDELIM9000DELIM10");
    expectedOutput.add("/jDELIM10000DELIM10");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(10, count);
}
From source file:co.nubetech.hiho.merge.TestMergeJob.java
License:Apache License
@Test
public void testMergeByKeyWithDelimitedTextInputFormat() throws Exception {
    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "key", "-outputPath", "output",
            "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(4, count);
}
From source file:co.nubetech.hiho.merge.TestMergeJob.java
License:Apache License
@Test
public void testMergeByValueWithDelimitedTextInputFormat() throws Exception {
    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714"
            + "\nTimon Leonard,716 Ac Ave,1-857-935-3882,62240";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file:co.nubetech.hiho.merge.TestMergeJob.java
License:Apache License
@Test
public void testMergeByValueWithTextInputFormat() throws Exception {
    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Timon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file:co.nubetech.hiho.merge.TestMergeJob.java
License:Apache License
@Test
public void testMergeByKeyWithKeyValueTextInputFormat() throws Exception {
    final String inputData1 = "A\tMacon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nB\tDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nC\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "A\tTimon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nD\tMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nB\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "key", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(4, count);
}
From source file:co.nubetech.hiho.merge.TestMergeJob.java
License:Apache License
@Test
public void testMergeByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");

    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(2), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");

    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());

    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));

    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");

    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file:com.architecting.ch07.MapReduceIndexerTool.java
License:Apache License
private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs)
        throws FileNotFoundException, IOException {
    final String dirPrefix = SolrOutputFormat.getOutputName(job);
    FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dirPrefix);
        }
    });
    for (FileStatus dir : dirs) {
        if (!dir.isDirectory()) {
            throw new IllegalStateException("Not a directory: " + dir.getPath());
        }
    }

    // use alphanumeric sort (rather than lexicographical sort) to properly
    // handle more than 99999 shards
    Arrays.sort(dirs, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName());
        }
    });
    return dirs;
}
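The comment in that method explains the design choice: plain lexicographic order puts "part-100000" before "part-99999" once shard numbers outgrow their zero padding. AlphaNumericComparator itself is not shown in this excerpt; the following is a minimal, self-contained stand-in that captures the idea by comparing the trailing digit run numerically, not the library's actual implementation.

import java.util.Arrays;
import java.util.Comparator;

public class AlphaNumericSortSketch {
    // Parse the trailing run of digits as a number; -1 if there is none.
    static long trailingNumber(String s) {
        int i = s.length();
        while (i > 0 && Character.isDigit(s.charAt(i - 1))) {
            i--;
        }
        return i == s.length() ? -1L : Long.parseLong(s.substring(i));
    }

    public static void main(String[] args) {
        String[] shards = { "part-100000", "part-00002", "part-99999" };
        Arrays.sort(shards, new Comparator<String>() {
            @Override
            public int compare(String a, String b) {
                return Long.compare(trailingNumber(a), trailingNumber(b));
            }
        });
        // Prints [part-00002, part-99999, part-100000]; a plain string sort
        // would have placed part-100000 before part-99999.
        System.out.println(Arrays.toString(shards));
    }
}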
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    Path[] dirs = getInputPaths(job);
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job);

    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    PathFilter jobFilter = getInputPathFilter(job);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDir()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result.toArray(new FileStatus[result.size()]);
}
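Note that this method expands a matched directory exactly one level: the children of each glob match are added, but nothing nested deeper is enumerated. On Hadoop 0.23 and later, FileSystem.listFiles offers a recursive alternative. A short fragment as a sketch, assuming fs is an initialized FileSystem and the root path is illustrative:

// Recursively enumerate every file (not directory) under a root path.
RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/input"), true);
while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
}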