Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.cloudera.recordbreaker.analyzer.FSCrawler.java

License:Open Source License

/**
 * Traverse an entire region of the filesystem, analyzing files.
 * This code should:/*from  www .j ava  2  s .  c om*/
 * a) Navigate the directory hierarchy
 * b) Run analysis code to figure out the file details
 * c) Invoke addSingleFile() appropriately.
 */
protected void recursiveCrawlBuildList(FileSystem fs, Path p, int subdirDepth, long crawlId,
        List<Path> todoFileList, List<Path> todoDirList) throws IOException {
    FileStatus fstatus = fs.getFileStatus(p);
    if (!fstatus.isDir()) {
        todoFileList.add(p);
    } else {
        if (subdirDepth > 0 || subdirDepth < 0) {
            todoDirList.add(p);
            Path paths[] = new Path[1];
            paths[0] = p;
            for (FileStatus subfilestatus : fs.listStatus(p)) {
                Path subfile = subfilestatus.getPath();
                try {
                    recursiveCrawlBuildList(fs, subfile, subdirDepth - 1, crawlId, todoFileList, todoDirList);
                } catch (IOException iex) {
                    iex.printStackTrace();
                }
            }
        }
    }
}

From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java

License:Apache License

/**
 * Creates the directory the test files are written to and removes any files
 * already in it. The test fails if the directory contains a subdirectory, if
 * a leftover file cannot be deleted, or if the directory does not exist once
 * the cleanup is done.
 */
private void ensureEmptyWriteDir() throws IOException {
    FileSystem localFs = FileSystem.getLocal(getConf());
    Path targetDir = getWritePath();

    localFs.mkdirs(targetDir);

    for (FileStatus entry : localFs.listStatus(targetDir)) {
        if (entry.isDir()) {
            fail("setUp(): Write directory " + targetDir + " contains subdirectories");
        }

        Path leftover = entry.getPath();
        LOG.debug("setUp(): Removing " + leftover);
        boolean removed = localFs.delete(leftover, false);
        if (!removed) {
            fail("setUp(): Could not delete residual file " + leftover);
        }
    }

    boolean dirPresent = localFs.exists(targetDir);
    if (!dirPresent) {
        fail("setUp: Could not create " + targetDir);
    }
}

From source file:com.cloudera.sqoop.mapreduce.TestImportJob.java

License:Apache License

/**
 * Reads each entry listed under {@code path} as a SequenceFile and returns the
 * string form of every value, in the order the entries are listed.
 *
 * <p>Entries whose file name starts with {@code _} or {@code .} are skipped.
 *
 * @param conf configuration used to obtain the local filesystem and open readers
 * @param path directory whose entries are read
 * @return {@code toString()} of every value read from the non-hidden entries
 * @throws Exception if listing, opening or reading an entry fails, or if a
 *                   key/value instance cannot be created
 */
private String[] getContent(Configuration conf, Path path) throws Exception {
    FileSystem fs = FileSystem.getLocal(conf);
    FileStatus[] stats = fs.listStatus(path);

    // Read all the files adding the value lines to the list.
    List<String> strings = new ArrayList<String>();
    for (FileStatus stat : stats) {
        Path file = stat.getPath();
        // Filter on the bare file name: the full path string starts with the
        // scheme/parent directories and so never begins with "_" or ".".
        String fileName = file.getName();
        if (fileName.startsWith("_") || fileName.startsWith(".")) {
            continue;
        }

        // Open the listed file itself, not the directory that was listed.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        try {
            WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance();
            Writable value = (Writable) reader.getValueClass().newInstance();
            while (reader.next(key, value)) {
                strings.add(value.toString());
            }
        } finally {
            // Release the underlying stream even when reading fails part-way.
            reader.close();
        }
    }
    return strings.toArray(new String[0]);
}

From source file:com.cloudera.sqoop.mapreduce.TestImportJob.java

License:Apache License

/**
 * Runs the same import with {@code --delete-target-dir} twice: first with the
 * target directory removed beforehand, then again with the directory left in
 * place by the first run. After each run the job must return 0, the directory
 * must exist with exactly two entries, and the first value read back must be
 * "meep".
 */
public void testDeleteTargetDir() throws Exception {
    // Create a table to attempt to import.
    createTableForColType("VARCHAR(32)", "'meep'");

    Configuration conf = new Configuration();

    // Make sure the output dir does not exist before the first run.
    Path outputPath = new Path(new Path(getWarehouseDir()), getTableName());
    FileSystem fs = FileSystem.getLocal(conf);
    fs.delete(outputPath, true);
    assertTrue(!fs.exists(outputPath));

    String[] baseArgs = getArgv(true, new String[] { "DATA_COL0" }, conf);
    String[] argv = Arrays.copyOf(baseArgs, baseArgs.length + 1);
    argv[baseArgs.length] = "--delete-target-dir";

    Sqoop importer = new Sqoop(new ImportTool());
    // One message per run: target directory absent, then present.
    String[] runMessages = {
        "Expected job to go through if target directory" + " does not exist.",
        "Expected job to go through if target directory exists.",
    };
    try {
        for (String runMessage : runMessages) {
            int ret = Sqoop.runSqoop(importer, argv);
            assertTrue(runMessage, 0 == ret);
            assertTrue(fs.exists(outputPath));
            // expecting one _SUCCESS file and one file containing data
            int entryCount = fs.listStatus(outputPath).length;
            assertTrue("Expecting two files in the directory.", entryCount == 2);
            String[] output = getContent(conf, outputPath);
            assertEquals("Expected output and actual output should be same.", "meep", output[0]);
        }
    } catch (Exception e) {
        // In debug mode, ImportException is wrapped in RuntimeException.
        // NOTE(review): exceptions are only logged here, so a run that throws
        // does not fail this test - confirm that is intended.
        LOG.info("Got exceptional return (expected: ok). msg is: " + e);
    }
}

From source file:com.cloudera.sqoop.TestAppendUtils.java

License:Apache License

/**
 * Lists the data files directly under {@code path}.
 *
 * @param fs   filesystem to list
 * @param path directory to list
 * @return the statuses of all non-directory entries whose name matches
 *         {@code part.*-NNNNN.*} (five digits)
 * @throws IOException if the directory cannot be listed
 */
private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException {
    FileStatus[] fileStatuses = fs.listStatus(path);
    // Typed list: avoids the raw collection and the unchecked array cast.
    ArrayList<FileStatus> files = new ArrayList<FileStatus>();
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    for (FileStatus fstat : fileStatuses) {
        if (fstat.isDir()) {
            continue;
        }
        String fname = fstat.getPath().getName();
        if (patt.matcher(fname).matches()) {
            files.add(fstat);
        }
    }
    return files.toArray(new FileStatus[files.size()]);
}

From source file:com.cloudera.sqoop.TestIncrementalImport.java

License:Apache License

/**
 * Look at a directory that should contain files full of an imported 'id'
 * column. Assert that all numbers in [0, expectedNums) are present
 * in order./*from   w w  w. j  av  a2 s  .  c  o  m*/
 */
public void assertDirOfNumbers(String tableName, int expectedNums) {
    try {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR);
        Path tableDir = new Path(warehouse, tableName);
        FileStatus[] stats = fs.listStatus(tableDir);
        String[] fileNames = new String[stats.length];
        for (int i = 0; i < stats.length; i++) {
            fileNames[i] = stats[i].getPath().toString();
        }

        Arrays.sort(fileNames);

        // Read all the files in sorted order, adding the value lines to the list.
        List<String> receivedNums = new ArrayList<String>();
        for (String fileName : fileNames) {
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }

            BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName))));
            try {
                while (true) {
                    String s = r.readLine();
                    if (null == s) {
                        break;
                    }

                    receivedNums.add(s.trim());
                }
            } finally {
                r.close();
            }
        }

        assertEquals(expectedNums, receivedNums.size());

        // Compare the received values with the expected set.
        for (int i = 0; i < expectedNums; i++) {
            assertEquals((int) i, (int) Integer.valueOf(receivedNums.get(i)));
        }
    } catch (Exception e) {
        fail("Got unexpected exception: " + StringUtils.stringifyException(e));
    }
}

From source file:com.cloudera.sqoop.TestIncrementalImport.java

License:Apache License

/**
 * Assert that a directory contains a file with exactly one line
 * in it, containing the prescribed number 'val'.
 *
 * <p>Entries whose file name starts with {@code _} or {@code .} are ignored.
 * The test fails if more than one remaining file exists, if that file is
 * empty, has more than one line, or holds a different number, or if no such
 * file exists at all.
 *
 * @param tableName name of the table directory under the local warehouse dir
 * @param val       the number the single data line must contain
 */
public void assertSpecificNumber(String tableName, int val) {
    try {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR);
        Path tableDir = new Path(warehouse, tableName);
        FileStatus[] stats = fs.listStatus(tableDir);

        // Read the first file that is not a hidden file.
        boolean foundVal = false;
        for (FileStatus stat : stats) {
            Path filePath = stat.getPath();
            String fileName = filePath.getName();
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }

            if (foundVal) {
                // Make sure we don't have two or more "real" files in the dir.
                fail("Got an extra data-containing file in this directory.");
            }

            BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(filePath)));
            try {
                String s = r.readLine();
                if (null == s) {
                    fail("Unexpected empty file " + filePath + ".");
                }
                assertEquals(val, Integer.parseInt(s.trim()));

                String nextLine = r.readLine();
                if (nextLine != null) {
                    fail("Expected only one result, but got another line: " + nextLine);
                }

                // Successfully got the value we were looking for.
                foundVal = true;
            } finally {
                r.close();
            }
        }

        // Without this check a directory holding no data file at all would pass.
        if (!foundVal) {
            fail("Found no data file containing " + val + " in " + tableDir);
        }
    } catch (IOException e) {
        fail("Got unexpected exception: " + StringUtils.stringifyException(e));
    }
}

From source file:com.cloudera.sqoop.TestMerge.java

License:Apache License

/**
 * Reports whether any {@code part-} file in the warehouse subdirectory
 * 'dirName' is accepted by {@code checkFileForLine} for the given 'prefix'.
 * Fails the test when the directory listing is null or empty.
 *
 * @param prefix  the line prefix handed to {@code checkFileForLine}
 * @param dirName directory name, resolved against the local warehouse dir
 * @return true as soon as one part file matches; false if none does
 */
protected boolean recordStartsWith(String prefix, String dirName) throws Exception {
    Path targetPath = new Path(new Path(LOCAL_WAREHOUSE_DIR), dirName);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    FileStatus[] entries = localFs.listStatus(targetPath);

    boolean nothingImported = (null == entries) || (entries.length == 0);
    if (nothingImported) {
        fail("Got no import files!");
    }

    for (int i = 0; i < entries.length; i++) {
        Path candidate = entries[i].getPath();
        if (!candidate.getName().startsWith("part-")) {
            // Not a data file; keep looking.
            continue;
        }
        if (checkFileForLine(localFs, candidate, prefix)) {
            // We found the line. Nothing further to do.
            return true;
        }
    }

    return false;
}

From source file:com.cloudera.sqoop.testutil.ImportJobTestCase.java

License:Apache License

/**
 * Do a MapReduce-based import of the table and verify that the results
 * were imported as expected. (tests readFields(ResultSet) and toString())
 *
 * <p>The first value of each {@code part-}/{@code data-} file in the table
 * directory is read back and compared with the expected value followed by a
 * newline. Files that hit EOF before yielding a value are skipped, but at
 * least one file must yield a record.
 *
 * @param expectedVal the value we injected into the table.
 * @param importCols the columns to import. If null, all columns are used.
 */
protected void verifyImport(String expectedVal, String[] importCols) {

    // Location where the import output will wind up.
    Path outputDir = getTablePath();

    removeTableDir();

    Configuration conf = getConf();
    SqoopOptions options = getSqoopOptions(conf);

    // Drive the tool through its normal entry point.
    int exitCode;
    try {
        Sqoop sqoop = new Sqoop(new ImportTool(), conf, options);
        String[] importArgs = getArgv(true, importCols, conf);
        exitCode = Sqoop.runSqoop(sqoop, importArgs);
    } catch (Exception ex) {
        LOG.error("Got exception running Sqoop: " + ex.toString());
        throw new RuntimeException(ex);
    }

    // The job must have succeeded.
    assertEquals("Failure during job", 0, exitCode);

    // Re-parse the arguments into a fresh options object for the compilation manager.
    options = getSqoopOptions(conf);
    try {
        ImportTool parser = new ImportTool();
        String[] parseArgs = getArgv(false, importCols, conf);
        options = parser.parseArguments(parseArgs, conf, options, true);
    } catch (Exception ex) {
        fail(ex.toString());
    }

    String jarFileName = new CompilationManager(options).getJarFilename();
    ClassLoader savedLoader = null;
    try {
        savedLoader = ClassLoaderStack.addJarFile(jarFileName, getTableName());

        // Check every data file in the table path; at least one must hold a
        // record, and every record read must equal the expected value.
        if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
            conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
        }
        FileStatus[] entries = FileSystem.get(conf).listStatus(outputDir);

        if (entries == null || entries.length == 0) {
            fail("Error: no files in " + outputDir);
        }

        // Trailing '\n' because SqoopRecord.toString() encodes the record delim.
        final String wanted = (null == expectedVal) ? "null\n" : expectedVal + "\n";

        boolean sawRecord = false;
        for (FileStatus entry : entries) {
            Path entryPath = entry.getPath();
            String entryName = entryPath.getName();
            boolean isDataFile = entryName.startsWith("part-") || entryName.startsWith("data-");
            if (!isDataFile) {
                // This isn't a data file. Ignore it.
                continue;
            }

            try {
                Object first = SeqFileReader.getFirstValue(entryPath.toString());
                LOG.info("Read back from sequencefile: " + first);
                sawRecord = true;
                assertEquals("Error validating result from SeqFile", wanted, first.toString());
            } catch (EOFException emptyFile) {
                // EOF in a file isn't necessarily a problem. We may have some
                // empty sequence files, which will throw this. Just continue
                // in the loop.
            }
        }

        if (!sawRecord) {
            fail("Couldn't read any records from SequenceFiles");
        }
    } catch (IOException ioe) {
        fail("IOException: " + ioe.toString());
    } finally {
        // Restore the class loader that was active before the jar was added.
        if (savedLoader != null) {
            ClassLoaderStack.setCurrentClassLoader(savedLoader);
        }
    }
}

From source file:com.cloudera.sqoop.util.AppendUtils.java

License:Apache License

/**
 * Returns the greatest partition number available for appending, for data
 * files in targetDir./*from w w w . j  a va  2s .  c  om*/
 */
private int getNextPartition(FileSystem fs, Path targetDir) throws IOException {

    int nextPartition = 0;
    FileStatus[] existingFiles = fs.listStatus(targetDir);
    if (existingFiles != null && existingFiles.length > 0) {
        Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
        for (FileStatus fileStat : existingFiles) {
            if (!fileStat.isDir()) {
                String filename = fileStat.getPath().getName();
                Matcher mat = patt.matcher(filename);
                if (mat.matches()) {
                    int thisPart = Integer.parseInt(mat.group(1));
                    if (thisPart >= nextPartition) {
                        nextPartition = thisPart;
                        nextPartition++;
                    }
                }
            }
        }
    }

    if (nextPartition > 0) {
        LOG.info("Using found partition " + nextPartition);
    }

    return nextPartition;
}