List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
From source file:com.cloudera.recordbreaker.analyzer.FSCrawler.java
License:Open Source License
/** * Traverse an entire region of the filesystem, analyzing files. * This code should:/*from www .j ava 2 s . c om*/ * a) Navigate the directory hierarchy * b) Run analysis code to figure out the file details * c) Invoke addSingleFile() appropriately. */ protected void recursiveCrawlBuildList(FileSystem fs, Path p, int subdirDepth, long crawlId, List<Path> todoFileList, List<Path> todoDirList) throws IOException { FileStatus fstatus = fs.getFileStatus(p); if (!fstatus.isDir()) { todoFileList.add(p); } else { if (subdirDepth > 0 || subdirDepth < 0) { todoDirList.add(p); Path paths[] = new Path[1]; paths[0] = p; for (FileStatus subfilestatus : fs.listStatus(p)) { Path subfile = subfilestatus.getPath(); try { recursiveCrawlBuildList(fs, subfile, subdirDepth - 1, crawlId, todoFileList, todoDirList); } catch (IOException iex) { iex.printStackTrace(); } } } } }
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Prepare the directory the test files are written to: create it and delete
 * every file already inside it. The test fails if the directory contains a
 * subdirectory, if a file cannot be deleted, or if the directory does not
 * exist afterwards.
 *
 * @throws IOException if a filesystem operation fails
 */
private void ensureEmptyWriteDir() throws IOException {
  FileSystem localFs = FileSystem.getLocal(getConf());
  Path writeDir = getWritePath();
  localFs.mkdirs(writeDir);

  for (FileStatus leftover : localFs.listStatus(writeDir)) {
    Path leftoverPath = leftover.getPath();
    if (leftover.isDir()) {
      fail("setUp(): Write directory " + writeDir + " contains subdirectories");
    }

    LOG.debug("setUp(): Removing " + leftoverPath);
    boolean deleted = localFs.delete(leftoverPath, false);
    if (!deleted) {
      fail("setUp(): Could not delete residual file " + leftoverPath);
    }
  }

  boolean dirPresent = localFs.exists(writeDir);
  if (!dirPresent) {
    fail("setUp: Could not create " + writeDir);
  }
}
From source file:com.cloudera.sqoop.mapreduce.TestImportJob.java
License:Apache License
private String[] getContent(Configuration conf, Path path) throws Exception { FileSystem fs = FileSystem.getLocal(conf); FileStatus[] stats = fs.listStatus(path); String[] fileNames = new String[stats.length]; for (int i = 0; i < stats.length; i++) { fileNames[i] = stats[i].getPath().toString(); }//from w w w.ja v a2 s . co m // Read all the files adding the value lines to the list. List<String> strings = new ArrayList<String>(); for (String fileName : fileNames) { if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance(); Writable value = (Writable) reader.getValueClass().newInstance(); while (reader.next(key, value)) { strings.add(value.toString()); } } return strings.toArray(new String[0]); }
From source file:com.cloudera.sqoop.mapreduce.TestImportJob.java
License:Apache License
public void testDeleteTargetDir() throws Exception { // Make sure that if a MapReduce job to do the import fails due // to an IOException, we tell the user about it. // Create a table to attempt to import. createTableForColType("VARCHAR(32)", "'meep'"); Configuration conf = new Configuration(); // Make the output dir does not exist Path outputPath = new Path(new Path(getWarehouseDir()), getTableName()); FileSystem fs = FileSystem.getLocal(conf); fs.delete(outputPath, true);//from w w w. ja v a 2 s . co m assertTrue(!fs.exists(outputPath)); String[] argv = getArgv(true, new String[] { "DATA_COL0" }, conf); argv = Arrays.copyOf(argv, argv.length + 1); argv[argv.length - 1] = "--delete-target-dir"; Sqoop importer = new Sqoop(new ImportTool()); try { int ret = Sqoop.runSqoop(importer, argv); assertTrue("Expected job to go through if target directory" + " does not exist.", 0 == ret); assertTrue(fs.exists(outputPath)); // expecting one _SUCCESS file and one file containing data assertTrue("Expecting two files in the directory.", fs.listStatus(outputPath).length == 2); String[] output = getContent(conf, outputPath); assertEquals("Expected output and actual output should be same.", "meep", output[0]); ret = Sqoop.runSqoop(importer, argv); assertTrue("Expected job to go through if target directory exists.", 0 == ret); assertTrue(fs.exists(outputPath)); // expecting one _SUCCESS file and one file containing data assertTrue("Expecting two files in the directory.", fs.listStatus(outputPath).length == 2); output = getContent(conf, outputPath); assertEquals("Expected output and actual output should be same.", "meep", output[0]); } catch (Exception e) { // In debug mode, ImportException is wrapped in RuntimeException. LOG.info("Got exceptional return (expected: ok). msg is: " + e); } }
From source file:com.cloudera.sqoop.TestAppendUtils.java
License:Apache License
/** @return FileStatus for data files only. */ private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException { FileStatus[] fileStatuses = fs.listStatus(path); ArrayList files = new ArrayList(); Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*"); for (FileStatus fstat : fileStatuses) { String fname = fstat.getPath().getName(); if (!fstat.isDir()) { Matcher mat = patt.matcher(fname); if (mat.matches()) { files.add(fstat);//from w w w. j ava 2 s . c om } } } return (FileStatus[]) files.toArray(new FileStatus[files.size()]); }
From source file:com.cloudera.sqoop.TestIncrementalImport.java
License:Apache License
/** * Look at a directory that should contain files full of an imported 'id' * column. Assert that all numbers in [0, expectedNums) are present * in order./*from w w w. j av a2 s . c o m*/ */ public void assertDirOfNumbers(String tableName, int expectedNums) { try { FileSystem fs = FileSystem.getLocal(new Configuration()); Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR); Path tableDir = new Path(warehouse, tableName); FileStatus[] stats = fs.listStatus(tableDir); String[] fileNames = new String[stats.length]; for (int i = 0; i < stats.length; i++) { fileNames[i] = stats[i].getPath().toString(); } Arrays.sort(fileNames); // Read all the files in sorted order, adding the value lines to the list. List<String> receivedNums = new ArrayList<String>(); for (String fileName : fileNames) { if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName)))); try { while (true) { String s = r.readLine(); if (null == s) { break; } receivedNums.add(s.trim()); } } finally { r.close(); } } assertEquals(expectedNums, receivedNums.size()); // Compare the received values with the expected set. for (int i = 0; i < expectedNums; i++) { assertEquals((int) i, (int) Integer.valueOf(receivedNums.get(i))); } } catch (Exception e) { fail("Got unexpected exception: " + StringUtils.stringifyException(e)); } }
From source file:com.cloudera.sqoop.TestIncrementalImport.java
License:Apache License
/** * Assert that a directory contains a file with exactly one line * in it, containing the prescribed number 'val'. *//* w ww .j a va 2 s . c o m*/ public void assertSpecificNumber(String tableName, int val) { try { FileSystem fs = FileSystem.getLocal(new Configuration()); Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR); Path tableDir = new Path(warehouse, tableName); FileStatus[] stats = fs.listStatus(tableDir); String[] filePaths = new String[stats.length]; for (int i = 0; i < stats.length; i++) { filePaths[i] = stats[i].getPath().toString(); } // Read the first file that is not a hidden file. boolean foundVal = false; for (String filePath : filePaths) { String fileName = new Path(filePath).getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } if (foundVal) { // Make sure we don't have two or more "real" files in the dir. fail("Got an extra data-containing file in this directory."); } BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(filePath)))); try { String s = r.readLine(); if (null == s) { fail("Unexpected empty file " + filePath + "."); } assertEquals(val, (int) Integer.valueOf(s.trim())); String nextLine = r.readLine(); if (nextLine != null) { fail("Expected only one result, but got another line: " + nextLine); } // Successfully got the value we were looking for. foundVal = true; } finally { r.close(); } } } catch (IOException e) { fail("Got unexpected exception: " + StringUtils.stringifyException(e)); } }
From source file:com.cloudera.sqoop.TestMerge.java
License:Apache License
/** * Return true if there's a file in 'dirName' with a line that starts with * 'prefix'.//from w ww .j a v a 2 s . c om */ protected boolean recordStartsWith(String prefix, String dirName) throws Exception { Path warehousePath = new Path(LOCAL_WAREHOUSE_DIR); Path targetPath = new Path(warehousePath, dirName); FileSystem fs = FileSystem.getLocal(new Configuration()); FileStatus[] files = fs.listStatus(targetPath); if (null == files || files.length == 0) { fail("Got no import files!"); } for (FileStatus stat : files) { Path p = stat.getPath(); if (p.getName().startsWith("part-")) { if (checkFileForLine(fs, p, prefix)) { // We found the line. Nothing further to do. return true; } } } return false; }
From source file:com.cloudera.sqoop.testutil.ImportJobTestCase.java
License:Apache License
/** * Do a MapReduce-based import of the table and verify that the results * were imported as expected. (tests readFields(ResultSet) and toString()) * @param expectedVal the value we injected into the table. * @param importCols the columns to import. If null, all columns are used. *//*from ww w.j a va 2 s . c o m*/ protected void verifyImport(String expectedVal, String[] importCols) { // paths to where our output file will wind up. Path tableDirPath = getTablePath(); removeTableDir(); Configuration conf = getConf(); SqoopOptions opts = getSqoopOptions(conf); // run the tool through the normal entry-point. int ret; try { Sqoop importer = new Sqoop(new ImportTool(), conf, opts); ret = Sqoop.runSqoop(importer, getArgv(true, importCols, conf)); } catch (Exception e) { LOG.error("Got exception running Sqoop: " + e.toString()); throw new RuntimeException(e); } // expect a successful return. assertEquals("Failure during job", 0, ret); opts = getSqoopOptions(conf); try { ImportTool importTool = new ImportTool(); opts = importTool.parseArguments(getArgv(false, importCols, conf), conf, opts, true); } catch (Exception e) { fail(e.toString()); } CompilationManager compileMgr = new CompilationManager(opts); String jarFileName = compileMgr.getJarFilename(); ClassLoader prevClassLoader = null; try { prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, getTableName()); // Now open and check all part-files in the table path until we find // a non-empty one that we can verify contains the value. if (!BaseSqoopTestCase.isOnPhysicalCluster()) { conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); } FileSystem fs = FileSystem.get(conf); FileStatus[] stats = fs.listStatus(tableDirPath); if (stats == null || stats.length == 0) { fail("Error: no files in " + tableDirPath); } boolean foundRecord = false; for (FileStatus stat : stats) { if (!stat.getPath().getName().startsWith("part-") && !stat.getPath().getName().startsWith("data-")) { // This isn't a data file. Ignore it. 
continue; } try { Object readValue = SeqFileReader.getFirstValue(stat.getPath().toString()); LOG.info("Read back from sequencefile: " + readValue); foundRecord = true; // Add trailing '\n' to expected value since SqoopRecord.toString() // encodes the record delim. if (null == expectedVal) { assertEquals("Error validating result from SeqFile", "null\n", readValue.toString()); } else { assertEquals("Error validating result from SeqFile", expectedVal + "\n", readValue.toString()); } } catch (EOFException eoe) { // EOF in a file isn't necessarily a problem. We may have some // empty sequence files, which will throw this. Just continue // in the loop. } } if (!foundRecord) { fail("Couldn't read any records from SequenceFiles"); } } catch (IOException ioe) { fail("IOException: " + ioe.toString()); } finally { if (null != prevClassLoader) { ClassLoaderStack.setCurrentClassLoader(prevClassLoader); } } }
From source file:com.cloudera.sqoop.util.AppendUtils.java
License:Apache License
/** * Returns the greatest partition number available for appending, for data * files in targetDir./*from w w w . j a va 2s . c om*/ */ private int getNextPartition(FileSystem fs, Path targetDir) throws IOException { int nextPartition = 0; FileStatus[] existingFiles = fs.listStatus(targetDir); if (existingFiles != null && existingFiles.length > 0) { Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*"); for (FileStatus fileStat : existingFiles) { if (!fileStat.isDir()) { String filename = fileStat.getPath().getName(); Matcher mat = patt.matcher(filename); if (mat.matches()) { int thisPart = Integer.parseInt(mat.group(1)); if (thisPart >= nextPartition) { nextPartition = thisPart; nextPartition++; } } } } } if (nextPartition > 0) { LOG.info("Using found partition " + nextPartition); } return nextPartition; }