List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
From source file:com.inmobi.databus.local.LocalStreamServiceTest.java
License:Apache License
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception { FileStatus[] files = createTestData(2, "/databus/data/stream", true); FileStatus[] stream1 = createTestData(2, "/databus/data/stream1/collector", true); FileStatus[] stream3 = createTestData(number_files, "/databus/data/stream1/collector1/file", true); FileStatus[] stream4 = createTestData(number_files, "/databus/data/stream1/collector2/file", true); FileStatus[] stream2 = createTestData(2, "/databus/data/stream2/collector", true); FileStatus[] stream5 = createTestData(number_files, "/databus/data/stream2/collector1/file", true); FileStatus[] stream6 = createTestData(number_files, "/databus/data/stream2/collector2/file", true); when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/")); when(fs.getUri()).thenReturn(new URI("localhost")); when(fs.listStatus(cluster.getDataDir())).thenReturn(files); when(fs.listStatus(new Path("/databus/data/stream1"))).thenReturn(stream1); when(fs.listStatus(new Path("/databus/data/stream1/collector1"), any(CollectorPathFilter.class))) .thenReturn(stream3);//from w ww .j a v a 2 s . c o m when(fs.listStatus(new Path("/databus/data/stream2"))).thenReturn(stream2); when(fs.listStatus(new Path("/databus/data/stream1/collector2"), any(CollectorPathFilter.class))) .thenReturn(stream4); when(fs.listStatus(new Path("/databus/data/stream2/collector1"), any(CollectorPathFilter.class))) .thenReturn(stream5); when(fs.listStatus(new Path("/databus/data/stream2/collector2"), any(CollectorPathFilter.class))) .thenReturn(stream6); Path file = mock(Path.class); when(file.makeQualified(any(FileSystem.class))).thenReturn(new Path("/databus/data/stream1/collector1/")); }
From source file:com.inmobi.databus.purge.DataPurgerService.java
License:Apache License
/**
 * Returns the status entries that {@code fs} reports for {@code dir}.
 *
 * @param dir directory to list
 * @param fs  file system to query
 * @return whatever {@link FileSystem#listStatus(Path)} returns for {@code dir}
 * @throws Exception if the listing fails
 */
private FileStatus[] getAllFilesInDir(Path dir, FileSystem fs) throws Exception {
  FileStatus[] entries = fs.listStatus(dir);
  return entries;
}
From source file:com.inmobi.databus.readers.TestDatabusEmptyFolders.java
License:Apache License
/**
 * Deletes every plain file found in the sub-directories that share a parent
 * with the minute directory of ten minutes ago, then returns the path of the
 * entry whose name parses to the latest date.
 *
 * @return path of the entry with the latest date among the listed entries
 * @throws IOException if listing or deleting fails, or if the parent
 *                     directory has no entries to choose from
 */
private Path removeFilesIfAny() throws IOException {
  FileSystem fs = FileSystem.get(cluster.getHadoopConf());
  Path streamDir = DatabusUtil.getStreamDir(StreamType.LOCAL, new Path(cluster.getRootDir()), testStream);
  Path minuteDirPath = DatabusStreamReader.getMinuteDirPath(streamDir,
      modifyTime(new Date(), Calendar.MINUTE, -10));
  Path parentDir = minuteDirPath.getParent();

  FileStatus[] fileStatuses = fs.listStatus(parentDir);
  if (fileStatuses == null || fileStatuses.length == 0) {
    // Fail with a descriptive error instead of an index/null failure further down.
    throw new IOException("No entries found under " + parentDir);
  }

  for (FileStatus folder : fileStatuses) {
    if (!folder.isDir()) {
      continue;
    }
    LOG.debug("Folder=" + folder.getPath().toString());
    for (FileStatus file : fs.listStatus(folder.getPath())) {
      if (file.isDir()) {
        continue;
      }
      fs.delete(file.getPath());
    }
  }

  Arrays.sort(fileStatuses, new java.util.Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus o1, FileStatus o2) {
      try {
        // compareTo yields 0 for equal dates, keeping the comparator symmetric as
        // Arrays.sort requires; a "before ? -1 : 1" test reports 1 in both directions.
        return getDateFromFile(o1.getPath().toString())
            .compareTo(getDateFromFile(o2.getPath().toString()));
      } catch (ParseException e) {
        // Unparseable names are treated as equal; log rather than dump to stderr.
        LOG.warn("Could not parse date from " + o1.getPath() + " or " + o2.getPath(), e);
        return 0;
      }
    }
  });
  return fileStatuses[fileStatuses.length - 1].getPath();
}
From source file:com.inmobi.databus.utils.CollapseFilesInDir.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration configuration = new Configuration(); configuration.set("fs.default.name", args[0]); String dir = args[1];/*w w w. ja v a2s . c om*/ FileSystem fs = FileSystem.get(configuration); FileStatus[] fileList = fs.listStatus(new Path(dir)); if (fileList != null) { if (fileList.length > 1) { Set<Path> sourceFiles = new HashSet<Path>(); Set<String> consumePaths = new HashSet<String>(); //inputPath has have multiple files due to backlog //read all and create a tmp file for (int i = 0; i < fileList.length; i++) { Path consumeFilePath = fileList[i].getPath().makeQualified(fs); sourceFiles.add(consumeFilePath); FSDataInputStream fsDataInputStream = fs.open(consumeFilePath); try { while (fsDataInputStream.available() > 0) { String fileName = fsDataInputStream.readLine(); if (fileName != null) { consumePaths.add(fileName.trim()); System.out.println("Adding [" + fileName + "] to pull"); } } } finally { fsDataInputStream.close(); } } Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString()); FSDataOutputStream out = fs.create(finalPath); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); try { for (String consumePath : consumePaths) { System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList"); writer.write(consumePath); writer.write("\n"); } } finally { writer.close(); } LOG.warn("Final File - [" + finalPath + "]"); for (Path deletePath : sourceFiles) { System.out.println("Deleting - [" + deletePath + "]"); fs.delete(deletePath); } } } }
From source file:com.inmobi.grid.fs.test.TestS3NwithSlash.java
License:Apache License
public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); //conf.set("fs.default.name", "s3n://AKIAJWJBBSUODWD7RMEA:LUnvKWWSeFuInkoOpYX%2FbJtDj080EovlioOchGwM@inmobi-grid-emr-dev"); Path path = new Path( "s3n://AKIAJWJBBSUODWD7RMEA:LUnvKWWSeFuInkoOpYX/bJtDj080EovlioOchGwM@inmobi-grid-emr-dev/"); FileSystem fs = path.getFileSystem(conf); FileStatus[] list = fs.listStatus(path); //System.out.println(list[0].getp); }
From source file:com.inmobi.grill.driver.hive.TestHiveDriver.java
License:Apache License
private void validatePersistentResult(GrillResultSet resultSet, String dataFile, String outptuDir, boolean formatNulls) throws Exception { assertTrue(resultSet instanceof HivePersistentResultSet); HivePersistentResultSet persistentResultSet = (HivePersistentResultSet) resultSet; String path = persistentResultSet.getOutputPath(); QueryHandle handle = persistentResultSet.getQueryHandle(); Path actualPath = new Path(path); FileSystem fs = actualPath.getFileSystem(conf); assertEquals(actualPath, fs.makeQualified(new Path(outptuDir, handle.toString()))); List<String> actualRows = new ArrayList<String>(); for (FileStatus stat : fs.listStatus(actualPath)) { FSDataInputStream in = fs.open(stat.getPath()); BufferedReader br = null; try {//from w w w . j av a2s .c o m br = new BufferedReader(new InputStreamReader(in)); String line = ""; while ((line = br.readLine()) != null) { System.out.println("Actual:" + line); actualRows.add(line.trim()); } } finally { if (br != null) { br.close(); } } } BufferedReader br = null; List<String> expectedRows = new ArrayList<String>(); try { br = new BufferedReader(new FileReader(new File(dataFile))); String line = ""; while ((line = br.readLine()) != null) { String row = line.trim(); if (formatNulls) { row += ",-NA-,"; row += line.trim(); } expectedRows.add(row); } } finally { if (br != null) { br.close(); } } assertEquals(actualRows, expectedRows); }
From source file:com.inmobi.grill.server.query.TestQueryService.java
License:Apache License
private void validatePersistentResult(PersistentQueryResult resultset, QueryHandle handle) throws IOException { Assert.assertTrue(resultset.getPersistedURI().endsWith(handle.toString())); Path actualPath = new Path(resultset.getPersistedURI()); FileSystem fs = actualPath.getFileSystem(new Configuration()); List<String> actualRows = new ArrayList<String>(); for (FileStatus fstat : fs.listStatus(actualPath)) { FSDataInputStream in = fs.open(fstat.getPath()); BufferedReader br = null; try {//from w w w . j a va 2 s .c om br = new BufferedReader(new InputStreamReader(in)); String line = ""; while ((line = br.readLine()) != null) { actualRows.add(line); } } finally { if (br != null) { br.close(); } if (in != null) { in.close(); } } } Assert.assertEquals(actualRows.get(0), "1one"); Assert.assertEquals(actualRows.get(1), "\\Ntwo"); Assert.assertEquals(actualRows.get(2), "3\\N"); Assert.assertEquals(actualRows.get(3), "\\N\\N"); Assert.assertEquals(actualRows.get(4), "5"); }
From source file:com.inmobi.messaging.consumer.databus.DatabusConsumer.java
License:Apache License
/**
 * Returns the names of the entries listed directly under {@code baseDir}.
 * Each call increments the {@code numList} counter once.
 *
 * @param fs      file system to query
 * @param baseDir stream directory to list
 * @return entry names; empty (with a warning logged) when the listing is
 *         null or empty
 * @throws IOException if the listing fails
 */
private List<String> getCollectors(FileSystem fs, Path baseDir) throws IOException {
  List<String> names = new ArrayList<String>();
  LOG.debug("Stream dir: " + baseDir);

  FileStatus[] entries = fs.listStatus(baseDir);
  numList++;

  if (entries == null || entries.length == 0) {
    LOG.warn("No collector dirs available in " + baseDir);
    return names;
  }

  for (int i = 0; i < entries.length; i++) {
    names.add(entries[i].getPath().getName());
  }
  return names;
}
From source file:com.intel.hadoop.hbase.dot.TestHiveIntegration.java
License:Apache License
@BeforeClass public static void setUp() throws Exception { Configuration config = TEST_UTIL.getConfiguration(); config.set("hbase.coprocessor.region.classes", "com.intel.hadoop.hbase.dot.access.DataManipulationOps"); config.set("hbase.coprocessor.master.classes", "com.intel.hadoop.hbase.dot.access.DataDefinitionOps"); TEST_UTIL.startMiniCluster(1);/*from www . j a va2 s .c o m*/ TEST_UTIL.startMiniMapReduceCluster(); initialize(TEST_UTIL.getConfiguration()); // 1. To put the test data onto miniDFS, and get the file path FileSystem fs = FileSystem.get(config); FSDataOutputStream output = fs.create(new Path("/tsvfile")); PrintStream out = new PrintStream(output); out.println("row1|row1_fd1|row1_fd2|row1_fd3|row1_fd4"); out.println("row2|row2_fd1|row2_fd2|row2_fd3|row2_fd4"); out.println("row3|row3_fd1|row3_fd2|row3_fd3|row3_fd4"); out.println("row4|row4_fd1|row4_fd2|row4_fd3|row4_fd4"); out.println("row5|row5_fd1|row5_fd2|row5_fd3|row5_fd4"); out.close(); output.close(); // fs.copyFromLocalFile(new Path("./src/test/data/data"), new // Path("/tsvfile")); assertEquals("tsv file name is not correct", fs.listStatus(new Path("/tsvfile"))[0].getPath().getName(), "tsvfile"); }
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * <p>In order, this method: records the destination and exec command in
 * {@code jobConf}; creates a per-job directory under the staging area;
 * chooses and sets the job output (log) path; walks every source path and
 * writes three sequence files into the job directory (source list,
 * destination list, directory list); creates the destination directory when
 * it is missing and more than one source entry was counted; runs
 * {@code checkDuplication} on the destination list; and finally stores the
 * temp directory, the counters and the map count in {@code jobConf}.
 *
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 * @return true if it is necessary to launch a job, i.e. at least one
 *         non-directory entry was found under the sources.
 * @throws IOException if a file system operation fails, if obtaining the
 *         staging directory is interrupted, or if the destination directory
 *         cannot be created
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
  jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
  jobConf.set(EXEC_CMD_LABEL, args.execCmd);

  // set boolean values
  jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
      args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

  // randomId is reused below for the job directory, the default log path and the tmp dir.
  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobConf);
  Path stagingArea;
  try {
    stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
  } catch (InterruptedException e) {
    // The interruption is surfaced to the caller as an IOException with the original as cause.
    throw new IOException(e);
  }

  // NOTE(review): the path is built by plain string concatenation, so no separator is
  // inserted here between the staging area and NAME - confirm that is intended.
  Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
  FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
  FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
  jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

  FileSystem dstfs = args.dst.getFileSystem(conf);

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

  boolean dstExists = dstfs.exists(args.dst);
  boolean dstIsDir = false;
  if (dstExists) {
    dstIsDir = dstfs.getFileStatus(args.dst).isDir();
  }

  // default logPath: used only when args.log is null. It goes next to the destination
  // (under its parent, created if missing) when the destination is missing or is not a
  // directory, and inside the destination otherwise.
  Path logPath = args.log;
  if (logPath == null) {
    String filename = "_" + NAME + "_logs_" + randomId;
    if (!dstExists || !dstIsDir) {
      Path parent = args.dst.getParent();
      if (!dstfs.exists(parent)) {
        dstfs.mkdirs(parent);
      }
      logPath = new Path(parent, filename);
    } else {
      logPath = new Path(args.dst, filename);
    }
  }
  FileOutputFormat.setOutputPath(jobConf, logPath);

  // create src list, dst list
  FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

  // Source list: (LongWritable, FilePair) records.
  Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
  jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
  SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
      FilePair.class, SequenceFile.CompressionType.NONE);

  // Destination list: (Text, Text) records; later passed to checkDuplication.
  Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
  SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
      Text.class, SequenceFile.CompressionType.NONE);

  // Directory list: (Text, FilePair) records.
  Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
  jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
  SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
      FilePair.class, SequenceFile.CompressionType.NONE);

  // handle the case where the destination directory doesn't exist
  // and we've only a single src directory.
  final boolean special = (args.srcs.size() == 1 && !dstExists);
  // srcCount: entries seen; cnsyncf/cbsyncs: files and bytes appended since the last
  // sync of the src/dst writers; dirsyn: directory records since the last dir_writer sync.
  int srcCount = 0, cnsyncf = 0, dirsyn = 0;
  long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
  try {
    for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
      final Path src = srcItr.next();
      FileSystem srcfs = src.getFileSystem(conf);
      FileStatus srcfilestat = srcfs.getFileStatus(src);
      // Relative destination names are computed against this root.
      Path root = special && srcfilestat.isDir() ? src : src.getParent();
      if (srcfilestat.isDir()) {
        ++srcCount;
      }

      // Iterative traversal: entries are popped from the stack and their listings processed.
      Stack<FileStatus> pathstack = new Stack<FileStatus>();
      for (pathstack.push(srcfilestat); !pathstack.empty();) {
        FileStatus cur = pathstack.pop();
        FileStatus[] children = srcfs.listStatus(cur.getPath());
        for (int i = 0; i < children.length; i++) {
          // skipfile is never reassigned in this method, so the !skipfile branches always run.
          boolean skipfile = false;
          final FileStatus child = children[i];
          final String dst = makeRelative(root, child.getPath());
          ++srcCount;

          if (child.isDir()) {
            pathstack.push(child);
          } else {
            if (!skipfile) {
              ++fileCount;
              byteCount += child.getLen();

              if (LOG.isTraceEnabled()) {
                LOG.trace("adding file " + child.getPath());
              }

              ++cnsyncf;
              cbsyncs += child.getLen();
              // Emit a sync marker once enough files or bytes have accumulated.
              if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                src_writer.sync();
                dst_writer.sync();
                cnsyncf = 0;
                cbsyncs = 0L;
              }
            }
          }
          if (!skipfile) {
            // Directories are recorded with length 0, files with their actual length.
            src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst));
          }
          dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
        }

        if (cur.isDir()) {
          String dst = makeRelative(root, cur.getPath());
          dir_writer.append(new Text(dst), new FilePair(cur, dst));
          if (++dirsyn > SYNC_FILE_MAX) {
            dirsyn = 0;
            dir_writer.sync();
          }
        }
      }
    }
  } finally {
    // The three writers are closed whether or not the traversal succeeded.
    checkAndClose(src_writer);
    checkAndClose(dst_writer);
    checkAndClose(dir_writer);
  }

  FileStatus dststatus = null;
  try {
    dststatus = dstfs.getFileStatus(args.dst);
  } catch (FileNotFoundException fnfe) {
    LOG.info(args.dst + " does not exist.");
  }

  // create dest path dir if copying > 1 file
  if (dststatus == null) {
    if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
      // NOTE(review): the message has no space between "create" and the path.
      throw new IOException("Failed to create" + args.dst);
    }
  }

  final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
  checkDuplication(jobfs, dstfilelist, sorted, conf);

  // The tmp dir sits beside the destination when the destination is an existing
  // non-directory, or is missing with exactly one source entry counted; otherwise inside it.
  Path tmpDir = new Path(
      (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
      "_" + NAME + "_tmp_" + randomId);
  jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
  LOG.info("sourcePathsCount=" + srcCount);
  LOG.info("filesToExecCount=" + fileCount);
  LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
  jobConf.setInt(SRC_COUNT_LABEL, srcCount);
  jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
  setMapCount(fileCount, jobConf);
  return fileCount > 0;
}