Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of the listStatus method of org.apache.hadoop.fs.FileSystem.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.inmobi.databus.local.LocalStreamServiceTest.java

License:Apache License

/**
 * Stubs a mocked {@link FileSystem} so that listing the cluster data directory, the two stream
 * directories and their four collector directories returns arrays built by
 * {@code createTestData}.
 *
 * <p>The collector listings are stubbed on the two-argument {@code listStatus(Path, filter)}
 * overload. Mockito requires that once one argument is a matcher, every argument is a matcher,
 * so the concrete path is wrapped in {@code eq(...)}; passing it raw next to {@code any(...)}
 * fails with {@code InvalidUseOfMatchersException}.
 *
 * @param fs      the Mockito mock to stub
 * @param cluster supplies the data directory whose listing is stubbed
 * @throws Exception if building the test data or the stub URI fails
 */
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
    FileStatus[] files = createTestData(2, "/databus/data/stream", true);

    FileStatus[] stream1 = createTestData(2, "/databus/data/stream1/collector", true);
    FileStatus[] stream3 = createTestData(number_files, "/databus/data/stream1/collector1/file", true);
    FileStatus[] stream4 = createTestData(number_files, "/databus/data/stream1/collector2/file", true);

    FileStatus[] stream2 = createTestData(2, "/databus/data/stream2/collector", true);
    FileStatus[] stream5 = createTestData(number_files, "/databus/data/stream2/collector1/file", true);
    FileStatus[] stream6 = createTestData(number_files, "/databus/data/stream2/collector2/file", true);

    when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
    when(fs.getUri()).thenReturn(new URI("localhost"));

    // Single-argument listings: plain values only, so no matchers are needed.
    when(fs.listStatus(cluster.getDataDir())).thenReturn(files);
    when(fs.listStatus(new Path("/databus/data/stream1"))).thenReturn(stream1);
    when(fs.listStatus(new Path("/databus/data/stream2"))).thenReturn(stream2);

    // Filtered listings: every argument must be a matcher, hence eq(...) around the path.
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/databus/data/stream1/collector1")),
            any(CollectorPathFilter.class))).thenReturn(stream3);
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/databus/data/stream1/collector2")),
            any(CollectorPathFilter.class))).thenReturn(stream4);
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/databus/data/stream2/collector1")),
            any(CollectorPathFilter.class))).thenReturn(stream5);
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/databus/data/stream2/collector2")),
            any(CollectorPathFilter.class))).thenReturn(stream6);
}

From source file:com.inmobi.databus.purge.DataPurgerService.java

License:Apache License

/**
 * Returns whatever {@code fs.listStatus(dir)} reports for the given directory.
 *
 * @param dir the directory to list
 * @param fs  the file system that owns {@code dir}
 * @return the statuses returned by the file system, unmodified
 * @throws Exception if the listing fails
 */
private FileStatus[] getAllFilesInDir(Path dir, FileSystem fs) throws Exception {
    final FileStatus[] entries = fs.listStatus(dir);
    return entries;
}

From source file:com.inmobi.databus.readers.TestDatabusEmptyFolders.java

License:Apache License

/**
 * Deletes every non-directory entry found directly inside each sub-directory of the parent of
 * the computed minute directory, then returns the path of the entry that sorts last by the
 * date parsed from its path.
 *
 * <p>The minute directory is derived from {@code modifyTime(new Date(), Calendar.MINUTE, -10)}
 * (presumably "ten minutes ago" - confirm against {@code modifyTime}).
 *
 * @return the path of the listed entry with the greatest parsed date
 * @throws IOException if listing or deleting fails
 */
private Path removeFilesIfAny() throws IOException {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());
    Path streamDir = DatabusUtil.getStreamDir(StreamType.LOCAL, new Path(cluster.getRootDir()), testStream);
    Path minuteDirPath = DatabusStreamReader.getMinuteDirPath(streamDir,
            modifyTime(new Date(), Calendar.MINUTE, -10));
    FileStatus[] fileStatuses = fs.listStatus(minuteDirPath.getParent());
    for (FileStatus folder : fileStatuses) {
        if (!folder.isDir()) {
            continue;
        }
        LOG.debug("Folder=" + folder.getPath().toString());
        for (FileStatus file : fs.listStatus(folder.getPath())) {
            if (!file.isDir()) {
                // Only plain files are removed, so a non-recursive delete is sufficient.
                fs.delete(file.getPath(), false);
            }
        }
    }
    Arrays.sort(fileStatuses, new java.util.Comparator<FileStatus>() {

        @Override
        public int compare(FileStatus o1, FileStatus o2) {
            try {
                // compareTo yields 0 for equal dates; returning 1 for "not before" would
                // violate the Comparator contract and can make the sort throw.
                return getDateFromFile(o1.getPath().toString())
                        .compareTo(getDateFromFile(o2.getPath().toString()));
            } catch (ParseException e) {
                // Entries whose path carries no parsable date are treated as equal.
                LOG.warn("Could not parse a date from " + o1.getPath() + " or " + o2.getPath(), e);
                return 0;
            }
        }
    });
    return fileStatuses[fileStatuses.length - 1].getPath();
}

From source file:com.inmobi.databus.utils.CollapseFilesInDir.java

License:Apache License

/**
 * Collapses the files of one directory into a single file.
 *
 * <p>Each file in the directory is read line by line; every trimmed line is collected into a
 * set (so duplicates are dropped). When the directory holds more than one file, the collected
 * lines are written to a new file named after the current time in milliseconds, and the
 * original files are deleted. A directory with zero or one file is left untouched.
 *
 * <p>Files are read and written as UTF-8 so that non-ASCII entries survive the round trip.
 *
 * @param args {@code args[0]} is the value for {@code fs.default.name}, {@code args[1]} the
 *             directory to collapse
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if any file system operation fails
 */
public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        throw new IllegalArgumentException("Usage: CollapseFilesInDir <fs.default.name> <dir>");
    }
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", args[0]);
    String dir = args[1];
    FileSystem fs = FileSystem.get(configuration);
    FileStatus[] fileList = fs.listStatus(new Path(dir));
    if (fileList == null || fileList.length <= 1) {
        // Nothing to collapse.
        return;
    }

    Set<Path> sourceFiles = new HashSet<Path>();
    Set<String> consumePaths = new HashSet<String>();
    // The input directory can hold multiple files due to backlog: read them all.
    for (FileStatus status : fileList) {
        Path consumeFilePath = status.getPath().makeQualified(fs);
        sourceFiles.add(consumeFilePath);
        FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
        try {
            // readLine() == null is the end-of-stream signal; available() is not a reliable one.
            java.io.BufferedReader reader = new java.io.BufferedReader(
                    new java.io.InputStreamReader(fsDataInputStream, "UTF-8"));
            String fileName;
            while ((fileName = reader.readLine()) != null) {
                consumePaths.add(fileName.trim());
                System.out.println("Adding [" + fileName + "] to pull");
            }
        } finally {
            fsDataInputStream.close();
        }
    }

    Path finalPath = new Path(dir, Long.toString(System.currentTimeMillis()));
    FSDataOutputStream out = fs.create(finalPath);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
    try {
        for (String consumePath : consumePaths) {
            System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
            writer.write(consumePath);
            writer.write("\n");
        }
    } finally {
        writer.close();
    }
    LOG.warn("Final File - [" + finalPath + "]");

    // The merged file is complete; the originals are plain files, so delete non-recursively.
    for (Path deletePath : sourceFiles) {
        System.out.println("Deleting - [" + deletePath + "]");
        fs.delete(deletePath, false);
    }
}

From source file:com.inmobi.grid.fs.test.TestS3NwithSlash.java

License:Apache License

/**
 * Lists the root of the {@code inmobi-grid-emr-dev} bucket through an {@code s3n://} URI that
 * carries the credentials in its user-info part, to exercise secret keys that contain a slash.
 *
 * <p>The credentials are read from the {@code AWS_ACCESS_KEY_ID} and
 * {@code AWS_SECRET_ACCESS_KEY} environment variables instead of being hard-coded; secrets
 * must never be committed to source. Any key that was previously embedded in this file should
 * be considered compromised and rotated.
 *
 * @param args unused
 * @throws IllegalStateException if either environment variable is missing or empty
 * @throws IOException if the file system cannot be created or the listing fails
 */
public static void main(String[] args) throws IOException {
    String accessKey = System.getenv("AWS_ACCESS_KEY_ID");
    String secretKey = System.getenv("AWS_SECRET_ACCESS_KEY");
    if (accessKey == null || accessKey.length() == 0 || secretKey == null || secretKey.length() == 0) {
        throw new IllegalStateException(
                "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must be set in the environment");
    }

    Configuration conf = new Configuration();
    // The secret is embedded verbatim (not URL-encoded), which is the case under test.
    Path path = new Path("s3n://" + accessKey + ":" + secretKey + "@inmobi-grid-emr-dev/");
    FileSystem fs = path.getFileSystem(conf);
    // The listing itself is the check: a failure surfaces as an exception.
    fs.listStatus(path);
}

From source file:com.inmobi.grill.driver.hive.TestHiveDriver.java

License:Apache License

/**
 * Asserts that a result set is a {@code HivePersistentResultSet} written under
 * {@code outptuDir/<query handle>} and that its rows match the rows of a local data file.
 *
 * <p>Every line of every file under the output path is trimmed and collected (and echoed to
 * standard output). Every line of {@code dataFile} is trimmed; when {@code formatNulls} is set
 * the expected row becomes the trimmed line, then {@code ",-NA-,"}, then the trimmed line again.
 *
 * @param resultSet   the result set to check
 * @param dataFile    local file holding the expected rows
 * @param outptuDir   directory under which the result is expected
 * @param formatNulls whether to build the expected rows in the null-formatted shape
 * @throws Exception if reading either side fails
 */
private void validatePersistentResult(GrillResultSet resultSet, String dataFile, String outptuDir,
        boolean formatNulls) throws Exception {
    assertTrue(resultSet instanceof HivePersistentResultSet);
    HivePersistentResultSet persisted = (HivePersistentResultSet) resultSet;
    Path actualPath = new Path(persisted.getOutputPath());
    QueryHandle handle = persisted.getQueryHandle();

    FileSystem fs = actualPath.getFileSystem(conf);
    assertEquals(actualPath, fs.makeQualified(new Path(outptuDir, handle.toString())));

    // Rows actually produced by the query.
    List<String> actualRows = new ArrayList<String>();
    for (FileStatus stat : fs.listStatus(actualPath)) {
        BufferedReader resultReader = new BufferedReader(new InputStreamReader(fs.open(stat.getPath())));
        try {
            for (String row = resultReader.readLine(); row != null; row = resultReader.readLine()) {
                System.out.println("Actual:" + row);
                actualRows.add(row.trim());
            }
        } finally {
            resultReader.close();
        }
    }

    // Rows expected according to the local data file.
    List<String> expectedRows = new ArrayList<String>();
    BufferedReader dataReader = new BufferedReader(new FileReader(new File(dataFile)));
    try {
        for (String raw = dataReader.readLine(); raw != null; raw = dataReader.readLine()) {
            String trimmed = raw.trim();
            expectedRows.add(formatNulls ? trimmed + ",-NA-," + trimmed : trimmed);
        }
    } finally {
        dataReader.close();
    }

    assertEquals(actualRows, expectedRows);
}

From source file:com.inmobi.grill.server.query.TestQueryService.java

License:Apache License

/**
 * Asserts that a persisted query result lives at a URI ending in the query handle and that its
 * first five lines are the fixed expected rows.
 *
 * @param resultset the persisted result to check
 * @param handle    the handle of the query that produced it
 * @throws IOException if the persisted files cannot be listed or read
 */
private void validatePersistentResult(PersistentQueryResult resultset, QueryHandle handle) throws IOException {
    Assert.assertTrue(resultset.getPersistedURI().endsWith(handle.toString()));

    Path resultDir = new Path(resultset.getPersistedURI());
    FileSystem fs = resultDir.getFileSystem(new Configuration());

    // Collect every line of every file under the result directory, untrimmed.
    List<String> actualRows = new ArrayList<String>();
    for (FileStatus part : fs.listStatus(resultDir)) {
        FSDataInputStream in = fs.open(part.getPath());
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(in));
            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
                actualRows.add(row);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
            if (in != null) {
                in.close();
            }
        }
    }

    final String[] expectedRows = { "1one", "\\Ntwo", "3\\N", "\\N\\N", "5" };
    for (int i = 0; i < expectedRows.length; i++) {
        Assert.assertEquals(actualRows.get(i), expectedRows[i]);
    }
}

From source file:com.inmobi.messaging.consumer.databus.DatabusConsumer.java

License:Apache License

/**
 * Returns the names of all entries directly under the given stream directory.
 *
 * <p>Each call increments the {@code numList} counter after listing. When the listing is
 * {@code null} or empty a warning is logged and an empty list is returned.
 *
 * @param fs      file system to query
 * @param baseDir stream directory whose entries are treated as collectors
 * @return the last path component of every listed entry, possibly empty
 * @throws IOException if the listing fails
 */
private List<String> getCollectors(FileSystem fs, Path baseDir) throws IOException {
    final List<String> names = new ArrayList<String>();
    LOG.debug("Stream dir: " + baseDir);
    final FileStatus[] entries = fs.listStatus(baseDir);
    numList++;

    if (entries == null || entries.length == 0) {
        LOG.warn("No collector dirs available in " + baseDir);
        return names;
    }
    for (FileStatus entry : entries) {
        names.add(entry.getPath().getName());
    }
    return names;
}

From source file:com.intel.hadoop.hbase.dot.TestHiveIntegration.java

License:Apache License

/**
 * Starts the mini HBase and MapReduce clusters with the DOT coprocessors configured, then
 * writes a five-row, pipe-separated test file to {@code /tsvfile} on the test file system and
 * verifies that it is listed under that name.
 *
 * @throws Exception if a cluster fails to start or the test data cannot be written
 */
@BeforeClass
public static void setUp() throws Exception {
    Configuration config = TEST_UTIL.getConfiguration();
    config.set("hbase.coprocessor.region.classes", "com.intel.hadoop.hbase.dot.access.DataManipulationOps");
    config.set("hbase.coprocessor.master.classes", "com.intel.hadoop.hbase.dot.access.DataDefinitionOps");
    TEST_UTIL.startMiniCluster(1);
    TEST_UTIL.startMiniMapReduceCluster();
    initialize(TEST_UTIL.getConfiguration());

    // 1. To put the test data onto miniDFS, and get the file path
    FileSystem fs = FileSystem.get(config);
    PrintStream out = new PrintStream(fs.create(new Path("/tsvfile")));
    try {
        out.println("row1|row1_fd1|row1_fd2|row1_fd3|row1_fd4");
        out.println("row2|row2_fd1|row2_fd2|row2_fd3|row2_fd4");
        out.println("row3|row3_fd1|row3_fd2|row3_fd3|row3_fd4");
        out.println("row4|row4_fd1|row4_fd2|row4_fd3|row4_fd4");
        out.println("row5|row5_fd1|row5_fd2|row5_fd3|row5_fd4");
        // PrintStream swallows IOExceptions; surface a failed write explicitly.
        if (out.checkError()) {
            throw new java.io.IOException("Failed to write test data to /tsvfile");
        }
    } finally {
        // Closing the PrintStream also closes the underlying file system stream.
        out.close();
    }

    // Message-first assertEquals takes the expected value before the actual one.
    assertEquals("tsv file name is not correct", "tsvfile",
            fs.listStatus(new Path("/tsvfile"))[0].getPath().getName());
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments/*from  w w  w . j  a  va 2s .c  om*/
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    //set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}