Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filters the files/directories in the given list of paths using the default path filter.
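
Before the project examples below, here is a minimal, self-contained sketch of the listStatus(Path[]) overload documented above. The class name ListStatusExample, the hdfs://localhost:9000 URI, and the /tmp/logs and /tmp/staging paths are illustrative assumptions, not taken from the examples that follow.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // hypothetical NameNode URI; adjust to your cluster
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000/"), conf);

        // list the entries of several directories in one call;
        // results are filtered by the default path filter
        Path[] dirs = { new Path("/tmp/logs"), new Path("/tmp/staging") };
        FileStatus[] statuses = fs.listStatus(dirs);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + "\t" + status.getLen() + " bytes");
        }
    }
}

The single-Path overload used by most of the examples below behaves the same way for a single directory.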

Usage

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

static Path findOriginalBucket(FileSystem fs, Path directory, int bucket) throws IOException {
    for (FileStatus stat : fs.listStatus(directory)) {
        String name = stat.getPath().getName();
        String numberPart = name.substring(0, name.indexOf('_'));
        if (org.apache.commons.lang3.StringUtils.isNumeric(numberPart)
                && Integer.parseInt(numberPart) == bucket) {
            return stat.getPath();
        }
    }
    throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + directory);
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

private void readMigratePartition(StaffSSControllerInterface sssc, int currentSuperStepCounter)
        throws IOException {
    BufferedReader br = null;
    Path migratePartitionPath = new Path(migratePartitionDir);
    FileSystem fsFileSystem = FileSystem.get(this.getConf().getConf());
    FileStatus[] fs = fsFileSystem.listStatus(migratePartitionPath);
    Path[] listPath = FileUtil.stat2Paths(fs);
    for (Path p : listPath) {
        FSDataInputStream fsInput = fsFileSystem.open(p);
        br = new BufferedReader(new InputStreamReader(fsInput));
        String line = null;
        while (null != (line = br.readLine())) {
            String[] strs = line.split(":");
            this.partitioner.updateMigratePartition(new Text(strs[0]), Integer.parseInt(strs[1]));
        }
    }
}

From source file:com.ckelsel.hadoop.dfs.Test.Test.java

License:Open Source License

public static void main(String[] args) throws Exception {
    String uri = "hdfs://localhost:9000/";
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), config);

    // list the contents of /user/ckelsel
    FileStatus[] statuses = fs.listStatus(new Path("/user/ckelsel"));
    for (FileStatus status : statuses) {
        System.out.println(status);
    }

    // create /user/ckelsel/test.log and write to it
    FSDataOutputStream os = fs.create(new Path("/user/ckelsel/test.log"));
    os.write("Hello World!".getBytes());
    os.flush();
    os.close();

    // read /user/ckelsel/test.log back and print it to stdout
    InputStream is = fs.open(new Path("/user/ckelsel/test.log"));
    IOUtils.copyBytes(is, System.out, 1024, true);
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemView.java

License:Apache License

private static boolean cleanlyDelete(FileSystem fs, Path root, Path dir) {
    try {
        boolean deleted = false;
        if (dir.isAbsolute()) {
            deleted = fs.delete(dir, true /* include any files */ );
        } else {
            // the path should be treated as relative to the root path
            Path absolute = new Path(root, dir);
            deleted = fs.delete(absolute, true /* include any files */ );
            // iterate up to the root, removing empty directories
            for (Path current = absolute.getParent(); !current.equals(root)
                    && !current.isRoot(); current = current.getParent()) {
                final FileStatus[] stats = fs.listStatus(current);
                if (stats == null || stats.length == 0) {
                    // dir is empty and should be removed
                    deleted = fs.delete(current, true) || deleted;
                } else {
                    // all parent directories will be non-empty
                    break;
                }
            }
        }
        return deleted;
    } catch (IOException ex) {
        throw new DatasetIOException("Could not cleanly delete path:" + dir, ex);
    }
}

From source file:com.cloudera.crunch.impl.mr.exec.CrunchJob.java

License:Open Source License

private int getMinPartIndex(Path path, FileSystem fs) throws IOException {
    // Quick and dirty way to ensure unique naming in the directory
    return fs.listStatus(path).length;
}

From source file:com.cloudera.crunch.io.SourceTargetHelper.java

License:Open Source License

public static long getPathSize(FileSystem fs, Path path) throws IOException {
    FileStatus[] stati = fs.listStatus(path);
    if (stati.length == 0) {
        throw new IllegalArgumentException("Path " + path + " does not exist!");
    }
    long size = 0;
    for (FileStatus status : stati) {
        size += status.getLen();
    }
    return size;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

private boolean runElasticSearchMarkerQueries() {
    boolean success = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(elasticsearchMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    in.close();
                    LOG.info("cleaning markerfile @: " + fs.getPath().toString());
                    cleanMarkerFile(fs.getPath().toString());
                    sendESQuery(elasticsearchUrl, new String(fileData));

                }
            }
        }
    } catch (Exception e) {
        success = false;
    }
    return success;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

private boolean runHiveMarkerQueries() {
    boolean queryStatus = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(hiveMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    String[] splitTab = new String(fileData).split("\t");
                    if (splitTab.length == 2) {
                        dstPath = new Path(splitTab[0]);
                        FileSystem hiveFile = dstPath.getFileSystem(conf);
                        if (hiveFile.exists(dstPath)) {
                            LOG.info("marker file data: " + splitTab[1]);
                            if (runHiveQuery(splitTab[1])) {
                                LOG.info("Marker query is successful");
                                in.close();
                                cleanMarkerFile(fs.getPath().toString());
                            } else {
                                LOG.info("Error running marker query, marker point not deleted");
                                queryStatus = false;
                            }

                        } else {
                            LOG.info("marker points to invalid hive file location, deleting the marker");
                            in.close();
                            cleanMarkerFile(fs.getPath().toString());
                        }
                    }
                    //in.close();
                }
            }
        }
        hdfs.close();
    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
    }

    return queryStatus;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);

        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.error("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));

            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);

            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }

        hdfs.close();
        LOG.error("Written file: " + hiveOutputLocation);

        //lets start the purge process, delete all files except the merged file
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.error("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }

    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
    }
    LOG.error("mergeFiles Done merging files");
    return false;
}

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.READDIRHandler.java

License:Apache License

@Override
protected READDIRResponse doHandle(NFS4Handler server, Session session, READDIRRequest request)
        throws NFS4Exception, IOException {
    if (session.getCurrentFileHandle() == null) {
        throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
    }
    Path path = server.getPath(session.getCurrentFileHandle());
    FileSystem fs = session.getFileSystem();
    FileStatus fileStatus = fs.getFileStatus(path);
    if (!fileStatus.isDir()) {
        throw new NFS4Exception(NFS4ERR_NOTDIR);
    }

    FileStatus[] fileStati = fs.listStatus(path);
    if (fileStati == null) {
        // we have already checked that the dir exists, so this means it's empty
        fileStati = new FileStatus[0];
    }
    Arrays.sort(fileStati);
    // low cookie numbers are "special" in the linux kernel
    // since we don't return . and .. they fake them with low #s
    long verifer = fileStati.length;
    long cookie = request.getCookie();
    if (cookie == 0) {
        cookie += NFS4_COOKIE_OFFSET;
    } else {
        cookie++;
    }
    long requestVerifer = Bytes.toLong(request.getCookieVerifer().getData());
    if ((requestVerifer > 0 && verifer != requestVerifer)
            || (cookie > 0 && cookie > (fileStati.length + NFS4_COOKIE_OFFSET))) {
        LOGGER.warn("BAD COOKIE verifier = " + verifer + ", request.getVerifier() = " + requestVerifer
                + ", cookie = " + cookie + ", dirLength = " + fileStati.length);
        throw new NFS4Exception(NFS4ERR_BAD_COOKIE);
    }
    // TODO improve this guess
    // we can only send maxCount bytes including xdr overhead
    // save 100 bytes for the readDir header and for RPC header
    // I saw about 100 bytes in wireshark for linux and pulled
    // the RPC number out of my arse. I guessed high.
    int messageSize = 100 + 150;
    int maxMessageSize = request.getMaxCount();
    // TODO this check should be after we add the first entry to the response
    if (messageSize > maxMessageSize) {
        throw new NFS4Exception(NFS4ERR_TOOSMALL);
    }
    List<DirectoryEntry> entries = Lists.newArrayList();
    for (; cookie < (fileStati.length + NFS4_COOKIE_OFFSET); cookie++) {
        fileStatus = fileStati[(int) (cookie - NFS4_COOKIE_OFFSET)];
        // we have to force creation of a file handle because that creates
        // a fileid which is required later in the getAttrs.
        server.createFileHandle(fileStatus.getPath());
        DirectoryEntry entry = readAttrs(server, session, request.getAttrs(), fs, fileStatus);
        entry.setName(fileStatus.getPath().getName());
        entry.setCookie(cookie);

        // If this entry is more than we can send
        // break out and the rest will be handled
        // in a future call

        // below is ugly as hell but this code is not hot
        RPCBuffer buffer = new RPCBuffer();
        entry.write(buffer);
        buffer.flip();
        int entryLength = buffer.length();
        if (messageSize + entryLength >= maxMessageSize) {
            break;
        }
        messageSize += entryLength;
        entries.add(entry);
        server.incrementMetric("NFS_READDIR_ENTRIES", 1);
    }
    DirectoryList entryList = new DirectoryList();
    entryList.setDirEntries(entries);
    entryList.setEOF(cookie == (fileStati.length + NFS4_COOKIE_OFFSET));

    READDIRResponse response = createResponse();
    response.setStatus(NFS4_OK);
    response.setCookieVerifer(new OpaqueData8(verifer));
    response.setDirectoryList(entryList);
    return response;
}