Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filters the files/directories in the given list of paths using the default path filter.
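
Before the project examples below, here is a minimal, self-contained sketch of the listStatus(Path[]) overload documented above. The class name ListStatusExample, the hdfs://localhost:9000 URI, and the /tmp/logs and /tmp/staging paths are illustrative assumptions, not taken from the examples that follow.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // hypothetical NameNode URI; adjust to your cluster
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000/"), conf);

        // list the entries of several directories in one call;
        // results are filtered by the default path filter
        Path[] dirs = { new Path("/tmp/logs"), new Path("/tmp/staging") };
        FileStatus[] statuses = fs.listStatus(dirs);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + "\t" + status.getLen() + " bytes");
        }
    }
}

The single-Path overload used by most of the examples below behaves the same way for a single directory.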

Usage

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

static Path findOriginalBucket(FileSystem fs, Path directory, int bucket) throws IOException {
    for (FileStatus stat : fs.listStatus(directory)) {
        String name = stat.getPath().getName();
        String numberPart = name.substring(0, name.indexOf('_'));
        if (org.apache.commons.lang3.StringUtils.isNumeric(numberPart)
                && Integer.parseInt(numberPart) == bucket) {
            return stat.getPath();
        }
    }
    throw new IllegalArgumentException("Can't find bucket " + bucket + " in " + directory);
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

private void readMigratePartition(StaffSSControllerInterface sssc, int currentSuperStepCounter)
        throws IOException {
    BufferedReader br = null;
    Path migratePartitionPath = new Path(migratePartitionDir);
    FileSystem fsFileSystem = FileSystem.get(this.getConf().getConf());
    FileStatus[] fs = fsFileSystem.listStatus(migratePartitionPath);
    Path[] listPath = FileUtil.stat2Paths(fs);
    for (Path p : listPath) {
        FSDataInputStream fsInput = fsFileSystem.open(p);
        br = new BufferedReader(new InputStreamReader(fsInput));
        String line = null;
        while (null != (line = br.readLine())) {
            String[] strs = line.split(":");
            this.partitioner.updateMigratePartition(new Text(strs[0]), Integer.parseInt(strs[1]));
        }
    }
}

From source file:com.ckelsel.hadoop.dfs.Test.Test.java

License:Open Source License

public static void main(String[] args) throws Exception {
    String uri = "hdfs://localhost:9000/";
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), config);

    // list the contents of /user/ckelsel
    FileStatus[] statuses = fs.listStatus(new Path("/user/ckelsel"));
    for (FileStatus status : statuses) {
        System.out.println(status);
    }

    // create /user/ckelsel/test.log and write to it
    FSDataOutputStream os = fs.create(new Path("/user/ckelsel/test.log"));
    os.write("Hello World!".getBytes());
    os.flush();
    os.close();

    // read /user/ckelsel/test.log back and print it to stdout
    InputStream is = fs.open(new Path("/user/ckelsel/test.log"));
    IOUtils.copyBytes(is, System.out, 1024, true);
}

From source file:com.cloudera.cdk.data.filesystem.FileSystemView.java

License:Apache License

private static boolean cleanlyDelete(FileSystem fs, Path root, Path dir) {
    try {
        boolean deleted = false;
        if (dir.isAbsolute()) {
            deleted = fs.delete(dir, true /* include any files */ );
        } else {
            // the path should be treated as relative to the root path
            Path absolute = new Path(root, dir);
            deleted = fs.delete(absolute, true /* include any files */ );
            // iterate up to the root, removing empty directories
            for (Path current = absolute.getParent(); !current.equals(root)
                    && !current.isRoot(); current = current.getParent()) {
                final FileStatus[] stats = fs.listStatus(current);
                if (stats == null || stats.length == 0) {
                    // dir is empty and should be removed
                    deleted = fs.delete(current, true) || deleted;
                } else {
                    // all parent directories will be non-empty
                    break;
                }
            }
        }
        return deleted;
    } catch (IOException ex) {
        throw new DatasetIOException("Could not cleanly delete path:" + dir, ex);
    }
}

From source file:com.cloudera.crunch.impl.mr.exec.CrunchJob.java

License:Open Source License

private int getMinPartIndex(Path path, FileSystem fs) throws IOException {
    // Quick and dirty way to ensure unique naming in the directory
    return fs.listStatus(path).length;
}

From source file:com.cloudera.crunch.io.SourceTargetHelper.java

License:Open Source License

public static long getPathSize(FileSystem fs, Path path) throws IOException {
    FileStatus[] stati = fs.listStatus(path);
    if (stati.length == 0) {
        throw new IllegalArgumentException("Path " + path + " does not exist!");
    }
    long size = 0;
    for (FileStatus status : stati) {
        size += status.getLen();
    }
    return size;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

private boolean runElasticSearchMarkerQueries() {
    boolean success = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(elasticsearchMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    in.close();
                    LOG.info("cleaning markerfile @: " + fs.getPath().toString());
                    cleanMarkerFile(fs.getPath().toString());
                    sendESQuery(elasticsearchUrl, new String(fileData));

                }
            }
        }
    } catch (Exception e) {
        success = false;
    }
    return success;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

private boolean runHiveMarkerQueries() {
    boolean queryStatus = true;
    FileSystem hdfs;
    FSDataInputStream in;
    dstPath = new Path(hiveMarkerFolder);
    LOG.info("DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);
        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("File marker path: " + fs.getPath());
                    in = hdfs.open(fs.getPath());
                    byte[] fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    String[] splitTab = new String(fileData).split("\t");
                    if (splitTab.length == 2) {
                        dstPath = new Path(splitTab[0]);
                        FileSystem hiveFile = dstPath.getFileSystem(conf);
                        if (hiveFile.exists(dstPath)) {
                            LOG.info("marker file data: " + splitTab[1]);
                            if (runHiveQuery(splitTab[1])) {
                                LOG.info("Marker query is successful");
                                in.close();
                                cleanMarkerFile(fs.getPath().toString());
                            } else {
                                LOG.info("Error running marker query, marker point not deleted");
                                queryStatus = false;
                            }

                        } else {
                            LOG.info("marker points to invalid hive file location, deleting the marker");
                            in.close();
                            cleanMarkerFile(fs.getPath().toString());
                        }
                    }
                    //in.close();
                }
            }
        }
        hdfs.close();
    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
    }

    return queryStatus;
}

From source file:com.cloudera.flume.handlers.hive.MarkerStore.java

License:Apache License

public boolean mergeFiles(String folder, Path file, String hiveOutputLocation) {
    FileSystem hdfs;
    FSDataInputStream in;
    FSDataOutputStream out;
    List<Path> fileCollection = new ArrayList<Path>();
    dstPath = new Path(folder);
    LOG.info("mergeFiles DSTPATH: " + dstPath);
    try {
        hdfs = dstPath.getFileSystem(conf);

        if (hdfs.exists(dstPath)) {
            FileStatus[] fileListing = hdfs.listStatus(dstPath);
            LOG.error("Creating file @: " + hiveOutputLocation);
            out = hdfs.create(new Path(hiveOutputLocation));

            in = hdfs.open(file);
            byte[] fileData = new byte[(int) hdfs.getFileStatus(file).getLen()];
            in.readFully(fileData);
            out.write(fileData);

            for (FileStatus fs : fileListing) {
                if (!fs.isDir()) {
                    LOG.info("mergeFiles File marker path: " + fs.getPath());
                    fileCollection.add(fs.getPath());
                    in = hdfs.open(fs.getPath());
                    fileData = new byte[(int) fs.getLen()];
                    in.readFully(fileData);
                    out.write(fileData);
                }
            }
            out.close();
        }

        hdfs.close();
        LOG.error("Written file: " + hiveOutputLocation);

        //lets start the purge process, delete all files except the merged file
        hdfs = dstPath.getFileSystem(conf);
        for (Path p : fileCollection) {
            if (hdfs.delete(p, false)) {
                LOG.error("Successfully deleted: " + p);
            } else {
                LOG.error("Error deleting file: " + p);
            }
        }

    } catch (IOException e) {
        LOG.error("ERROR running runMarkerQueries:" + e.getMessage());
    }
    LOG.error("mergeFiles Done merging files");
    return false;
}

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.handlers.READDIRHandler.java

License:Apache License

@Override
protected READDIRResponse doHandle(NFS4Handler server, Session session, READDIRRequest request)
        throws NFS4Exception, IOException {
    if (session.getCurrentFileHandle() == null) {
        throw new NFS4Exception(NFS4ERR_NOFILEHANDLE);
    }
    Path path = server.getPath(session.getCurrentFileHandle());
    FileSystem fs = session.getFileSystem();
    FileStatus fileStatus = fs.getFileStatus(path);
    if (!fileStatus.isDir()) {
        throw new NFS4Exception(NFS4ERR_NOTDIR);
    }

    FileStatus[] fileStati = fs.listStatus(path);
    if (fileStati == null) {
        // we have already checked that the dir exists, so this means it's empty
        fileStati = new FileStatus[0];
    }
    Arrays.sort(fileStati);
    // low cookie numbers are "special" in the linux kernel
    // since we don't return . and .. they fake them with low #s
    long verifer = fileStati.length;
    long cookie = request.getCookie();
    if (cookie == 0) {
        cookie += NFS4_COOKIE_OFFSET;
    } else {
        cookie++;
    }
    long requestVerifer = Bytes.toLong(request.getCookieVerifer().getData());
    if ((requestVerifer > 0 && verifer != requestVerifer)
            || (cookie > 0 && cookie > (fileStati.length + NFS4_COOKIE_OFFSET))) {
        LOGGER.warn("BAD COOKIE verifier = " + verifer + ", request.getVerifier() = " + requestVerifer
                + ", cookie = " + cookie + ", dirLength = " + fileStati.length);
        throw new NFS4Exception(NFS4ERR_BAD_COOKIE);
    }
    // TODO improve this guess
    // we can only send maxCount bytes including xdr overhead
    // save 100 bytes for the readDir header and for RPC header
    // I saw about 100 bytes in wireshark for linux and pulled
    // the RPC number out of my arse. I guessed high.
    int messageSize = 100 + 150;
    int maxMessageSize = request.getMaxCount();
    // TODO this check should be after we add the first entry to the response
    if (messageSize > maxMessageSize) {
        throw new NFS4Exception(NFS4ERR_TOOSMALL);
    }
    List<DirectoryEntry> entries = Lists.newArrayList();
    for (; cookie < (fileStati.length + NFS4_COOKIE_OFFSET); cookie++) {
        fileStatus = fileStati[(int) (cookie - NFS4_COOKIE_OFFSET)];
        // we have to force creation of a file handle because that creates
        // a fileid which is required later in the getAttrs.
        server.createFileHandle(fileStatus.getPath());
        DirectoryEntry entry = readAttrs(server, session, request.getAttrs(), fs, fileStatus);
        entry.setName(fileStatus.getPath().getName());
        entry.setCookie(cookie);

        // If this entry is more than we can send
        // break out and the rest will be handled
        // in a future call

        // below is ugly as hell but this code is not hot
        RPCBuffer buffer = new RPCBuffer();
        entry.write(buffer);
        buffer.flip();
        int entryLength = buffer.length();
        if (messageSize + entryLength >= maxMessageSize) {
            break;
        }
        messageSize += entryLength;
        entries.add(entry);
        server.incrementMetric("NFS_READDIR_ENTRIES", 1);
    }
    DirectoryList entryList = new DirectoryList();
    entryList.setDirEntries(entries);
    entryList.setEOF(cookie == (fileStati.length + NFS4_COOKIE_OFFSET));

    READDIRResponse response = createResponse();
    response.setStatus(NFS4_OK);
    response.setCookieVerifer(new OpaqueData8(verifer));
    response.setDirectoryList(entryList);
    return response;
}