Example usage for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filters the files/directories in the given list of paths using the default path filter.
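
For orientation, here is a minimal sketch of calling this overload directly, before the project-specific examples below. It assumes a filesystem reachable through a default Configuration (local or HDFS); the input paths /data/in1 and /data/in2 are hypothetical placeholders.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // listStatus(Path[]) concatenates the statuses of the entries
        // found under each given path, applying the default
        // (accept-all) path filter.
        Path[] inputs = { new Path("/data/in1"), new Path("/data/in2") };
        for (FileStatus status : fs.listStatus(inputs)) {
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}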

Usage

From source file: com.knewton.mapreduce.io.SSTableInputFormat.java

License: Apache License

/**
 * Expands all directories passed as input and keeps only valid data tables.
 *
 * @return A list of all the data tables found under the input directories.
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    Configuration conf = job.getConfiguration();
    List<FileStatus> files = super.listStatus(job);
    DataTablePathFilter dataTableFilter = getDataTableFilter(conf);
    files = cleanUpBackupDir(files);
    for (int i = 0; i < files.size(); i++) {
        FileStatus file = files.get(i);
        Path p = file.getPath();
        // Expand if directory
        if (file.isDirectory() && p != null) {
            LOG.info("Expanding {}", p);
            FileSystem fs = p.getFileSystem(conf);
            FileStatus[] children = fs.listStatus(p);
            List<FileStatus> listChildren = Lists.newArrayList(children);
            listChildren = cleanUpBackupDir(listChildren);
            files.addAll(i + 1, listChildren);
        }
        if (!dataTableFilter.accept(file.getPath())) {
            LOG.info("Removing {}", file.getPath());
            files.remove(i);
            i--;
        }
    }
    return files;
}

From source file: com.kxen.han.projection.giraph.BspCase.java

License: Apache License

/**
 * Get the single part file status and make sure there is only one part
 *
 * @param conf Configuration to get the file system from
 * @param partDirPath Directory where the single part file should exist
 * @return Single part file status
 * @throws IOException
 */
public static FileStatus getSinglePartFileStatus(Configuration conf, Path partDirPath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileStatus singlePartFileStatus = null;
    int partFiles = 0;
    for (FileStatus fileStatus : fs.listStatus(partDirPath)) {
        if (fileStatus.getPath().getName().equals("part-m-00000")) {
            singlePartFileStatus = fileStatus;
        }
        if (fileStatus.getPath().getName().startsWith("part-m-")) {
            ++partFiles;
        }
    }

    Preconditions.checkState(partFiles == 1,
            "getSinglePartFile: Part file count should be 1, but is " + partFiles);

    return singlePartFileStatus;
}

From source file: com.kylinolap.dict.lookup.HiveTable.java

License: Apache License

private FileStatus findOnlyFile(String hdfsDir, FileSystem fs) throws FileNotFoundException, IOException {
    FileStatus[] files = fs.listStatus(new Path(hdfsDir));
    ArrayList<FileStatus> nonZeroFiles = Lists.newArrayList();
    for (FileStatus f : files) {
        if (f.getLen() > 0)
            nonZeroFiles.add(f);
    }
    if (nonZeroFiles.size() != 1)
        throw new IllegalStateException(
                "Expected one and only one non-zero file under " + hdfsDir + ", but found " + nonZeroFiles.size());
    return nonZeroFiles.get(0);
}

From source file: com.kylinolap.job.hadoop.AbstractHadoopJob.java

License: Apache License

public void addInputDirs(String input, Job job) throws IOException {
    for (String inp : StringSplitter.split(input, ",")) {
        inp = inp.trim();
        if (inp.endsWith("/*")) {
            inp = inp.substring(0, inp.length() - 2);
            FileSystem fs = FileSystem.get(job.getConfiguration());
            Path path = new Path(inp);
            FileStatus[] fileStatuses = fs.listStatus(path);
            boolean hasDir = false;
            for (FileStatus stat : fileStatuses) {
                if (stat.isDirectory()) {
                    hasDir = true;
                    addInputDirs(stat.getPath().toString(), job);
                }
            }
            if (fileStatuses.length > 0 && !hasDir) {
                addInputDirs(path.toString(), job);
            }
        } else {
            System.out.println("Add input " + inp);
            FileInputFormat.addInputPath(job, new Path(inp));
        }
    }
}

From source file: com.kylinolap.job.hadoop.cube.StorageCleanupJob.java

License: Apache License

private void cleanUnusedHdfsFiles(Configuration conf) throws IOException {
    JobEngineConfig engineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    FileSystem fs = FileSystem.get(conf);
    List<String> allHdfsPathsNeedToBeDeleted = new ArrayList<String>();
    FileStatus[] fStatus = fs.listStatus(new Path(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    for (FileStatus status : fStatus) {
        String path = status.getPath().getName();
        if (path.startsWith(JobInstance.JOB_WORKING_DIR_PREFIX)) {
            String kylinJobPath = engineConfig.getHdfsWorkingDirectory() + "/" + path;
            allHdfsPathsNeedToBeDeleted.add(kylinJobPath);
        }
    }

    List<JobInstance> allJobs = JobDAO.getInstance(KylinConfig.getInstanceFromEnv()).listAllJobs();
    for (JobInstance jobInstance : allJobs) {
        // only remove FINISHED and DISCARDED job intermediate files
        if (isJobInUse(jobInstance)) {
            String path = JobInstance.getJobWorkingDir(jobInstance, engineConfig);
            allHdfsPathsNeedToBeDeleted.remove(path);
            log.info("Remove " + path + " from deletion list, as the path belongs to job "
                    + jobInstance.getUuid() + " with status " + jobInstance.getStatus());
        }
    }

    // remove every segment working dir from deletion list
    for (CubeInstance cube : cubeMgr.listAllCubes()) {
        for (CubeSegment seg : cube.getSegments()) {
            String jobUuid = seg.getLastBuildJobID();
            if (jobUuid != null && !jobUuid.equals("")) {
                String path = JobInstance.getJobWorkingDir(jobUuid, engineConfig.getHdfsWorkingDirectory());
                allHdfsPathsNeedToBeDeleted.remove(path);
                log.info("Remove " + path + " from deletion list, as the path belongs to segment " + seg
                        + " of cube " + cube.getName());
            }
        }
    }

    if (delete) {
        // remove files
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            log.info("Deleting hdfs path " + hdfsPath);
            Path p = new Path(hdfsPath);
            if (fs.exists(p)) {
                fs.delete(p, true);
                log.info("Deleted hdfs path " + hdfsPath);
            } else {
                log.info("Hdfs path " + hdfsPath + "does not exist");
            }
        }
    } else {
        System.out.println("--------------- HDFS Path To Be Deleted ---------------");
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            System.out.println(hdfsPath);
        }
        System.out.println("-------------------------------------------------------");
    }

}

From source file: com.kylinolap.job.tools.DeployCoprocessorCLI.java

License: Apache License

public static Path getNewestCoprocessorJar(KylinConfig config, FileSystem fileSystem) throws IOException {
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, config);
    FileStatus newestJar = null;
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        if (fileStatus.getPath().toString().endsWith(".jar")) {
            if (newestJar == null) {
                newestJar = fileStatus;
            } else {
                if (newestJar.getModificationTime() < fileStatus.getModificationTime())
                    newestJar = fileStatus;
            }
        }
    }
    if (newestJar == null)
        return null;

    Path path = newestJar.getPath().makeQualified(fileSystem.getUri(), null);
    logger.info("The newest coprocessor is " + path.toString());
    return path;
}

From source file: com.kylinolap.job.tools.DeployCoprocessorCLI.java

License: Apache License

public static Path uploadCoprocessorJar(String localCoprocessorJar, FileSystem fileSystem,
        Set<String> oldJarPaths) throws IOException {
    Path uploadPath = null;
    File localCoprocessorFile = new File(localCoprocessorJar);

    // check existing jars
    if (oldJarPaths == null) {
        oldJarPaths = new HashSet<String>();
    }
    Path coprocessorDir = getCoprocessorHDFSDir(fileSystem, KylinConfig.getInstanceFromEnv());
    for (FileStatus fileStatus : fileSystem.listStatus(coprocessorDir)) {
        if (fileStatus.getLen() == localCoprocessorFile.length()
                && fileStatus.getModificationTime() == localCoprocessorFile.lastModified()) {
            uploadPath = fileStatus.getPath();
            break;
        }
        String filename = fileStatus.getPath().toString();
        if (filename.endsWith(".jar")) {
            oldJarPaths.add(filename);
        }
    }

    // upload if not existing
    if (uploadPath == null) {
        // figure out a unique new jar file name
        Set<String> oldJarNames = new HashSet<String>();
        for (String path : oldJarPaths) {
            oldJarNames.add(new Path(path).getName());
        }
        String baseName = getBaseFileName(localCoprocessorJar);
        String newName = null;
        int i = 0;
        while (newName == null) {
            newName = baseName + "-" + (i++) + ".jar";
            if (oldJarNames.contains(newName))
                newName = null;
        }

        // upload
        uploadPath = new Path(coprocessorDir, newName);
        FileInputStream in = null;
        FSDataOutputStream out = null;
        try {
            in = new FileInputStream(localCoprocessorFile);
            out = fileSystem.create(uploadPath);
            IOUtils.copy(in, out);
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(out);
        }

        fileSystem.setTimes(uploadPath, localCoprocessorFile.lastModified(), System.currentTimeMillis());

    }

    uploadPath = uploadPath.makeQualified(fileSystem.getUri(), null);
    return uploadPath;
}

From source file: com.liferay.hadoop.store.HDFSStore.java

License: Open Source License

@Override
public String[] getFileNames(long companyId, long repositoryId, String dirName) throws SystemException {

    Path fullPath = HadoopManager.getFullDirPath(companyId, repositoryId, dirName);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        FileStatus[] listStatus = fileSystem.listStatus(fullPath);

        if ((listStatus == null) || (listStatus.length < 1)) {
            return new String[0];
        }

        List<String> fileNameList = new ArrayList<String>(listStatus.length);

        for (FileStatus fileStatus : listStatus) {
            String fileStatusPathString = fileStatus.getPath().toString();

            int pos = fileStatusPathString.indexOf(dirName);

            if (pos != -1) {
                fileStatusPathString = fileStatusPathString.substring(pos);
            }

            fileNameList.add(fileStatusPathString);
        }

        return fileNameList.toArray(new String[fileNameList.size()]);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file: com.liferay.hadoop.store.HDFSStore.java

License: Open Source License

protected void deleteEmptyAncestors(long companyId, long repositoryId, Path path) throws SystemException {

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        FileStatus[] listStatus = fileSystem.listStatus(path);

        if ((listStatus == null) || (listStatus.length > 0)) {
            return;
        }

        Path parentPath = path.getParent();

        if (fileSystem.delete(path, true) && fileSystem.exists(parentPath)) {

            deleteEmptyAncestors(companyId, repositoryId, parentPath);
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file: com.lightboxtechnologies.spectrum.ExtractData.java

License: Apache License

protected static void chmodR(FileSystem fs, Path p) throws IOException {
    final FsPermission perm = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    final FileStatus[] list = fs.listStatus(p);
    for (FileStatus f : list) {
        if (f.isDirectory()) {
            chmodR(fs, f.getPath());
        }
        fs.setPermission(f.getPath(), perm);
    }
    fs.setPermission(p, perm);
}