Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using the default path filter.
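Before the full examples below, here is a minimal sketch of how this overload is typically called: build a Path[] for the directories you want to inspect, pass it to listStatus, and convert the returned FileStatus array back to paths with FileUtil.stat2Paths. The class name ListStatusSketch and the use of command-line arguments for the filesystem URI and directories are assumptions for illustration only.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
    public static void main(String[] args) throws Exception {
        // args[0] is assumed to be the filesystem URI (e.g. hdfs://namenode:8020),
        // args[1..n] are the directories to list.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(args[0]), conf);

        // Build the Path[] argument expected by listStatus(Path[] files)
        Path[] inputs = new Path[args.length - 1];
        for (int i = 1; i < args.length; i++) {
            inputs[i - 1] = new Path(args[i]);
        }

        // One FileStatus per entry found directly under the given paths
        FileStatus[] statuses = fs.listStatus(inputs);

        // FileUtil.stat2Paths is a convenience to turn the statuses back into Paths
        for (Path p : FileUtil.stat2Paths(statuses)) {
            System.out.println(p);
        }
    }
}

Note that listStatus does not recurse: the printed paths are only the entries found directly under the supplied directories.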

Usage

From source file:cmd.download.java

License:Apache License

private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
        InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
        IOUtils.copyBytes(in, out, configuration, false);
        in.close();
    }
    out.close();
}

From source file:cmd.download.java

License:Apache License

private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);
    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}

From source file:cmd.tdbloader4.java

License:Apache License

private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}

From source file:cn.lhfei.hadoop.ch03.ListStatus.java

License:Apache License

public static void main(String[] args) {

    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);

        Path[] paths = new Path[args.length];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = new Path(args[i]);
        }

        FileStatus[] status = fs.listStatus(paths);
        Path[] listPath = FileUtil.stat2Paths(status);

        for (Path p : listPath) {
            log.info(p.toString());
        }

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java

License:Apache License

/**
 * Get the input splits for a stream.
 *
 * @param conf Configuration of the filesystem the stream resides on.
 * @return List of input splits for the stream.
 * @throws IOException
 */
public List<T> getSplits(Configuration conf) throws IOException {
    List<T> splits = Lists.newArrayList();

    // Collects all stream event files timestamp, size and block locations information

    // First grab all directories (partition) that matches with the time range.
    FileSystem fs = path.getFileSystem(conf);
    for (FileStatus partitionStatus : fs.listStatus(path)) {

        // partition should be directory
        String pathName = partitionStatus.getPath().getName();
        if (!partitionStatus.isDirectory() || !StreamUtils.isPartition(pathName)) {
            continue;
        }

        // Match the time range
        long partitionStartTime = StreamUtils.getPartitionStartTime(pathName);
        long partitionEndTime = StreamUtils.getPartitionEndTime(pathName);
        if (partitionStartTime > endTime || partitionEndTime <= startTime) {
            continue;
        }

        // Collects all bucket file status in the partition.
        Collection<StreamDataFileSplitter> eventFiles = collectBuckets(fs, partitionStatus.getPath());

        // For each bucket inside the partition directory, compute the splits
        for (StreamDataFileSplitter splitter : eventFiles) {
            splitter.computeSplits(fs, minSplitSize, maxSplitSize, startTime, endTime, splits, splitFactory);
        }
    }

    return splits;
}

From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java

License:Apache License

/**
 * Collects file status of all buckets under a given partition.
 */
private Collection<StreamDataFileSplitter> collectBuckets(FileSystem fs, Path partitionPath)
        throws IOException {
    ImmutableList.Builder<StreamDataFileSplitter> builder = ImmutableList.builder();

    for (FileStatus fileStatus : fs.listStatus(partitionPath)) {
        if (StreamFileType.EVENT.isMatched(fileStatus.getPath().getName())) {
            builder.add(new StreamDataFileSplitter(fileStatus));
        }
    }
    return builder.build();
}

From source file:co.cask.cdap.data.tools.ReplicationStatusTool.java

License:Apache License

private static void addAllDirFiles(Path filePath, FileSystem fs, List<String> fileList) throws IOException {
    FileStatus[] fileStatus = fs.listStatus(filePath);
    for (FileStatus fileStat : fileStatus) {
        if (fileStat.isDirectory()) {
            addAllDirFiles(fileStat.getPath(), fs, fileList);
        } else {
            fileList.add(fileStat.getPath().toString());
        }
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the File System to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }

        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                //It is a directory so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            //it does not exist just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}

From source file:co.cask.hydrator.plugin.batch.action.FileAction.java

License:Apache License

@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();
    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    //get regex pattern for file name filtering.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}

From source file:co.cask.hydrator.plugin.db.batch.action.VerticaBulkImportAction.java

License:Apache License

@Override
public void run(ActionContext context) throws Exception {
    Object driver = Class.forName("com.vertica.jdbc.Driver").newInstance();
    DriverManager.registerDriver((Driver) driver);

    Preconditions.checkArgument(tableExists(config.tableName),
            "Table %s does not exist. Please check that the 'tableName' property "
                    + "has been set correctly, and that the connection string %s points to a valid database.",
            config.tableName, config.connectionString);

    String copyStatement;

    if (config.level.equalsIgnoreCase("basic")) {
        // COPY tableName FROM STDIN DELIMITER 'delimiter'
        copyStatement = String.format("COPY %s FROM STDIN DELIMITER '%s'", config.tableName, config.delimiter);
    } else {
        copyStatement = config.copyStatement;
    }

    LOG.debug("Copy statement is: {}", copyStatement);

    try {
        try (Connection connection = DriverManager.getConnection(config.connectionString, config.user,
                config.password)) {
            connection.setAutoCommit(false);
            // run Copy statement
            VerticaCopyStream stream = new VerticaCopyStream((VerticaConnection) connection, copyStatement);
            // Keep running count of the number of rejects
            int totalRejects = 0;

            // start() starts the stream process, and opens the COPY command.
            stream.start();

            FileSystem fs = FileSystem.get(new Configuration());

            List<String> fileList = new ArrayList<>();
            FileStatus[] fileStatus;
            try {
                fileStatus = fs.listStatus(new Path(config.path));
                for (FileStatus fileStat : fileStatus) {
                    fileList.add(fileStat.getPath().toString());
                }
            } catch (FileNotFoundException e) {
                throw new IllegalArgumentException(String.format(
                        "Path %s not found on file system. Please provide correct path.", config.path), e);
            }

            if (fileStatus.length <= 0) {
                LOG.warn("No files available to load into vertica database");
            }

            for (String file : fileList) {
                Path path = new Path(file);

                FSDataInputStream inputStream = fs.open(path);
                // Add stream to the VerticaCopyStream
                stream.addStream(inputStream);

                // call execute() to load the newly added stream. You could
                // add many streams and call execute once to load them all.
                // Which method you choose depends mainly on whether you want
                // the ability to check the number of rejections as the load
                // progresses so you can stop if the number of rejects gets too
                // high. Also, high numbers of InputStreams could create a
                // resource issue on your client system.
                stream.execute();

                // Show any rejects from this execution of the stream load
                // getRejects() returns a List containing the
                // row numbers of rejected rows.
                List<Long> rejects = stream.getRejects();

                // The size of the list gives you the number of rejected rows.
                int numRejects = rejects.size();
                totalRejects += numRejects;
                if (config.autoCommit.equalsIgnoreCase("true")) {
                    // Commit the loaded data
                    connection.commit();
                }
            }

            // Finish closes the COPY command. It returns the number of
            // rows inserted.
            long results = stream.finish();

            context.getMetrics().gauge("num.of.rows.rejected", totalRejects);
            context.getMetrics().gauge("num.of.rows.inserted", results);

            // Commit the loaded data
            connection.commit();
        }
    } catch (Exception e) {
        throw new RuntimeException(String.format("Exception while running copy statement %s", copyStatement),
                e);
    } finally {
        DriverManager.deregisterDriver((Driver) driver);
    }
}