List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
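Before the per-project examples, a minimal sketch of this overload: listStatus(Path[]) lists every path in the array in a single call and returns the concatenated FileStatus entries, throwing FileNotFoundException if any of the paths is missing. The HDFS URI and directory names below are hypothetical placeholders, not taken from any of the source files that follow.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical cluster URI; replace with your own NameNode address.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000/"), conf);

        // Both directories are assumed to exist; a missing path raises FileNotFoundException.
        Path[] dirs = { new Path("/user/alice/input"), new Path("/user/alice/output") };

        // Statuses of the children of all given paths, concatenated into one array.
        FileStatus[] statuses = fs.listStatus(dirs);
        for (Path p : FileUtil.stat2Paths(statuses)) {
            System.out.println(p);
        }
    }
}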
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
        InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
        IOUtils.copyBytes(in, out, configuration, false);
        in.close();
    }
    out.close();
}
From source file:cmd.download.java
License:Apache License
private void mergeToLocalFile2(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    // Find all the right paths and copy .gz files locally
    FileStatus[] status = fs.listStatus(src);
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_FOURTH)) {
            paths.put(pathName, path);
        }
    }

    for (String pathName : paths.keySet()) {
        Path path = new Path(src, paths.get(pathName));
        status = fs.listStatus(path);
        for (FileStatus fileStatus : status) {
            Path p = fileStatus.getPath();
            log.debug("Copying {} to {}...", p.toUri(), outPath);
            fs.copyToLocalFile(p, new Path(outPath, p.getName()));
        }
    }

    // Merge .gz files into indexName.gz
    File fileOutputPath = new File(outPath);
    File[] files = fileOutputPath.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.getName().endsWith(".gz");
        }
    });
    Arrays.sort(files);

    String prevIndexName = null;
    OutputStream out = null;
    for (File file : files) {
        log.debug("Processing {}... ", file.getName());
        String indexName = file.getName().substring(0, file.getName().indexOf("_"));
        if (prevIndexName == null)
            prevIndexName = indexName;
        if (out == null)
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        if (!prevIndexName.equals(indexName)) {
            if (out != null)
                out.close();
            log.debug("Index name set to {}", indexName);
            out = new GZIPOutputStream(new FileOutputStream(new File(outPath, indexName + ".gz")));
        }
        InputStream in = new GZIPInputStream(new FileInputStream(file));
        log.debug("Copying {} into {}.gz ...", file.getName(), indexName);
        IOUtils.copyBytes(in, out, 8192, false);
        in.close();
        file.delete();
        prevIndexName = indexName;
    }
    if (out != null)
        out.close();

    // build B+Tree indexes
    Location location = new Location(outPath);
    for (String idxName : Constants.indexNames) {
        log.debug("Creating {} index...", idxName);
        String indexFilename = location.absolute(idxName, "gz");
        if (new File(indexFilename).exists()) {
            new File(outPath, idxName + ".dat").delete();
            new File(outPath, idxName + ".idn").delete();
            CmdIndexBuild.main(location.getDirectoryPath(), idxName, indexFilename);
            // To save some disk space
            new File(indexFilename).delete();
        }
    }
}
From source file:cmd.tdbloader4.java
License:Apache License
private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException {
    log.debug("Creating offsets file...");
    Map<Long, Long> offsets = new TreeMap<Long, Long>();
    FileStatus[] status = fs.listStatus(new Path(input));
    for (FileStatus fileStatus : status) {
        Path file = fileStatus.getPath();
        if (file.getName().startsWith("part-r-")) {
            log.debug("Processing: {}", file.getName());
            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file)));
            String line = in.readLine();
            String[] tokens = line.split("\\s");
            long partition = Long.valueOf(tokens[0]);
            long offset = Long.valueOf(tokens[1]);
            log.debug("Partition {} has offset {}", partition, offset);
            offsets.put(partition, offset);
        }
    }

    Path outputPath = new Path(output, Constants.OFFSETS_FILENAME);
    PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath)));
    for (Long partition : offsets.keySet()) {
        out.println(partition + "\t" + offsets.get(partition));
    }
    out.close();
    log.debug("Offset file created.");
}
From source file:cn.lhfei.hadoop.ch03.ListStatus.java
License:Apache License
public static void main(String[] args) {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);

        Path[] paths = new Path[args.length];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = new Path(args[i]);
        }

        FileStatus[] status = fs.listStatus(paths);
        Path[] listPath = FileUtil.stat2Paths(status);

        for (Path p : listPath) {
            log.info(p.toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java
License:Apache License
/**
 * Get the input splits for a stream.
 *
 * @param conf Configuration of the filesystem the stream resides on.
 * @return List of input splits for the stream.
 * @throws IOException
 */
public List<T> getSplits(Configuration conf) throws IOException {
    List<T> splits = Lists.newArrayList();

    // Collects all stream event files timestamp, size and block locations information
    // First grab all directories (partition) that matches with the time range.
    FileSystem fs = path.getFileSystem(conf);
    for (FileStatus partitionStatus : fs.listStatus(path)) {
        // partition should be directory
        String pathName = partitionStatus.getPath().getName();
        if (!partitionStatus.isDirectory() || !StreamUtils.isPartition(pathName)) {
            continue;
        }

        // Match the time range
        long partitionStartTime = StreamUtils.getPartitionStartTime(pathName);
        long partitionEndTime = StreamUtils.getPartitionEndTime(pathName);
        if (partitionStartTime > endTime || partitionEndTime <= startTime) {
            continue;
        }

        // Collects all bucket file status in the partition.
        Collection<StreamDataFileSplitter> eventFiles = collectBuckets(fs, partitionStatus.getPath());

        // For each bucket inside the partition directory, compute the splits
        for (StreamDataFileSplitter splitter : eventFiles) {
            splitter.computeSplits(fs, minSplitSize, maxSplitSize, startTime, endTime, splits, splitFactory);
        }
    }

    return splits;
}
From source file:co.cask.cdap.data.stream.StreamInputSplitFinder.java
License:Apache License
/**
 * Collects file status of all buckets under a given partition.
 */
private Collection<StreamDataFileSplitter> collectBuckets(FileSystem fs, Path partitionPath) throws IOException {
    ImmutableList.Builder<StreamDataFileSplitter> builder = ImmutableList.builder();

    for (FileStatus fileStatus : fs.listStatus(partitionPath)) {
        if (StreamFileType.EVENT.isMatched(fileStatus.getPath().getName())) {
            builder.add(new StreamDataFileSplitter(fileStatus));
        }
    }

    return builder.build();
}
From source file:co.cask.cdap.data.tools.ReplicationStatusTool.java
License:Apache License
private static void addAllDirFiles(Path filePath, FileSystem fs, List<String> fileList) throws IOException {
    FileStatus[] fileStatus = fs.listStatus(filePath);
    for (FileStatus fileStat : fileStatus) {
        if (fileStat.isDirectory()) {
            addAllDirFiles(fileStat.getPath(), fs, fileList);
        } else {
            fileList.add(fileStat.getPath().toString());
        }
    }
}
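As an aside (not part of the ReplicationStatusTool source): on Hadoop 2.x and later the same recursive walk can also be written with FileSystem.listFiles(path, true), which returns a RemoteIterator and avoids the explicit recursion. A minimal sketch, assuming the same fs and fileList setup as above; the method name addAllDirFiles2 is hypothetical.

// Alternative sketch: recursive listing via listFiles() instead of manual recursion.
// Requires org.apache.hadoop.fs.LocatedFileStatus and org.apache.hadoop.fs.RemoteIterator.
private static void addAllDirFiles2(Path filePath, FileSystem fs, List<String> fileList) throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, true); // true = recurse into subdirectories
    while (it.hasNext()) {
        fileList.add(it.next().getPath().toString());
    }
}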
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the File System to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }

        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                // It is a directory so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // it does not exist, just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();

    FileSystem fileSystem = FileSystem.get(conf);
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    // get regex pattern for file name filtering.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}
From source file:co.cask.hydrator.plugin.db.batch.action.VerticaBulkImportAction.java
License:Apache License
@Override
public void run(ActionContext context) throws Exception {
    Object driver = Class.forName("com.vertica.jdbc.Driver").newInstance();
    DriverManager.registerDriver((Driver) driver);

    Preconditions.checkArgument(tableExists(config.tableName),
            "Table %s does not exist. Please check that the 'tableName' property "
                    + "has been set correctly, and that the connection string %s points to a valid database.",
            config.tableName, config.connectionString);

    String copyStatement;
    if (config.level.equalsIgnoreCase("basic")) {
        // COPY tableName FROM STDIN DELIMITER 'delimiter'
        copyStatement = String.format("COPY %s FROM STDIN DELIMITER '%s'", config.tableName, config.delimiter);
    } else {
        copyStatement = config.copyStatement;
    }

    LOG.debug("Copy statement is: {}", copyStatement);

    try {
        try (Connection connection = DriverManager.getConnection(config.connectionString, config.user,
                config.password)) {
            connection.setAutoCommit(false);
            // run Copy statement
            VerticaCopyStream stream = new VerticaCopyStream((VerticaConnection) connection, copyStatement);
            // Keep running count of the number of rejects
            int totalRejects = 0;
            // start() starts the stream process, and opens the COPY command.
            stream.start();

            FileSystem fs = FileSystem.get(new Configuration());

            List<String> fileList = new ArrayList<>();
            FileStatus[] fileStatus;
            try {
                fileStatus = fs.listStatus(new Path(config.path));
                for (FileStatus fileStat : fileStatus) {
                    fileList.add(fileStat.getPath().toString());
                }
            } catch (FileNotFoundException e) {
                throw new IllegalArgumentException(String.format(String.format(
                        "Path %s not found on file system. Please provide correct path.", config.path), e));
            }

            if (fileStatus.length <= 0) {
                LOG.warn("No files available to load into vertica database");
            }

            for (String file : fileList) {
                Path path = new Path(file);

                FSDataInputStream inputStream = fs.open(path);
                // Add stream to the VerticaCopyStream
                stream.addStream(inputStream);
                // call execute() to load the newly added stream. You could
                // add many streams and call execute once to load them all.
                // Which method you choose depends mainly on whether you want
                // the ability to check the number of rejections as the load
                // progresses so you can stop if the number of rejects gets too
                // high. Also, high numbers of InputStreams could create a
                // resource issue on your client system.
                stream.execute();

                // Show any rejects from this execution of the stream load
                // getRejects() returns a List containing the
                // row numbers of rejected rows.
                List<Long> rejects = stream.getRejects();

                // The size of the list gives you the number of rejected rows.
                int numRejects = rejects.size();
                totalRejects += numRejects;

                if (config.autoCommit.equalsIgnoreCase("true")) {
                    // Commit the loaded data
                    connection.commit();
                }
            }

            // Finish closes the COPY command. It returns the number of
            // rows inserted.
            long results = stream.finish();

            context.getMetrics().gauge("num.of.rows.rejected", totalRejects);
            context.getMetrics().gauge("num.of.rows.inserted", results);

            // Commit the loaded data
            connection.commit();
        }
    } catch (Exception e) {
        throw new RuntimeException(String.format("Exception while running copy statement %s", copyStatement), e);
    } finally {
        DriverManager.deregisterDriver((Driver) driver);
    }
}