List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
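Before the project-specific examples below, here is a minimal sketch of the call itself (not taken from any of the sources on this page; the directory path is only a placeholder): list a directory once and distinguish files from subdirectories.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/data"); // placeholder path
        FileSystem fs = dir.getFileSystem(conf);
        // listStatus returns one FileStatus per direct child of the directory
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isDirectory()) {
                System.out.println("dir:  " + status.getPath());
            } else {
                System.out.println("file: " + status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}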
From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java
License:Apache License
public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger) throws IOException {
    if (!fs.exists(srcDir)) {
        return;
    }
    if (fs.exists(srcDir) && !fs.isDirectory(srcDir)) {
        throw new IllegalArgumentException(srcDir + " is not a directory");
    }
    if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) {
        throw new IllegalArgumentException(dstDir + " is not a directory");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Moving contents of: " + srcDir + " to: " + dstDir);
    }
    FileStatus[] files = fs.listStatus(srcDir);
    for (FileStatus file : files) {
        Path sourcePath = file.getPath();
        Path targetPath = new Path(dstDir, file.getPath().getName());
        if (logger.isDebugEnabled()) {
            logger.debug("Moving: " + sourcePath + " to: " + targetPath);
        }
        if (!fs.mkdirs(targetPath.getParent())) {
            throw new IOException("Failed at creating directory " + targetPath.getParent());
        }
        if (!fs.rename(sourcePath, targetPath)) {
            throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
        }
    }
    fs.delete(srcDir);
}
From source file:com.liveramp.hank.hadoop.DomainBuilderOutputCommitter.java
License:Apache License
private static void copyPartitionsFrom(Path sourceDir, FileSystem fs, Set<Integer> copiedPartitions,
        List<MoveContentsAndDeleteTask> tasks, ExecutorService executor, Path outputPath) throws IOException {
    for (FileStatus partition : fs.listStatus(sourceDir)) {
        if (!IGNORE_PATHS.contains(partition.getPath().getName()) && partition.isDir()) {
            int partitionNumber = Integer.valueOf(partition.getPath().getName());
            if (!copiedPartitions.contains(partitionNumber)) {
                copiedPartitions.add(partitionNumber);
                MoveContentsAndDeleteTask task = new MoveContentsAndDeleteTask(partition.getPath(),
                        new Path(outputPath, partition.getPath().getName()), fs);
                tasks.add(task);
                executor.execute(task);
            }
        }
    }
}
From source file:com.marklogic.mapreduce.ForestInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) { // stand directories
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus children[] = fs.listStatus(path);
        FileStatus treeIndexStatus = null, treeDataStatus = null, ordinalsStatus = null, timestampsStatus = null;
        boolean obsolete = false;
        for (FileStatus child : children) {
            String fileName = child.getPath().getName();
            if (fileName.equals("TreeData")) { // inside a stand
                treeDataStatus = child;
            } else if (fileName.equals("TreeIndex")) {
                treeIndexStatus = child;
            } else if (fileName.equals("Ordinals")) {
                ordinalsStatus = child;
            } else if (fileName.equals("Timestamps")) {
                timestampsStatus = child;
            } else if (fileName.equals("Obsolete")) {
                obsolete = true;
                break;
            }
        }
        if (obsolete) {
            LOG.warn("Obsolete file found. The forest is either live or isn't "
                    + "dismounted cleanly. Ignoring stand " + path);
            break;
        }
        if (treeDataStatus == null) {
            throw new RuntimeException("TreeData file not found.");
        } else if (treeIndexStatus == null) {
            throw new RuntimeException("TreeIndex file not found.");
        } else if (ordinalsStatus == null) {
            throw new RuntimeException("Ordinals file not found.");
        } else if (timestampsStatus == null) {
            throw new RuntimeException("Timestamps file not found.");
        }
        long treeDataSize = treeDataStatus.getLen();
        if (treeDataSize == 0) {
            // unexpected, give up this stand
            LOG.warn("Found empty TreeData file. Skipping...");
            continue; // skipping this stand
        }
        Path treeDataPath = treeDataStatus.getPath();
        long blockSize = treeDataStatus.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        // make splits based on TreeIndex
        FSDataInputStream is = fs.open(treeIndexStatus.getPath());
        BiendianDataInputStream in = new BiendianDataInputStream(is);
        int prevDocid = -1, docid = -1, position = 0;
        long prevOffset = -1L, offset = 0, splitStart = 0;
        BlockLocation[] blkLocations = fs.getFileBlockLocations(treeDataStatus, 0, treeDataSize);
        try {
            for (;; ++position) {
                try {
                    docid = in.readInt();
                    in.readInt();
                    offset = in.readLong();
                } catch (EOFException e) {
                    break;
                }
                int comp = InternalUtilities.compareUnsignedLong(offset, treeDataSize);
                if (comp > 0) {
                    throw new RuntimeException("TreeIndex offset is out of bound: position = " + position
                            + ", offset = " + offset + ", treeDataSize = " + treeDataSize);
                }
                if (prevDocid != -1 && (docid & 0xffffffffL) <= (prevDocid & 0xffffffffL)) {
                    throw new RuntimeException("docid out of order, position = " + position + ", docid = "
                            + docid + ", prevDocid = " + prevDocid);
                }
                prevDocid = docid;
                if (prevOffset != -1L && InternalUtilities.compareUnsignedLong(offset, prevOffset) <= 0) {
                    throw new RuntimeException("offset out of order, position = " + position + ", offset = "
                            + offset + ", prevOffset = " + prevOffset);
                }
                long splitLen = offset - splitStart;
                if (splitLen == splitSize
                        || (splitLen > splitSize && splitLen - splitSize <= splitSize - (prevOffset - splitStart))) {
                    int blkIndex = getBlockIndex(blkLocations, offset);
                    InputSplit split = new FileSplit(treeDataPath, splitStart, splitLen,
                            blkLocations[blkIndex].getHosts());
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Created split: start=" + splitStart + " len=" + splitLen
                                + " last docid=" + docid);
                    }
                    splits.add(split);
                    splitStart = offset;
                } else if (splitLen > splitSize) {
                    int blkIndex = getBlockIndex(blkLocations, prevOffset);
                    InputSplit split = new FileSplit(treeDataPath, splitStart, prevOffset - splitStart,
                            blkLocations[blkIndex].getHosts());
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Created split: start=" + splitStart + " len=" + (prevOffset - splitStart)
                                + " last docid=" + docid);
                    }
                    splits.add(split);
                    splitStart = prevOffset;
                }
                // Track the previous offset for the ordering check and the split-boundary
                // decision above. This assignment is presumed here (it appears to have been
                // lost when the snippet was flattened; the checkTreeIndex example below does
                // the same bookkeeping).
                prevOffset = offset;
            }
        } finally {
            in.close();
        }
        if (offset > splitStart) {
            int blkIndex = getBlockIndex(blkLocations, offset - 1);
            InputSplit split = new FileSplit(treeDataPath, splitStart, offset - splitStart,
                    blkLocations[blkIndex].getHosts());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Created split: start=" + splitStart + " len=" + (offset - splitStart)
                        + " last docid=" + docid);
            }
            splits.add(split);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Made " + splits.size() + " splits.");
    }
    return splits;
}
From source file:com.marklogic.mapreduce.test.FCheck.java
License:Apache License
public void checkTreeIndex(File dir) throws IOException {
    File file = new File(dir, "TreeIndex");
    if (verbose)
        System.out.println(file.getAbsolutePath() + " -> checkTreeIndex");
    // BiendianDataInputStream in = openFile(file, 1 << 18);
    Path path = new Path(dir.getAbsolutePath());
    FileSystem fs = path.getFileSystem(new Configuration());
    FileStatus children[] = fs.listStatus(path);
    FileStatus treeIndexStatus = null, treeDataStatus = null;
    for (FileStatus child : children) {
        String fileName = child.getPath().getName();
        if (fileName.equals("TreeData")) { // inside a stand
            treeDataStatus = child;
        } else if (fileName.equals("TreeIndex")) {
            treeIndexStatus = child;
        }
        if (treeDataStatus != null && treeIndexStatus != null) {
            break;
        }
    }
    if (treeDataStatus == null) {
        throw new RuntimeException("TreeData file not found.");
    } else if (treeIndexStatus == null) {
        throw new RuntimeException("TreeIndex file not found.");
    }
    long treeDataSize = treeDataStatus.getLen();
    if (treeDataSize == 0) {
        // unexpected, give up this stand
        System.err.println("Found empty TreeData file. Skipping...");
        return;
    }
    FSDataInputStream is = fs.open(treeIndexStatus.getPath());
    BiendianDataInputStream in = new BiendianDataInputStream(is);
    in.setLittleEndian(littleEndian);
    int prevDocid = -1;
    long prevOffset = -1L;
    int position = 0;
    int docid;
    long offset;
    for (;; ++position) {
        try {
            docid = in.readInt();
            in.readInt();
            offset = in.readLong();
        } catch (EOFException e) {
            break;
        }
        if (debug) {
            System.out.println(String.format("TreeIndex p %08x d %08x o %016x", position, docid, offset));
        }
        if (compareUnsignedLong(offset, treeDataSize) >= 0) {
            panic(file, String.format("offset out of range, position=%d, offset=%d, treeDataSize=%d",
                    position, offset, treeDataSize));
        }
        if (prevDocid != -1 && (docid & 0xffffffffL) <= (prevDocid & 0xffffffffL)) {
            panic(file, String.format("docid out of order, position=%d, docid=%d, prevDocid=%d",
                    position, docid, prevDocid));
        }
        prevDocid = docid;
        if (prevOffset != -1L && compareUnsignedLong(offset, prevOffset) <= 0) {
            panic(file, String.format("offset out of order, position=%d, offset=%d, prevOffset=%d",
                    position, offset, prevOffset));
        }
        prevOffset = offset;
    }
    if (verbose)
        System.out.println(file.getAbsolutePath() + " <- checkTreeIndex [" + position + "]");
}
From source file:com.maxpoint.cascading.avro.AvroScheme.java
License:Open Source License
private void retrieveSchema(FlowProcess<JobConf> flowProcess, Tap tap) {
    try {
        if (tap instanceof CompositeTap)
            tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
        final String file = tap.getIdentifier();
        Path p = new Path(file);
        Configuration conf = new Configuration();
        final FileSystem fs = p.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(p)) {
            p = status.getPath();
            // no need to open them all
            InputStream stream = new BufferedInputStream(fs.open(p));
            DataFileStream reader = new DataFileStream(stream, new ReflectDatumReader());
            dataSchema = reader.getSchema();
            retrieveSourceFields(tap);
            return;
        }
        throw new RuntimeException("no schema found in " + file);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.mellanox.r4h.DistributedFileSystem.java
License:Apache License
/**
 * List all the entries of a directory
 *
 * Note that this operation is not atomic for a large directory.
 * The entries of a directory may be fetched from NameNode multiple times.
 * It only guarantees that each name occurs once if a directory
 * undergoes changes between the calls.
 */
@Override
public FileStatus[] listStatus(Path p) throws IOException {
    Path absF = fixRelativePart(p);
    return new FileSystemLinkResolver<FileStatus[]>() {
        @Override
        public FileStatus[] doCall(final Path p) throws IOException, UnresolvedLinkException {
            return listStatusInternal(p);
        }

        @Override
        public FileStatus[] next(final FileSystem fs, final Path p) throws IOException {
            return fs.listStatus(p);
        }
    }.resolve(this, absF);
}
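Side note (not part of the R4H source above): the javadoc points out that listStatus is not atomic for a large directory and may contact the NameNode several times while building the full FileStatus[] array. For very large directories, FileSystem also exposes incremental listing through a RemoteIterator; here is a minimal sketch, assuming a Hadoop 2.x or later client and a placeholder path.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class IncrementalListingExample {
    public static void main(String[] args) throws IOException {
        Path dir = new Path("/tmp/large-dir"); // placeholder path
        FileSystem fs = dir.getFileSystem(new Configuration());
        // listLocatedStatus iterates over directory entries instead of
        // materializing one large FileStatus[] array
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " " + status.getLen());
        }
    }
}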
From source file:com.metamx.druid.indexer.DbUpdaterJob.java
License:Open Source License
@Override
public boolean run() {
    final Configuration conf = new Configuration();

    ImmutableList.Builder<DataSegment> publishedSegmentsBuilder = ImmutableList.builder();

    for (String propName : System.getProperties().stringPropertyNames()) {
        if (propName.startsWith("hadoop.")) {
            conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
        }
    }

    final Path descriptorInfoDir = config.makeDescriptorInfoDir();

    try {
        FileSystem fs = descriptorInfoDir.getFileSystem(conf);

        for (FileStatus status : fs.listStatus(descriptorInfoDir)) {
            final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class);

            dbi.withHandle(new HandleCallback<Void>() {
                @Override
                public Void withHandle(Handle handle) throws Exception {
                    handle.createStatement(String.format(
                            "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) "
                                    + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
                            spec.getSegmentTable()))
                            .bind("id", segment.getIdentifier())
                            .bind("dataSource", segment.getDataSource())
                            .bind("created_date", new DateTime().toString())
                            .bind("start", segment.getInterval().getStart().toString())
                            .bind("end", segment.getInterval().getEnd().toString())
                            .bind("partitioned", segment.getShardSpec().getPartitionNum())
                            .bind("version", segment.getVersion())
                            .bind("used", true)
                            .bind("payload", jsonMapper.writeValueAsString(segment))
                            .execute();
                    return null;
                }
            });

            publishedSegmentsBuilder.add(segment);
            log.info("Published %s", segment.getIdentifier());
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }

    publishedSegments = publishedSegmentsBuilder.build();

    return true;
}
From source file:com.metamx.druid.indexer.hadoop.FSSpideringIterator.java
License:Open Source License
public static FSSpideringIterator spiderPathPropogateExceptions(FileSystem fs, Path path) {
    try {
        final FileStatus[] statii = fs.listStatus(path);
        return new FSSpideringIterator(fs, statii == null ? new FileStatus[] {} : statii);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.metamx.milano.pig.MilanoLoadFunc.java
License:Apache License
private void getPaths(Path baseDirectory, Set<Path> paths, FileSystem fileSystem) throws IOException {
    FileStatus[] files = fileSystem.listStatus(baseDirectory);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        if (fileStatus.isDir()) {
            getPaths(path, paths, fileSystem);
        } else {
            paths.add(baseDirectory);
        }
    }
}
From source file:com.metamx.milano.pig.MilanoLoadFunc.java
License:Apache License
private Path getFilePath(Path path, FileSystem fileSystem) throws IOException {
    Path newPath = null;
    FileStatus[] files = fileSystem.listStatus(path);
    for (FileStatus file : files) {
        if (file.isDir()) {
            newPath = getFilePath(file.getPath(), fileSystem);
            if (newPath != null) {
                break;
            }
        } else {
            newPath = file.getPath();
            break;
        }
    }
    return newPath;
}