Example usage for org.apache.hadoop.fs FileSystem listStatus

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileSystem listStatus method.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using the default path filter.
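
As a minimal, self-contained sketch of this overload (the class name and directory paths are hypothetical placeholders; the filesystem is resolved from the default Hadoop configuration):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical input directories; replace with paths that exist on your cluster.
        Path[] dirs = { new Path("/data/in"), new Path("/data/archive") };
        // listStatus(Path[]) concatenates the filtered listings of all given paths.
        for (FileStatus status : fs.listStatus(dirs)) {
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}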

Usage

From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java

License:Apache License

public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger)
        throws IOException {
    if (!fs.exists(srcDir)) {
        return;
    }
    if (!fs.isDirectory(srcDir)) { // srcDir is known to exist after the early return above
        throw new IllegalArgumentException(srcDir + " is not a directory");
    }
    if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) {
        throw new IllegalArgumentException(dstDir + " is not a directory");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Moving contents of: " + srcDir + " to: " + dstDir);
    }
    FileStatus[] files = fs.listStatus(srcDir);
    for (FileStatus file : files) {
        Path sourcePath = file.getPath();
        Path targetPath = new Path(dstDir, file.getPath().getName());
        if (logger.isDebugEnabled()) {
            logger.debug("Moving: " + sourcePath + " to: " + targetPath);
        }
        if (!fs.mkdirs(targetPath.getParent())) {
            throw new IOException("Failed at creating directory " + targetPath.getParent());
        }
        if (!fs.rename(sourcePath, targetPath)) {
            throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
        }
    }
    fs.delete(srcDir, true); // recursive; equivalent to the deprecated single-argument delete(Path)
}

From source file:com.liveramp.hank.hadoop.DomainBuilderOutputCommitter.java

License:Apache License

private static void copyPartitionsFrom(Path sourceDir, FileSystem fs, Set<Integer> copiedPartitions,
        List<MoveContentsAndDeleteTask> tasks, ExecutorService executor, Path outputPath) throws IOException {
    for (FileStatus partition : fs.listStatus(sourceDir)) {
        if (!IGNORE_PATHS.contains(partition.getPath().getName()) && partition.isDirectory()) {
            int partitionNumber = Integer.parseInt(partition.getPath().getName());
            if (!copiedPartitions.contains(partitionNumber)) {
                copiedPartitions.add(partitionNumber);
                MoveContentsAndDeleteTask task = new MoveContentsAndDeleteTask(partition.getPath(),
                        new Path(outputPath, partition.getPath().getName()), fs);
                tasks.add(task);
                executor.execute(task);
            }
        }
    }
}

From source file:com.marklogic.mapreduce.ForestInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) { // stand directories
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus[] children = fs.listStatus(path);
        FileStatus treeIndexStatus = null, treeDataStatus = null, ordinalsStatus = null,
                timestampsStatus = null;
        boolean obsolete = false;
        for (FileStatus child : children) {
            String fileName = child.getPath().getName();
            if (fileName.equals("TreeData")) { // inside a stand
                treeDataStatus = child;
            } else if (fileName.equals("TreeIndex")) {
                treeIndexStatus = child;
            } else if (fileName.equals("Ordinals")) {
                ordinalsStatus = child;
            } else if (fileName.equals("Timestamps")) {
                timestampsStatus = child;
            } else if (fileName.equals("Obsolete")) {
                obsolete = true;
                break;
            }
        }
        if (obsolete) {
            LOG.warn("Obsolete file found.  The forest is either live or isn't "
                    + "dismounted cleanly.  Ignoring stand " + path);
            break;
        }
        if (treeDataStatus == null) {
            throw new RuntimeException("TreeData file not found.");
        } else if (treeIndexStatus == null) {
            throw new RuntimeException("TreeIndex file not found.");
        } else if (ordinalsStatus == null) {
            throw new RuntimeException("Ordinals file not found.");
        } else if (timestampsStatus == null) {
            throw new RuntimeException("Timestamps file not found.");
        }
        long treeDataSize = treeDataStatus.getLen();
        if (treeDataSize == 0) {
            // unexpected, give up this stand
            LOG.warn("Found empty TreeData file.  Skipping...");
            continue; // skipping this stand
        }
        Path treeDataPath = treeDataStatus.getPath();
        long blockSize = treeDataStatus.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        // make splits based on TreeIndex
        FSDataInputStream is = fs.open(treeIndexStatus.getPath());
        BiendianDataInputStream in = new BiendianDataInputStream(is);
        int prevDocid = -1, docid = -1, position = 0;
        long prevOffset = -1L, offset = 0, splitStart = 0;
        BlockLocation[] blkLocations = fs.getFileBlockLocations(treeDataStatus, 0, treeDataSize);
        try {
            for (;; ++position) {
                try {
                    docid = in.readInt();
                    in.readInt();
                    offset = in.readLong();
                } catch (EOFException e) {
                    break;
                }
                int comp = InternalUtilities.compareUnsignedLong(offset, treeDataSize);
                if (comp > 0) {
                    throw new RuntimeException("TreeIndex offset is out of bound: position = " + position
                            + ", offset = " + offset + ", treeDataSize = " + treeDataSize);
                }
                if (prevDocid != -1 && (docid & 0xffffffffL) <= (prevDocid & 0xffffffffL)) {
                    throw new RuntimeException("docid out of order, position = " + position + ", docid = "
                            + docid + ", prevDocid = " + prevDocid);
                }
                prevDocid = docid;
                if (prevOffset != -1L && InternalUtilities.compareUnsignedLong(offset, prevOffset) <= 0) {
                    throw new RuntimeException("offset out of order, position = " + position + ", offset = "
                            + offset + ", prevOffset = " + prevOffset);
                }
                long splitLen = offset - splitStart;
                // Split here if we hit the target size exactly, or if overshooting to this
                // offset is no worse than the undershoot of stopping at prevOffset.
                if (splitLen == splitSize || (splitLen > splitSize
                        && splitLen - splitSize <= splitSize - (prevOffset - splitStart))) {
                    int blkIndex = getBlockIndex(blkLocations, offset);
                    InputSplit split = new FileSplit(treeDataPath, splitStart, splitLen,
                            blkLocations[blkIndex].getHosts());
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Created split: start=" + splitStart + " len=" + splitLen + " last docid="
                                + docid);
                    }
                    splits.add(split);
                    splitStart = offset;
                } else if (splitLen > splitSize) {
                    int blkIndex = getBlockIndex(blkLocations, prevOffset);
                    InputSplit split = new FileSplit(treeDataPath, splitStart, prevOffset - splitStart,
                            blkLocations[blkIndex].getHosts());
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Created split: start=" + splitStart + " len=" + (prevOffset - splitStart)
                                + " last docid=" + docid);
                    }
                    splits.add(split);
                    splitStart = prevOffset;
                }
            }
        } finally {
            in.close();
        }
        if (offset > splitStart) {
            // emit a final split for the tail after the last chosen boundary
            int blkIndex = getBlockIndex(blkLocations, offset - 1);
            InputSplit split = new FileSplit(treeDataPath, splitStart, offset - splitStart,
                    blkLocations[blkIndex].getHosts());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Created split: start=" + splitStart + " len=" + (offset - splitStart)
                        + " last docid=" + docid);
            }

            splits.add(split);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Made " + splits.size() + " splits.");
    }

    return splits;
}

From source file:com.marklogic.mapreduce.test.FCheck.java

License:Apache License

public void checkTreeIndex(File dir) throws IOException {
    File file = new File(dir, "TreeIndex");
    if (verbose)
        System.out.println(file.getAbsolutePath() + " -> checkTreeIndex");
    //      BiendianDataInputStream in = openFile(file, 1 << 18);
    Path path = new Path(dir.getAbsolutePath());
    FileSystem fs = path.getFileSystem(new Configuration());
    FileStatus[] children = fs.listStatus(path);
    FileStatus treeIndexStatus = null, treeDataStatus = null;
    for (FileStatus child : children) {
        String fileName = child.getPath().getName();
        if (fileName.equals("TreeData")) { // inside a stand
            treeDataStatus = child;
        } else if (fileName.equals("TreeIndex")) {
            treeIndexStatus = child;
        }
        if (treeDataStatus != null && treeIndexStatus != null) {
            break;
        }
    }
    if (treeDataStatus == null) {
        throw new RuntimeException("TreeData file not found.");
    } else if (treeIndexStatus == null) {
        throw new RuntimeException("TreeIndex file not found.");
    }
    long treeDataSize = treeDataStatus.getLen();
    if (treeDataSize == 0) {
        // unexpected, give up this stand
        System.err.println("Found empty TreeData file.  Skipping...");
        return;
    }
    FSDataInputStream is = fs.open(treeIndexStatus.getPath());
    BiendianDataInputStream in = new BiendianDataInputStream(is);
    in.setLittleEndian(littleEndian);
    int prevDocid = -1;
    long prevOffset = -1L;
    int position = 0;
    int docid;
    long offset;
    for (;; ++position) {
        try {
            docid = in.readInt();
            in.readInt();
            offset = in.readLong();
        } catch (EOFException e) {
            break;
        }
        if (debug) {
            System.out.println(String.format("TreeIndex p %08x d %08x o %016x", position, docid, offset));
        }
        if (compareUnsignedLong(offset, treeDataSize) >= 0) {
            panic(file, String.format("offset out of range, position=%d, offset=%d, treeDataSize=%d", position,
                    offset, treeDataSize));
        }
        if (prevDocid != -1 && (docid & 0xffffffffL) <= (prevDocid & 0xffffffffL)) {
            panic(file, String.format("docid out of order, position=%d, docid=%d, prevDocid=%d", position,
                    docid, prevDocid));
        }
        prevDocid = docid;
        if (prevOffset != -1L && compareUnsignedLong(offset, prevOffset) <= 0) {
            panic(file, String.format("offset out of order, position=%d, offset=%d, prevOffset=%d", position,
                    offset, prevOffset));
        }
        prevOffset = offset;
    }
    if (verbose)
        System.out.println(file.getAbsolutePath() + " <- checkTreeIndex [" + position + "]");
}

From source file:com.maxpoint.cascading.avro.AvroScheme.java

License:Open Source License

private void retrieveSchema(FlowProcess<JobConf> flowProcess, Tap tap) {
    try {
        if (tap instanceof CompositeTap)
            tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
        final String file = tap.getIdentifier();
        Path p = new Path(file);
        Configuration conf = new Configuration();
        final FileSystem fs = p.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(p)) {
            p = status.getPath();
            // no need to open them all
            try (InputStream stream = new BufferedInputStream(fs.open(p));
                    DataFileStream<Object> reader = new DataFileStream<Object>(stream,
                            new ReflectDatumReader<Object>())) {
                // close the stream and reader once the schema has been read
                dataSchema = reader.getSchema();
            }
            retrieveSourceFields(tap);
            return;
        }
        throw new RuntimeException("no schema found in " + file);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.mellanox.r4h.DistributedFileSystem.java

License:Apache License

/**
 * List all the entries of a directory.
 * 
 * Note that this operation is not atomic for a large directory.
 * The entries of a directory may be fetched from NameNode multiple times.
 * It only guarantees that each name occurs once if a directory
 * undergoes changes between the calls.
 */
@Override
public FileStatus[] listStatus(Path p) throws IOException {
    Path absF = fixRelativePart(p);
    return new FileSystemLinkResolver<FileStatus[]>() {
        @Override
        public FileStatus[] doCall(final Path p) throws IOException, UnresolvedLinkException {
            return listStatusInternal(p);
        }

        @Override
        public FileStatus[] next(final FileSystem fs, final Path p) throws IOException {
            return fs.listStatus(p);
        }
    }.resolve(this, absF);
}

From source file:com.metamx.druid.indexer.DbUpdaterJob.java

License:Open Source License

@Override
public boolean run() {
    final Configuration conf = new Configuration();

    ImmutableList.Builder<DataSegment> publishedSegmentsBuilder = ImmutableList.builder();

    for (String propName : System.getProperties().stringPropertyNames()) {
        if (propName.startsWith("hadoop.")) {
            conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
        }
    }

    final Path descriptorInfoDir = config.makeDescriptorInfoDir();

    try {
        FileSystem fs = descriptorInfoDir.getFileSystem(conf);

        for (FileStatus status : fs.listStatus(descriptorInfoDir)) {
            final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class);

            dbi.withHandle(new HandleCallback<Void>() {
                @Override
                public Void withHandle(Handle handle) throws Exception {
                    handle.createStatement(String.format(
                            "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) "
                                    + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
                            spec.getSegmentTable())).bind("id", segment.getIdentifier())
                            .bind("dataSource", segment.getDataSource())
                            .bind("created_date", new DateTime().toString())
                            .bind("start", segment.getInterval().getStart().toString())
                            .bind("end", segment.getInterval().getEnd().toString())
                            .bind("partitioned", segment.getShardSpec().getPartitionNum())
                            .bind("version", segment.getVersion()).bind("used", true)
                            .bind("payload", jsonMapper.writeValueAsString(segment)).execute();

                    return null;
                }
            });

            publishedSegmentsBuilder.add(segment);
            log.info("Published %s", segment.getIdentifier());
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }

    publishedSegments = publishedSegmentsBuilder.build();

    return true;
}

From source file:com.metamx.druid.indexer.hadoop.FSSpideringIterator.java

License:Open Source License

public static FSSpideringIterator spiderPathPropogateExceptions(FileSystem fs, Path path) {
    try {
        final FileStatus[] statii = fs.listStatus(path);
        return new FSSpideringIterator(fs, statii == null ? new FileStatus[] {} : statii);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.metamx.milano.pig.MilanoLoadFunc.java

License:Apache License

private void getPaths(Path baseDirectory, Set<Path> paths, FileSystem fileSystem) throws IOException {
    FileStatus[] files = fileSystem.listStatus(baseDirectory);
    for (FileStatus file : files) {
        Path path = file.getPath();
        // file is already a FileStatus, so a second getFileStatus call is unnecessary
        if (file.isDirectory()) {
            getPaths(path, paths, fileSystem);
        } else {
            paths.add(baseDirectory);
        }
    }
}

From source file:com.metamx.milano.pig.MilanoLoadFunc.java

License:Apache License

private Path getFilePath(Path path, FileSystem fileSystem) throws IOException {
    Path newPath = null;
    FileStatus[] files = fileSystem.listStatus(path);
    for (FileStatus file : files) {
        if (file.isDirectory()) {
            newPath = getFilePath(file.getPath(), fileSystem);
            if (newPath != null) {
                break;
            }
        } else {
            newPath = file.getPath();
            break;
        }
    }

    return newPath;
}