Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Source Link

Document

Return a file status object that represents the path.

Usage

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java

License:Open Source License

/**
 * Prepares this record reader for the given file split: opens the file as a
 * BGZF (block-compressed) stream, seeks to the split start, and discards the
 * first partial line so records are not double-read across split boundaries.
 *
 * @param genericSplit the split to read; must be a {@code FileSplit}
 * @param context      task context supplying the job {@code Configuration}
 * @throws IOException if the file cannot be opened or seeked
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    Configuration conf = ContextUtil.getConfiguration(context);
    // Honour the standard LineRecordReader cap on line length, if configured.
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);

    FileSplit split = (FileSplit) genericSplit;
    // BGZF "virtual" offsets pack (compressedBlockOffset << 16 | offsetWithinBlock),
    // hence the 16-bit shifts below.
    start = (split.getStart()) << 16;
    // NOTE(review): 'start' is already shifted left by 16 at this point, so this
    // expression shifts the split start by 32 bits in total while the length is
    // shifted by 16 — looks like a double shift; confirm against how 'end' is
    // compared elsewhere before changing.
    end = (start + split.getLength()) << 16;

    final Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);

    // Wrap the HDFS stream so htsjdk's BGZF reader can seek by virtual offset.
    bin = new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(fs.open(file), fs.getFileStatus(file).getLen(), file));

    in = new LineReader(bin, conf);

    if (start != 0) {
        bin.seek(start);

        // Skip the first (possibly partial) line; the previous split reads it.
        in.readLine(new Text());
        start = bin.getFilePointer();
    }
    this.pos = start;
}

From source file:fi.tkk.ics.hadoop.bam.SplittingBAMIndexer.java

License:Open Source License

/**
 * Runs a new SplittingBAMIndexer driven entirely by the supplied
 * {@link org.apache.hadoop.conf.Configuration} instead of a command-line
 * argument list.
 *
 * @throws java.lang.IllegalArgumentException if the "input" property is not
 *                                            in the Configuration
 */
public static void run(final Configuration conf) throws IOException {
    final String inputString = conf.get("input");
    if (inputString == null) {
        throw new IllegalArgumentException("String property \"input\" path not found in given Configuration");
    }

    final FileSystem fs = FileSystem.get(conf);
    final Path input = new Path(inputString);

    // A granularity of 4096 is the default: generally sufficient for very
    // large BAM files while keeping memory use within a gigabyte-range heap.
    final int granularity = conf.getInt("granularity", 4096);
    final SplittingBAMIndexer indexer = new SplittingBAMIndexer(granularity);

    indexer.index(fs.open(input), fs.create(input.suffix(OUTPUT_FILE_EXTENSION)),
            fs.getFileStatus(input).getLen());
}

From source file:fi.tkk.ics.hadoop.bam.util.WrapSeekable.java

License:Open Source License

/** A helper for the common use case: open a path and wrap it with its length. */
public static WrapSeekable<FSDataInputStream> openPath(FileSystem fs, Path p) throws IOException {
    final FSDataInputStream stream = fs.open(p);
    return new WrapSeekable<FSDataInputStream>(stream, fs.getFileStatus(p).getLen(), p);
}

From source file:finderbots.recommenders.hadoop.ActionSplitterJob.java

License:Apache License

/**
 * Writes the user and item ID indexes as files inside the given directory.
 *
 * @param where directory in which to create the index files
 * @throws IOException if {@code where} is not a directory or writing fails
 */
public void saveIndexes(Path where) throws IOException {
    Path userIndexPath = new Path(where, options.getUserIndexFile());
    Path itemIndexPath = new Path(where, options.getItemIndexFile());
    FileSystem fs = where.getFileSystem(new JobConf());
    if (!fs.getFileStatus(where).isDir()) {
        // Fixed typo in the original message ("locaton").
        throw new IOException("Bad location for ID Indexes: " + where.toString());
    }
    // BUG FIX: the original never closed these streams, leaking file handles
    // and risking unflushed data on some filesystems.
    FSDataOutputStream userIndexFile = fs.create(userIndexPath);
    try {
        Utils.writeIndex(userIndex, userIndexFile);
    } finally {
        userIndexFile.close();
    }
    FSDataOutputStream itemIndexFile = fs.create(itemIndexPath);
    try {
        Utils.writeIndex(itemIndex, itemIndexFile);
    } finally {
        itemIndexFile.close();
    }
}

From source file:finderbots.recommenders.hadoop.ActionSplitterJob.java

License:Apache License

/**
 * Recursively collects open input streams for every action file under the
 * given path. A file is included when its name matches the configured
 * input-file pattern as a regex, or — failing that — simply contains the
 * pattern as a substring (e.g. ".tsv"), excluding Hadoop bookkeeping files
 * whose names start with "_", "." or "~".
 *
 * @param baseInputDir a file or directory to scan
 * @return open streams for all matching files; callers are responsible for closing them
 * @throws IOException if the path cannot be read or no input matches
 */
public List<FSDataInputStream> getActionFiles(Path baseInputDir) throws IOException {
    List<FSDataInputStream> files = new ArrayList<FSDataInputStream>();
    FileSystem fs = baseInputDir.getFileSystem(getConf());
    try {
        FileStatus inStat = fs.getFileStatus(baseInputDir);
        Boolean inputIsDir = inStat.isDir();
        LOGGER.info("\n======\n\n\n   Input path = " + baseInputDir.toString() + "\n" + "   isDir = "
                + inputIsDir.toString() + "\n\n\n======\n");
        if (inputIsDir) {
            // CLEANUP: the original computed filename.matches(regex) into an
            // unused local ('match'), evaluating the regex twice per entry.
            for (FileStatus fstat : fs.listStatus(baseInputDir)) {
                String name = fstat.getPath().getName();
                if (fstat.isDir()) {
                    // Descend into subdirectories.
                    files.addAll(getActionFiles(fstat.getPath()));
                } else if (name.matches(options.getInputFilePattern())) {
                    // Assume a regex was passed in and check for matches.
                    files.add(fs.open(fstat.getPath()));
                } else if (name.contains(options.getInputFilePattern()) && !name.startsWith("_")
                        && !name.startsWith(".") && !name.startsWith("~")) {
                    // Assume a simple ".tsv" or other included string was passed in;
                    // exclude system files like the hadoop created _SUCCESS, .crc's etc.
                    // (The original also re-tested !isDir() here, which is redundant
                    // inside this non-directory branch.)
                    files.add(fs.open(fstat.getPath()));
                }
            }
        } else if (inStat.getPath().getName().contains(options.getInputFilePattern())) {
            // Processing a single file as input.
            files.add(fs.open(inStat.getPath()));
        } else {
            // Doesn't match any input pattern so there is no input.
            throw new IOException("No input to process at: " + baseInputDir.toString());
        }
    } catch (IOException e) {
        LOGGER.error("Cannot find base of input file tree for: " + baseInputDir.toString());
        throw e;
    }
    return files;
}

From source file:fm.last.hadoop.utils.seq.SequenceFileUtils.java

License:Apache License

/**
 * Reads the contents of a SequenceFile into a human-readable String. The number
 * of lines read is limited to {@code lineCount}; if {@code lineCount} is 0 or
 * negative, all lines are read. (The original javadoc incorrectly said
 * "&gt;= 0" reads everything — the code reads everything when it is &lt;= 0.)
 *
 * @param inputPath Path to the file to be read; a directory reads each file it contains.
 * @param lineCount Number of lines to read from the top of each file.
 * @param conf Configuration object to use to get file system.
 * @return The contents of the file as a String.
 * @throws IOException If an error occurs reading the file.
 */
public static String readSequenceFileTop(Path inputPath, int lineCount, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path[] files = null;
    FileStatus fileStatus = fs.getFileStatus(inputPath);
    if (fileStatus.isDir()) { // if a dir is passed in, list contents of each file in dir
        files = getPathsFromFileStatus(fs.listStatus(inputPath));
    } else { // we just have a single file
        files = new Path[] { inputPath };
    }

    // StringBuilder instead of StringBuffer: no concurrent access here, so the
    // per-call synchronization of StringBuffer is pure overhead.
    StringBuilder result = new StringBuilder();
    for (Path inputFile : files) {
        fileStatus = fs.getFileStatus(inputFile);
        if (!fileStatus.isDir()) { // ignore subdirs for now, only process files
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, inputFile, conf);
                WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(),
                        conf);
                Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                result.append("file: " + inputFile + ", keyClass: " + key.getClass().getName()
                        + ", valueClass: " + value.getClass().getName() + "\n");
                int linesRead = 0;
                while (reader.next(key, value)) {
                    if (lineCount > 0 && linesRead >= lineCount) {
                        break;
                    }
                    String valueString = value.toString();
                    result.append(key.toString().trim() + "\t" + valueString);
                    // for pretty output, put a newline between records which don't have one
                    if (!valueString.endsWith("\n")) {
                        result.append("\n");
                    }
                    linesRead++;
                }
            } finally {
                // Always release the reader, even if a record fails to deserialize.
                if (reader != null) {
                    reader.close();
                }
            }
        }
    }
    return result.toString();
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License:LGPL

@Override
/**
 * Opens an InputStream for the given data file. If the path is a directory,
 * the streams of the files it contains are concatenated into a single stream.
 *
 * @param src the data file to open
 * @return an open InputStream over the file (or concatenated directory contents)
 * @throws IOException if the FileSystem cannot be obtained or the path cannot be read
 */
public InputStream getData(final DataFile src) throws IOException {

    final Path path = getPath(src);

    if (path == null) {
        throw new NullPointerException("Path to create is null");
    }
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }

    final FileSystem fs = path.getFileSystem(this.conf);

    if (fs == null) {
        // Fixed typo in the original message ("InputSteam").
        throw new IOException("Unable to create InputStream, The FileSystem is null");
    }

    final FileStatus fStatus = fs.getFileStatus(path);

    if (fStatus.isDirectory()) {

        // A directory is exposed as the concatenation of its files' contents.
        final List<Path> paths = getPathToConcat(fs, path);

        if (paths != null && !paths.isEmpty()) {
            return new PathConcatInputStream(paths, this.conf);
        }
    }

    return fs.open(path);
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java

License:LGPL

@Override
/**
 * Lists the contents of a directory as DataFile objects.
 *
 * @param file the directory to list
 * @return an unmodifiable list of the directory's entries
 * @throws FileNotFoundException if the path does not exist
 * @throws IOException if the path is not a directory or cannot be read
 */
public List<DataFile> list(final DataFile file) throws IOException {

    final Path path = getPath(file);

    if (path == null) {
        // Fixed copy-pasted message: this method lists, it does not delete.
        throw new NullPointerException("Path to list is null");
    }
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }

    final FileSystem fs = path.getFileSystem(this.conf);

    if (fs == null) {
        // Fixed copy-pasted message (was "Unable to delete the file").
        throw new IOException("Unable to list the directory, The FileSystem is null");
    }

    // BUG FIX: the existence check must run before getFileStatus(), which
    // itself throws for a missing path — the original check was unreachable.
    if (!fs.exists(path)) {
        throw new FileNotFoundException("File not found: " + file);
    }

    final FileStatus fileStatus = fs.getFileStatus(path);

    if (!fileStatus.isDirectory()) {
        throw new IOException("The file is not a directory: " + file);
    }

    // List directory
    final FileStatus[] files = fs.listStatus(path);

    // Convert the FileStatus array to a list of DataFile
    final List<DataFile> result = new ArrayList<>(files.length);
    for (FileStatus f : files) {
        result.add(new DataFile(f.getPath().toUri().toString()));
    }

    // Return an unmodifiable list
    return Collections.unmodifiableList(result);
}

From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.PathDataProtocol.java

License:LGPL

@Override
/**
 * Tests whether the given data file exists. For a symbolic link, existence is
 * decided by whether the link's target can be stat'ed. Any I/O failure
 * (including a missing path) is reported as "does not exist".
 */
public boolean exists(final DataFile src, final boolean followLink) {

    final Path path = getPath(src);

    try {
        final FileSystem fs = path.getFileSystem(conf);
        final FileStatus status = fs.getFileStatus(path);

        if (status == null) {
            return false;
        }

        // Non-links exist; links exist only if their target does.
        return !status.isSymlink() || fs.getFileStatus(fs.getLinkTarget(path)) != null;
    } catch (IOException e) {
        // Treat unreadable or missing paths as absent.
        return false;
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

License:LGPL

/**
 * Restores preserved file attributes (user, group, permission) on every
 * destination path recorded in the job's destination-directory list.
 * No-op when no attributes were requested.
 *
 * @param conf               configuration used to obtain the destination FileSystem
 * @param jobconf            job configuration holding the destination dir list
 * @param destPath           root destination path
 * @param presevedAttributes encoded attribute flags; parameter name kept as-is
 *                           [sic] to avoid touching the method's signature
 * @throws IOException on filesystem errors
 */
static private void finalize(final Configuration conf, final JobConf jobconf, final Path destPath,
        final String presevedAttributes) throws IOException {
    if (presevedAttributes == null) {
        return;
    }
    final EnumSet<FileAttribute> preserved = FileAttribute.parse(presevedAttributes);
    if (!preserved.contains(FileAttribute.USER) && !preserved.contains(FileAttribute.GROUP)
            && !preserved.contains(FileAttribute.PERMISSION)) {
        // Nothing to restore.
        return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader in = null;
    try {
        in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
        Text dsttext = new Text();
        FilePair pair = new FilePair();
        // Idiomatic while-loop replaces the original empty-clause for(;;) form.
        while (in.next(dsttext, pair)) {
            Path absdst = new Path(destPath, pair.output);
            updatePermissions(pair.input, dstfs.getFileStatus(absdst), preserved, dstfs);
        }
    } finally {
        checkAndClose(in);
    }
}