List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java
License:Open Source License
/**
 * Initializes this record reader over the given file split.
 *
 * <p>The file is a BGZF block-compressed file read via a
 * {@link BlockCompressedInputStream}; positions handled here are BGZF
 * "virtual file offsets" (compressed-block offset in the upper bits,
 * intra-block offset in the low 16 bits), which is why raw split offsets
 * are shifted left by 16.
 *
 * @param genericSplit the {@code FileSplit} to read; must be a FileSplit
 * @param context task context providing the job Configuration
 * @throws IOException if the file cannot be opened or seeked
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    Configuration conf = ContextUtil.getConfiguration(context);
    // Maximum accepted line length; lines longer than this are truncated/skipped
    // by the reader using this limit.
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    FileSplit split = (FileSplit) genericSplit;
    // Convert the raw compressed-file offset into a BGZF virtual offset.
    start = (split.getStart()) << 16;
    // NOTE(review): 'start' is already shifted at this point, so the split
    // start is effectively shifted twice here while the length is shifted
    // once — looks suspicious; confirm against upstream Hadoop-BAM before
    // changing, since downstream comparisons may compensate.
    end = (start + split.getLength()) << 16;
    final Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    // Wrap the HDFS stream so htsjdk's BlockCompressedInputStream can seek it;
    // the file length comes from getFileStatus().
    bin = new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(fs.open(file), fs.getFileStatus(file).getLen(), file));
    in = new LineReader(bin, conf);
    if (start != 0) {
        bin.seek(start);
        // Skip the (potentially partial) first line: it belongs to the
        // previous split, whose reader consumes it in full.
        in.readLine(new Text());
        start = bin.getFilePointer();
    }
    this.pos = start;
}
From source file:fi.tkk.ics.hadoop.bam.SplittingBAMIndexer.java
License:Open Source License
/** * Invoke a new SplittingBAMIndexer object, operating on the supplied {@link * org.apache.hadoop.conf.Configuration} object instead of a supplied * argument list//from w w w . j av a2 s . c o m * * @throws java.lang.IllegalArgumentException if the "input" property is not * in the Configuration */ public static void run(final Configuration conf) throws IOException { final String inputString = conf.get("input"); if (inputString == null) throw new IllegalArgumentException("String property \"input\" path not found in given Configuration"); final FileSystem fs = FileSystem.get(conf); // Default to a granularity level of 4096. This is generally sufficient // for very large BAM files, relative to a maximum heap size in the // gigabyte range. final SplittingBAMIndexer indexer = new SplittingBAMIndexer(conf.getInt("granularity", 4096)); final Path input = new Path(inputString); indexer.index(fs.open(input), fs.create(input.suffix(OUTPUT_FILE_EXTENSION)), fs.getFileStatus(input).getLen()); }
From source file:fi.tkk.ics.hadoop.bam.util.WrapSeekable.java
License:Open Source License
/** A helper for the common use case: wrap an opened path with its length. */
public static WrapSeekable<FSDataInputStream> openPath(FileSystem fs, Path p) throws IOException {
    final FSDataInputStream stream = fs.open(p);
    final long length = fs.getFileStatus(p).getLen();
    return new WrapSeekable<FSDataInputStream>(stream, length, p);
}
From source file:finderbots.recommenders.hadoop.ActionSplitterJob.java
License:Apache License
/**
 * Writes the user and item ID index files into the given directory.
 *
 * @param where existing directory that will receive the two index files
 *        (file names come from {@code options})
 * @throws IOException if {@code where} is not a directory or writing fails
 */
public void saveIndexes(Path where) throws IOException {
    Path userIndexPath = new Path(where, options.getUserIndexFile());
    Path itemIndexPath = new Path(where, options.getItemIndexFile());
    FileSystem fs = where.getFileSystem(new JobConf());
    if (!fs.getFileStatus(where).isDir()) {
        // Fixed typo in the original message ("locaton").
        throw new IOException("Bad location for ID Indexes: " + where.toString());
    }
    // try-with-resources guarantees the output streams are flushed and closed
    // even if writeIndex throws — the original never closed them.
    // (Closing an already-closed Hadoop stream is a harmless no-op, so this is
    // safe even if Utils.writeIndex closes its argument itself.)
    try (FSDataOutputStream userIndexFile = fs.create(userIndexPath)) {
        Utils.writeIndex(userIndex, userIndexFile);
    }
    try (FSDataOutputStream itemIndexFile = fs.create(itemIndexPath)) {
        Utils.writeIndex(itemIndex, itemIndexFile);
    }
}
From source file:finderbots.recommenders.hadoop.ActionSplitterJob.java
License:Apache License
public List<FSDataInputStream> getActionFiles(Path baseInputDir) throws IOException { List<FSDataInputStream> files = new ArrayList<FSDataInputStream>(); FileSystem fs = baseInputDir.getFileSystem(getConf()); try {/*from ww w.ja v a2 s. c o m*/ FileStatus inStat = fs.getFileStatus(baseInputDir); Boolean inputIsDir = inStat.isDir(); LOGGER.info("\n======\n\n\n Input path = " + baseInputDir.toString() + "\n" + " isDir = " + inputIsDir.toString() + "\n\n\n======\n"); if (inputIsDir) { FileStatus[] stats = fs.listStatus(baseInputDir); for (FileStatus fstat : stats) { String filename = fstat.getPath().getName(); String regex = options.getInputFilePattern(); Boolean match = filename.matches(regex); if (fstat.isDir()) { files.addAll(getActionFiles(fstat.getPath())); } else if (fstat.getPath().getName().matches(options.getInputFilePattern())) { //assume a regex was passed in and check for matches files.add(fs.open(fstat.getPath())); } else if ( //assume a simple ".tsv" or other included string was passed in //exclude system files, like the hadoop created files _SUCCEED, .crc's etc. fstat.getPath().getName().contains(options.getInputFilePattern()) && !fstat.isDir() && !fstat.getPath().getName().startsWith("_") && !fstat.getPath().getName().startsWith(".") && !fstat.getPath().getName().startsWith("~")) { files.add(fs.open(fstat.getPath())); } } } else if (// processing a single file as input but exclude system files inStat.getPath().getName().contains(options.getInputFilePattern())) { files.add(fs.open(inStat.getPath())); } else {// doesn't match any input pattern so no input throw new IOException("No input to process at: " + baseInputDir.toString()); } } catch (IOException e) { LOGGER.error("Cannot find base of input file tree for: " + baseInputDir.toString()); throw e; } return files; }
From source file:fm.last.hadoop.utils.seq.SequenceFileUtils.java
License:Apache License
/** * Reads the contents of a SequenceFile into a human-readable String. The number of lines read in will be * limited to the value set by the lineCount variable, if this is >= 0 , all lines will be read. * //w ww. j av a2 s . c o m * @param inputPath Path to the file to be read. * @param lineCount Number of lines to read from the top of the file. * @param conf Configuration object to use to get file system. * @return The contents of the file as a String. * @throws IOException If an error occurs reading the file. */ public static String readSequenceFileTop(Path inputPath, int lineCount, Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); Path[] files = null; FileStatus fileStatus = fs.getFileStatus(inputPath); if (fileStatus.isDir()) { // if a dir is passed in, list contents of each file in dir files = getPathsFromFileStatus(fs.listStatus(inputPath)); } else { // we just have a single file files = new Path[] { inputPath }; } StringBuffer result = new StringBuffer(); for (Path inputFile : files) { fileStatus = fs.getFileStatus(inputFile); if (!fileStatus.isDir()) { // ignore subdirs for now, only process files SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, inputFile, conf); WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); result.append("file: " + inputFile + ", keyClass: " + key.getClass().getName() + ", valueClass: " + value.getClass().getName() + "\n"); int linesRead = 0; while (reader.next(key, value)) { if (lineCount > 0 && linesRead >= lineCount) { break; } String valueString = value.toString(); result.append(key.toString().trim() + "\t" + valueString); // for pretty output, put a newline between records which don't have one if (!valueString.endsWith("\n")) { result.append("\n"); } linesRead++; } } finally { if (reader != null) { reader.close(); } } } } return 
result.toString(); }
From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java
License:LGPL
/**
 * Opens an InputStream over the given source file.
 *
 * <p>If the path is a directory, the streams of the files to concatenate
 * (as chosen by {@code getPathToConcat}) are returned as a single
 * {@link PathConcatInputStream}; otherwise the file itself is opened.
 *
 * @param src the data file to read
 * @return an open stream over the file or concatenated directory contents
 * @throws IOException if the FileSystem cannot be obtained or opening fails
 * @throws NullPointerException if the path or the configuration is null
 */
@Override
public InputStream getData(final DataFile src) throws IOException {
    final Path path = getPath(src);
    if (path == null) {
        throw new NullPointerException("Path to create is null");
    }
    if (this.conf == null) {
        throw new NullPointerException("The configuration object is null");
    }
    final FileSystem fs = path.getFileSystem(this.conf);
    if (fs == null) {
        // Fixed typo in the original message ("InputSteam").
        throw new IOException("Unable to create InputStream, The FileSystem is null");
    }
    final FileStatus fStatus = fs.getFileStatus(path);
    if (fStatus.isDirectory()) {
        final List<Path> paths = getPathToConcat(fs, path);
        if (paths != null && !paths.isEmpty()) {
            return new PathConcatInputStream(paths, this.conf);
        }
    }
    return fs.open(path);
}
From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.HDFSPathDataProtocol.java
License:LGPL
@Override public List<DataFile> list(final DataFile file) throws IOException { final Path path = getPath(file); if (path == null) { throw new NullPointerException("Path to delete is null"); }//from www . j av a 2 s.c o m if (this.conf == null) { throw new NullPointerException("The configuration object is null"); } final FileSystem fs = path.getFileSystem(this.conf); if (fs == null) { throw new IOException("Unable to delete the file, The FileSystem is null"); } FileStatus fileStatus = fs.getFileStatus(path); if (!fs.exists(path)) { throw new FileNotFoundException("File not found: " + file); } if (!fileStatus.isDirectory()) { throw new IOException("The file is not a directory: " + file); } // List directory final FileStatus[] files = fs.listStatus(path); // Convert the File array to a list of DataFile final List<DataFile> result = new ArrayList<>(files.length); for (FileStatus f : files) { result.add(new DataFile(f.getPath().toUri().toString())); } // Return an unmodifiable list return Collections.unmodifiableList(result); }
From source file:fr.ens.biologie.genomique.eoulsan.data.protocols.PathDataProtocol.java
License:LGPL
@Override public boolean exists(final DataFile src, final boolean followLink) { final Path path = getPath(src); try {//from www.j ava2s. co m final FileSystem fs = path.getFileSystem(conf); final FileStatus status = fs.getFileStatus(path); if (status == null) { return false; } if (status.isSymlink()) { return fs.getFileStatus(fs.getLinkTarget(path)) != null; } return true; } catch (IOException e) { return false; } }
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Applies preserved file attributes (user/group/permission) to every copied
 * directory recorded in the job's destination-directory list.
 *
 * <p>No-op when no attributes were requested, or when none of the requested
 * attributes is USER, GROUP or PERMISSION.
 *
 * @param conf filesystem configuration for the destination
 * @param jobconf job configuration holding the DST_DIR_LIST_LABEL path
 * @param destPath root of the copy destination
 * @param preservedAttributes encoded attribute set, or null for none
 * @throws IOException if reading the list or updating permissions fails
 */
static private void finalize(final Configuration conf, final JobConf jobconf, final Path destPath,
        final String preservedAttributes) throws IOException {
    if (preservedAttributes == null) {
        return;
    }
    final EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
    final boolean relevant = preserved.contains(FileAttribute.USER) || preserved.contains(FileAttribute.GROUP)
            || preserved.contains(FileAttribute.PERMISSION);
    if (!relevant) {
        return;
    }

    final FileSystem dstfs = destPath.getFileSystem(conf);
    final Path dirListPath = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(dirListPath.getFileSystem(jobconf), dirListPath, jobconf);
        final Text dstText = new Text();
        final FilePair pair = new FilePair();
        // Walk every recorded destination entry and re-apply the attributes.
        while (reader.next(dstText, pair)) {
            final Path absoluteDst = new Path(destPath, pair.output);
            updatePermissions(pair.input, dstfs.getFileStatus(absoluteDst), preserved, dstfs);
        }
    } finally {
        checkAndClose(reader);
    }
}