List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
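Before the project-specific examples below, here is a minimal sketch of the call itself (not taken from any of the sources on this page; the directory path is only a placeholder): list a directory once and distinguish files from subdirectories.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/data"); // placeholder path
        FileSystem fs = dir.getFileSystem(conf);
        // listStatus returns one FileStatus per direct child of the directory
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isDirectory()) {
                System.out.println("dir:  " + status.getPath());
            } else {
                System.out.println("file: " + status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}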
From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java
License:Apache License
public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger) throws IOException {
    if (!fs.exists(srcDir)) {
        return;
    }
    if (fs.exists(srcDir) && !fs.isDirectory(srcDir)) {
        throw new IllegalArgumentException(srcDir + " is not a directory");
    }
    if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) {
        throw new IllegalArgumentException(dstDir + " is not a directory");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Moving contents of: " + srcDir + " to: " + dstDir);
    }
    FileStatus[] files = fs.listStatus(srcDir);
    for (FileStatus file : files) {
        Path sourcePath = file.getPath();
        Path targetPath = new Path(dstDir, file.getPath().getName());
        if (logger.isDebugEnabled()) {
            logger.debug("Moving: " + sourcePath + " to: " + targetPath);
        }
        if (!fs.mkdirs(targetPath.getParent())) {
            throw new IOException("Failed at creating directory " + targetPath.getParent());
        }
        if (!fs.rename(sourcePath, targetPath)) {
            throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
        }
    }
    fs.delete(srcDir);
}
From source file:com.liveramp.hank.hadoop.DomainBuilderOutputCommitter.java
License:Apache License
private static void copyPartitionsFrom(Path sourceDir, FileSystem fs, Set<Integer> copiedPartitions,
        List<MoveContentsAndDeleteTask> tasks, ExecutorService executor, Path outputPath) throws IOException {
    for (FileStatus partition : fs.listStatus(sourceDir)) {
        if (!IGNORE_PATHS.contains(partition.getPath().getName()) && partition.isDir()) {
            int partitionNumber = Integer.valueOf(partition.getPath().getName());
            if (!copiedPartitions.contains(partitionNumber)) {
                copiedPartitions.add(partitionNumber);
                MoveContentsAndDeleteTask task = new MoveContentsAndDeleteTask(partition.getPath(),
                        new Path(outputPath, partition.getPath().getName()), fs);
                tasks.add(task);
                executor.execute(task);
            }
        }
    }
}
From source file:com.marklogic.mapreduce.ForestInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) { // stand directories
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus children[] = fs.listStatus(path);
        FileStatus treeIndexStatus = null, treeDataStatus = null, ordinalsStatus = null, timestampsStatus = null;
        boolean obsolete = false;
        for (FileStatus child : children) {
            String fileName = child.getPath().getName();
            if (fileName.equals("TreeData")) { // inside a stand
                treeDataStatus = child;
            } else if (fileName.equals("TreeIndex")) {
                treeIndexStatus = child;
            } else if (fileName.equals("Ordinals")) {
                ordinalsStatus = child;
            } else if (fileName.equals("Timestamps")) {
                timestampsStatus = child;
            } else if (fileName.equals("Obsolete")) {
                obsolete = true;
                break;
            }
        }
        if (obsolete) {
            LOG.warn("Obsolete file found. The forest is either live or isn't "
                    + "dismounted cleanly. Ignoring stand " + path);
            break;
        }
        if (treeDataStatus == null) {
            throw new RuntimeException("TreeData file not found.");
        } else if (treeIndexStatus == null) {
            throw new RuntimeException("TreeIndex file not found.");
        } else if (ordinalsStatus == null) {
            throw new RuntimeException("Ordinals file not found.");
        } else if (timestampsStatus == null) {
            throw new RuntimeException("Timestamps file not found.");
        }
        long treeDataSize = treeDataStatus.getLen();
        if (treeDataSize == 0) {
            // unexpected, give up this stand
            LOG.warn("Found empty TreeData file. Skipping...");
            continue; // skipping this stand
        }
        Path treeDataPath = treeDataStatus.getPath();
        long blockSize = treeDataStatus.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        // make splits based on TreeIndex
        FSDataInputStream is = fs.open(treeIndexStatus.getPath());
        BiendianDataInputStream in = new BiendianDataInputStream(is);
        int prevDocid = -1, docid = -1, position = 0;
        long prevOffset = -1L, offset = 0, splitStart = 0;
        BlockLocation[] blkLocations = fs.getFileBlockLocations(treeDataStatus, 0, treeDataSize);
        try {
            for (;; ++position) {
                try {
                    docid = in.readInt();
                    in.readInt();
                    offset = in.readLong();
                } catch (EOFException e) {
                    break;
                }
                int comp = InternalUtilities.compareUnsignedLong(offset, treeDataSize);
                if (comp > 0) {
                    throw new RuntimeException("TreeIndex offset is out of bound: position = " + position
                            + ", offset = " + offset + ", treeDataSize = " + treeDataSize);
                }
                if (prevDocid != -1 && (docid & 0xffffffffL) <= (prevDocid & 0xffffffffL)) {
                    throw new RuntimeException("docid out of order, position = " + position + ", docid = "
                            + docid + ", prevDocid = " + prevDocid);
                }
                prevDocid = docid;
                if (prevOffset != -1L && InternalUtilities.compareUnsignedLong(offset, prevOffset) <= 0) {
                    throw new RuntimeException("offset out of order, position = " + position + ", offset = "
                            + offset + ", prevOffset = " + prevOffset);
                }
                long splitLen = offset - splitStart;
                if (splitLen == splitSize
                        || (splitLen > splitSize && splitLen - splitSize <= splitSize - (prevOffset - splitStart))) {
                    int blkIndex = getBlockIndex(blkLocations, offset);
                    InputSplit split = new FileSplit(treeDataPath, splitStart, splitLen,
                            blkLocations[blkIndex].getHosts());
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Created split: start=" + splitStart + " len=" + splitLen
                                + " last docid=" + docid);
                    }
                    splits.add(split);
                    splitStart = offset;
                } else if (splitLen > splitSize) {
                    int blkIndex = getBlockIndex(blkLocations, prevOffset);
                    InputSplit split = new FileSplit(treeDataPath, splitStart, prevOffset - splitStart,
                            blkLocations[blkIndex].getHosts());
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Created split: start=" + splitStart + " len=" + (prevOffset - splitStart)
                                + " last docid=" + docid);
                    }
                    splits.add(split);
                    splitStart = prevOffset;
                }
                // Track the previous offset for the ordering check and the split-boundary
                // decision above. This assignment is presumed here (it appears to have been
                // lost when the snippet was flattened; the checkTreeIndex example below does
                // the same bookkeeping).
                prevOffset = offset;
            }
        } finally {
            in.close();
        }
        if (offset > splitStart) {
            int blkIndex = getBlockIndex(blkLocations, offset - 1);
            InputSplit split = new FileSplit(treeDataPath, splitStart, offset - splitStart,
                    blkLocations[blkIndex].getHosts());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Created split: start=" + splitStart + " len=" + (offset - splitStart)
                        + " last docid=" + docid);
            }
            splits.add(split);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Made " + splits.size() + " splits.");
    }
    return splits;
}
From source file:com.marklogic.mapreduce.test.FCheck.java
License:Apache License
public void checkTreeIndex(File dir) throws IOException {
    File file = new File(dir, "TreeIndex");
    if (verbose)
        System.out.println(file.getAbsolutePath() + " -> checkTreeIndex");
    // BiendianDataInputStream in = openFile(file, 1 << 18);
    Path path = new Path(dir.getAbsolutePath());
    FileSystem fs = path.getFileSystem(new Configuration());
    FileStatus children[] = fs.listStatus(path);
    FileStatus treeIndexStatus = null, treeDataStatus = null;
    for (FileStatus child : children) {
        String fileName = child.getPath().getName();
        if (fileName.equals("TreeData")) { // inside a stand
            treeDataStatus = child;
        } else if (fileName.equals("TreeIndex")) {
            treeIndexStatus = child;
        }
        if (treeDataStatus != null && treeIndexStatus != null) {
            break;
        }
    }
    if (treeDataStatus == null) {
        throw new RuntimeException("TreeData file not found.");
    } else if (treeIndexStatus == null) {
        throw new RuntimeException("TreeIndex file not found.");
    }
    long treeDataSize = treeDataStatus.getLen();
    if (treeDataSize == 0) {
        // unexpected, give up this stand
        System.err.println("Found empty TreeData file. Skipping...");
        return;
    }
    FSDataInputStream is = fs.open(treeIndexStatus.getPath());
    BiendianDataInputStream in = new BiendianDataInputStream(is);
    in.setLittleEndian(littleEndian);
    int prevDocid = -1;
    long prevOffset = -1L;
    int position = 0;
    int docid;
    long offset;
    for (;; ++position) {
        try {
            docid = in.readInt();
            in.readInt();
            offset = in.readLong();
        } catch (EOFException e) {
            break;
        }
        if (debug) {
            System.out.println(String.format("TreeIndex p %08x d %08x o %016x", position, docid, offset));
        }
        if (compareUnsignedLong(offset, treeDataSize) >= 0) {
            panic(file, String.format("offset out of range, position=%d, offset=%d, treeDataSize=%d",
                    position, offset, treeDataSize));
        }
        if (prevDocid != -1 && (docid & 0xffffffffL) <= (prevDocid & 0xffffffffL)) {
            panic(file, String.format("docid out of order, position=%d, docid=%d, prevDocid=%d",
                    position, docid, prevDocid));
        }
        prevDocid = docid;
        if (prevOffset != -1L && compareUnsignedLong(offset, prevOffset) <= 0) {
            panic(file, String.format("offset out of order, position=%d, offset=%d, prevOffset=%d",
                    position, offset, prevOffset));
        }
        prevOffset = offset;
    }
    if (verbose)
        System.out.println(file.getAbsolutePath() + " <- checkTreeIndex [" + position + "]");
}
From source file:com.maxpoint.cascading.avro.AvroScheme.java
License:Open Source License
private void retrieveSchema(FlowProcess<JobConf> flowProcess, Tap tap) {
    try {
        if (tap instanceof CompositeTap)
            tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
        final String file = tap.getIdentifier();
        Path p = new Path(file);
        Configuration conf = new Configuration();
        final FileSystem fs = p.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(p)) {
            p = status.getPath();
            // no need to open them all
            InputStream stream = new BufferedInputStream(fs.open(p));
            DataFileStream reader = new DataFileStream(stream, new ReflectDatumReader());
            dataSchema = reader.getSchema();
            retrieveSourceFields(tap);
            return;
        }
        throw new RuntimeException("no schema found in " + file);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.mellanox.r4h.DistributedFileSystem.java
License:Apache License
/**
 * List all the entries of a directory
 *
 * Note that this operation is not atomic for a large directory.
 * The entries of a directory may be fetched from NameNode multiple times.
 * It only guarantees that each name occurs once if a directory
 * undergoes changes between the calls.
 */
@Override
public FileStatus[] listStatus(Path p) throws IOException {
    Path absF = fixRelativePart(p);
    return new FileSystemLinkResolver<FileStatus[]>() {
        @Override
        public FileStatus[] doCall(final Path p) throws IOException, UnresolvedLinkException {
            return listStatusInternal(p);
        }

        @Override
        public FileStatus[] next(final FileSystem fs, final Path p) throws IOException {
            return fs.listStatus(p);
        }
    }.resolve(this, absF);
}
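Side note (not part of the R4H source above): the javadoc points out that listStatus is not atomic for a large directory and may contact the NameNode several times while building the full FileStatus[] array. For very large directories, FileSystem also exposes incremental listing through a RemoteIterator; here is a minimal sketch, assuming a Hadoop 2.x or later client and a placeholder path.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class IncrementalListingExample {
    public static void main(String[] args) throws IOException {
        Path dir = new Path("/tmp/large-dir"); // placeholder path
        FileSystem fs = dir.getFileSystem(new Configuration());
        // listLocatedStatus iterates over directory entries instead of
        // materializing one large FileStatus[] array
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " " + status.getLen());
        }
    }
}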
From source file:com.metamx.druid.indexer.DbUpdaterJob.java
License:Open Source License
@Override
public boolean run() {
    final Configuration conf = new Configuration();

    ImmutableList.Builder<DataSegment> publishedSegmentsBuilder = ImmutableList.builder();

    for (String propName : System.getProperties().stringPropertyNames()) {
        if (propName.startsWith("hadoop.")) {
            conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
        }
    }

    final Path descriptorInfoDir = config.makeDescriptorInfoDir();

    try {
        FileSystem fs = descriptorInfoDir.getFileSystem(conf);

        for (FileStatus status : fs.listStatus(descriptorInfoDir)) {
            final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class);

            dbi.withHandle(new HandleCallback<Void>() {
                @Override
                public Void withHandle(Handle handle) throws Exception {
                    handle.createStatement(String.format(
                            "INSERT INTO %s (id, dataSource, created_date, start, end, partitioned, version, used, payload) "
                                    + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)",
                            spec.getSegmentTable()))
                            .bind("id", segment.getIdentifier())
                            .bind("dataSource", segment.getDataSource())
                            .bind("created_date", new DateTime().toString())
                            .bind("start", segment.getInterval().getStart().toString())
                            .bind("end", segment.getInterval().getEnd().toString())
                            .bind("partitioned", segment.getShardSpec().getPartitionNum())
                            .bind("version", segment.getVersion())
                            .bind("used", true)
                            .bind("payload", jsonMapper.writeValueAsString(segment))
                            .execute();
                    return null;
                }
            });

            publishedSegmentsBuilder.add(segment);
            log.info("Published %s", segment.getIdentifier());
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }

    publishedSegments = publishedSegmentsBuilder.build();

    return true;
}
From source file:com.metamx.druid.indexer.hadoop.FSSpideringIterator.java
License:Open Source License
public static FSSpideringIterator spiderPathPropogateExceptions(FileSystem fs, Path path) {
    try {
        final FileStatus[] statii = fs.listStatus(path);
        return new FSSpideringIterator(fs, statii == null ? new FileStatus[] {} : statii);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.metamx.milano.pig.MilanoLoadFunc.java
License:Apache License
private void getPaths(Path baseDirectory, Set<Path> paths, FileSystem fileSystem) throws IOException {
    FileStatus[] files = fileSystem.listStatus(baseDirectory);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        if (fileStatus.isDir()) {
            getPaths(path, paths, fileSystem);
        } else {
            paths.add(baseDirectory);
        }
    }
}
From source file:com.metamx.milano.pig.MilanoLoadFunc.java
License:Apache License
private Path getFilePath(Path path, FileSystem fileSystem) throws IOException {
    Path newPath = null;
    FileStatus[] files = fileSystem.listStatus(path);
    for (FileStatus file : files) {
        if (file.isDir()) {
            newPath = getFilePath(file.getPath(), fileSystem);
            if (newPath != null) {
                break;
            }
        } else {
            newPath = file.getPath();
            break;
        }
    }
    return newPath;
}