List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
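getFileStatus returns a FileStatus describing a single path (length, modification time, replication, owner, permissions, and so on) and throws FileNotFoundException when the path does not exist. Before the full examples below, here is a minimal sketch of the typical call pattern; the default-configured FileSystem and the /tmp/example.txt path are placeholders chosen for illustration, not taken from any of the source files listed here.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Uses whatever fs.defaultFS is configured; the path below is a placeholder.
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt");
        try {
            FileStatus status = fs.getFileStatus(path);
            System.out.println("Length: " + status.getLen());
            System.out.println("Is directory: " + status.isDirectory());
        } catch (FileNotFoundException e) {
            // getFileStatus throws FileNotFoundException when the path does not exist
            System.err.println("No such path: " + path);
        }
    }
}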
From source file:com.intel.hibench.datagen.streaming.util.SourceFileReader.java
License:Apache License
static private InputStream openMultipleParts(FileSystem fs, Path pt, long offset) throws IOException {
    System.out.println("opening all parts in path: " + pt + ", from offset: " + offset);

    // list all files in given path
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    while (rit.hasNext()) {
        Path path = rit.next().getPath();

        // Only read files whose names start with "part-"
        if (path.getName().startsWith("part-")) {
            long fileSize = fs.getFileStatus(path).getLen();
            if (offset < fileSize) {
                FSDataInputStream inputStream = fs.open(path);
                if (offset > 0) {
                    inputStream.seek(offset);
                }
                fileHandleList.add(inputStream);
            }
            offset -= fileSize;
        }
    }

    if (!fileHandleList.isEmpty()) {
        return new SequenceInputStream(fileHandleList.elements());
    } else {
        System.err.println("Error, no source file loaded. run genSeedDataset.sh first!");
        return null;
    }
}
From source file:com.intel.hibench.streambench.FileDataGenNew.java
License:Apache License
private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset) throws IOException {
    System.out.println("Opening files, path:" + pt + " offset:" + offset);
    RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
    Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
    while (rit.hasNext()) {
        Path path = rit.next().getPath();
        String filename = path.toString().substring(path.getParent().toString().length(),
                path.toString().length());

        if (filename.startsWith("/part-")) {
            long filesize = fs.getFileStatus(path).getLen();
            if (offset < filesize) {
                FSDataInputStream handle = fs.open(path);
                if (offset > 0) {
                    handle.seek(offset);
                }
                fileHandleList.add(handle);
            }
            offset -= filesize;
        }
    }
    if (fileHandleList.size() == 1) {
        return fileHandleList.get(0);
    } else if (fileHandleList.size() > 1) {
        Enumeration<FSDataInputStream> enu = fileHandleList.elements();
        return new SequenceInputStream(enu);
    } else {
        System.err.println("Error, no source file loaded. run genSeedDataset.sh first!");
        return null;
    }
}
From source file:com.jeffy.hdfs.FileMetaData.java
License:Apache License
public static void showFileStatusForFile(String path) {
    Configuration config = new Configuration();
    try {
        FileSystem fs = FileSystem.get(URI.create(path), config);
        FileStatus stat = fs.getFileStatus(new Path(path));
        System.out.println("File URI: " + stat.getPath().toUri().getPath());
        System.out.println("Is directory: " + stat.isDirectory());
        System.out.println("File length: " + stat.getLen());
        System.out.println("Modification Time: " + new Date(stat.getModificationTime()));
        System.out.println("File replications: " + stat.getReplication());
        System.out.println("File Block Size: " + (stat.getBlockSize() >>> 10 >>> 10) + " MB");
        System.out.println("File Owner: " + stat.getOwner());
        System.out.println("File Group: " + stat.getGroup());
        System.out.println("File Permission: " + stat.getPermission().toString());
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/**
 * Initialize ExecFilesMapper specific job-configuration.
 *
 * @param conf    : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args    Arguments
 * @return true if it is necessary to launch a job.
 */
private static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());
    jobConf.set(EXEC_CMD_LABEL, args.execCmd);

    // set boolean values
    jobConf.setBoolean(Options.REDIRECT_ERROR_TO_OUT.propertyname,
            args.flags.contains(Options.REDIRECT_ERROR_TO_OUT));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path stagingArea;
    try {
        stagingArea = JobSubmissionFiles.getStagingDir(jClient, conf);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
    FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(FileSystem.get(jobDirectory.toUri(), conf), jobDirectory, mapredSysPerms);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);

    // get tokens for all the required FileSystems..
    TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_" + NAME + "_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_" + NAME + "_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_" + NAME + "_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_" + NAME + "_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists);

    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());

                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_" + NAME + "_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_" + NAME + "_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("sourcePathsCount=" + srcCount);
    LOG.info("filesToExecCount=" + fileCount);
    LOG.info("bytesToExecCount=" + StringUtils.humanReadableInt(byteCount));
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(fileCount, jobConf);
    return fileCount > 0;
}
From source file:com.knewton.mapreduce.SSTableRecordReader.java
License:Apache License
/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize,
                        bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Tests to see if tables can be correctly copied locally
 */
@Test
public void testCopyTablesToLocal() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(2, ssTableColumnRecordReader.getComponentSize());
}
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Tests to see if tables can be correctly copied locally including the compression info table
 */
@Test
public void testCopyTablesToLocalWithCompressionInfo() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    String str = ssTableColumnRecordReader.getDescriptor().filenameFor(Component.COMPRESSION_INFO);
    when(remoteFS.exists(new Path(str))).thenReturn(true);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);

    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(3, ssTableColumnRecordReader.getComponentSize());
}
From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java
License:Apache License
/**
 * Makes sure an exception is thrown when the data table path is a directory
 */
@Test(expected = IllegalArgumentException.class)
public void testCopyToLocalFileWithDirectoryPath() throws Exception {
    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    Path dataTablePath = inputSplit.getPath();
    Path localPath = new Path("local/path");

    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(true);
    when(remoteFS.getFileStatus(dataTablePath)).thenReturn(fileStatus);

    ssTableColumnRecordReader.copyToLocalFile(remoteFS, localFS, dataTablePath, localPath);
}
From source file:com.kylinolap.dict.DictionaryManager.java
License:Apache License
private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {

    InputStream in = this.getClass().getResourceAsStream("/com/kylinolap/dict/" + dataSetName + ".txt");
    if (in == null) // data set resource not found
        return null;

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    IOUtils.copy(in, buf);
    in.close();
    byte[] bytes = buf.toByteArray();

    Path tmpDataSetPath = new Path(
            tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");

    FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);
    boolean writtenNewFile = false;
    if (fs.exists(tmpDataSetPath) == false || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
        fs.mkdirs(tmpDataSetPath.getParent());
        FSDataOutputStream out = fs.create(tmpDataSetPath);
        IOUtils.copy(new ByteArrayInputStream(bytes), out);
        out.close();
        writtenNewFile = true;
    }

    String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
    if (writtenNewFile)
        logger.info("Dictionary temp data set file written to " + qualifiedPath);
    return qualifiedPath;
}
From source file:com.kylinolap.dict.lookup.FileTable.java
License:Apache License
@Override
public TableSignature getSignature() throws IOException {
    FileSystem fs = HadoopUtil.getFileSystem(path);
    FileStatus status = fs.getFileStatus(new Path(path));
    return new TableSignature(path, status.getLen(), status.getModificationTime());
}