List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
/**
 * Returns the {@link FileStatus} describing the given path.
 *
 * @param f the path whose status is requested
 * @return the status object for {@code f}
 * @throws IOException if the status cannot be obtained
 */
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:com.yolodata.tbana.hadoop.mapred.shuttl.ShuttlCSVInputFormat.java
License:Open Source License
private static List<CsvSplit> getFileSplitsFast(int numSplits, Path filePath, FileSystem fs, long startKey) { List<CsvSplit> splits = new ArrayList<CsvSplit>(); try {//from w w w . ja va2 s .c o m long fileSize = fs.getFileStatus(filePath).getLen(); long sizePerSplit = fileSize / (numSplits + 1); long start = 0, end = 0; while (end < fileSize) { end = start + sizePerSplit; if (restOfFileChunkFitsInOneSplit(fileSize, end)) { splits.add(new CsvSplit(filePath, start, end, startKey, true)); break; } // Seek to current length and find the new line FSDataInputStream in = fs.open(filePath); end = findEndOfLinePosition(in, end); splits.add(new CsvSplit(filePath, start, end, startKey, true)); start = end; } } catch (IOException e) { e.printStackTrace(); } return splits; }
From source file:com.yolodata.tbana.testutils.FileSystemTestUtils.java
License:Open Source License
public static Path createEmptyFile(FileSystem fs, String extension) throws IOException { Path file = new Path(FileTestUtils.getRandomTestFilepath().concat("." + extension)); assertTrue(fs.createNewFile(file));//ww w. j av a 2 s. c o m return fs.getFileStatus(file).getPath(); }
From source file:com.yolodata.tbana.testutils.FileSystemTestUtils.java
License:Open Source License
public static Path createEmptyFile(FileSystem fs, Path location, String extension) throws IOException { Path file = HadoopFileTestUtils.createPath(location.toString(), FileTestUtils.getRandomFilename(extension)); assertTrue(fs.createNewFile(file));//from www. j a va 2 s . c om return fs.getFileStatus(file).getPath(); }
From source file:com.yolodata.tbana.testutils.FileSystemTestUtils.java
License:Open Source License
public static Path createEmptyDir(FileSystem fileSystem) throws IOException { Path dir = new Path(FileTestUtils.getRandomTestFilepath()); assertTrue(fileSystem.mkdirs(dir));// w w w . jav a2 s. c o m return fileSystem.getFileStatus(dir).getPath(); }
From source file:com.yolodata.tbana.testutils.FileSystemTestUtils.java
License:Open Source License
public static Path createEmptyDir(FileSystem fileSystem, Path directory, String directoryName) throws IOException { Path dir = HadoopFileTestUtils.createPath(directory.toString(), directoryName); assertTrue(fileSystem.mkdirs(dir));//w w w .j a v a2 s . co m return fileSystem.getFileStatus(dir).getPath(); }
From source file:com.yolodata.tbana.util.search.filter.ModifiedTimeFilterTest.java
License:Open Source License
/**
 * Stubs {@code fs.getFileStatus} for a fixed path so that it returns a status
 * carrying the given modification time.
 *
 * @param fs           the (mocked) file system to stub
 * @param modifiedTime the modification time to put into the returned status
 * @return the path for which the stub was registered
 * @throws IOException declared because {@code getFileStatus} declares it
 */
private Path createMockPathWithModifiedTime(FileSystem fs, long modifiedTime) throws IOException {
    final Path stubbedPath = new Path("anyPath");
    when(fs.getFileStatus(stubbedPath))
            .thenReturn(new FileStatus(0, false, 0, 0, modifiedTime, stubbedPath));
    return stubbedPath;
}
From source file:com.yss.util.YarnUtil.java
License:Open Source License
public static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type, LocalResourceVisibility vis) throws IOException { Path qualified = fs.makeQualified(path); FileStatus status = fs.getFileStatus(qualified); LocalResource resource = Records.newRecord(LocalResource.class); resource.setType(type);//from ww w . j ava2 s .c om resource.setVisibility(vis); resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified)); resource.setTimestamp(status.getModificationTime()); resource.setSize(status.getLen()); return resource; }
From source file:com.zhangyue.zeus.service.impl.ExportDataServiceImpl.java
License:Open Source License
@Override public List<String> readResultData(String hdfsResultLocation, boolean isDownload) { List<String> resultList = new ArrayList<String>(); int readedLines = 0; String temp = null;// w ww . ja v a 2 s . co m try { Path hdfsPath = new Path(hdfsResultLocation); HiveConf hiveConf = new HiveConf(SessionState.class); FileSystem fs = hdfsPath.getFileSystem(hiveConf); FileStatus[] fss = fs.listStatus(hdfsPath); // ??-60000 ?-200 int numberLimit = (isDownload == true) ? Constants.HIVE_RESULT_DOWNLOAD_LIMIT : Constants.HIVE_RESULT_MAX_LIMIT; for (FileStatus fileStatus : fss) { Path fsPath = fileStatus.getPath(); if (readedLines >= numberLimit || fs.getFileStatus(fsPath).isDir()) { break; } BufferedReader bf = new BufferedReader( new InputStreamReader(fs.open(fsPath), Constants.UTF_ENCODING)); while ((temp = bf.readLine()) != null) { if (readedLines >= numberLimit) { break; } resultList.add(temp); readedLines++; } bf.close(); } FileSystem.closeAll(); } catch (Exception e) { LOG.error("don't had your result", e); } return resultList; }
From source file:com.zjy.mongo.output.MongoOutputCommitter.java
License:Apache License
@Override public void commitTask(final TaskAttemptContext taskContext) throws IOException { LOG.info("Committing task."); collections = MongoConfigUtil.getOutputCollections(taskContext.getConfiguration()); numberOfHosts = collections.size();/* ww w .j av a 2 s . c o m*/ // Get temporary file. Path tempFilePath = getTaskAttemptPath(taskContext); LOG.info("Committing from temporary file: " + tempFilePath.toString()); long filePos = 0, fileLen; FSDataInputStream inputStream = null; try { FileSystem fs = FileSystem.get(taskContext.getConfiguration()); inputStream = fs.open(tempFilePath); fileLen = fs.getFileStatus(tempFilePath).getLen(); } catch (IOException e) { LOG.error("Could not open temporary file for committing", e); cleanupAfterCommit(inputStream, taskContext); throw e; } int maxDocs = MongoConfigUtil.getBatchSize(taskContext.getConfiguration()); int curBatchSize = 0; DBCollection coll = getDbCollectionByRoundRobin(); BulkWriteOperation bulkOp = coll.initializeOrderedBulkOperation(); // Read Writables out of the temporary file. BSONWritable bw = new BSONWritable(); MongoUpdateWritable muw = new MongoUpdateWritable(); while (filePos < fileLen) { try { // Determine writable type, and perform corresponding operation // on MongoDB. 
int mwType = inputStream.readInt(); if (MongoWritableTypes.BSON_WRITABLE == mwType) { bw.readFields(inputStream); bulkOp.insert(new BasicDBObject(bw.getDoc().toMap())); } else if (MongoWritableTypes.MONGO_UPDATE_WRITABLE == mwType) { muw.readFields(inputStream); DBObject query = new BasicDBObject(muw.getQuery().toMap()); DBObject modifiers = new BasicDBObject(muw.getModifiers().toMap()); if (muw.isMultiUpdate()) { if (muw.isUpsert()) { bulkOp.find(query).upsert().update(modifiers); } else { bulkOp.find(query).update(modifiers); } } else { if (muw.isUpsert()) { bulkOp.find(query).upsert().updateOne(modifiers); } else { bulkOp.find(query).updateOne(modifiers); } } } else { throw new IOException("Unrecognized type: " + mwType); } filePos = inputStream.getPos(); // Write to MongoDB if the batch is full, or if this is the last // operation to be performed for the Task. if (++curBatchSize >= maxDocs || filePos >= fileLen) { try { bulkOp.execute(); } catch (MongoException e) { LOG.error("Could not write to MongoDB", e); throw e; } coll = getDbCollectionByRoundRobin(); bulkOp = coll.initializeOrderedBulkOperation(); curBatchSize = 0; // Signal progress back to Hadoop framework so that we // don't time out. taskContext.progress(); } } catch (IOException e) { LOG.error("Error reading from temporary file", e); throw e; } } cleanupAfterCommit(inputStream, taskContext); }
From source file:com.zjy.mongo.splitter.BSONSplitter.java
License:Apache License
/** * Load splits from a splits file./*ww w .jav a 2 s .c o m*/ * * @param inputFile the file whose splits are contained in the splits file. * @param splitFile the Path to the splits file. * @throws NoSplitFileException if the splits file is not found. * @throws IOException when an error occurs reading from the file. */ public void loadSplitsFromSplitFile(final FileStatus inputFile, final Path splitFile) throws NoSplitFileException, IOException { ArrayList<BSONFileSplit> splits = new ArrayList<BSONFileSplit>(); FileSystem fs = splitFile.getFileSystem(getConf()); // throws IOException FileStatus splitFileStatus; FSDataInputStream fsDataStream = null; try { try { splitFileStatus = fs.getFileStatus(splitFile); LOG.info("Found split file at : " + splitFileStatus); } catch (Exception e) { throw new NoSplitFileException(); } fsDataStream = fs.open(splitFile); // throws IOException while (fsDataStream.getPos() < splitFileStatus.getLen()) { callback.reset(); bsonDec.decode(fsDataStream, callback); BSONObject splitInfo = (BSONObject) callback.get(); splits.add(createFileSplitFromBSON(splitInfo, fs, inputFile)); } } finally { if (null != fsDataStream) { fsDataStream.close(); } } splitsList = splits; }