List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
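Before the examples, a minimal sketch of the basic call pattern (the path is hypothetical): getFileStatus either returns a populated FileStatus or throws FileNotFoundException when the path does not exist.

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusDemo {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/tmp/example.txt"); // hypothetical path
        try {
            FileStatus status = fs.getFileStatus(p);
            System.out.println("len=" + status.getLen()
                    + " dir=" + status.isDirectory()
                    + " mtime=" + status.getModificationTime());
        } catch (FileNotFoundException e) {
            System.err.println(p + " does not exist");
        }
    }
}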
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
public static long getFileSize(FileSystem fs, Path path) throws IOException {
    return fs.getFileStatus(path).getLen();
}
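A possible caller for this helper; the data file path is an assumption for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.uber.hoodie.common.util.FSUtils;

public class FileSizeDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical data file path, for illustration only.
        long bytes = FSUtils.getFileSize(fs, new Path("/hoodie/table/part-0001.parquet"));
        System.out.println("file size: " + bytes + " bytes");
    }
}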
From source file:com.uber.hoodie.hadoop.HoodieInputFormat.java
License:Apache License
/**
 * Checks the file status for a race condition which can set the file size to 0.
 * 1. HiveInputFormat does super.listStatus() and gets back a FileStatus[]
 * 2. Then it creates the HoodieTableMetaClient for the paths listed.
 * 3. Generation of splits looks at FileStatus size to create splits, which skips this file
 */
private HoodieDataFile checkFileStatus(HoodieDataFile dataFile) throws IOException {
    Path dataPath = dataFile.getFileStatus().getPath();
    try {
        if (dataFile.getFileSize() == 0) {
            FileSystem fs = dataPath.getFileSystem(conf);
            LOG.info("Refreshing file status " + dataFile.getPath());
            return new HoodieDataFile(fs.getFileStatus(dataPath));
        }
        return dataFile;
    } catch (IOException e) {
        throw new HoodieIOException("Could not get FileStatus on path " + dataPath);
    }
}
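The underlying pattern, rewritten as a self-contained sketch with the Hudi types removed: when a cached FileStatus reports zero length, re-fetch it with a fresh getFileStatus call before trusting the size.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RefreshStatusSketch {
    // If a cached FileStatus reports zero length (possibly a stale listing),
    // re-fetch it from the filesystem before trusting the size.
    static FileStatus refreshIfEmpty(FileStatus cached, Configuration conf) throws IOException {
        if (cached.getLen() > 0) {
            return cached;
        }
        Path path = cached.getPath();
        FileSystem fs = path.getFileSystem(conf);
        return fs.getFileStatus(path);
    }
}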
From source file:com.uber.hoodie.utilities.UtilHelpers.java
License:Apache License
/**
 * Parse Schema from file
 *
 * @param fs File System
 * @param schemaFile Schema File
 */
public static String parseSchema(FileSystem fs, String schemaFile) throws Exception {
    // Read schema file.
    Path p = new Path(schemaFile);
    if (!fs.exists(p)) {
        throw new Exception(String.format("Could not find - %s - schema file.", schemaFile));
    }
    long len = fs.getFileStatus(p).getLen();
    ByteBuffer buf = ByteBuffer.allocate((int) len);
    try (FSDataInputStream inputStream = fs.open(p)) {
        inputStream.readFully(0, buf.array(), 0, buf.array().length);
    }
    return new String(buf.array());
}
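A hypothetical caller; the schema file location is an assumption for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import com.uber.hoodie.utilities.UtilHelpers;

public class ParseSchemaDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical schema file location.
        String schema = UtilHelpers.parseSchema(fs, "/config/source.avsc");
        System.out.println(schema);
    }
}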
From source file:com.wipro.ats.bdre.dq.DQDriver.java
License:Apache License
@Override
public int run(String[] arg) throws Exception {
    String processId = arg[0];
    String sPath = arg[1];
    String destDir = arg[2];
    Properties props = new GetProperties().getProperties(processId, "dq");
    LOGGER.debug("props=" + props);
    Configuration conf = getConf();
    conf.set("dq.process.id", processId);
    Job job = Job.getInstance(conf);
    job.setJobName("Data Quality " + processId);
    job.setJarByClass(DQDriver.class);
    job.setMapperClass(DQMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Reducer is not required
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    Path inputFilePath = new Path(sPath);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, removeIfExistAndSetOutputPath(conf, destDir));
    MultipleOutputs.addNamedOutput(job, DQConstants.GOOD_RECORDS_FILE, TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.BAD_RECORDS_FILE, TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.FILE_REPORT_FILE, TextOutputFormat.class, Text.class, NullWritable.class);
    if (!job.waitForCompletion(true)) {
        return 1;
    }
    Path outputDir = new Path(destDir);
    FileSystem srcFs = outputDir.getFileSystem(getConf());
    FileSystem destFs = outputDir.getFileSystem(getConf());
    // Valid Records
    Path goodFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR);
    // Input and quality filtered file should have same name (but different path)
    Path goodDestFile = new Path(destDir + "/" + inputFilePath.getName());
    if (srcFs.exists(goodFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, goodFilesSrcDir, destFs, goodDestFile, true, conf, "");
    }
    // Invalid Records
    Path badFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR);
    Path badDestFile = new Path(destDir + "/" + DQConstants.BAD_RECORDS_FILE);
    if (srcFs.exists(badFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, badFilesSrcDir, destFs, badDestFile, true, conf, "");
    }
    // Preparing report aggregation job
    Job fileReportAggregationJob = Job.getInstance(conf);
    fileReportAggregationJob.setJobName("File Report Computing " + processId);
    fileReportAggregationJob.setJarByClass(DQMain.class);
    fileReportAggregationJob.setMapperClass(DQFileReportMapper.class);
    fileReportAggregationJob.setMapOutputKeyClass(Text.class);
    fileReportAggregationJob.setMapOutputValueClass(IntWritable.class);
    fileReportAggregationJob.setReducerClass(DQFileReportReducer.class);
    fileReportAggregationJob.setOutputKeyClass(Text.class);
    fileReportAggregationJob.setOutputValueClass(Text.class);
    fileReportAggregationJob.setNumReduceTasks(1);
    Path fileReportDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
    Path fileReportOutputDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    FileInputFormat.addInputPath(fileReportAggregationJob, fileReportDir);
    FileOutputFormat.setOutputPath(fileReportAggregationJob, fileReportOutputDir);
    if (!fileReportAggregationJob.waitForCompletion(true)) {
        return 1;
    }
    // Merge Report Records MR stuffs
    Path reportsSrcDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    Path reportsDestFile = new Path(destDir + "/" + DQConstants.FILE_REPORT_FILE);
    FileUtil.copyMerge(srcFs, reportsSrcDir, destFs, reportsDestFile, true, conf, "");
    Path reportDestFile = new Path(outputDir.toString() + "/" + DQConstants.FILE_REPORT_FILE);
    // Read the report file from HDFS and report the percentage
    DQStats dqStats = getQualityStats(getConf(), reportDestFile);
    LOGGER.info("Percentage of good records :" + dqStats.getGoodPercent());
    props = new GetProperties().getProperties(processId, "dq");
    String strThreshold = props.getProperty("min.pass.threshold.percent");
    float threshold = Float.parseFloat(strThreshold);
    dqStats.setThreshold(threshold);
    // Update the result in metadata
    logResult(dqStats, processId, 0L);
    if (dqStats.getGoodPercent() < threshold) {
        LOGGER.error("DQ check did not pass");
        throw new DQValidationException(dqStats);
    }
    LOGGER.info(dqStats);
    FileChecksum hdfsChecksum = destFs.getFileChecksum(goodDestFile);
    String fileHash = hdfsChecksum == null ? "0" : hdfsChecksum.toString();
    // Return file info oozie params
    RegisterFileInfo registerFileInfo = new RegisterFileInfo();
    registerFileInfo.setBatchId(null);
    registerFileInfo.setCreationTs(new Timestamp(new Date().getTime()));
    registerFileInfo.setFileHash(fileHash);
    registerFileInfo.setFileSize(destFs.getFileStatus(goodDestFile).getLen());
    registerFileInfo.setPath(goodDestFile.toString());
    registerFileInfo.setSubProcessId(Integer.parseInt(processId));
    OozieUtil oozieUtil = new OozieUtil();
    oozieUtil.persistBeanData(registerFileInfo, false);
    return 0;
}
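The getFileStatus call near the end of the driver supplies the registered file size; isolated from the job setup, the size-and-checksum lookup reduces to this sketch (the output path is hypothetical).

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileInfoSketch {
    public static void main(String[] args) throws IOException {
        Path file = new Path("/dq/output/good-records.csv"); // hypothetical output file
        FileSystem fs = file.getFileSystem(new Configuration());
        long size = fs.getFileStatus(file).getLen();
        FileChecksum checksum = fs.getFileChecksum(file); // may be null on some filesystems
        String hash = checksum == null ? "0" : checksum.toString();
        System.out.println("size=" + size + " hash=" + hash);
    }
}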
From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java
License:Apache License
/**
 * Constructor
 * @param fs
 * @param directory
 * @param create
 * @param conf
 * @throws IOException
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf) throws IOException {
    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);
    if (create) {
        create();
    }
    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}
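Construction might look like the following sketch; the index path and create flag are assumptions for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory;

public class DirectoryDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical index shard path; create=true builds the directory first,
        // otherwise the constructor requires an existing directory.
        FileSystemDirectory dir = new FileSystemDirectory(fs, new Path("/index/shard-0"), true, conf);
    }
}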
From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java
License:Open Source License
public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;
    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);
    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }
}
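Assuming the class implements java.util.Enumeration<InputStream> (its name and part-file handling suggest use with SequenceInputStream), a merged read over all part files might look like this; the output directory is hypothetical.

import java.io.InputStream;
import java.io.SequenceInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration;

public class MergedReadDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical MapReduce output directory containing part-r-00000 style files.
        MapReducePartInputStreamEnumeration parts =
                new MapReducePartInputStreamEnumeration(fs, new Path("/job/output"));
        try (InputStream merged = new SequenceInputStream(parts)) {
            System.out.println("first byte: " + merged.read());
        }
    }
}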
From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java
License:Apache License
public void init(Configuration config, String path, String ext) {
    this.config = config;
    this.filePaths = new ArrayList<String>();
    Path hdfsPath = new Path(path);
    FileSystem fs;
    try {
        fs = FileSystem.get(config);
        FileStatus fileStat = fs.getFileStatus(hdfsPath);
        if (fileStat.isDirectory()) {
            Path filterPath = hdfsPath;
            if (ext != null) {
                filterPath = new Path(path.toString(), "*." + ext);
            } else {
                filterPath = new Path(path.toString(), "*");
            }
            FileStatus[] filesInDir = fs.globStatus(filterPath);
            for (int i = 0; i < filesInDir.length; i++) {
                if (filesInDir[i].isFile()) {
                    filePaths.add(filesInDir[i].getPath().toString());
                }
            }
        } else {
            this.filePaths.add(path);
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Failed getting list of files at:" + path, ioe);
    }
    this.currentIndex = -1;
}
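A hypothetical call, assuming a default constructor (construction details are not shown in the snippet); the stream directory and extension are assumptions.

import org.apache.hadoop.conf.Configuration;
import com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource;

public class StreamSourceDemo {
    public static void main(String[] args) {
        HDFSFileStreamSource source = new HDFSFileStreamSource();
        // Hypothetical input directory; "csv" restricts the glob to *.csv files,
        // null would match every file in the directory.
        source.init(new Configuration(), "/data/streams", "csv");
    }
}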
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}
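A sketch of registering the resource for a container launch context; the jar path and resource map are assumptions, and since the method is package-private a real caller would live in the same package as Util.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;

public class ResourceDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        // Hypothetical application jar already uploaded to HDFS.
        LocalResource jar = Util.newYarnAppResource(fs, new Path("/apps/storm/storm.jar"),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION);
        localResources.put("storm.jar", jar);
    }
}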
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
/**
 * Checks for a given path whether the Other permissions on it
 * imply the permission in the passed FsAction
 * @param fs
 * @param path
 * @param action
 * @return true if the path in the uri is visible to all, false otherwise
 * @throws IOException
 */
private static boolean checkPermissionOfOther(FileSystem fs, Path path, FsAction action) throws IOException {
    FileStatus status = fs.getFileStatus(path);
    FsPermission perms = status.getPermission();
    FsAction otherAction = perms.getOtherAction();
    if (otherAction.implies(action)) {
        return true;
    }
    return false;
}
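Inlined, the same check reads as follows; the path is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

public class PermissionDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical path; true if the "other" permission bits grant read access.
        FsPermission perms = fs.getFileStatus(new Path("/apps/shared/lib.jar")).getPermission();
        boolean worldReadable = perms.getOtherAction().implies(FsAction.READ);
        System.out.println("world readable: " + worldReadable);
    }
}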
From source file:com.yolodata.tbana.hadoop.mapred.shuttl.ShuttlCSVInputFormat.java
License:Open Source License
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileSystem fs = FileSystem.get(job);
    SplunkDataQuery dataQuery = SplunkDataQueryFactory.createWithJobConf(job);
    ShuttlCsvFileFinder fileFinder = new ShuttlCsvFileFinder(fs, getInputPaths(job)[0]);
    List<String> csvPaths = fileFinder.findFiles(dataQuery);
    long currentOffset = 0;
    for (String p : csvPaths) {
        FileStatus csvFile = fs.getFileStatus(new Path(p));
        List<CsvSplit> fileSplits = getSplitsForFile(csvFile, job, numSplits, currentOffset);
        currentOffset += csvFile.getLen();
        splits.addAll(fileSplits);
    }
    if (splits.size() > 0)
        ((CsvSplit) splits.get(0)).setSkipHeader(false);
    return splits.toArray(new InputSplit[splits.size()]);
}
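The offset bookkeeping above is just a running sum of getFileStatus lengths across files; stripped to its essentials (the CSV paths are hypothetical):

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OffsetSketch {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        List<String> csvPaths = Arrays.asList("/csv/a.csv", "/csv/b.csv"); // hypothetical
        long offset = 0;
        for (String p : csvPaths) {
            FileStatus status = fs.getFileStatus(new Path(p));
            // splits for this file would start at the current global offset
            System.out.println(p + " starts at global offset " + offset);
            offset += status.getLen();
        }
    }
}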