Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page lists example usages of org.apache.hadoop.fs.FileSystem#getFileStatus, drawn from open-source projects.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
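
Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself; the path is a placeholder, and the FileSystem is resolved from a default Configuration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // placeholder path
        FileSystem fs = path.getFileSystem(conf);
        // getFileStatus throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length=" + status.getLen()
                + " isDirectory=" + status.isDirectory()
                + " modificationTime=" + status.getModificationTime());
    }
}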

Usage

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

public static long getFileSize(FileSystem fs, Path path) throws IOException {
    return fs.getFileStatus(path).getLen();
}

From source file:com.uber.hoodie.hadoop.HoodieInputFormat.java

License:Apache License

/**
 * Checks the file status for a race condition which can set the file size to 0:
 * 1. HiveInputFormat does super.listStatus() and gets back a FileStatus[].
 * 2. Then it creates the HoodieTableMetaClient for the paths listed.
 * 3. Generation of splits looks at FileStatus size to create splits, which skips this file.
 */
private HoodieDataFile checkFileStatus(HoodieDataFile dataFile) throws IOException {
    Path dataPath = dataFile.getFileStatus().getPath();
    try {
        if (dataFile.getFileSize() == 0) {
            FileSystem fs = dataPath.getFileSystem(conf);
            LOG.info("Refreshing file status " + dataFile.getPath());
            return new HoodieDataFile(fs.getFileStatus(dataPath));
        }
        return dataFile;
    } catch (IOException e) {
        throw new HoodieIOException("Could not get FileStatus on path " + dataPath, e);
    }
}

From source file:com.uber.hoodie.utilities.UtilHelpers.java

License:Apache License

/**
 * Parse Schema from file.
 *
 * @param fs         File System
 * @param schemaFile Schema File
 */
public static String parseSchema(FileSystem fs, String schemaFile) throws Exception {
    // Read schema file.
    Path p = new Path(schemaFile);
    if (!fs.exists(p)) {
        throw new Exception(String.format("Could not find - %s - schema file.", schemaFile));
    }
    long len = fs.getFileStatus(p).getLen();
    ByteBuffer buf = ByteBuffer.allocate((int) len);
    try (FSDataInputStream inputStream = fs.open(p)) {
        inputStream.readFully(0, buf.array(), 0, buf.array().length);
    }
    return new String(buf.array());
}
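
A hypothetical call site for this helper; the schema path is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import com.uber.hoodie.utilities.UtilHelpers;

public class ParseSchemaExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Placeholder location of a schema file on the default filesystem.
        String schema = UtilHelpers.parseSchema(fs, "/tmp/schemas/source.avsc");
        System.out.println(schema);
    }
}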

From source file:com.wipro.ats.bdre.dq.DQDriver.java

License:Apache License

@Override
public int run(String[] arg) throws Exception {
    String processId = arg[0];
    String sPath = arg[1];
    String destDir = arg[2];

    Properties props = new GetProperties().getProperties(processId, "dq");
    LOGGER.debug("props=" + props);
    Configuration conf = getConf();

    conf.set("dq.process.id", processId);
    Job job = Job.getInstance(conf);
    job.setJobName("Data Quality " + processId);
    job.setJarByClass(DQDriver.class);
    job.setMapperClass(DQMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    //Reducer is not required
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    Path inputFilePath = new Path(sPath);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, removeIfExistAndSetOutputPath(conf, destDir));
    MultipleOutputs.addNamedOutput(job, DQConstants.GOOD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.BAD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.FILE_REPORT_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);

    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Path outputDir = new Path(destDir);
    FileSystem srcFs = outputDir.getFileSystem(getConf());
    FileSystem destFs = outputDir.getFileSystem(getConf());

    //Valid Records
    Path goodFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR);
    //Input and quality filtered file should have same name (but different path)
    Path goodDestFile = new Path(destDir + "/" + inputFilePath.getName());
    if (srcFs.exists(goodFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, goodFilesSrcDir, destFs, goodDestFile, true, conf, "");
    }
    // Invalid Records
    Path badFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR);
    Path badDestFile = new Path(destDir + "/" + DQConstants.BAD_RECORDS_FILE);
    if (srcFs.exists(badFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, badFilesSrcDir, destFs, badDestFile, true, conf, "");
    }

    // Preparing report aggregation job
    Job fileReportAggregationJob = Job.getInstance(conf);
    fileReportAggregationJob.setJobName("File Report Computing " + processId);
    fileReportAggregationJob.setJarByClass(DQMain.class);

    fileReportAggregationJob.setMapperClass(DQFileReportMapper.class);
    fileReportAggregationJob.setMapOutputKeyClass(Text.class);
    fileReportAggregationJob.setMapOutputValueClass(IntWritable.class);

    fileReportAggregationJob.setReducerClass(DQFileReportReducer.class);
    fileReportAggregationJob.setOutputKeyClass(Text.class);
    fileReportAggregationJob.setOutputValueClass(Text.class);

    fileReportAggregationJob.setNumReduceTasks(1);

    Path fileReportDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
    Path fileReportOutputDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);

    FileInputFormat.addInputPath(fileReportAggregationJob, fileReportDir);
    FileOutputFormat.setOutputPath(fileReportAggregationJob, fileReportOutputDir);

    if (!fileReportAggregationJob.waitForCompletion(true)) {
        return 1;
    }

    // Merge Report Records MR stuffs
    Path reportsSrcDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    Path reportsDestFile = new Path(destDir + "/" + DQConstants.FILE_REPORT_FILE);
    FileUtil.copyMerge(srcFs, reportsSrcDir, destFs, reportsDestFile, true, conf, "");

    Path reportDestFile = new Path(outputDir.toString() + "/" + DQConstants.FILE_REPORT_FILE);
    //Read the report file from HDFS and report the percentage
    DQStats dqStats = getQualityStats(getConf(), reportDestFile);
    LOGGER.info("Percentage of good records :" + dqStats.getGoodPercent());
    props = new GetProperties().getProperties(processId, "dq");
    String strThreshold = props.getProperty("min.pass.threshold.percent");
    float threshold = Float.parseFloat(strThreshold);
    dqStats.setThreshold(threshold);
    //Update the result in metadata
    logResult(dqStats, processId, 0L);
    if (dqStats.getGoodPercent() < threshold) {
        LOGGER.error("DQ check did not pass");
        throw new DQValidationException(dqStats);
    }
    LOGGER.info(dqStats);
    FileChecksum hdfsChecksum = destFs.getFileChecksum(goodDestFile);
    String fileHash = hdfsChecksum == null ? "0" : hdfsChecksum.toString();
    //Return file info oozie params
    RegisterFileInfo registerFileInfo = new RegisterFileInfo();
    registerFileInfo.setBatchId(null);
    registerFileInfo.setCreationTs(new Timestamp(new Date().getTime()));
    registerFileInfo.setFileHash(fileHash);
    registerFileInfo.setFileSize(destFs.getFileStatus(goodDestFile).getLen());
    registerFileInfo.setPath(goodDestFile.toString());
    registerFileInfo.setSubProcessId(Integer.parseInt(processId));
    OozieUtil oozieUtil = new OozieUtil();
    oozieUtil.persistBeanData(registerFileInfo, false);

    return 0;
}
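
Because run(String[]) is an @Override and the body calls getConf(), the driver follows the Hadoop Tool pattern; a launch sketch, assuming DQDriver implements Tool and the three positional arguments consumed above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import com.wipro.ats.bdre.dq.DQDriver;

public class DQLauncher {
    public static void main(String[] args) throws Exception {
        // args: processId, sourcePath, destinationDir, as read by run() above.
        int exitCode = ToolRunner.run(new Configuration(), new DQDriver(), args);
        System.exit(exitCode);
    }
}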

From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java

License:Apache License

/**
 * Constructor.
 * @param fs the filesystem holding the directory
 * @param directory the directory path
 * @param create whether to create the directory first
 * @param conf configuration, consulted for io.file.buffer.size
 * @throws IOException if the path is not an existing directory
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}
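
A construction sketch, assuming the class is on the classpath; the index path is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory;

public class DirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path indexDir = new Path("/tmp/linden/index"); // placeholder path
        // create=true provisions the directory before the existence check runs.
        FileSystemDirectory dir = new FileSystemDirectory(fs, indexDir, true, conf);
    }
}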

From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java

License:Open Source License

public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }

}

From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java

License:Apache License

public void init(Configuration config, String path, String ext) {
    this.config = config;
    this.filePaths = new ArrayList<String>();
    Path hdfsPath = new Path(path);
    FileSystem fs;
    try {
        fs = FileSystem.get(config);
        FileStatus fileStat = fs.getFileStatus(hdfsPath);
        if (fileStat.isDirectory()) {
            Path filterPath;
            if (ext != null) {
                filterPath = new Path(path, "*." + ext);
            } else {
                filterPath = new Path(path, "*");
            }
            FileStatus[] filesInDir = fs.globStatus(filterPath);
            for (int i = 0; i < filesInDir.length; i++) {
                if (filesInDir[i].isFile()) {
                    filePaths.add(filesInDir[i].getPath().toString());
                }
            }
        } else {
            this.filePaths.add(path);
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Failed getting list of files at: " + path, ioe);
    }

    this.currentIndex = -1;
}

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}
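
A sketch of feeding the returned LocalResource into a container launch context; buildLaunchContext is a hypothetical helper, the resource name and jar path are placeholders, and the caller is assumed to sit in the same package since the method is package-private:

static ContainerLaunchContext buildLaunchContext(FileSystem fs) throws IOException {
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    // Placeholder jar path; APPLICATION visibility scopes the resource to this app.
    localResources.put("app.jar", newYarnAppResource(fs, new Path("/apps/storm/app.jar"),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
    ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
    launchContext.setLocalResources(localResources);
    return launchContext;
}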

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

/**
 * Checks for a given path whether the Other permissions on it
 * imply the permission in the passed FsAction.
 * @param fs the filesystem to query
 * @param path the path whose permissions are checked
 * @param action the access type to test for
 * @return true if the path in the uri is visible to all, false otherwise
 * @throws IOException
 */
private static boolean checkPermissionOfOther(FileSystem fs, Path path, FsAction action) throws IOException {
    FileStatus status = fs.getFileStatus(path);
    FsPermission perms = status.getPermission();
    FsAction otherAction = perms.getOtherAction();
    return otherAction.implies(action);
}
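
Since the method is private, callers live inside Util itself; a hypothetical wrapper testing world-readability with FsAction.READ:

static boolean isWorldReadable(FileSystem fs, Path path) throws IOException {
    return checkPermissionOfOther(fs, path, FsAction.READ);
}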

From source file:com.yolodata.tbana.hadoop.mapred.shuttl.ShuttlCSVInputFormat.java

License:Open Source License

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileSystem fs = FileSystem.get(job);
    SplunkDataQuery dataQuery = SplunkDataQueryFactory.createWithJobConf(job);

    ShuttlCsvFileFinder fileFinder = new ShuttlCsvFileFinder(fs, getInputPaths(job)[0]);
    List<String> csvPaths = fileFinder.findFiles(dataQuery);

    long currentOffset = 0;

    for (String p : csvPaths) {
        FileStatus csvFile = fs.getFileStatus(new Path(p));
        List<CsvSplit> fileSplits = getSplitsForFile(csvFile, job, numSplits, currentOffset);
        currentOffset += csvFile.getLen();
        splits.addAll(fileSplits);
    }

    if (splits.size() > 0) {
        ((CsvSplit) splits.get(0)).setSkipHeader(false);
    }

    return splits.toArray(new InputSplit[splits.size()]);
}