Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page lists example usages of org.apache.hadoop.fs.FileSystem#getFileStatus, drawn from open-source projects.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
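
Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself; the path is a placeholder, and the FileSystem is resolved from a default Configuration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // placeholder path
        FileSystem fs = path.getFileSystem(conf);
        // getFileStatus throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length=" + status.getLen()
                + " isDirectory=" + status.isDirectory()
                + " modificationTime=" + status.getModificationTime());
    }
}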

Usage

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

public static long getFileSize(FileSystem fs, Path path) throws IOException {
    return fs.getFileStatus(path).getLen();
}

From source file:com.uber.hoodie.hadoop.HoodieInputFormat.java

License:Apache License

/**
 * Checks the file status for a race condition which can set the file size to 0:
 * 1. HiveInputFormat does super.listStatus() and gets back a FileStatus[].
 * 2. Then it creates the HoodieTableMetaClient for the paths listed.
 * 3. Generation of splits looks at FileStatus size to create splits, which skips this file.
 */
private HoodieDataFile checkFileStatus(HoodieDataFile dataFile) throws IOException {
    Path dataPath = dataFile.getFileStatus().getPath();
    try {
        if (dataFile.getFileSize() == 0) {
            FileSystem fs = dataPath.getFileSystem(conf);
            LOG.info("Refreshing file status " + dataFile.getPath());
            return new HoodieDataFile(fs.getFileStatus(dataPath));
        }
        return dataFile;
    } catch (IOException e) {
        throw new HoodieIOException("Could not get FileStatus on path " + dataPath, e);
    }
}

From source file:com.uber.hoodie.utilities.UtilHelpers.java

License:Apache License

/**
 * Parse Schema from file.
 *
 * @param fs         File System
 * @param schemaFile Schema File
 */
public static String parseSchema(FileSystem fs, String schemaFile) throws Exception {
    // Read schema file.
    Path p = new Path(schemaFile);
    if (!fs.exists(p)) {
        throw new Exception(String.format("Could not find - %s - schema file.", schemaFile));
    }
    long len = fs.getFileStatus(p).getLen();
    ByteBuffer buf = ByteBuffer.allocate((int) len);
    try (FSDataInputStream inputStream = fs.open(p)) {
        inputStream.readFully(0, buf.array(), 0, buf.array().length);
    }
    return new String(buf.array());
}
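
A hypothetical call site for this helper; the schema path is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import com.uber.hoodie.utilities.UtilHelpers;

public class ParseSchemaExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Placeholder location of a schema file on the default filesystem.
        String schema = UtilHelpers.parseSchema(fs, "/tmp/schemas/source.avsc");
        System.out.println(schema);
    }
}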

From source file:com.wipro.ats.bdre.dq.DQDriver.java

License:Apache License

@Override
public int run(String[] arg) throws Exception {
    String processId = arg[0];
    String sPath = arg[1];
    String destDir = arg[2];

    Properties props = new GetProperties().getProperties(processId, "dq");
    LOGGER.debug("props=" + props);
    Configuration conf = getConf();

    conf.set("dq.process.id", processId);
    Job job = Job.getInstance(conf);
    job.setJobName("Data Quality " + processId);
    job.setJarByClass(DQDriver.class);
    job.setMapperClass(DQMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    //Reducer is not required
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    Path inputFilePath = new Path(sPath);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, removeIfExistAndSetOutputPath(conf, destDir));
    MultipleOutputs.addNamedOutput(job, DQConstants.GOOD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.BAD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.FILE_REPORT_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);

    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Path outputDir = new Path(destDir);
    FileSystem srcFs = outputDir.getFileSystem(getConf());
    FileSystem destFs = outputDir.getFileSystem(getConf());

    //Valid Records
    Path goodFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR);
    //Input and quality filtered file should have same name (but different path)
    Path goodDestFile = new Path(destDir + "/" + inputFilePath.getName());
    if (srcFs.exists(goodFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, goodFilesSrcDir, destFs, goodDestFile, true, conf, "");
    }
    // Invalid Records
    Path badFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR);
    Path badDestFile = new Path(destDir + "/" + DQConstants.BAD_RECORDS_FILE);
    if (srcFs.exists(badFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, badFilesSrcDir, destFs, badDestFile, true, conf, "");
    }

    // Preparing report aggregation job
    Job fileReportAggregationJob = Job.getInstance(conf);
    fileReportAggregationJob.setJobName("File Report Computing " + processId);
    fileReportAggregationJob.setJarByClass(DQMain.class);

    fileReportAggregationJob.setMapperClass(DQFileReportMapper.class);
    fileReportAggregationJob.setMapOutputKeyClass(Text.class);
    fileReportAggregationJob.setMapOutputValueClass(IntWritable.class);

    fileReportAggregationJob.setReducerClass(DQFileReportReducer.class);
    fileReportAggregationJob.setOutputKeyClass(Text.class);
    fileReportAggregationJob.setOutputValueClass(Text.class);

    fileReportAggregationJob.setNumReduceTasks(1);

    Path fileReportDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
    Path fileReportOutputDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);

    FileInputFormat.addInputPath(fileReportAggregationJob, fileReportDir);
    FileOutputFormat.setOutputPath(fileReportAggregationJob, fileReportOutputDir);

    if (!fileReportAggregationJob.waitForCompletion(true)) {
        return 1;
    }

    // Merge Report Records MR stuffs
    Path reportsSrcDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    Path reportsDestFile = new Path(destDir + "/" + DQConstants.FILE_REPORT_FILE);
    FileUtil.copyMerge(srcFs, reportsSrcDir, destFs, reportsDestFile, true, conf, "");

    Path reportDestFile = new Path(outputDir.toString() + "/" + DQConstants.FILE_REPORT_FILE);
    //Read the report file from HDFS and report the percentage
    DQStats dqStats = getQualityStats(getConf(), reportDestFile);
    LOGGER.info("Percentage of good records :" + dqStats.getGoodPercent());
    props = new GetProperties().getProperties(processId, "dq");
    String strThreshold = props.getProperty("min.pass.threshold.percent");
    float threshold = Float.parseFloat(strThreshold);
    dqStats.setThreshold(threshold);
    //Update the result in metadata
    logResult(dqStats, processId, 0L);
    if (dqStats.getGoodPercent() < threshold) {
        LOGGER.error("DQ check did not pass");
        throw new DQValidationException(dqStats);
    }
    LOGGER.info(dqStats);
    FileChecksum hdfsChecksum = destFs.getFileChecksum(goodDestFile);
    String fileHash = hdfsChecksum == null ? "0" : hdfsChecksum.toString();
    //Return file info oozie params
    RegisterFileInfo registerFileInfo = new RegisterFileInfo();
    registerFileInfo.setBatchId(null);
    registerFileInfo.setCreationTs(new Timestamp(new Date().getTime()));
    registerFileInfo.setFileHash(fileHash);
    registerFileInfo.setFileSize(destFs.getFileStatus(goodDestFile).getLen());
    registerFileInfo.setPath(goodDestFile.toString());
    registerFileInfo.setSubProcessId(Integer.parseInt(processId));
    OozieUtil oozieUtil = new OozieUtil();
    oozieUtil.persistBeanData(registerFileInfo, false);

    return 0;
}
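
Because run(String[]) is an @Override and the body calls getConf(), the driver follows the Hadoop Tool pattern; a launch sketch, assuming DQDriver implements Tool and the three positional arguments consumed above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import com.wipro.ats.bdre.dq.DQDriver;

public class DQLauncher {
    public static void main(String[] args) throws Exception {
        // args: processId, sourcePath, destinationDir, as read by run() above.
        int exitCode = ToolRunner.run(new Configuration(), new DQDriver(), args);
        System.exit(exitCode);
    }
}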

From source file:com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory.java

License:Apache License

/**
 * Constructor.
 * @param fs the filesystem holding the directory
 * @param directory the directory path
 * @param create whether to create the directory first
 * @param conf configuration, consulted for io.file.buffer.size
 * @throws IOException if the path is not an existing directory
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDirectory();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}
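
A construction sketch, assuming the class is on the classpath; the index path is a placeholder:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.xiaomi.linden.hadoop.indexing.reduce.FileSystemDirectory;

public class DirectoryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path indexDir = new Path("/tmp/linden/index"); // placeholder path
        // create=true provisions the directory before the existence check runs.
        FileSystemDirectory dir = new FileSystemDirectory(fs, indexDir, true, conf);
    }
}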

From source file:com.yahoo.glimmer.util.MapReducePartInputStreamEnumeration.java

License:Open Source License

public MapReducePartInputStreamEnumeration(FileSystem fileSystem, Path srcPath) throws IOException {
    this.fileSystem = fileSystem;

    CompressionCodecFactory factory = new CompressionCodecFactory(fileSystem.getConf());
    codecIfAny = factory.getCodec(srcPath);

    FileStatus srcFileStatus = fileSystem.getFileStatus(srcPath);
    if (srcFileStatus.isDirectory()) {
        // returns FileStatus objects sorted by filename.
        String partFilenamePattern = "part-?-?????";
        if (codecIfAny != null) {
            partFilenamePattern += codecIfAny.getDefaultExtension();
        }
        Path partPathGlob = new Path(srcPath, partFilenamePattern);
        partFileStatuses = fileSystem.globStatus(partPathGlob);
    } else {
        partFileStatuses = new FileStatus[] { srcFileStatus };
    }

}

From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java

License:Apache License

public void init(Configuration config, String path, String ext) {
    this.config = config;
    this.filePaths = new ArrayList<String>();
    Path hdfsPath = new Path(path);
    FileSystem fs;
    try {
        fs = FileSystem.get(config);
        FileStatus fileStat = fs.getFileStatus(hdfsPath);
        if (fileStat.isDirectory()) {
            Path filterPath;
            if (ext != null) {
                filterPath = new Path(path, "*." + ext);
            } else {
                filterPath = new Path(path, "*");
            }
            FileStatus[] filesInDir = fs.globStatus(filterPath);
            for (int i = 0; i < filesInDir.length; i++) {
                if (filesInDir[i].isFile()) {
                    filePaths.add(filesInDir[i].getPath().toString());
                }
            }
        } else {
            this.filePaths.add(path);
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Failed getting list of files at: " + path, ioe);
    }

    this.currentIndex = -1;
}

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

static LocalResource newYarnAppResource(FileSystem fs, Path path, LocalResourceType type,
        LocalResourceVisibility vis) throws IOException {
    Path qualified = fs.makeQualified(path);
    FileStatus status = fs.getFileStatus(qualified);
    LocalResource resource = Records.newRecord(LocalResource.class);
    resource.setType(type);
    resource.setVisibility(vis);
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());
    return resource;
}
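
A sketch of feeding the returned LocalResource into a container launch context; buildLaunchContext is a hypothetical helper, the resource name and jar path are placeholders, and the caller is assumed to sit in the same package since the method is package-private:

static ContainerLaunchContext buildLaunchContext(FileSystem fs) throws IOException {
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    // Placeholder jar path; APPLICATION visibility scopes the resource to this app.
    localResources.put("app.jar", newYarnAppResource(fs, new Path("/apps/storm/app.jar"),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION));
    ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);
    launchContext.setLocalResources(localResources);
    return launchContext;
}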

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

/**
 * Checks for a given path whether the Other permissions on it
 * imply the permission in the passed FsAction.
 * @param fs the filesystem to query
 * @param path the path whose permissions are checked
 * @param action the access type to test for
 * @return true if the path in the uri is visible to all, false otherwise
 * @throws IOException
 */
private static boolean checkPermissionOfOther(FileSystem fs, Path path, FsAction action) throws IOException {
    FileStatus status = fs.getFileStatus(path);
    FsPermission perms = status.getPermission();
    FsAction otherAction = perms.getOtherAction();
    return otherAction.implies(action);
}
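
Since the method is private, callers live inside Util itself; a hypothetical wrapper testing world-readability with FsAction.READ:

static boolean isWorldReadable(FileSystem fs, Path path) throws IOException {
    return checkPermissionOfOther(fs, path, FsAction.READ);
}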

From source file:com.yolodata.tbana.hadoop.mapred.shuttl.ShuttlCSVInputFormat.java

License:Open Source License

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileSystem fs = FileSystem.get(job);
    SplunkDataQuery dataQuery = SplunkDataQueryFactory.createWithJobConf(job);

    ShuttlCsvFileFinder fileFinder = new ShuttlCsvFileFinder(fs, getInputPaths(job)[0]);
    List<String> csvPaths = fileFinder.findFiles(dataQuery);

    long currentOffset = 0;

    for (String p : csvPaths) {
        FileStatus csvFile = fs.getFileStatus(new Path(p));
        List<CsvSplit> fileSplits = getSplitsForFile(csvFile, job, numSplits, currentOffset);
        currentOffset += csvFile.getLen();
        splits.addAll(fileSplits);
    }

    if (splits.size() > 0) {
        ((CsvSplit) splits.get(0)).setSkipHeader(false);
    }

    return splits.toArray(new InputSplit[splits.size()]);
}