List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
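Before the collected examples below, here is a minimal self-contained sketch of the basic call pattern: resolve the FileSystem for a path, then ask for its FileStatus. The path "/tmp/example.txt" and the class name GetFileStatusExample are illustrative only; getFileStatus throws FileNotFoundException if the path does not exist.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        // Resolve the FileSystem implementation for the path's scheme (HDFS, local, etc.)
        FileSystem fs = path.getFileSystem(conf);
        // Fetch metadata for the path; throws FileNotFoundException if it is absent
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length=" + status.getLen()
                + ", modified=" + status.getModificationTime()
                + ", isDirectory=" + status.isDirectory());
    }
}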
From source file:com.zjy.mongo.splitter.BSONSplitter.java
License:Apache License
/**
 * Calculate splits for each file in the input path, sensitive to options such
 * as {@link com.zjy.mongo.util.MongoConfigUtil#BSON_READ_SPLITS bson.split.read_splits}.
 * This method always re-calculates the splits and will try to write the
 * splits file.
 *
 * @see #readSplitsForFile
 *
 * @throws IOException when an error occurs reading from the file
 */
public void readSplits() throws IOException {
    splitsList = new ArrayList<BSONFileSplit>();
    if (inputPath == null) {
        throw new IllegalStateException("Input path has not been set.");
    }
    FileSystem fs = inputPath.getFileSystem(getConf());
    FileStatus file = fs.getFileStatus(inputPath);
    readSplitsForFile(file);
}
From source file:com.zjy.mongo.splitter.BSONSplitter.java
License:Apache License
/**
 * Get the position at which the BSONFileRecordReader should begin
 * iterating the given split. This may not be at the beginning of the split
 * if the splits were not calculated by BSONSplitter.
 *
 * @param split the FileSplit for which to find the starting position.
 * @return the position of the first complete document within the split.
 * @throws IOException when an error occurs while reading a file
 */
public synchronized long getStartingPositionForSplit(final FileSplit split) throws IOException {
    FileSystem fs = split.getPath().getFileSystem(getConf());
    FileStatus file = fs.getFileStatus(split.getPath());
    ArrayList<BSONFileSplit> splits;
    BSONFileSplit[] splitsArr;

    // Get splits calculated on document boundaries.
    if (MongoConfigUtil.getBSONReadSplits(getConf())) {
        // Use the splits file to load splits on document boundaries.
        try {
            // Try to use the existing splits file.
            loadSplitsFromSplitFile(file, getSplitsFilePath(file.getPath(), getConf()));
        } catch (NoSplitFileException e) {
            // Create a splits file from scratch.
            readSplitsForFile(file);
        }
        splits = getAllSplits();
    } else {
        // Can't use a splits file, so create splits from scratch.
        splits = (ArrayList<BSONFileSplit>) splitFile(file);
    }
    splitsArr = new BSONFileSplit[splits.size()];
    splits.toArray(splitsArr);

    // Get the first pre-calculated split occurring before the start of
    // the given split.
    long previousStart = split.getStart();
    long startIterating = 0;
    for (BSONFileSplit bfs : splitsArr) {
        if (bfs.getStart() >= split.getStart()) {
            startIterating = previousStart;
            break;
        }
        previousStart = bfs.getStart();
    }

    // Beginning at 'startIterating', jump to the first document that begins
    // at or beyond the given split.
    FSDataInputStream fsDataStream = null;
    long pos = startIterating;
    try {
        fsDataStream = fs.open(split.getPath());
        fsDataStream.seek(pos);
        while (pos < split.getStart()) {
            callback.reset();
            bsonDec.decode(fsDataStream, callback);
            pos = fsDataStream.getPos();
        }
    } finally {
        if (null != fsDataStream) {
            fsDataStream.close();
        }
    }
    return pos;
}
From source file:com.zqh.hadoop.moya.core.yarn.Client.java
License:Apache License
/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws java.io.IOException
 * @throws org.apache.hadoop.yarn.exceptions.YarnException
 */
public boolean run() throws IOException, YarnException {
    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports();
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value."
                + ", specified=" + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(appMasterJar);
    String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred
    amJarRsrc.setType(LocalResourceType.FILE);
    // Set visibility of the resource
    // Setting to most private option
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    // Set the resource to be copied over
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    // Set timestamp and length of file so that the framework
    // can do basic sanity checks for the local resource
    // after it has been copied over to ensure it is the same
    // resource the client intended to use with the application
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("AppMaster.jar", amJarRsrc);

    // Setup App Master Constants
    String amJarLocation = "";
    long amJarLen = 0;
    long amJarTimestamp = 0;

    // adding info so we can add the jar to the App master container path
    amJarLocation = dst.toUri().toString();
    FileStatus shellFileStatus = fs.getFileStatus(dst);
    amJarLen = shellFileStatus.getLen();
    amJarTimestamp = shellFileStatus.getModificationTime();

    // ADD libs needed that will be untarred
    // Keep it all archived for now so add it as a file...
    src = new Path(localLibJar);
    pathSuffix = appName + "/" + appId.getId() + "/Runnable.jar";
    dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    destStatus = fs.getFileStatus(dst);
    LocalResource libsJarRsrc = Records.newRecord(LocalResource.class);
    libsJarRsrc.setType(LocalResourceType.FILE);
    libsJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    libsJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    libsJarRsrc.setTimestamp(destStatus.getModificationTime());
    localResources.put("Runnable.jar", libsJarRsrc);

    // Setup Libs Constants
    String libsLocation = "";
    long libsLen = 0;
    long libsTimestamp = 0;

    // adding info so we can add the jar to the App master container path
    libsLocation = dst.toUri().toString();
    FileStatus libsFileStatus = fs.getFileStatus(dst);
    libsLen = libsFileStatus.getLen();
    libsTimestamp = libsFileStatus.getModificationTime();

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        Path log4jSrc = new Path(log4jPropFile);
        Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
        fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
        FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
        LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
        log4jRsrc.setType(LocalResourceType.FILE);
        log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
        log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
        log4jRsrc.setSize(log4jFileStatus.getLen());
        localResources.put("log4j.properties", log4jRsrc);
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put the AM jar into env and MOYA Runnable
    // using the env info, the application master will create the correct
    // local resource for the eventual containers
    // that will be launched to execute the shell scripts
    env.put(MConstants.APPLICATIONMASTERJARLOCATION, amJarLocation);
    env.put(MConstants.APPLICATIONMASTERJARTIMESTAMP, Long.toString(amJarTimestamp));
    env.put(MConstants.APPLICATIONMASTERJARLEN, Long.toString(amJarLen));
    env.put(MConstants.LIBSLOCATION, libsLocation);
    env.put(MConstants.LIBSTIMESTAMP, Long.toString(libsTimestamp));
    env.put(MConstants.LIBSLEN, Long.toString(libsLen));
    env.put(MConstants.ZOOKEEPERHOSTS, ZKHosts);

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$()).append(File.pathSeparatorChar)
            .append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(File.pathSeparatorChar).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());
    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    vargs.add("--priority " + String.valueOf(moyaPriority));
    if (!localLibJar.isEmpty()) {
        vargs.add("--lib " + localLibJar + "");
    }
    if (debugFlag) {
        vargs.add("--debug");
    }
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // The following are not required for launching an application master
    // amContainer.setContainerId(containerId);

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");
    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);
}
From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java
License:Apache License
/**
 * @see corner.hadoop.services.impl.AccessorProxy#getFileMTTime(java.lang.String)
 */
@Override
public long getFileMTTime(String filePath) throws IOException {
    Path srcPath = new Path(filePath);
    FileSystem srcFS = srcPath.getFileSystem(getConf());
    if (srcFS != null) {
        return srcFS.getFileStatus(srcPath).getModificationTime();
    } else {
        return -1;
    }
}
From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java
License:Apache License
/**
 * @see corner.hadoop.services.impl.AccessorProxy#list(java.lang.String)
 */
@Override
public List<FileDesc> list(final String path) throws IOException {
    String _path = path;
    if (path.endsWith("/")) {
        _path = path.substring(0, path.length() - 1);
    }
    Path dstPath = new Path(_path);
    FileSystem dstFs = dstPath.getFileSystem(getConf());
    FileStatus _dstStatus = dstFs.getFileStatus(dstPath);
    if (_dstStatus == null) {
        throw new IllegalArgumentException("The path [" + path + "] does not exist.");
    }
    if (!_dstStatus.isDir()) {
        throw new IllegalArgumentException("The path [" + path + "] is not a directory.");
    }
    FileStatus[] fileStatus = dstFs.listStatus(dstPath);
    if (fileStatus != null && fileStatus.length > 0) {
        List<FileDesc> ret = new LinkedList<FileDesc>();
        for (FileStatus status : fileStatus) {
            ret.add(new FileDesc(_path + "/" + status.getPath().getName(), status.isDir(),
                    new Timestamp(status.getModificationTime()), status.getLen()));
        }
        return ret;
    }
    return null;
}
From source file:corner.hadoop.services.impl.HdfsAccessorProxy.java
License:Apache License
@Override
public FileDesc getFileDesc(String filePath) throws IOException {
    Path srcPath = new Path(filePath);
    FileSystem srcFS = srcPath.getFileSystem(getConf());
    FileStatus _status = srcFS.getFileStatus(srcPath);
    if (_status != null) {
        return new FileDesc(filePath, _status.isDir(),
                new Timestamp(_status.getModificationTime()), _status.getLen());
    } else {
        return null;
    }
}
From source file:corner.services.hadoop.impl.HdfsAccessorProxy.java
License:Apache License
public long getFileMTTime(String filePath) throws IOException {
    Path srcPath = new Path(filePath);
    FileSystem srcFS = srcPath.getFileSystem(getConf());
    if (srcFS != null) {
        return srcFS.getFileStatus(srcPath).getModificationTime();
    } else {
        return -1;
    }
}
From source file:corner.services.hadoop.impl.HdfsAccessorProxy.java
License:Apache License
@Override
public List<FileDesc> list(final String path) throws IOException {
    String _path = path;
    if (path.endsWith("/")) {
        _path = path.substring(0, path.length() - 1);
    }
    Path dstPath = new Path(_path);
    FileSystem dstFs = dstPath.getFileSystem(getConf());
    FileStatus _dstStatus = dstFs.getFileStatus(dstPath);
    if (_dstStatus == null) {
        throw new IllegalArgumentException("The path [" + path + "] does not exist.");
    }
    if (!_dstStatus.isDir()) {
        throw new IllegalArgumentException("The path [" + path + "] is not a directory.");
    }
    FileStatus[] fileStatus = dstFs.listStatus(dstPath);
    if (fileStatus != null && fileStatus.length > 0) {
        List<FileDesc> ret = new LinkedList<FileDesc>();
        for (FileStatus status : fileStatus) {
            ret.add(new FileDesc(_path + "/" + status.getPath().getName(), status.isDir(),
                    new Timestamp(status.getModificationTime()), status.getLen()));
        }
        return ret;
    }
    return null;
}
From source file:cz.muni.fi.xfabian7.bp.mgrid.HdfsStorageBucket.java
/**
 * Add an object to the bucket
 *
 * @param object the object to add
 * @return true if the object was successfully added
 * @throws BucketStorageException
 */
@Override
public boolean add(LocalAbstractObject object) throws BucketStorageException {
    try {
        // Open output stream if not opened yet (this statement is never reached if the storage is readonly)
        FileSystem fs = getFileSystem();
        ObjectOutputStream oos = openObjectOutputStream(fs, new Path(path),
                fs.getFileStatus(new Path(path)).getLen() != 0);
        // Write object
        oos.writeObject(object);
        oos.reset();
        // Update internal counters
        objectCount++;
        oos.close();
        createMetaFile();
        return true;
    } catch (EOFException e) {
        throw new CapacityFullException(e.getMessage());
    } catch (IOException e) {
        throw new StorageFailureException("Cannot store object into hdfs storage", e);
    }
}
From source file:cz.muni.fi.xfabian7.bp.mgrid.HdfsStorageBucket.java
/**
 * Add objects to the bucket using an Iterator of LocalAbstractObject
 *
 * @param objects
 * @return the number of objects added
 * @throws BucketStorageException
 */
@Override
public int addObjects(Iterator<? extends LocalAbstractObject> objects) throws BucketStorageException {
    int i = 0;
    try {
        // Open output stream if not opened yet (this statement is never reached if the storage is readonly)
        FileSystem fs = getFileSystem();
        ObjectOutputStream oos = openObjectOutputStream(fs, new Path(path),
                fs.getFileStatus(new Path(path)).getLen() != 0);
        for (Iterator<? extends LocalAbstractObject> iter = objects; iter.hasNext();) {
            LocalAbstractObject obj = iter.next();
            // Write object
            oos.writeObject(obj);
            oos.reset();
            // Update internal counters
            objectCount++;
            i++; // count each object actually written so the return value reflects all additions
        }
        oos.close();
        // fs.close();
        createMetaFile();
    } catch (EOFException e) {
        throw new CapacityFullException(e.getMessage());
    } catch (IOException e) {
        throw new StorageFailureException("Cannot store object into hdfs storage", e);
    }
    return i;
}