Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
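
A minimal, self-contained sketch of the call (the path below is illustrative, not taken from any of the examples); note that isDir() is the older API used throughout this page, superseded by isDirectory() in later Hadoop releases:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Throws FileNotFoundException if the path does not exist
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        System.out.println("path=" + status.getPath());
        System.out.println("isDir=" + status.isDir());
        System.out.println("len=" + status.getLen());
        System.out.println("modificationTime=" + status.getModificationTime());
        System.out.println("replication=" + status.getReplication());
        System.out.println("blockSize=" + status.getBlockSize());
    }
}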

Usage

From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteHadoopUtils.java

License:Open Source License

@SuppressWarnings("rawtypes")
public static List<URL> handleCacheList(Object cacheFileList, CustomMapReduceJobPojo job, Configuration config,
        PropertiesManager prop_custom) throws MalformedURLException, IOException, Exception {
    if (null == cacheFileList) {
        return null;
    }
    LinkedList<URL> localJarCache = null;

    Collection cacheFiles = null;
    if (cacheFileList instanceof String) { // comma separated list
        String[] cacheFilesArray = ((String) cacheFileList).split("\\s*,\\s*");
        cacheFiles = Arrays.asList(cacheFilesArray);
    } else {
        cacheFiles = (Collection) cacheFileList;
    }
    for (Object cache : cacheFiles) {
        String cacheStr = (String) cache;
        ObjectId cacheId = null;
        try {
            cacheId = new ObjectId(cacheStr);
        } catch (Exception e) {
        } // fine - cacheStr is not an ObjectId; treat it as a URL/path below

        FileSystem fs = null;

        if ((null != cacheId) || cacheStr.startsWith("http:") || cacheStr.startsWith("https:")
                || cacheStr.startsWith("$")) {
            if (null != cacheId) { // this might be a custom cache in which case just bypass all this, handled in the main list
                if (checkIfSourceOrCustomAndAuthenticate(null, cacheId, job)) {
                    continue;
                }
            } //TESTED (by hand = skip and continue)

            // Use existing code to cache to local fs (and then onwards to HDFS!)
            URL localPathURL = new File(
                    downloadJarFile(cacheStr, job.communityIds, prop_custom, job.submitterID)).toURI().toURL();
            String localPath = localPathURL.getPath();
            String pathMinusName = localPath.substring(0, localPath.lastIndexOf('/') + 1);
            String name = localPath.substring(localPath.lastIndexOf('/') + 1);
            Path distPath = cacheLocalFile(pathMinusName, name, config);
            if (name.endsWith(".jar")) {
                if (null == localJarCache) {
                    localJarCache = new LinkedList<URL>();
                }
                localJarCache.add(localPathURL);
                DistributedCache.addFileToClassPath(distPath, config);
            } //TESTED
            else if (name.endsWith(".zip") || name.endsWith("gz")) {
                DistributedCache.addCacheArchive(distPath.toUri(), config);
            } //TESTED
            else {
                DistributedCache.addCacheFile(distPath.toUri(), config);
            } //TESTED
        } else { // this is the location of a file (it is almost certainly an input/output path)
            if (checkIfSourceOrCustomAndAuthenticate(cacheStr, null, job)) {
                continue;
            } //TESTED (by hand - seen it skip if not a jobid/sourcekey - currently not possible for it to be one anyway; c/p from checkIfSourceOrCustomAndAuthenticate call in previous call anyway^2)            

            String path = authenticateInputDirectory(job, cacheStr);
            if (null == fs) {
                fs = FileSystem.get(config);
            }
            Path distPath = new Path(fs.getFileStatus(new Path(path)).getPath().toUri().getPath());
            if (path.endsWith(".jar")) {
                DistributedCache.addFileToClassPath(distPath, config);
            } //TESTED
            else if (path.endsWith(".zip") || path.endsWith("gz")) {
                DistributedCache.addCacheArchive(distPath.toUri(), config);
            } //TESTED
            else {
                DistributedCache.addCacheFile(distPath.toUri(), config);
            } //TESTED
        } //TESTED
    }
    return localJarCache;
}
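
The getFileStatus call near the end of this example is a path-qualification idiom: the returned status carries the fully qualified path, and toUri().getPath() then strips the scheme and authority, leaving a bare absolute path for DistributedCache. A minimal sketch of just that step (the path is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifyPathSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // getFileStatus resolves the path against the filesystem and
        // returns its fully qualified form ...
        Path qualified = fs.getFileStatus(new Path("cache/lookup.txt")).getPath();
        // ... and toUri().getPath() strips scheme and authority,
        // leaving a bare absolute path.
        Path bare = new Path(qualified.toUri().getPath());
        System.out.println(qualified + " -> " + bare);
    }
}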

From source file:com.inforefiner.hdata.SubmitClient.java

License:Apache License

/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capabililty of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?
    Thread t = new Thread(new LogReceiver());
    t.start();
    // Monitor the application
    return monitorApplication(appId);
}
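
The getFileStatus call on shellDst above exists because YARN validates every LocalResource against the exact length and modification time of the file in HDFS; stale values cause localization to fail. A sketch of how such a FileStatus typically feeds LocalResource.newInstance (the helper name is ours, not from this source):

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.util.ConverterUtils;

public class LocalResourceSketch {
    public static LocalResource fromHdfsFile(FileSystem fs, Path dst) throws Exception {
        // getFileStatus supplies the exact size and timestamp that YARN
        // checks before localizing the resource onto a container node.
        FileStatus status = fs.getFileStatus(dst);
        return LocalResource.newInstance(
                ConverterUtils.getYarnUrlFromPath(dst),
                LocalResourceType.FILE,
                LocalResourceVisibility.APPLICATION,
                status.getLen(),
                status.getModificationTime());
    }
}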

From source file:com.inmobi.conduit.distcp.MergedStreamService.java

License:Apache License

private List<FileStatus> recursiveListingOfDir(FileSystem currentFs, Path path) {

    try {
        FileStatus streamDir = currentFs.getFileStatus(path);
        List<FileStatus> filestatus = new ArrayList<FileStatus>();
        createListing(currentFs, streamDir, filestatus);
        return filestatus;
    } catch (IOException ie) {
        LOG.error("IOException while doing recursive listing to create checkpoint on " + "cluster filesystem"
                + currentFs.getUri(), ie);
    }
    return null;

}
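
createListing is defined elsewhere in the source file; a plausible reconstruction, assuming a depth-first walk via listStatus, with getFileStatus seeding the recursion:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RecursiveListingSketch {
    // A plausible shape for the createListing helper used above
    // (the original implementation is not shown in this example).
    static void createListing(FileSystem fs, FileStatus fileStatus,
            List<FileStatus> results) throws IOException {
        if (fileStatus.isDir()) {
            for (FileStatus child : fs.listStatus(fileStatus.getPath())) {
                createListing(fs, child, results);
            }
        } else {
            results.add(fileStatus);
        }
    }

    public static List<FileStatus> listRecursively(FileSystem fs, Path root)
            throws IOException {
        List<FileStatus> results = new ArrayList<FileStatus>();
        // getFileStatus seeds the recursion with the root's status
        createListing(fs, fs.getFileStatus(root), results);
        return results;
    }
}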

From source file:com.inmobi.conduit.distcp.MirrorStreamService.java

License:Apache License

private Path getFirstOrLastPath(FileSystem fs, Path streamFinalDestDir, boolean returnLast) throws IOException {
    if (!fs.exists(streamFinalDestDir))
        return null;
    FileStatus streamRoot;
    List<FileStatus> streamPaths = new ArrayList<FileStatus>();
    streamRoot = fs.getFileStatus(streamFinalDestDir);
    recursiveListingTillMinuteDir(fs, streamRoot, streamPaths, 0);
    if (streamPaths.size() == 0)
        return null;
    DatePathComparator comparator = new DatePathComparator();
    FileStatus result = streamPaths.get(0);
    for (int i = 0; i < streamPaths.size(); i++) {
        FileStatus current = streamPaths.get(i);
        if (returnLast && comparator.compare(current, result) > 0)
            result = current;
        else if (!returnLast && comparator.compare(current, result) < 0)
            result = current;
    }
    if (!result.isDir())
        return result.getPath().getParent();
    else
        return result.getPath();

}
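
The linear scan above is a plain min/max selection over the listed paths; given the same comparator it could be written with Collections.min/max. A sketch (DatePathComparator is the project's own class; any Comparator<FileStatus> fits):

import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;

public class MinMaxSketch {
    static FileStatus firstOrLast(List<FileStatus> paths,
            Comparator<FileStatus> comparator, boolean returnLast) {
        // Equivalent to the hand-rolled loop above: the comparator
        // decides ordering; min picks the first entry, max the last.
        return returnLast ? Collections.max(paths, comparator)
                          : Collections.min(paths, comparator);
    }
}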

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

private void preserveFileAttributes(Configuration conf) throws IOException {
    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    LOG.info("About to preserve attributes: " + attrSymbols);

    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf);
    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    long preservedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();

        while (sourceReader.next(srcRelPath, srcFileStatus)) {
            if (!srcFileStatus.isDir())
                continue;

            Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);

            //Skip the root folder, preserve the status after atomic commit is complete
            //If it is changed any earlier, then atomic commit may fail
            if (targetRoot.equals(targetFile))
                continue;

            FileSystem targetFS = targetFile.getFileSystem(conf);
            DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
            preservedEntries++; // count entries actually preserved (otherwise the log below always reports 0)

            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. ["
                    + sourceReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
    }
    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
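
Here getFileStatus serves a different purpose than in the previous examples: it fetches the listing file's total length once, so the reader's byte position can be turned into a progress percentage. The pattern in isolation (a sketch, names are ours):

import java.io.IOException;
import org.apache.hadoop.io.SequenceFile;

public class ListingProgressSketch {
    // Bytes consumed over total bytes, as used in the setStatus call above;
    // totalLen comes from clusterFS.getFileStatus(listing).getLen().
    static long percentDone(SequenceFile.Reader reader, long totalLen) throws IOException {
        return reader.getPosition() * 100 / totalLen;
    }
}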

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    long deletedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;

            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
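
The core of deleteMissing is a merge scan over two sorted listings: advance the source cursor while it sorts before the current target entry, then delete any target entry with no matching source entry. The same logic over plain sorted lists (a sketch, not from the original source):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class SortedDiffSketch {
    // Returns entries of 'target' that do not appear in 'source'.
    // Both lists must be sorted; this mirrors the two-reader merge
    // scan over the sorted SequenceFile listings in the example above.
    static List<String> missingInSource(List<String> source, List<String> target) {
        List<String> missing = new ArrayList<String>();
        Iterator<String> src = source.iterator();
        String s = src.hasNext() ? src.next() : null;
        for (String t : target) {
            while (s != null && t.compareTo(s) > 0) {
                s = src.hasNext() ? src.next() : null;
            }
            if (s == null || !t.equals(s)) {
                missing.add(t); // present in target only -> candidate for deletion
            }
        }
        return missing;
    }

    public static void main(String[] args) {
        System.out.println(missingInSource(
                Arrays.asList("/a", "/c"),
                Arrays.asList("/a", "/b", "/c", "/d"))); // prints [/b, /d]
    }
}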

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java

License:Apache License

/**
 * Implementation of the Mapper<>::map(). Does the copy.
 * @param relPath The target path, relative to the target root.
 * @param sourceFileStatus The source file's FileStatus.
 * @throws IOException
 */
@Override
public void map(Text relPath, FileStatus sourceFileStatus, Context context)
        throws IOException, InterruptedException {
    Path sourcePath = sourceFileStatus.getPath();
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    if (LOG.isDebugEnabled())
        LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);

    Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString());

    EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context);

    final String description = "Copying " + sourcePath + " to " + target;
    context.setStatus(description);

    LOG.info(description);

    try {
        FileStatus sourceCurrStatus;
        FileSystem sourceFS;
        try {
            sourceFS = sourcePath.getFileSystem(conf);
            sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
        } catch (FileNotFoundException e) {
            throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
        }

        FileStatus targetStatus = null;

        try {
            targetStatus = targetFS.getFileStatus(target);
        } catch (FileNotFoundException ignore) {
        }

        if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) {
            throw new IOException("Can't replace " + target + ". Target is " + getFileType(targetStatus)
                    + ", Source is " + getFileType(sourceCurrStatus));
        }

        if (sourceCurrStatus.isDir()) {
            createTargetDirsWithRetry(description, target, context);
            return;
        }

        if (skipFile(sourceFS, sourceCurrStatus, target)) {
            LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target);
            updateSkipCounters(context, sourceCurrStatus);
        } else {
            String streamName = null;
            if (!relPath.toString().isEmpty()) {
                Path relativePath = new Path(relPath.toString());
                if (relativePath.depth() > 2) {
                    // path is for mirror service and is of format
                    // /conduit/streams/<streamName>/2013/09/12
                    Path tmpPath = relativePath;
                    while (tmpPath.getParent() != null && !tmpPath.getParent().getName().equals("streams")) {
                        tmpPath = tmpPath.getParent();
                    }
                    streamName = tmpPath.getName();
                } else {
                    // path is for merge service and of form /<stream name>/filename.gz
                    streamName = relativePath.getParent().getName();
                }
            }
            copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received);
            // generate audit counters
            if (received != null) {
                for (Entry<Long, Long> entry : received.entrySet()) {
                    String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(),
                            entry.getKey(), entry.getValue());
                    context.write(NullWritable.get(), new Text(counterNameValue));
                }
            }
        }

        DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes);

    } catch (IOException exception) {
        handleFailures(exception, sourceFileStatus, target, context);
    }
}
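
Note the two getFileStatus calls above: the source-side probe converts FileNotFoundException into a CopyReadException, while the target-side probe treats it as "not yet copied". Using getFileStatus as an existence check has the advantage over fs.exists() that a successful probe also returns the metadata needed next. The target-side idiom in isolation (a sketch):

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ProbeSketch {
    static FileStatus statusOrNull(FileSystem fs, Path p) throws IOException {
        try {
            return fs.getFileStatus(p);
        } catch (FileNotFoundException ignore) {
            return null; // mirrors the targetStatus handling in map() above
        }
    }
}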

From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java

License:Apache License

private void compareFileLengths(FileStatus sourceFileStatus, Path target, Configuration configuration,
        long bytesRead) throws IOException {
    final Path sourcePath = sourceFileStatus.getPath();
    FileSystem fs = sourcePath.getFileSystem(configuration);
    if (fs.getFileStatus(sourcePath).getLen() != bytesRead)
        throw new IOException("Mismatch in length of source:" + sourcePath + " and target:" + target);
}
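
Re-fetching the length with getFileStatus here, rather than reusing sourceFileStatus.getLen(), presumably guards against a source file that changed while the copy was in flight.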

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

private static void touchFile(String path) throws Exception {
    FileSystem fs;
    DataOutputStream outputStream = null;
    GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfiguration());
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = null;
    try {
        fs = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fs);
        final long blockSize = fs.getDefaultBlockSize() * 2;
        outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize);
        compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor);
        Message msg = new Message("generating test data".getBytes());
        AuditUtil.attachHeaders(msg, currentTimestamp);
        byte[] encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        // Generate a msg with a different timestamp. Default window period is 60 sec
        AuditUtil.attachHeaders(msg, nextMinuteTimeStamp);
        encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.flush();
        compressedOut.close();
        pathList.add(qualifiedPath);
        ++nFiles;

        FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
        System.out.println(fileStatus.getBlockSize());
        System.out.println(fileStatus.getReplication());
    } finally {
        IOUtils.cleanup(null, compressedOut, outputStream); // also guards against a null compressedOut
        CodecPool.returnCompressor(gzipCompressor);
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

@Test
public void testRun() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);
        copyMapper.setup(context);

        for (Path path : pathList) {
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fs.getFileStatus(path), context);
        }
        // Check that the maps worked.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
            Assert.assertEquals(fs.getFileStatus(path).getReplication(),
                    fs.getFileStatus(targetPath).getReplication());
            Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
                    fs.getFileStatus(targetPath).getBlockSize());
            Assert.assertTrue(
                    !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
        }

        Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue());
        // Here file is compressed file. So, we should compare the file length
        // with the number of bytes read
        long totalSize = 0;
        for (Path path : pathList) {
            totalSize += fs.getFileStatus(path).getLen();
        }
        Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue());
        long totalCounterValue = 0;
        for (Text value : writer.values()) {
            String tmp[] = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
            Assert.assertEquals(4, tmp.length);
            Long numOfMsgs = Long.parseLong(tmp[3]);
            totalCounterValue += numOfMsgs;
        }
        Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue);
        testCopyingExistingFiles(fs, copyMapper, context);
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        Assert.assertTrue(false);
    }
}