List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
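Before the project examples below, here is a minimal, self-contained sketch of calling getFileStatus directly. The file path and the default Configuration are illustrative placeholders, not taken from any of the projects listed:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();   // picks up core-site.xml / hdfs-site.xml if present on the classpath
        FileSystem fs = FileSystem.get(conf);       // default filesystem (HDFS or local, depending on configuration)
        Path path = new Path(args.length > 0 ? args[0] : "/tmp/example.txt"); // hypothetical path

        FileStatus status = fs.getFileStatus(path); // throws FileNotFoundException if the path does not exist
        System.out.println("path        = " + status.getPath());
        System.out.println("length      = " + status.getLen());
        System.out.println("isDirectory = " + status.isDirectory());
        System.out.println("replication = " + status.getReplication());
        System.out.println("block size  = " + status.getBlockSize());
        System.out.println("modified    = " + status.getModificationTime());
    }
}

Note that getFileStatus throws FileNotFoundException (a subclass of IOException) when the path does not exist, which is why several of the examples below wrap the call in a try/catch.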
From source file:com.ikanow.infinit.e.processing.custom.utils.InfiniteHadoopUtils.java
License:Open Source License
@SuppressWarnings("rawtypes") public static List<URL> handleCacheList(Object cacheFileList, CustomMapReduceJobPojo job, Configuration config, PropertiesManager prop_custom) throws MalformedURLException, IOException, Exception { if (null == cacheFileList) { return null; }/*from w w w.j a v a2 s . c om*/ LinkedList<URL> localJarCache = null; Collection cacheFiles = null; if (cacheFileList instanceof String) { // comma separated list String[] cacheFilesArray = ((String) cacheFileList).split("\\s*,\\s*"); cacheFiles = Arrays.asList(cacheFilesArray); } else { cacheFiles = (Collection) cacheFileList; } for (Object cache : cacheFiles) { String cacheStr = (String) cache; ObjectId cacheId = null; try { cacheId = new ObjectId(cacheStr); } catch (Exception e) { } // fine FileSystem fs = null; if ((null != cacheId) || cacheStr.startsWith("http:") || cacheStr.startsWith("https:") || cacheStr.startsWith("$")) { if (null != cacheId) { // this might be a custom cache in which case just bypass all this, handled in the main list if (checkIfSourceOrCustomAndAuthenticate(null, cacheId, job)) { continue; } } //TESTED (by hand = skip and continue) // Use existing code to cache to local fs (and then onwards to HDFS!) URL localPathURL = new File( downloadJarFile(cacheStr, job.communityIds, prop_custom, job.submitterID)).toURI().toURL(); String localPath = localPathURL.getPath(); String pathMinusName = localPath.substring(0, localPath.lastIndexOf('/') + 1); String name = localPath.substring(localPath.lastIndexOf('/') + 1); Path distPath = cacheLocalFile(pathMinusName, name, config); if (name.endsWith(".jar")) { if (null == localJarCache) { localJarCache = new LinkedList<URL>(); } localJarCache.add(localPathURL); DistributedCache.addFileToClassPath(distPath, config); } //TESTED else if (name.endsWith(".zip") || name.endsWith("gz")) { DistributedCache.addCacheArchive(distPath.toUri(), config); } //TESTED else { DistributedCache.addCacheFile(distPath.toUri(), config); } //TESTED } else { // this is the location of a file (it is almost certainly an input/output path) if (checkIfSourceOrCustomAndAuthenticate(cacheStr, null, job)) { continue; } //TESTED (by hand - seen it skip if not a jobid/sourcekey - currently not possible for it to be one anyway; c/p from checkIfSourceOrCustomAndAuthenticate call in previous call anyway^2) String path = authenticateInputDirectory(job, cacheStr); if (null == fs) { fs = FileSystem.get(config); } Path distPath = new Path(fs.getFileStatus(new Path(path)).getPath().toUri().getPath()); if (path.endsWith(".jar")) { DistributedCache.addFileToClassPath(distPath, config); } //TESTED else if (path.endsWith(".zip") || path.endsWith("gz")) { DistributedCache.addCacheArchive(distPath.toUri(), config); } //TESTED else { DistributedCache.addCacheFile(distPath.toUri(), config); } //TESTED } //TESTED } return localJarCache; }
From source file:com.inforefiner.hdata.SubmitClient.java
License:Apache License
/**
 * Main run function for the client
 *
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max.
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands,
            null, null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide?
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?
    Thread t = new Thread(new LogReceiver());
    t.start();

    // Monitor the application
    return monitorApplication(appId);
}
From source file:com.inmobi.conduit.distcp.MergedStreamService.java
License:Apache License
private List<FileStatus> recursiveListingOfDir(FileSystem currentFs, Path path) {
    try {
        FileStatus streamDir = currentFs.getFileStatus(path);
        List<FileStatus> filestatus = new ArrayList<FileStatus>();
        createListing(currentFs, streamDir, filestatus);
        return filestatus;
    } catch (IOException ie) {
        LOG.error("IOException while doing recursive listing to create checkpoint on "
                + "cluster filesystem" + currentFs.getUri(), ie);
    }
    return null;
}
From source file:com.inmobi.conduit.distcp.MirrorStreamService.java
License:Apache License
private Path getFirstOrLastPath(FileSystem fs, Path streamFinalDestDir, boolean returnLast)
        throws IOException {
    if (!fs.exists(streamFinalDestDir))
        return null;
    FileStatus streamRoot;
    List<FileStatus> streamPaths = new ArrayList<FileStatus>();
    streamRoot = fs.getFileStatus(streamFinalDestDir);
    recursiveListingTillMinuteDir(fs, streamRoot, streamPaths, 0);
    if (streamPaths.size() == 0)
        return null;
    DatePathComparator comparator = new DatePathComparator();
    FileStatus result = streamPaths.get(0);
    for (int i = 0; i < streamPaths.size(); i++) {
        FileStatus current = streamPaths.get(i);
        if (returnLast && comparator.compare(current, result) > 0)
            result = current;
        else if (!returnLast && comparator.compare(current, result) < 0)
            result = current;
    }
    if (!result.isDir())
        return result.getPath().getParent();
    else
        return result.getPath();
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void preserveFileAttributes(Configuration conf) throws IOException {
    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    LOG.info("About to preserve attributes: " + attrSymbols);

    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf);
    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    long preservedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();

        while (sourceReader.next(srcRelPath, srcFileStatus)) {
            if (!srcFileStatus.isDir())
                continue;
            Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);

            // Skip the root folder, preserve the status after atomic commit is complete
            // If it is changed any earlier, then atomic commit may fail
            if (targetRoot.equals(targetFile))
                continue;

            FileSystem targetFS = targetFile.getFileSystem(conf);
            DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);

            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. ["
                    + sourceReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
    }
    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void deleteMissing(Configuration conf) throws IOException {
    LOG.info("-delete option is enabled. About to remove entries from "
            + "target that are missing in source");

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
    CopyListing target = new GlobbedCopyListing(conf, null);

    List<Path> targets = new ArrayList<Path>(1);
    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
    targets.add(targetFinalPath);
    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

    target.buildListing(targetListing, options);
    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    SequenceFile.Reader targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    long deletedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();
        FileStatus trgtFileStatus = new FileStatus();
        Text trgtRelPath = new Text();

        FileSystem targetFS = targetFinalPath.getFileSystem(conf);
        boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
        while (targetReader.next(trgtRelPath, trgtFileStatus)) {
            while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
                srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
            }

            if (srcAvailable && trgtRelPath.equals(srcRelPath))
                continue;

            boolean result = (!targetFS.exists(trgtFileStatus.getPath())
                    || targetFS.delete(trgtFileStatus.getPath(), true));
            if (result) {
                LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
                deletedEntries++;
            } else {
                throw new IOException("Unable to delete " + trgtFileStatus.getPath());
            }
            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
                    + targetReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
        IOUtils.closeStream(targetReader);
    }
    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
/**
 * Implementation of the Mapper::map(). Does the copy.
 * @param relPath: The target path.
 * @param sourceFileStatus: The source path.
 * @throws IOException
 */
@Override
public void map(Text relPath, FileStatus sourceFileStatus, Context context)
        throws IOException, InterruptedException {
    Path sourcePath = sourceFileStatus.getPath();
    Map<Long, Long> received = null;
    if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) {
        received = new HashMap<Long, Long>();
    }
    if (LOG.isDebugEnabled())
        LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);

    Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString());

    EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context);

    final String description = "Copying " + sourcePath + " to " + target;
    context.setStatus(description);

    LOG.info(description);

    try {
        FileStatus sourceCurrStatus;
        FileSystem sourceFS;
        try {
            sourceFS = sourcePath.getFileSystem(conf);
            sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
        } catch (FileNotFoundException e) {
            throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
        }

        FileStatus targetStatus = null;

        try {
            targetStatus = targetFS.getFileStatus(target);
        } catch (FileNotFoundException ignore) {
        }

        if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) {
            throw new IOException("Can't replace " + target + ". Target is "
                    + getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus));
        }

        if (sourceCurrStatus.isDir()) {
            createTargetDirsWithRetry(description, target, context);
            return;
        }

        if (skipFile(sourceFS, sourceCurrStatus, target)) {
            LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target);
            updateSkipCounters(context, sourceCurrStatus);
        } else {
            String streamName = null;
            if (!relPath.toString().isEmpty()) {
                Path relativePath = new Path(relPath.toString());
                if (relativePath.depth() > 2) {
                    // path is for mirror service and is of format
                    // /conduit/streams/<streamName>/2013/09/12
                    Path tmpPath = relativePath;
                    while (tmpPath.getParent() != null
                            && !tmpPath.getParent().getName().equals("streams")) {
                        tmpPath = tmpPath.getParent();
                    }
                    streamName = tmpPath.getName();
                } else {
                    // path is for merge service and of form /<stream name>/filename.gz
                    streamName = relativePath.getParent().getName();
                }
            }
            copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received);
            // generate audit counters
            if (received != null) {
                for (Entry<Long, Long> entry : received.entrySet()) {
                    String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(),
                            entry.getKey(), entry.getValue());
                    context.write(NullWritable.get(), new Text(counterNameValue));
                }
            }
        }

        DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes);
    } catch (IOException exception) {
        handleFailures(exception, sourceFileStatus, target, context);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java
License:Apache License
private void compareFileLengths(FileStatus sourceFileStatus, Path target,
        Configuration configuration, long bytesRead) throws IOException {
    final Path sourcePath = sourceFileStatus.getPath();
    FileSystem fs = sourcePath.getFileSystem(configuration);
    if (fs.getFileStatus(sourcePath).getLen() != bytesRead)
        throw new IOException("Mismatch in length of source:" + sourcePath + " and target:" + target);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
private static void touchFile(String path) throws Exception {
    FileSystem fs;
    DataOutputStream outputStream = null;
    GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfiguration());
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = null;
    try {
        fs = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fs);
        final long blockSize = fs.getDefaultBlockSize() * 2;
        outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize);
        compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor);
        Message msg = new Message("generating test data".getBytes());
        AuditUtil.attachHeaders(msg, currentTimestamp);
        byte[] encodeMsg = Base64.encodeBase64(msg.getData().array());

        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());

        // Generate a msg with different timestamp. Default window period is 60sec
        AuditUtil.attachHeaders(msg, nextMinuteTimeStamp);
        encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());

        compressedOut.flush();
        compressedOut.close();
        pathList.add(qualifiedPath);
        ++nFiles;

        FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
        System.out.println(fileStatus.getBlockSize());
        System.out.println(fileStatus.getReplication());
    } finally {
        compressedOut.close();
        IOUtils.cleanup(null, outputStream);
        CodecPool.returnCompressor(gzipCompressor);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
@Test
public void testRun() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper,
                reporter, writer);
        copyMapper.setup(context);

        for (Path path : pathList) {
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fs.getFileStatus(path), context);
        }

        // Check that the maps worked.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
            Assert.assertEquals(fs.getFileStatus(path).getReplication(),
                    fs.getFileStatus(targetPath).getReplication());
            Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
                    fs.getFileStatus(targetPath).getBlockSize());
            Assert.assertTrue(
                    !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
        }

        Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue());

        // Here file is compressed file. So, we should compare the file length
        // with the number of bytes read
        long totalSize = 0;
        for (Path path : pathList) {
            totalSize += fs.getFileStatus(path).getLen();
        }
        Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue());
        long totalCounterValue = 0;
        for (Text value : writer.values()) {
            String tmp[] = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
            Assert.assertEquals(4, tmp.length);
            Long numOfMsgs = Long.parseLong(tmp[3]);
            totalCounterValue += numOfMsgs;
        }
        Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue);
        testCopyingExistingFiles(fs, copyMapper, context);
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        Assert.assertTrue(false);
    }
}