List of usage examples for org.apache.hadoop.fs.FileSystem.getWorkingDirectory()
public abstract Path getWorkingDirectory();
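Before the project-specific examples, here is a minimal sketch of the pattern that recurs throughout this list: resolving a relative Path against the file system's URI and working directory via makeQualified. The class name and the relative path "data/input.csv" are illustrative placeholders, not taken from any of the projects below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkingDirectoryExample {
  public static void main(String[] args) throws IOException {
    // Default configuration: resolves to the local file system unless
    // fs.defaultFS points at an HDFS cluster.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // A relative Path is resolved against the file system's working directory;
    // makeQualified(URI, Path) turns it into a fully qualified, scheme-prefixed Path.
    Path relative = new Path("data/input.csv"); // hypothetical relative path
    Path qualified = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());

    System.out.println("Working directory: " + fs.getWorkingDirectory());
    System.out.println("Qualified path:    " + qualified);
  }
}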
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
protected Path getCurrentFile(Context context) throws IOException {
  InputSplit split = context.getInputSplit();
  if (split != null && split instanceof FileSplit) {
    FileSplit inputSplit = (FileSplit) split;
    Path path = inputSplit.getPath();
    FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
    return path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
  }
  return null;
}
From source file:org.apache.flink.yarn.YarnFileStageTest.java
License:Apache License
/**
 * Verifies that nested directories are properly copied with a <tt>hdfs://</tt> file
 * system (from a <tt>file:///absolute/path</tt> source path).
 */
@Test
public void testCopyFromLocalRecursiveWithScheme() throws Exception {
  final FileSystem targetFileSystem = hdfsRootPath.getFileSystem(hadoopConfig);
  final Path targetDir = targetFileSystem.getWorkingDirectory();

  testCopyFromLocalRecursive(targetFileSystem, targetDir, tempFolder, true);
}
From source file:org.apache.flink.yarn.YarnFileStageTest.java
License:Apache License
/**
 * Verifies that nested directories are properly copied with a <tt>hdfs://</tt> file
 * system (from a <tt>/absolute/path</tt> source path).
 */
@Test
public void testCopyFromLocalRecursiveWithoutScheme() throws Exception {
  final FileSystem targetFileSystem = hdfsRootPath.getFileSystem(hadoopConfig);
  final Path targetDir = targetFileSystem.getWorkingDirectory();

  testCopyFromLocalRecursive(targetFileSystem, targetDir, tempFolder, false);
}
From source file:org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java
License:Apache License
/**
 * Write the contents of input stream into staging path.
 *
 * <p>
 * WriteAt indicates the path where the contents of the input stream should be written. When this method is called,
 * the path writeAt.getParent() will exist already, but the path writeAt will not exist. When this method is returned,
 * the path writeAt must exist. Any data written to any location other than writeAt or a descendant of writeAt
 * will be ignored.
 * </p>
 *
 * @param inputStream {@link FSDataInputStream} whose contents should be written to staging path.
 * @param writeAt {@link Path} at which contents should be written.
 * @param copyableFile {@link org.apache.gobblin.data.management.copy.CopyEntity} that generated this copy operation.
 * @param record The actual {@link FileAwareInputStream} passed to the write method.
 * @throws IOException
 */
protected void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile,
    FileAwareInputStream record) throws IOException {

  final short replication = this.state.getPropAsShort(ConfigurationKeys.WRITER_FILE_REPLICATION_FACTOR,
      copyableFile.getReplication(this.fs));
  final long blockSize = copyableFile.getBlockSize(this.fs);
  final long fileSize = copyableFile.getFileStatus().getLen();

  long expectedBytes = fileSize;
  Long maxBytes = null;
  // Whether writer must write EXACTLY maxBytes.
  boolean mustMatchMaxBytes = false;

  if (record.getSplit().isPresent()) {
    maxBytes = record.getSplit().get().getHighPosition() - record.getSplit().get().getLowPosition();
    if (record.getSplit().get().isLastSplit()) {
      expectedBytes = fileSize % blockSize;
      mustMatchMaxBytes = false;
    } else {
      expectedBytes = maxBytes;
      mustMatchMaxBytes = true;
    }
  }

  Predicate<FileStatus> fileStatusAttributesFilter = new Predicate<FileStatus>() {
    @Override
    public boolean apply(FileStatus input) {
      return input.getReplication() == replication && input.getBlockSize() == blockSize;
    }
  };
  Optional<FileStatus> persistedFile = this.recoveryHelper.findPersistedFile(this.state, copyableFile,
      fileStatusAttributesFilter);

  if (persistedFile.isPresent()) {
    log.info(String.format("Recovering persisted file %s to %s.", persistedFile.get().getPath(), writeAt));
    this.fs.rename(persistedFile.get().getPath(), writeAt);
  } else {
    // Copy empty directories
    if (copyableFile.getFileStatus().isDirectory()) {
      this.fs.mkdirs(writeAt);
      return;
    }

    OutputStream os = this.fs.create(writeAt, true, this.fs.getConf().getInt("io.file.buffer.size", 4096),
        replication, blockSize);
    if (encryptionConfig != null) {
      os = EncryptionFactory.buildStreamCryptoProvider(encryptionConfig).encodeOutputStream(os);
    }
    try {
      FileSystem defaultFS = FileSystem.get(new Configuration());
      StreamThrottler<GobblinScopeTypes> throttler = this.taskBroker
          .getSharedResource(new StreamThrottler.Factory<GobblinScopeTypes>(), new EmptyKey());
      ThrottledInputStream throttledInputStream = throttler.throttleInputStream().inputStream(inputStream)
          .sourceURI(copyableFile.getOrigin().getPath()
              .makeQualified(defaultFS.getUri(), defaultFS.getWorkingDirectory()).toUri())
          .targetURI(this.fs.makeQualified(writeAt).toUri()).build();
      StreamCopier copier = new StreamCopier(throttledInputStream, os, maxBytes)
          .withBufferSize(this.bufferSize);

      log.info("File {}: Starting copy", copyableFile.getOrigin().getPath());

      if (isInstrumentationEnabled()) {
        copier.withCopySpeedMeter(this.copySpeedMeter);
      }

      long numBytes = copier.copy();

      if ((this.checkFileSize || mustMatchMaxBytes) && numBytes != expectedBytes) {
        throw new IOException(String.format("Incomplete write: expected %d, wrote %d bytes.",
            expectedBytes, numBytes));
      }
      this.bytesWritten.addAndGet(numBytes);

      if (isInstrumentationEnabled()) {
        log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
            this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
      } else {
        log.info("File {} copied.", copyableFile.getOrigin().getPath());
      }
    } catch (NotConfiguredException nce) {
      log.warn("Broker error. Some features of stream copier may not be available.", nce);
    } finally {
      os.close();
      inputStream.close();
    }
  }
}
From source file:org.apache.hadoop.examples.Sort.java
License:Apache License
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {

  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
  String sort_reduces = conf.get(REDUCES_PER_HOST);
  if (sort_reduces != null) {
    num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
  }
  Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
  Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
  Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
  Class<? extends Writable> outputValueClass = BytesWritable.class;
  List<String> otherArgs = new ArrayList<String>();
  InputSampler.Sampler<K, V> sampler = null;
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-r".equals(args[i])) {
        num_reduces = Integer.parseInt(args[++i]);
      } else if ("-inFormat".equals(args[i])) {
        inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
      } else if ("-outFormat".equals(args[i])) {
        outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else if ("-outKey".equals(args[i])) {
        outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
      } else if ("-outValue".equals(args[i])) {
        outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
      } else if ("-totalOrder".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits)
          maxSplits = Integer.MAX_VALUE;
        sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage(); // exits
    }
  }

  // Set user-supplied (possibly default) job configs
  job = Job.getInstance(conf);
  job.setJobName("sorter");
  job.setJarByClass(Sort.class);

  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);

  job.setNumReduceTasks(num_reduces);

  job.setInputFormatClass(inputFormatClass);
  job.setOutputFormatClass(outputFormatClass);

  job.setOutputKeyClass(outputKeyClass);
  job.setOutputValueClass(outputValueClass);

  // Make sure there are exactly 2 parameters left.
  if (otherArgs.size() != 2) {
    System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
    return printUsage();
  }
  FileInputFormat.setInputPaths(job, otherArgs.get(0));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

  if (sampler != null) {
    System.out.println("Sampling input to effect total-order sort...");
    job.setPartitionerClass(TotalOrderPartitioner.class);
    Path inputDir = FileInputFormat.getInputPaths(job)[0];
    FileSystem fs = inputDir.getFileSystem(conf);
    inputDir = inputDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path partitionFile = new Path(inputDir, "_sortPartitioning");
    TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
    InputSampler.<K, V>writePartitionFile(job, sampler);
    URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
    job.addCacheFile(partitionUri);
  }

  System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
      + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with "
      + num_reduces + " reduces.");
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date end_time = new Date();
  System.out.println("Job ended: " + end_time);
  System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return ret;
}
From source file:org.apache.hama.bsp.BSPJobClient.java
License:Apache License
public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
  BSPJob job = pJob;
  job.setJobID(jobId);

  int maxTasks;
  int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

  ClusterStatus clusterStatus = getClusterStatus(true);
  // Re-adjust the maxTasks based on cluster status.
  if (clusterStatus != null) {
    maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

    if (configured > maxTasks) {
      LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
          + (maxTasks) + " tasks.");
      job.setNumBspTask(maxTasks);
    }
  } else {
    maxTasks = configured;
  }

  Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
  Path submitSplitFile = new Path(submitJobDir, "job.split");
  Path submitJarFile = new Path(submitJobDir, "job.jar");
  Path submitJobFile = new Path(submitJobDir, "job.xml");
  LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);
  FileSystem fs = getFs();
  // Create a number of filenames in the BSPMaster's fs namespace
  fs.delete(submitJobDir, true);
  submitJobDir = fs.makeQualified(submitJobDir);
  submitJobDir = new Path(submitJobDir.toUri().getPath());
  FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
  FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
  fs.mkdirs(submitJobDir);
  short replication = (short) job.getInt("bsp.submit.replication", 10);

  // only create the splits if we have an input
  if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
    // Create the splits for the job
    LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));

    InputSplit[] splits = job.getInputFormat().getSplits(job,
        (maxTasks > configured) ? configured : maxTasks);

    if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
      LOG.info("Run pre-partitioning job");
      job = partition(job, splits, maxTasks);
      maxTasks = job.getInt("hama.partition.count", maxTasks);
    }

    if (job.getBoolean("input.has.partitioned", false)) {
      splits = job.getInputFormat().getSplits(job, maxTasks);
    }

    if (maxTasks < splits.length) {
      throw new IOException(
          "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
              + splits.length + ", The number of max tasks: " + maxTasks);
    }

    int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);
    if (numOfSplits > configured
        || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
      job.setNumBspTask(numOfSplits);
    }

    job.set("bsp.job.split.file", submitSplitFile.toString());
  }

  String originalJarPath = job.getJar();

  if (originalJarPath != null) {
    // copy jar to BSPMaster's fs
    // use jar name if job is not named.
    if ("".equals(job.getJobName())) {
      job.setJobName(new Path(originalJarPath).getName());
    }
    job.setJar(submitJarFile.toString());
    fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);

    fs.setReplication(submitJarFile, replication);
    fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
  } else {
    LOG.warn("No job jar file set. User classes may not be found. "
        + "See BSPJob#setJar(String) or check Your jar file.");
  }

  // Set the user's name and working directory
  job.setUser(getUnixUserName());
  job.set("group.name", getUnixUserGroupName(job.getUser()));
  if (job.getWorkingDirectory() == null) {
    job.setWorkingDirectory(fs.getWorkingDirectory());
  }

  // Write job file to BSPMaster's fs
  FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));

  try {
    job.writeXml(out);
  } finally {
    out.close();
  }

  return launchJob(jobId, job, submitJobFile, fs);
}
From source file:org.apache.hama.bsp.BSPTaskLauncher.java
License:Apache License
private GetContainerStatusesRequest setupContainer(Container allocatedContainer,
    ContainerManagementProtocol cm, String user, int id) throws IOException, YarnException {
  LOG.info("Setting up a container for user " + user + " with id of " + id + " and containerID of "
      + allocatedContainer.getId() + " as " + user);

  // Now we setup a ContainerLaunchContext
  ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

  // Set the local resources
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  LocalResource packageResource = Records.newRecord(LocalResource.class);
  FileSystem fs = FileSystem.get(conf);
  Path packageFile = new Path(System.getenv(YARNBSPConstants.HAMA_YARN_LOCATION));
  URL packageUrl = ConverterUtils
      .getYarnUrlFromPath(packageFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
  LOG.info("PackageURL has been composed to " + packageUrl.toString());
  try {
    LOG.info("Reverting packageURL to path: " + ConverterUtils.getPathFromYarnURL(packageUrl));
  } catch (URISyntaxException e) {
    LOG.fatal("If you see this error the workarround does not work", e);
  }

  packageResource.setResource(packageUrl);
  packageResource.setSize(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_YARN_SIZE)));
  packageResource.setTimestamp(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_YARN_TIMESTAMP)));
  packageResource.setType(LocalResourceType.FILE);
  packageResource.setVisibility(LocalResourceVisibility.APPLICATION);

  localResources.put(YARNBSPConstants.APP_MASTER_JAR_PATH, packageResource);

  Path hamaReleaseFile = new Path(System.getenv(YARNBSPConstants.HAMA_RELEASE_LOCATION));
  URL hamaReleaseUrl = ConverterUtils
      .getYarnUrlFromPath(hamaReleaseFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
  LOG.info("Hama release URL has been composed to " + hamaReleaseUrl.toString());

  LocalResource hamaReleaseRsrc = Records.newRecord(LocalResource.class);
  hamaReleaseRsrc.setResource(hamaReleaseUrl);
  hamaReleaseRsrc.setSize(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_RELEASE_SIZE)));
  hamaReleaseRsrc.setTimestamp(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_RELEASE_TIMESTAMP)));
  hamaReleaseRsrc.setType(LocalResourceType.ARCHIVE);
  hamaReleaseRsrc.setVisibility(LocalResourceVisibility.APPLICATION);

  localResources.put(YARNBSPConstants.HAMA_SYMLINK, hamaReleaseRsrc);

  ctx.setLocalResources(localResources);

  /*
   * TODO Package classpath seems not to work if you're in pseudo distributed
   * mode, because the resource must not be moved, it will never be unpacked.
   * So we will check if our jar file has the file:// prefix and put it into
   * the CP directly
   */
  StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
      .append(File.pathSeparatorChar).append("./*");
  for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
      YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
    classPathEnv.append(File.pathSeparatorChar);
    classPathEnv.append(c.trim());
  }

  classPathEnv.append(File.pathSeparator);
  classPathEnv
      .append("./" + YARNBSPConstants.HAMA_SYMLINK + "/" + YARNBSPConstants.HAMA_RELEASE_VERSION + "/*");
  classPathEnv.append(File.pathSeparator);
  classPathEnv.append(
      "./" + YARNBSPConstants.HAMA_SYMLINK + "/" + YARNBSPConstants.HAMA_RELEASE_VERSION + "/lib/*");

  Vector<CharSequence> vargs = new Vector<CharSequence>();
  vargs.add("${JAVA_HOME}/bin/java");
  vargs.add("-cp " + classPathEnv + "");
  vargs.add(BSPRunner.class.getCanonicalName());

  vargs.add(jobId.getJtIdentifier());
  vargs.add(Integer.toString(id));
  vargs.add(this.jobFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());

  vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/bsp.stdout");
  vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/bsp.stderr");

  // Get final command
  StringBuilder command = new StringBuilder();
  for (CharSequence str : vargs) {
    command.append(str).append(" ");
  }

  List<String> commands = new ArrayList<String>();
  commands.add(command.toString());

  ctx.setCommands(commands);
  LOG.info("Starting command: " + commands);

  StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
  startReq.setContainerLaunchContext(ctx);
  startReq.setContainerToken(allocatedContainer.getContainerToken());

  List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
  list.add(startReq);
  StartContainersRequest requestList = StartContainersRequest.newInstance(list);
  cm.startContainers(requestList);

  GetContainerStatusesRequest statusReq = Records.newRecord(GetContainerStatusesRequest.class);
  List<ContainerId> containerIds = new ArrayList<ContainerId>();
  containerIds.add(allocatedContainer.getId());
  statusReq.setContainerIds(containerIds);
  return statusReq;
}
From source file:org.apache.hama.bsp.YARNBSPJobClient.java
License:Apache License
@Override
protected RunningJob launchJob(BSPJobID jobId, BSPJob normalJob, Path submitJobFile, FileSystem pFs)
    throws IOException {
  YARNBSPJob job = (YARNBSPJob) normalJob;

  LOG.info("Submitting job...");
  if (getConf().get("bsp.child.mem.in.mb") == null) {
    LOG.warn("BSP Child memory has not been set, YARN will guess your needs or use default values.");
  }

  FileSystem fs = pFs;
  if (fs == null) {
    fs = FileSystem.get(getConf());
  }

  if (getConf().get("bsp.user.name") == null) {
    String s = getUnixUserName();
    getConf().set("bsp.user.name", s);
    LOG.debug("Retrieved username: " + s);
  }

  yarnClient.start();
  try {
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers="
        + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
      LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
          + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
          + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo("default");
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
        + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
        + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
        + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
      for (QueueACL userAcl : aclInfo.getUserAcls()) {
        LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
            + userAcl.name());
      }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();

    // Create a new ApplicationSubmissionContext
    //ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();

    id = appContext.getApplicationId();

    // set the application name
    appContext.setApplicationName(job.getJobName());

    // Create a new container launch context for the AM's container
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Define the local resources required
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    // Lets assume the jar we need for our ApplicationMaster is available in
    // HDFS at a certain known path to us and we want to make it available to
    // the ApplicationMaster in the launched container
    if (job.getJar() == null) {
      throw new IllegalArgumentException("Jar must be set in order to run the application!");
    }

    Path jarPath = new Path(job.getJar());
    jarPath = fs.makeQualified(jarPath);
    getConf().set("bsp.jar", jarPath.makeQualified(fs.getUri(), jarPath).toString());

    FileStatus jarStatus = fs.getFileStatus(jarPath);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));
    amJarRsrc.setTimestamp(jarStatus.getModificationTime());
    amJarRsrc.setSize(jarStatus.getLen());

    // this creates a symlink in the working directory
    localResources.put(YARNBSPConstants.APP_MASTER_JAR_PATH, amJarRsrc);

    // add hama related jar files to localresources for container
    List<File> hamaJars;
    if (System.getProperty("hama.home.dir") != null)
      hamaJars = localJarfromPath(System.getProperty("hama.home.dir"));
    else
      hamaJars = localJarfromPath(getConf().get("hama.home.dir"));
    String hamaPath = getSystemDir() + "/hama";
    for (File fileEntry : hamaJars) {
      addToLocalResources(fs, fileEntry.getCanonicalPath(), hamaPath, fileEntry.getName(), localResources);
    }

    // Set the local resources into the launch context
    amContainer.setLocalResources(localResources);

    // Set up the environment needed for the launch context
    Map<String, String> env = new HashMap<String, String>();
    // Assuming our classes or jars are available as local resources in the
    // working directory from which the command will be run, we need to append
    // "." to the path.
    // By default, all the hadoop specific classpaths will already be available
    // in $CLASSPATH, so we should be careful not to overwrite it.
    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
        .append(File.pathSeparatorChar).append("./*");
    for (String c : yarnConf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
        YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
      classPathEnv.append(File.pathSeparatorChar);
      classPathEnv.append(c.trim());
    }

    env.put(YARNBSPConstants.HAMA_YARN_LOCATION, jarPath.toUri().toString());
    env.put(YARNBSPConstants.HAMA_YARN_SIZE, Long.toString(jarStatus.getLen()));
    env.put(YARNBSPConstants.HAMA_YARN_TIMESTAMP, Long.toString(jarStatus.getModificationTime()));

    env.put(YARNBSPConstants.HAMA_LOCATION, hamaPath);
    env.put("CLASSPATH", classPathEnv.toString());
    amContainer.setEnvironment(env);

    // Set the necessary command to execute on the allocated container
    Vector<CharSequence> vargs = new Vector<CharSequence>(5);
    vargs.add("${JAVA_HOME}/bin/java");
    vargs.add("-cp " + classPathEnv + "");
    vargs.add(ApplicationMaster.class.getCanonicalName());
    vargs.add(submitJobFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
      command.append(str).append(" ");
    }

    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);
    LOG.debug("Start command: " + command);

    Resource capability = Records.newRecord(Resource.class);
    // we have at least 3 threads, which comsumes 1mb each, for each bsptask and
    // a base usage of 100mb
    capability.setMemory(3 * job.getNumBspTask() + getConf().getInt("hama.appmaster.memory.mb", 100));
    LOG.info("Set memory for the application master to " + capability.getMemory() + "mb!");

    // Set the container launch content into the ApplicationSubmissionContext
    appContext.setResource(capability);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
      // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
      Credentials credentials = new Credentials();
      String tokenRenewer = yarnConf.get(YarnConfiguration.RM_PRINCIPAL);
      if (tokenRenewer == null || tokenRenewer.length() == 0) {
        throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
      }

      // For now, only getting tokens for the default file-system.
      final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
      if (tokens != null) {
        for (Token<?> token : tokens) {
          LOG.info("Got dt for " + fs.getUri() + "; " + token);
        }
      }
      DataOutputBuffer dob = new DataOutputBuffer();
      credentials.writeTokenStorageToStream(dob);
      ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
      amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Create the request to send to the ApplicationsManager
    ApplicationId appId = appContext.getApplicationId();
    yarnClient.submitApplication(appContext);

    return monitorApplication(appId) ? new NetworkedJob() : null;
  } catch (YarnException e) {
    e.printStackTrace();
    return null;
  }
}
From source file:org.apache.hama.pipes.util.DistributedCacheUtil.java
License:Apache License
/**
 * Add the Files to HDFS
 *
 * @param conf The job's configuration
 * @param files Paths that should be transfered to HDFS
 */
public static String addFilesToHDFS(Configuration conf, String files) {
  if (files == null)
    return null;
  String[] fileArr = files.split(",");
  String[] finalArr = new String[fileArr.length];

  for (int i = 0; i < fileArr.length; i++) {
    String tmp = fileArr[i];
    String finalPath;

    URI pathURI;
    try {
      pathURI = new URI(tmp);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }

    try {
      LocalFileSystem local = LocalFileSystem.getLocal(conf);
      Path pathSrc = new Path(pathURI);
      // LOG.info("pathSrc: " + pathSrc);

      if (local.exists(pathSrc)) {
        FileSystem hdfs = FileSystem.get(conf);
        Path pathDst = new Path(hdfs.getWorkingDirectory() + "/temp", pathSrc.getName());

        // LOG.info("WorkingDirectory: " + hdfs.getWorkingDirectory());
        LOG.debug("copyToHDFSFile: " + pathDst);
        hdfs.copyFromLocalFile(pathSrc, pathDst);
        hdfs.deleteOnExit(pathDst);

        finalPath = pathDst.makeQualified(hdfs).toString();
        finalArr[i] = finalPath;
      }
    } catch (IOException e) {
      LOG.error(e);
    }
  }
  return StringUtils.arrayToString(finalArr);
}
From source file:org.apache.mahout.classifier.chi_rwcs.mapreduce.Builder.java
License:Apache License
/**
 * Output Directory name
 *
 * @param conf
 *          configuration
 * @return output dir. path (%WORKING_DIRECTORY%/OUTPUT_DIR_NAME%)
 * @throws IOException
 *           if we cannot get the default FileSystem
 */
protected Path getOutputPath(Configuration conf) throws IOException {
  // the output directory is accessed only by this class, so use the default
  // file system
  FileSystem fs = FileSystem.get(conf);
  return new Path(fs.getWorkingDirectory(), outputDirName);
}