List of usage examples for org.apache.hadoop.conf.Configuration#set(String, String)
public void set(String name, String value)
Sets the value of the name property.
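Before the framework-level examples below, here is a minimal, self-contained sketch of what set does; the property names and values are made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
  public static void main(String[] args) {
    // Pass false to skip loading the default resources (core-default.xml, core-site.xml).
    Configuration conf = new Configuration(false);
    conf.set("example.greeting", "hello");
    System.out.println(conf.get("example.greeting"));            // prints: hello
    System.out.println(conf.get("example.missing", "fallback")); // prints: fallback
  }
}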
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License
@Override
protected void startUp() throws Exception {
  // Creates a temporary directory locally for storing all generated files.
  File tempDir = createTempDirectory();
  cleanupTask = createCleanupTask(tempDir);
  try {
    Job job = createJob(new File(tempDir, "mapreduce"));
    Configuration mapredConf = job.getConfiguration();

    classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                                           context.getProgram().getClassLoader(),
                                           context.getPlugins(), context.getPluginInstantiator());
    cleanupTask = createCleanupTask(cleanupTask, classLoader);

    mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
    ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

    context.setJob(job);

    beforeSubmit(job);

    // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
    Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

    // Override user-defined job name, since we set it and depend on the name.
    // https://issues.cask.co/browse/CDAP-2441
    String jobName = job.getJobName();
    if (!jobName.isEmpty()) {
      LOG.warn("Job name {} is being overridden.", jobName);
    }
    job.setJobName(getJobName(context));

    // Create a temporary location for storing all generated files through the LocationFactory.
    Location tempLocation = createTempLocationDirectory();
    cleanupTask = createCleanupTask(cleanupTask, tempLocation);

    // For local mode, everything is in the configuration classloader already, hence no need to create a new jar
    if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
      // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
      // ClassLoader from here and use it for setting up the job
      Location pluginArchive = createPluginArchive(tempLocation);
      if (pluginArchive != null) {
        job.addCacheArchive(pluginArchive.toURI());
        mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
      }
    }

    // Set resources for the job
    TaskType.MAP.setResources(mapredConf, context.getMapperResources());
    TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

    // Replace the user's Mapper and Reducer with our wrappers in the job config
    MapperWrapper.wrap(job);
    ReducerWrapper.wrap(job);

    // Package the job jar, which includes CDAP classes with dependencies
    File jobJar = buildJobJar(job, tempDir);
    job.setJar(jobJar.toURI().toString());

    Location programJar = programJarLocation;
    if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
      // Copy and localize the program jar in distributed mode
      programJar = copyProgramJar(tempLocation);
      job.addCacheFile(programJar.toURI());

      List<String> classpath = new ArrayList<>();

      // Localize logback.xml
      Location logbackLocation = createLogbackJar(tempLocation);
      if (logbackLocation != null) {
        job.addCacheFile(logbackLocation.toURI());
        classpath.add(logbackLocation.getName());
      }

      // Generate and localize the launcher jar to control the classloader of MapReduce container processes
      classpath.add("job.jar/lib/*");
      classpath.add("job.jar/classes");
      Location launcherJar = createLauncherJar(
          Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
          tempLocation);
      job.addCacheFile(launcherJar.toURI());

      // The only thing in the container classpath is the launcher.jar.
      // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and launch
      // the actual MapReduce AM/Task from that.
      // We explicitly localize the mr-framework, but do not use it with the classpath.
      URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
      if (frameworkURI != null) {
        job.addCacheArchive(frameworkURI);
      }

      mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
      mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
      mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
    }

    MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);

    // We start a long-running tx to be used by the MapReduce job tasks.
    Transaction tx = txClient.startLong();
    try {
      // We remember the tx, so that we can re-use it in the MapReduce tasks
      CConfiguration cConfCopy = cConf;
      contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

      LOG.info("Submitting MapReduce Job: {}", context);
      // Submits the job and returns immediately. Shouldn't need to set the context ClassLoader.
      job.submit();

      this.job = job;
      this.transaction = tx;
    } catch (Throwable t) {
      Transactions.invalidateQuietly(txClient, tx);
      throw t;
    }
  } catch (Throwable t) {
    LOG.error("Exception when submitting MapReduce Job: {}", context, t);
    cleanupTask.run();
    throw t;
  }
}
From source file: co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License: Apache License
/**
 * Creates a MapReduce {@link Job} instance.
 *
 * @param hadoopTmpDir directory for the "hadoop.tmp.dir" configuration
 */
private Job createJob(File hadoopTmpDir) throws IOException {
  Job job = Job.getInstance(new Configuration(hConf));
  Configuration jobConf = job.getConfiguration();

  if (MapReduceTaskContextProvider.isLocal(jobConf)) {
    // Set the MR framework local directories inside the given tmp directory.
    // Setting "hadoop.tmp.dir" here has no effect because the Explore Service needs to set "hadoop.tmp.dir"
    // as a system property for Hive to work in local mode, and Hadoop conf variable substitution
    // gives system properties the highest precedence.
    jobConf.set("mapreduce.cluster.local.dir", new File(hadoopTmpDir, "local").getAbsolutePath());
    jobConf.set("mapreduce.jobtracker.system.dir", new File(hadoopTmpDir, "system").getAbsolutePath());
    jobConf.set("mapreduce.jobtracker.staging.root.dir", new File(hadoopTmpDir, "staging").getAbsolutePath());
    jobConf.set("mapreduce.cluster.temp.dir", new File(hadoopTmpDir, "temp").getAbsolutePath());
  }

  if (UserGroupInformation.isSecurityEnabled()) {
    // If running in a secure cluster, this program runner is running in a YARN container, hence not able
    // to get authenticated with the job history server.
    jobConf.unset("mapreduce.jobhistory.address");
    jobConf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    LOG.info("Running in secure mode; adding all user credentials: {}", credentials.getAllTokens());
    job.getCredentials().addAll(credentials);
  }
  return job;
}
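The comment about "hadoop.tmp.dir" relies on Hadoop's variable-substitution order: when a stored value contains ${var}, Configuration.get resolves var against JVM system properties before falling back to other configuration entries. A minimal sketch of that behaviour (the paths are made up):

import org.apache.hadoop.conf.Configuration;

public class SubstitutionOrderExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("hadoop.tmp.dir", "/tmp/from-conf");
    conf.set("mapreduce.cluster.local.dir", "${hadoop.tmp.dir}/mapred/local");

    // A system property with the same name shadows the Configuration entry during ${...} substitution,
    // which is why setting "hadoop.tmp.dir" on the job conf alone is not enough in the method above.
    System.setProperty("hadoop.tmp.dir", "/tmp/from-sysprop");
    System.out.println(conf.get("mapreduce.cluster.local.dir")); // /tmp/from-sysprop/mapred/local
  }
}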
From source file: co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java
License: Apache License
/**
 * Wraps the reducer defined in the job with this {@link ReducerWrapper} if one is defined.
 *
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
  // NOTE: we don't use job.getReducerClass() as we don't need to load the user class here
  Configuration conf = job.getConfiguration();
  String reducerClass = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
  if (reducerClass != null) {
    conf.set(ReducerWrapper.ATTR_REDUCER_CLASS, reducerClass);
    job.setReducerClass(ReducerWrapper.class);
  }
}
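wrap only records the original reducer class under ATTR_REDUCER_CLASS; at task time a wrapper has to read that value back out of the Configuration. The following is a hypothetical sketch of that read-back side, not CDAP's actual ReducerWrapper: the attribute key is made up, and a real implementation would also handle classloading and context wrapping.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.ReflectionUtils;

public class DelegatingReducer<KI, VI, KO, VO> extends Reducer<KI, VI, KO, VO> {
  // Assumed key; the value of CDAP's real ReducerWrapper.ATTR_REDUCER_CLASS is not shown above.
  static final String ATTR_REDUCER_CLASS = "example.wrapped.reducer.class";

  @Override
  @SuppressWarnings("unchecked")
  protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    String delegateName = conf.get(ATTR_REDUCER_CLASS);
    if (delegateName == null) {
      throw new IllegalStateException("No wrapped reducer class recorded in the Configuration");
    }
    try {
      Class<?> delegateClass = conf.getClassByName(delegateName);
      Reducer<KI, VI, KO, VO> delegate =
          (Reducer<KI, VI, KO, VO>) ReflectionUtils.newInstance(delegateClass, conf);
      // ... forward reduce() calls to the delegate, or invoke delegate.run(context) ...
    } catch (ClassNotFoundException e) {
      throw new IOException("Cannot load wrapped reducer class " + delegateName, e);
    }
  }
}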
From source file: co.cask.cdap.internal.app.runtime.batch.WrapperUtil.java
License: Apache License
static boolean setIfDefined(Job job, String srcKey, String destinationKey) {
  // NOTE: we don't use job.getXClass or conf.getClass as we don't need to load user class here
  Configuration conf = job.getConfiguration();
  String srcVal = conf.get(srcKey);
  if (srcVal != null) {
    conf.set(destinationKey, srcVal);
    return true;
  }
  return false;
}
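A hypothetical call site for setIfDefined; since the method is package-private, a real caller would live in the same package, and the destination key below is illustrative rather than one of CDAP's actual constants.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class WrapperUtilUsage {
  public static void main(String[] args) throws IOException {
    Job job = Job.getInstance(new Configuration());
    job.setCombinerClass(IntSumReducer.class);

    // Copies the user's combiner class name to a second key only if one was configured.
    boolean copied = WrapperUtil.setIfDefined(job, MRJobConfig.COMBINE_CLASS_ATTR,
                                              "example.wrapped.combiner.class");
    System.out.println("combiner recorded: " + copied);
  }
}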
From source file: co.cask.cdap.internal.app.runtime.distributed.DistributedSparkProgramRunner.java
License: Apache License
private static Configuration createConfiguration(Configuration hConf) {
  Configuration configuration = new Configuration(hConf);
  configuration.set(SparkContextConfig.HCONF_ATTR_EXECUTION_MODE, SparkContextConfig.YARN_EXECUTION_MODE);
  return configuration;
}
From source file: co.cask.cdap.internal.app.runtime.spark.AbstractSparkContext.java
License: Apache License
/**
 * Sets the input {@link Dataset} with splits in the {@link Configuration}.
 *
 * @param datasetName the name of the {@link Dataset} to read from
 * @return updated {@link Configuration}
 * @throws IllegalArgumentException if the {@link Dataset} to read is not {@link BatchReadable}
 */
Configuration setInputDataset(String datasetName) {
  Configuration hConf = new Configuration(getHConf());
  Dataset dataset = basicSparkContext.getDataSet(datasetName);
  List<Split> inputSplits;
  if (dataset instanceof BatchReadable) {
    BatchReadable curDataset = (BatchReadable) dataset;
    inputSplits = curDataset.getSplits();
  } else {
    throw new IllegalArgumentException("Failed to read dataset " + datasetName
        + ". The dataset does not implement BatchReadable");
  }
  hConf.setClass(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SparkDatasetInputFormat.class, InputFormat.class);
  hConf.set(SparkDatasetInputFormat.HCONF_ATTR_INPUT_DATASET, datasetName);
  hConf.set(SparkContextConfig.HCONF_ATTR_INPUT_SPLIT_CLASS, inputSplits.get(0).getClass().getName());
  hConf.set(SparkContextConfig.HCONF_ATTR_INPUT_SPLITS, new Gson().toJson(inputSplits));
  return hConf;
}
From source file: co.cask.cdap.internal.app.runtime.spark.AbstractSparkContext.java
License: Apache License
/**
 * Sets the output {@link Dataset} in the {@link Configuration}.
 *
 * @param datasetName the name of the {@link Dataset} to write to
 * @return updated {@link Configuration}
 */
Configuration setOutputDataset(String datasetName) {
  Configuration hConf = new Configuration(getHConf());
  hConf.set(SparkDatasetOutputFormat.HCONF_ATTR_OUTPUT_DATASET, datasetName);
  hConf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, SparkDatasetOutputFormat.class, OutputFormat.class);
  return hConf;
}
From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetInputFormat.java
License: Apache License
/**
 * Sets the dataset name and the serialized dataset arguments for the input format in the given {@link Configuration}.
 */
public static void setDataset(Configuration configuration, String dataset, Map<String, String> arguments) {
  configuration.set(INPUT_DATASET_NAME, dataset);
  configuration.set(INPUT_DATASET_ARGS, GSON.toJson(arguments, ARGS_TYPE));
}
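The read-time counterpart would pull the dataset name back out and deserialize the argument map with Gson. A hypothetical sketch follows; the key literals stand in for the private INPUT_DATASET_NAME / INPUT_DATASET_ARGS constants, whose actual values are not shown above.

import java.lang.reflect.Type;
import java.util.Map;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.hadoop.conf.Configuration;

final class DatasetConfigReader {
  private static final Gson GSON = new Gson();
  private static final Type ARGS_TYPE = new TypeToken<Map<String, String>>() { }.getType();

  // Assumed keys; the real constants are private to SparkDatasetInputFormat.
  private static final String INPUT_DATASET_NAME = "example.input.dataset.name";
  private static final String INPUT_DATASET_ARGS = "example.input.dataset.args";

  static String readDatasetName(Configuration conf) {
    return conf.get(INPUT_DATASET_NAME);
  }

  static Map<String, String> readDatasetArgs(Configuration conf) {
    String json = conf.get(INPUT_DATASET_ARGS);
    return json == null ? null : GSON.<Map<String, String>>fromJson(json, ARGS_TYPE);
  }
}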
From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetOutputFormat.java
License: Apache License
/**
 * Sets the dataset name and the serialized dataset arguments for the output format in the given {@link Configuration}.
 */
public static void setDataset(Configuration configuration, String dataset, Map<String, String> arguments) {
  configuration.set(OUTPUT_DATASET_NAME, dataset);
  configuration.set(OUTPUT_DATASET_ARGS, GSON.toJson(arguments, ARGS_TYPE));
}
From source file: co.cask.cdap.internal.app.runtime.spark.ExecutionSparkContext.java
License: Apache License
@Override
public <T> T readFromDataset(String datasetName, Class<?> kClass, Class<?> vClass,
                             Map<String, String> userDsArgs) {
  // Clone the configuration, since the dataset-specific settings shouldn't affect the global hConf
  Configuration configuration = new Configuration(hConf);

  // First try whether it is an InputFormatProvider
  Map<String, String> dsArgs = RuntimeArguments.extractScope(Scope.DATASET, datasetName, getRuntimeArguments());
  dsArgs.putAll(userDsArgs);
  Dataset dataset = instantiateDataset(datasetName, dsArgs);
  try {
    if (dataset instanceof InputFormatProvider) {
      // Get the input format and its configuration from the dataset
      String inputFormatName = ((InputFormatProvider) dataset).getInputFormatClassName();
      // Load the input format class
      if (inputFormatName == null) {
        throw new DatasetInstantiationException(String.format(
            "Dataset '%s' provided null as the input format class name", datasetName));
      }
      Class<? extends InputFormat> inputFormatClass;
      try {
        @SuppressWarnings("unchecked")
        Class<? extends InputFormat> ifClass =
            (Class<? extends InputFormat>) SparkClassLoader.findFromContext().loadClass(inputFormatName);
        inputFormatClass = ifClass;
        Map<String, String> inputConfig = ((InputFormatProvider) dataset).getInputFormatConfiguration();
        if (inputConfig != null) {
          for (Map.Entry<String, String> entry : inputConfig.entrySet()) {
            configuration.set(entry.getKey(), entry.getValue());
          }
        }
      } catch (ClassNotFoundException e) {
        throw new DatasetInstantiationException(String.format(
            "Cannot load input format class %s provided by dataset '%s'", inputFormatName, datasetName), e);
      } catch (ClassCastException e) {
        throw new DatasetInstantiationException(String.format(
            "Input format class %s provided by dataset '%s' is not an input format",
            inputFormatName, datasetName), e);
      }
      return getSparkFacade().createRDD(inputFormatClass, kClass, vClass, configuration);
    }
  } finally {
    commitAndClose(datasetName, dataset);
  }

  // Otherwise it must be supported by SparkDatasetInputFormat
  SparkDatasetInputFormat.setDataset(configuration, datasetName, dsArgs);
  return getSparkFacade().createRDD(SparkDatasetInputFormat.class, kClass, vClass, configuration);
}