Example usage for org.apache.hadoop.conf Configuration set

List of usage examples for org.apache.hadoop.conf Configuration set

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);

    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                context.getProgram().getClassLoader(), context.getPlugins(), context.getPluginInstantiator());
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence there is no need to create a new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // replace the user's Mapper & Reducer with our wrappers in the job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // packaging job jar which includes cdap classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce container processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar(
                    Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
                    tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar
            // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and launch
            // the actual MapReduce AM/Task from that
            // We explicitly localize the mr-framework, but do not add it to the classpath
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start a long-running transaction to be used by the MapReduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember tx, so that we can re-use it in mapreduce tasks
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submits job and returns immediately. Shouldn't need to set context ClassLoader.
            job.submit();

            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        cleanupTask.run();
        throw t;
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Creates a MapReduce {@link Job} instance.
 *
 * @param hadoopTmpDir directory for the "hadoop.tmp.dir" configuration
 */
private Job createJob(File hadoopTmpDir) throws IOException {
    Job job = Job.getInstance(new Configuration(hConf));
    Configuration jobConf = job.getConfiguration();

    if (MapReduceTaskContextProvider.isLocal(jobConf)) {
        // Set the MR framework local directories inside the given tmp directory.
        // Setting "hadoop.tmp.dir" here has no effect because the Explore Service needs to set "hadoop.tmp.dir"
        // as a system property for Hive to work in local mode, and the variable substitution in Hadoop conf
        // gives system properties the highest precedence.
        jobConf.set("mapreduce.cluster.local.dir", new File(hadoopTmpDir, "local").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.system.dir", new File(hadoopTmpDir, "system").getAbsolutePath());
        jobConf.set("mapreduce.jobtracker.staging.root.dir",
                new File(hadoopTmpDir, "staging").getAbsolutePath());
        jobConf.set("mapreduce.cluster.temp.dir", new File(hadoopTmpDir, "temp").getAbsolutePath());
    }

    if (UserGroupInformation.isSecurityEnabled()) {
        // If running in a secure cluster, this program runner runs inside a YARN container and hence is not able
        // to authenticate with the job history server.
        jobConf.unset("mapreduce.jobhistory.address");
        jobConf.setBoolean(Job.JOB_AM_ACCESS_DISABLED, false);

        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        LOG.info("Running in secure mode; adding all user credentials: {}", credentials.getAllTokens());
        job.getCredentials().addAll(credentials);
    }
    return job;
}

From source file:co.cask.cdap.internal.app.runtime.batch.ReducerWrapper.java

License:Apache License

/**
 * Wraps the reducer defined in the job with this {@link ReducerWrapper} if it is defined.
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getReducerClass() as we don't need to load user class here
    Configuration conf = job.getConfiguration();
    String reducerClass = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
    if (reducerClass != null) {
        conf.set(ReducerWrapper.ATTR_REDUCER_CLASS, reducerClass);
        job.setReducerClass(ReducerWrapper.class);
    }
}

From source file:co.cask.cdap.internal.app.runtime.batch.WrapperUtil.java

License:Apache License

static boolean setIfDefined(Job job, String srcKey, String destinationKey) {
    // NOTE: we don't use job.getXClass or conf.getClass as we don't need to load user class here
    Configuration conf = job.getConfiguration();
    String srcVal = conf.get(srcKey);
    if (srcVal != null) {
        conf.set(destinationKey, srcVal);
        return true;
    }
    return false;
}

From source file:co.cask.cdap.internal.app.runtime.distributed.DistributedSparkProgramRunner.java

License:Apache License

private static Configuration createConfiguration(Configuration hConf) {
    Configuration configuration = new Configuration(hConf);
    configuration.set(SparkContextConfig.HCONF_ATTR_EXECUTION_MODE, SparkContextConfig.YARN_EXECUTION_MODE);
    return configuration;
}

From source file:co.cask.cdap.internal.app.runtime.spark.AbstractSparkContext.java

License:Apache License

/**
 * Sets the input {@link Dataset} with splits in the {@link Configuration}
 *
 * @param datasetName the name of the {@link Dataset} to read from
 * @return updated {@link Configuration}
 * @throws {@link IllegalArgumentException} if the {@link Dataset} to read is not {@link BatchReadable}
 */
Configuration setInputDataset(String datasetName) {
    Configuration hConf = new Configuration(getHConf());
    Dataset dataset = basicSparkContext.getDataSet(datasetName);
    List<Split> inputSplits;
    if (dataset instanceof BatchReadable) {
        BatchReadable curDataset = (BatchReadable) dataset;
        inputSplits = curDataset.getSplits();
    } else {
        throw new IllegalArgumentException("Failed to read dataset " + datasetName
                + ". The dataset does not implement BatchReadable");
    }
    hConf.setClass(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, SparkDatasetInputFormat.class, InputFormat.class);
    hConf.set(SparkDatasetInputFormat.HCONF_ATTR_INPUT_DATASET, datasetName);
    hConf.set(SparkContextConfig.HCONF_ATTR_INPUT_SPLIT_CLASS, inputSplits.get(0).getClass().getName());
    hConf.set(SparkContextConfig.HCONF_ATTR_INPUT_SPLITS, new Gson().toJson(inputSplits));
    return hConf;
}

From source file:co.cask.cdap.internal.app.runtime.spark.AbstractSparkContext.java

License:Apache License

/**
 * Sets the output {@link Dataset} with splits in the {@link Configuration}
 *
 * @param datasetName the name of the {@link Dataset} to write to
 * @return updated {@link Configuration}
 */
Configuration setOutputDataset(String datasetName) {
    Configuration hConf = new Configuration(getHConf());
    hConf.set(SparkDatasetOutputFormat.HCONF_ATTR_OUTPUT_DATASET, datasetName);
    hConf.setClass(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, SparkDatasetOutputFormat.class, OutputFormat.class);
    return hConf;
}

From source file:co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetInputFormat.java

License:Apache License

/**
 * Sets the dataset name and its arguments in the configuration for the input format.
 */
public static void setDataset(Configuration configuration, String dataset, Map<String, String> arguments) {
    configuration.set(INPUT_DATASET_NAME, dataset);
    configuration.set(INPUT_DATASET_ARGS, GSON.toJson(arguments, ARGS_TYPE));
}

From source file:co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetOutputFormat.java

License:Apache License

/**
 * Sets the dataset name and its arguments in the configuration for the output format.
 */
public static void setDataset(Configuration configuration, String dataset, Map<String, String> arguments) {
    configuration.set(OUTPUT_DATASET_NAME, dataset);
    configuration.set(OUTPUT_DATASET_ARGS, GSON.toJson(arguments, ARGS_TYPE));
}

From source file:co.cask.cdap.internal.app.runtime.spark.ExecutionSparkContext.java

License:Apache License

@Override
public <T> T readFromDataset(String datasetName, Class<?> kClass, Class<?> vClass,
        Map<String, String> userDsArgs) {
    // Clone the configuration since it's dataset-specific and shouldn't affect the global hConf
    Configuration configuration = new Configuration(hConf);

    // first try if it is InputFormatProvider
    Map<String, String> dsArgs = RuntimeArguments.extractScope(Scope.DATASET, datasetName,
            getRuntimeArguments());
    dsArgs.putAll(userDsArgs);
    Dataset dataset = instantiateDataset(datasetName, dsArgs);
    try {
        if (dataset instanceof InputFormatProvider) {
            // get the input format and its configuration from the dataset
            String inputFormatName = ((InputFormatProvider) dataset).getInputFormatClassName();
            // load the input format class
            if (inputFormatName == null) {
                throw new DatasetInstantiationException(String
                        .format("Dataset '%s' provided null as the input format class name", datasetName));
            }
            Class<? extends InputFormat> inputFormatClass;
            try {
                @SuppressWarnings("unchecked")
                Class<? extends InputFormat> ifClass = (Class<? extends InputFormat>) SparkClassLoader
                        .findFromContext().loadClass(inputFormatName);
                inputFormatClass = ifClass;
                Map<String, String> inputConfig = ((InputFormatProvider) dataset).getInputFormatConfiguration();
                if (inputConfig != null) {
                    for (Map.Entry<String, String> entry : inputConfig.entrySet()) {
                        configuration.set(entry.getKey(), entry.getValue());
                    }
                }
            } catch (ClassNotFoundException e) {
                throw new DatasetInstantiationException(
                        String.format("Cannot load input format class %s provided by dataset '%s'",
                                inputFormatName, datasetName),
                        e);
            } catch (ClassCastException e) {
                throw new DatasetInstantiationException(
                        String.format("Input format class %s provided by dataset '%s' is not an input format",
                                inputFormatName, datasetName),
                        e);
            }
            return getSparkFacade().createRDD(inputFormatClass, kClass, vClass, configuration);
        }
    } finally {
        commitAndClose(datasetName, dataset);
    }

    // it must be supported by SparkDatasetInputFormat
    SparkDatasetInputFormat.setDataset(configuration, datasetName, dsArgs);
    return getSparkFacade().createRDD(SparkDatasetInputFormat.class, kClass, vClass, configuration);
}