Example usage for org.apache.hadoop.conf Configuration set

List of usage examples for org.apache.hadoop.conf Configuration set

Introduction

On this page you can find example usages of the org.apache.hadoop.conf.Configuration.set method.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
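
For orientation before the longer examples below, here is a minimal, self-contained sketch. The property names (my.app.greeting, my.app.missing) are made up for illustration; set stores a string value under a property name, and get reads it back.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
    public static void main(String[] args) {
        // a new Configuration loads the default resources (core-default.xml, core-site.xml)
        Configuration conf = new Configuration();

        // set stores a string value under the given property name
        conf.set("my.app.greeting", "hello");

        // get reads the value back; the two-argument form supplies a default
        System.out.println(conf.get("my.app.greeting"));          // hello
        System.out.println(conf.get("my.app.missing", "absent")); // absent
    }
}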

Usage

From source file:co.cask.cdap.data2.transaction.snapshot.SnapshotCodecCompatibilityTest.java

License:Apache License
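
This test drives a full snapshot-codec migration of a TransactionManager; set supplies the local snapshot directory, while setStrings registers the list of codec classes.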

/**
 * Test full stack serialization for a TransactionManager migrating from DefaultSnapshotCodec to SnapshotCodecV3.
 */
@Test
public void testV2ToTephraV3Migration() throws Exception {
    File testDir = tmpDir.newFolder("testV2ToTephraV3Migration");
    Configuration conf = new Configuration();
    conf.setStrings(TxConstants.Persist.CFG_TX_SNAPHOT_CODEC_CLASSES, SnapshotCodecV1.class.getName(),
            SnapshotCodecV2.class.getName());
    conf.set(TxConstants.Manager.CFG_TX_SNAPSHOT_LOCAL_DIR, testDir.getAbsolutePath());

    Injector injector = Guice.createInjector(new ConfigModule(conf),
            new DiscoveryModules().getSingleNodeModules(), new TransactionModules().getSingleNodeModules());

    TransactionManager txManager = injector.getInstance(TransactionManager.class);
    txManager.startAndWait();

    txManager.startLong();

    // shutdown to force a snapshot
    txManager.stopAndWait();

    TransactionStateStorage txStorage = injector.getInstance(TransactionStateStorage.class);
    txStorage.startAndWait();

    // confirm that the in-progress entry is missing a type
    TransactionSnapshot snapshot = txStorage.getLatestSnapshot();
    assertNotNull(snapshot);
    assertEquals(1, snapshot.getInProgress().size());
    Map.Entry<Long, TransactionManager.InProgressTx> entry = snapshot.getInProgress().entrySet().iterator()
            .next();
    assertNull(entry.getValue().getType());

    // start a new Tx manager to test fixup
    Configuration conf2 = new Configuration();
    conf2.setStrings(TxConstants.Persist.CFG_TX_SNAPHOT_CODEC_CLASSES, SnapshotCodecV1.class.getName(),
            SnapshotCodecV2.class.getName(), SnapshotCodecV3.class.getName());
    // make sure we work with the default CDAP conf for snapshot codecs
    CConfiguration cconf = CConfiguration.create();
    CConfigurationUtil.copyTxProperties(cconf, conf2);
    // override snapshot dir
    conf2.set(TxConstants.Manager.CFG_TX_SNAPSHOT_LOCAL_DIR, testDir.getAbsolutePath());

    Injector injector2 = Guice.createInjector(new ConfigModule(conf2),
            new DiscoveryModules().getSingleNodeModules(), new TransactionModules().getSingleNodeModules());

    TransactionManager txManager2 = injector2.getInstance(TransactionManager.class);
    txManager2.startAndWait();

    // state should be recovered
    TransactionSnapshot snapshot2 = txManager2.getCurrentState();
    assertEquals(1, snapshot2.getInProgress().size());
    Map.Entry<Long, TransactionManager.InProgressTx> inProgressTx = snapshot2.getInProgress().entrySet()
            .iterator().next();
    assertEquals(TransactionType.LONG, inProgressTx.getValue().getType());

    // save a new snapshot
    txManager2.stopAndWait();

    TransactionStateStorage txStorage2 = injector2.getInstance(TransactionStateStorage.class);
    txStorage2.startAndWait();

    TransactionSnapshot snapshot3 = txStorage2.getLatestSnapshot();
    // full snapshot should have deserialized correctly without any fixups
    assertEquals(snapshot2.getInProgress(), snapshot3.getInProgress());
    assertEquals(snapshot2, snapshot3);
}

From source file:co.cask.cdap.etl.batch.mapreduce.ETLMapReduce.java

License:Apache License
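
In this MapReduce pipeline setup, set stores JSON-serialized pipeline state (the sink outputs, the per-plugin runtime arguments, and the aggregator's group key and value class names) in the Hadoop job configuration.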

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    if (Boolean.valueOf(context.getSpecification().getProperty(Constants.STAGE_LOGGING_ENABLED))) {
        LogStageInjector.start();
    }
    CompositeFinisher.Builder finishers = CompositeFinisher.builder();

    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();

    // plugin name -> runtime args for that plugin
    Map<String, Map<String, String>> runtimeArgs = new HashMap<>();

    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    PipelinePhase phase = phaseSpec.getPhase();
    PipelinePluginInstantiator pluginInstantiator = new PipelinePluginInstantiator(context, phaseSpec);

    // we checked at configure time that there is exactly one source
    String sourceName = phaseSpec.getPhase().getSources().iterator().next();

    BatchConfigurable<BatchSourceContext> batchSource = pluginInstantiator.newPluginInstance(sourceName);
    batchSource = new LoggedBatchConfigurable<>(sourceName, batchSource);
    BatchSourceContext sourceContext = new MapReduceSourceContext(context, mrMetrics,
            new DatasetContextLookupProvider(context), sourceName, context.getRuntimeArguments());
    batchSource.prepareRun(sourceContext);
    runtimeArgs.put(sourceName, sourceContext.getRuntimeArguments());
    finishers.add(batchSource, sourceContext);

    Map<String, SinkOutput> sinkOutputs = new HashMap<>();

    for (StageInfo stageInfo : Sets.union(phase.getStagesOfType(Constants.CONNECTOR_TYPE),
            phase.getStagesOfType(BatchSink.PLUGIN_TYPE))) {
        String sinkName = stageInfo.getName();
        // todo: add a better way to get info for all sinks
        if (!phase.getSinks().contains(sinkName)) {
            continue;
        }

        BatchConfigurable<BatchSinkContext> batchSink = pluginInstantiator.newPluginInstance(sinkName);
        batchSink = new LoggedBatchConfigurable<>(sinkName, batchSink);
        MapReduceSinkContext sinkContext = new MapReduceSinkContext(context, mrMetrics,
                new DatasetContextLookupProvider(context), sinkName, context.getRuntimeArguments());
        batchSink.prepareRun(sinkContext);
        runtimeArgs.put(sinkName, sinkContext.getRuntimeArguments());
        finishers.add(batchSink, sinkContext);

        sinkOutputs.put(sinkName,
                new SinkOutput(sinkContext.getOutputNames(), stageInfo.getErrorDatasetName()));
    }
    finisher = finishers.build();
    hConf.set(SINK_OUTPUTS_KEY, GSON.toJson(sinkOutputs));

    // setup time partition for each error dataset
    for (StageInfo stageInfo : Sets.union(phase.getStagesOfType(Transform.PLUGIN_TYPE),
            phase.getStagesOfType(BatchSink.PLUGIN_TYPE))) {
        if (stageInfo.getErrorDatasetName() != null) {
            Map<String, String> args = new HashMap<>();
            args.put(FileSetProperties.OUTPUT_PROPERTIES_PREFIX + "avro.schema.output.key",
                    Constants.ERROR_SCHEMA.toString());
            TimePartitionedFileSetArguments.setOutputPartitionTime(args, context.getLogicalStartTime());
            context.addOutput(Output.ofDataset(stageInfo.getErrorDatasetName(), args));
        }
    }

    job.setMapperClass(ETLMapper.class);
    Set<StageInfo> aggregators = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE);
    if (!aggregators.isEmpty()) {
        job.setReducerClass(ETLReducer.class);
        String aggregatorName = aggregators.iterator().next().getName();
        BatchAggregator aggregator = pluginInstantiator.newPluginInstance(aggregatorName);
        MapReduceAggregatorContext aggregatorContext = new MapReduceAggregatorContext(context, mrMetrics,
                new DatasetContextLookupProvider(context), aggregatorName, context.getRuntimeArguments());
        aggregator.prepareRun(aggregatorContext);
        finishers.add(aggregator, aggregatorContext);

        if (aggregatorContext.getNumPartitions() != null) {
            job.setNumReduceTasks(aggregatorContext.getNumPartitions());
        }
        // if the plugin sets the output key and value class directly, trust them
        Class<?> outputKeyClass = aggregatorContext.getGroupKeyClass();
        Class<?> outputValClass = aggregatorContext.getGroupValueClass();
        // otherwise, derive it from the plugin's parameters
        if (outputKeyClass == null) {
            outputKeyClass = TypeChecker.getGroupKeyClass(aggregator);
        }
        if (outputValClass == null) {
            outputValClass = TypeChecker.getGroupValueClass(aggregator);
        }
        hConf.set(GROUP_KEY_CLASS, outputKeyClass.getName());
        hConf.set(GROUP_VAL_CLASS, outputValClass.getName());
        // in case the classes are not WritableComparable but are some common type we support,
        // for example a String or a StructuredRecord
        WritableConversion writableConversion = WritableConversions.getConversion(outputKeyClass.getName());
        // if the conversion is null, it means the user is using their own object.
        if (writableConversion != null) {
            outputKeyClass = writableConversion.getWritableClass();
        }
        writableConversion = WritableConversions.getConversion(outputValClass.getName());
        if (writableConversion != null) {
            outputValClass = writableConversion.getWritableClass();
        }
        // check classes here instead of letting mapreduce do it, since mapreduce throws a cryptic error
        if (!WritableComparable.class.isAssignableFrom(outputKeyClass)) {
            throw new IllegalArgumentException(String.format(
                    "Invalid aggregator %s. The group key class %s must implement Hadoop's WritableComparable.",
                    aggregatorName, outputKeyClass));
        }
        if (!Writable.class.isAssignableFrom(outputValClass)) {
            throw new IllegalArgumentException(String.format(
                    "Invalid aggregator %s. The group value class %s must implement Hadoop's Writable.",
                    aggregatorName, outputValClass));
        }

        job.setMapOutputKeyClass(outputKeyClass);
        job.setMapOutputValueClass(outputValClass);
    } else {
        job.setNumReduceTasks(0);
    }

    hConf.set(RUNTIME_ARGS_KEY, GSON.toJson(runtimeArgs));
}

From source file:co.cask.cdap.etl.batch.spark.SparkBatchSinkFactory.java

License:Apache License
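
Here each entry of a sink's output format configuration is copied into a fresh Configuration with set, along with the output format class name, before the RDD is written out.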

<K, V> void writeFromRDD(JavaPairRDD<K, V> rdd, JavaSparkExecutionContext sec, String sinkName,
        Class<K> keyClass, Class<V> valueClass) {
    Set<String> outputNames = sinkOutputs.get(sinkName);
    if (outputNames == null || outputNames.isEmpty()) {
        // should never happen if validation happened correctly at pipeline configure time
        throw new IllegalArgumentException(
                sinkName + " has no outputs. " + "Please check that the sink calls addOutput at some point.");
    }

    for (String outputName : outputNames) {
        OutputFormatProvider outputFormatProvider = outputFormatProviders.get(outputName);
        if (outputFormatProvider != null) {
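            // start from an empty Configuration: clear() removes the default resources
            // (core-default.xml etc.), so only the provider's own settings end up in hConf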
            Configuration hConf = new Configuration();
            hConf.clear();
            for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration()
                    .entrySet()) {
                hConf.set(entry.getKey(), entry.getValue());
            }
            hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatProvider.getOutputFormatClassName());
            rdd.saveAsNewAPIHadoopDataset(hConf);
        }

        DatasetInfo datasetInfo = datasetInfos.get(outputName);
        if (datasetInfo != null) {
            sec.saveAsDataset(rdd, datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
        }
    }
}

From source file:co.cask.cdap.etl.batch.spark.SparkBatchSourceFactory.java

License:Apache License
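
The source-side counterpart of the previous example: set copies the input format configuration into the Configuration used to create a Hadoop RDD.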

@SuppressWarnings("unchecked")
public <K, V> JavaPairRDD<K, V> createRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc,
        Class<K> keyClass, Class<V> valueClass) {
    if (streamBatchReadable != null) {
        FormatSpecification formatSpec = streamBatchReadable.getFormatSpecification();
        if (formatSpec != null) {
            return (JavaPairRDD<K, V>) sec.fromStream(streamBatchReadable.getStreamName(), formatSpec,
                    streamBatchReadable.getStartTime(), streamBatchReadable.getEndTime(),
                    StructuredRecord.class);
        }

        String decoderType = streamBatchReadable.getDecoderType();
        if (decoderType == null) {
            return (JavaPairRDD<K, V>) sec.fromStream(streamBatchReadable.getStreamName(),
                    streamBatchReadable.getStartTime(), streamBatchReadable.getEndTime(), valueClass);
        } else {
            try {
                Class<StreamEventDecoder<K, V>> decoderClass = (Class<StreamEventDecoder<K, V>>) Thread
                        .currentThread().getContextClassLoader().loadClass(decoderType);
                return sec.fromStream(streamBatchReadable.getStreamName(), streamBatchReadable.getStartTime(),
                        streamBatchReadable.getEndTime(), decoderClass, keyClass, valueClass);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    }
    if (inputFormatProvider != null) {
        Configuration hConf = new Configuration();
        hConf.clear();
        for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
            hConf.set(entry.getKey(), entry.getValue());
        }
        ClassLoader classLoader = Objects.firstNonNull(currentThread().getContextClassLoader(),
                getClass().getClassLoader());
        try {
            @SuppressWarnings("unchecked")
            Class<InputFormat> inputFormatClass = (Class<InputFormat>) classLoader
                    .loadClass(inputFormatProvider.getInputFormatClassName());
            return jsc.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass);
        } catch (ClassNotFoundException e) {
            throw Throwables.propagate(e);
        }
    }
    if (datasetInfo != null) {
        return sec.fromDataset(datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
    }
    // This should never happen, since the constructor is private and is only called from the static create()
    // methods, which make sure that exactly one of the source types is specified.
    throw new IllegalStateException("Unknown source type");
}

From source file:co.cask.cdap.etl.spark.batch.SparkBatchSinkFactory.java

License:Apache License
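
Another variant of the sink factory above, from a different package, using the same copy-into-Configuration pattern.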

public <K, V> void writeFromRDD(JavaPairRDD<K, V> rdd, JavaSparkExecutionContext sec, String sinkName,
        Class<K> keyClass, Class<V> valueClass) {
    Set<String> outputNames = sinkOutputs.get(sinkName);
    if (outputNames == null || outputNames.isEmpty()) {
        // should never happen if validation happened correctly at pipeline configure time
        throw new IllegalArgumentException(
                sinkName + " has no outputs. " + "Please check that the sink calls addOutput at some point.");
    }

    for (String outputName : outputNames) {
        OutputFormatProvider outputFormatProvider = outputFormatProviders.get(outputName);
        if (outputFormatProvider != null) {
            Configuration hConf = new Configuration();
            hConf.clear();
            for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration()
                    .entrySet()) {
                hConf.set(entry.getKey(), entry.getValue());
            }
            hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatProvider.getOutputFormatClassName());
            rdd.saveAsNewAPIHadoopDataset(hConf);
        }

        DatasetInfo datasetInfo = datasetInfos.get(outputName);
        if (datasetInfo != null) {
            sec.saveAsDataset(rdd, datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
        }
    }
}

From source file:co.cask.cdap.etl.spark.batch.SparkBatchSourceFactory.java

License:Apache License
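
The matching source factory, which resolves the input by name before copying its input format configuration with set.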

@SuppressWarnings("unchecked")
private <K, V> JavaPairRDD<K, V> createInputRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc,
        String inputName, Class<K> keyClass, Class<V> valueClass) {
    if (streams.containsKey(inputName)) {
        Input.StreamInput streamInput = streams.get(inputName);
        FormatSpecification formatSpec = streamInput.getBodyFormatSpec();
        if (formatSpec != null) {
            return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(), formatSpec,
                    streamInput.getStartTime(), streamInput.getEndTime(), StructuredRecord.class);
        }

        String decoderType = streamInput.getDecoderType();
        if (decoderType == null) {
            return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(), streamInput.getStartTime(),
                    streamInput.getEndTime(), valueClass);
        } else {
            try {
                Class<StreamEventDecoder<K, V>> decoderClass = (Class<StreamEventDecoder<K, V>>) Thread
                        .currentThread().getContextClassLoader().loadClass(decoderType);
                return sec.fromStream(streamInput.getName(), streamInput.getStartTime(),
                        streamInput.getEndTime(), decoderClass, keyClass, valueClass);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    }

    if (inputFormatProviders.containsKey(inputName)) {
        InputFormatProvider inputFormatProvider = inputFormatProviders.get(inputName);
        Configuration hConf = new Configuration();
        hConf.clear();
        for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
            hConf.set(entry.getKey(), entry.getValue());
        }
        ClassLoader classLoader = Objects.firstNonNull(currentThread().getContextClassLoader(),
                getClass().getClassLoader());
        try {
            @SuppressWarnings("unchecked")
            Class<InputFormat> inputFormatClass = (Class<InputFormat>) classLoader
                    .loadClass(inputFormatProvider.getInputFormatClassName());
            return jsc.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass);
        } catch (ClassNotFoundException e) {
            throw Throwables.propagate(e);
        }
    }

    if (datasetInfos.containsKey(inputName)) {
        DatasetInfo datasetInfo = datasetInfos.get(inputName);
        return sec.fromDataset(datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
    }
    // This should never happen, since the constructor is private and is only called from the static create()
    // methods, which make sure that exactly one of the source types is specified.
    throw new IllegalStateException("Unknown source type");
}

From source file:co.cask.cdap.explore.service.BaseHiveExploreServiceTest.java

License:Apache License
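
In this test setup, set points both the CDAP and Hadoop configurations at temporary local data directories before the Guice modules are built.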

private static List<Module> createStandaloneModules(CConfiguration cConf, Configuration hConf,
        TemporaryFolder tmpFolder) throws IOException {
    File localDataDir = tmpFolder.newFolder();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
    cConf.set(Constants.CFG_DATA_INMEMORY_PERSISTENCE, Constants.InMemoryPersistenceType.LEVELDB.name());
    cConf.set(Constants.Explore.LOCAL_DATA_DIR, tmpFolder.newFolder("hive").getAbsolutePath());

    hConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
    hConf.set(Constants.AppFabric.OUTPUT_DIR, cConf.get(Constants.AppFabric.OUTPUT_DIR));
    hConf.set("hadoop.tmp.dir",
            new File(localDataDir, cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsolutePath());

    return ImmutableList.of(new ConfigModule(cConf, hConf), new IOModule(),
            new DiscoveryRuntimeModule().getStandaloneModules(),
            new LocationRuntimeModule().getStandaloneModules(), new DataFabricModules().getStandaloneModules(),
            new DataSetsModules().getStandaloneModules(), new DataSetServiceModules().getStandaloneModules(),
            new MetricsClientRuntimeModule().getStandaloneModules(),
            new ExploreRuntimeModule().getStandaloneModules(), new ExploreClientModule(),
            new StreamServiceRuntimeModule().getStandaloneModules(),
            new ViewAdminModules().getStandaloneModules(), new StreamAdminModules().getStandaloneModules(),
            new NotificationServiceRuntimeModule().getStandaloneModules(),
            new NamespaceClientRuntimeModule().getInMemoryModules(),
            new NamespaceStoreModule().getStandaloneModules(), new AbstractModule() {
                @Override
                protected void configure() {
                    bind(NotificationFeedManager.class).to(NoOpNotificationFeedManager.class);

                    Multibinder<HttpHandler> handlerBinder = Multibinder.newSetBinder(binder(),
                            HttpHandler.class, Names.named(Constants.Stream.STREAM_HANDLER));
                    handlerBinder.addBinding().to(StreamHandler.class);
                    handlerBinder.addBinding().to(StreamFetchHandler.class);
                    CommonHandlers.add(handlerBinder);
                    bind(StreamHttpService.class).in(Scopes.SINGLETON);
                }
            });
}

From source file:co.cask.cdap.explore.service.ExploreServiceUtils.java

License:Apache License
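
Here set rewrites YARN_APPLICATION_CLASSPATH so that $PWD/* comes first, and the modified configuration is then written out to a new yarn-site.xml.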

/**
 * Change the yarn-site.xml file, and return a temp copy of it to which the
 * necessary options have been added.
 */
private static File updateYarnConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
        conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
        LOG.error("File {} is malformed.", confFile, e);
        throw Throwables.propagate(e);
    }

    String yarnAppClassPath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            Joiner.on(",").join(YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));

    // Add $PWD/* at the beginning of the classpath so the user's jars take precedence. Without this change,
    // job.jar would be at the beginning of the classpath, and since job.jar contains classes from an old
    // Guava version, we want $PWD/* to come before it.
    yarnAppClassPath = "$PWD/*," + yarnAppClassPath;

    conf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, yarnAppClassPath);

    File newYarnConfFile = new File(tempDir, "yarn-site.xml");
    try (FileOutputStream os = new FileOutputStream(newYarnConfFile)) {
        conf.writeXml(os);
    } catch (IOException e) {
        LOG.error("Problem creating and writing to temporary yarn-conf.xml conf file at {}", newYarnConfFile,
                e);
        throw Throwables.propagate(e);
    }

    return newYarnConfFile;
}

From source file:co.cask.cdap.explore.service.ExploreServiceUtils.java

License:Apache License
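
The same classpath fix applied to mapred-site.xml: set prepends $PWD/* to MAPREDUCE_APPLICATION_CLASSPATH.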

/**
 * Change the mapred-site.xml file, and return a temp copy of it to which the
 * necessary options have been added.
 */
private static File updateMapredConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
        conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
        LOG.error("File {} is malformed.", confFile, e);
        throw Throwables.propagate(e);
    }

    String mrAppClassPath = conf.get(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
            MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH);

    // Add $PWD/* at the beginning of the classpath. Without this change, old jars from the MR framework
    // classpath get picked up first.
    mrAppClassPath = "$PWD/*," + mrAppClassPath;

    conf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, mrAppClassPath);

    File newMapredConfFile = new File(tempDir, "mapred-site.xml");
    try (FileOutputStream os = new FileOutputStream(newMapredConfFile)) {
        conf.writeXml(os);
    } catch (IOException e) {
        LOG.error("Problem creating and writing to temporary mapred-site.xml conf file at {}",
                newMapredConfFile, e);
        throw Throwables.propagate(e);
    }

    return newMapredConfFile;
}

From source file:co.cask.cdap.explore.service.ExploreServiceUtils.java

License:Apache License
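
For hive-site.xml, set records the Spark home directory (when the SPARK_HOME environment variable is defined), in addition to two boolean classloader settings.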

/**
 * Change the hive-site.xml file, and return a temp copy of it to which the
 * necessary options have been added.
 */
private static File updateHiveConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
        conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
        LOG.error("File {} is malformed.", confFile, e);
        throw Throwables.propagate(e);
    }

    // We prefer jars in the container's root directory over job.jar. We edit YARN_APPLICATION_CLASSPATH
    // in yarn-site.xml using co.cask.cdap.explore.service.ExploreServiceUtils.updateYarnConfFile, and
    // setting MAPREDUCE_JOB_CLASSLOADER and MAPREDUCE_JOB_USER_CLASSPATH_FIRST to false puts
    // YARN_APPLICATION_CLASSPATH before job.jar on the container's classpath.
    conf.setBoolean(Job.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false);

    String sparkHome = System.getenv(Constants.SPARK_HOME);
    if (sparkHome != null) {
        LOG.debug("Setting spark.home in hive conf to {}", sparkHome);
        conf.set("spark.home", sparkHome);
    }

    File newHiveConfFile = new File(tempDir, "hive-site.xml");

    try (FileOutputStream os = new FileOutputStream(newHiveConfFile)) {
        conf.writeXml(os);
    } catch (IOException e) {
        LOG.error("Problem creating temporary hive-site.xml conf file at {}", newHiveConfFile, e);
        throw Throwables.propagate(e);
    }
    return newHiveConfFile;
}