List of usage examples for org.apache.hadoop.conf.Configuration.set
public void set(String name, String value)
Sets the value of the name property.
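Before the project examples below, here is a minimal, self-contained sketch of the call itself; the property key "my.example.key" is made up purely for illustration. set(name, value) stores a string value under the given name, and a subsequent get(name) returns it.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store a string value under a (hypothetical) property name
        conf.set("my.example.key", "my-value");
        // get() now returns the value we just set
        System.out.println(conf.get("my.example.key")); // prints "my-value"
    }
}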
From source file:co.cask.cdap.data2.transaction.snapshot.SnapshotCodecCompatibilityTest.java
License:Apache License
/**
 * Test full stack serialization for a TransactionManager migrating from DefaultSnapshotCodec to SnapshotCodecV3.
 */
@Test
public void testV2ToTephraV3Migration() throws Exception {
    File testDir = tmpDir.newFolder("testV2ToTephraV3Migration");
    Configuration conf = new Configuration();
    conf.setStrings(TxConstants.Persist.CFG_TX_SNAPHOT_CODEC_CLASSES, SnapshotCodecV1.class.getName(),
            SnapshotCodecV2.class.getName());
    conf.set(TxConstants.Manager.CFG_TX_SNAPSHOT_LOCAL_DIR, testDir.getAbsolutePath());

    Injector injector = Guice.createInjector(new ConfigModule(conf),
            new DiscoveryModules().getSingleNodeModules(), new TransactionModules().getSingleNodeModules());

    TransactionManager txManager = injector.getInstance(TransactionManager.class);
    txManager.startAndWait();
    txManager.startLong();

    // shutdown to force a snapshot
    txManager.stopAndWait();

    TransactionStateStorage txStorage = injector.getInstance(TransactionStateStorage.class);
    txStorage.startAndWait();

    // confirm that the in-progress entry is missing a type
    TransactionSnapshot snapshot = txStorage.getLatestSnapshot();
    assertNotNull(snapshot);
    assertEquals(1, snapshot.getInProgress().size());
    Map.Entry<Long, TransactionManager.InProgressTx> entry = snapshot.getInProgress().entrySet().iterator()
            .next();
    assertNull(entry.getValue().getType());

    // start a new Tx manager to test fixup
    Configuration conf2 = new Configuration();
    conf2.setStrings(TxConstants.Persist.CFG_TX_SNAPHOT_CODEC_CLASSES, SnapshotCodecV1.class.getName(),
            SnapshotCodecV2.class.getName(), SnapshotCodecV3.class.getName());
    // make sure we work with the default CDAP conf for snapshot codecs
    CConfiguration cconf = CConfiguration.create();
    CConfigurationUtil.copyTxProperties(cconf, conf2);
    // override snapshot dir
    conf2.set(TxConstants.Manager.CFG_TX_SNAPSHOT_LOCAL_DIR, testDir.getAbsolutePath());

    Injector injector2 = Guice.createInjector(new ConfigModule(conf2),
            new DiscoveryModules().getSingleNodeModules(), new TransactionModules().getSingleNodeModules());

    TransactionManager txManager2 = injector2.getInstance(TransactionManager.class);
    txManager2.startAndWait();

    // state should be recovered
    TransactionSnapshot snapshot2 = txManager2.getCurrentState();
    assertEquals(1, snapshot2.getInProgress().size());
    Map.Entry<Long, TransactionManager.InProgressTx> inProgressTx = snapshot2.getInProgress().entrySet()
            .iterator().next();
    assertEquals(TransactionType.LONG, inProgressTx.getValue().getType());

    // save a new snapshot
    txManager2.stopAndWait();

    TransactionStateStorage txStorage2 = injector2.getInstance(TransactionStateStorage.class);
    txStorage2.startAndWait();

    TransactionSnapshot snapshot3 = txStorage2.getLatestSnapshot();
    // full snapshot should have deserialized correctly without any fixups
    assertEquals(snapshot2.getInProgress(), snapshot3.getInProgress());
    assertEquals(snapshot2, snapshot3);
}
From source file:co.cask.cdap.etl.batch.mapreduce.ETLMapReduce.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    if (Boolean.valueOf(context.getSpecification().getProperty(Constants.STAGE_LOGGING_ENABLED))) {
        LogStageInjector.start();
    }
    CompositeFinisher.Builder finishers = CompositeFinisher.builder();

    Job job = context.getHadoopJob();
    Configuration hConf = job.getConfiguration();

    // plugin name -> runtime args for that plugin
    Map<String, Map<String, String>> runtimeArgs = new HashMap<>();

    Map<String, String> properties = context.getSpecification().getProperties();
    BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    PipelinePhase phase = phaseSpec.getPhase();
    PipelinePluginInstantiator pluginInstantiator = new PipelinePluginInstantiator(context, phaseSpec);

    // we checked at configure time that there is exactly one source
    String sourceName = phaseSpec.getPhase().getSources().iterator().next();

    BatchConfigurable<BatchSourceContext> batchSource = pluginInstantiator.newPluginInstance(sourceName);
    batchSource = new LoggedBatchConfigurable<>(sourceName, batchSource);
    BatchSourceContext sourceContext = new MapReduceSourceContext(context, mrMetrics,
            new DatasetContextLookupProvider(context), sourceName, context.getRuntimeArguments());
    batchSource.prepareRun(sourceContext);
    runtimeArgs.put(sourceName, sourceContext.getRuntimeArguments());
    finishers.add(batchSource, sourceContext);

    Map<String, SinkOutput> sinkOutputs = new HashMap<>();

    for (StageInfo stageInfo : Sets.union(phase.getStagesOfType(Constants.CONNECTOR_TYPE),
            phase.getStagesOfType(BatchSink.PLUGIN_TYPE))) {
        String sinkName = stageInfo.getName();

        // todo: add a better way to get info for all sinks
        if (!phase.getSinks().contains(sinkName)) {
            continue;
        }

        BatchConfigurable<BatchSinkContext> batchSink = pluginInstantiator.newPluginInstance(sinkName);
        batchSink = new LoggedBatchConfigurable<>(sinkName, batchSink);
        MapReduceSinkContext sinkContext = new MapReduceSinkContext(context, mrMetrics,
                new DatasetContextLookupProvider(context), sinkName, context.getRuntimeArguments());
        batchSink.prepareRun(sinkContext);
        runtimeArgs.put(sinkName, sinkContext.getRuntimeArguments());
        finishers.add(batchSink, sinkContext);

        sinkOutputs.put(sinkName, new SinkOutput(sinkContext.getOutputNames(), stageInfo.getErrorDatasetName()));
    }
    finisher = finishers.build();
    hConf.set(SINK_OUTPUTS_KEY, GSON.toJson(sinkOutputs));

    // setup time partition for each error dataset
    for (StageInfo stageInfo : Sets.union(phase.getStagesOfType(Transform.PLUGIN_TYPE),
            phase.getStagesOfType(BatchSink.PLUGIN_TYPE))) {
        if (stageInfo.getErrorDatasetName() != null) {
            Map<String, String> args = new HashMap<>();
            args.put(FileSetProperties.OUTPUT_PROPERTIES_PREFIX + "avro.schema.output.key",
                    Constants.ERROR_SCHEMA.toString());
            TimePartitionedFileSetArguments.setOutputPartitionTime(args, context.getLogicalStartTime());
            context.addOutput(Output.ofDataset(stageInfo.getErrorDatasetName(), args));
        }
    }

    job.setMapperClass(ETLMapper.class);
    Set<StageInfo> aggregators = phaseSpec.getPhase().getStagesOfType(BatchAggregator.PLUGIN_TYPE);
    if (!aggregators.isEmpty()) {
        job.setReducerClass(ETLReducer.class);
        String aggregatorName = aggregators.iterator().next().getName();
        BatchAggregator aggregator = pluginInstantiator.newPluginInstance(aggregatorName);
        MapReduceAggregatorContext aggregatorContext = new MapReduceAggregatorContext(context, mrMetrics,
                new DatasetContextLookupProvider(context), aggregatorName, context.getRuntimeArguments());
        aggregator.prepareRun(aggregatorContext);
        finishers.add(aggregator, aggregatorContext);

        if (aggregatorContext.getNumPartitions() != null) {
            job.setNumReduceTasks(aggregatorContext.getNumPartitions());
        }
        // if the plugin sets the output key and value class directly, trust them
        Class<?> outputKeyClass = aggregatorContext.getGroupKeyClass();
        Class<?> outputValClass = aggregatorContext.getGroupValueClass();
        // otherwise, derive it from the plugin's parameters
        if (outputKeyClass == null) {
            outputKeyClass = TypeChecker.getGroupKeyClass(aggregator);
        }
        if (outputValClass == null) {
            outputValClass = TypeChecker.getGroupValueClass(aggregator);
        }
        hConf.set(GROUP_KEY_CLASS, outputKeyClass.getName());
        hConf.set(GROUP_VAL_CLASS, outputValClass.getName());
        // in case the classes are not a WritableComparable, but is some common type we support
        // for example, a String or a StructuredRecord
        WritableConversion writableConversion = WritableConversions.getConversion(outputKeyClass.getName());
        // if the conversion is null, it means the user is using their own object.
        if (writableConversion != null) {
            outputKeyClass = writableConversion.getWritableClass();
        }
        writableConversion = WritableConversions.getConversion(outputValClass.getName());
        if (writableConversion != null) {
            outputValClass = writableConversion.getWritableClass();
        }
        // check classes here instead of letting mapreduce do it, since mapreduce throws a cryptic error
        if (!WritableComparable.class.isAssignableFrom(outputKeyClass)) {
            throw new IllegalArgumentException(String.format(
                    "Invalid aggregator %s. The group key class %s must implement Hadoop's WritableComparable.",
                    aggregatorName, outputKeyClass));
        }
        if (!Writable.class.isAssignableFrom(outputValClass)) {
            throw new IllegalArgumentException(String.format(
                    "Invalid aggregator %s. The group value class %s must implement Hadoop's Writable.",
                    aggregatorName, outputValClass));
        }
        job.setMapOutputKeyClass(outputKeyClass);
        job.setMapOutputValueClass(outputValClass);
    } else {
        job.setNumReduceTasks(0);
    }

    hConf.set(RUNTIME_ARGS_KEY, GSON.toJson(runtimeArgs));
}
From source file:co.cask.cdap.etl.batch.spark.SparkBatchSinkFactory.java
License:Apache License
<K, V> void writeFromRDD(JavaPairRDD<K, V> rdd, JavaSparkExecutionContext sec, String sinkName,
        Class<K> keyClass, Class<V> valueClass) {
    Set<String> outputNames = sinkOutputs.get(sinkName);
    if (outputNames == null || outputNames.size() == 0) {
        // should never happen if validation happened correctly at pipeline configure time
        throw new IllegalArgumentException(sinkName + " has no outputs. "
                + "Please check that the sink calls addOutput at some point.");
    }

    for (String outputName : outputNames) {
        OutputFormatProvider outputFormatProvider = outputFormatProviders.get(outputName);
        if (outputFormatProvider != null) {
            Configuration hConf = new Configuration();
            hConf.clear();
            for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration()
                    .entrySet()) {
                hConf.set(entry.getKey(), entry.getValue());
            }
            hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatProvider.getOutputFormatClassName());
            rdd.saveAsNewAPIHadoopDataset(hConf);
        }

        DatasetInfo datasetInfo = datasetInfos.get(outputName);
        if (datasetInfo != null) {
            sec.saveAsDataset(rdd, datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
        }
    }
}
From source file:co.cask.cdap.etl.batch.spark.SparkBatchSourceFactory.java
License:Apache License
@SuppressWarnings("unchecked") public <K, V> JavaPairRDD<K, V> createRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc, Class<K> keyClass, Class<V> valueClass) { if (streamBatchReadable != null) { FormatSpecification formatSpec = streamBatchReadable.getFormatSpecification(); if (formatSpec != null) { return (JavaPairRDD<K, V>) sec.fromStream(streamBatchReadable.getStreamName(), formatSpec, streamBatchReadable.getStartTime(), streamBatchReadable.getEndTime(), StructuredRecord.class); }//from w w w. j av a2s.c o m String decoderType = streamBatchReadable.getDecoderType(); if (decoderType == null) { return (JavaPairRDD<K, V>) sec.fromStream(streamBatchReadable.getStreamName(), streamBatchReadable.getStartTime(), streamBatchReadable.getEndTime(), valueClass); } else { try { Class<StreamEventDecoder<K, V>> decoderClass = (Class<StreamEventDecoder<K, V>>) Thread .currentThread().getContextClassLoader().loadClass(decoderType); return sec.fromStream(streamBatchReadable.getStreamName(), streamBatchReadable.getStartTime(), streamBatchReadable.getEndTime(), decoderClass, keyClass, valueClass); } catch (Exception e) { throw Throwables.propagate(e); } } } if (inputFormatProvider != null) { Configuration hConf = new Configuration(); hConf.clear(); for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) { hConf.set(entry.getKey(), entry.getValue()); } ClassLoader classLoader = Objects.firstNonNull(currentThread().getContextClassLoader(), getClass().getClassLoader()); try { @SuppressWarnings("unchecked") Class<InputFormat> inputFormatClass = (Class<InputFormat>) classLoader .loadClass(inputFormatProvider.getInputFormatClassName()); return jsc.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass); } catch (ClassNotFoundException e) { throw Throwables.propagate(e); } } if (datasetInfo != null) { return sec.fromDataset(datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs()); } // This should never happen since the constructor is private and it only get calls from static create() methods // which make sure one and only one of those source type will be specified. throw new IllegalStateException("Unknown source type"); }
From source file:co.cask.cdap.etl.spark.batch.SparkBatchSinkFactory.java
License:Apache License
public <K, V> void writeFromRDD(JavaPairRDD<K, V> rdd, JavaSparkExecutionContext sec, String sinkName,
        Class<K> keyClass, Class<V> valueClass) {
    Set<String> outputNames = sinkOutputs.get(sinkName);
    if (outputNames == null || outputNames.isEmpty()) {
        // should never happen if validation happened correctly at pipeline configure time
        throw new IllegalArgumentException(sinkName + " has no outputs. "
                + "Please check that the sink calls addOutput at some point.");
    }

    for (String outputName : outputNames) {
        OutputFormatProvider outputFormatProvider = outputFormatProviders.get(outputName);
        if (outputFormatProvider != null) {
            Configuration hConf = new Configuration();
            hConf.clear();
            for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration()
                    .entrySet()) {
                hConf.set(entry.getKey(), entry.getValue());
            }
            hConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatProvider.getOutputFormatClassName());
            rdd.saveAsNewAPIHadoopDataset(hConf);
        }

        DatasetInfo datasetInfo = datasetInfos.get(outputName);
        if (datasetInfo != null) {
            sec.saveAsDataset(rdd, datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
        }
    }
}
From source file:co.cask.cdap.etl.spark.batch.SparkBatchSourceFactory.java
License:Apache License
@SuppressWarnings("unchecked") private <K, V> JavaPairRDD<K, V> createInputRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc, String inputName, Class<K> keyClass, Class<V> valueClass) { if (streams.containsKey(inputName)) { Input.StreamInput streamInput = streams.get(inputName); FormatSpecification formatSpec = streamInput.getBodyFormatSpec(); if (formatSpec != null) { return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(), formatSpec, streamInput.getStartTime(), streamInput.getEndTime(), StructuredRecord.class); }/*from w w w . j a va2s .c om*/ String decoderType = streamInput.getDecoderType(); if (decoderType == null) { return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(), streamInput.getStartTime(), streamInput.getEndTime(), valueClass); } else { try { Class<StreamEventDecoder<K, V>> decoderClass = (Class<StreamEventDecoder<K, V>>) Thread .currentThread().getContextClassLoader().loadClass(decoderType); return sec.fromStream(streamInput.getName(), streamInput.getStartTime(), streamInput.getEndTime(), decoderClass, keyClass, valueClass); } catch (Exception e) { throw Throwables.propagate(e); } } } if (inputFormatProviders.containsKey(inputName)) { InputFormatProvider inputFormatProvider = inputFormatProviders.get(inputName); Configuration hConf = new Configuration(); hConf.clear(); for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) { hConf.set(entry.getKey(), entry.getValue()); } ClassLoader classLoader = Objects.firstNonNull(currentThread().getContextClassLoader(), getClass().getClassLoader()); try { @SuppressWarnings("unchecked") Class<InputFormat> inputFormatClass = (Class<InputFormat>) classLoader .loadClass(inputFormatProvider.getInputFormatClassName()); return jsc.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass); } catch (ClassNotFoundException e) { throw Throwables.propagate(e); } } if (datasetInfos.containsKey(inputName)) { DatasetInfo datasetInfo = datasetInfos.get(inputName); return sec.fromDataset(datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs()); } // This should never happen since the constructor is private and it only get calls from static create() methods // which make sure one and only one of those source type will be specified. throw new IllegalStateException("Unknown source type"); }
From source file:co.cask.cdap.explore.service.BaseHiveExploreServiceTest.java
License:Apache License
private static List<Module> createStandaloneModules(CConfiguration cConf, Configuration hConf,
        TemporaryFolder tmpFolder) throws IOException {
    File localDataDir = tmpFolder.newFolder();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
    cConf.set(Constants.CFG_DATA_INMEMORY_PERSISTENCE, Constants.InMemoryPersistenceType.LEVELDB.name());
    cConf.set(Constants.Explore.LOCAL_DATA_DIR, tmpFolder.newFolder("hive").getAbsolutePath());

    hConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
    hConf.set(Constants.AppFabric.OUTPUT_DIR, cConf.get(Constants.AppFabric.OUTPUT_DIR));
    hConf.set("hadoop.tmp.dir",
            new File(localDataDir, cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsolutePath());

    return ImmutableList.of(new ConfigModule(cConf, hConf), new IOModule(),
            new DiscoveryRuntimeModule().getStandaloneModules(),
            new LocationRuntimeModule().getStandaloneModules(),
            new DataFabricModules().getStandaloneModules(), new DataSetsModules().getStandaloneModules(),
            new DataSetServiceModules().getStandaloneModules(),
            new MetricsClientRuntimeModule().getStandaloneModules(),
            new ExploreRuntimeModule().getStandaloneModules(), new ExploreClientModule(),
            new StreamServiceRuntimeModule().getStandaloneModules(),
            new ViewAdminModules().getStandaloneModules(), new StreamAdminModules().getStandaloneModules(),
            new NotificationServiceRuntimeModule().getStandaloneModules(),
            new NamespaceClientRuntimeModule().getInMemoryModules(),
            new NamespaceStoreModule().getStandaloneModules(), new AbstractModule() {
                @Override
                protected void configure() {
                    bind(NotificationFeedManager.class).to(NoOpNotificationFeedManager.class);
                    Multibinder<HttpHandler> handlerBinder = Multibinder.newSetBinder(binder(),
                            HttpHandler.class, Names.named(Constants.Stream.STREAM_HANDLER));
                    handlerBinder.addBinding().to(StreamHandler.class);
                    handlerBinder.addBinding().to(StreamFetchHandler.class);
                    CommonHandlers.add(handlerBinder);
                    bind(StreamHttpService.class).in(Scopes.SINGLETON);
                }
            });
}
From source file:co.cask.cdap.explore.service.ExploreServiceUtils.java
License:Apache License
/**
 * Change yarn-site.xml file, and return a temp copy of it to which are added
 * necessary options.
 */
private static File updateYarnConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
        conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
        LOG.error("File {} is malformed.", confFile, e);
        throw Throwables.propagate(e);
    }

    String yarnAppClassPath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            Joiner.on(",").join(YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));
    // add the pwd/* at the beginning of classpath. so user's jar will take precedence and without this change,
    // job.jar will be at the beginning of the classpath, since job.jar has old guava version classes,
    // we want to add pwd/* before
    yarnAppClassPath = "$PWD/*," + yarnAppClassPath;
    conf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, yarnAppClassPath);

    File newYarnConfFile = new File(tempDir, "yarn-site.xml");
    try (FileOutputStream os = new FileOutputStream(newYarnConfFile)) {
        conf.writeXml(os);
    } catch (IOException e) {
        LOG.error("Problem creating and writing to temporary yarn-conf.xml conf file at {}", newYarnConfFile, e);
        throw Throwables.propagate(e);
    }

    return newYarnConfFile;
}
From source file:co.cask.cdap.explore.service.ExploreServiceUtils.java
License:Apache License
/**
 * Change mapred-site.xml file, and return a temp copy of it to which are added
 * necessary options.
 */
private static File updateMapredConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
        conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
        LOG.error("File {} is malformed.", confFile, e);
        throw Throwables.propagate(e);
    }

    String mrAppClassPath = conf.get(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
            MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH);
    // Add the pwd/* at the beginning of classpath. Without this change, old jars from mr framework classpath
    // get into classpath.
    mrAppClassPath = "$PWD/*," + mrAppClassPath;
    conf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, mrAppClassPath);

    File newMapredConfFile = new File(tempDir, "mapred-site.xml");
    try (FileOutputStream os = new FileOutputStream(newMapredConfFile)) {
        conf.writeXml(os);
    } catch (IOException e) {
        LOG.error("Problem creating and writing to temporary mapred-site.xml conf file at {}",
                newMapredConfFile, e);
        throw Throwables.propagate(e);
    }

    return newMapredConfFile;
}
From source file:co.cask.cdap.explore.service.ExploreServiceUtils.java
License:Apache License
/**
 * Change hive-site.xml file, and return a temp copy of it to which are added
 * necessary options.
 */
private static File updateHiveConfFile(File confFile, File tempDir) {
    Configuration conf = new Configuration(false);
    try {
        conf.addResource(confFile.toURI().toURL());
    } catch (MalformedURLException e) {
        LOG.error("File {} is malformed.", confFile, e);
        throw Throwables.propagate(e);
    }

    // we prefer jars at container's root directory before job.jar,
    // we edit the YARN_APPLICATION_CLASSPATH in yarn-site.xml using
    // co.cask.cdap.explore.service.ExploreServiceUtils.updateYarnConfFile and
    // setting the MAPREDUCE_JOB_CLASSLOADER and MAPREDUCE_JOB_USER_CLASSPATH_FIRST to false will put
    // YARN_APPLICATION_CLASSPATH before job.jar for container's classpath.
    conf.setBoolean(Job.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);
    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER, false);

    String sparkHome = System.getenv(Constants.SPARK_HOME);
    if (sparkHome != null) {
        LOG.debug("Setting spark.home in hive conf to {}", sparkHome);
        conf.set("spark.home", sparkHome);
    }

    File newHiveConfFile = new File(tempDir, "hive-site.xml");
    try (FileOutputStream os = new FileOutputStream(newHiveConfFile)) {
        conf.writeXml(os);
    } catch (IOException e) {
        LOG.error("Problem creating temporary hive-site.xml conf file at {}", newHiveConfFile, e);
        throw Throwables.propagate(e);
    }

    return newHiveConfFile;
}