List of usage examples for org.apache.hadoop.conf Configuration set
public void set(String name, String value)

Set the value of the name property.
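Before the examples from real projects below, here is a minimal standalone sketch of the call; the property name "example.custom.key" and its value are hypothetical, chosen only for illustration:

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Set the value of the (hypothetical) "example.custom.key" property.
        conf.set("example.custom.key", "some-value");

        // Reading the property back returns the value just set.
        System.out.println(conf.get("example.custom.key")); // prints: some-value
    }
}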
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
private static void setWorkingDirectory(Configuration conf) {
    String name = conf.get(JobContext.WORKING_DIR);

    if (name != null)
        return;

    try {
        Path dir = FileSystem.get(conf).getWorkingDirectory();
        conf.set(JobContext.WORKING_DIR, dir.toString());
    } catch (IOException exception) {
        throw new RuntimeException(exception);
    }
}
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
protected Map<FlowElement, Configuration> initFromSources(FlowNode flowNode,
        FlowProcess<TezConfiguration> flowProcess, Configuration conf,
        Map<String, LocalResource> taskLocalResources) {
    Set<? extends FlowElement> accumulatedSources = flowNode.getSourceElements(StreamMode.Accumulated);

    for (FlowElement element : accumulatedSources) {
        if (element instanceof Tap) {
            JobConf current = new JobConf(conf);
            Tap tap = (Tap) element;

            if (tap.getIdentifier() == null)
                throw new IllegalStateException("tap may not have null identifier: " + tap.toString());

            tap.sourceConfInit(flowProcess, current);

            Collection<String> paths = current.getStringCollection(CASCADING_LOCAL_RESOURCES + Tap.id(tap));

            if (!paths.isEmpty()) {
                String flowStagingPath = ((Hadoop2TezFlow) getFlow()).getFlowStagingPath();
                String resourceSubPath = Tap.id(tap);
                Map<Path, Path> pathMap = TezUtil.addToClassPath(current, flowStagingPath, resourceSubPath,
                        paths, LocalResourceType.FILE, taskLocalResources, null);

                current.setStrings(CASCADING_REMOTE_RESOURCES + Tap.id(tap),
                        taskLocalResources.keySet().toArray(new String[taskLocalResources.size()]));

                allLocalResources.putAll(taskLocalResources);
                syncPaths.putAll(pathMap);
            }

            Map<String, String> map = flowProcess.diffConfigIntoMap(new TezConfiguration(conf),
                    new TezConfiguration(current));
            conf.set("cascading.node.accumulated.source.conf." + Tap.id(tap), pack(map, conf));

            setLocalMode(conf, current, tap);
        }
    }

    Set<FlowElement> sources = new HashSet<>(flowNode.getSourceElements());
    sources.removeAll(accumulatedSources);

    if (sources.isEmpty())
        throw new IllegalStateException("all sources marked as accumulated");

    Map<FlowElement, Configuration> configs = new HashMap<>();

    for (FlowElement element : sources) {
        JobConf current = new JobConf(conf);
        String id = FlowElements.id(element);

        current.set("cascading.node.source", id);

        if (element instanceof Tap) {
            Tap tap = (Tap) element;

            if (tap.getIdentifier() == null)
                throw new IllegalStateException("tap may not have null identifier: " + tap.toString());

            tap.sourceConfInit(flowProcess, current);
            setLocalMode(conf, current, tap);
        }

        configs.put(element, current);
    }

    return configs;
}
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
private Vertex newVertex(FlowNode flowNode, Configuration conf, int parallelism) {
    conf.set(FlowNode.CASCADING_FLOW_NODE, pack(flowNode, conf)); // todo: pack into payload directly

    ProcessorDescriptor descriptor = ProcessorDescriptor.create(FlowProcessor.class.getName());
    descriptor.setUserPayload(getPayload(conf));

    Vertex vertex = Vertex.create(flowNode.getID(), descriptor, parallelism);

    if (environment != null)
        vertex.setTaskEnvironment(environment);

    return vertex;
}
From source file:cascading.flow.tez.planner.Hadoop2TezPlanner.java
License:Open Source License
public static void copyProperties(Configuration jobConf, Map<Object, Object> properties) {
    if (properties instanceof Properties) {
        Properties props = (Properties) properties;
        Set<String> keys = props.stringPropertyNames();

        for (String key : keys)
            jobConf.set(key, props.getProperty(key));
    } else {
        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
            if (entry.getValue() != null)
                jobConf.set(entry.getKey().toString(), entry.getValue().toString());
        }
    }
}
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static void setSourcePathForSplit(MRInput input, MRReader reader, Configuration configuration) {
    Path path = null;

    if (Util.returnInstanceFieldIfExistsSafe(input, "useNewApi")) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit =
                (org.apache.hadoop.mapreduce.InputSplit) reader.getSplit();

        if (newInputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit)
            path = ((org.apache.hadoop.mapreduce.lib.input.FileSplit) newInputSplit).getPath();
    } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit =
                (org.apache.hadoop.mapred.InputSplit) reader.getSplit();

        if (oldInputSplit instanceof org.apache.hadoop.mapred.FileSplit)
            path = ((org.apache.hadoop.mapred.FileSplit) oldInputSplit).getPath();
    }

    if (path != null)
        configuration.set(MultiInputSplit.CASCADING_SOURCE_PATH, path.toString());
}
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID taskAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(), context.getApplicationId().getId(),
                    context.getTaskIndex(), context.getTaskAttemptNumber(), isMapperOutput);

    config.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
    config.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, taskAttemptId.getTaskID().getId());
}
From source file:cascading.platform.tez.Hadoop2TezPlatform.java
License:Open Source License
@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        // Current usage requirements:
        // 1. Clients need to set "tez.local.mode" to true when creating a TezClient instance.
        //    (For the examples this can be done via -Dtez.local.mode=true)
        // 2. fs.defaultFS must be set to "file:///"
        //    2.1 If running examples - this must be set in tez-site.xml (so that it's picked up by the
        //        client, as well as the conf instances used to configure the Inputs / Outputs).
        //    2.2 If used programmatically (without a tez-site.xml present), all configuration instances
        //        used (to create the client / configure Inputs / Outputs) must have this property set.
        // 3. tez.runtime.optimize.local.fetch needs to be set to true (either via tez-site.xml or in all
        //    configurations used to create the job, similar to fs.defaultFS in step 2)
        // 4. tez.staging-dir must be set (either programmatically or via tez-site.xml).
        // Until TEZ-1337 goes in, the staging-dir for the job is effectively the root of the filesystem
        // (and where inputs are read from / written to if relative paths are used).
        LOG.info("not using cluster");
        configuration = new Configuration();

        configuration.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());
        // configuration.setInt( FlowRuntimeProps.GATHER_PARTITIONS, 1 ); // deadlocks if larger than 1

        configuration.set(TezConfiguration.TEZ_LOCAL_MODE, "true");
        configuration.set("fs.defaultFS", "file:///");
        configuration.set("tez.runtime.optimize.local.fetch", "true");

        // hack to prevent deadlocks where downstream processors are scheduled before upstream
        configuration.setInt("tez.am.inline.task.execution.max-tasks", 3); // testHashJoinMergeIntoHashJoinAccumulatedAccumulatedMerge fails if set to 2

        configuration.set(TezConfiguration.TEZ_IGNORE_LIB_URIS, "true"); // in local mode, use local classpath

        configuration.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);
        configuration.set(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, "true");

        configuration.set("tez.am.mode.session", "true"); // allows multiple TezClient instances to be used in a single jvm

        if (!Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            configuration.set("hadoop.tmp.dir", System.getProperty("hadoop.tmp.dir"));
        else
            configuration.set("hadoop.tmp.dir", "build/test/tmp");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored
        new File(System.getProperty("hadoop.tmp.dir")).mkdirs(); // ignored

        Configuration defaultConf = new Configuration();

        defaultConf.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());
        defaultConf.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);
        // defaultConf.set( TezConfiguration.TEZ_AM_LOG_LEVEL, "DEBUG" );
        // defaultConf.set( TezConfiguration.TEZ_TASK_LOG_LEVEL, "DEBUG" );
        defaultConf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
        defaultConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
        defaultConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, System.getProperty("hadoop.tmp.dir"));

        miniDFSCluster = new MiniDFSCluster.Builder(defaultConf).numDataNodes(4).format(true).racks(null)
                .build();
        fileSys = miniDFSCluster.getFileSystem();

        Configuration tezConf = new Configuration(defaultConf);
        tezConf.set("fs.defaultFS", fileSys.getUri().toString()); // use HDFS
        tezConf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");

        // see MiniTezClusterWithTimeline as alternate
        miniTezCluster = new MiniTezCluster(getClass().getName(), 4, 1, 1); // todo: set to 4
        miniTezCluster.init(tezConf);
        miniTezCluster.start();

        configuration = miniTezCluster.getConfig();

        // stats won't work after completion unless ATS is used
        if (setTimelineStore(configuration)) // true if ats can be loaded and configured for this hadoop version
        {
            configuration.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
                    ATSHistoryLoggingService.class.getName());
            configuration.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_ADDRESS, "localhost:10200");
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS, "localhost:8188");
            configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS, "localhost:8190");

            yarnHistoryServer = new ApplicationHistoryServer();
            yarnHistoryServer.init(configuration);
            yarnHistoryServer.start();
        }
    }

    configuration.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_MAX_TASK_FAILURES_PER_NODE, 1);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    Hadoop2TezPlanner.copyProperties(configuration, globalProperties); // copy any external properties
    Hadoop2TezPlanner.copyConfiguration(properties, configuration); // put all properties on the jobconf

    ExitUtil.disableSystemExit();
    // forbidSystemExitCall();
}
From source file:cascading.plumber.grids.AbstractGridTest.java
License:Apache License
@Test
public void shouldCopyConfigurationIntoProperties() {
    Configuration configuration = new Configuration();
    configuration.set(KEY, VALUE);

    new MockGrid().createFlowConnector(configuration);
}
From source file:cascading.scheme.DeprecatedAvroScheme.java
License:Apache License
/**
 * sinkConfInit is called by cascading to set up the sinks. This happens on the client side before the
 * job is distributed.
 * There is a check for the presence of a schema and an exception is thrown if none has been provided.
 * After the schema check the conf object is given the options that Avro needs.
 *
 * @param flowProcess The cascading FlowProcess object. Should be passed in by cascading automatically.
 * @param tap         The cascading Tap object. Should be passed in by cascading automatically.
 * @param conf        The Hadoop JobConf object. This is passed in by cascading automatically.
 * @throws RuntimeException If no schema is present this halts the entire process.
 */
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (schema == null) {
        throw new RuntimeException("Must provide sink schema");
    }

    // Set the output schema and output format class
    conf.set(AvroJob.OUTPUT_SCHEMA, schema.toString());
    conf.set("mapred.output.format.class", "org.apache.avro.mapred.AvroOutputFormat");

    // add AvroSerialization to io.serializations
    addAvroSerializations(conf);
}
From source file:cascading.scheme.DeprecatedAvroScheme.java
License:Apache License
/**
 * sourceConfInit is called by cascading to set up the sources. This happens on the client side before the
 * job is distributed.
 * There is a check for the presence of a schema and if none has been provided the data is peeked at to
 * get a schema.
 * After the schema check the conf object is given the options that Avro needs.
 *
 * @param flowProcess The cascading FlowProcess object. Should be passed in by cascading automatically.
 * @param tap         The cascading Tap object. Should be passed in by cascading automatically.
 * @param conf        The Hadoop JobConf object. This is passed in by cascading automatically.
 * @throws RuntimeException If no schema is present this halts the entire process.
 */
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    retrieveSourceFields(flowProcess, tap);

    // Set the input schema and input class
    conf.set(AvroJob.INPUT_SCHEMA, schema.toString());
    conf.set("mapred.input.format.class", "org.apache.avro.mapred.AvroInputFormat");

    // add AvroSerialization to io.serializations
    addAvroSerializations(conf);
}