List of usage examples for org.apache.hadoop.conf Configuration get
public String get(String name)
Get the value of the name property, or null if no such property exists.
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
/**
 * Resolves the working directory for the given configuration.
 * <p>
 * When "mapred.working.dir" is already set, its value is returned as a {@link Path}. Otherwise the
 * working directory reported by the configured {@link FileSystem} is stored back under that key
 * and returned.
 *
 * @param conf the configuration to read from and, when the property is missing, write to
 * @return the working directory
 * @throws RuntimeException wrapping any {@link IOException} raised while accessing the file system
 */
private static Path getWorkingDirectory(Configuration conf) {
  String configured = conf.get("mapred.working.dir");

  if (configured != null) {
    return new Path(configured);
  }

  Path workingDir;
  try {
    workingDir = FileSystem.get(conf).getWorkingDirectory();
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  // remember the resolved directory so later lookups find it in the configuration
  conf.set("mapred.working.dir", workingDir.toString());
  return workingDir;
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
/**
 * Reads the "mapred.output.dir" property as a {@link Path}.
 *
 * @param conf the configuration to read
 * @return the output path, or {@code null} when the property is not set
 */
public static Path getOutputPath(Configuration conf) {
  String outputDir = conf.get("mapred.output.dir");

  if (outputDir == null) {
    return null;
  }

  return new Path(outputDir);
}
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
/**
 * Ensures {@code JobContext.WORKING_DIR} is set on the given configuration.
 * <p>
 * An existing value is left untouched; otherwise the working directory reported by the configured
 * {@link FileSystem} is stored under that key.
 *
 * @param conf the configuration to update
 * @throws RuntimeException wrapping any {@link IOException} raised while accessing the file system
 */
private static void setWorkingDirectory(Configuration conf) {
  boolean missing = conf.get(JobContext.WORKING_DIR) == null;

  if (missing) {
    try {
      Path workingDir = FileSystem.get(conf).getWorkingDirectory();
      conf.set(JobContext.WORKING_DIR, workingDir.toString());
    } catch (IOException exception) {
      throw new RuntimeException(exception);
    }
  }
}
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
public Vertex createVertex(FlowProcess<TezConfiguration> flowProcess, TezConfiguration initializedConfig, FlowNode flowNode) {//from ww w . ja v a 2s. c o m JobConf conf = new JobConf(initializedConfig); addInputOutputMapping(conf, flowNode); conf.setBoolean("mapred.used.genericoptionsparser", true); Map<String, LocalResource> taskLocalResources = new HashMap<>(); Map<FlowElement, Configuration> sourceConfigs = initFromSources(flowNode, flowProcess, conf, taskLocalResources); Map<FlowElement, Configuration> sinkConfigs = initFromSinks(flowNode, flowProcess, conf); initFromTraps(flowNode, flowProcess, conf); initFromNodeConfigDef(flowNode, conf); // force step to local mode if any tap is local setLocalMode(initializedConfig, conf, null); conf.set("cascading.flow.node.num", Integer.toString(flowNode.getOrdinal())); int parallelism = getParallelism(flowNode, conf); if (parallelism == 0) throw new FlowException(getName(), "the default number of gather partitions must be set, see cascading.flow.FlowRuntimeProps"); Vertex vertex = newVertex(flowNode, conf, parallelism); if (!taskLocalResources.isEmpty()) vertex.addTaskLocalFiles(taskLocalResources); for (FlowElement flowElement : sourceConfigs.keySet()) { if (!(flowElement instanceof Tap)) continue; Configuration sourceConf = sourceConfigs.get(flowElement); // not setting the new-api value could result in failures if not set by the Scheme if (sourceConf.get("mapred.mapper.new-api") == null) HadoopUtil.setNewApi(sourceConf, sourceConf.get("mapred.input.format.class", sourceConf.get("mapreduce.job.inputformat.class"))); // unfortunately we cannot just load the input format and set it on the builder with also pulling all other // values out of the configuration. 
MRInput.MRInputConfigBuilder configBuilder = MRInput.createConfigBuilder(sourceConf, null); // grouping splits loses file name info, breaking partition tap default impl if (flowElement instanceof PartitionTap) // todo: generify configBuilder.groupSplits(false); DataSourceDescriptor dataSourceDescriptor = configBuilder.build(); vertex.addDataSource(FlowElements.id(flowElement), dataSourceDescriptor); } for (FlowElement flowElement : sinkConfigs.keySet()) { if (!(flowElement instanceof Tap)) continue; Configuration sinkConf = sinkConfigs.get(flowElement); Class outputFormatClass; String outputPath; // we have to set sane defaults if not set by the tap // typically the case of MultiSinkTap String formatClassName = sinkConf.get("mapred.output.format.class", sinkConf.get("mapreduce.job.outputformat.class")); if (formatClassName == null) { outputFormatClass = TextOutputFormat.class; // unused, use "new" api, its the default outputPath = Hfs.getTempPath(sinkConf).toString(); // unused } else { outputFormatClass = Util.loadClass(formatClassName); outputPath = getOutputPath(sinkConf); } if (outputPath == null && getOutputPath(sinkConf) == null && isFileOutputFormat(outputFormatClass)) outputPath = Hfs.getTempPath(sinkConf).toString(); // unused MROutput.MROutputConfigBuilder configBuilder = MROutput.createConfigBuilder(sinkConf, outputFormatClass, outputPath); DataSinkDescriptor dataSinkDescriptor = configBuilder.build(); vertex.addDataSink(FlowElements.id(flowElement), dataSinkDescriptor); } addRemoteDebug(flowNode, vertex); addRemoteProfiling(flowNode, vertex); return vertex; }
From source file:cascading.flow.tez.stream.graph.Hadoop2TezStreamGraph.java
License:Open Source License
/** * Maps each input to an ordinal on the flowelement. an input may be bound to multiple ordinals. * * @param element/*from w ww .ja v a 2 s . co m*/ */ private SortedListMultiMap<Integer, LogicalInput> createInputMap(FlowElement element) { String id = FlowElements.id(element); SortedListMultiMap<Integer, LogicalInput> ordinalMap = new SortedListMultiMap<>(); for (LogicalInput logicalInput : inputMap.values()) { Configuration configuration = inputConfigMap.get(logicalInput); String foundID = configuration.get("cascading.node.source"); if (Util.isEmpty(foundID)) throw new IllegalStateException("cascading.node.source property not set on source LogicalInput"); if (!foundID.equals(id)) continue; String values = configuration.get("cascading.node.source.ordinals", ""); List<Integer> ordinals = Util.split(Integer.class, ",", values); for (Integer ordinal : ordinals) ordinalMap.put(ordinal, logicalInput); } return ordinalMap; }
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static String getEdgeSourceID(LogicalInput input, Configuration configuration) { String id = configuration.get("cascading.node.source"); if (id == null) throw new IllegalStateException("no source id found: " + input.getClass().getName()); return id;//from w ww. j a v a 2 s . c om }
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
public static String getEdgeSinkID(LogicalOutput output, Configuration configuration) { String id = configuration.get("cascading.node.sink"); if (id == null) throw new IllegalStateException("no sink id found: " + output.getClass().getName()); return id;// w w w . j av a2 s. c o m }
From source file:cascading.tap.hadoop.BaseDistCacheTap.java
License:Open Source License
/**
 * Initializes the source side of this tap.
 * <p>
 * When the configuration is local, or this tap's id equals the configured
 * "cascading.node.source" or "cascading.step.source" value, initialization is delegated to the
 * parent implementation. In every other case the underlying Hfs is registered through
 * {@code registerHfs}.
 *
 * @param process the current flow process
 * @param conf    the configuration to initialize
 * @throws TapException wrapping any {@link IOException} raised by {@code registerHfs}
 */
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> process, Configuration conf) {
  boolean skipDistributedCache = HadoopUtil.isLocal(conf)
      || Tap.id(this).equals(conf.get("cascading.node.source"))
      || Tap.id(this).equals(conf.get("cascading.step.source"));

  if (!skipDistributedCache) {
    try {
      registerHfs(process, conf, getHfs());
    } catch (IOException exception) {
      throw new TapException(exception);
    }

    return;
  }

  LOG.info("can't use distributed cache. reading '{}' from hdfs", super.getIdentifier());
  super.sourceConfInit(process, conf);
}
From source file:cascading.tap.hadoop.Hfs.java
License:Open Source License
/** * Based on the configuration, handles and sets {@link CombineFileInputFormat} as the input * format./* w w w . java 2 s . c om*/ */ private void handleCombineFileInputFormat(Configuration conf) { // if combining files, override the configuration to use CombineFileInputFormat if (!getUseCombinedInput(conf)) return; // get the prescribed individual input format from the underlying scheme so it can be used by CombinedInputFormat String individualInputFormat = conf.get("mapred.input.format.class"); if (individualInputFormat == null) throw new TapException("input format is missing from the underlying scheme"); if (individualInputFormat.equals(CombinedInputFormat.class.getName()) && conf.get(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT) == null) throw new TapException( "the input format class is already the combined input format but the underlying input format is missing"); // if safe mode is on (default) throw an exception if the InputFormat is not a FileInputFormat, otherwise log a // warning and don't use the CombineFileInputFormat boolean safeMode = getCombinedInputSafeMode(conf); if (!FileInputFormat.class.isAssignableFrom(conf.getClass("mapred.input.format.class", null))) { if (safeMode) throw new TapException( "input format must be of type org.apache.hadoop.mapred.FileInputFormat, got: " + individualInputFormat); else LOG.warn( "not combining input splits with CombineFileInputFormat, {} is not of type org.apache.hadoop.mapred.FileInputFormat.", individualInputFormat); } else { // set the underlying individual input format conf.set(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT, individualInputFormat); // override the input format class conf.setClass("mapred.input.format.class", CombinedInputFormat.class, InputFormat.class); } }
From source file:cascading.tap.hadoop.Hfs.java
License:Open Source License
/**
 * Resolves the temporary directory for the given configuration.
 * <p>
 * The value of {@code HfsProps.TEMPORARY_DIRECTORY} is preferred; when it is not set, the
 * "hadoop.tmp.dir" property is used instead.
 *
 * @param conf the configuration to read
 * @return the temporary directory as a {@link Path}
 */
public static Path getTempPath(Configuration conf) {
  String configured = conf.get(HfsProps.TEMPORARY_DIRECTORY);
  String location = configured != null ? configured : conf.get("hadoop.tmp.dir");

  return new Path(location);
}