List of usage examples for org.apache.hadoop.conf Configuration setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
From source file: boa.runtime.BoaRunner.java
License:Apache License
/** * Create a {@link Job} describing the work to be done by this Boa job. * //ww w . ja v a 2 s . co m * @param ins * An array of {@link Path} containing the locations of the input * files * * @param out * A {@link Path} containing the location of the output file * * @param robust * A boolean representing whether the job should ignore most * exceptions * * @return A {@link Job} describing the work to be done by this Boa job * @throws IOException */ public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException { final Configuration configuration = getConf(); configuration.setBoolean("boa.runtime.robust", robust); // faster local reads configuration.setBoolean("dfs.client.read.shortcircuit", true); configuration.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true); // by default our MapFile's index every key, which takes up // a lot of memory - this lets you skip keys in the index and // control the memory requirements (as a tradeoff of slower gets) //configuration.setLong("io.map.index.skip", 128); // map output compression configuration.setBoolean("mapred.compress.map.output", true); configuration.set("mapred.map.output.compression.type", "BLOCK"); configuration.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class); configuration.setBoolean("mapred.map.tasks.speculative.execution", false); configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false); configuration.setLong("mapred.job.reuse.jvm.num.tasks", -1); final Job job = new Job(configuration); if (ins != null) for (final Path in : ins) FileInputFormat.addInputPath(job, in); FileOutputFormat.setOutputPath(job, out); job.setPartitionerClass(BoaPartitioner.class); job.setMapOutputKeyClass(EmitKey.class); job.setMapOutputValueClass(EmitValue.class); job.setOutputFormatClass(BoaOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); return job; }
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
/**
 * Flags the given configuration as belonging to an executing flow by
 * setting the {@code CASCADING_FLOW_EXECUTING} property to {@code true}.
 *
 * @param conf the configuration to mark
 */
public static void setIsInflow(Configuration conf) {
    final boolean executing = true;

    conf.setBoolean(CASCADING_FLOW_EXECUTING, executing);
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static boolean setNewApi(Configuration conf, String className) { if (className == null) // silently return and let the error be caught downstream return false; boolean isStable = className.startsWith("org.apache.hadoop.mapred."); boolean isNew = className.startsWith("org.apache.hadoop.mapreduce."); if (isStable) conf.setBoolean("mapred.mapper.new-api", false); else if (isNew) conf.setBoolean("mapred.mapper.new-api", true); else//from w w w .j a v a 2s. co m throw new IllegalStateException( "cannot determine if class denotes stable or new api, please set 'mapred.mapper.new-api' to the appropriate value"); return true; }
From source file:cascading.flow.tez.util.TezUtil.java
License:Open Source License
/**
 * Populates the MapReduce task identity properties on {@code config} from
 * the given Tez processor context.
 * <p>
 * A mock task attempt id is created from the context's application id,
 * vertex index, task index and attempt number, and is then used to set
 * {@code JobContext.TASK_ATTEMPT_ID}, {@code JobContext.TASK_ID},
 * {@code JobContext.TASK_ISMAP} and {@code JobContext.TASK_PARTITION}.
 *
 * @param context        the Tez processor context supplying the ids
 * @param config         the configuration to update
 * @param isMapperOutput passed to the mock id factory and stored as
 *                       {@code JobContext.TASK_ISMAP}
 */
public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID attemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(
                    context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(),
                    context.getApplicationId().getId(),
                    context.getTaskIndex(),
                    context.getTaskAttemptNumber(),
                    isMapperOutput);

    String attemptIdText = attemptId.toString();
    String taskIdText = attemptId.getTaskID().toString();
    int partition = attemptId.getTaskID().getId();

    config.set(JobContext.TASK_ATTEMPT_ID, attemptIdText);
    config.set(JobContext.TASK_ID, taskIdText);
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, partition);
}
From source file:cascading.platform.tez.Hadoop2TezPlatform.java
License:Open Source License
@Override public synchronized void setUp() throws IOException { if (configuration != null) return;/*from w w w .j ava2 s.c om*/ if (!isUseCluster()) { // Current usage requirements: // 1. Clients need to set "tez.local.mode" to true when creating a TezClient instance. (For the examples this can be done via -Dtez.local.mode=true) // 2. fs.defaultFS must be set to "file:///" // 2.1 If running examples - this must be set in tez-site.xml (so that it's picked up by the client, as well as the conf instances used to configure the Inputs / Outputs). // 2.2 If using programatically (without a tez-site.xml present). All configuration instances used (to crate the client / configure Inputs / Outputs) - must have this property set. // 3. tez.runtime.optimize.local.fetch needs to be set to true (either via tez-site.xml or in all configurations used to create the job (similar to fs.defaultFS in step 2)) // 4. tez.staging-dir must be set (either programatically or via tez-site.xml). // Until TEZ-1337 goes in - the staging-dir for the job is effectively the root of the filesystem (and where inputs are read from / written to if relative paths are used). 
LOG.info("not using cluster"); configuration = new Configuration(); configuration.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions()); // configuration.setInt( FlowRuntimeProps.GATHER_PARTITIONS, 1 ); // deadlocks if larger than 1 configuration.set(TezConfiguration.TEZ_LOCAL_MODE, "true"); configuration.set("fs.defaultFS", "file:///"); configuration.set("tez.runtime.optimize.local.fetch", "true"); // hack to prevent deadlocks where downstream processors are scheduled before upstream configuration.setInt("tez.am.inline.task.execution.max-tasks", 3); // testHashJoinMergeIntoHashJoinAccumulatedAccumulatedMerge fails if set to 2 configuration.set(TezConfiguration.TEZ_IGNORE_LIB_URIS, "true"); // in local mode, use local classpath configuration.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1); configuration.set(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, "true"); configuration.set("tez.am.mode.session", "true"); // allows multiple TezClient instances to be used in a single jvm if (!Util.isEmpty(System.getProperty("hadoop.tmp.dir"))) configuration.set("hadoop.tmp.dir", System.getProperty("hadoop.tmp.dir")); else configuration.set("hadoop.tmp.dir", "build/test/tmp"); fileSys = FileSystem.get(configuration); } else { LOG.info("using cluster"); if (Util.isEmpty(System.getProperty("hadoop.log.dir"))) System.setProperty("hadoop.log.dir", "build/test/log"); if (Util.isEmpty(System.getProperty("hadoop.tmp.dir"))) System.setProperty("hadoop.tmp.dir", "build/test/tmp"); new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored new File(System.getProperty("hadoop.tmp.dir")).mkdirs(); // ignored Configuration defaultConf = new Configuration(); defaultConf.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions()); defaultConf.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1); // defaultConf.set( TezConfiguration.TEZ_AM_LOG_LEVEL, "DEBUG" ); // defaultConf.set( TezConfiguration.TEZ_TASK_LOG_LEVEL, "DEBUG" ); 
defaultConf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); defaultConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false); defaultConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, System.getProperty("hadoop.tmp.dir")); miniDFSCluster = new MiniDFSCluster.Builder(defaultConf).numDataNodes(4).format(true).racks(null) .build(); fileSys = miniDFSCluster.getFileSystem(); Configuration tezConf = new Configuration(defaultConf); tezConf.set("fs.defaultFS", fileSys.getUri().toString()); // use HDFS tezConf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir"); // see MiniTezClusterWithTimeline as alternate miniTezCluster = new MiniTezCluster(getClass().getName(), 4, 1, 1); // todo: set to 4 miniTezCluster.init(tezConf); miniTezCluster.start(); configuration = miniTezCluster.getConfig(); // stats won't work after completion unless ATS is used if (setTimelineStore(configuration)) // true if ats can be loaded and configured for this hadoop version { configuration.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS, ATSHistoryLoggingService.class.getName()); configuration.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); configuration.set(YarnConfiguration.TIMELINE_SERVICE_ADDRESS, "localhost:10200"); configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS, "localhost:8188"); configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS, "localhost:8190"); yarnHistoryServer = new ApplicationHistoryServer(); yarnHistoryServer.init(configuration); yarnHistoryServer.start(); } } configuration.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 1); configuration.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1); configuration.setInt(TezConfiguration.TEZ_AM_MAX_TASK_FAILURES_PER_NODE, 1); Map<Object, Object> globalProperties = getGlobalProperties(); if (logger != null) globalProperties.put("log4j.logger", logger); FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests 
Hadoop2TezPlanner.copyProperties(configuration, globalProperties); // copy any external properties Hadoop2TezPlanner.copyConfiguration(properties, configuration); // put all properties on the jobconf ExitUtil.disableSystemExit(); // forbidSystemExitCall(); }
From source file:cascading.platform.tez.Hadoop2TezPlatform.java
License:Open Source License
protected boolean setTimelineStore(Configuration configuration) { try {/*from ww w .jav a 2 s .com*/ // try hadoop 2.6 Class<?> target = Util.loadClass("org.apache.hadoop.yarn.server.timeline.TimelineStore"); Class<?> type = Util.loadClass("org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore"); configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target); try { // hadoop 2.5 has the above classes, but this one is also necessary for the timeline service with acls to function. Util.loadClass("org.apache.hadoop.yarn.api.records.timeline.TimelineDomain"); } catch (CascadingException exception) { configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true); } return true; } catch (CascadingException exception) { try { // try hadoop 2.4 Class<?> target = Util.loadClass( "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.TimelineStore"); Class<?> type = Util.loadClass( "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.MemoryTimelineStore"); configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target); configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true); return true; } catch (CascadingException ignore) { return false; } } }
From source file:cascading.scheme.hadoop.SequenceFile.java
License:Open Source License
/**
 * Prepares {@code conf} for reading sequence files: sets
 * {@code mapred.mapper.new-api} to {@code false} and registers
 * {@code SequenceFileInputFormat} as {@code mapred.input.format.class}.
 *
 * @param flowProcess the current flow process (not used here)
 * @param tap         the tap being sourced (not used here)
 * @param conf        the configuration to update
 */
@Override
public void sourceConfInit(
        FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap,
        Configuration conf) {
    final boolean useNewApi = false;

    conf.setBoolean("mapred.mapper.new-api", useNewApi);
    conf.setClass("mapred.input.format.class", SequenceFileInputFormat.class, InputFormat.class);
}
From source file:cascading.scheme.hadoop.SequenceFile.java
License:Open Source License
/**
 * Prepares {@code conf} for writing sequence files: sets
 * {@code mapred.mapper.new-api} to {@code false}, declares {@code Tuple} as
 * both output key and value class, and registers
 * {@code SequenceFileOutputFormat} as {@code mapred.output.format.class}.
 *
 * @param flowProcess the current flow process (not used here)
 * @param tap         the tap being sunk to (not used here)
 * @param conf        the configuration to update
 */
@Override
public void sinkConfInit(
        FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap,
        Configuration conf) {
    final boolean useNewApi = false;

    conf.setBoolean("mapred.mapper.new-api", useNewApi);

    // keys and values are both written as tuples
    conf.setClass("mapred.output.key.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.value.class", Tuple.class, Object.class);

    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
/**
 * Prepares {@code conf} for reading text lines: rejects zipped input, sets
 * {@code mapred.mapper.new-api} to {@code false} and registers
 * {@code TextInputFormat} as {@code mapred.input.format.class}.
 *
 * @param flowProcess the current flow process (not used here)
 * @param tap         the tap being sourced (not used here)
 * @param conf        the configuration to inspect and update
 * @throws IllegalStateException if {@code hasZippedFiles} reports zip files
 *                               among the configured input paths
 */
@Override
public void sourceConfInit(
        FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap,
        Configuration conf) {
    final boolean zippedInput = hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf)));

    if (zippedInput) {
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));
    }

    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
/**
 * Prepares {@code conf} for writing text lines: rejects {@code .zip}
 * targets, sets {@code mapred.mapper.new-api} to {@code false}, applies the
 * sink compression setting, and registers {@code Text} key/value classes
 * with {@code TextOutputFormat}.
 * <p>
 * {@code mapred.output.compress} is only written when the sink compression
 * is {@code Compress.ENABLE} or {@code Compress.DISABLE}; any other value
 * leaves the property untouched.
 *
 * @param flowProcess the current flow process (not used here)
 * @param tap         the tap being sunk to; its full identifier is checked
 * @param conf        the configuration to inspect and update
 * @throws IllegalStateException if the tap's full identifier ends with {@code .zip}
 */
@Override
public void sinkConfInit(
        FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap,
        Configuration conf) {
    final boolean zipTarget = tap.getFullIdentifier(conf).endsWith(".zip");

    if (zipTarget) {
        throw new IllegalStateException("cannot write zip files: " + HadoopUtil.getOutputPath(conf));
    }

    conf.setBoolean("mapred.mapper.new-api", false);

    final Compress compression = getSinkCompression();

    if (compression == Compress.DISABLE) {
        conf.setBoolean("mapred.output.compress", false);
    } else if (compression == Compress.ENABLE) {
        conf.setBoolean("mapred.output.compress", true);
    }

    conf.setClass("mapred.output.key.class", Text.class, Object.class);
    conf.setClass("mapred.output.value.class", Text.class, Object.class);
    conf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
}