Example usage for org.apache.hadoop.conf Configuration setBoolean

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration.setBoolean.

Prototype

public void setBoolean(String name, boolean value)

Source Link

Document

Sets the value of the `name` property to the given boolean `value`.

Usage

From source file:boa.runtime.BoaRunner.java

License:Apache License

/**
 * Builds the {@link Job} that describes the work to be done by this Boa job.
 *
 * <p>The configuration obtained from {@code getConf()} is updated in place
 * before the job is created from it.
 *
 * @param ins
 *            An array of {@link Path} with the locations of the input
 *            files; may be {@code null}, in which case no input path is
 *            added
 *
 * @param out
 *            A {@link Path} with the location of the output file
 *
 * @param robust
 *            Whether the job should ignore most exceptions; stored under
 *            the {@code boa.runtime.robust} property
 *
 * @return A {@link Job} describing the work to be done by this Boa job
 * @throws IOException
 *             if raised while creating the job or registering its paths
 */
public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException {
    final Configuration conf = getConf();

    conf.setBoolean("boa.runtime.robust", robust);

    // short-circuit reads make local reads faster
    conf.setBoolean("dfs.client.read.shortcircuit", true);
    conf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);

    // Our MapFiles index every key by default, which costs a lot of memory.
    // Skipping keys in the index trades memory for slower gets; left disabled:
    //conf.setLong("io.map.index.skip", 128);

    // compress the map output
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.type", "BLOCK");
    conf.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setLong("mapred.job.reuse.jvm.num.tasks", -1);

    final Job boaJob = new Job(conf);

    // inputs are optional; the output path is always registered
    if (ins != null) {
        for (int i = 0; i < ins.length; i++) {
            FileInputFormat.addInputPath(boaJob, ins[i]);
        }
    }
    FileOutputFormat.setOutputPath(boaJob, out);

    boaJob.setPartitionerClass(BoaPartitioner.class);

    // intermediate (map output) types
    boaJob.setMapOutputKeyClass(EmitKey.class);
    boaJob.setMapOutputValueClass(EmitValue.class);

    // final output format and types
    boaJob.setOutputFormatClass(BoaOutputFormat.class);
    boaJob.setOutputKeyClass(Text.class);
    boaJob.setOutputValueClass(NullWritable.class);

    return boaJob;
}

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

/**
 * Flags the given configuration as belonging to an executing flow by setting
 * the {@code CASCADING_FLOW_EXECUTING} property to {@code true}.
 *
 * @param conf the configuration to update
 */
public static void setIsInflow(final Configuration conf) {
    final boolean executing = true;

    conf.setBoolean(CASCADING_FLOW_EXECUTING, executing);
}

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

/**
 * Sets {@code mapred.mapper.new-api} on {@code conf} according to the package
 * of {@code className}: {@code false} for names starting with
 * {@code org.apache.hadoop.mapred.}, {@code true} for names starting with
 * {@code org.apache.hadoop.mapreduce.}.
 *
 * @param conf      the configuration to update
 * @param className the class name to inspect; may be {@code null}
 * @return {@code false} if {@code className} is {@code null} (nothing is
 *         set), {@code true} once the property has been set
 * @throws IllegalStateException if the name starts with neither prefix
 */
public static boolean setNewApi(Configuration conf, String className) {
    // a null name is not reported here; the error is left to be caught downstream
    if (className == null) {
        return false;
    }

    final boolean usesNewApi = className.startsWith("org.apache.hadoop.mapreduce.");

    // the two prefixes cannot both match, so "not new" must mean "stable" to be valid
    if (!usesNewApi && !className.startsWith("org.apache.hadoop.mapred.")) {
        throw new IllegalStateException(
                "cannot determine if class denotes stable or new api, please set 'mapred.mapper.new-api' to the appropriate value");
    }

    conf.setBoolean("mapred.mapper.new-api", usesNewApi);

    return true;
}

From source file:cascading.flow.tez.util.TezUtil.java

License:Open Source License

/**
 * Writes the task identity of the given Tez processor context into
 * {@code config} under the {@link JobContext} task keys.
 *
 * <p>A mock task attempt id is created from the context's application id,
 * vertex index, task index and attempt number; its string form, its task id,
 * the map flag and the task id number are then stored.
 *
 * @param context        the processor context supplying the ids
 * @param config         the configuration to update
 * @param isMapperOutput passed to the mock id and stored as {@code TASK_ISMAP}
 */
public static void setMRProperties(ProcessorContext context, Configuration config, boolean isMapperOutput) {
    TaskAttemptID mockAttemptId = org.apache.tez.mapreduce.hadoop.mapreduce.TaskAttemptContextImpl
            .createMockTaskAttemptID(
                    context.getApplicationId().getClusterTimestamp(),
                    context.getTaskVertexIndex(),
                    context.getApplicationId().getId(),
                    context.getTaskIndex(),
                    context.getTaskAttemptNumber(),
                    isMapperOutput);

    // derive every stored value from the mock id first, then write them out together
    String attemptIdValue = mockAttemptId.toString();
    String taskIdValue = mockAttemptId.getTaskID().toString();
    int partition = mockAttemptId.getTaskID().getId();

    config.set(JobContext.TASK_ATTEMPT_ID, attemptIdValue);
    config.set(JobContext.TASK_ID, taskIdValue);
    config.setBoolean(JobContext.TASK_ISMAP, isMapperOutput);
    config.setInt(JobContext.TASK_PARTITION, partition);
}

From source file:cascading.platform.tez.Hadoop2TezPlatform.java

License:Open Source License

/**
 * Initializes the platform's {@code configuration} and {@code fileSys} once.
 *
 * <p>Does nothing if {@code configuration} is already set. Otherwise the
 * configuration is built either for Tez local mode or for in-process mini
 * clusters, depending on {@code isUseCluster()}; afterwards the retry limits,
 * global properties and system-exit guard shared by both modes are applied.
 *
 * @throws IOException if raised while creating the file system or the mini clusters
 */
@Override
public synchronized void setUp() throws IOException {
    // already initialized by an earlier call
    if (configuration != null) {
        return;
    }

    if (!isUseCluster()) {
        initLocalModeConfiguration();
    } else {
        initMiniClusterConfiguration();
    }

    // a single attempt everywhere: a failure is reported instead of being retried
    configuration.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);
    configuration.setInt(TezConfiguration.TEZ_AM_MAX_TASK_FAILURES_PER_NODE, 1);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null) {
        globalProperties.put("log4j.logger", logger);
    }

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    Hadoop2TezPlanner.copyProperties(configuration, globalProperties); // copy any external properties

    Hadoop2TezPlanner.copyConfiguration(properties, configuration); // put all properties on the jobconf

    ExitUtil.disableSystemExit();

    //    forbidSystemExitCall();
}

/**
 * Builds {@code configuration} for Tez local mode (no cluster) and opens the
 * matching {@code fileSys}.
 *
 * @throws IOException if the file system cannot be obtained
 */
private void initLocalModeConfiguration() throws IOException {
    // Current usage requirements:
    // 1. Clients need to set "tez.local.mode" to true when creating a TezClient instance. (For the examples this can be done via -Dtez.local.mode=true)
    // 2. fs.defaultFS must be set to "file:///"
    // 2.1 If running examples - this must be set in tez-site.xml (so that it's picked up by the client, as well as the conf instances used to configure the Inputs / Outputs).
    // 2.2 If used programmatically (without a tez-site.xml present), all configuration instances used (to create the client / configure Inputs / Outputs) must have this property set.
    // 3. tez.runtime.optimize.local.fetch needs to be set to true (either via tez-site.xml or in all configurations used to create the job (similar to fs.defaultFS in step 2))
    // 4. tez.staging-dir must be set (either programmatically or via tez-site.xml).
    // Until TEZ-1337 goes in - the staging-dir for the job is effectively the root of the filesystem (and where inputs are read from / written to if relative paths are used).

    LOG.info("not using cluster");
    configuration = new Configuration();

    configuration.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());
    //      configuration.setInt( FlowRuntimeProps.GATHER_PARTITIONS, 1 ); // deadlocks if larger than 1

    configuration.set(TezConfiguration.TEZ_LOCAL_MODE, "true");
    configuration.set("fs.defaultFS", "file:///");
    configuration.set("tez.runtime.optimize.local.fetch", "true");

    // hack to prevent deadlocks where downstream processors are scheduled before upstream
    configuration.setInt("tez.am.inline.task.execution.max-tasks", 3); // testHashJoinMergeIntoHashJoinAccumulatedAccumulatedMerge fails if set to 2

    configuration.set(TezConfiguration.TEZ_IGNORE_LIB_URIS, "true"); // in local mode, use local classpath
    configuration.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);
    configuration.set(TezConfiguration.TEZ_GENERATE_DEBUG_ARTIFACTS, "true");

    configuration.set("tez.am.mode.session", "true"); // allows multiple TezClient instances to be used in a single jvm

    // honor an externally supplied temp dir, otherwise fall back to the build tree
    if (!Util.isEmpty(System.getProperty("hadoop.tmp.dir"))) {
        configuration.set("hadoop.tmp.dir", System.getProperty("hadoop.tmp.dir"));
    } else {
        configuration.set("hadoop.tmp.dir", "build/test/tmp");
    }

    fileSys = FileSystem.get(configuration);
}

/**
 * Starts the mini DFS and mini Tez clusters (plus the history server when a
 * timeline store is available) and takes {@code configuration} from the
 * running Tez cluster; {@code fileSys} is the mini DFS file system.
 *
 * @throws IOException if raised while starting the clusters
 */
private void initMiniClusterConfiguration() throws IOException {
    LOG.info("using cluster");

    // default the log and temp directories into the build tree when not supplied
    if (Util.isEmpty(System.getProperty("hadoop.log.dir"))) {
        System.setProperty("hadoop.log.dir", "build/test/log");
    }

    if (Util.isEmpty(System.getProperty("hadoop.tmp.dir"))) {
        System.setProperty("hadoop.tmp.dir", "build/test/tmp");
    }

    new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored
    new File(System.getProperty("hadoop.tmp.dir")).mkdirs(); // ignored

    Configuration defaultConf = new Configuration();

    defaultConf.setInt(FlowRuntimeProps.GATHER_PARTITIONS, getNumGatherPartitions());

    defaultConf.setInt(YarnConfiguration.DEBUG_NM_DELETE_DELAY_SEC, -1);

    //      defaultConf.set( TezConfiguration.TEZ_AM_LOG_LEVEL, "DEBUG" );
    //      defaultConf.set( TezConfiguration.TEZ_TASK_LOG_LEVEL, "DEBUG" );

    defaultConf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    defaultConf.setBoolean(TezConfiguration.TEZ_AM_NODE_BLACKLISTING_ENABLED, false);
    defaultConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, System.getProperty("hadoop.tmp.dir"));

    miniDFSCluster = new MiniDFSCluster.Builder(defaultConf).numDataNodes(4).format(true).racks(null)
            .build();

    fileSys = miniDFSCluster.getFileSystem();

    Configuration tezConf = new Configuration(defaultConf);
    tezConf.set("fs.defaultFS", fileSys.getUri().toString()); // use HDFS
    tezConf.set(MRJobConfig.MR_AM_STAGING_DIR, "/apps_staging_dir");

    // see MiniTezClusterWithTimeline as alternate
    miniTezCluster = new MiniTezCluster(getClass().getName(), 4, 1, 1); // todo: set to 4
    miniTezCluster.init(tezConf);
    miniTezCluster.start();

    configuration = miniTezCluster.getConfig();

    // stats won't work after completion unless ATS is used
    if (setTimelineStore(configuration)) // true if ats can be loaded and configured for this hadoop version
    {
        configuration.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
                ATSHistoryLoggingService.class.getName());
        configuration.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
        configuration.set(YarnConfiguration.TIMELINE_SERVICE_ADDRESS, "localhost:10200");
        configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS, "localhost:8188");
        configuration.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_HTTPS_ADDRESS, "localhost:8190");

        yarnHistoryServer = new ApplicationHistoryServer();
        yarnHistoryServer.init(configuration);
        yarnHistoryServer.start();
    }
}

From source file:cascading.platform.tez.Hadoop2TezPlatform.java

License:Open Source License

/**
 * Registers an in-memory timeline store on {@code configuration}, trying the
 * hadoop 2.6 class names first and the hadoop 2.4 ones second.
 *
 * <p>A {@link CascadingException} from {@code Util.loadClass} is treated as
 * "not available in this hadoop version" and moves on to the next attempt.
 *
 * @param configuration the configuration to update
 * @return {@code true} if a timeline store was configured, {@code false} if
 *         neither set of classes could be loaded
 */
protected boolean setTimelineStore(Configuration configuration) {
    // first attempt: hadoop 2.6 package layout
    try {
        Class<?> storeType = Util.loadClass("org.apache.hadoop.yarn.server.timeline.TimelineStore");
        Class<?> memoryStore = Util.loadClass("org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore");

        configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, memoryStore, storeType);

        try {
            // hadoop 2.5 has the above classes, but this one is also necessary for the timeline service with acls to function.
            Util.loadClass("org.apache.hadoop.yarn.api.records.timeline.TimelineDomain");
        } catch (CascadingException domainUnavailable) {
            configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
        }

        return true;
    } catch (CascadingException unavailable) {
        // fall through to the hadoop 2.4 attempt below
    }

    // second attempt: hadoop 2.4 package layout
    try {
        Class<?> storeType = Util.loadClass(
                "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.TimelineStore");
        Class<?> memoryStore = Util.loadClass(
                "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.MemoryTimelineStore");

        configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, memoryStore, storeType);
        configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);

        return true;
    } catch (CascadingException ignore) {
        return false;
    }
}

From source file:cascading.scheme.hadoop.SequenceFile.java

License:Open Source License

@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", SequenceFileInputFormat.class, InputFormat.class);
}

From source file:cascading.scheme.hadoop.SequenceFile.java

License:Open Source License

@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.output.key.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.value.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}

From source file:cascading.scheme.hadoop.TextLine.java

License:Open Source License

@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));

    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
}

From source file:cascading.scheme.hadoop.TextLine.java

License:Open Source License

@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (tap.getFullIdentifier(conf).endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + HadoopUtil.getOutputPath(conf));

    conf.setBoolean("mapred.mapper.new-api", false);

    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);

    conf.setClass("mapred.output.key.class", Text.class, Object.class);
    conf.setClass("mapred.output.value.class", Text.class, Object.class);
    conf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
}