Example usage for org.apache.hadoop.conf Configuration setBoolean

List of usage examples for org.apache.hadoop.conf Configuration setBoolean

Introduction

This page collects example usages of org.apache.hadoop.conf.Configuration#setBoolean from open source projects.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
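A minimal sketch of the round trip: setBoolean stores the value, and getBoolean reads it back with a fallback default. The property name "my.feature.enabled" is a placeholder chosen for illustration.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // "my.feature.enabled" is an illustrative placeholder property name
        conf.setBoolean("my.feature.enabled", true);
        // getBoolean returns the stored value, or the supplied default if unset
        boolean enabled = conf.getBoolean("my.feature.enabled", false);
        System.out.println(enabled); // prints: true
    }
}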

Usage

From source file:org.apache.gobblin.compaction.mapreduce.CompactionJobConfigurator.java

License:Apache License

/**
 * Customized MR job creation for Avro.
 *
 * @param  dataset  A path or directory which needs compaction
 * @return A configured map-reduce job for avro compaction
 */
public Job createJob(FileSystemDataset dataset) throws IOException {
    Configuration conf = HadoopUtils.getConfFromState(state);

    // Turn on mapreduce output compression by default
    if (conf.get("mapreduce.output.fileoutputformat.compress") == null
            && conf.get("mapred.output.compress") == null) {
        conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    }

    // Disable delegation token cancellation by default
    if (conf.get("mapreduce.job.complete.cancel.delegation.tokens") == null) {
        conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
    }

    addJars(conf, this.state, fs);
    Job job = Job.getInstance(conf);
    job.setJobName(MRCompactorJobRunner.HADOOP_JOB_NAME);
    boolean emptyDirectoryFlag = this.configureInputAndOutputPaths(job, dataset);
    if (emptyDirectoryFlag) {
        this.state.setProp(HiveRegistrationPolicy.MAPREDUCE_JOB_INPUT_PATH_EMPTY_KEY, true);
    }
    this.configureMapper(job);
    this.configureReducer(job);
    if (emptyDirectoryFlag || !this.shouldDeduplicate) {
        job.setNumReduceTasks(0);
    }
    // Configure schema at the last step because FilesInputFormat will be used internally
    this.configureSchema(job);
    this.isJobCreated = true;
    this.configuredJob = job;
    return job;
}
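The two null checks above implement "apply a default only if the user set nothing". Configuration also provides setBooleanIfUnset(String, boolean), which condenses the single-key case; a minimal sketch (note it tests only the one key, so the legacy mapred.output.compress alias would still need the explicit check unless your Hadoop version resolves it through key deprecation):

    // same default as the second block above, without the explicit null check
    conf.setBooleanIfUnset("mapreduce.job.complete.cancel.delegation.tokens", false);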

From source file:org.apache.gobblin.compaction.mapreduce.MRCompactorJobRunner.java

License:Apache License

@Override
public void run() {
    Configuration conf = HadoopUtils.getConfFromState(this.dataset.jobProps());

    // Turn on mapreduce output compression by default
    if (conf.get("mapreduce.output.fileoutputformat.compress") == null
            && conf.get("mapred.output.compress") == null) {
        conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    }

    // Disable delegation token cancellation by default
    if (conf.get("mapreduce.job.complete.cancel.delegation.tokens") == null) {
        conf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
    }

    try {
        DateTime compactionTimestamp = getCompactionTimestamp();
        LOG.info("MR Compaction Job Timestamp " + compactionTimestamp.getMillis());
        if (this.dataset.jobProps().getPropAsBoolean(MRCompactor.COMPACTION_JOB_LATE_DATA_MOVEMENT_TASK,
                false)) {
            List<Path> newLateFilePaths = Lists.newArrayList();
            for (String filePathString : this.dataset.jobProps()
                    .getPropAsList(MRCompactor.COMPACTION_JOB_LATE_DATA_FILES)) {
                if (FilenameUtils.isExtension(filePathString, getApplicableFileExtensions())) {
                    newLateFilePaths.add(new Path(filePathString));
                }
            }

            Path lateDataOutputPath = this.outputDeduplicated ? this.dataset.outputLatePath()
                    : this.dataset.outputPath();
            LOG.info(String.format("Copying %d late data files to %s", newLateFilePaths.size(),
                    lateDataOutputPath));
            if (this.outputDeduplicated) {
                if (!this.fs.exists(lateDataOutputPath)) {
                    if (!this.fs.mkdirs(lateDataOutputPath)) {
                        throw new RuntimeException(
                                String.format("Failed to create late data output directory: %s.",
                                        lateDataOutputPath.toString()));
                    }
                }
            }
            this.copyDataFiles(lateDataOutputPath, newLateFilePaths);
            if (this.outputDeduplicated) {
                dataset.checkIfNeedToRecompact(datasetHelper);
            }
            this.status = Status.COMMITTED;
        } else {
            if (this.fs.exists(this.dataset.outputPath()) && !canOverwriteOutputDir()) {
                LOG.warn(String.format("Output paths %s exists. Will not compact %s.",
                        this.dataset.outputPath(), this.dataset.inputPaths()));
                this.status = Status.COMMITTED;
                return;
            }
            addJars(conf);
            Job job = Job.getInstance(conf);
            this.configureJob(job);
            this.submitAndWait(job);
            if (shouldPublishData(compactionTimestamp)) {
                // remove all invalid empty files due to speculative task execution
                List<Path> goodPaths = CompactionJobConfigurator.getGoodFiles(job, this.dataset.outputTmpPath(),
                        this.tmpFs, ImmutableList.of("avro"));

                if (!this.recompactAllData && this.recompactFromDestPaths) {
                    // append new files without deleting output directory
                    addGoodFilesToOutputPath(goodPaths);
                    // clean up late data from outputLateDirectory, which has been set to inputPath
                    deleteFilesByPaths(this.dataset.inputPaths());
                } else {
                    moveTmpPathToOutputPath();
                    if (this.recompactFromDestPaths) {
                        deleteFilesByPaths(this.dataset.additionalInputPaths());
                    }
                }
                submitSlaEvent(job);
                LOG.info("Successfully published data for input folder " + this.dataset.inputPaths());
                this.status = Status.COMMITTED;
            } else {
                LOG.info("Data not published for input folder " + this.dataset.inputPaths()
                        + " due to incompleteness");
                this.status = Status.ABORTED;
                return;
            }
        }
        if (renameSourceDir) {
            MRCompactor.renameSourceDirAsCompactionComplete(this.fs, this.dataset);
        } else {
            this.markOutputDirAsCompleted(compactionTimestamp);
        }
        this.submitRecordsCountsEvent();
    } catch (Throwable t) {
        throw Throwables.propagate(t);
    }
}

From source file:org.apache.hadoop.examples.BaileyBorweinPlouffe.java

License:Apache License

/** Create and setup a job */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
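The MRJobConfig constants used above are symbolic names for mapreduce.* property keys, so literal string keys give the same result. A minimal equivalent sketch (key spellings are believed correct for Hadoop 2.x and later; verify against your version):

    // literal-key equivalents of the MRJobConfig-based calls above
    jobconf.setLong("mapreduce.task.timeout", 0);              // MRJobConfig.TASK_TIMEOUT
    jobconf.setBoolean("mapreduce.map.speculative", false);    // MRJobConfig.MAP_SPECULATIVE
    jobconf.setBoolean("mapreduce.reduce.speculative", false); // MRJobConfig.REDUCE_SPECULATIVE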

From source file:org.apache.hadoop.examples.pi.DistSum.java

License:Apache License

/** Create a job */
private Job createJob(String name, Summation sigma) throws IOException {
    final Job job = Job.getInstance(getConf(), parameters.remoteDir + "/" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(DistSum.class);
    jobconf.setInt(N_PARTS, parameters.nParts);
    SummationWritable.write(sigma, DistSum.class, jobconf);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);

    return job;
}

From source file:org.apache.hama.bsp.TestCheckpoint.java

License:Apache License

public void testCheckpointInterval() throws Exception {
    Configuration config = new Configuration();
    System.setProperty("user.dir", "/tmp");
    config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
    config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
    config.setBoolean(Constants.FAULT_TOLERANCE_FLAG, true);
    config.setBoolean(Constants.CHECKPOINT_ENABLED, true);
    config.setInt(Constants.CHECKPOINT_INTERVAL, 2);
    config.set("bsp.output.dir", "/tmp/hama-test_out");
    config.set("bsp.local.dir", "/tmp/hama-test");

    FileSystem dfs = FileSystem.get(config);
    BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp");
    TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1);

    TestMessageManager messenger = new TestMessageManager();
    PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config);
    @SuppressWarnings("rawtypes")
    BSPPeer bspTask = new TestBSPPeer(job, config, taskId, new Counters(), -1L, (BSPPeerSyncClient) syncClient,
            messenger, TaskStatus.State.RUNNING);

    assertNotNull("BSPPeerImpl should not be null.", bspTask);

    LOG.info("Created bsp peer and other parameters");
    int port = BSPNetUtils.getFreePort(12502);
    LOG.info("Got port = " + port);

    boolean result = syncClient.getInformation(
            syncClient.constructKey(job.getJobID(), "checkpoint", "" + bspTask.getPeerIndex()),
            new ArrayWritable(LongWritable.class));

    assertFalse(result);

    bspTask.sync();
    // Superstep 1

    checkSuperstepMsgCount(syncClient, bspTask, job, 1L, 0L);

    Text txtMessage = new Text("data");
    messenger.addMessage(txtMessage);

    bspTask.sync();
    // Superstep 2

    checkSuperstepMsgCount(syncClient, bspTask, job, 1L, 0L);

    messenger.addMessage(txtMessage);

    bspTask.sync();
    // Superstep 3

    checkSuperstepMsgCount(syncClient, bspTask, job, 3L, 1L);

    bspTask.sync();
    // Superstep 4

    checkSuperstepMsgCount(syncClient, bspTask, job, 3L, 1L);

    messenger.addMessage(txtMessage);
    messenger.addMessage(txtMessage);

    bspTask.sync();
    // Superstep 5

    checkSuperstepMsgCount(syncClient, bspTask, job, 5L, 2L);

    bspTask.sync();
    // Superstep 6

    checkSuperstepMsgCount(syncClient, bspTask, job, 5L, 2L);

    dfs.delete(new Path("checkpoint"), true);
}

From source file:org.apache.hama.bsp.TestCheckpoint.java

License:Apache License

@SuppressWarnings("rawtypes")
public void testCheckpoint() throws Exception {
    Configuration config = new Configuration();
    config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
    config.setBoolean(Constants.FAULT_TOLERANCE_FLAG, true);
    config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
    config.setBoolean(Constants.CHECKPOINT_ENABLED, true);
    int port = BSPNetUtils.getFreePort(12502);
    LOG.info("Got port = " + port);

    config.set(Constants.PEER_HOST, Constants.DEFAULT_PEER_HOST);
    config.setInt(Constants.PEER_PORT, port);

    config.set("bsp.output.dir", "/tmp/hama-test_out");
    config.set("bsp.local.dir", "/tmp/hama-test");

    FileSystem dfs = FileSystem.get(config);
    BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp");
    TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1);

    TestMessageManager messenger = new TestMessageManager();
    PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config);
    BSPPeer bspTask = new TestBSPPeer(job, config, taskId, new Counters(), -1L, (BSPPeerSyncClient) syncClient,
            messenger, TaskStatus.State.RUNNING);

    assertNotNull("BSPPeerImpl should not be null.", bspTask);

    LOG.info("Created bsp peer and other parameters");

    @SuppressWarnings("unused")
    FaultTolerantPeerService<Text> service = null;

    bspTask.sync();
    LOG.info("Completed first sync.");

    checkSuperstepMsgCount(syncClient, bspTask, job, 1L, 0L);

    Text txtMessage = new Text("data");
    messenger.addMessage(txtMessage);

    bspTask.sync();

    LOG.info("Completed second sync.");

    checkSuperstepMsgCount(syncClient, bspTask, job, 2L, 1L);

    // Checking the messages for superstep 2 and peer id 1
    String expectedPath = "checkpoint/job_checkpttest_0001/2/1";
    FSDataInputStream in = dfs.open(new Path(expectedPath));

    String className = in.readUTF();
    Text message = (Text) ReflectionUtils.newInstance(Class.forName(className), config);
    message.readFields(in);

    assertEquals("data", message.toString());

    dfs.delete(new Path("checkpoint"), true);
}

From source file:org.apache.hama.bsp.TestCheckpoint.java

License:Apache License

public void testPeerRecovery() throws Exception {
    Configuration config = new Configuration();
    config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
    config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
    config.setBoolean(Constants.CHECKPOINT_ENABLED, true);
    int port = BSPNetUtils.getFreePort(12502);
    LOG.info("Got port = " + port);

    config.set(Constants.PEER_HOST, Constants.DEFAULT_PEER_HOST);
    config.setInt(Constants.PEER_PORT, port);

    config.set("bsp.output.dir", "/tmp/hama-test_out");
    config.set("bsp.local.dir", "/tmp/hama-test");

    FileSystem dfs = FileSystem.get(config);
    BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp");
    TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1);

    TestMessageManager messenger = new TestMessageManager();
    PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config);

    Text txtMessage = new Text("data");
    String writeKey = "job_checkpttest_0001/checkpoint/1/";

    Writable[] writableArr = new Writable[2];
    writableArr[0] = new LongWritable(3L);
    writableArr[1] = new LongWritable(5L);
    ArrayWritable arrWritable = new ArrayWritable(LongWritable.class);
    arrWritable.set(writableArr);
    syncClient.storeInformation(writeKey, arrWritable, true, null);

    String writePath = "checkpoint/job_checkpttest_0001/3/1";
    FSDataOutputStream out = dfs.create(new Path(writePath));
    for (int i = 0; i < 5; ++i) {
        out.writeUTF(txtMessage.getClass().getCanonicalName());
        txtMessage.write(out);
    }
    out.close();

    @SuppressWarnings("unused")
    BSPPeer<?, ?, ?, ?, Text> bspTask = new TestBSPPeer(job, config, taskId, new Counters(), 3L,
            (BSPPeerSyncClient) syncClient, messenger, TaskStatus.State.RECOVERING);

    BSPMessageBundle<Text> bundleRead = messenger.getLoopbackBundle();
    assertEquals(5, bundleRead.size());

    String recoveredMsg = bundleRead.iterator().next().toString();
    assertEquals(recoveredMsg, "data");
    dfs.delete(new Path("checkpoint"), true);
}

From source file:org.apache.hama.bsp.TestPartitioning.java

License:Apache License

public void testPartitioner() throws Exception {

    Configuration conf = new Configuration();
    conf.set("bsp.local.dir", "/tmp/hama-test/partitioning");
    conf.setBoolean("bsp.input.runtime.partitioning", true);
    BSPJob bsp = new BSPJob(new HamaConfiguration(conf));
    bsp.setJobName("Test partitioning with input");
    bsp.setBspClass(PartionedBSP.class);
    bsp.setNumBspTask(2);
    conf.setInt(Constants.ZOOKEEPER_SESSION_TIMEOUT, 600);
    bsp.setInputFormat(TextInputFormat.class);
    bsp.setOutputFormat(NullOutputFormat.class);
    FileInputFormat.setInputPaths(bsp, "../CHANGES.txt,../README.md");
    bsp.setPartitioner(HashPartitioner.class);
    assertTrue(bsp.waitForCompletion(true));

    FileSystem fs = FileSystem.get(conf);
    fs.delete(OUTPUT_PATH, true);
}

From source file:org.apache.hama.ml.kmeans.KMeansBSP.java

License:Apache License

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    if (args.length < 6) {
        LOG.info(
                "USAGE: <INPUT_PATH> <OUTPUT_PATH> <COUNT> <K> <DIMENSION OF VECTORS> <MAXITERATIONS> <optional: num of tasks>");
        return;
    }

    Configuration conf = new Configuration();
    int count = Integer.parseInt(args[2]);
    int k = Integer.parseInt(args[3]);
    int dimension = Integer.parseInt(args[4]);
    int iterations = Integer.parseInt(args[5]);
    conf.setInt(MAX_ITERATIONS_KEY, iterations);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    Path center = new Path(in, "center/cen.seq");
    Path centerOut = new Path(out, "center/center_output.seq");

    conf.set(CENTER_IN_PATH, center.toString());
    conf.set(CENTER_OUT_PATH, centerOut.toString());
    // if you're in local mode, you can increase this to match your core sizes
    conf.set("bsp.local.tasks.maximum", "" + Runtime.getRuntime().availableProcessors());
    // deactivate (set to false) if you want to iterate over disk, else it will
    // cache the input vectors in memory
    conf.setBoolean(CACHING_ENABLED_KEY, true);
    BSPJob job = createJob(conf, in, out, false);

    LOG.info("N: " + count + " k: " + k + " Dimension: " + dimension + " Iterations: " + iterations);

    FileSystem fs = FileSystem.get(conf);
    // prepare the input, like deleting old versions and creating centers
    prepareInput(count, k, dimension, conf, in, center, out, fs);
    if (args.length == 7) {
        job.setNumBspTask(Integer.parseInt(args[6]));
    }

    // just submit the job
    job.waitForCompletion(true);
}
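To iterate over disk instead of caching the vectors in memory, the same flag is simply flipped; a one-line sketch using the CACHING_ENABLED_KEY constant from the code above:

    // trade memory for IO: re-read input vectors from disk each iteration
    conf.setBoolean(CACHING_ENABLED_KEY, false);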

From source file:org.apache.hama.pipes.Submitter.java

License:Apache License

/**
 * Set whether to keep the command file for debugging
 *
 * @param conf the configuration to modify
 * @param keep the new value
 */
public static void setKeepCommandFile(Configuration conf, boolean keep) {
    conf.setBoolean("hama.pipes.command-file.keep", keep);
}
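A flag written this way is normally read back with Configuration.getBoolean plus an explicit default. A hypothetical companion getter, sketched here for illustration (the false default is an assumption, not necessarily what Hama's Submitter uses):

/**
 * Read back whether the command file is kept for debugging.
 * NOTE: hypothetical sketch; the default of false is an assumption.
 *
 * @param conf the configuration to read
 * @return the stored flag, or false if the property was never set
 */
public static boolean getKeepCommandFile(Configuration conf) {
    return conf.getBoolean("hama.pipes.command-file.keep", false);
}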