Example usage for org.apache.hadoop.conf Configuration setBoolean

List of usage examples for org.apache.hadoop.conf Configuration setBoolean

Introduction

On this page you can find example usage of org.apache.hadoop.conf.Configuration.setBoolean, collected from open-source projects.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
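
Before the project examples, here is a minimal, self-contained round-trip sketch (not drawn from any of the projects below; the property name is invented purely for illustration):

import org.apache.hadoop.conf.Configuration;

public class SetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean flag under an illustrative property name.
        conf.setBoolean("example.feature.enabled", true);

        // Read it back; the second argument is the default returned when the property is unset.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}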

Usage

From source file: com.cloudera.oryx.computation.common.JobStep.java

License: Open Source License

/**
 * Creates a new {@link MRPipeline} instance that contains common configuration
 * settings.
 *
 * @return a new {@link MRPipeline} instance, suitably configured
 */
protected final MRPipeline createBasicPipeline(Class<?> jarClass) throws IOException {
    Configuration conf = OryxConfiguration.get(getConf());

    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    conf.setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.type", "BLOCK");
    conf.setClass("mapred.output.compression.codec", SnappyCodec.class, CompressionCodec.class);
    // Set old-style equivalents for Avro/Crunch's benefit
    conf.set("avro.output.codec", "snappy");

    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, true);
    conf.setBoolean(TTConfig.TT_OUTOFBAND_HEARBEAT, true);
    conf.setInt(MRJobConfig.JVM_NUMTASKS_TORUN, -1);

    //conf.setBoolean("crunch.disable.deep.copy", true);
    // Giving one mapper a lot of data can cause issues in some stages, so disable file combining by default
    conf.setBoolean("crunch.disable.combine.file", true);

    Config appConfig = ConfigUtils.getDefaultConfig();

    conf.set("crunch.tmp.dir", appConfig.getString("computation-layer.tmp-dir"));

    int mapMemoryMB = appConfig.getInt("computation-layer.mapper-memory-mb");
    log.info("Mapper memory: {}", mapMemoryMB);
    int mapHeapMB = (int) (mapMemoryMB / 1.3); // Matches Hadoop's default
    log.info("Mappers have {}MB heap and can access {}MB RAM", mapHeapMB, mapMemoryMB);
    if (conf.get(MRJobConfig.MAP_JAVA_OPTS) != null) {
        log.info("Overriding previous setting of {}, which was '{}'", MRJobConfig.MAP_JAVA_OPTS,
                conf.get(MRJobConfig.MAP_JAVA_OPTS));
    }
    conf.set(MRJobConfig.MAP_JAVA_OPTS,
            "-Xmx" + mapHeapMB + "m -XX:+UseCompressedOops -XX:+UseParallelGC -XX:+UseParallelOldGC");
    log.info("Set {} to '{}'", MRJobConfig.MAP_JAVA_OPTS, conf.get(MRJobConfig.MAP_JAVA_OPTS));
    // See comment below on CM
    conf.setInt("mapreduce.map.java.opts.max.heap", mapHeapMB);

    int reduceMemoryMB = appConfig.getInt("computation-layer.reducer-memory-mb");
    log.info("Reducer memory: {}", reduceMemoryMB);
    if (isHighMemoryStep()) {
        reduceMemoryMB *= appConfig.getInt("computation-layer.worker-high-memory-factor");
        log.info("Increasing {} to {} for high-memory step", MRJobConfig.REDUCE_MEMORY_MB, reduceMemoryMB);
    }
    conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, reduceMemoryMB);

    int reduceHeapMB = (int) (reduceMemoryMB / 1.3); // Matches Hadoop's default
    log.info("Reducers have {}MB heap and can access {}MB RAM", reduceHeapMB, reduceMemoryMB);
    if (conf.get(MRJobConfig.REDUCE_JAVA_OPTS) != null) {
        log.info("Overriding previous setting of {}, which was '{}'", MRJobConfig.REDUCE_JAVA_OPTS,
                conf.get(MRJobConfig.REDUCE_JAVA_OPTS));
    }
    conf.set(MRJobConfig.REDUCE_JAVA_OPTS,
            "-Xmx" + reduceHeapMB + "m -XX:+UseCompressedOops -XX:+UseParallelGC -XX:+UseParallelOldGC");
    log.info("Set {} to '{}'", MRJobConfig.REDUCE_JAVA_OPTS, conf.get(MRJobConfig.REDUCE_JAVA_OPTS));
    // I see this in CM but not in Hadoop docs; probably won't hurt as it's supposed to result in
    // -Xmx appended to opts above, which is at worst redundant
    conf.setInt("mapreduce.reduce.java.opts.max.heap", reduceHeapMB);

    conf.setInt("yarn.scheduler.capacity.minimum-allocation-mb", 128);
    conf.setInt("yarn.app.mapreduce.am.resource.mb", 384);

    // Pass total config state
    conf.set(CONFIG_SERIALIZATION_KEY, ConfigUtils.getDefaultConfig().root().render());

    // Make sure to set any args to conf above this line!

    setConf(conf);

    Job job = Job.getInstance(conf);

    // Basic File IO settings
    FileInputFormat.setMaxInputSplitSize(job, 1L << 28); // ~268MB
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

    log.info("Created pipeline configuration {}", job.getConfiguration());

    return new MRPipeline(jarClass, getCustomJobName(), job.getConfiguration());
}

From source file: com.cloudera.science.quince.LoadVariantsTool.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {
    JCommander jc = new JCommander(this);
    try {
        jc.parse(args);
    } catch (ParameterException e) {
        jc.usage();
        return 1;
    }

    if (paths == null || paths.size() != 2) {
        jc.usage();
        return 1;
    }

    String inputPath = paths.get(0);
    String outputPath = paths.get(1);

    Configuration conf = getConf();
    // Copy records to avoid problem with Parquet string statistics not being correct.
    // This can be removed from parquet 1.8.0
    // (see https://issues.apache.org/jira/browse/PARQUET-251).
    conf.setBoolean(DatasetKeyOutputFormat.KITE_COPY_RECORDS, true);

    Path path = new Path(inputPath);

    if (path.getName().endsWith(".vcf")) {
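        // Read (at most) the first 500,000 bytes of the file; the VCF header is assumed to fit in this buffer.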
        int size = 500000;
        byte[] bytes = new byte[size];
        InputStream inputStream = path.getFileSystem(conf).open(path);
        inputStream.read(bytes, 0, size);
        conf.set(VariantContextToVariantFn.VARIANT_HEADER, Base64.encodeBase64String(bytes));
    }

    Pipeline pipeline = new MRPipeline(getClass(), conf);
    PCollection<Variant> records = readVariants(path, conf, pipeline);

    PCollection<FlatVariant> flatRecords = records.parallelDo(new FlattenVariantFn(),
            Avros.specifics(FlatVariant.class));

    DatasetDescriptor desc = new DatasetDescriptor.Builder().schema(FlatVariant.getClassSchema())
            .partitionStrategy(buildPartitionStrategy(segmentSize)).format(Formats.PARQUET)
            .compressionType(CompressionType.Uncompressed).build();

    View<FlatVariant> dataset;
    if (Datasets.exists(outputPath)) {
        dataset = Datasets.load(outputPath, FlatVariant.class).getDataset().with("sample_group", sampleGroup);
    } else {
        dataset = Datasets.create(outputPath, desc, FlatVariant.class).getDataset().with("sample_group",
                sampleGroup);
    }

    int numReducers = conf.getInt("mapreduce.job.reduces", 1);
    System.out.println("Num reducers: " + numReducers);

    final Schema sortKeySchema = SchemaBuilder.record("sortKey").fields().requiredString("sampleId")
            .endRecord();

    PCollection<FlatVariant> partitioned = CrunchDatasets.partitionAndSort(flatRecords, dataset,
            new FlatVariantRecordMapFn(sortKeySchema), sortKeySchema, numReducers, 1);

    try {
        Target.WriteMode writeMode = overwrite ? Target.WriteMode.OVERWRITE : Target.WriteMode.DEFAULT;
        pipeline.write(partitioned, CrunchDatasets.asTarget(dataset), writeMode);
    } catch (CrunchRuntimeException e) {
        LOG.error("Crunch runtime error", e);
        return 1;
    }

    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;

}

From source file: com.cloudera.sqoop.mapreduce.MySQLDumpImportJob.java

License: Apache License

/**
 * Configure the inputformat to use for the job.
 */
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {

    if (null == tableName) {
        LOG.error("mysqldump-based import cannot support free-form query imports.");
        LOG.error("Do not use --direct and --query together for MySQL.");
        throw new IOException("null tableName for MySQLDumpImportJob.");
    }

    ConnManager mgr = getContext().getConnManager();
    String username = options.getUsername();
    if (null == username || username.length() == 0) {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString());
    } else {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString(),
                username, options.getPassword());
    }

    String[] colNames = options.getColumns();
    if (null == colNames) {
        colNames = mgr.getColumnNames(tableName);
    }

    String[] sqlColNames = null;
    if (null != colNames) {
        sqlColNames = new String[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            sqlColNames[i] = mgr.escapeColName(colNames[i]);
        }
    }

    // It's ok if the where clause is null in DBInputFormat.setInput.
    String whereClause = options.getWhereClause();

    // We can't set the class properly in here, because we may not have the
    // jar loaded in this JVM. So we start by calling setInput() with
    // DBWritable and then overriding the string manually.

    // Note that mysqldump also does *not* want a quoted table name.
    DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName, whereClause,
            mgr.escapeColName(splitByCol), sqlColNames);

    Configuration conf = job.getConfiguration();
    conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, options.getOutputFieldDelim());
    conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, options.getOutputRecordDelim());
    conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY, options.getOutputEnclosedBy());
    conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY, options.getOutputEscapedBy());
    conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, options.isOutputEncloseRequired());
    String[] extraArgs = options.getExtraArgs();
    if (null != extraArgs) {
        conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
    }

    LOG.debug("Using InputFormat: " + inputFormatClass);
    job.setInputFormatClass(getInputFormatClass());
}

From source file: com.cloudera.sqoop.mapreduce.MySQLExportJob.java

License: Apache License

@Override
/**
 * Configure the inputformat to use for the job.
 */
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {

    // Configure the delimiters, etc.
    Configuration conf = job.getConfiguration();
    conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, options.getOutputFieldDelim());
    conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, options.getOutputRecordDelim());
    conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY, options.getOutputEnclosedBy());
    conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY, options.getOutputEscapedBy());
    conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, options.isOutputEncloseRequired());
    String[] extraArgs = options.getExtraArgs();
    if (null != extraArgs) {
        conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
    }

    ConnManager mgr = context.getConnManager();
    String username = options.getUsername();
    if (null == username || username.length() == 0) {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString());
    } else {
        DBConfiguration.configureDB(job.getConfiguration(), mgr.getDriverClass(), options.getConnectString(),
                username, options.getPassword());
    }

    String[] colNames = options.getColumns();
    if (null == colNames) {
        colNames = mgr.getColumnNames(tableName);
    }

    String[] sqlColNames = null;
    if (null != colNames) {
        sqlColNames = new String[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
            sqlColNames[i] = mgr.escapeColName(colNames[i]);
        }
    }

    // Note that mysqldump also does *not* want a quoted table name.
    DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName, null, null, sqlColNames);

    // Configure the actual InputFormat to use. 
    super.configureInputFormat(job, tableName, tableClassName, splitByCol);
}

From source file: com.cloudy.mapred.base.JobUtil.java

License: Apache License

public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    Job job = new Job(new Configuration(conf));
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}

From source file: com.datasalt.pangool.tuplemr.serialization.TupleSerialization.java

License: Apache License

/**
 * see {@link #CONF_SCHEMA_VALIDATION}
 */
public static void enableSchemaValidation(Configuration conf) {
    conf.setBoolean(CONF_SCHEMA_VALIDATION, true);
}

From source file: com.datasalt.pangool.tuplemr.serialization.TupleSerialization.java

License: Apache License

/**
 * see {@link #CONF_SCHEMA_VALIDATION}
 */
public static void disableSchemaValidation(Configuration conf) {
    conf.setBoolean(CONF_SCHEMA_VALIDATION, false);
}

From source file: com.datascience.cascading.scheme.CsvScheme.java

License: Apache License

/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureReaderFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // If the format header was explicitly provided by the user then forward it to the record reader. If skipHeaderRecord
    // is enabled then that indicates that field names were detected. We need to ensure that headers are defined in order
    // for the CSV reader to skip the header record.
    conf.setBoolean(CsvInputFormat.STRICT_MODE, strict);
    if (format.getHeader() != null) {
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, format.getHeader());
    } else if (format.getSkipHeaderRecord()) {
        Fields fields = getSourceFields();
        String[] columns = new String[fields.size()];
        for (int i = 0; i < fields.size(); i++) {
            columns[i] = fields.get(i).toString();
        }
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns);
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvInputFormat.CSV_READER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, format.getRecordSeparator());
    }

    if (format.getQuoteCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_MODE, format.getQuoteMode().name());
    }

    if (format.getEscapeCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvInputFormat.CSV_READER_NULL_STRING, format.getNullString());
    }
}

From source file: com.datascience.cascading.scheme.CsvScheme.java

License: Apache License

/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureWriterFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // Apache CSV doesn't really handle the skipHeaderRecord flag correctly when writing output. If the skip flag is set
    // and headers are configured, headers will always be written to the output. Since we always have headers and/or
    // fields configured, we need to use the skipHeaderRecord flag to determine whether headers should be written.
    if (!format.getSkipHeaderRecord()) {
        if (format.getHeader() != null && format.getHeader().length != 0) {
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, format.getHeader());
        } else {
            Fields fields = getSinkFields();
            String[] columns = new String[fields.size()];
            for (int i = 0; i < fields.size(); i++) {
                columns[i] = fields.get(i).toString();
            }
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, columns);
        }
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvOutputFormat.CSV_WRITER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_RECORD_SEPARATOR, format.getRecordSeparator());
    }

    if (format.getQuoteCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_MODE, format.getQuoteMode().name());
    }

    if (format.getEscapeCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_NULL_STRING, format.getNullString());
    }
}

From source file: com.datatorrent.stram.client.StramClientUtilsTest.java

License: Apache License

@Test
public void testRMWebAddress() throws UnknownHostException {
    Configuration conf = new YarnConfiguration(new Configuration(false)) {
        @Override
        public InetSocketAddress getSocketAddr(String name, String defaultAddress, int defaultPort) {
            String rmId = get(ConfigUtils.RM_HA_ID);
            if (rmId != null) {
                name = name + "." + rmId;
            }
            return super.getSocketAddr(name, defaultAddress, defaultPort);
        }
    };

    // basic test
    conf.setBoolean(CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, false);
    conf.set(YarnConfiguration.RM_WEBAPP_ADDRESS, "192.168.1.1:8032");
    conf.set(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, "192.168.1.2:8032");
    Assert.assertEquals(getHostString("192.168.1.1") + ":8032",
            StramClientUtils.getSocketConnectString(StramClientUtils.getRMWebAddress(conf, null)));
    List<InetSocketAddress> addresses = StramClientUtils.getRMAddresses(conf);
    Assert.assertEquals(1, addresses.size());
    Assert.assertEquals(getHostString("192.168.1.1") + ":8032",
            StramClientUtils.getSocketConnectString(addresses.get(0)));

    conf.setBoolean(CommonConfigurationKeysPublic.HADOOP_SSL_ENABLED_KEY, true);
    Assert.assertEquals(getHostString("192.168.1.2") + ":8032",
            StramClientUtils.getSocketConnectString(StramClientUtils.getRMWebAddress(conf, null)));
    addresses = StramClientUtils.getRMAddresses(conf);
    Assert.assertEquals(1, addresses.size());
    Assert.assertEquals(getHostString("192.168.1.2") + ":8032",
            StramClientUtils.getSocketConnectString(addresses.get(0)));

    // set localhost if host is unknown
    conf.set(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, "someunknownhost.:8032");

    Assert.assertEquals(InetAddress.getLocalHost().getCanonicalHostName() + ":8032",
            StramClientUtils.getSocketConnectString(StramClientUtils.getRMWebAddress(conf, null)));

    // set localhost
    conf.set(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, "127.0.0.1:8032");
    Assert.assertEquals(InetAddress.getLocalHost().getCanonicalHostName() + ":8032",
            StramClientUtils.getSocketConnectString(StramClientUtils.getRMWebAddress(conf, null)));

    // test when HA is enabled
    conf.setBoolean(ConfigUtils.RM_HA_ENABLED, true);
    conf.set(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS + ".rm1", "192.168.1.1:8032");
    conf.set(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS + ".rm2", "192.168.1.2:8032");
    conf.set("yarn.resourcemanager.ha.rm-ids", "rm1,rm2");
    Assert.assertEquals(getHostString("192.168.1.1") + ":8032",
            StramClientUtils.getSocketConnectString(StramClientUtils.getRMWebAddress(conf, "rm1")));
    Assert.assertEquals(getHostString("192.168.1.2") + ":8032",
            StramClientUtils.getSocketConnectString(StramClientUtils.getRMWebAddress(conf, "rm2")));
    addresses = StramClientUtils.getRMAddresses(conf);
    Assert.assertEquals(2, addresses.size());
    Assert.assertEquals(getHostString("192.168.1.1") + ":8032",
            StramClientUtils.getSocketConnectString(addresses.get(0)));
    Assert.assertEquals(getHostString("192.168.1.2") + ":8032",
            StramClientUtils.getSocketConnectString(addresses.get(1)));
}