Example usage for org.apache.hadoop.conf Configuration setBoolean

List of usage examples for org.apache.hadoop.conf Configuration setBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration setBoolean.

Prototype

public void setBoolean(String name, boolean value) 

Document

Set the value of the name property to a boolean.
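
Before the project-specific examples below, here is a minimal sketch of the setBoolean/getBoolean round trip. The property name is made up for illustration; only the Configuration calls are the actual Hadoop API.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // store a boolean under an illustrative property name
        conf.setBoolean("example.feature.enabled", true);
        // read it back; the second argument is the default used when the key is absent
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}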

Usage

From source file:com.ebay.erl.mobius.core.MobiusJob.java

License:Apache License

/**
 * Performing "Left Outer Join", the result contains all the records of
 * the left {@linkplain Dataset} (the 1st {@linkplain Dataset}) with
 * or without match to the right {@linkplain Dataset}.
 * <p>
 * 
 * If a join group contains no records from the right {@linkplain Dataset}
 * (the 2nd argument), by default <code>null</code> (if the output format is
 * SequenceFileOutputFormat) or an empty string (if the output format is
 * {@link TextOutputFormat}) is written for the selected columns from
 * the right {@linkplain Dataset}.
 * <p>
 * 
 * If <code>nullReplacement</code> is not null, then it will be used as 
 * the value for the columns from the right dataset when no match in a 
 * join group.
 * <p>
 * 
 * Composing a <code>leftOuterJoin</code> job is almost the same as composing
 * an {@link MobiusJob#innerJoin(Dataset...)} job; simply call
 * <code>leftOuterJoin(Dataset, Dataset, Object)</code> instead of
 * <code>innerJoin</code>.
 * <p>
 * 
 * @param left left-hand side {@link Dataset}
 * @param right right-hand side {@link Dataset} 
 * @param nullReplacement the value to be used for the columns from the right
 * dataset when there is no match; it must be a type supported by {@link Tuple}
 * 
 */
public JoinOnConfigure leftOuterJoin(Dataset left, Dataset right, Object nullReplacement) throws IOException {
    Configuration conf = this.getConf();
    conf.setBoolean(ConfigureConstants.IS_OUTER_JOIN, true);
    return new JoinOnConfigure(nullReplacement, conf, left, right);
}
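
The flag written by conf.setBoolean(ConfigureConstants.IS_OUTER_JOIN, true) is later read back from the job configuration. A hedged sketch of how such a flag is typically consumed follows; the surrounding class is illustrative, not actual Mobius code, and only the Configuration calls are the real Hadoop API.

public class OuterJoinAwareReducer {
    private boolean isOuterJoin;

    public void setup(Configuration conf) {
        // getBoolean mirrors the setBoolean call above; the second argument
        // is the default used when the key has not been set
        this.isOuterJoin = conf.getBoolean(ConfigureConstants.IS_OUTER_JOIN, false);
    }
}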

From source file:com.elex.dmp.vectorizer.DictionaryVectorizer.java

License:Apache License

/**
 * Create a partial vector using a chunk of features from the input documents. The input documents have to be
 * in the {@link SequenceFile} format
 * 
 * @param input
 *          input directory of the documents in {@link SequenceFile} format
 * @param baseConf
 *          job configuration
 * @param maxNGramSize
 *          maximum size of ngrams to generate
 * @param dictionaryFilePath
 *          location of the chunk of features and the id's
 * @param output
 *          output directory where the partial vectors have to be created
 * @param dimension
 * @param sequentialAccess
 *          output vectors should be optimized for sequential access
 * @param namedVectors
 *          output vectors should be named, retaining key (doc id) as a label
 * @param numReducers 
 *          the desired number of reducer tasks
 */
private static void makePartialVectors(Path input, Configuration baseConf, int maxNGramSize,
        Path dictionaryFilePath, Path output, int dimension, boolean sequentialAccess, boolean namedVectors,
        int numReducers) throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt(PartialVectorMerger.DIMENSION, dimension);
    conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setBoolean(PartialVectorMerger.NAMED_VECTOR, namedVectors);
    conf.setInt(MAX_NGRAMS, maxNGramSize);
    DistributedCache.setCacheFiles(new URI[] { dictionaryFilePath.toUri() }, conf);

    Job job = new Job(conf);
    job.setJobName("DictionaryVectorizer::MakePartialVectors: input-folder: " + input + ", dictionary-file: "
            + dictionaryFilePath);
    job.setJarByClass(DictionaryVectorizer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringTuple.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    FileInputFormat.setInputPaths(job, input);

    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setReducerClass(TFPartialVectorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReducers);

    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");
}
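
On the reduce side, Mahout's TFPartialVectorReducer reads these settings back from the job configuration. The sketch below only illustrates that read-back pattern and is not the actual Mahout code; the Configuration and DistributedCache calls are the real Hadoop API.

// Illustrative read-back of the values written above; not the actual Mahout reducer.
public static void logPartialVectorSettings(Configuration conf) throws IOException {
    boolean sequential = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    boolean named = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    int dimension = conf.getInt(PartialVectorMerger.DIMENSION, -1);
    // the dictionary chunk registered above is available through the distributed cache
    URI[] dictionaryChunks = DistributedCache.getCacheFiles(conf);
    System.out.println("sequentialAccess=" + sequential + ", namedVectors=" + named
            + ", dimension=" + dimension + ", cachedDictionaries=" + dictionaryChunks.length);
}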

From source file:com.elex.dmp.vectorizer.FixDictionaryVectorizer.java

License:Apache License

/**
 * Create a partial vector using a chunk of features from the input documents. The input documents have to be
 * in the {@link SequenceFile} format
 * 
 * @param input
 *          input directory of the documents in {@link SequenceFile} format
 * @param baseConf
 *          job configuration
 * @param maxNGramSize
 *          maximum size of ngrams to generate
 * @param dictionaryFilePath
 *          location of the chunk of features and the id's
 * @param output
 *          output directory where the partial vectors have to be created
 * @param dimension
 * @param sequentialAccess
 *          output vectors should be optimized for sequential access
 * @param namedVectors
 *          output vectors should be named, retaining key (doc id) as a label
 * @param numReducers 
 *          the desired number of reducer tasks
 */
private static void makePartialVectors(Path input, Configuration baseConf, int maxNGramSize,
        Path dictionaryFilePath, Path output, int dimension, boolean sequentialAccess, boolean namedVectors,
        int numReducers) throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialisation of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInt(PartialVectorMerger.DIMENSION, dimension);
    conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setBoolean(PartialVectorMerger.NAMED_VECTOR, namedVectors);
    conf.setInt(MAX_NGRAMS, maxNGramSize);
    DistributedCache.setCacheFiles(new URI[] { dictionaryFilePath.toUri() }, conf);

    Job job = new Job(conf);
    job.setJobName("DictionaryVectorizer::MakePartialVectors: input-folder: " + input + ", dictionary-file: "
            + dictionaryFilePath);
    job.setJarByClass(FixDictionaryVectorizer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringTuple.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    FileInputFormat.setInputPaths(job, input);

    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setReducerClass(TFPartialVectorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReducers);

    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded)
        throw new IllegalStateException("Job failed!");
}

From source file:com.example.myapexapp.ApplicationTest.java

private void go(final boolean useUnifier) throws Exception {
    try {
        LocalMode lma = LocalMode.newInstance();
        Configuration conf = new Configuration(false);
        conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
        conf.setBoolean("dt.application.MyFirstApplication.operator.console.prop.saveTuples", true);
        if (useUnifier) {
            conf.setBoolean("dt.application.MyFirstApplication.operator.range.prop.useUnifier", true);
        }
        lma.prepareDAG(new Application(), conf);
        ToConsole console = (ToConsole) lma.getDAG().getOperatorMeta("console").getOperator();
        LocalMode.Controller lc = lma.getController();
        lc.runAsync(); // runs for 10 seconds and quits

        // wait for tuples to show up
        while (!check(useUnifier, console)) {
            System.out.println("Sleeping ....");
            Thread.sleep(500);
        }

    } catch (ConstraintViolationException e) {
        Assert.fail("constraint violations: " + e.getConstraintViolations());
    }
}
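
A hedged sketch of how go(...) would be driven from JUnit; the test method names are assumptions, while go() and the unifier flag come from the code above.

@Test
public void testApplication() throws Exception {
    go(false); // run without the unifier
}

@Test
public void testApplicationWithUnifier() throws Exception {
    go(true); // sets dt.application.MyFirstApplication.operator.range.prop.useUnifier via setBoolean
}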

From source file:com.example.ProtobufToParquet.java

License:Apache License

public static void writeProtobufToParquetAvro(String protoFile, String parquetFile) throws IOException {
    ProtobufData model = ProtobufData.get();

    Schema schema = model.getSchema(ExampleMessage.class);
    System.err.println("Using Avro schema: " + schema.toString(true));

    // use the 3-level structure instead of the 2-level
    // 2-level is the default for forward-compatibility until 2.x
    Configuration conf = new Configuration();
    conf.setBoolean("parquet.avro.write-old-list-structure", false);

    ParquetWriter<ExampleMessage> writer = AvroParquetWriter.<ExampleMessage>builder(new Path(parquetFile))
            .withConf(conf) // conf set to use 3-level lists
            .withDataModel(model) // use the protobuf data model
            .withSchema(schema) // Avro schema for the protobuf data
            .build();

    FileInputStream protoStream = new FileInputStream(new File(protoFile));
    try {
        ExampleMessage m;
        while ((m = ExampleMessage.parseDelimitedFrom(protoStream)) != null) {
            writer.write(m);
        }
    } finally {
        protoStream.close();
    }

    writer.close();
}
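
A hedged usage sketch of the method above; the file paths are placeholders, not part of the original example.

public static void main(String[] args) throws IOException {
    // reads delimited ExampleMessage records and writes them as Parquet,
    // with the 3-level list structure enabled via setBoolean above
    writeProtobufToParquetAvro("/tmp/messages.pb", "/tmp/messages.parquet");
}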

From source file:com.facebook.hive.orc.OrcConf.java

License:Open Source License

public static void setBoolVar(Configuration conf, ConfVars var, boolean val) {
    conf.setBoolean(var.varname, val);
}

From source file:com.facebook.hiveio.conf.BooleanConfOption.java

License:Apache License

/**
 * Set value in configuration for this key
 * @param conf Configuration
 * @param value to set
 */
public void set(Configuration conf, boolean value) {
    conf.setBoolean(getKey(), value);
}
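
A complementary getter would follow the same pattern; the sketch below is illustrative, not the actual hiveio code, and relies only on getKey() from the snippet above plus Configuration.getBoolean.

/**
 * Get value from configuration for this key (illustrative sketch)
 * @param conf Configuration
 * @param defaultValue value returned when the key is unset
 * @return the stored boolean, or defaultValue
 */
public boolean get(Configuration conf, boolean defaultValue) {
    return conf.getBoolean(getKey(), defaultValue);
}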

From source file:com.facebook.presto.hive.HdfsConfiguration.java

License:Apache License

protected Configuration createConfiguration() {
    Configuration config = new Configuration();

    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class,
            DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class,
                SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.client.connect.timeout", Ints.checkedCast(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    for (String scheme : ImmutableList.of("s3", "s3bfs", "s3n")) {
        if (s3AwsAccessKey != null) {
            config.set(format("fs.%s.awsAccessKeyId", scheme), s3AwsAccessKey);
        }
        if (s3AwsSecretKey != null) {
            config.set(format("fs.%s.awsSecretAccessKey", scheme), s3AwsSecretKey);
        }
    }

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());

    updateConfiguration(config);

    return config;
}
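
Note that the short-circuit-read flag above is set with setBooleanIfUnset rather than setBoolean, so an explicit value from the loaded resources wins. A minimal standalone sketch of the difference (not part of Presto; the property name is reused here only for illustration):

import org.apache.hadoop.conf.Configuration;

public class SetBooleanIfUnsetDemo {
    public static void main(String[] args) {
        Configuration c = new Configuration(false);
        c.setBoolean("dfs.client.read.shortcircuit", false);        // always overwrites
        c.setBooleanIfUnset("dfs.client.read.shortcircuit", true);  // keeps the existing false
        System.out.println(c.getBoolean("dfs.client.read.shortcircuit", false)); // prints false
    }
}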

From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java

License:Apache License

public void updateConfiguration(Configuration config) {
    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class,
            DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class,
                SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", toIntExact(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", toIntExact(ipcPingInterval.toMillis()));
    config.setInt("ipc.client.connect.timeout", toIntExact(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3a.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    if (s3AwsAccessKey != null) {
        config.set(PrestoS3FileSystem.S3_ACCESS_KEY, s3AwsAccessKey);
        config.set("fs.s3bfs.awsAccessKeyId", s3AwsAccessKey);
    }
    if (s3AwsSecretKey != null) {
        config.set(PrestoS3FileSystem.S3_SECRET_KEY, s3AwsSecretKey);
        config.set("fs.s3bfs.awsSecretAccessKey", s3AwsSecretKey);
    }
    if (s3Endpoint != null) {
        config.set(PrestoS3FileSystem.S3_ENDPOINT, s3Endpoint);
        config.set("fs.s3bfs.Endpoint", s3Endpoint);
    }
    if (s3SignerType != null) {
        config.set(PrestoS3FileSystem.S3_SIGNER_TYPE, s3SignerType.getSignerType());
    }

    config.setInt("fs.cache.max-size", fileSystemMaxCacheSize);

    configureCompression(config, compressionCodec);

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_USE_INSTANCE_CREDENTIALS, s3UseInstanceCredentials);
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setBoolean(PrestoS3FileSystem.S3_SSE_ENABLED, s3SseEnabled);
    if (s3EncryptionMaterialsProvider != null) {
        config.set(PrestoS3FileSystem.S3_ENCRYPTION_MATERIALS_PROVIDER, s3EncryptionMaterialsProvider);
    }
    if (s3KmsKeyId != null) {
        config.set(PrestoS3FileSystem.S3_KMS_KEY_ID, s3KmsKeyId);
    }
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_MAX_BACKOFF_TIME, s3MaxBackoffTime.toString());
    config.set(PrestoS3FileSystem.S3_MAX_RETRY_TIME, s3MaxRetryTime.toString());
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_SOCKET_TIMEOUT, s3SocketTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());
    config.setInt(PrestoS3FileSystem.S3_MAX_CONNECTIONS, s3MaxConnections);
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_FILE_SIZE, s3MultipartMinFileSize.toBytes());
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_PART_SIZE, s3MultipartMinPartSize.toBytes());
    config.setBoolean(PrestoS3FileSystem.S3_PIN_CLIENT_TO_CURRENT_REGION, pinS3ClientToCurrentRegion);
    config.set(PrestoS3FileSystem.S3_USER_AGENT_PREFIX, s3UserAgentPrefix);
}

From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java

License:Apache License

public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    boolean compression = compressionCodec != HiveCompressionCodec.NONE;
    config.setBoolean(COMPRESSRESULT.varname, compression);
    config.setBoolean("mapred.output.compress", compression);
    config.setBoolean(FileOutputFormat.COMPRESS, compression);
    // For DWRF
    config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compressionCodec.getOrcCompressionKind().name());
    config.set(HIVE_ORC_COMPRESSION.varname, compressionCodec.getOrcCompressionKind().name());
    // For ORC
    config.set(OrcTableProperties.COMPRESSION.getPropName(), compressionCodec.getOrcCompressionKind().name());
    // For RCFile and Text
    if (compressionCodec.getCodec().isPresent()) {
        config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
        config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
    } else {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    }
    // For Parquet
    config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());
    // For SequenceFile
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}
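
A hedged usage sketch of the static helper above; SNAPPY is assumed to be one of the HiveCompressionCodec constants.

public static Configuration snappyConfiguration() {
    Configuration conf = new Configuration(false);
    HdfsConfigurationUpdater.configureCompression(conf, HiveCompressionCodec.SNAPPY);
    // with a non-NONE codec, every boolean compression switch set above is true
    boolean compressed = conf.getBoolean("mapred.output.compress", false); // true
    return conf;
}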