List of usage examples for org.apache.hadoop.conf Configuration setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
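Before the per-project examples below, here is a minimal, self-contained sketch of the call itself. The property key "my.feature.enabled" is a hypothetical placeholder, not a key used by any of the projects listed here; setBoolean stores the value as the string "true"/"false", and getBoolean reads it back, returning the supplied default when the key is absent.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // store a boolean flag; Configuration keeps it internally as the string "true"
        conf.setBoolean("my.feature.enabled", true);

        // read it back; the second argument is the default returned when the key is unset
        System.out.println(conf.getBoolean("my.feature.enabled", false)); // prints true
        System.out.println(conf.getBoolean("some.unset.key", false));     // prints false (the default)
    }
}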
From source file:com.ebay.erl.mobius.core.MobiusJob.java
License:Apache License
/**
 * Performs a "Left Outer Join": the result contains all the records of
 * the left {@linkplain Dataset} (the 1st {@linkplain Dataset}), with
 * or without a match to the right {@linkplain Dataset}.
 * <p>
 *
 * If, in a join group, there are no records from the right {@linkplain Dataset}
 * (the 2nd argument), then by default <code>null</code> (if the output format is
 * SequenceFileOutputFormat) or an empty string (if the output format is
 * {@link TextOutputFormat}) is written for the selected columns from
 * the right {@linkplain Dataset}.
 * <p>
 *
 * If <code>nullReplacement</code> is not null, it will be used as
 * the value for the columns from the right dataset when there is no match
 * in a join group.
 * <p>
 *
 * Composing a <code>leftOuterJoin</code> is almost the same as composing
 * a {@link MobiusJob#innerJoin(Dataset...)} job, except that instead of calling
 * <code>innerJoin</code>, you simply call
 * <code>leftOuterJoin(Dataset, Dataset, Object)</code>.
 * <p>
 *
 * @param left            left-hand side {@link Dataset}
 * @param right           right-hand side {@link Dataset}
 * @param nullReplacement the value to be used for null columns;
 *                        it can only be a type supported by {@link Tuple}
 */
public JoinOnConfigure leftOuterJoin(Dataset left, Dataset right, Object nullReplacement) throws IOException {
    Configuration conf = this.getConf();
    conf.setBoolean(ConfigureConstants.IS_OUTER_JOIN, true);
    return new JoinOnConfigure(nullReplacement, conf, left, right);
}
From source file:com.elex.dmp.vectorizer.DictionaryVectorizer.java
License:Apache License
/**
 * Create a partial vector using a chunk of features from the input documents. The input documents
 * have to be in the {@link SequenceFile} format.
 *
 * @param input
 *          input directory of the documents in {@link SequenceFile} format
 * @param baseConf
 *          job configuration
 * @param maxNGramSize
 *          maximum size of ngrams to generate
 * @param dictionaryFilePath
 *          location of the chunk of features and the ids
 * @param output
 *          output directory where the partial vectors have to be created
 * @param dimension
 * @param sequentialAccess
 *          output vectors should be optimized for sequential access
 * @param namedVectors
 *          output vectors should be named, retaining key (doc id) as a label
 * @param numReducers
 *          the desired number of reducer tasks
 */
private static void makePartialVectors(Path input, Configuration baseConf, int maxNGramSize,
        Path dictionaryFilePath, Path output, int dimension, boolean sequentialAccess, boolean namedVectors,
        int numReducers) throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialization of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    conf.setInt(PartialVectorMerger.DIMENSION, dimension);
    conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setBoolean(PartialVectorMerger.NAMED_VECTOR, namedVectors);
    conf.setInt(MAX_NGRAMS, maxNGramSize);
    DistributedCache.setCacheFiles(new URI[] { dictionaryFilePath.toUri() }, conf);

    Job job = new Job(conf);
    job.setJobName("DictionaryVectorizer::MakePartialVectors: input-folder: " + input
            + ", dictionary-file: " + dictionaryFilePath);
    job.setJarByClass(DictionaryVectorizer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringTuple.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setReducerClass(TFPartialVectorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReducers);

    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.elex.dmp.vectorizer.FixDictionaryVectorizer.java
License:Apache License
/**
 * Create a partial vector using a chunk of features from the input documents. The input documents
 * have to be in the {@link SequenceFile} format.
 *
 * @param input
 *          input directory of the documents in {@link SequenceFile} format
 * @param baseConf
 *          job configuration
 * @param maxNGramSize
 *          maximum size of ngrams to generate
 * @param dictionaryFilePath
 *          location of the chunk of features and the ids
 * @param output
 *          output directory where the partial vectors have to be created
 * @param dimension
 * @param sequentialAccess
 *          output vectors should be optimized for sequential access
 * @param namedVectors
 *          output vectors should be named, retaining key (doc id) as a label
 * @param numReducers
 *          the desired number of reducer tasks
 */
private static void makePartialVectors(Path input, Configuration baseConf, int maxNGramSize,
        Path dictionaryFilePath, Path output, int dimension, boolean sequentialAccess, boolean namedVectors,
        int numReducers) throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    // this conf parameter needs to be set to enable serialization of conf values
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    conf.setInt(PartialVectorMerger.DIMENSION, dimension);
    conf.setBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, sequentialAccess);
    conf.setBoolean(PartialVectorMerger.NAMED_VECTOR, namedVectors);
    conf.setInt(MAX_NGRAMS, maxNGramSize);
    DistributedCache.setCacheFiles(new URI[] { dictionaryFilePath.toUri() }, conf);

    Job job = new Job(conf);
    job.setJobName("DictionaryVectorizer::MakePartialVectors: input-folder: " + input
            + ", dictionary-file: " + dictionaryFilePath);
    job.setJarByClass(FixDictionaryVectorizer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(StringTuple.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setReducerClass(TFPartialVectorReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReducers);

    HadoopUtil.delete(conf, output);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
From source file:com.example.myapexapp.ApplicationTest.java
private void go(final boolean useUnifier) throws Exception {
    try {
        LocalMode lma = LocalMode.newInstance();
        Configuration conf = new Configuration(false);
        conf.addResource(this.getClass().getResourceAsStream("/META-INF/properties.xml"));
        conf.setBoolean("dt.application.MyFirstApplication.operator.console.prop.saveTuples", true);
        if (useUnifier) {
            conf.setBoolean("dt.application.MyFirstApplication.operator.range.prop.useUnifier", true);
        }
        lma.prepareDAG(new Application(), conf);
        ToConsole console = (ToConsole) lma.getDAG().getOperatorMeta("console").getOperator();
        LocalMode.Controller lc = lma.getController();
        lc.runAsync(); // runs for 10 seconds and quits

        // wait for tuples to show up
        while (!check(useUnifier, console)) {
            System.out.println("Sleeping ....");
            Thread.sleep(500);
        }
    } catch (ConstraintViolationException e) {
        Assert.fail("constraint violations: " + e.getConstraintViolations());
    }
}
From source file:com.example.ProtobufToParquet.java
License:Apache License
public static void writeProtobufToParquetAvro(String protoFile, String parquetFile) throws IOException {
    ProtobufData model = ProtobufData.get();
    Schema schema = model.getSchema(ExampleMessage.class);
    System.err.println("Using Avro schema: " + schema.toString(true));

    // use the 3-level list structure instead of the 2-level one
    // 2-level is the default for forward-compatibility until 2.x
    Configuration conf = new Configuration();
    conf.setBoolean("parquet.avro.write-old-list-structure", false);

    ParquetWriter<ExampleMessage> writer = AvroParquetWriter.<ExampleMessage>builder(new Path(parquetFile))
            .withConf(conf)       // conf set to use 3-level lists
            .withDataModel(model) // use the protobuf data model
            .withSchema(schema)   // Avro schema for the protobuf data
            .build();

    FileInputStream protoStream = new FileInputStream(new File(protoFile));
    try {
        ExampleMessage m;
        while ((m = ExampleMessage.parseDelimitedFrom(protoStream)) != null) {
            writer.write(m);
        }
    } finally {
        protoStream.close();
    }

    writer.close();
}
From source file:com.facebook.hive.orc.OrcConf.java
License:Open Source License
public static void setBoolVar(Configuration conf, ConfVars var, boolean val) {
    conf.setBoolean(var.varname, val);
}
From source file:com.facebook.hiveio.conf.BooleanConfOption.java
License:Apache License
/**
 * Set value in configuration for this key
 *
 * @param conf  Configuration
 * @param value value to set
 */
public void set(Configuration conf, boolean value) {
    conf.setBoolean(getKey(), value);
}
From source file:com.facebook.presto.hive.HdfsConfiguration.java
License:Apache License
protected Configuration createConfiguration() {
    Configuration config = new Configuration();

    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class, DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class, SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", Ints.checkedCast(dfsTimeout.toMillis()));
    config.setInt("ipc.client.connect.timeout", Ints.checkedCast(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    for (String scheme : ImmutableList.of("s3", "s3bfs", "s3n")) {
        if (s3AwsAccessKey != null) {
            config.set(format("fs.%s.awsAccessKeyId", scheme), s3AwsAccessKey);
        }
        if (s3AwsSecretKey != null) {
            config.set(format("fs.%s.awsSecretAccessKey", scheme), s3AwsSecretKey);
        }
    }

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());

    updateConfiguration(config);

    return config;
}
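A side note on the short-circuit-read line above: it uses setBooleanIfUnset, which, unlike setBoolean, leaves an existing value alone (for example one that came from an added resource file). Below is a minimal sketch of the difference, using placeholder keys rather than anything from the Presto configuration.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanIfUnsetSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        conf.setBoolean("example.flag", false);        // setBoolean always overwrites
        conf.setBooleanIfUnset("example.flag", true);  // no effect: the key is already set
        conf.setBooleanIfUnset("example.other", true); // takes effect: the key was unset

        System.out.println(conf.getBoolean("example.flag", true));   // prints false
        System.out.println(conf.getBoolean("example.other", false)); // prints true
    }
}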
From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java
License:Apache License
public void updateConfiguration(Configuration config) {
    if (resourcePaths != null) {
        for (String resourcePath : resourcePaths) {
            config.addResource(new Path(resourcePath));
        }
    }

    // this is to prevent dfs client from doing reverse DNS lookups to determine whether nodes are rack local
    config.setClass("topology.node.switch.mapping.impl", NoOpDNSToSwitchMapping.class, DNSToSwitchMapping.class);

    if (socksProxy != null) {
        config.setClass("hadoop.rpc.socket.factory.class.default", SocksSocketFactory.class, SocketFactory.class);
        config.set("hadoop.socks.server", socksProxy.toString());
    }

    if (domainSocketPath != null) {
        config.setStrings("dfs.domain.socket.path", domainSocketPath);
    }

    // only enable short circuit reads if domain socket path is properly configured
    if (!config.get("dfs.domain.socket.path", "").trim().isEmpty()) {
        config.setBooleanIfUnset("dfs.client.read.shortcircuit", true);
    }

    config.setInt("dfs.socket.timeout", toIntExact(dfsTimeout.toMillis()));
    config.setInt("ipc.ping.interval", toIntExact(ipcPingInterval.toMillis()));
    config.setInt("ipc.client.connect.timeout", toIntExact(dfsConnectTimeout.toMillis()));
    config.setInt("ipc.client.connect.max.retries", dfsConnectMaxRetries);

    // re-map filesystem schemes to match Amazon Elastic MapReduce
    config.set("fs.s3.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3a.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3n.impl", PrestoS3FileSystem.class.getName());
    config.set("fs.s3bfs.impl", "org.apache.hadoop.fs.s3.S3FileSystem");

    // set AWS credentials for S3
    if (s3AwsAccessKey != null) {
        config.set(PrestoS3FileSystem.S3_ACCESS_KEY, s3AwsAccessKey);
        config.set("fs.s3bfs.awsAccessKeyId", s3AwsAccessKey);
    }
    if (s3AwsSecretKey != null) {
        config.set(PrestoS3FileSystem.S3_SECRET_KEY, s3AwsSecretKey);
        config.set("fs.s3bfs.awsSecretAccessKey", s3AwsSecretKey);
    }
    if (s3Endpoint != null) {
        config.set(PrestoS3FileSystem.S3_ENDPOINT, s3Endpoint);
        config.set("fs.s3bfs.Endpoint", s3Endpoint);
    }
    if (s3SignerType != null) {
        config.set(PrestoS3FileSystem.S3_SIGNER_TYPE, s3SignerType.getSignerType());
    }

    config.setInt("fs.cache.max-size", fileSystemMaxCacheSize);

    configureCompression(config, compressionCodec);

    // set config for S3
    config.setBoolean(PrestoS3FileSystem.S3_USE_INSTANCE_CREDENTIALS, s3UseInstanceCredentials);
    config.setBoolean(PrestoS3FileSystem.S3_SSL_ENABLED, s3SslEnabled);
    config.setBoolean(PrestoS3FileSystem.S3_SSE_ENABLED, s3SseEnabled);
    if (s3EncryptionMaterialsProvider != null) {
        config.set(PrestoS3FileSystem.S3_ENCRYPTION_MATERIALS_PROVIDER, s3EncryptionMaterialsProvider);
    }
    if (s3KmsKeyId != null) {
        config.set(PrestoS3FileSystem.S3_KMS_KEY_ID, s3KmsKeyId);
    }
    config.setInt(PrestoS3FileSystem.S3_MAX_CLIENT_RETRIES, s3MaxClientRetries);
    config.setInt(PrestoS3FileSystem.S3_MAX_ERROR_RETRIES, s3MaxErrorRetries);
    config.set(PrestoS3FileSystem.S3_MAX_BACKOFF_TIME, s3MaxBackoffTime.toString());
    config.set(PrestoS3FileSystem.S3_MAX_RETRY_TIME, s3MaxRetryTime.toString());
    config.set(PrestoS3FileSystem.S3_CONNECT_TIMEOUT, s3ConnectTimeout.toString());
    config.set(PrestoS3FileSystem.S3_SOCKET_TIMEOUT, s3SocketTimeout.toString());
    config.set(PrestoS3FileSystem.S3_STAGING_DIRECTORY, s3StagingDirectory.toString());
    config.setInt(PrestoS3FileSystem.S3_MAX_CONNECTIONS, s3MaxConnections);
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_FILE_SIZE, s3MultipartMinFileSize.toBytes());
    config.setLong(PrestoS3FileSystem.S3_MULTIPART_MIN_PART_SIZE, s3MultipartMinPartSize.toBytes());
    config.setBoolean(PrestoS3FileSystem.S3_PIN_CLIENT_TO_CURRENT_REGION, pinS3ClientToCurrentRegion);
    config.set(PrestoS3FileSystem.S3_USER_AGENT_PREFIX, s3UserAgentPrefix);
}
From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java
License:Apache License
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    boolean compression = compressionCodec != HiveCompressionCodec.NONE;
    config.setBoolean(COMPRESSRESULT.varname, compression);
    config.setBoolean("mapred.output.compress", compression);
    config.setBoolean(FileOutputFormat.COMPRESS, compression);

    // For DWRF
    config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compressionCodec.getOrcCompressionKind().name());
    config.set(HIVE_ORC_COMPRESSION.varname, compressionCodec.getOrcCompressionKind().name());

    // For ORC
    config.set(OrcTableProperties.COMPRESSION.getPropName(), compressionCodec.getOrcCompressionKind().name());

    // For RCFile and Text
    if (compressionCodec.getCodec().isPresent()) {
        config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
        config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
    } else {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    }

    // For Parquet
    config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());

    // For SequenceFile
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}