List of usage examples for org.apache.hadoop.conf Configuration setClass
public void setClass(String name, Class<?> theClass, Class<?> xface)
Sets the value of the name property to the name of theClass, which must implement the given interface xface; an exception is thrown if theClass does not implement xface.
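Before the source-file examples, a minimal, self-contained sketch of the setClass/getClass round trip (the property key example.codec below is an arbitrary placeholder, not a real Hadoop key): setClass stores the class name as the property value after checking the interface constraint, and getClass resolves the stored name back.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;

public class SetClassSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Stores "org.apache.hadoop.io.compress.GzipCodec" as the property value;
        // setClass throws a RuntimeException if the class does not implement
        // the given interface (here CompressionCodec).
        conf.setClass("example.codec", GzipCodec.class, CompressionCodec.class);

        // getClass resolves the stored class name back to a Class object,
        // again checking it is assignable to the requested interface;
        // the default (null here) is returned when the key is unset.
        Class<? extends CompressionCodec> codec =
                conf.getClass("example.codec", null, CompressionCodec.class);
        System.out.println(codec); // class org.apache.hadoop.io.compress.GzipCodec
    }
}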
From source file:license.LicenseDriver.java
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("usage: [students dataset path] [grades dataset path] [output]");
        System.exit(-1);
    }
    Configuration configuration = new Configuration();
    configuration.setClass(ILicenseNameParsingStrategy.class.getName(),
            LicenseNameWritableParsingStrategy.class, IParsingStrategy.class);
    configuration.setClass(ILicenseTypeParsingStrategy.class.getName(),
            LicenseTypeWritableParsingStrategy.class, IParsingStrategy.class);
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(LicenseKey.class);
    job.setOutputValueClass(JoinNameAndLicense.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), NamesWritableInputFormat.class,
            NamesDetailsMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), LicensesWritableInputFormat.class,
            LicensesDetailsMapper.class);
    job.setReducerClass(LicenseReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setPartitionerClass(LicenseKeyPartitioner.class);
    job.setGroupingComparatorClass(LicenseGroupingComparator.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setJarByClass(LicenseDriver.class);
    job.submit();
}
From source file:net.java.jatextmining.util.Compressor.java
License:Apache License
/**
 * Runs the MapReduce job that compresses files on HDFS.
 * @param conf the Hadoop Configuration object.
 * @return true if the job succeeds, false otherwise.
 * @throws IOException for IO errors.
 * @throws InterruptedException if waitForCompletion() is interrupted.
 * @throws ClassNotFoundException from waitForCompletion().
 */
private boolean runCompressor(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(Compressor.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CompressorMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    return job.waitForCompletion(true);
}
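A note on the property names above: mapred.output.compress and mapred.output.compression.codec are the old Hadoop 1.x keys. In Hadoop 2 and later they survive as deprecated aliases for mapreduce.output.fileoutputformat.compress and mapreduce.output.fileoutputformat.compress.codec, which work with the same setBoolean/setClass calls.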
From source file:org.apache.avro.hadoop.io.AvroSerialization.java
License:Apache License
/**
 * Sets the data model class for de/serialization.
 *
 * @param conf The configuration.
 * @param modelClass The data model class.
 */
public static void setDataModelClass(Configuration conf, Class<? extends GenericData> modelClass) {
    conf.setClass(CONF_DATA_MODEL, modelClass, GenericData.class);
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOCassandraIT.java
License:Apache License
/**
 * Returns the Hadoop configuration for reading data from Cassandra. To read data from Cassandra
 * using HadoopFormatIO, the following properties must be set: InputFormat class, InputFormat key
 * class, InputFormat value class, Thrift address, Thrift port, partitioner class, keyspace, and
 * column family name.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
    Configuration conf = new Configuration();
    conf.set(CASSANDRA_THRIFT_PORT_PROPERTY, options.getCassandraServerPort().toString());
    conf.set(CASSANDRA_THRIFT_ADDRESS_PROPERTY, options.getCassandraServerIp());
    conf.set(CASSANDRA_PARTITIONER_CLASS_PROPERTY, CASSANDRA_PARTITIONER_CLASS_VALUE);
    conf.set(CASSANDRA_KEYSPACE_PROPERTY, CASSANDRA_KEYSPACE);
    conf.set(CASSANDRA_COLUMNFAMILY_PROPERTY, CASSANDRA_TABLE);
    // Set user name and password if the Cassandra instance has security configured.
    conf.set(USERNAME, options.getCassandraUserName());
    conf.set(PASSWORD, options.getCassandraPassword());
    conf.set(INPUT_KEYSPACE_USERNAME_CONFIG, options.getCassandraUserName());
    conf.set(INPUT_KEYSPACE_PASSWD_CONFIG, options.getCassandraPassword());
    conf.setClass("mapreduce.job.inputformat.class",
            org.apache.cassandra.hadoop.cql3.CqlInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Long.class, Object.class);
    conf.setClass("value.class", Row.class, Object.class);
    return conf;
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOCassandraTest.java
License:Apache License
/**
 * Returns the configuration for CqlInputFormat. Apart from the InputFormat class name, key class,
 * and value class, the mandatory parameters are the Thrift port, Thrift address, partitioner
 * class, keyspace, and column family name.
 */
private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set(CASSANDRA_NATIVE_PORT_PROPERTY, String.valueOf(cassandraNativePort));
    conf.set(CASSANDRA_THRIFT_PORT_PROPERTY, String.valueOf(cassandraPort));
    conf.set(CASSANDRA_THRIFT_ADDRESS_PROPERTY, CASSANDRA_HOST);
    conf.set(CASSANDRA_PARTITIONER_CLASS_PROPERTY, CASSANDRA_PARTITIONER_CLASS_VALUE);
    conf.set(CASSANDRA_KEYSPACE_PROPERTY, CASSANDRA_KEYSPACE);
    conf.set(CASSANDRA_COLUMNFAMILY_PROPERTY, CASSANDRA_TABLE);
    conf.setClass("mapreduce.job.inputformat.class",
            org.apache.cassandra.hadoop.cql3.CqlInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Long.class, Object.class);
    conf.setClass("value.class", Row.class, Object.class);
    return conf;
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOElasticIT.java
License:Apache License
/**
 * Returns the Hadoop configuration for reading data from Elasticsearch. The Configuration object
 * should have the InputFormat class, key class, and value class set. Mandatory fields for
 * EsInputFormat are es.resource, es.nodes, es.port, es.internal.es.version, and es.nodes.wan.only.
 * Please refer to <a href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, options.getElasticServerIp());
    conf.set(ConfigurationOptions.ES_PORT, options.getElasticServerPort().toString());
    conf.set(ConfigurationOptions.ES_NODES_WAN_ONLY, TRUE);
    // Set username and password if Elasticsearch is configured with security.
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_USER, options.getElasticUserName());
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_PASS, options.getElasticPassword());
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass("mapreduce.job.inputformat.class",
            org.elasticsearch.hadoop.mr.EsInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    // Optimizations that tune max docs per partition, scroll size, and batch size in bytes
    // to improve the test time for large data.
    conf.set("es.input.max.docs.per.partition", "50000");
    conf.set("es.scroll.size", "400");
    conf.set("es.batch.size.bytes", "8mb");
    return conf;
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOElasticTest.java
License:Apache License
/**
 * Sets the Elasticsearch configuration parameters in the Hadoop configuration object. The
 * Configuration object should have the InputFormat class, key class, and value class set.
 * Mandatory fields for EsInputFormat are es.resource, es.nodes, es.port, and
 * es.internal.es.version. Please refer to
 * <a href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, ELASTIC_IN_MEM_HOSTNAME);
    conf.set(ConfigurationOptions.ES_PORT, String.format("%s", port));
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_NODES_DISCOVERY, TRUE);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass("mapreduce.job.inputformat.class", EsInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    return conf;
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOIT.java
License:Apache License
private static void setupHadoopConfiguration(PostgresIOTestPipelineOptions options) {
    Configuration conf = new Configuration();
    DBConfiguration.configureDB(conf, "org.postgresql.Driver",
            DatabaseTestHelper.getPostgresDBUrl(options), options.getPostgresUsername(),
            options.getPostgresPassword());
    conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName);
    conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "id", "name");
    conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, "id ASC");
    conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, TestRowDBWritable.class, DBWritable.class);
    conf.setClass("key.class", LongWritable.class, Object.class);
    conf.setClass("value.class", TestRowDBWritable.class, Object.class);
    conf.setClass("mapreduce.job.inputformat.class", DBInputFormat.class, InputFormat.class);
    conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName);
    conf.set(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, "2");
    conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, "id", "name");
    conf.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, TestRowDBWritable.class, Object.class);
    conf.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, NullWritable.class, Object.class);
    conf.setClass(HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR, DBOutputFormat.class, OutputFormat.class);
    conf.set(HadoopFormatIO.JOB_ID, String.valueOf(1));
    hadoopConfiguration = new SerializableConfiguration(conf);
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java
License:Apache License
/**
 * This test validates the behavior of {@link
 * HadoopFormatIO.Read#withConfiguration(Configuration) withConfiguration(Configuration)} when the
 * Hadoop InputFormat class is not provided by the user in the configuration.
 */
@Test
public void testReadValidationFailsMissingInputFormatInConf() {
    Configuration configuration = new Configuration();
    configuration.setClass("key.class", Text.class, Object.class);
    configuration.setClass("value.class", Employee.class, Object.class);
    thrown.expect(IllegalArgumentException.class);
    HadoopFormatIO.<Text, Employee>read().withConfiguration(configuration);
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java
License:Apache License
/**
 * This test validates the behavior of {@link
 * HadoopFormatIO.Read#withConfiguration(Configuration) withConfiguration(Configuration)} when the
 * key class is not provided by the user in the configuration.
 */
@Test
public void testReadValidationFailsMissingKeyClassInConf() {
    Configuration configuration = new Configuration();
    configuration.setClass("mapreduce.job.inputformat.class", EmployeeInputFormat.class, InputFormat.class);
    configuration.setClass("value.class", Employee.class, Object.class);
    thrown.expect(IllegalArgumentException.class);
    HadoopFormatIO.<Text, Employee>read().withConfiguration(configuration);
}