Example usage for org.apache.hadoop.conf Configuration setClass

Introduction

On this page you can find example usage for org.apache.hadoop.conf.Configuration.setClass.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Document

Set the value of the name property to the name of theClass, which must implement the given interface xface.
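
Before the per-project examples, here is a minimal, self-contained sketch of the set/get round trip (the property name and the Codec interface are illustrative, not taken from any snippet below): setClass stores the class name under the given property and fails fast if the class does not implement the interface, while getClass reads it back against the same interface.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class SetClassRoundTrip {

    /** Illustrative interface; any interface or superclass can serve as xface. */
    interface Codec {
    }

    static class GzipLikeCodec implements Codec {
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Stores the class name under the property; setClass throws at set
        // time if GzipLikeCodec did not implement Codec.
        conf.setClass("example.codec.class", GzipLikeCodec.class, Codec.class);

        // Read the class back, with a default and the expected interface.
        Class<? extends Codec> clazz =
                conf.getClass("example.codec.class", GzipLikeCodec.class, Codec.class);

        // ReflectionUtils.newInstance is the usual way to instantiate it;
        // Configurable implementations also receive the Configuration.
        Codec codec = ReflectionUtils.newInstance(clazz, conf);
        System.out.println(codec.getClass().getName());
    }
}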

Usage

From source file:license.LicenseDriver.java

public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("usage: [names dataset path] [licenses dataset path] [output]");
        System.exit(-1);
    }
    Configuration configuration = new Configuration();
    // Register each parsing strategy under the name of its marker interface
    // so that the mappers can look it up from the Configuration.
    configuration.setClass(ILicenseNameParsingStrategy.class.getName(),
            LicenseNameWritableParsingStrategy.class, IParsingStrategy.class);
    configuration.setClass(ILicenseTypeParsingStrategy.class.getName(),
            LicenseTypeWritableParsingStrategy.class, IParsingStrategy.class);

    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(LicenseKey.class);
    job.setOutputValueClass(JoinNameAndLicense.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), NamesWritableInputFormat.class,
            NamesDetailsMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), LicensesWritableInputFormat.class,
            LicensesDetailsMapper.class);
    job.setReducerClass(LicenseReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setPartitionerClass(LicenseKeyPartitioner.class);
    job.setGroupingComparatorClass(LicenseGroupingComparator.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setJarByClass(LicenseDriver.class);
    job.submit();
}
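
The driver stores each strategy class under its interface's name; on the task side a mapper would read it back with getClass and instantiate it via ReflectionUtils. A hypothetical setup method (the field name and default are illustrative, not from the original source):

    private IParsingStrategy nameStrategy;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        // Look up the class the driver registered; the default mirrors
        // what the driver sets, so a missing entry is still safe.
        Class<? extends IParsingStrategy> clazz = conf.getClass(
                ILicenseNameParsingStrategy.class.getName(),
                LicenseNameWritableParsingStrategy.class, IParsingStrategy.class);
        nameStrategy = ReflectionUtils.newInstance(clazz, conf);
    }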

From source file:net.java.jatextmining.util.Compressor.java

License:Apache License

/**
 * Run the MapReduce job that compresses files on HDFS.
 * @param conf Specify the Hadoop Configuration object.
 * @return True if the job succeeds, false otherwise.
 * @throws IOException Exception for IO.
 * @throws InterruptedException Exception for threads (waitForCompletion()).
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runCompressor(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(Compressor.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CompressorMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job.waitForCompletion(true);
}
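
The mapred.* keys above are the deprecated MRv1 property names; Hadoop still honors them, but the MRv2 equivalents (or the FileOutputFormat helpers) are the idiomatic choice today. A sketch of the same setup with the current names:

    // Current property names for output compression.
    conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    conf.setClass("mapreduce.output.fileoutputformat.compress.codec",
            GzipCodec.class, CompressionCodec.class);

    // Or, once the Job exists, via the helper methods.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);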

From source file:org.apache.avro.hadoop.io.AvroSerialization.java

License:Apache License

/**
 * Sets the data model class for de/serialization.
 *
 * @param conf The configuration.
 * @param modelClass The data model class.
 */
public static void setDataModelClass(Configuration conf, Class<? extends GenericData> modelClass) {
    conf.setClass(CONF_DATA_MODEL, modelClass, GenericData.class);
}
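
The reader side is symmetric: the stored class is fetched with getClass against the same GenericData base class. A hedged sketch of what the matching getter could look like (the default value here is an assumption, not necessarily what AvroSerialization actually uses):

    /** Sketch of the matching getter; the GenericData.class default is assumed. */
    public static Class<? extends GenericData> getDataModelClass(Configuration conf) {
        return conf.getClass(CONF_DATA_MODEL, GenericData.class, GenericData.class);
    }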

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOCassandraIT.java

License:Apache License

/**
 * Returns the Hadoop configuration for reading data from Cassandra. To read data from Cassandra
 * using HadoopFormatIO, the following properties must be set: InputFormat class, InputFormat key
 * class, InputFormat value class, Thrift address, Thrift port, partitioner class, keyspace, and
 * column family name.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
    Configuration conf = new Configuration();
    conf.set(CASSANDRA_THRIFT_PORT_PROPERTY, options.getCassandraServerPort().toString());
    conf.set(CASSANDRA_THRIFT_ADDRESS_PROPERTY, options.getCassandraServerIp());
    conf.set(CASSANDRA_PARTITIONER_CLASS_PROPERTY, CASSANDRA_PARTITIONER_CLASS_VALUE);
    conf.set(CASSANDRA_KEYSPACE_PROPERTY, CASSANDRA_KEYSPACE);
    conf.set(CASSANDRA_COLUMNFAMILY_PROPERTY, CASSANDRA_TABLE);
    // Set user name and password if Cassandra instance has security configured.
    conf.set(USERNAME, options.getCassandraUserName());
    conf.set(PASSWORD, options.getCassandraPassword());
    conf.set(INPUT_KEYSPACE_USERNAME_CONFIG, options.getCassandraUserName());
    conf.set(INPUT_KEYSPACE_PASSWD_CONFIG, options.getCassandraPassword());
    conf.setClass("mapreduce.job.inputformat.class", org.apache.cassandra.hadoop.cql3.CqlInputFormat.class,
            InputFormat.class);
    conf.setClass("key.class", Long.class, Object.class);
    conf.setClass("value.class", Row.class, Object.class);
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOCassandraTest.java

License:Apache License

/**
 * Returns the configuration for CqlInputFormat. Mandatory parameters required apart from the
 * InputFormat class name, key class, and value class are the Thrift port, Thrift address,
 * partitioner class, keyspace, and column family name.
 */
private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set(CASSANDRA_NATIVE_PORT_PROPERTY, String.valueOf(cassandraNativePort));
    conf.set(CASSANDRA_THRIFT_PORT_PROPERTY, String.valueOf(cassandraPort));
    conf.set(CASSANDRA_THRIFT_ADDRESS_PROPERTY, CASSANDRA_HOST);
    conf.set(CASSANDRA_PARTITIONER_CLASS_PROPERTY, CASSANDRA_PARTITIONER_CLASS_VALUE);
    conf.set(CASSANDRA_KEYSPACE_PROPERTY, CASSANDRA_KEYSPACE);
    conf.set(CASSANDRA_COLUMNFAMILY_PROPERTY, CASSANDRA_TABLE);
    conf.setClass("mapreduce.job.inputformat.class", org.apache.cassandra.hadoop.cql3.CqlInputFormat.class,
            InputFormat.class);
    conf.setClass("key.class", Long.class, Object.class);
    conf.setClass("value.class", Row.class, Object.class);
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOElasticIT.java

License:Apache License

/**
 * Returns the Hadoop configuration for reading data from Elasticsearch. The Configuration object
 * must have the InputFormat class, key class, and value class set. Mandatory fields for
 * EsInputFormat are es.resource, es.nodes, es.port, es.internal.es.version, and
 * es.nodes.wan.only. Please refer to <a
 * href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, options.getElasticServerIp());
    conf.set(ConfigurationOptions.ES_PORT, options.getElasticServerPort().toString());
    conf.set(ConfigurationOptions.ES_NODES_WAN_ONLY, TRUE);
    // Set username and password if Elasticsearch is configured with security.
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_USER, options.getElasticUserName());
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_PASS, options.getElasticPassword());
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass("mapreduce.job.inputformat.class", org.elasticsearch.hadoop.mr.EsInputFormat.class,
            InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    // Tune max docs per partition, scroll size, and batch size (in bytes)
    // to improve test time on large data sets.
    conf.set("es.input.max.docs.per.partition", "50000");
    conf.set("es.scroll.size", "400");
    conf.set("es.batch.size.bytes", "8mb");
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOElasticTest.java

License:Apache License

/**
 * Sets the Elasticsearch configuration parameters in the Hadoop configuration object. The
 * Configuration object must have the InputFormat class, key class, and value class set.
 * Mandatory fields for EsInputFormat are es.resource, es.nodes, es.port, and
 * es.internal.es.version. Please refer to <a
 * href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, ELASTIC_IN_MEM_HOSTNAME);
    conf.set(ConfigurationOptions.ES_PORT, String.format("%s", port));
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_NODES_DISCOVERY, TRUE);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass("mapreduce.job.inputformat.class", EsInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOIT.java

License:Apache License

private static void setupHadoopConfiguration(PostgresIOTestPipelineOptions options) {
    Configuration conf = new Configuration();
    DBConfiguration.configureDB(conf, "org.postgresql.Driver", DatabaseTestHelper.getPostgresDBUrl(options),
            options.getPostgresUsername(), options.getPostgresPassword());

    conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName);
    conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "id", "name");
    conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, "id ASC");
    conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, TestRowDBWritable.class, DBWritable.class);

    conf.setClass("key.class", LongWritable.class, Object.class);
    conf.setClass("value.class", TestRowDBWritable.class, Object.class);
    conf.setClass("mapreduce.job.inputformat.class", DBInputFormat.class, InputFormat.class);

    conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName);
    conf.set(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, "2");
    conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, "id", "name");

    conf.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, TestRowDBWritable.class, Object.class);
    conf.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, NullWritable.class, Object.class);
    conf.setClass(HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR, DBOutputFormat.class, OutputFormat.class);
    conf.set(HadoopFormatIO.JOB_ID, String.valueOf(1));

    hadoopConfiguration = new SerializableConfiguration(conf);
}
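
The key.class, value.class, and mapreduce.job.inputformat.class entries, together with the HadoopFormatIO.OUTPUT_* attributes, are what HadoopFormatIO consults when the pipeline is built. A sketch of the read side (the pipeline variable is assumed; the type parameters must match the key.class and value.class entries above):

    PCollection<KV<LongWritable, TestRowDBWritable>> rows =
            pipeline.apply(HadoopFormatIO.<LongWritable, TestRowDBWritable>read()
                    .withConfiguration(hadoopConfiguration.get()));

The OUTPUT_KEY_CLASS, OUTPUT_VALUE_CLASS, and OUTPUT_FORMAT_CLASS_ATTR entries play the corresponding role for the write transform.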

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates the behavior of {@link
 * HadoopFormatIO.Read#withConfiguration(Configuration) withConfiguration(Configuration)} when the
 * Hadoop InputFormat class is not provided in the configuration.
 */
@Test
public void testReadValidationFailsMissingInputFormatInConf() {
    Configuration configuration = new Configuration();
    configuration.setClass("key.class", Text.class, Object.class);
    configuration.setClass("value.class", Employee.class, Object.class);
    thrown.expect(IllegalArgumentException.class);
    HadoopFormatIO.<Text, Employee>read().withConfiguration(configuration);
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates the behavior of {@link
 * HadoopFormatIO.Read#withConfiguration(Configuration) withConfiguration(Configuration)} when the
 * key class is not provided in the configuration.
 */
@Test
public void testReadValidationFailsMissingKeyClassInConf() {
    Configuration configuration = new Configuration();
    configuration.setClass("mapreduce.job.inputformat.class", EmployeeInputFormat.class, InputFormat.class);
    configuration.setClass("value.class", Employee.class, Object.class);
    thrown.expect(IllegalArgumentException.class);
    HadoopFormatIO.<Text, Employee>read().withConfiguration(configuration);
}