Example usage for org.apache.hadoop.conf Configuration setClass

Introduction

On this page you can find example usage for org.apache.hadoop.conf.Configuration.setClass.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Document

Set the value of the name property to the name of theClass, which must implement the given interface xface.
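
Before the per-project examples, here is a minimal, self-contained sketch of the set/get round trip (the property name and the Codec interface are illustrative, not taken from any snippet below): setClass stores the class name under the given property and fails fast if the class does not implement the interface, while getClass reads it back against the same interface.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class SetClassRoundTrip {

    /** Illustrative interface; any interface or superclass can serve as xface. */
    interface Codec {
    }

    static class GzipLikeCodec implements Codec {
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Stores the class name under the property; setClass throws at set
        // time if GzipLikeCodec did not implement Codec.
        conf.setClass("example.codec.class", GzipLikeCodec.class, Codec.class);

        // Read the class back, with a default and the expected interface.
        Class<? extends Codec> clazz =
                conf.getClass("example.codec.class", GzipLikeCodec.class, Codec.class);

        // ReflectionUtils.newInstance is the usual way to instantiate it;
        // Configurable implementations also receive the Configuration.
        Codec codec = ReflectionUtils.newInstance(clazz, conf);
        System.out.println(codec.getClass().getName());
    }
}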

Usage

From source file:license.LicenseDriver.java

public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("usage: [names dataset path] [licenses dataset path] [output]");
        System.exit(-1);
    }
    Configuration configuration = new Configuration();
    // Register each parsing strategy under the name of its marker interface
    // so that the mappers can look it up from the Configuration.
    configuration.setClass(ILicenseNameParsingStrategy.class.getName(),
            LicenseNameWritableParsingStrategy.class, IParsingStrategy.class);
    configuration.setClass(ILicenseTypeParsingStrategy.class.getName(),
            LicenseTypeWritableParsingStrategy.class, IParsingStrategy.class);

    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(LicenseKey.class);
    job.setOutputValueClass(JoinNameAndLicense.class);
    MultipleInputs.addInputPath(job, new Path(args[0]), NamesWritableInputFormat.class,
            NamesDetailsMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), LicensesWritableInputFormat.class,
            LicensesDetailsMapper.class);
    job.setReducerClass(LicenseReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setPartitionerClass(LicenseKeyPartitioner.class);
    job.setGroupingComparatorClass(LicenseGroupingComparator.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setJarByClass(LicenseDriver.class);
    job.submit();
}
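
The driver stores each strategy class under its interface's name; on the task side a mapper would read it back with getClass and instantiate it via ReflectionUtils. A hypothetical setup method (the field name and default are illustrative, not from the original source):

    private IParsingStrategy nameStrategy;

    @Override
    protected void setup(Context context) {
        Configuration conf = context.getConfiguration();
        // Look up the class the driver registered; the default mirrors
        // what the driver sets, so a missing entry is still safe.
        Class<? extends IParsingStrategy> clazz = conf.getClass(
                ILicenseNameParsingStrategy.class.getName(),
                LicenseNameWritableParsingStrategy.class, IParsingStrategy.class);
        nameStrategy = ReflectionUtils.newInstance(clazz, conf);
    }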

From source file:net.java.jatextmining.util.Compressor.java

License:Apache License

/**
 * Run the MapReduce job that compresses files on HDFS.
 * @param conf Specify the Hadoop Configuration object.
 * @return True if the job succeeds, false otherwise.
 * @throws IOException Exception for IO.
 * @throws InterruptedException Exception for threads (waitForCompletion()).
 * @throws ClassNotFoundException Exception for waitForCompletion().
 */
private boolean runCompressor(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setBoolean("mapred.output.compress", true);
    conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
    Job job = new Job(conf);
    job.setJarByClass(Compressor.class);
    TextInputFormat.addInputPath(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));
    job.setMapperClass(CompressorMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job.waitForCompletion(true);
}
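
The mapred.* keys above are the deprecated MRv1 property names; Hadoop still honors them, but the MRv2 equivalents (or the FileOutputFormat helpers) are the idiomatic choice today. A sketch of the same setup with the current names:

    // Current property names for output compression.
    conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    conf.setClass("mapreduce.output.fileoutputformat.compress.codec",
            GzipCodec.class, CompressionCodec.class);

    // Or, once the Job exists, via the helper methods.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);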

From source file:org.apache.avro.hadoop.io.AvroSerialization.java

License:Apache License

/**
 * Sets the data model class for de/serialization.
 *
 * @param conf The configuration.
 * @param modelClass The data model class.
 */
public static void setDataModelClass(Configuration conf, Class<? extends GenericData> modelClass) {
    conf.setClass(CONF_DATA_MODEL, modelClass, GenericData.class);
}
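
The reader side is symmetric: the stored class is fetched with getClass against the same GenericData base class. A hedged sketch of what the matching getter could look like (the default value here is an assumption, not necessarily what AvroSerialization actually uses):

    /** Sketch of the matching getter; the GenericData.class default is assumed. */
    public static Class<? extends GenericData> getDataModelClass(Configuration conf) {
        return conf.getClass(CONF_DATA_MODEL, GenericData.class, GenericData.class);
    }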

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOCassandraIT.java

License:Apache License

/**
 * Returns the Hadoop configuration for reading data from Cassandra. To read data from Cassandra
 * using HadoopFormatIO, the following properties must be set: InputFormat class, InputFormat key
 * class, InputFormat value class, Thrift address, Thrift port, partitioner class, keyspace, and
 * column family name.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
    Configuration conf = new Configuration();
    conf.set(CASSANDRA_THRIFT_PORT_PROPERTY, options.getCassandraServerPort().toString());
    conf.set(CASSANDRA_THRIFT_ADDRESS_PROPERTY, options.getCassandraServerIp());
    conf.set(CASSANDRA_PARTITIONER_CLASS_PROPERTY, CASSANDRA_PARTITIONER_CLASS_VALUE);
    conf.set(CASSANDRA_KEYSPACE_PROPERTY, CASSANDRA_KEYSPACE);
    conf.set(CASSANDRA_COLUMNFAMILY_PROPERTY, CASSANDRA_TABLE);
    // Set user name and password if Cassandra instance has security configured.
    conf.set(USERNAME, options.getCassandraUserName());
    conf.set(PASSWORD, options.getCassandraPassword());
    conf.set(INPUT_KEYSPACE_USERNAME_CONFIG, options.getCassandraUserName());
    conf.set(INPUT_KEYSPACE_PASSWD_CONFIG, options.getCassandraPassword());
    conf.setClass("mapreduce.job.inputformat.class", org.apache.cassandra.hadoop.cql3.CqlInputFormat.class,
            InputFormat.class);
    conf.setClass("key.class", Long.class, Object.class);
    conf.setClass("value.class", Row.class, Object.class);
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOCassandraTest.java

License:Apache License

/**
 * Returns the configuration for CqlInputFormat. Mandatory parameters required apart from the
 * InputFormat class name, key class, and value class are the Thrift port, Thrift address,
 * partitioner class, keyspace, and column family name.
 */
private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set(CASSANDRA_NATIVE_PORT_PROPERTY, String.valueOf(cassandraNativePort));
    conf.set(CASSANDRA_THRIFT_PORT_PROPERTY, String.valueOf(cassandraPort));
    conf.set(CASSANDRA_THRIFT_ADDRESS_PROPERTY, CASSANDRA_HOST);
    conf.set(CASSANDRA_PARTITIONER_CLASS_PROPERTY, CASSANDRA_PARTITIONER_CLASS_VALUE);
    conf.set(CASSANDRA_KEYSPACE_PROPERTY, CASSANDRA_KEYSPACE);
    conf.set(CASSANDRA_COLUMNFAMILY_PROPERTY, CASSANDRA_TABLE);
    conf.setClass("mapreduce.job.inputformat.class", org.apache.cassandra.hadoop.cql3.CqlInputFormat.class,
            InputFormat.class);
    conf.setClass("key.class", Long.class, Object.class);
    conf.setClass("value.class", Row.class, Object.class);
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOElasticIT.java

License:Apache License

/**
 * Returns the Hadoop configuration for reading data from Elasticsearch. The Configuration object
 * must have the InputFormat class, key class, and value class set. Mandatory fields for
 * EsInputFormat are es.resource, es.nodes, es.port, es.internal.es.version, and
 * es.nodes.wan.only. Please refer to <a
 * href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, options.getElasticServerIp());
    conf.set(ConfigurationOptions.ES_PORT, options.getElasticServerPort().toString());
    conf.set(ConfigurationOptions.ES_NODES_WAN_ONLY, TRUE);
    // Set username and password if Elasticsearch is configured with security.
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_USER, options.getElasticUserName());
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_PASS, options.getElasticPassword());
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass("mapreduce.job.inputformat.class", org.elasticsearch.hadoop.mr.EsInputFormat.class,
            InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    // Tune max docs per partition, scroll size, and batch size (in bytes)
    // to improve test time on large data sets.
    conf.set("es.input.max.docs.per.partition", "50000");
    conf.set("es.scroll.size", "400");
    conf.set("es.batch.size.bytes", "8mb");
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOElasticTest.java

License:Apache License

/**
 * Sets the Elasticsearch configuration parameters in the Hadoop configuration object. The
 * Configuration object must have the InputFormat class, key class, and value class set.
 * Mandatory fields for EsInputFormat are es.resource, es.nodes, es.port, and
 * es.internal.es.version. Please refer to <a
 * href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, ELASTIC_IN_MEM_HOSTNAME);
    conf.set(ConfigurationOptions.ES_PORT, String.format("%s", port));
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_NODES_DISCOVERY, TRUE);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass("mapreduce.job.inputformat.class", EsInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    return conf;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOIT.java

License:Apache License

private static void setupHadoopConfiguration(PostgresIOTestPipelineOptions options) {
    Configuration conf = new Configuration();
    DBConfiguration.configureDB(conf, "org.postgresql.Driver", DatabaseTestHelper.getPostgresDBUrl(options),
            options.getPostgresUsername(), options.getPostgresPassword());

    conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName);
    conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "id", "name");
    conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, "id ASC");
    conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, TestRowDBWritable.class, DBWritable.class);

    conf.setClass("key.class", LongWritable.class, Object.class);
    conf.setClass("value.class", TestRowDBWritable.class, Object.class);
    conf.setClass("mapreduce.job.inputformat.class", DBInputFormat.class, InputFormat.class);

    conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName);
    conf.set(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, "2");
    conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, "id", "name");

    conf.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, TestRowDBWritable.class, Object.class);
    conf.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, NullWritable.class, Object.class);
    conf.setClass(HadoopFormatIO.OUTPUT_FORMAT_CLASS_ATTR, DBOutputFormat.class, OutputFormat.class);
    conf.set(HadoopFormatIO.JOB_ID, String.valueOf(1));

    hadoopConfiguration = new SerializableConfiguration(conf);
}
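
The key.class, value.class, and mapreduce.job.inputformat.class entries, together with the HadoopFormatIO.OUTPUT_* attributes, are what HadoopFormatIO consults when the pipeline is built. A sketch of the read side (the pipeline variable is assumed; the type parameters must match the key.class and value.class entries above):

    PCollection<KV<LongWritable, TestRowDBWritable>> rows =
            pipeline.apply(HadoopFormatIO.<LongWritable, TestRowDBWritable>read()
                    .withConfiguration(hadoopConfiguration.get()));

The OUTPUT_KEY_CLASS, OUTPUT_VALUE_CLASS, and OUTPUT_FORMAT_CLASS_ATTR entries play the corresponding role for the write transform.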

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates the behavior of {@link
 * HadoopFormatIO.Read#withConfiguration(Configuration) withConfiguration(Configuration)} when the
 * Hadoop InputFormat class is not provided in the configuration.
 */
@Test
public void testReadValidationFailsMissingInputFormatInConf() {
    Configuration configuration = new Configuration();
    configuration.setClass("key.class", Text.class, Object.class);
    configuration.setClass("value.class", Employee.class, Object.class);
    thrown.expect(IllegalArgumentException.class);
    HadoopFormatIO.<Text, Employee>read().withConfiguration(configuration);
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates the behavior of {@link
 * HadoopFormatIO.Read#withConfiguration(Configuration) withConfiguration(Configuration)} when the
 * key class is not provided in the configuration.
 */
@Test
public void testReadValidationFailsMissingKeyClassInConf() {
    Configuration configuration = new Configuration();
    configuration.setClass("mapreduce.job.inputformat.class", EmployeeInputFormat.class, InputFormat.class);
    configuration.setClass("value.class", Employee.class, Object.class);
    thrown.expect(IllegalArgumentException.class);
    HadoopFormatIO.<Text, Employee>read().withConfiguration(configuration);
}