Example usage for org.apache.hadoop.conf Configuration get

List of usage examples for org.apache.hadoop.conf Configuration get

Introduction

On this page you can find example usages of org.apache.hadoop.conf Configuration get.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
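
For illustration, here is a minimal, hedged sketch of the behavior described above; the property name example.custom.key is a hypothetical example, not a key defined by Hadoop.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The property has not been set anywhere, so get(...) returns null.
        System.out.println(conf.get("example.custom.key")); // prints "null"

        // Once the property is set, get(...) returns the stored value.
        conf.set("example.custom.key", "some-value");
        System.out.println(conf.get("example.custom.key")); // prints "some-value"
    }
}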

Usage

From source file:com.ci.backports.avro.mapreduce.AvroJob.java

License:Apache License

/** Return a job's map input schema. */
public static Schema getInputSchema(Configuration conf) {
    String schemaString = conf.get(INPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}

From source file:com.ci.backports.avro.mapreduce.AvroJob.java

License:Apache License

/** Return a job's map output key schema. */
public static Schema getMapOutputKeySchema(Configuration conf) {
    String schemaString = conf.get(KEY_MAP_OUTPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}

From source file:com.ci.backports.avro.mapreduce.AvroJob.java

License:Apache License

/** Return a job's map output value schema. */
public static Schema getMapOutputValueSchema(Configuration conf) {
    String schemaString = conf.get(VALUE_MAP_OUTPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}

From source file:com.ci.backports.avro.mapreduce.AvroJob.java

License:Apache License

/** Return a job's output schema. */
public static Schema getOutputSchema(Configuration conf) {
    String schemaString = conf.get(OUTPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}
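
All four getters above follow the same pattern: conf.get returns the raw schema string (or null), and the string is parsed only when it is non-null. Below is a hedged sketch of the complementary write-and-read side; the configuration key is hypothetical, since the actual *_CONFIG_FIELD constants are not shown here, and Schema.Parser assumes Avro 1.5 or later.

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;

public class SchemaConfigExample {
    // Hypothetical key for illustration; not the constant used by AvroJob above.
    private static final String SCHEMA_KEY = "example.avro.input.schema";

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a schema in the configuration as its JSON string form.
        Schema schema = Schema.create(Schema.Type.STRING);
        conf.set(SCHEMA_KEY, schema.toString());

        // Read it back with the same null-guarded pattern as the getters above.
        String schemaString = conf.get(SCHEMA_KEY);
        Schema parsed = schemaString != null ? new Schema.Parser().parse(schemaString) : null;
        System.out.println(parsed);
    }
}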

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the
 * DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of
 * regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's
 * requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either
 * KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either
 * KeyValue or Put before running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table,
        Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(hfileOutputFormatBase);

    // Based on the configured map output class, set the correct reducer to
    // properly sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(),
            ResultSerialization.class.getName(), KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    // TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}
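
The conf.get call in this method feeds conf.setStrings, so the existing io.serializations value is kept while the HBase serialization classes are appended. Here is a hedged sketch of that read-then-extend pattern in isolation; the appended JavaSerialization class is only an illustration.

import org.apache.hadoop.conf.Configuration;

public class AppendSerializationsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Read the current comma-separated list of serialization classes.
        String existing = conf.get("io.serializations");

        // Rewrite the property with the existing value first and the new
        // class appended, mirroring the setStrings call shown above.
        conf.setStrings("io.serializations", existing,
                "org.apache.hadoop.io.serializer.JavaSerialization");

        System.out.println(conf.get("io.serializations"));
    }
}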

From source file:com.cloudera.cdk.data.hcatalog.HCatalog.java

License:Apache License

public HCatalog(Configuration conf) {
    if (conf.get(Loader.HIVE_METASTORE_URI_PROP) == null) {
        LOG.warn("Using a local Hive MetaStore (for testing only)");
    }
    try {
        hiveConf = new HiveConf(conf, HiveConf.class);
        client = HCatUtil.getHiveClient(hiveConf);
    } catch (Exception e) {
        throw new RuntimeException("Hive metastore exception", e);
    }
}

From source file:com.cloudera.cdk.data.hcatalog.impl.Loader.java

License:Apache License

@Override
public void load() {
    // get a default Configuration to configure defaults (so it's okay!)
    final Configuration conf = new Configuration();

    String hiveAuthority;
    if (conf.get(HIVE_METASTORE_URI_PROP) != null) {
        try {
            hiveAuthority = new URI(conf.get(HIVE_METASTORE_URI_PROP)).getAuthority();
        } catch (URISyntaxException ex) {
            hiveAuthority = "";
        }
    } else {
        hiveAuthority = "";
    }

    // Hive-managed data sets
    final OptionBuilder<DatasetRepository> managedBuilder = new ManagedBuilder(conf);
    Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive")), managedBuilder);
    Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive://" + hiveAuthority + "/")),
            managedBuilder);

    // external data sets
    final OptionBuilder<DatasetRepository> externalBuilder = new ExternalBuilder(conf);

    String hdfsAuthority;
    try {
        // Use a HDFS URI with no authority and the environment's configuration
        // to find the default HDFS information
        final URI hdfs = FileSystem.get(URI.create("hdfs:/"), conf).getUri();
        hdfsAuthority = "&hdfs-host=" + hdfs.getHost() + "&hdfs-port=" + hdfs.getPort();
    } catch (IOException ex) {
        logger.warn("Could not locate HDFS, hdfs-host and hdfs-port "
                + "will not be set by default for Hive repositories.");
        hdfsAuthority = "";
    }

    Accessor.getDefault().registerDatasetRepository(
            new URIPattern(URI.create("hive://" + hiveAuthority + "/*path?absolute=true" + hdfsAuthority)),
            externalBuilder);
    Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive:*path")), externalBuilder);
}

From source file:com.cloudera.crunch.impl.mr.run.CrunchInputs.java

License:Apache License

public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass,
        int nodeIndex) {
    Configuration conf = job.getConfiguration();
    String inputs = JOINER.join(inputFormatClass.getName(), nodeIndex, path.toString());
    String existing = conf.get(RuntimeParameters.MULTI_INPUTS);
    conf.set(RuntimeParameters.MULTI_INPUTS, existing == null ? inputs : existing + RECORD_SEP + inputs);
}

From source file:com.cloudera.crunch.impl.mr.run.CrunchInputs.java

License:Apache License

public static Map<Class<? extends InputFormat>, Map<Integer, List<Path>>> getFormatNodeMap(JobContext job) {
    Map<Class<? extends InputFormat>, Map<Integer, List<Path>>> formatNodeMap = Maps.newHashMap();
    Configuration conf = job.getConfiguration();
    for (String input : Splitter.on(RECORD_SEP).split(conf.get(RuntimeParameters.MULTI_INPUTS))) {
        List<String> fields = ImmutableList.copyOf(SPLITTER.split(input));
        Class<? extends InputFormat> inputFormatClass;
        try {
            inputFormatClass = (Class<? extends InputFormat>) conf.getClassByName(fields.get(0));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        if (!formatNodeMap.containsKey(inputFormatClass)) {
            formatNodeMap.put(inputFormatClass, Maps.<Integer, List<Path>>newHashMap());
        }
        Integer nodeIndex = Integer.valueOf(fields.get(1));
        if (!formatNodeMap.get(inputFormatClass).containsKey(nodeIndex)) {
            formatNodeMap.get(inputFormatClass).put(nodeIndex, Lists.<Path>newLinkedList());
        }
        formatNodeMap.get(inputFormatClass).get(nodeIndex).add(new Path(fields.get(2)));
    }
    return formatNodeMap;
}
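
The two CrunchInputs methods cooperate through the same property: addInputPath reads the current value with conf.get and appends a record, while getFormatNodeMap reads the accumulated value back and splits it into a per-format, per-node map. Below is a hedged sketch of a caller exercising both; the paths and node indices are illustrative, and Job.getInstance assumes a Hadoop 2.x-style API.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import com.cloudera.crunch.impl.mr.run.CrunchInputs;

public class CrunchInputsExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();

        // Each call reads the current property value with conf.get and appends one record.
        CrunchInputs.addInputPath(job, new Path("/data/part-0"), TextInputFormat.class, 0);
        CrunchInputs.addInputPath(job, new Path("/data/part-1"), TextInputFormat.class, 1);

        // getFormatNodeMap splits the accumulated value back into
        // inputFormat -> node index -> paths.
        System.out.println(CrunchInputs.getFormatNodeMap(job));
    }
}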

From source file:com.cloudera.crunch.io.avro.AvroFileSourceTarget.java

License:Open Source License

@Override
public void configureSource(Job job, int inputId) throws IOException {
    SourceTargetHelper.configureSource(job, inputId, AvroInputFormat.class, path);

    Configuration conf = job.getConfiguration();
    String inputSchema = conf.get("avro.input.schema");
    if (inputSchema == null) {
        conf.set("avro.input.schema", ptype.getSchema().toString());
    } else if (!inputSchema.equals(ptype.getSchema().toString())) {
        throw new IllegalStateException("Multiple Avro sources must use the same schema");
    }
}