List of usage examples for org.apache.hadoop.conf.Configuration.get
public String get(String name)
Returns the value of the name property, or null if no such property exists.
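Before the project examples, here is a minimal, self-contained sketch of the basic call pattern. The property key "my.app.schema.path" and the fallback value are made-up placeholders for illustration only; they are not keys used by Hadoop or by any of the source files listed below.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetExample {
    public static void main(String[] args) {
        // Loads core-default.xml and core-site.xml from the classpath, if present.
        Configuration conf = new Configuration();

        // "my.app.schema.path" is an illustrative key; get() returns null when the
        // property is not defined in any loaded resource.
        String value = conf.get("my.app.schema.path");
        if (value == null) {
            // No such property, so fall back to a caller-chosen default.
            value = "/tmp/default-schema.avsc";
        }
        System.out.println("my.app.schema.path = " + value);
    }
}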
From source file:com.ci.backports.avro.mapreduce.AvroJob.java
License:Apache License
/** Return a job's map input schema. */
public static Schema getInputSchema(Configuration conf) {
    String schemaString = conf.get(INPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}
From source file:com.ci.backports.avro.mapreduce.AvroJob.java
License:Apache License
/** Return a job's map output key schema. */
public static Schema getMapOutputKeySchema(Configuration conf) {
    String schemaString = conf.get(KEY_MAP_OUTPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}
From source file:com.ci.backports.avro.mapreduce.AvroJob.java
License:Apache License
/** Return a job's map output value schema. */
public static Schema getMapOutputValueSchema(Configuration conf) {
    String schemaString = conf.get(VALUE_MAP_OUTPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}
From source file:com.ci.backports.avro.mapreduce.AvroJob.java
License:Apache License
/** Return a job's output schema. */
public static Schema getOutputSchema(Configuration conf) {
    String schemaString = conf.get(OUTPUT_SCHEMA_CONFIG_FIELD);
    return schemaString != null ? Schema.parse(schemaString) : null;
}
From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License:Apache License
/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either
 * KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either
 * KeyValue or Put before running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table,
        Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(hfileOutputFormatBase);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(TextSortReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    conf.setStrings("io.serializations", conf.get("io.serializations"),
            MutationSerialization.class.getName(), ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName()));
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count");
    job.setNumReduceTasks(startKeys.size());
    configurePartitioner(job, startKeys);

    // Set compression algorithms based on column families
    configureCompression(table, conf);
    configureBloomType(table, conf);
    configureBlockSize(table, conf);

    // TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured.");
}
From source file:com.cloudera.cdk.data.hcatalog.HCatalog.java
License:Apache License
public HCatalog(Configuration conf) {
    if (conf.get(Loader.HIVE_METASTORE_URI_PROP) == null) {
        LOG.warn("Using a local Hive MetaStore (for testing only)");
    }
    try {
        hiveConf = new HiveConf(conf, HiveConf.class);
        client = HCatUtil.getHiveClient(hiveConf);
    } catch (Exception e) {
        throw new RuntimeException("Hive metastore exception", e);
    }
}
From source file:com.cloudera.cdk.data.hcatalog.impl.Loader.java
License:Apache License
@Override
public void load() {
    // get a default Configuration to configure defaults (so it's okay!)
    final Configuration conf = new Configuration();
    String hiveAuthority;
    if (conf.get(HIVE_METASTORE_URI_PROP) != null) {
        try {
            hiveAuthority = new URI(conf.get(HIVE_METASTORE_URI_PROP)).getAuthority();
        } catch (URISyntaxException ex) {
            hiveAuthority = "";
        }
    } else {
        hiveAuthority = "";
    }

    // Hive-managed data sets
    final OptionBuilder<DatasetRepository> managedBuilder = new ManagedBuilder(conf);
    Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive")), managedBuilder);
    Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive://" + hiveAuthority + "/")),
            managedBuilder);

    // external data sets
    final OptionBuilder<DatasetRepository> externalBuilder = new ExternalBuilder(conf);
    String hdfsAuthority;
    try {
        // Use a HDFS URI with no authority and the environment's configuration
        // to find the default HDFS information
        final URI hdfs = FileSystem.get(URI.create("hdfs:/"), conf).getUri();
        hdfsAuthority = "&hdfs-host=" + hdfs.getHost() + "&hdfs-port=" + hdfs.getPort();
    } catch (IOException ex) {
        logger.warn("Could not locate HDFS, hdfs-host and hdfs-port "
                + "will not be set by default for Hive repositories.");
        hdfsAuthority = "";
    }
    Accessor.getDefault().registerDatasetRepository(
            new URIPattern(URI.create("hive://" + hiveAuthority + "/*path?absolute=true" + hdfsAuthority)),
            externalBuilder);
    Accessor.getDefault().registerDatasetRepository(new URIPattern(URI.create("hive:*path")), externalBuilder);
}
From source file:com.cloudera.crunch.impl.mr.run.CrunchInputs.java
License:Apache License
public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass,
        int nodeIndex) {
    Configuration conf = job.getConfiguration();
    String inputs = JOINER.join(inputFormatClass.getName(), nodeIndex, path.toString());
    String existing = conf.get(RuntimeParameters.MULTI_INPUTS);
    conf.set(RuntimeParameters.MULTI_INPUTS, existing == null ? inputs : existing + RECORD_SEP + inputs);
}
From source file:com.cloudera.crunch.impl.mr.run.CrunchInputs.java
License:Apache License
public static Map<Class<? extends InputFormat>, Map<Integer, List<Path>>> getFormatNodeMap(JobContext job) {
    Map<Class<? extends InputFormat>, Map<Integer, List<Path>>> formatNodeMap = Maps.newHashMap();
    Configuration conf = job.getConfiguration();
    for (String input : Splitter.on(RECORD_SEP).split(conf.get(RuntimeParameters.MULTI_INPUTS))) {
        List<String> fields = ImmutableList.copyOf(SPLITTER.split(input));
        Class<? extends InputFormat> inputFormatClass;
        try {
            inputFormatClass = (Class<? extends InputFormat>) conf.getClassByName(fields.get(0));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        if (!formatNodeMap.containsKey(inputFormatClass)) {
            formatNodeMap.put(inputFormatClass, Maps.<Integer, List<Path>>newHashMap());
        }
        Integer nodeIndex = Integer.valueOf(fields.get(1));
        if (!formatNodeMap.get(inputFormatClass).containsKey(nodeIndex)) {
            formatNodeMap.get(inputFormatClass).put(nodeIndex, Lists.<Path>newLinkedList());
        }
        formatNodeMap.get(inputFormatClass).get(nodeIndex).add(new Path(fields.get(2)));
    }
    return formatNodeMap;
}
From source file:com.cloudera.crunch.io.avro.AvroFileSourceTarget.java
License:Open Source License
@Override
public void configureSource(Job job, int inputId) throws IOException {
    SourceTargetHelper.configureSource(job, inputId, AvroInputFormat.class, path);
    Configuration conf = job.getConfiguration();
    String inputSchema = conf.get("avro.input.schema");
    if (inputSchema == null) {
        conf.set("avro.input.schema", ptype.getSchema().toString());
    } else if (!inputSchema.equals(ptype.getSchema().toString())) {
        throw new IllegalStateException("Multiple Avro sources must use the same schema");
    }
}