List of usage examples for org.apache.hadoop.conf.Configuration.get
public String get(String name)

Gets the value of the name property, or null if no such property exists.
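Before the per-project examples, a minimal standalone sketch of this behavior. It is not taken from any of the source files below; the key names are hypothetical, and it also shows the two-argument get(name, defaultValue) overload for comparison.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationGetExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("example.key", "example-value"); // hypothetical key names

        // Returns the stored value.
        String present = conf.get("example.key");

        // Returns null because the property was never set.
        String missing = conf.get("example.missing.key");

        // The two-argument overload substitutes a default instead of returning null.
        String withDefault = conf.get("example.missing.key", "fallback");

        System.out.println(present + " / " + missing + " / " + withDefault);
    }
}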
From source file:com.cloudera.oryx.als.computation.similar.DistributeSimilarWorkReduceFn.java
License:Open Source License
@Override
public void initialize() {
    super.initialize();
    Configuration conf = getConfiguration();
    String yKey = conf.get(DistributeSimilarWorkStep.Y_KEY_KEY);
    try {
        partialY = ComputationDataUtils.loadPartialY(getPartition(), getNumPartitions(), yKey, conf);
    } catch (IOException e) {
        throw new CrunchRuntimeException(e);
    }
    numSimilar = ConfigUtils.getDefaultConfig().getInt("model.item-similarity.how-many");
    Preconditions.checkArgument(numSimilar > 0, "# similar must be positive: %s", numSimilar);
}
From source file:com.cloudera.oryx.common.servcomp.OryxConfiguration.java
License:Open Source License
private static void configure(Configuration conf) {
    if (!Namespaces.isLocalComputation() || !Namespaces.isLocalData()) {
        File hadoopConfDir = findHadoopConfDir();
        addResource(hadoopConfDir, "core-site.xml", conf);
        addResource(hadoopConfDir, "hdfs-site.xml", conf);
        addResource(hadoopConfDir, "mapred-site.xml", conf);
        addResource(hadoopConfDir, "yarn-site.xml", conf);
        String fsDefaultFS = conf.get(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY);
        if (fsDefaultFS == null || fsDefaultFS.equals(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_DEFAULT)) {
            // Standard config generated by Hadoop 2.0.x seemed to set fs.default.name instead of fs.defaultFS?
            conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, conf.get("fs.default.name"));
        }
        fixLzoCodecIssue(conf);
    }
}
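The fallback above checks get(name) for null explicitly. When a simple substitute value is enough, the two-argument get(name, defaultValue) overload can express a similar fallback more compactly. A minimal standalone sketch, not part of OryxConfiguration: it covers only the missing-key case (not the original's comparison against FS_DEFAULT_NAME_DEFAULT), and the file:/// fallback is just an illustrative value.

import org.apache.hadoop.conf.Configuration;

public class DefaultValueExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // If fs.defaultFS is unset, fall back to the legacy fs.default.name key,
        // and finally to an illustrative local-filesystem default.
        String fsDefaultFS = conf.get("fs.defaultFS",
                conf.get("fs.default.name", "file:///"));
        System.out.println(fsDefaultFS);
    }
}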
From source file:com.cloudera.oryx.common.servcomp.OryxConfiguration.java
License:Open Source License
/**
 * Removes {@code LzoCodec} and {@code LzopCodec} from key {@code io.compression.codecs}.
 * Implementations aren't shipped with Hadoop, but are in some cases instantiated anyway even when unused.
 * So, try to erase them.
 */
private static void fixLzoCodecIssue(Configuration conf) {
    String codecsProperty = conf.get("io.compression.codecs");
    if (codecsProperty != null && codecsProperty.contains(".lzo.Lzo")) {
        List<String> codecs = Lists.newArrayList(Splitter.on(',').split(codecsProperty));
        for (Iterator<String> it = codecs.iterator(); it.hasNext();) {
            if (it.next().contains(".lzo.Lzo")) {
                it.remove();
            }
        }
        conf.set("io.compression.codecs", Joiner.on(',').join(codecs));
    }
}
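To make the before/after effect concrete, a hypothetical standalone illustration: the codec class names are assumptions rather than values from any particular cluster config, and the filtering loop is repeated inline here only because fixLzoCodecIssue is private to OryxConfiguration.

import java.util.Iterator;
import java.util.List;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;

public class LzoCodecRemovalExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical starting value containing an LZO codec entry.
        conf.set("io.compression.codecs",
                "org.apache.hadoop.io.compress.GzipCodec,com.hadoop.compression.lzo.LzoCodec");

        // Same filtering idea as in fixLzoCodecIssue above.
        String codecsProperty = conf.get("io.compression.codecs");
        if (codecsProperty != null && codecsProperty.contains(".lzo.Lzo")) {
            List<String> codecs = Lists.newArrayList(Splitter.on(',').split(codecsProperty));
            for (Iterator<String> it = codecs.iterator(); it.hasNext();) {
                if (it.next().contains(".lzo.Lzo")) {
                    it.remove();
                }
            }
            conf.set("io.compression.codecs", Joiner.on(',').join(codecs));
        }

        // Prints only the non-LZO entry: org.apache.hadoop.io.compress.GzipCodec
        System.out.println(conf.get("io.compression.codecs"));
    }
}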
From source file:com.cloudera.oryx.computation.common.JobStep.java
License:Open Source License
/**
 * Creates a new {@link MRPipeline} instance that contains common configuration settings.
 *
 * @return a new {@link MRPipeline} instance, suitably configured
 */
protected final MRPipeline createBasicPipeline(Class<?> jarClass) throws IOException {
    Configuration conf = OryxConfiguration.get(getConf());

    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    conf.setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.type", "BLOCK");
    conf.setClass("mapred.output.compression.codec", SnappyCodec.class, CompressionCodec.class);
    // Set old-style equivalents for Avro/Crunch's benefit
    conf.set("avro.output.codec", "snappy");

    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, true);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, true);
    conf.setBoolean(TTConfig.TT_OUTOFBAND_HEARBEAT, true);
    conf.setInt(MRJobConfig.JVM_NUMTASKS_TORUN, -1);

    //conf.setBoolean("crunch.disable.deep.copy", true);
    // Giving one mapper a lot of data can cause issues in some stages, so default to disable this
    conf.setBoolean("crunch.disable.combine.file", true);

    Config appConfig = ConfigUtils.getDefaultConfig();

    conf.set("crunch.tmp.dir", appConfig.getString("computation-layer.tmp-dir"));

    int mapMemoryMB = appConfig.getInt("computation-layer.mapper-memory-mb");
    log.info("Mapper memory: {}", mapMemoryMB);
    int mapHeapMB = (int) (mapMemoryMB / 1.3); // Matches Hadoop's default
    log.info("Mappers have {}MB heap and can access {}MB RAM", mapHeapMB, mapMemoryMB);
    if (conf.get(MRJobConfig.MAP_JAVA_OPTS) != null) {
        log.info("Overriding previous setting of {}, which was '{}'", MRJobConfig.MAP_JAVA_OPTS,
                conf.get(MRJobConfig.MAP_JAVA_OPTS));
    }
    conf.set(MRJobConfig.MAP_JAVA_OPTS,
            "-Xmx" + mapHeapMB + "m -XX:+UseCompressedOops -XX:+UseParallelGC -XX:+UseParallelOldGC");
    log.info("Set {} to '{}'", MRJobConfig.MAP_JAVA_OPTS, conf.get(MRJobConfig.MAP_JAVA_OPTS));
    // See comment below on CM
    conf.setInt("mapreduce.map.java.opts.max.heap", mapHeapMB);

    int reduceMemoryMB = appConfig.getInt("computation-layer.reducer-memory-mb");
    log.info("Reducer memory: {}", reduceMemoryMB);
    if (isHighMemoryStep()) {
        reduceMemoryMB *= appConfig.getInt("computation-layer.worker-high-memory-factor");
        log.info("Increasing {} to {} for high-memory step", MRJobConfig.REDUCE_MEMORY_MB, reduceMemoryMB);
    }
    conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, reduceMemoryMB);
    int reduceHeapMB = (int) (reduceMemoryMB / 1.3); // Matches Hadoop's default
    log.info("Reducers have {}MB heap and can access {}MB RAM", reduceHeapMB, reduceMemoryMB);
    if (conf.get(MRJobConfig.REDUCE_JAVA_OPTS) != null) {
        log.info("Overriding previous setting of {}, which was '{}'", MRJobConfig.REDUCE_JAVA_OPTS,
                conf.get(MRJobConfig.REDUCE_JAVA_OPTS));
    }
    conf.set(MRJobConfig.REDUCE_JAVA_OPTS,
            "-Xmx" + reduceHeapMB + "m -XX:+UseCompressedOops -XX:+UseParallelGC -XX:+UseParallelOldGC");
    log.info("Set {} to '{}'", MRJobConfig.REDUCE_JAVA_OPTS, conf.get(MRJobConfig.REDUCE_JAVA_OPTS));
    // I see this in CM but not in Hadoop docs; probably won't hurt as it's supposed to result in
    // -Xmx appended to opts above, which is at worst redundant
    conf.setInt("mapreduce.reduce.java.opts.max.heap", reduceHeapMB);

    conf.setInt("yarn.scheduler.capacity.minimum-allocation-mb", 128);
    conf.setInt("yarn.app.mapreduce.am.resource.mb", 384);

    // Pass total config state
    conf.set(CONFIG_SERIALIZATION_KEY, ConfigUtils.getDefaultConfig().root().render());

    // Make sure to set any args to conf above this line!
    setConf(conf);

    Job job = Job.getInstance(conf);

    // Basic File IO settings
    FileInputFormat.setMaxInputSplitSize(job, 1L << 28); // ~268MB
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

    log.info("Created pipeline configuration {}", job.getConfiguration());

    return new MRPipeline(jarClass, getCustomJobName(), job.getConfiguration());
}
From source file:com.cloudera.recordservice.hcatalog.mapreduce.HCatRSBaseInputFormat.java
License:Apache License
private static HCatSchema getOutputSchema(Configuration conf) throws IOException {
    String os = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA);
    if (os == null) {
        return getTableSchema(conf);
    } else {
        return (HCatSchema) HCatUtil.deserialize(os);
    }
}
From source file:com.cloudera.recordservice.hcatalog.mapreduce.HCatRSBaseInputFormat.java
License:Apache License
/**
 * Gets the InputJobInfo object by reading the Configuration and deserializing
 * the string. If InputJobInfo is not present in the configuration, throws an
 * exception since that means HCatInputFormat.setInput has not been called.
 * @param conf the Configuration object
 * @return the InputJobInfo object
 * @throws IOException the exception
 */
private static InputJobInfo getJobInfo(Configuration conf) throws IOException {
    String jobString = conf.get(HCatConstants.HCAT_KEY_JOB_INFO);
    if (jobString == null) {
        throw new IOException(
                "job information not found in JobContext." + " HCatInputFormat.setInput() not called?");
    }
    return (InputJobInfo) HCatRSUtil.deserialize(jobString);
}
From source file:com.cloudera.recordservice.hcatalog.mapreduce.HCatRSInputFormat.java
License:Apache License
/**
 * Initializes the input with a provided filter.
 * See {@link #setInput(Configuration, String, String, String)}
 */
public static HCatRSInputFormat setInput(Job job, String location, String filter) throws IOException {
    Configuration conf = job.getConfiguration();
    String kerberosPrincipal = conf.get(ConfVars.KERBEROS_PRINCIPAL_CONF.name);
    Pair<String, String> dbTablePair = HCatUtil.getDbAndTableName(location);
    dbTablePair = HCatRSUtil.cleanQueryPair(dbTablePair);
    String dbName = dbTablePair.first;
    String tableName = dbTablePair.second;
    if (location.toLowerCase().startsWith("select")) {
        RecordServiceConfig.setInputQuery(conf, location);
    } else {
        RecordServiceConfig.setInputTable(conf, dbName, tableName);
    }
    Credentials credentials = job.getCredentials();
    RecordServicePlannerClient.Builder builder = PlanUtil.getBuilder(conf);
    List<NetworkAddress> plannerHosts = PlanUtil.getPlannerHostPorts(conf);
    RecordServicePlannerClient planner = PlanUtil.getPlanner(conf, builder, plannerHosts, kerberosPrincipal,
            credentials);
    try {
        if (planner.isKerberosAuthenticated()) {
            Token<DelegationTokenIdentifier> delegationToken = TokenUtils
                    .fromTDelegationToken(planner.getDelegationToken(""));
            credentials.addToken(DelegationTokenIdentifier.DELEGATION_KIND, delegationToken);
        }
    } catch (RecordServiceException e) {
        throw new IOException(e);
    } finally {
        if (planner != null) planner.close();
    }
    job.setInputFormatClass(HCatRSInputFormat.class);
    return setInput(conf, dbName, tableName, filter);
}
From source file:com.cloudera.recordservice.mr.PlanUtil.java
License:Apache License
/**
 * Generates a request from the configs set in jobConf.
 */
public static Request getRequest(Configuration jobConf) throws IOException {
    LOG.debug("Generating input splits.");
    String tblName = jobConf.get(ConfVars.TBL_NAME_CONF.name);
    String inputDir = jobConf.get(FileInputFormat.INPUT_DIR);
    String sqlQuery = jobConf.get(ConfVars.QUERY_NAME_CONF.name);
    int numSet = 0;
    if (tblName != null) ++numSet;
    if (inputDir != null) ++numSet;
    if (sqlQuery != null) ++numSet;
    if (numSet == 0) {
        throw new IllegalArgumentException("No input specified. Specify either '" + ConfVars.TBL_NAME_CONF.name
                + "', '" + ConfVars.QUERY_NAME_CONF.name + "' or '" + FileInputFormat.INPUT_DIR + "'");
    }
    if (numSet > 1) {
        throw new IllegalArgumentException("More than one input specified. Can " + "only specify one of '"
                + ConfVars.TBL_NAME_CONF.name + "'=" + tblName + ", '" + FileInputFormat.INPUT_DIR + "'=" + inputDir
                + ", '" + ConfVars.QUERY_NAME_CONF.name + "'=" + sqlQuery);
    }

    String[] colNames = jobConf.getStrings(ConfVars.COL_NAMES_CONF.name);
    if (colNames == null) colNames = new String[0];

    if (tblName == null && colNames.length > 0) {
        // TODO: support this.
        throw new IllegalArgumentException("Column projections can only be specified with table inputs.");
    }

    Request request = null;
    if (tblName != null) {
        if (colNames.length == 0) {
            // If length of colNames = 0, return all possible columns
            // TODO: this has slightly different meaning than createProjectionRequest()
            // which treats empty columns as an empty projection. i.e. select * vs count(*)
            // Reconcile this.
            request = Request.createTableScanRequest(tblName);
        } else {
            List<String> projection = new ArrayList<String>();
            for (String c : colNames) {
                if (c == null || c.isEmpty()) {
                    throw new IllegalArgumentException(
                            "Cannot specify projection with null or empty column name.");
                }
                projection.add(c);
            }
            request = Request.createProjectionRequest(tblName, projection);
        }
    } else if (inputDir != null) {
        // TODO: inputDir is a comma separate list of paths. The service needs to
        // handle that.
        if (inputDir.contains(",")) {
            throw new IllegalArgumentException("Only reading a single directory is currently supported.");
        }
        request = Request.createPathRequest(inputDir);
    } else if (sqlQuery != null) {
        request = Request.createSqlRequest(sqlQuery);
    } else {
        Preconditions.checkState(false);
    }
    return request;
}
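The projection handling above relies on Configuration.getStrings(name), which splits a comma-delimited property into a String array and returns null when the property is unset. A minimal standalone sketch of that behavior; the key names are hypothetical, not the RecordService ConfVars used above.

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;

public class GetStringsExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setStrings("example.columns", "id", "name", "price"); // hypothetical key

        // Stored as a single comma-delimited string ...
        System.out.println(conf.get("example.columns"));
        // ... and read back as an array.
        System.out.println(Arrays.toString(conf.getStrings("example.columns")));

        // Unset properties come back as null, matching the colNames null check above.
        System.out.println(conf.getStrings("example.missing.columns"));
    }
}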
From source file:com.cloudera.recordservice.mr.PlanUtil.java
License:Apache License
/**
 * Given a configuration, returns a list of network addresses for all the planners.
 * This first tries to use the planner auto discovery feature and use ZooKeeper
 * to find all the available planners. If that is not feasible, it tries to use
 * the hardcoded planner host/port lists in the configuration. In case that is
 * not feasible as well, it uses the default planner host/port.
 * @param conf the hadoop job configuration
 * @return a list of network addresses for all the available planners
 */
public static List<NetworkAddress> getPlannerHostPorts(Configuration conf) throws IOException {
    List<NetworkAddress> plannerHostPorts = null;
    if (isPlannerDiscoveryEnabled(conf)) {
        try {
            LOG.info("Using planner auto discovery on ZK connection string {}",
                    conf.get(ConfVars.ZOOKEEPER_CONNECTION_STRING_CONF.name));
            plannerHostPorts = ZooKeeperUtil.getPlanners(conf);
        } catch (IOException e) {
            LOG.warn("Planner discovery failed. Now fall back to use " + ConfVars.PLANNER_HOSTPORTS_CONF.name
                    + " in the job configuration.", e);
        }
    }
    if (plannerHostPorts == null || plannerHostPorts.isEmpty()) {
        plannerHostPorts = RecordServiceConfig.getPlannerHostPort(
                conf.get(ConfVars.PLANNER_HOSTPORTS_CONF.name, RecordServiceConfig.DEFAULT_PLANNER_HOSTPORTS));
    }
    return plannerHostPorts;
}
From source file:com.cloudera.recordservice.mr.PlanUtil.java
License:Apache License
/**
 * Returns the kerberos principal to connect with.
 */
public static String getKerberosPrincipal(Configuration conf) {
    return conf.get(ConfVars.KERBEROS_PRINCIPAL_CONF.name);
}