List of usage examples for org.apache.hadoop.conf Configuration setClass
public void setClass(String name, Class<?> theClass, Class<?> xface)

Sets the value of the name property to the name of a theClass implementing the given interface xface. An exception is thrown if theClass does not implement the interface xface.
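Before the per-project examples, a minimal sketch of the round trip, assuming a made-up property name (my.codec.impl) and standard Hadoop classes: setClass checks the class against the interface and records the class name under the property, and getClass resolves it back.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;

public class SetClassSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // setClass verifies that GzipCodec implements CompressionCodec
        // (throwing a RuntimeException otherwise) and stores the class name
        conf.setClass("my.codec.impl", GzipCodec.class, CompressionCodec.class);

        // getClass resolves the stored name back to a Class; the second
        // argument is the default used when the property is unset
        Class<? extends CompressionCodec> codecClass =
                conf.getClass("my.codec.impl", GzipCodec.class, CompressionCodec.class);

        System.out.println(codecClass.getName()); // org.apache.hadoop.io.compress.GzipCodec
    }
}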
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
private Configuration createConfiguration(int... numberOfLines) {
    Configuration conf = new Configuration();
    if (numberOfLines.length > 0) {
        conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
    }
    // run against the local file system rather than HDFS
    conf.set("fs.default.name", "file:///");
    conf.setBoolean("fs.file.impl.disable.cache", false);
    // pin file:// URIs to the non-checksumming RawLocalFileSystem
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
    return conf;
}
From source file:boa.runtime.BoaRunner.java
License:Apache License
/**
 * Create a {@link Job} describing the work to be done by this Boa job.
 *
 * @param ins
 *            An array of {@link Path} containing the locations of the input files
 * @param out
 *            A {@link Path} containing the location of the output file
 * @param robust
 *            A boolean representing whether the job should ignore most exceptions
 *
 * @return A {@link Job} describing the work to be done by this Boa job
 * @throws IOException
 */
public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException {
    final Configuration configuration = getConf();

    configuration.setBoolean("boa.runtime.robust", robust);

    // faster local reads
    configuration.setBoolean("dfs.client.read.shortcircuit", true);
    configuration.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);

    // by default our MapFiles index every key, which takes up a lot of memory -
    // this lets you skip keys in the index and control the memory requirements
    // (as a tradeoff of slower gets)
    //configuration.setLong("io.map.index.skip", 128);

    // map output compression
    configuration.setBoolean("mapred.compress.map.output", true);
    configuration.set("mapred.map.output.compression.type", "BLOCK");
    configuration.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    configuration.setBoolean("mapred.map.tasks.speculative.execution", false);
    configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    configuration.setLong("mapred.job.reuse.jvm.num.tasks", -1);

    final Job job = new Job(configuration);

    if (ins != null)
        for (final Path in : ins)
            FileInputFormat.addInputPath(job, in);

    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(BoaPartitioner.class);

    job.setMapOutputKeyClass(EmitKey.class);
    job.setMapOutputValueClass(EmitValue.class);

    job.setOutputFormatClass(BoaOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job;
}
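A hedged usage sketch of the method above; the driver class, the input/output paths, and the robust flag are invented for illustration:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

// hypothetical driver; assumes a concrete BoaRunner instance is supplied by the caller
public class BoaDriver {
    public static void run(BoaRunner runner) throws Exception {
        Job job = runner.job(new Path[] { new Path("/boa/input") }, new Path("/boa/output"), true);
        job.waitForCompletion(true);
    }
}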
From source file:cascading.platform.tez.Hadoop2TezPlatform.java
License:Open Source License
protected boolean setTimelineStore(Configuration configuration) {
    try {
        // try hadoop 2.6
        Class<?> target = Util.loadClass("org.apache.hadoop.yarn.server.timeline.TimelineStore");
        Class<?> type = Util.loadClass("org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore");

        configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target);

        try {
            // hadoop 2.5 has the above classes, but this one is also necessary
            // for the timeline service with acls to function
            Util.loadClass("org.apache.hadoop.yarn.api.records.timeline.TimelineDomain");
        } catch (CascadingException exception) {
            configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
        }

        return true;
    } catch (CascadingException exception) {
        try {
            // try hadoop 2.4
            Class<?> target = Util.loadClass(
                    "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.TimelineStore");
            Class<?> type = Util.loadClass(
                    "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.MemoryTimelineStore");

            configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target);
            configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);

            return true;
        } catch (CascadingException ignore) {
            return false;
        }
    }
}
From source file:cascading.scheme.hadoop.SequenceFile.java
License:Open Source License
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", SequenceFileInputFormat.class, InputFormat.class);
}
From source file:cascading.scheme.hadoop.SequenceFile.java
License:Open Source License
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.output.key.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.value.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));

    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (tap.getFullIdentifier(conf).endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + HadoopUtil.getOutputPath(conf));

    conf.setBoolean("mapred.mapper.new-api", false);

    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);

    conf.setClass("mapred.output.key.class", Text.class, Object.class);
    conf.setClass("mapred.output.value.class", Text.class, Object.class);
    conf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
}
From source file:cascading.scheme.hadoop.WritableSequenceFile.java
License:Open Source License
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (keyType != null)
        conf.setClass("mapred.output.key.class", keyType, Object.class);
    else
        conf.setClass("mapred.output.key.class", NullWritable.class, Object.class);

    if (valueType != null)
        conf.setClass("mapred.output.value.class", valueType, Object.class);
    else
        conf.setClass("mapred.output.value.class", NullWritable.class, Object.class);

    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}
From source file:cascading.tap.hadoop.Hfs.java
License:Open Source License
/**
 * Based on the configuration, handles and sets {@link CombineFileInputFormat} as the input format.
 */
private void handleCombineFileInputFormat(Configuration conf) {
    // if combining files, override the configuration to use CombineFileInputFormat
    if (!getUseCombinedInput(conf))
        return;

    // get the prescribed individual input format from the underlying scheme so it can be used by CombinedInputFormat
    String individualInputFormat = conf.get("mapred.input.format.class");

    if (individualInputFormat == null)
        throw new TapException("input format is missing from the underlying scheme");

    if (individualInputFormat.equals(CombinedInputFormat.class.getName())
            && conf.get(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT) == null)
        throw new TapException(
                "the input format class is already the combined input format but the underlying input format is missing");

    // if safe mode is on (default), throw an exception if the InputFormat is not a FileInputFormat,
    // otherwise log a warning and don't use the CombineFileInputFormat
    boolean safeMode = getCombinedInputSafeMode(conf);

    if (!FileInputFormat.class.isAssignableFrom(conf.getClass("mapred.input.format.class", null))) {
        if (safeMode)
            throw new TapException("input format must be of type org.apache.hadoop.mapred.FileInputFormat, got: "
                    + individualInputFormat);
        else
            LOG.warn("not combining input splits with CombineFileInputFormat, {} is not of type org.apache.hadoop.mapred.FileInputFormat.",
                    individualInputFormat);
    } else {
        // set the underlying individual input format
        conf.set(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT, individualInputFormat);

        // override the input format class
        conf.setClass("mapred.input.format.class", CombinedInputFormat.class, InputFormat.class);
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
/**
 * Adds a named output for the job.
 *
 * @param job job to add the named output
 * @param namedOutput named output name; it has to be a word, letters and numbers only (alphanumeric)
 * @param outputFormatClass name of the OutputFormat class
 * @param keyClass key class
 * @param valueClass value class
 * @param outputConfigs configurations for the output
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, String outputFormatClass, Class<?> keyClass,
        Class<?> valueClass, Map<String, String> outputConfigs) {
    assertValidName(namedOutput);
    checkNamedOutputName(namedOutput, getNamedOutputsList(job), false);
    Configuration conf = job.getConfiguration();
    conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
    conf.set(MO_PREFIX + namedOutput + FORMAT, outputFormatClass);
    conf.setClass(MO_PREFIX + namedOutput + KEY, keyClass, Object.class);
    conf.setClass(MO_PREFIX + namedOutput + VALUE, valueClass, Object.class);
    ConfigurationUtil.setNamedConfigurations(conf, computePrefixName(namedOutput), outputConfigs);
}
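A minimal, hypothetical call for the method above; the output name "counts", the TextOutputFormat/Text/LongWritable choices, and the empty config map are assumptions for illustration:

import java.util.Collections;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class NamedOutputExample {
    public static void configure(Job job) {
        // registers an alphanumeric named output; the key/value classes end up
        // in the configuration via the conf.setClass calls shown above
        MultipleOutputs.addNamedOutput(job, "counts", TextOutputFormat.class.getName(),
                Text.class, LongWritable.class, Collections.<String, String>emptyMap());
    }
}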