List of usage examples for org.apache.hadoop.conf Configuration setClass
public void setClass(String name, Class<?> theClass, Class<?> xface)

Sets the value of the name property to the name of a theClass implementing the given interface xface. An exception is thrown if theClass does not implement the interface xface.
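Before the per-project examples, a minimal sketch of the round trip, assuming a made-up property name (my.codec.impl) and standard Hadoop classes: setClass checks the class against the interface and records the class name under the property, and getClass resolves it back.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;

public class SetClassSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // setClass verifies that GzipCodec implements CompressionCodec
        // (throwing a RuntimeException otherwise) and stores the class name
        conf.setClass("my.codec.impl", GzipCodec.class, CompressionCodec.class);

        // getClass resolves the stored name back to a Class; the second
        // argument is the default used when the property is unset
        Class<? extends CompressionCodec> codecClass =
                conf.getClass("my.codec.impl", GzipCodec.class, CompressionCodec.class);

        System.out.println(codecClass.getName()); // org.apache.hadoop.io.compress.GzipCodec
    }
}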
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
private Configuration createConfiguration(int... numberOfLines) {
    Configuration conf = new Configuration();
    if (numberOfLines.length > 0) {
        conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
    }
    // run against the local file system rather than HDFS
    conf.set("fs.default.name", "file:///");
    conf.setBoolean("fs.file.impl.disable.cache", false);
    // pin file:// URIs to the non-checksumming RawLocalFileSystem
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
    return conf;
}
From source file:boa.runtime.BoaRunner.java
License:Apache License
/**
 * Create a {@link Job} describing the work to be done by this Boa job.
 *
 * @param ins
 *            An array of {@link Path} containing the locations of the input files
 * @param out
 *            A {@link Path} containing the location of the output file
 * @param robust
 *            A boolean representing whether the job should ignore most exceptions
 *
 * @return A {@link Job} describing the work to be done by this Boa job
 * @throws IOException
 */
public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException {
    final Configuration configuration = getConf();

    configuration.setBoolean("boa.runtime.robust", robust);

    // faster local reads
    configuration.setBoolean("dfs.client.read.shortcircuit", true);
    configuration.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);

    // by default our MapFiles index every key, which takes up a lot of memory -
    // this lets you skip keys in the index and control the memory requirements
    // (as a tradeoff of slower gets)
    //configuration.setLong("io.map.index.skip", 128);

    // map output compression
    configuration.setBoolean("mapred.compress.map.output", true);
    configuration.set("mapred.map.output.compression.type", "BLOCK");
    configuration.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    configuration.setBoolean("mapred.map.tasks.speculative.execution", false);
    configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    configuration.setLong("mapred.job.reuse.jvm.num.tasks", -1);

    final Job job = new Job(configuration);

    if (ins != null)
        for (final Path in : ins)
            FileInputFormat.addInputPath(job, in);

    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(BoaPartitioner.class);

    job.setMapOutputKeyClass(EmitKey.class);
    job.setMapOutputValueClass(EmitValue.class);

    job.setOutputFormatClass(BoaOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job;
}
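A hedged usage sketch of the method above; the driver class, the input/output paths, and the robust flag are invented for illustration:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

// hypothetical driver; assumes a concrete BoaRunner instance is supplied by the caller
public class BoaDriver {
    public static void run(BoaRunner runner) throws Exception {
        Job job = runner.job(new Path[] { new Path("/boa/input") }, new Path("/boa/output"), true);
        job.waitForCompletion(true);
    }
}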
From source file:cascading.platform.tez.Hadoop2TezPlatform.java
License:Open Source License
protected boolean setTimelineStore(Configuration configuration) {
    try {
        // try hadoop 2.6
        Class<?> target = Util.loadClass("org.apache.hadoop.yarn.server.timeline.TimelineStore");
        Class<?> type = Util.loadClass("org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore");

        configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target);

        try {
            // hadoop 2.5 has the above classes, but this one is also necessary
            // for the timeline service with acls to function
            Util.loadClass("org.apache.hadoop.yarn.api.records.timeline.TimelineDomain");
        } catch (CascadingException exception) {
            configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
        }

        return true;
    } catch (CascadingException exception) {
        try {
            // try hadoop 2.4
            Class<?> target = Util.loadClass(
                    "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.TimelineStore");
            Class<?> type = Util.loadClass(
                    "org.apache.hadoop.yarn.server.applicationhistoryservice.timeline.MemoryTimelineStore");

            configuration.setClass(YarnConfiguration.TIMELINE_SERVICE_STORE, type, target);
            configuration.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);

            return true;
        } catch (CascadingException ignore) {
            return false;
        }
    }
}
From source file:cascading.scheme.hadoop.SequenceFile.java
License:Open Source License
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", SequenceFileInputFormat.class, InputFormat.class);
}
From source file:cascading.scheme.hadoop.SequenceFile.java
License:Open Source License
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.output.key.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.value.class", Tuple.class, Object.class);
    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
@Override
public void sourceConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (hasZippedFiles(FileInputFormat.getInputPaths(asJobConfInstance(conf))))
        throw new IllegalStateException("cannot read zip files: "
                + Arrays.toString(FileInputFormat.getInputPaths(asJobConfInstance(conf))));

    conf.setBoolean("mapred.mapper.new-api", false);
    conf.setClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class);
}
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (tap.getFullIdentifier(conf).endsWith(".zip"))
        throw new IllegalStateException("cannot write zip files: " + HadoopUtil.getOutputPath(conf));

    conf.setBoolean("mapred.mapper.new-api", false);

    if (getSinkCompression() == Compress.DISABLE)
        conf.setBoolean("mapred.output.compress", false);
    else if (getSinkCompression() == Compress.ENABLE)
        conf.setBoolean("mapred.output.compress", true);

    conf.setClass("mapred.output.key.class", Text.class, Object.class);
    conf.setClass("mapred.output.value.class", Text.class, Object.class);
    conf.setClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class);
}
From source file:cascading.scheme.hadoop.WritableSequenceFile.java
License:Open Source License
@Override
public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess,
        Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) {
    if (keyType != null)
        conf.setClass("mapred.output.key.class", keyType, Object.class);
    else
        conf.setClass("mapred.output.key.class", NullWritable.class, Object.class);

    if (valueType != null)
        conf.setClass("mapred.output.value.class", valueType, Object.class);
    else
        conf.setClass("mapred.output.value.class", NullWritable.class, Object.class);

    conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class);
}
From source file:cascading.tap.hadoop.Hfs.java
License:Open Source License
/**
 * Based on the configuration, handles and sets {@link CombineFileInputFormat} as the input format.
 */
private void handleCombineFileInputFormat(Configuration conf) {
    // if combining files, override the configuration to use CombineFileInputFormat
    if (!getUseCombinedInput(conf))
        return;

    // get the prescribed individual input format from the underlying scheme so it can be used by CombinedInputFormat
    String individualInputFormat = conf.get("mapred.input.format.class");

    if (individualInputFormat == null)
        throw new TapException("input format is missing from the underlying scheme");

    if (individualInputFormat.equals(CombinedInputFormat.class.getName())
            && conf.get(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT) == null)
        throw new TapException(
                "the input format class is already the combined input format but the underlying input format is missing");

    // if safe mode is on (default), throw an exception if the InputFormat is not a FileInputFormat,
    // otherwise log a warning and don't use the CombineFileInputFormat
    boolean safeMode = getCombinedInputSafeMode(conf);

    if (!FileInputFormat.class.isAssignableFrom(conf.getClass("mapred.input.format.class", null))) {
        if (safeMode)
            throw new TapException("input format must be of type org.apache.hadoop.mapred.FileInputFormat, got: "
                    + individualInputFormat);
        else
            LOG.warn("not combining input splits with CombineFileInputFormat, {} is not of type org.apache.hadoop.mapred.FileInputFormat.",
                    individualInputFormat);
    } else {
        // set the underlying individual input format
        conf.set(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT, individualInputFormat);

        // override the input format class
        conf.setClass("mapred.input.format.class", CombinedInputFormat.class, InputFormat.class);
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java
License:Apache License
/**
 * Adds a named output for the job.
 *
 * @param job job to add the named output
 * @param namedOutput named output name; it has to be a word, letters and numbers only (alphanumeric)
 * @param outputFormatClass name of the OutputFormat class
 * @param keyClass key class
 * @param valueClass value class
 * @param outputConfigs configurations for the output
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, String outputFormatClass, Class<?> keyClass,
        Class<?> valueClass, Map<String, String> outputConfigs) {
    assertValidName(namedOutput);
    checkNamedOutputName(namedOutput, getNamedOutputsList(job), false);
    Configuration conf = job.getConfiguration();
    conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
    conf.set(MO_PREFIX + namedOutput + FORMAT, outputFormatClass);
    conf.setClass(MO_PREFIX + namedOutput + KEY, keyClass, Object.class);
    conf.setClass(MO_PREFIX + namedOutput + VALUE, valueClass, Object.class);
    ConfigurationUtil.setNamedConfigurations(conf, computePrefixName(namedOutput), outputConfigs);
}
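A minimal, hypothetical call for the method above; the output name "counts", the TextOutputFormat/Text/LongWritable choices, and the empty config map are assumptions for illustration:

import java.util.Collections;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class NamedOutputExample {
    public static void configure(Job job) {
        // registers an alphanumeric named output; the key/value classes end up
        // in the configuration via the conf.setClass calls shown above
        MultipleOutputs.addNamedOutput(job, "counts", TextOutputFormat.class.getName(),
                Text.class, LongWritable.class, Collections.<String, String>emptyMap());
    }
}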