List of usage examples for org.apache.hadoop.conf Configuration getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int.
Parameter: name - the property name.
Returns: the value of the name property as an int; if no such property exists, defaultValue is returned.
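Before the source-file examples, a minimal runnable sketch of the usual setInt/getInt round trip; the property name my.example.threshold and its values are illustrative only:

import org.apache.hadoop.conf.Configuration;

public class GetIntExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Property not set yet: getInt returns the supplied default.
        int before = conf.getInt("my.example.threshold", 10);

        // Set the property, then read it back as an int.
        conf.setInt("my.example.threshold", 64);
        int after = conf.getInt("my.example.threshold", 10);

        System.out.println(before + " -> " + after); // prints "10 -> 64"
    }
}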
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java
License:Open Source License
static Job sortOne(Configuration conf, Path inputFile, Path outputDir, String commandName, String samplingInfo)
        throws IOException, ClassNotFoundException, InterruptedException {
    conf.set(Utils.WORK_FILENAME_PROPERTY, inputFile.getName());
    Utils.configureSampling(outputDir, inputFile.getName(), conf);
    final Job job = new Job(conf);

    job.setJarByClass(Summarize.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SortReducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SortInputFormat.class);
    job.setOutputFormatClass(SortOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputFile);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    final Timer t = new Timer();
    System.out.printf("%s :: Sampling%s...\n", commandName, samplingInfo);
    t.start();

    InputSampler.<LongWritable, Text>writePartitionFile(job,
            new InputSampler.SplitSampler<LongWritable, Text>(
                    Math.max(1 << 16, conf.getInt("mapred.reduce.tasks", 1)), 10));

    System.out.printf("%s :: Sampling complete in %d.%03d s.\n", commandName, t.stopS(), t.fms());

    job.submit();
    return job;
}
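The number of samples handed to the SplitSampler comes from conf.getInt("mapred.reduce.tasks", 1), never dropping below 1 << 16 because of the Math.max call. A hedged sketch of a caller setting that property before invoking sortOne; the paths and reducer count are placeholders, and the fragment assumes it runs inside SummarySort where sortOne is declared:

// Inside fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort:
Configuration conf = new Configuration();
conf.setInt("mapred.reduce.tasks", 8); // consumed by the getInt call inside sortOne
Job job = sortOne(conf, new Path("/data/input.bam"), new Path("/data/sorted"), "sort", "");
job.waitForCompletion(true);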
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java
License:Open Source License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    Configuration conf = ContextUtil.getConfiguration(context);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);

    FileSplit split = (FileSplit) genericSplit;
    start = (split.getStart()) << 16;
    end = (start + split.getLength()) << 16;

    final Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);

    bin = new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(fs.open(file), fs.getFileStatus(file).getLen(), file));

    in = new LineReader(bin, conf);

    if (start != 0) {
        bin.seek(start);

        // Skip first line
        in.readLine(new Text());
        start = bin.getFilePointer();
    }
    this.pos = start;
}
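Here getInt caps the line length via the legacy mapred.linerecordreader.maxlength key, defaulting to Integer.MAX_VALUE when unset. A one-line sketch of the job-side setting; the 1 MB cap is illustrative:

Configuration conf = new Configuration();
conf.setInt("mapred.linerecordreader.maxlength", 1024 * 1024); // read back by the getInt call in initialize()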
From source file:fi.tkk.ics.hadoop.bam.SplittingBAMIndexer.java
License:Open Source License
/**
 * Invoke a new SplittingBAMIndexer object, operating on the supplied
 * {@link org.apache.hadoop.conf.Configuration} object instead of a supplied
 * argument list.
 *
 * @throws java.lang.IllegalArgumentException if the "input" property is not
 *                                            in the Configuration
 */
public static void run(final Configuration conf) throws IOException {
    final String inputString = conf.get("input");
    if (inputString == null)
        throw new IllegalArgumentException("String property \"input\" path not found in given Configuration");

    final FileSystem fs = FileSystem.get(conf);

    // Default to a granularity level of 4096. This is generally sufficient
    // for very large BAM files, relative to a maximum heap size in the
    // gigabyte range.
    final SplittingBAMIndexer indexer = new SplittingBAMIndexer(conf.getInt("granularity", 4096));

    final Path input = new Path(inputString);

    indexer.index(fs.open(input), fs.create(input.suffix(OUTPUT_FILE_EXTENSION)),
            fs.getFileStatus(input).getLen());
}
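run() reads exactly two properties: the required "input" path (via get) and the optional "granularity" (via getInt, default 4096). A sketch of a caller, with a placeholder input path:

import org.apache.hadoop.conf.Configuration;
import fi.tkk.ics.hadoop.bam.SplittingBAMIndexer;
import java.io.IOException;

public class SplittingIndexExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("input", "/data/sample.bam"); // required: run() throws IllegalArgumentException if missing
        conf.setInt("granularity", 4096);      // optional: omit to fall back to the getInt default
        SplittingBAMIndexer.run(conf);
    }
}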
From source file:format.OverlapLengthInputFormat.java
License:Apache License
/**
 * Get record length value.
 * @param conf configuration
 * @return the record length, zero means none was set
 */
public static int getRecordLength(Configuration conf) {
    return conf.getInt(FIXED_RECORD_LENGTH, 0);
}
From source file:format.OverlapLengthInputFormat.java
License:Apache License
/**
 * Get the overlapping portion of adjacent records.
 * @param conf configuration
 * @return the overlapping length, zero means none was set
 */
public static int getOverlapLength(Configuration conf) {
    return conf.getInt(OVERLAP_LENGTH, 0);
}
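Both getters above default to zero when their keys are absent. A hedged sketch of the configuring side, assuming the FIXED_RECORD_LENGTH and OVERLAP_LENGTH keys are visible to callers (otherwise the class presumably exposes matching setters); the lengths are illustrative:

import org.apache.hadoop.conf.Configuration;
import format.OverlapLengthInputFormat;

public class OverlapConfigExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt(OverlapLengthInputFormat.FIXED_RECORD_LENGTH, 100); // record length in bytes
        conf.setInt(OverlapLengthInputFormat.OVERLAP_LENGTH, 20);       // overlap shared by adjacent records

        int recordLength = OverlapLengthInputFormat.getRecordLength(conf); // 100
        int overlap = OverlapLengthInputFormat.getOverlapLength(conf);     // 20
        System.out.println(recordLength + " / " + overlap);
    }
}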
From source file:format.OverlapRecordReader.java
License:BSD License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    //Configuration job = HadoopUtils.getConfiguration(context);
    Configuration job = context.getConfiguration();
    maxLineLen = job.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE);

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}
From source file:FormatStorage1.IHead.java
License:Open Source License
public void fromJobConf(Configuration conf) {
    this.magic = conf.getInt(ConstVar.HD_magic, ConstVar.NewFormatMagic);
    this.var = (byte) conf.getInt(ConstVar.HD_var, 0);
    this.ver = (byte) conf.getInt(ConstVar.HD_ver, 0);
    this.lineindex = (byte) conf.getInt(ConstVar.HD_lineindex, 1);
    this.primaryIndex = (short) conf.getInt(ConstVar.HD_primaryIndex, -1);
    this.compress = (byte) conf.getInt(ConstVar.HD_compress, 0);
    this.compressStyle = (byte) conf.getInt(ConstVar.HD_compressStyle, 0);
    this.encode = (byte) conf.getInt(ConstVar.HD_encode, 0);
    this.encodeStyle = (byte) conf.getInt(ConstVar.HD_encodeStyle, 0);

    this.fieldMap = new IFieldMap();
    String[] fieldStrings = conf.getStrings(ConstVar.HD_fieldMap);
    if (fieldStrings != null)
        for (int i = 0; i < fieldStrings.length; i++) {
            String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
            byte type = Byte.valueOf(def[0]);
            int index = Integer.valueOf(def[2]);
            fieldMap.addFieldType(new IRecord.IFType(type, index));
        }

    this.udi = new IUserDefinedHeadInfo();
    String[] udistrs = conf.getStrings(ConstVar.HD_udi);
    if (udistrs != null)
        for (int i = 0; i < udistrs.length; i++) {
            String[] def = udistrs[i].split(ConstVar.RecordSplit);
            udi.addInfo(Integer.valueOf(def[0]), def[1]);
        }
}
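fromJobConf pulls every scalar header field out of the Configuration with getInt, narrowing to byte or short where the header stores smaller types. A hedged sketch of populating a few of those fields before calling it, assuming IHead has a no-arg constructor and that ConstVar is on the classpath from the same FormatStorage codebase; the values are illustrative and the HD_fieldMap/HD_udi string encodings are left out:

// ConstVar and IHead come from the FormatStorage codebase shown above.
Configuration conf = new Configuration();
conf.setInt(ConstVar.HD_magic, ConstVar.NewFormatMagic);
conf.setInt(ConstVar.HD_lineindex, 1);
conf.setInt(ConstVar.HD_compress, 1);     // narrowed to a byte by fromJobConf
conf.setInt(ConstVar.HD_primaryIndex, 0); // narrowed to a short by fromJobConf

IHead head = new IHead();
head.fromJobConf(conf);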
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitFastqLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }

    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormat.java
License:Apache License
public static int getMaxMapper(Configuration conf) {
    return conf.getInt(MAX_MAPPERS, Integer.MAX_VALUE);
}
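When MAX_MAPPERS is not set, getMaxMapper returns Integer.MAX_VALUE, effectively meaning "no cap". A sketch of the setting side, assuming the MAX_MAPPERS key is visible to callers; the cap of 100 is illustrative:

import org.apache.hadoop.conf.Configuration;
import gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormat;

public class MaxMapperExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt(GobblinWorkUnitsInputFormat.MAX_MAPPERS, 100);

        System.out.println(GobblinWorkUnitsInputFormat.getMaxMapper(conf));                // 100
        System.out.println(GobblinWorkUnitsInputFormat.getMaxMapper(new Configuration())); // Integer.MAX_VALUE
    }
}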
From source file:gobblin.runtime.TaskExecutor.java
License:Apache License
/**
 * Constructor to work with Hadoop {@link org.apache.hadoop.conf.Configuration}.
 */
public TaskExecutor(Configuration conf) {
    this(conf.getInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY,
                    ConfigurationKeys.DEFAULT_TASK_EXECUTOR_THREADPOOL_SIZE),
            conf.getInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_THREAD_POOL_CORE_SIZE),
            conf.getLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_INTERVAL_IN_SEC));
}
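The constructor resolves both pool sizes with getInt and the retry interval with getLong, falling back to the bundled defaults for any key that is absent. A hedged sketch of building a TaskExecutor with explicit overrides, assuming the pre-Apache gobblin.configuration package for ConfigurationKeys; the sizes and interval are illustrative:

import org.apache.hadoop.conf.Configuration;
import gobblin.configuration.ConfigurationKeys;
import gobblin.runtime.TaskExecutor;

public class TaskExecutorExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY, 16);   // task thread pool size
        conf.setInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY, 4); // retry pool core size
        conf.setLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY, 300L);   // retry interval in seconds

        TaskExecutor executor = new TaskExecutor(conf); // unset keys fall back to the DEFAULT_* constants
    }
}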