Example usage for org.apache.hadoop.conf Configuration getInt

Introduction

This page collects usage examples of org.apache.hadoop.conf.Configuration#getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If no such property exists, the provided defaultValue is returned; if the property exists but its value is not a valid int, an error is thrown.
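
A minimal standalone sketch of the call itself (the property names and values here are illustrative, not taken from any of the snippets below):

import org.apache.hadoop.conf.Configuration;

public class GetIntDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt("example.thread.count", 8); // hypothetical property name

        // Property is present: returns 8.
        int threads = conf.getInt("example.thread.count", 4);

        // Property is absent: returns the default, 4.
        int missing = conf.getInt("example.unset.property", 4);

        System.out.println(threads + " " + missing);
    }
}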

Usage

From source file: fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java

License: Open Source License

static Job sortOne(Configuration conf, Path inputFile, Path outputDir, String commandName,
        String samplingInfo) throws IOException, ClassNotFoundException, InterruptedException {
    conf.set(Utils.WORK_FILENAME_PROPERTY, inputFile.getName());
    Utils.configureSampling(outputDir, inputFile.getName(), conf);
    final Job job = new Job(conf);

    job.setJarByClass(Summarize.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(SortReducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SortInputFormat.class);
    job.setOutputFormatClass(SortOutputFormat.class);

    FileInputFormat.setInputPaths(job, inputFile);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    final Timer t = new Timer();

    System.out.printf("%s :: Sampling%s...\n", commandName, samplingInfo);
    t.start();

    InputSampler.<LongWritable, Text>writePartitionFile(job, new InputSampler.SplitSampler<LongWritable, Text>(
            Math.max(1 << 16, conf.getInt("mapred.reduce.tasks", 1)), 10));

    System.out.printf("%s :: Sampling complete in %d.%03d s.\n", commandName, t.stopS(), t.fms());
    job.submit();
    return job;
}

From source file: fi.tkk.ics.hadoop.bam.cli.plugins.chipster.SummarySort.java

License: Open Source License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    Configuration conf = ContextUtil.getConfiguration(context);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);

    FileSplit split = (FileSplit) genericSplit;
    start = (split.getStart()) << 16;
    end = (start + split.getLength()) << 16;

    final Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);

    bin = new BlockCompressedInputStream(
            new WrapSeekable<FSDataInputStream>(fs.open(file), fs.getFileStatus(file).getLen(), file));

    in = new LineReader(bin, conf);

    if (start != 0) {
        bin.seek(start);

        // Skip first line
        in.readLine(new Text());
        start = bin.getFilePointer();
    }
    this.pos = start;
}

From source file: fi.tkk.ics.hadoop.bam.SplittingBAMIndexer.java

License: Open Source License

/**
 * Invoke a new SplittingBAMIndexer object, operating on the supplied {@link
 * org.apache.hadoop.conf.Configuration} object instead of a supplied
 * argument list.
 *
 * @throws java.lang.IllegalArgumentException if the "input" property is not
 *                                            in the Configuration
 */
public static void run(final Configuration conf) throws IOException {
    final String inputString = conf.get("input");
    if (inputString == null)
        throw new IllegalArgumentException("String property \"input\" path not found in given Configuration");

    final FileSystem fs = FileSystem.get(conf);

    // Default to a granularity level of 4096. This is generally sufficient
    // for very large BAM files, relative to a maximum heap size in the
    // gigabyte range.
    final SplittingBAMIndexer indexer = new SplittingBAMIndexer(conf.getInt("granularity", 4096));

    final Path input = new Path(inputString);

    indexer.index(fs.open(input), fs.create(input.suffix(OUTPUT_FILE_EXTENSION)),
            fs.getFileStatus(input).getLen());
}
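
A minimal sketch of driving run() from client code; the path and granularity values are illustrative, while the "input" and "granularity" property names are the ones read above:

Configuration conf = new Configuration();
conf.set("input", "/data/sample.bam");   // illustrative BAM path; required by run()
conf.setInt("granularity", 4096);        // optional; run() defaults to 4096
SplittingBAMIndexer.run(conf);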

From source file: format.OverlapLengthInputFormat.java

License: Apache License

/**
 * Get record length value
 * @param conf configuration
 * @return the record length, zero means none was set
 */
public static int getRecordLength(Configuration conf) {
    return conf.getInt(FIXED_RECORD_LENGTH, 0);
}

From source file: format.OverlapLengthInputFormat.java

License: Apache License

/**
 * Get the overlapping portion of adjacent records
 * @param conf configuration
 * @return the overlapping length, zero means none was set
 **/
public static int getOverlapLength(Configuration conf) {
    return conf.getInt(OVERLAP_LENGTH, 0);
}
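
A sketch of setting both values before job submission, assuming the FIXED_RECORD_LENGTH and OVERLAP_LENGTH property-name constants used above are accessible to client code (their actual string values are not shown in these snippets):

Configuration conf = new Configuration();
conf.setInt(OverlapLengthInputFormat.FIXED_RECORD_LENGTH, 100); // assumed-accessible constant
conf.setInt(OverlapLengthInputFormat.OVERLAP_LENGTH, 10);       // assumed-accessible constant

int recordLength = OverlapLengthInputFormat.getRecordLength(conf);   // 100; 0 would mean unset
int overlapLength = OverlapLengthInputFormat.getOverlapLength(conf); // 10; 0 would mean unset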

From source file: format.OverlapRecordReader.java

License: BSD License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    //Configuration job = HadoopUtils.getConfiguration(context);
    Configuration job = context.getConfiguration();
    maxLineLen = job.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE);

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file: FormatStorage1.IHead.java

License: Open Source License

public void fromJobConf(Configuration conf) {
    this.magic = conf.getInt(ConstVar.HD_magic, ConstVar.NewFormatMagic);
    this.var = (byte) conf.getInt(ConstVar.HD_var, 0);
    this.ver = (byte) conf.getInt(ConstVar.HD_ver, 0);
    this.lineindex = (byte) conf.getInt(ConstVar.HD_lineindex, 1);
    this.primaryIndex = (short) conf.getInt(ConstVar.HD_primaryIndex, -1);
    this.compress = (byte) conf.getInt(ConstVar.HD_compress, 0);
    this.compressStyle = (byte) conf.getInt(ConstVar.HD_compressStyle, 0);
    this.encode = (byte) conf.getInt(ConstVar.HD_encode, 0);
    this.encodeStyle = (byte) conf.getInt(ConstVar.HD_encodeStyle, 0);

    this.fieldMap = new IFieldMap();
    String[] fieldStrings = conf.getStrings(ConstVar.HD_fieldMap);
    if (fieldStrings != null)
        for (int i = 0; i < fieldStrings.length; i++) {
            String[] def = fieldStrings[i].split(ConstVar.RecordSplit);
            byte type = Byte.valueOf(def[0]);
            int index = Integer.valueOf(def[2]);
            fieldMap.addFieldType(new IRecord.IFType(type, index));
        }

    this.udi = new IUserDefinedHeadInfo();
    String[] udistrs = conf.getStrings(ConstVar.HD_udi);
    if (udistrs != null)
        for (int i = 0; i < udistrs.length; i++) {
            String[] def = udistrs[i].split(ConstVar.RecordSplit);
            udi.addInfo(Integer.valueOf(def[0]), def[1]);
        }
}

From source file: fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.FastqLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitFastqLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file: gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormat.java

License: Apache License

public static int getMaxMapper(Configuration conf) {
    return conf.getInt(MAX_MAPPERS, Integer.MAX_VALUE);
}

From source file: gobblin.runtime.TaskExecutor.java

License: Apache License

/**
 * Constructor to work with Hadoop {@link org.apache.hadoop.conf.Configuration}.
 */
public TaskExecutor(Configuration conf) {
    this(conf.getInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY,
            ConfigurationKeys.DEFAULT_TASK_EXECUTOR_THREADPOOL_SIZE),
            conf.getInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_THREAD_POOL_CORE_SIZE),
            conf.getLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY,
                    ConfigurationKeys.DEFAULT_TASK_RETRY_INTERVAL_IN_SEC));
}
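
A minimal sketch of constructing the executor from a Hadoop Configuration; the sizes are illustrative, and any key left unset falls back to the defaults referenced above:

Configuration conf = new Configuration();
conf.setInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY, 16);
conf.setInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY, 2);
conf.setLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY, 300L);

TaskExecutor taskExecutor = new TaskExecutor(conf);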