Example usage for org.apache.hadoop.conf.Configuration.getInt

List of usage examples for org.apache.hadoop.conf.Configuration.getInt

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If no such property exists, the provided default value is returned; if the stored value is not a valid int, an error is thrown.
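
A minimal, self-contained sketch of that default-value behavior (the property key is illustrative):

import org.apache.hadoop.conf.Configuration;

public class GetIntDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration(false); // skip default resources
        // Property unset: the supplied default is returned.
        System.out.println(conf.getInt("example.size", 64)); // prints 64
        conf.setInt("example.size", 128);
        // Property set: the stored value wins over the default.
        System.out.println(conf.getInt("example.size", 64)); // prints 128
    }
}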

Usage

From source file:com.elex.dmp.lda.CachingCVB0PerplexityMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    MemoryUtil.startMemoryLogger(5000);

    log.info("Retrieving configuration");
    Configuration conf = context.getConfiguration();
    float eta = conf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN);
    float alpha = conf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN);
    long seed = conf.getLong(CVB0Driver.RANDOM_SEED, 1234L);
    random = RandomUtils.getRandom(seed);
    numTopics = conf.getInt(CVB0Driver.NUM_TOPICS, -1);
    int numTerms = conf.getInt(CVB0Driver.NUM_TERMS, -1);
    int numUpdateThreads = conf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1);
    int numTrainThreads = conf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4);
    maxIters = conf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10);
    float modelWeight = conf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f);
    testFraction = conf.getFloat(CVB0Driver.TEST_SET_FRACTION, 0.1f);

    log.info("Initializing read model");
    TopicModel readModel;
    Path[] modelPaths = CVB0Driver.getModelPaths(conf);
    if (modelPaths != null && modelPaths.length > 0) {
        readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths);
    } else {
        log.info("No model files found");
        readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null,
                numTrainThreads, modelWeight);
    }

    log.info("Initializing model trainer");
    modelTrainer = new ModelTrainer(readModel, null, numTrainThreads, numTopics, numTerms);

    log.info("Initializing topic vector");
    topicVector = new DenseVector(new double[numTopics]);
}
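
The -1 defaults passed to getInt above act as "not set" sentinels for required settings, while the thread counts and iteration cap fall back to usable values. A caller would typically validate such a sentinel; a minimal sketch (the property key is illustrative; CVB0Driver.NUM_TOPICS plays this role above):

int numTopics = conf.getInt("example.num.topics", -1); // illustrative key
if (numTopics < 1) {
    throw new IllegalArgumentException("example.num.topics must be set to a positive int");
}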

From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    URI[] localFiles = DistributedCache.getCacheFiles(conf);
    Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
            "missing paths from the DistributedCache");

    dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

    Path dictionaryFile = new Path(localFiles[0].getPath());
    // key is word value is id
    for (Pair<Writable, IntWritable> record : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile,
            true, conf)) {
        dictionary.put(record.getFirst().toString(), record.getSecond().get());
    }
}
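
Note the last getInt call above: it passes the field's current value as the default, so leaving DictionaryVectorizer.MAX_NGRAMS unset keeps maxNGramSize unchanged. A minimal sketch of the same idiom (names are illustrative):

private int maxNGramSize = 1; // compile-time default

void configure(Configuration conf) {
    // If "example.max.ngrams" is unset, getInt returns the current field
    // value and the assignment is a no-op.
    maxNGramSize = conf.getInt("example.max.ngrams", maxNGramSize);
}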

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>.
 *
 * @param in
 *            input stream
 * @param conf
 *            configuration
 * @throws IOException
 */
public LineReaders(InputStream in, Configuration conf) throws IOException {
    this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE),
            conf.getInt(FileConfiguration.INPUT_FILE_SKIP_ROWNUM, 0));
}

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Create a line reader that reads from the given stream using the
 * <code>io.file.buffer.size</code> specified in the given
 * <code>Configuration</code>, and using a custom delimiter of array of
 * bytes.
 * 
 * @param in
 *            input stream
 * @param conf
 *            configuration
 * @param recordDelimiterBytes
 *            The record delimiter
 * @param skipNum
 *            per-file number of leading rows to skip
 * @throws IOException
 */
public LineReaders(InputStream in, Configuration conf, byte[] recordDelimiterBytes, int skipNum)
        throws IOException {
    this.in = in;
    this.bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
    this.perFileSkipRowNum = skipNum;
    this.buffer = new byte[this.bufferSize];
    this.recordDelimiterBytes = recordDelimiterBytes;
}
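
A hedged usage sketch for the two constructors above (file names and the delimiter are illustrative):

static void openReaders(Configuration conf) throws IOException {
    conf.setInt("io.file.buffer.size", 64 * 1024);            // read-buffer size
    conf.setInt(FileConfiguration.INPUT_FILE_SKIP_ROWNUM, 1); // skip one header row

    // Two-argument form: buffer size and skip count come from the Configuration.
    LineReaders simple = new LineReaders(new FileInputStream("data.txt"), conf);

    // Four-argument form: records end at '|' and the skip count is passed directly.
    LineReaders delimited = new LineReaders(new FileInputStream("data.psv"), conf,
            new byte[] { '|' }, 1);
}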

From source file:com.ery.hadoop.mrddx.file.LineRecordReader.java

License:Apache License

public LineRecordReader(Configuration job, FileSplit split) throws IOException {
    this.perFileSkipRowNum = job.getInt(FileConfiguration.INPUT_FILE_SKIP_ROWNUM, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.job = job;
    this.split = split;
    for (long l : split.getLengths()) {
        totalend += l;
    }
    this.fileEncodeing = job
            .get(MRConfiguration.FILE_CONTENT_ENCODING, MRConfiguration.FILE_CONTENT_ENCODING_DEFAULT)
            .toLowerCase();
    if (this.fileEncodeing.equals("")) {
        this.fileEncodeing = "utf-8";
    }
    this.split.setFileIndex(0);
    this.openFile();
}

From source file:com.ery.hadoop.mrddx.file.LineRecordReader.java

License:Apache License

public LineRecordReader(InputStream in, long offset, long endOffset, Configuration job) throws IOException {
    this.job = job;
    this.perFileSkipRowNum = job.getInt(FileConfiguration.INPUT_FILE_SKIP_ROWNUM, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.in = new LineReader(in, job);
    this.start = offset;
    this.pos = offset;
    this.end = endOffset;
    this.filePosition = null;
}
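
Both constructors above read mapred.linerecordreader.maxlength, which caps how many bytes a single record may span; a job guarding against corrupt, newline-free input would set it explicitly. A minimal sketch:

Configuration job = new Configuration();
// Cap each record at 1 MB; when unset, the readers above fall back to
// Integer.MAX_VALUE.
job.setInt("mapred.linerecordreader.maxlength", 1024 * 1024);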

From source file:com.ery.hadoop.mrddx.hFile.LineRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());

    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            // For .tar.gz inputs, wrap the compressed stream in a
            // TarInputStream so records inside the archive can be read.
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(cIn), job);
            } else {
                in = new LineReader(cIn, job);
            }
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            String filename = file.getName();
            if (filename.endsWith(".tar.gz")) {
                in = new LineReader(new TarInputStream(codec.createInputStream(fileIn, decompressor)), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new LineReader(fileIn, job);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:com.ery.server.util.IOUtils.java

License:Apache License

/**
 * Copies from one stream to another. <strong>closes the input and output
 * streams at the end</strong>.
 * 
 * @param in
 *            InputStream to read from
 * @param out
 *            OutputStream to write to
 * @param conf
 *            the Configuration object
 */
public static void copyBytes(InputStream in, OutputStream out, Configuration conf) throws IOException {
    copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), true);
}

From source file:com.ery.server.util.IOUtils.java

License:Apache License

/**
 * Copies from one stream to another.
 * 
 * @param in
 *            InputStream to read from
 * @param out
 *            OutputStream to write to
 * @param conf
 *            the Configuration object
 * @param close
 *            whether or not to close the InputStream and OutputStream at
 *            the end. The streams are closed in the finally clause.
 */
public static void copyBytes(InputStream in, OutputStream out, Configuration conf, boolean close)
        throws IOException {
    copyBytes(in, out, conf.getInt("io.file.buffer.size", 4096), close);
}
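
A hedged usage sketch covering both overloads above (file names are illustrative):

static void copyFile(Configuration conf) throws IOException {
    try (InputStream in = new FileInputStream("src.bin");
            OutputStream out = new FileOutputStream("dst.bin")) {
        // close=false: the try-with-resources block owns both streams.
        IOUtils.copyBytes(in, out, conf, false);
    }
}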

From source file:com.facebook.hive.orc.OrcConf.java

License:Open Source License

public static int getIntVar(Configuration conf, ConfVars var) {
    return conf.getInt(var.varname, var.defaultIntVal);
}
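
The getter above centralizes key names and defaults in a ConfVars enum. A sketch of the minimal shape such an enum needs to support getIntVar (the constant and key are illustrative, not the actual OrcConf source; only the varname and defaultIntVal fields are implied by the getter):

public enum ConfVars {
    EXAMPLE_STRIDE("orc.example.row.index.stride", 10000); // illustrative entry

    final String varname;
    final int defaultIntVal;

    ConfVars(String varname, int defaultIntVal) {
        this.varname = varname;
        this.defaultIntVal = defaultIntVal;
    }
}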