Example usage for org.apache.hadoop.conf Configuration getBoolean

Introduction

On this page you will find usage examples for org.apache.hadoop.conf.Configuration#getBoolean, drawn from the Apache Mahout source tree.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
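
A minimal, self-contained sketch of the behavior described above (the property key demo.feature.enabled is illustrative, not a real Hadoop or Mahout key):

import org.apache.hadoop.conf.Configuration;

public class GetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a boolean property, then read it back.
        conf.setBoolean("demo.feature.enabled", true);
        boolean enabled = conf.getBoolean("demo.feature.enabled", false); // true

        // The key is absent, so the supplied default is returned.
        boolean missing = conf.getBoolean("demo.missing.flag", true); // true (the default)

        System.out.println(enabled + " / " + missing);
    }
}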

Usage

From source file: org.apache.mahout.utils.nlp.collocations.llr.LLRReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
    this.minLLRValue = conf.getFloat(MIN_LLR, DEFAULT_MIN_LLR);

    this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);

    if (log.isInfoEnabled()) {
        log.info("NGram Total is {}", ngramTotal);
        log.info("Min LLR value is {}", minLLRValue);
        log.info("Emit Unitgrams is {}", emitUnigrams);
    }/*from w w w  .  java 2s  .c  o  m*/

    if (ngramTotal == -1) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
}
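
The flags read in setup() above must be placed into the job configuration by the driver before submission. A hedged sketch of that side, using illustrative literal keys in place of the CollocDriver constants:

// Driver-side counterpart (the keys are illustrative stand-ins for
// constants such as CollocDriver.EMIT_UNIGRAMS and NGRAM_TOTAL).
Configuration conf = new Configuration();
conf.setBoolean("collocations.emit.unigrams", true);
conf.setLong("collocations.ngram.total", 100000L);
conf.setFloat("collocations.min.llr", 1.0f);
Job job = Job.getInstance(conf, "llr collocation reducer");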

From source file: org.apache.mahout.utils.vectors.common.PartialVectorMergeReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    normPower = conf.getFloat(PartialVectorMerger.NORMALIZATION_POWER, PartialVectorMerger.NO_NORMALIZING);
    dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
}

From source file: org.apache.mahout.utils.vectors.text.term.TFPartialVectorReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    try {
        dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
        sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
        maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);
        URI[] localFiles = DistributedCache.getCacheFiles(conf);
        if (localFiles == null || localFiles.length < 1) {
            throw new IllegalArgumentException("missing paths from the DistributedCache");
        }
        Path dictionaryFile = new Path(localFiles[0].getPath());
        FileSystem fs = dictionaryFile.getFileSystem(conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dictionaryFile, conf);
        Writable key = new Text();
        IntWritable value = new IntWritable();

        // key is word value is id
        while (reader.next(key, value)) {
            dictionary.put(key.toString(), value.get());
        }
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file: org.apache.mahout.utils.vectors.tfidf.TFIDFPartialVectorReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    try {
        Configuration conf = context.getConfiguration();
        URI[] localFiles = DistributedCache.getCacheFiles(conf);
        if (localFiles == null || localFiles.length < 1) {
            throw new IllegalArgumentException("missing paths from the DistributedCache");
        }

        vectorCount = conf.getLong(TFIDFConverter.VECTOR_COUNT, 1);
        featureCount = conf.getLong(TFIDFConverter.FEATURE_COUNT, 1);
        minDf = conf.getInt(TFIDFConverter.MIN_DF, 1);
        maxDfPercent = conf.getInt(TFIDFConverter.MAX_DF_PERCENTAGE, 99);
        sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);

        Path dictionaryFile = new Path(localFiles[0].getPath());
        FileSystem fs = dictionaryFile.getFileSystem(conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dictionaryFile, conf);
        IntWritable key = new IntWritable();
        LongWritable value = new LongWritable();

        // key is feature, value is the document frequency
        while (reader.next(key, value)) {
            dictionary.put(key.get(), value.get());
        }
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file: org.apache.mahout.vectorizer.collocations.llr.LLRReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
    this.minLLRValue = conf.getFloat(MIN_LLR, DEFAULT_MIN_LLR);

    this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);

    log.info("NGram Total: {}, Min LLR value: {}, Emit Unigrams: {}", ngramTotal, minLLRValue, emitUnigrams);

    if (ngramTotal == -1) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
}

From source file: org.apache.mahout.vectorizer.common.PartialVectorMergeReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    normPower = conf.getFloat(PartialVectorMerger.NORMALIZATION_POWER, PartialVectorMerger.NO_NORMALIZING);
    dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    logNormalize = conf.getBoolean(PartialVectorMerger.LOG_NORMALIZE, false);
}

From source file: org.apache.mahout.vectorizer.EncodingMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    sequentialVectors = conf.getBoolean(USE_SEQUENTIAL, false);
    namedVectors = conf.getBoolean(USE_NAMED_VECTORS, false);
    String analyzerName = conf.get(ANALYZER_NAME, StandardAnalyzer.class.getName());
    Analyzer analyzer;
    try {
        analyzer = AnalyzerUtils.createAnalyzer(analyzerName);
    } catch (ClassNotFoundException e) {
        //TODO: hmmm, don't like this approach
        throw new IOException("Unable to create Analyzer for name: " + analyzerName, e);
    }

    String encoderName = conf.get(ENCODER_FIELD_NAME, "text");
    cardinality = conf.getInt(CARDINALITY, 5000);
    String encClass = conf.get(ENCODER_CLASS);
    encoder = ClassUtils.instantiateAs(encClass, FeatureVectorEncoder.class, new Class[] { String.class },
            new Object[] { encoderName });
    if (encoder instanceof LuceneTextValueEncoder) {
        ((LuceneTextValueEncoder) encoder).setAnalyzer(analyzer);
    }
}

From source file: org.apache.mahout.vectorizer.pruner.PrunedPartialVectorMergeReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    normPower = conf.getFloat(PartialVectorMerger.NORMALIZATION_POWER, PartialVectorMerger.NO_NORMALIZING);
    logNormalize = conf.getBoolean(PartialVectorMerger.LOG_NORMALIZE, false);
}

From source file: org.apache.mahout.vectorizer.term.TFPartialVectorReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

    URI[] localFiles = DistributedCache.getCacheFiles(conf);
    Path dictionaryFile = HadoopUtil.findInCacheByPartOfFilename(DictionaryVectorizer.DICTIONARY_FILE,
            localFiles);
    // key is word value is id
    for (Pair<Writable, IntWritable> record : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile,
            true, conf)) {
        dictionary.put(record.getFirst().toString(), record.getSecond().get());
    }
}