Example usage for org.apache.hadoop.conf Configuration getFloat

List of usage examples for org.apache.hadoop.conf Configuration getFloat

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getFloat.

Prototype

public float getFloat(String name, float defaultValue) 

Source Link

Document

Gets the value of the name property as a float. If no such property exists, the provided defaultValue is returned.

Usage

From source file:org.apache.mahout.clustering.lda.cvb.CachingCVB0PerplexityMapper.java

License:Apache License

/**
 * Prepares this mapper: starts the memory logger, pulls the CVB0 settings out of the
 * job configuration, and builds the read model, the model trainer and the topic vector.
 *
 * <p>The two smoothing parameters fall back to {@code Float.NaN} when they are not
 * configured; topic and term counts fall back to {@code -1}.
 *
 * @param context the task context whose configuration is read
 * @throws IOException declared by the overridden signature
 * @throws InterruptedException declared by the overridden signature
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    MemoryUtil.startMemoryLogger(5000);

    log.info("Retrieving configuration");
    final Configuration jobConf = context.getConfiguration();

    // Smoothing hyper-parameters; NaN marks "not configured".
    final float termTopicSmoothing = jobConf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN);
    final float docTopicSmoothing = jobConf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN);

    final long randomSeed = jobConf.getLong(CVB0Driver.RANDOM_SEED, 1234L);
    random = RandomUtils.getRandom(randomSeed);

    numTopics = jobConf.getInt(CVB0Driver.NUM_TOPICS, -1);
    final int termCount = jobConf.getInt(CVB0Driver.NUM_TERMS, -1);
    final int updateThreads = jobConf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1);
    final int trainThreads = jobConf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4);
    maxIters = jobConf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10);
    final float weight = jobConf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f);
    testFraction = jobConf.getFloat(CVB0Driver.TEST_SET_FRACTION, 0.1f);

    log.info("Initializing read model");
    final Path[] modelPaths = CVB0Driver.getModelPaths(jobConf);
    if (modelPaths == null || modelPaths.length == 0) {
        // Nothing on disk: build a fresh model from a new generator created with the same seed.
        log.info("No model files found");
        readModel = new TopicModel(numTopics, termCount, termTopicSmoothing, docTopicSmoothing,
                RandomUtils.getRandom(randomSeed), null, trainThreads, weight);
    } else {
        // NOTE(review): this branch passes the update-thread count while the fresh-model
        // branch passes the train-thread count -- confirm the difference is intended.
        readModel = new TopicModel(jobConf, termTopicSmoothing, docTopicSmoothing, null, updateThreads,
                weight, modelPaths);
    }

    log.info("Initializing model trainer");
    modelTrainer = new ModelTrainer(readModel, null, trainThreads, numTopics, termCount);

    log.info("Initializing topic vector");
    topicVector = new DenseVector(new double[numTopics]);
}

From source file:org.apache.mahout.clustering.lda.cvb.CVBConfig.java

License:Apache License

/**
 * Populates this config from the given Hadoop configuration and validates it.
 *
 * <p>Every numeric property that is absent falls back to zero, except the minimum
 * relative perplexity difference, which falls back to {@code -1}; the
 * only-labeled-docs flag falls back to {@code false}. After all setters have run,
 * {@code check()} is invoked.
 *
 * @param conf the configuration to read the parameters from
 * @return this instance
 */
public CVBConfig read(Configuration conf) {
    // Model dimensions.
    this.setNumTopics(conf.getInt(NUM_TOPICS_PARAM, 0));
    this.setNumTerms(conf.getInt(NUM_TERMS_PARAM, 0));

    // Smoothing, seeding and test split.
    this.setAlpha(conf.getFloat(DOC_TOPIC_SMOOTHING_PARAM, 0.0f));
    this.setEta(conf.getFloat(TERM_TOPIC_SMOOTHING_PARAM, 0.0f));
    this.setRandomSeed(conf.getLong(RANDOM_SEED_PARAM, 0L));
    this.setTestFraction(conf.getFloat(TEST_SET_FRACTION_PARAM, 0.0f));

    // Threading and iteration limits.
    this.setNumTrainThreads(conf.getInt(NUM_TRAIN_THREADS_PARAM, 0));
    this.setNumUpdateThreads(conf.getInt(NUM_UPDATE_THREADS_PARAM, 0));
    this.setMaxItersPerDoc(conf.getInt(MAX_ITERATIONS_PER_DOC_PARAM, 0));

    this.setModelWeight(conf.getFloat(MODEL_WEIGHT_PARAM, 0.0f));
    this.setUseOnlyLabeledDocs(conf.getBoolean(ONLY_LABELED_DOCS_PARAM, false));
    this.setMinRelPreplexityDiff(conf.getFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, -1.0f));
    this.setMaxInferenceItersPerDoc(conf.getInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, 0));

    this.check();
    return this;
}

From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansMapper.java

License:Apache License

/**
 * Builds the streaming k-means clusterer from the job configuration.
 *
 * <p>When the configured distance cutoff equals
 * {@code StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF} (also the fallback when the
 * property is absent), the mapper is flagged to estimate the cutoff itself and a
 * list for collecting points is allocated.
 *
 * @param context the task context whose configuration is read
 */
@Override
public void setup(Context context) {
    // The Driver is assumed to have handed over a valid configuration,
    // so nothing is re-validated here.
    final Configuration jobConf = context.getConfiguration();
    final UpdatableSearcher searcher = StreamingKMeansUtilsMR.searcherFromConfiguration(jobConf);
    final int numClusters = jobConf.getInt(StreamingKMeansDriver.ESTIMATED_NUM_MAP_CLUSTERS, 1);
    final double distanceCutoff = jobConf.getFloat(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF,
            StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF);

    final boolean cutoffUnknown = distanceCutoff == StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF;
    if (cutoffUnknown) {
        // The cutoff cannot be estimated until some data has been seen.
        estimateDistanceCutoff = true;
        estimatePoints = Lists.newArrayList();
    }

    clusterer = new StreamingKMeans(searcher, numClusters, distanceCutoff);
}

From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansReducer.java

License:Apache License

/**
 * Runs ball k-means over the given centroids using parameters taken from the configuration.
 *
 * <p>Defaults when a property is absent: 1 cluster, 10 iterations, trim fraction 0.9,
 * test probability 0.1 and 3 runs. k-means++ initialisation and weight correction are
 * both enabled unless {@code RANDOM_INIT} / {@code IGNORE_WEIGHTS} are set to true.
 *
 * @param centroids the centroids to cluster
 * @param conf the configuration holding the clustering parameters
 * @return the result of {@code BallKMeans.cluster} on {@code centroids}
 */
public static Iterable<Vector> getBestCentroids(List<Centroid> centroids, Configuration conf) {

    if (log.isInfoEnabled()) {
        log.info("Number of Centroids: {}", centroids.size());
    }

    final int clusterCount = conf.getInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1);
    final int iterationLimit = conf.getInt(StreamingKMeansDriver.MAX_NUM_ITERATIONS, 10);
    final float trim = conf.getFloat(StreamingKMeansDriver.TRIM_FRACTION, 0.9f);
    // Both switches are stored negated in the configuration.
    final boolean usePlusPlusInit = !conf.getBoolean(StreamingKMeansDriver.RANDOM_INIT, false);
    final boolean fixWeights = !conf.getBoolean(StreamingKMeansDriver.IGNORE_WEIGHTS, false);
    final float testProb = conf.getFloat(StreamingKMeansDriver.TEST_PROBABILITY, 0.1f);
    final int runs = conf.getInt(StreamingKMeansDriver.NUM_BALLKMEANS_RUNS, 3);

    final BallKMeans clusterer = new BallKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(conf),
            clusterCount, iterationLimit, trim, usePlusPlusInit, fixWeights, testProb, runs);

    return clusterer.cluster(centroids);
}

From source file:org.apache.mahout.knn.experimental.StreamingKMeansMapper.java

License:Apache License

/**
 * Builds the streaming k-means clusterer from the job configuration.
 *
 * @param context the task context whose configuration is read
 */
@Override
public void setup(Context context) {
    // The Driver is assumed to have handed over a valid configuration,
    // so nothing is re-validated here.
    final Configuration jobConf = context.getConfiguration();
    final UpdatableSearcher searcher = searcherFromConfiguration(jobConf);
    final int numClusters = jobConf.getInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1);

    // NOTE(review): the literal 10e-6 equals 1e-5, not 1e-6 -- confirm this is the intended default.
    final float distanceCutoff = jobConf.getFloat(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF,
            (float) 10e-6);

    clusterer = new StreamingKMeans(searcher, numClusters, distanceCutoff);
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.LLRReducer.java

License:Apache License

/**
 * Reads the n-gram total, the minimum LLR threshold and the emit-unigrams flag from
 * the job configuration, logs them, and rejects a job that carries no n-gram total.
 *
 * @param context the task context whose configuration is read
 * @throws IOException declared by the overridden signature
 * @throws InterruptedException declared by the overridden signature
 * @throws IllegalStateException if {@code NGRAM_TOTAL} is absent from the configuration
 *         (or explicitly set to {@code -1})
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    // -1 doubles as the "not configured" sentinel checked below.
    this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1);
    this.minLLRValue = conf.getFloat(MIN_LLR, DEFAULT_MIN_LLR);

    this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);

    if (log.isInfoEnabled()) {
        log.info("NGram Total is {}", ngramTotal);
        log.info("Min LLR value is {}", minLLRValue);
        // Message previously misspelled the setting as "Unitgrams".
        log.info("Emit Unigrams is {}", emitUnigrams);
    }

    if (ngramTotal == -1) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
}

From source file:org.apache.mahout.utils.vectors.common.PartialVectorMergeReducer.java

License:Apache License

/**
 * Loads the merge settings from the job configuration after the superclass setup has run.
 *
 * <p>Fallbacks: {@code PartialVectorMerger.NO_NORMALIZING} for the normalization power,
 * {@code Integer.MAX_VALUE} for the dimension, and {@code false} for sequential access.
 *
 * @param context the task context whose configuration is read
 * @throws IOException declared by the overridden signature
 * @throws InterruptedException declared by the overridden signature
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    final Configuration jobConf = context.getConfiguration();
    this.normPower = jobConf.getFloat(PartialVectorMerger.NORMALIZATION_POWER,
            PartialVectorMerger.NO_NORMALIZING);
    this.dimension = jobConf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    this.sequentialAccess = jobConf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
}

From source file:org.apache.mahout.vectorizer.collocations.llr.LLRReducer.java

License:Apache License

/**
 * Reads the n-gram total, the minimum LLR threshold and the emit-unigrams flag from
 * the job configuration, logs them in one line, and rejects a job without an n-gram total.
 *
 * @param context the task context whose configuration is read
 * @throws IOException declared by the overridden signature
 * @throws InterruptedException declared by the overridden signature
 * @throws IllegalStateException if {@code NGRAM_TOTAL} is absent from the configuration
 *         (or explicitly set to {@code -1})
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    final Configuration jobConf = context.getConfiguration();
    // -1 doubles as the "not configured" sentinel checked below.
    ngramTotal = jobConf.getLong(NGRAM_TOTAL, -1);
    minLLRValue = jobConf.getFloat(MIN_LLR, DEFAULT_MIN_LLR);
    emitUnigrams = jobConf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);

    log.info("NGram Total: {}, Min LLR value: {}, Emit Unigrams: {}", ngramTotal, minLLRValue, emitUnigrams);

    final boolean totalMissing = ngramTotal == -1;
    if (totalMissing) {
        throw new IllegalStateException("No NGRAM_TOTAL available in job config");
    }
}

From source file:org.apache.mahout.vectorizer.common.PartialVectorMergeReducer.java

License:Apache License

/**
 * Loads the merge settings from the job configuration after the superclass setup has run.
 *
 * <p>Fallbacks: {@code PartialVectorMerger.NO_NORMALIZING} for the normalization power,
 * {@code Integer.MAX_VALUE} for the dimension, and {@code false} for each of the
 * sequential-access, named-vector and log-normalize flags.
 *
 * @param context the task context whose configuration is read
 * @throws IOException declared by the overridden signature
 * @throws InterruptedException declared by the overridden signature
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    final Configuration jobConf = context.getConfiguration();

    // Numeric settings.
    this.normPower = jobConf.getFloat(PartialVectorMerger.NORMALIZATION_POWER,
            PartialVectorMerger.NO_NORMALIZING);
    this.dimension = jobConf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);

    // Boolean switches, all off unless configured.
    this.sequentialAccess = jobConf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    this.namedVector = jobConf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    this.logNormalize = jobConf.getBoolean(PartialVectorMerger.LOG_NORMALIZE, false);
}

From source file:org.apache.mahout.vectorizer.pruner.PrunedPartialVectorMergeReducer.java

License:Apache License

/**
 * Loads the normalization power and the log-normalize flag from the job configuration
 * after the superclass setup has run.
 *
 * <p>Fallbacks: {@code PartialVectorMerger.NO_NORMALIZING} for the normalization power
 * and {@code false} for log-normalization.
 *
 * @param context the task context whose configuration is read
 * @throws IOException declared by the overridden signature
 * @throws InterruptedException declared by the overridden signature
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    final Configuration jobConf = context.getConfiguration();
    this.normPower = jobConf.getFloat(PartialVectorMerger.NORMALIZATION_POWER,
            PartialVectorMerger.NO_NORMALIZING);
    this.logNormalize = jobConf.getBoolean(PartialVectorMerger.LOG_NORMALIZE, false);
}