List of usage examples for org.apache.hadoop.conf Configuration getFloat
public float getFloat(String name, float defaultValue)
Gets the value of the `name` property as a float, returning `defaultValue` if no such property is set.
From source file:org.apache.mahout.clustering.lda.cvb.CachingCVB0PerplexityMapper.java
License:Apache License
@Override protected void setup(Context context) throws IOException, InterruptedException { MemoryUtil.startMemoryLogger(5000);//from w ww . j ava 2 s. co m log.info("Retrieving configuration"); Configuration conf = context.getConfiguration(); float eta = conf.getFloat(CVB0Driver.TERM_TOPIC_SMOOTHING, Float.NaN); float alpha = conf.getFloat(CVB0Driver.DOC_TOPIC_SMOOTHING, Float.NaN); long seed = conf.getLong(CVB0Driver.RANDOM_SEED, 1234L); random = RandomUtils.getRandom(seed); numTopics = conf.getInt(CVB0Driver.NUM_TOPICS, -1); int numTerms = conf.getInt(CVB0Driver.NUM_TERMS, -1); int numUpdateThreads = conf.getInt(CVB0Driver.NUM_UPDATE_THREADS, 1); int numTrainThreads = conf.getInt(CVB0Driver.NUM_TRAIN_THREADS, 4); maxIters = conf.getInt(CVB0Driver.MAX_ITERATIONS_PER_DOC, 10); float modelWeight = conf.getFloat(CVB0Driver.MODEL_WEIGHT, 1.0f); testFraction = conf.getFloat(CVB0Driver.TEST_SET_FRACTION, 0.1f); log.info("Initializing read model"); Path[] modelPaths = CVB0Driver.getModelPaths(conf); if (modelPaths != null && modelPaths.length > 0) { readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths); } else { log.info("No model files found"); readModel = new TopicModel(numTopics, numTerms, eta, alpha, RandomUtils.getRandom(seed), null, numTrainThreads, modelWeight); } log.info("Initializing model trainer"); modelTrainer = new ModelTrainer(readModel, null, numTrainThreads, numTopics, numTerms); log.info("Initializing topic vector"); topicVector = new DenseVector(new double[numTopics]); }
From source file:org.apache.mahout.clustering.lda.cvb.CVBConfig.java
License:Apache License
/**
 * Copies the CVB settings found in {@code conf} into this object through its setters,
 * calls {@code check()}, and returns this instance.
 *
 * <p>Every setting falls back to {@code 0} when absent, except the only-labeled-docs
 * flag ({@code false}) and the minimum relative perplexity difference ({@code -1}).
 *
 * @param conf configuration to read the settings from
 * @return this config, for chaining
 */
public CVBConfig read(Configuration conf) {
  int topics = conf.getInt(NUM_TOPICS_PARAM, 0);
  setNumTopics(topics);

  int terms = conf.getInt(NUM_TERMS_PARAM, 0);
  setNumTerms(terms);

  float docTopicSmoothing = conf.getFloat(DOC_TOPIC_SMOOTHING_PARAM, 0);
  setAlpha(docTopicSmoothing);

  float termTopicSmoothing = conf.getFloat(TERM_TOPIC_SMOOTHING_PARAM, 0);
  setEta(termTopicSmoothing);

  long seed = conf.getLong(RANDOM_SEED_PARAM, 0);
  setRandomSeed(seed);

  float testSetFraction = conf.getFloat(TEST_SET_FRACTION_PARAM, 0);
  setTestFraction(testSetFraction);

  int trainThreads = conf.getInt(NUM_TRAIN_THREADS_PARAM, 0);
  setNumTrainThreads(trainThreads);

  int updateThreads = conf.getInt(NUM_UPDATE_THREADS_PARAM, 0);
  setNumUpdateThreads(updateThreads);

  int itersPerDoc = conf.getInt(MAX_ITERATIONS_PER_DOC_PARAM, 0);
  setMaxItersPerDoc(itersPerDoc);

  float weight = conf.getFloat(MODEL_WEIGHT_PARAM, 0);
  setModelWeight(weight);

  boolean labeledOnly = conf.getBoolean(ONLY_LABELED_DOCS_PARAM, false);
  setUseOnlyLabeledDocs(labeledOnly);

  float minPerplexityDiff = conf.getFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, -1);
  setMinRelPreplexityDiff(minPerplexityDiff);

  int inferenceItersPerDoc = conf.getInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, 0);
  setMaxInferenceItersPerDoc(inferenceItersPerDoc);

  check();
  return this;
}
From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansMapper.java
License:Apache License
@Override public void setup(Context context) { // At this point the configuration received from the Driver is assumed to be valid. // No other checks are made. Configuration conf = context.getConfiguration(); UpdatableSearcher searcher = StreamingKMeansUtilsMR.searcherFromConfiguration(conf); int numClusters = conf.getInt(StreamingKMeansDriver.ESTIMATED_NUM_MAP_CLUSTERS, 1); double estimatedDistanceCutoff = conf.getFloat(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF, StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF); if (estimatedDistanceCutoff == StreamingKMeansDriver.INVALID_DISTANCE_CUTOFF) { estimateDistanceCutoff = true;//from w w w.j a v a2 s.c o m estimatePoints = Lists.newArrayList(); } // There is no way of estimating the distance cutoff unless we have some data. clusterer = new StreamingKMeans(searcher, numClusters, estimatedDistanceCutoff); }
From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansReducer.java
License:Apache License
public static Iterable<Vector> getBestCentroids(List<Centroid> centroids, Configuration conf) { if (log.isInfoEnabled()) { log.info("Number of Centroids: {}", centroids.size()); }//from w w w .j a v a 2s .c o m int numClusters = conf.getInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1); int maxNumIterations = conf.getInt(StreamingKMeansDriver.MAX_NUM_ITERATIONS, 10); float trimFraction = conf.getFloat(StreamingKMeansDriver.TRIM_FRACTION, 0.9f); boolean kMeansPlusPlusInit = !conf.getBoolean(StreamingKMeansDriver.RANDOM_INIT, false); boolean correctWeights = !conf.getBoolean(StreamingKMeansDriver.IGNORE_WEIGHTS, false); float testProbability = conf.getFloat(StreamingKMeansDriver.TEST_PROBABILITY, 0.1f); int numRuns = conf.getInt(StreamingKMeansDriver.NUM_BALLKMEANS_RUNS, 3); BallKMeans ballKMeansCluster = new BallKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(conf), numClusters, maxNumIterations, trimFraction, kMeansPlusPlusInit, correctWeights, testProbability, numRuns); return ballKMeansCluster.cluster(centroids); }
From source file:org.apache.mahout.knn.experimental.StreamingKMeansMapper.java
License:Apache License
@Override public void setup(Context context) { // At this point the configuration received from the Driver is assumed to be valid. // No other checks are made. Configuration conf = context.getConfiguration(); UpdatableSearcher searcher;/*from ww w.ja va 2s .co m*/ searcher = searcherFromConfiguration(conf); int numClusters = conf.getInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1); clusterer = new StreamingKMeans(searcher, numClusters, conf.getFloat(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF, (float) 10e-6)); }
From source file:org.apache.mahout.utils.nlp.collocations.llr.LLRReducer.java
License:Apache License
/**
 * Loads the n-gram total, minimum LLR value and emit-unigrams flag from the job
 * configuration, logs them at info level, and rejects a job with no n-gram total.
 *
 * @param context task context supplying the job configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 * @throws IllegalStateException if {@code NGRAM_TOTAL} resolves to {@code -1},
 *     which is also the default used when the property is absent
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  this.ngramTotal = jobConf.getLong(NGRAM_TOTAL, -1);
  this.minLLRValue = jobConf.getFloat(MIN_LLR, DEFAULT_MIN_LLR);
  this.emitUnigrams =
      jobConf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS);

  if (log.isInfoEnabled()) {
    log.info("NGram Total is {}", ngramTotal);
    log.info("Min LLR value is {}", minLLRValue);
    log.info("Emit Unitgrams is {}", emitUnigrams);
  }

  // -1 is the "not set" sentinel supplied as the default above.
  if (ngramTotal == -1) {
    throw new IllegalStateException("No NGRAM_TOTAL available in job config");
  }
}
From source file:org.apache.mahout.utils.vectors.common.PartialVectorMergeReducer.java
License:Apache License
/**
 * Loads the normalization power, vector dimension and sequential-access flag
 * from the job configuration.
 *
 * <p>Defaults: {@code PartialVectorMerger.NO_NORMALIZING}, {@code Integer.MAX_VALUE}
 * and {@code false} respectively.
 *
 * @param context task context supplying the job configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  normPower = jobConf.getFloat(
      PartialVectorMerger.NORMALIZATION_POWER, PartialVectorMerger.NO_NORMALIZING);
  dimension = jobConf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
  sequentialAccess = jobConf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
}
From source file:org.apache.mahout.vectorizer.collocations.llr.LLRReducer.java
License:Apache License
@Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Configuration conf = context.getConfiguration(); this.ngramTotal = conf.getLong(NGRAM_TOTAL, -1); this.minLLRValue = conf.getFloat(MIN_LLR, DEFAULT_MIN_LLR); this.emitUnigrams = conf.getBoolean(CollocDriver.EMIT_UNIGRAMS, CollocDriver.DEFAULT_EMIT_UNIGRAMS); log.info("NGram Total: {}, Min LLR value: {}, Emit Unigrams: {}", ngramTotal, minLLRValue, emitUnigrams); if (ngramTotal == -1) { throw new IllegalStateException("No NGRAM_TOTAL available in job config"); }// ww w . ja v a2s . com }
From source file:org.apache.mahout.vectorizer.common.PartialVectorMergeReducer.java
License:Apache License
/**
 * Loads the vector-merge settings from the job configuration: normalization power,
 * dimension, and the sequential-access, named-vector and log-normalize flags.
 *
 * <p>Defaults: {@code PartialVectorMerger.NO_NORMALIZING} for the power,
 * {@code Integer.MAX_VALUE} for the dimension, and {@code false} for each flag.
 *
 * @param context task context supplying the job configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  normPower = jobConf.getFloat(
      PartialVectorMerger.NORMALIZATION_POWER, PartialVectorMerger.NO_NORMALIZING);
  dimension = jobConf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
  sequentialAccess = jobConf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = jobConf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
  logNormalize = jobConf.getBoolean(PartialVectorMerger.LOG_NORMALIZE, false);
}
From source file:org.apache.mahout.vectorizer.pruner.PrunedPartialVectorMergeReducer.java
License:Apache License
/**
 * Loads the normalization power and the log-normalize flag from the job configuration.
 *
 * <p>Defaults: {@code PartialVectorMerger.NO_NORMALIZING} and {@code false}.
 *
 * @param context task context supplying the job configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  normPower = jobConf.getFloat(
      PartialVectorMerger.NORMALIZATION_POWER, PartialVectorMerger.NO_NORMALIZING);
  logNormalize = jobConf.getBoolean(PartialVectorMerger.LOG_NORMALIZE, false);
}