List of usage examples for org.apache.hadoop.conf.Configuration#getBoolean

public boolean getBoolean(String name, boolean defaultValue)

Returns the value of the name property as a boolean. If the property is not set, or its value is not a valid boolean, defaultValue is returned.
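Before the examples pulled from real projects, here is a minimal, self-contained sketch of the call pattern; the property names my.feature.enabled and some.unset.property and their defaults are illustrative placeholders, not keys used by any of the source files below.

import org.apache.hadoop.conf.Configuration;

public class GetBooleanExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("my.feature.enabled", "true"); // hypothetical property name

        // Parses the stored string; returns the supplied default when the
        // property is unset or not a valid boolean literal.
        boolean enabled = conf.getBoolean("my.feature.enabled", false); // -> true
        boolean missing = conf.getBoolean("some.unset.property", true); // -> true (default)

        System.out.println(enabled + " " + missing);
    }
}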
From source file: org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorMapper.java
License: Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  try {
    if (inputCategories == null) {
      Set<String> newCategories = new HashSet<String>();
      DefaultStringifier<Set<String>> setStringifier =
          new DefaultStringifier<Set<String>>(conf, GenericsUtil.getClass(newCategories));
      String categoriesStr = setStringifier.toString(newCategories);
      categoriesStr = conf.get("wikipedia.categories", categoriesStr);
      inputCategories = setStringifier.fromString(categoriesStr);
    }
    exactMatchOnly = conf.getBoolean("exact.match.only", false);
    if (analyzer == null) {
      String analyzerStr = conf.get("analyzer.class", WikipediaAnalyzer.class.getName());
      Class<? extends Analyzer> analyzerClass = Class.forName(analyzerStr).asSubclass(Analyzer.class);
      analyzer = analyzerClass.newInstance();
    }
  } catch (IOException ex) {
    throw new IllegalStateException(ex);
  } catch (ClassNotFoundException e) {
    throw new IllegalStateException(e);
  } catch (IllegalAccessException e) {
    throw new IllegalStateException(e);
  } catch (InstantiationException e) {
    throw new IllegalStateException(e);
  }
  log.info("Configure: Input Categories size: {} Exact Match: {} Analyzer: {}",
      new Object[] { inputCategories.size(), exactMatchOnly, analyzer.getClass().getName() });
}
From source file: org.apache.mahout.classifier.chi_rwcs.mapreduce.Builder.java
License: Apache License
/**
 * Used only for DEBUG purposes. If false, the mappers don't output anything, so the builder has
 * nothing to process.
 *
 * @param conf configuration
 * @return true if the builder has to return output, false otherwise
 */
protected static boolean isOutput(Configuration conf) {
  return conf.getBoolean("debug.mahout.fc.output", true);
}
From source file: org.apache.mahout.classifier.df.mapreduce.Builder.java
License: Apache License
/**
 * Used only for DEBUG purposes. If false, the mappers don't output anything, so the builder has
 * nothing to process.
 *
 * @param conf configuration
 * @return true if the builder has to return output, false otherwise
 */
protected static boolean isOutput(Configuration conf) {
  return conf.getBoolean("debug.mahout.rf.output", true);
}
From source file: org.apache.mahout.classifier.df.mapreduce.inmem.InMemInputFormat.java
License: Apache License
/**
 * Used for DEBUG purposes only. If true and a seed is available, all the mappers use the same seed,
 * so all the mappers should take the same time to build their trees.
 */
private static boolean isSingleSeed(Configuration conf) {
  return conf.getBoolean("debug.mahout.rf.single.seed", false);
}
From source file: org.apache.mahout.classifier.df.mapreduce.OversamplingBuilder.java
License: Apache License
public static boolean isOutput(Configuration conf) {
  return conf.getBoolean("debug.mahout.preprocessing.output", true);
}
From source file: org.apache.mahout.classifier.naivebayes.BayesUtils.java
License: Apache License
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {

  float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);
  boolean isComplementary = conf.getBoolean(NaiveBayesModel.COMPLEMENTARY_MODEL, true);

  // read feature sums and label sums
  Vector scoresPerLabel = null;
  Vector scoresPerFeature = null;
  for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
      new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
    String key = record.getFirst().toString();
    VectorWritable value = record.getSecond();
    if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
      scoresPerFeature = value.get();
    } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
      scoresPerLabel = value.get();
    }
  }

  Preconditions.checkNotNull(scoresPerFeature);
  Preconditions.checkNotNull(scoresPerLabel);

  Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
  for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
      new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(), conf)) {
    scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
  }

  // perLabelThetaNormalizer is only used by the complementary model; we do not instantiate it for the standard model
  Vector perLabelThetaNormalizer = null;
  if (isComplementary) {
    perLabelThetaNormalizer = scoresPerLabel.like();
    for (Pair<Text, VectorWritable> entry : new SequenceFileDirIterable<Text, VectorWritable>(
        new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
      if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
        perLabelThetaNormalizer = entry.getSecond().get();
      }
    }
    Preconditions.checkNotNull(perLabelThetaNormalizer);
  }

  return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
      perLabelThetaNormalizer, alphaI, isComplementary);
}
From source file: org.apache.mahout.clustering.classify.ClusterClassificationMapper.java
License: Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  String clustersIn = conf.get(ClusterClassificationConfigKeys.CLUSTERS_IN);
  threshold = conf.getFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD, 0.0f);
  emitMostLikely = conf.getBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, false);
  clusterModels = Lists.newArrayList();
  if (clustersIn != null && !clustersIn.isEmpty()) {
    Path clustersInPath = new Path(clustersIn);
    clusterModels = populateClusterModels(clustersInPath, conf);
    ClusteringPolicy policy = ClusterClassifier.readPolicy(finalClustersPath(clustersInPath));
    clusterClassifier = new ClusterClassifier(clusterModels, policy);
  }
  clusterId = new IntWritable();
}
From source file: org.apache.mahout.clustering.lda.cvb.CVBConfig.java
License: Apache License
public CVBConfig read(Configuration conf) {
  setNumTopics(conf.getInt(NUM_TOPICS_PARAM, 0));
  setNumTerms(conf.getInt(NUM_TERMS_PARAM, 0));
  setAlpha(conf.getFloat(DOC_TOPIC_SMOOTHING_PARAM, 0));
  setEta(conf.getFloat(TERM_TOPIC_SMOOTHING_PARAM, 0));
  setRandomSeed(conf.getLong(RANDOM_SEED_PARAM, 0));
  setTestFraction(conf.getFloat(TEST_SET_FRACTION_PARAM, 0));
  setNumTrainThreads(conf.getInt(NUM_TRAIN_THREADS_PARAM, 0));
  setNumUpdateThreads(conf.getInt(NUM_UPDATE_THREADS_PARAM, 0));
  setMaxItersPerDoc(conf.getInt(MAX_ITERATIONS_PER_DOC_PARAM, 0));
  setModelWeight(conf.getFloat(MODEL_WEIGHT_PARAM, 0));
  setUseOnlyLabeledDocs(conf.getBoolean(ONLY_LABELED_DOCS_PARAM, false));
  setMinRelPreplexityDiff(conf.getFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, -1));
  setMaxInferenceItersPerDoc(conf.getInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, 0));
  check();
  return this;
}
From source file: org.apache.mahout.clustering.minhash.MinHashMapper.java
License: Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  this.numHashFunctions = conf.getInt(MinhashOptionCreator.NUM_HASH_FUNCTIONS, 10);
  this.minHashValues = new int[numHashFunctions];
  this.bytesToHash = new byte[4];
  this.keyGroups = conf.getInt(MinhashOptionCreator.KEY_GROUPS, 1);
  this.minVectorSize = conf.getInt(MinhashOptionCreator.MIN_VECTOR_SIZE, 5);
  String htype = conf.get(MinhashOptionCreator.HASH_TYPE, "linear");
  this.debugOutput = conf.getBoolean(MinhashOptionCreator.DEBUG_OUTPUT, false);
  HashType hashType;
  try {
    hashType = HashType.valueOf(htype);
  } catch (IllegalArgumentException iae) {
    log.warn("No valid hash type found in configuration for {}, assuming type: {}", htype, HashType.LINEAR);
    hashType = HashType.LINEAR;
  }
  hashFunction = HashFactory.createHashFunctions(hashType, numHashFunctions);
}
From source file: org.apache.mahout.clustering.minhash.MinHashReducer.java
License: Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  this.minClusterSize = conf.getInt(MinhashOptionCreator.MIN_CLUSTER_SIZE, 5);
  this.debugOutput = conf.getBoolean(MinhashOptionCreator.DEBUG_OUTPUT, false);
}