Example usage for org.apache.hadoop.conf Configuration getBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue)

Source Link

Document

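Get the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.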

Usage

From source file:org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorMapper.java

License:Apache License

/**
 * Initialises the mapper from the job configuration: the input categories (read from
 * "wikipedia.categories"), the exact-match flag (read from "exact.match.only", default false)
 * and the analyzer (class named by "analyzer.class"). Categories and analyzer are only
 * initialised when they are still null.
 *
 * @param context
 *          task context supplying the job configuration
 * @throws IllegalStateException
 *           if an IOException occurs while resolving the categories, or the analyzer class cannot be
 *           found, accessed or instantiated
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    try {
        if (inputCategories == null) {
            Set<String> emptyCategories = new HashSet<String>();
            DefaultStringifier<Set<String>> stringifier = new DefaultStringifier<Set<String>>(conf,
                    GenericsUtil.getClass(emptyCategories));

            // The stringified empty set is the fallback when no categories are configured.
            String fallback = stringifier.toString(emptyCategories);
            String serialized = conf.get("wikipedia.categories", fallback);
            inputCategories = stringifier.fromString(serialized);
        }

        exactMatchOnly = conf.getBoolean("exact.match.only", false);

        if (analyzer == null) {
            String analyzerName = conf.get("analyzer.class", WikipediaAnalyzer.class.getName());
            Class<?> loaded = Class.forName(analyzerName);
            analyzer = loaded.asSubclass(Analyzer.class).newInstance();
        }
    } catch (IOException e) {
        throw new IllegalStateException(e);
    } catch (ClassNotFoundException e) {
        throw new IllegalStateException(e);
    } catch (IllegalAccessException e) {
        throw new IllegalStateException(e);
    } catch (InstantiationException e) {
        throw new IllegalStateException(e);
    }

    Object[] logArgs = { inputCategories.size(), exactMatchOnly, analyzer.getClass().getName() };
    log.info("Configure: Input Categories size: {} Exact Match: {} Analyzer: {}", logArgs);
}

From source file:org.apache.mahout.classifier.chi_rwcs.mapreduce.Builder.java

License:Apache License

/**
 * Used only for DEBUG purposes. If false, the mappers don't output anything, so the builder has
 * nothing to process. Reads the "debug.mahout.fc.output" property with true as the default.
 *
 * @param conf
 *          configuration
 * @return true if the builder has to return output, false otherwise
 */
protected static boolean isOutput(Configuration conf) {
    final boolean outputEnabled = conf.getBoolean("debug.mahout.fc.output", true);
    return outputEnabled;
}

From source file:org.apache.mahout.classifier.df.mapreduce.Builder.java

License:Apache License

/**
 * Used only for DEBUG purposes. if false, the mappers doesn't output anything, so the builder has nothing
 * to process/*from  w  ww .  j ava2  s  . c om*/
 * 
 * @param conf
 *          configuration
 * @return true if the builder has to return output. false otherwise
 */
protected static boolean isOutput(Configuration conf) {
    return conf.getBoolean("debug.mahout.rf.output", true);
}

From source file:org.apache.mahout.classifier.df.mapreduce.inmem.InMemInputFormat.java

License:Apache License

/**
 * Used for DEBUG purposes only. If true and a seed is available, all the mappers use the same seed,
 * thus all the mappers should take the same time to build their trees. Reads the
 * "debug.mahout.rf.single.seed" property with false as the default.
 *
 * @param conf
 *          configuration
 * @return the configured single-seed flag
 */
private static boolean isSingleSeed(Configuration conf) {
    final boolean singleSeed = conf.getBoolean("debug.mahout.rf.single.seed", false);
    return singleSeed;
}

From source file:org.apache.mahout.classifier.df.mapreduce.OversamplingBuilder.java

License:Apache License

/**
 * Reads the "debug.mahout.preprocessing.output" flag from the configuration.
 *
 * @param conf
 *          configuration
 * @return the configured flag, or true when the property is not set
 */
public static boolean isOutput(Configuration conf) {
    final boolean outputEnabled = conf.getBoolean("debug.mahout.preprocessing.output", true);
    return outputEnabled;
}

From source file:org.apache.mahout.classifier.naivebayes.BayesUtils.java

License:Apache License

/**
 * Builds a {@link NaiveBayesModel} from the part files found beneath {@code base}.
 *
 * <p>Three sub-directories are read, named by TrainNaiveBayesJob.WEIGHTS, SUMMED_OBSERVATIONS and
 * (for the complementary model only) THETAS. The smoothing parameter and the complementary flag are
 * taken from {@code conf}.
 *
 * @param base
 *          directory containing the training output
 * @param conf
 *          configuration used to read the sequence files and the model settings
 * @return the assembled model
 * @throws NullPointerException
 *           if the per-feature or per-label weights are not found (raised by Preconditions.checkNotNull)
 */
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {

    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);
    boolean isComplementary = conf.getBoolean(NaiveBayesModel.COMPLEMENTARY_MODEL, true);

    // Feature sums and label sums live together in the weights directory, told apart by key.
    Vector labelWeights = null;
    Vector featureWeights = null;
    Path weightsDir = new Path(base, TrainNaiveBayesJob.WEIGHTS);
    for (Pair<Text, VectorWritable> weightRecord : new SequenceFileDirIterable<Text, VectorWritable>(weightsDir,
            PathType.LIST, PathFilters.partFilter(), conf)) {
        String weightKey = weightRecord.getFirst().toString();
        VectorWritable weightValue = weightRecord.getSecond();
        if (weightKey.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            featureWeights = weightValue.get();
        } else if (weightKey.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            labelWeights = weightValue.get();
        }
    }

    Preconditions.checkNotNull(featureWeights);
    Preconditions.checkNotNull(labelWeights);

    // One row per label, one column per feature.
    Matrix weightsPerLabelAndFeature = new SparseMatrix(labelWeights.size(), featureWeights.size());
    Path observationsDir = new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS);
    for (Pair<IntWritable, VectorWritable> labelRow : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            observationsDir, PathType.LIST, PathFilters.partFilter(), conf)) {
        int label = labelRow.getFirst().get();
        weightsPerLabelAndFeature.assignRow(label, labelRow.getSecond().get());
    }

    // The theta normalizer is only used by the complementary model; it stays null for the standard model.
    Vector thetaNormalizer = null;
    if (isComplementary) {
        thetaNormalizer = labelWeights.like();
        Path thetasDir = new Path(base, TrainNaiveBayesJob.THETAS);
        for (Pair<Text, VectorWritable> thetaRecord : new SequenceFileDirIterable<Text, VectorWritable>(
                thetasDir, PathType.LIST, PathFilters.partFilter(), conf)) {
            String thetaKey = thetaRecord.getFirst().toString();
            if (thetaKey.equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
                thetaNormalizer = thetaRecord.getSecond().get();
            }
        }
        Preconditions.checkNotNull(thetaNormalizer);
    }

    return new NaiveBayesModel(weightsPerLabelAndFeature, featureWeights, labelWeights, thetaNormalizer,
            alphaI, isComplementary);
}

From source file:org.apache.mahout.clustering.classify.ClusterClassificationMapper.java

License:Apache License

/**
 * Reads the classification settings from the job configuration and, when a clusters input path is
 * configured, loads the cluster models and builds the classifier from them.
 *
 * @param context
 *          task context supplying the job configuration
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration configuration = context.getConfiguration();
    String clustersDir = configuration.get(ClusterClassificationConfigKeys.CLUSTERS_IN);
    threshold = configuration.getFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD, 0.0f);
    emitMostLikely = configuration.getBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, false);

    // Start from an empty model list; it is replaced below when a clusters path is configured.
    clusterModels = Lists.newArrayList();

    boolean hasClustersInput = clustersDir != null && !clustersDir.isEmpty();
    if (hasClustersInput) {
        Path clustersInPath = new Path(clustersDir);
        clusterModels = populateClusterModels(clustersInPath, configuration);
        clusterClassifier = new ClusterClassifier(clusterModels,
                ClusterClassifier.readPolicy(finalClustersPath(clustersInPath)));
    }
    clusterId = new IntWritable();
}

From source file:org.apache.mahout.clustering.lda.cvb.CVBConfig.java

License:Apache License

/**
 * Populates this config from the given Hadoop configuration and then calls {@code check()}.
 * Every property falls back to the default passed to the corresponding getter when it is not set.
 *
 * @param conf
 *          configuration to read the settings from
 * @return this instance
 */
public CVBConfig read(Configuration conf) {
    int numTopics = conf.getInt(NUM_TOPICS_PARAM, 0);
    setNumTopics(numTopics);

    int numTerms = conf.getInt(NUM_TERMS_PARAM, 0);
    setNumTerms(numTerms);

    float docTopicSmoothing = conf.getFloat(DOC_TOPIC_SMOOTHING_PARAM, 0);
    setAlpha(docTopicSmoothing);

    float termTopicSmoothing = conf.getFloat(TERM_TOPIC_SMOOTHING_PARAM, 0);
    setEta(termTopicSmoothing);

    long seed = conf.getLong(RANDOM_SEED_PARAM, 0);
    setRandomSeed(seed);

    float testFraction = conf.getFloat(TEST_SET_FRACTION_PARAM, 0);
    setTestFraction(testFraction);

    int trainThreads = conf.getInt(NUM_TRAIN_THREADS_PARAM, 0);
    setNumTrainThreads(trainThreads);

    int updateThreads = conf.getInt(NUM_UPDATE_THREADS_PARAM, 0);
    setNumUpdateThreads(updateThreads);

    int maxItersPerDoc = conf.getInt(MAX_ITERATIONS_PER_DOC_PARAM, 0);
    setMaxItersPerDoc(maxItersPerDoc);

    float modelWeight = conf.getFloat(MODEL_WEIGHT_PARAM, 0);
    setModelWeight(modelWeight);

    boolean onlyLabeledDocs = conf.getBoolean(ONLY_LABELED_DOCS_PARAM, false);
    setUseOnlyLabeledDocs(onlyLabeledDocs);

    float minRelativePerplexityDiff = conf.getFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, -1);
    setMinRelPreplexityDiff(minRelativePerplexityDiff);

    int maxInferenceItersPerDoc = conf.getInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, 0);
    setMaxInferenceItersPerDoc(maxInferenceItersPerDoc);

    check();
    return this;
}

From source file:org.apache.mahout.clustering.minhash.MinHashMapper.java

License:Apache License

/**
 * Reads the min-hash settings from the job configuration and creates the hash functions.
 *
 * <p>The hash type name is matched case-insensitively against the {@code HashType} constants. If it
 * still does not name a constant, a warning is logged and {@code HashType.LINEAR} is used.
 *
 * @param context
 *          task context supplying the job configuration
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();
    this.numHashFunctions = conf.getInt(MinhashOptionCreator.NUM_HASH_FUNCTIONS, 10);
    this.minHashValues = new int[numHashFunctions];
    this.bytesToHash = new byte[4];
    this.keyGroups = conf.getInt(MinhashOptionCreator.KEY_GROUPS, 1);
    this.minVectorSize = conf.getInt(MinhashOptionCreator.MIN_VECTOR_SIZE, 5);
    String htype = conf.get(MinhashOptionCreator.HASH_TYPE, "linear");
    this.debugOutput = conf.getBoolean(MinhashOptionCreator.DEBUG_OUTPUT, false);

    HashType hashType;
    try {
        // valueOf matches constant names exactly (assuming HashType is an enum, e.g. LINEAR), so
        // normalise the case first; otherwise the lower-case default "linear" would always end up
        // in the fallback branch and log a spurious warning. Locale.ROOT keeps this locale-independent.
        hashType = HashType.valueOf(htype.toUpperCase(java.util.Locale.ROOT));
    } catch (IllegalArgumentException iae) {
        log.warn("No valid hash type found in configuration for {}, assuming type: {}", htype, HashType.LINEAR);
        hashType = HashType.LINEAR;
    }
    hashFunction = HashFactory.createHashFunctions(hashType, numHashFunctions);
}

From source file:org.apache.mahout.clustering.minhash.MinHashReducer.java

License:Apache License

/**
 * Reads the minimum cluster size (default 5) and the debug-output flag (default false) from the
 * job configuration.
 *
 * @param context
 *          task context supplying the job configuration
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration configuration = context.getConfiguration();
    int configuredMinClusterSize = configuration.getInt(MinhashOptionCreator.MIN_CLUSTER_SIZE, 5);
    this.minClusterSize = configuredMinClusterSize;
    boolean configuredDebugOutput = configuration.getBoolean(MinhashOptionCreator.DEBUG_OUTPUT, false);
    this.debugOutput = configuredDebugOutput;
}