Example usage for org.apache.hadoop.conf Configuration getBoolean

List of usage examples for org.apache.hadoop.conf Configuration getBoolean

Introduction

On this page you can find example usages of org.apache.hadoop.conf.Configuration.getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Source Link

Document

Get the value of the name property as a boolean. If the property is not set, or its value is not a valid boolean, defaultValue is returned.

Usage

From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansReducer.java

License:Apache License

/**
 * Clusters the given centroids with {@code BallKMeans}, taking every tuning parameter from the
 * supplied configuration.
 *
 * @param centroids the centroids to cluster; their count is logged at INFO level
 * @param conf      configuration providing the number of clusters, iteration limit, trim fraction,
 *                  initialisation and weighting flags, test probability and number of runs
 * @return whatever {@code BallKMeans.cluster(centroids)} produces
 */
public static Iterable<Vector> getBestCentroids(List<Centroid> centroids, Configuration conf) {
    if (log.isInfoEnabled()) {
        log.info("Number of Centroids: {}", centroids.size());
    }

    // The configuration stores the "opt-out" form of these two flags; BallKMeans is handed
    // the negated values.
    boolean randomInit = conf.getBoolean(StreamingKMeansDriver.RANDOM_INIT, false);
    boolean ignoreWeights = conf.getBoolean(StreamingKMeansDriver.IGNORE_WEIGHTS, false);

    int clusterCount = conf.getInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1);
    int iterationLimit = conf.getInt(StreamingKMeansDriver.MAX_NUM_ITERATIONS, 10);
    float trim = conf.getFloat(StreamingKMeansDriver.TRIM_FRACTION, 0.9f);
    float testFraction = conf.getFloat(StreamingKMeansDriver.TEST_PROBABILITY, 0.1f);
    int runs = conf.getInt(StreamingKMeansDriver.NUM_BALLKMEANS_RUNS, 3);

    return new BallKMeans(StreamingKMeansUtilsMR.searcherFromConfiguration(conf), clusterCount,
            iterationLimit, trim, !randomInit, !ignoreWeights, testFraction, runs).cluster(centroids);
}

From source file:org.apache.mahout.df.mapred.Builder.java

License:Apache License

/**
 * Reads the {@code mahout.rf.oob} flag from the configuration.
 *
 * @param conf configuration to query
 * @return the configured value, or {@code false} when the property is absent
 */
protected static boolean isOobEstimate(Configuration conf) {
    final boolean oobRequested = conf.getBoolean("mahout.rf.oob", false);
    return oobRequested;
}

From source file:org.apache.mahout.df.mapred.partial.PartialBuilder.java

License:Apache License

/**
 * Tells whether the second step of the builder should be run.
 * <p>
 * The underlying property is intended for debugging only, which is why this accessor is protected.
 *
 * @param conf configuration to query
 * @return the value of {@code debug.mahout.rf.partial.step2}, or {@code true} when it is absent
 */
protected static boolean isStep2(Configuration conf) {
    final boolean runSecondStep = conf.getBoolean("debug.mahout.rf.partial.step2", true);
    return runSecondStep;
}

From source file:org.apache.mahout.ga.watchmaker.cd.hadoop.DatasetSplit.java

License:Apache License

/**
 * Reads the mandatory {@code TRAINING} job parameter.
 *
 * @param conf configuration to query
 * @return the boolean value stored under {@code TRAINING}
 * @throws IllegalArgumentException if the parameter is not present in {@code conf}
 */
static boolean isTraining(Configuration conf) {
    String rawValue = conf.get(TRAINING);
    if (rawValue != null) {
        // The parameter exists, so the default passed here only applies to non-boolean values.
        return conf.getBoolean(TRAINING, true);
    }
    throw new IllegalArgumentException("TRAINING job parameter not found");
}

From source file:org.apache.mahout.knn.experimental.StreamingKMeansDriver.java

License:Apache License

/**
 * Configures and submits the StreamingKMeans MapReduce job, waits for it to finish and, unless
 * disabled through the {@code summarize} property, summarizes the resulting clustering.
 *
 * @param conf
 *          the job configuration; also consulted for the {@code logDebug} and {@code summarize} flags
 * @param input
 *          the directory pathname for input points
 * @param output
 *          the directory pathname for output points
 * @throws IOException
 *           propagated from job setup, job execution or summarization
 * @throws InterruptedException
 *           if the job does not complete successfully
 * @throws ClassNotFoundException
 *           propagated from job execution
 */
public static void run(Configuration conf, Path input, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    log.info("Starting StreamingKMeans clustering for vectors in {}; results are output to {}",
            input.toString(), output.toString());

    Job job = new Job(conf, "StreamingKMeans");

    // The JAR is located through the driver class so the required libraries ship with the job.
    job.setJarByClass(StreamingKMeansDriver.class);

    // Map and reduce stages. A single reducer gathers all intermediate centroids on one machine,
    // where they are clustered in memory down to the requested number of clusters.
    job.setMapperClass(StreamingKMeansMapper.class);
    job.setReducerClass(StreamingKMeansReducer.class);
    job.setNumReduceTasks(1);

    // Keys carry no real information because everything ends up at the single reducer.
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(CentroidWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(CentroidWritable.class);

    // Sequence files in, sequence files out.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    // Unless debug logging was requested, cap the StreamingKMeans logger at INFO.
    // TODO(dfilimon): Remove this completely and configure using log files.
    boolean debugLogging = conf.getBoolean("logDebug", false);
    if (!debugLogging) {
        LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
        loggerContext.getLogger(StreamingKMeans.class).setLevel(Level.INFO);
    }

    long startMillis = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new InterruptedException("StreamingKMeans interrupted");
    }
    // Elapsed time covers the job only, not the optional summary below.
    long elapsedMillis = System.currentTimeMillis() - startMillis;

    if (conf.getBoolean("summarize", true)) {
        EvaluateClustering.summarize(conf, output, log);
    }
    log.info("StreamingKMeans clustering complete. Results are in {}. Took {} ms", output.toString(),
            elapsedMillis);
}

From source file:org.apache.mahout.text.SequenceFilesFromMailArchivesMapper.java

License:Apache License

/**
 * Builds the {@code MailOptions} for this mapper from the job configuration.
 * <p>
 * Reads the key prefix, optional chunk size, charset (UTF-8 when none is configured), the header
 * patterns to extract (from, to, references, subject), the quoted-text and body flags, and the
 * optional separator, body separator and quoted-text regex.
 *
 * @param context the task context whose configuration supplies the options
 * @throws IOException          declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void setup(Context context) throws IOException, InterruptedException {

    Configuration configuration = context.getConfiguration();

    // absorb all of the options into the MailOptions object
    this.options = new MailOptions();

    options.setPrefix(configuration.get(KEY_PREFIX_OPTION[1], ""));

    if (!configuration.get(CHUNK_SIZE_OPTION[0], "").isEmpty()) {
        options.setChunkSize(configuration.getInt(CHUNK_SIZE_OPTION[0], 64));
    }

    // An unset or empty charset option falls back to UTF-8.
    String charsetName = configuration.get(CHARSET_OPTION[0], "");
    options.setCharset(Charset.forName(charsetName.isEmpty() ? "UTF-8" : charsetName));

    List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
    // patternOrder is used downstream so that we can know what order the text is in, instead of
    // encoding it in the string, which would require more processing later to remove it before
    // feature selection. Each entry records the zero-based position of its pattern in `patterns`.
    Map<String, Integer> patternOrder = Maps.newHashMap();
    int order = 0;
    if (!configuration.get(FROM_OPTION[1], "").isEmpty()) {
        patterns.add(MailProcessor.FROM_PREFIX);
        patternOrder.put(MailOptions.FROM, order++);
    }

    if (!configuration.get(TO_OPTION[1], "").isEmpty()) {
        patterns.add(MailProcessor.TO_PREFIX);
        patternOrder.put(MailOptions.TO, order++);
    }

    if (!configuration.get(REFERENCES_OPTION[1], "").isEmpty()) {
        patterns.add(MailProcessor.REFS_PREFIX);
        patternOrder.put(MailOptions.REFS, order++);
    }

    if (!configuration.get(SUBJECT_OPTION[1], "").isEmpty()) {
        patterns.add(MailProcessor.SUBJECT_PREFIX);
        // Post-increment, like the entries above: `order += 1` would store the already
        // incremented value and leave SUBJECT pointing one slot past its pattern.
        patternOrder.put(MailOptions.SUBJECT, order++);
    }

    options.setStripQuotedText(configuration.getBoolean(STRIP_QUOTED_OPTION[1], false));

    options.setPatternsToMatch(patterns.toArray(new Pattern[patterns.size()]));
    options.setPatternOrder(patternOrder);

    options.setIncludeBody(configuration.getBoolean(BODY_OPTION[1], false));

    // Newline is the default separator; a non-empty configured value replaces it.
    options.setSeparator("\n");
    String separator = configuration.get(SEPARATOR_OPTION[1], "");
    if (!separator.isEmpty()) {
        options.setSeparator(separator);
    }
    String bodySeparator = configuration.get(BODY_SEPARATOR_OPTION[1], "");
    if (!bodySeparator.isEmpty()) {
        options.setBodySeparator(bodySeparator);
    }
    String quotedRegex = configuration.get(QUOTED_REGEX_OPTION[1], "");
    if (!quotedRegex.isEmpty()) {
        options.setQuotedTextPattern(Pattern.compile(quotedRegex));
    }

}

From source file:org.apache.mahout.text.wikipedia.WikipediaDatasetCreatorMapper.java

License:Apache License

/**
 * Initialises the category list and patterns, the exact-match flag and the analyzer from the
 * job configuration. Categories and analyzer are only created when they are still {@code null}.
 *
 * @param context the task context whose configuration is read
 * @throws IOException          if the category set cannot be serialised or deserialised, or from
 *                              the superclass setup
 * @throws InterruptedException propagated from the superclass setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration conf = context.getConfiguration();

    if (inputCategories == null) {
        // An empty set serves both as the type witness for the stringifier and as the default
        // value when "wikipedia.categories" is not configured.
        Set<String> emptyCategories = Sets.newHashSet();
        DefaultStringifier<Set<String>> stringifier = new DefaultStringifier<Set<String>>(conf,
                GenericsUtil.getClass(emptyCategories));
        String encodedCategories = conf.get("wikipedia.categories", stringifier.toString(emptyCategories));
        Set<String> decodedCategories = stringifier.fromString(encodedCategories);

        inputCategories = Lists.newArrayList(decodedCategories);
        inputCategoryPatterns = Lists.newArrayListWithCapacity(inputCategories.size());
        for (String category : inputCategories) {
            // NOTE(review): the category is inserted into the regex unquoted - confirm that
            // categories never contain regex metacharacters, or that this is intended.
            String wholeWordRegex = ".*\\b" + category + "\\b.*";
            inputCategoryPatterns.add(Pattern.compile(wholeWordRegex));
        }
    }

    exactMatchOnly = conf.getBoolean("exact.match.only", false);

    if (analyzer == null) {
        String analyzerClassName = conf.get("analyzer.class", WikipediaAnalyzer.class.getName());
        analyzer = ClassUtils.instantiateAs(analyzerClassName, Analyzer.class);
    }

    log.info("Configure: Input Categories size: {} Exact Match: {} Analyzer: {}", inputCategories.size(),
            exactMatchOnly, analyzer.getClass().getName());
}

From source file:org.apache.mahout.text.wikipedia.WikipediaMapper.java

License:Apache License

/**
 * Loads the input categories and the {@code exact.match.only}, {@code all.files} and
 * {@code remove.labels} flags from the job configuration, then logs the resulting settings.
 *
 * @param context the task context whose configuration is read
 * @throws IOException          if the category set cannot be deserialised, or from the superclass setup
 * @throws InterruptedException propagated from the superclass setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    // The empty set is only needed to tell the stringifier which class it handles.
    Set<String> typeWitness = Sets.newHashSet();
    DefaultStringifier<Set<String>> stringifier = new DefaultStringifier<Set<String>>(conf,
            GenericsUtil.getClass(typeWitness));

    // NOTE(review): no default is supplied here, so an unset "wikipedia.categories" hands null
    // to fromString - confirm the driver always sets this property.
    inputCategories = stringifier.fromString(conf.get("wikipedia.categories"));

    exactMatchOnly = conf.getBoolean("exact.match.only", false);
    all = conf.getBoolean("all.files", false);
    removeLabels = conf.getBoolean("remove.labels", false);

    log.info("Configure: Input Categories size: {} All: {} Exact Match: {} Remove Labels from Text: {}",
            inputCategories.size(), all, exactMatchOnly, removeLabels);
}

From source file:org.apache.mahout.text.WikipediaMapper.java

License:Apache License

/**
 * Loads the input categories (when not already set) and the {@code exact.match.only} and
 * {@code all.files} flags from the job configuration, then logs the resulting settings.
 *
 * @param context the task context whose configuration is read
 * @throws IOException           propagated from the superclass setup
 * @throws InterruptedException  propagated from the superclass setup
 * @throws IllegalStateException if serialising or deserialising the category set fails with an
 *                               {@code IOException}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    if (inputCategories == null) {
        try {
            Set<String> emptyCategories = new HashSet<String>();
            DefaultStringifier<Set<String>> stringifier = new DefaultStringifier<Set<String>>(conf,
                    GenericsUtil.getClass(emptyCategories));

            // The serialised empty set is the fallback when no categories are configured.
            String fallback = stringifier.toString(emptyCategories);
            String encodedCategories = conf.get("wikipedia.categories", fallback);
            inputCategories = stringifier.fromString(encodedCategories);
        } catch (IOException ex) {
            throw new IllegalStateException(ex);
        }
    }

    exactMatchOnly = conf.getBoolean("exact.match.only", false);
    all = conf.getBoolean("all.files", true);

    Object[] logArgs = { inputCategories.size(), all, exactMatchOnly };
    log.info("Configure: Input Categories size: {} All: {} Exact Match: {}", logArgs);
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.CollocReducer.java

License:Apache License

/**
 * Reads the minimum support from the job configuration into {@code minSupport} and logs it
 * together with the configured emit-unigrams flag.
 *
 * @param context the task context whose configuration is read
 * @throws IOException          propagated from the superclass setup
 * @throws InterruptedException propagated from the superclass setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration jobConf = context.getConfiguration();
    this.minSupport = jobConf.getInt(MIN_SUPPORT, DEFAULT_MIN_SUPPORT);
    log.info("Min support is {}", minSupport);

    // The unigram flag is read for logging only; it is not stored by this method.
    boolean unigramsRequested = jobConf.getBoolean(CollocDriver.EMIT_UNIGRAMS,
            CollocDriver.DEFAULT_EMIT_UNIGRAMS);
    log.info("Emit Unitgrams is {}", unigramsRequested);
}