List of usage examples for org.apache.hadoop.conf.Configuration.setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
Parameters: name – the property name; value – the long value of the property.
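Before the project examples, a minimal round-trip sketch (the property names are hypothetical): setLong stores the value as its string form in the configuration, and getLong parses it back, falling back to the supplied default when the property is absent.

import org.apache.hadoop.conf.Configuration;

public class SetLongRoundTrip {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setLong("example.row.limit", 5000L);               // hypothetical property name
    long limit = conf.getLong("example.row.limit", 1000L);  // returns 5000L; 1000L is the fallback default
    long missing = conf.getLong("example.not.set", 42L);    // property absent, so the default 42L is returned
    System.out.println(limit + " " + missing);
  }
}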
From source file:org.apache.lens.lib.query.TestFileSerdeFormatter.java
License:Apache License
/**
 * Test csv with zip formatter.
 *
 * @throws IOException Signals that an I/O exception has occurred.
 */
@Test
public void testCSVWithZipFormatter() throws IOException {
  Configuration conf = new Configuration();
  setConf(conf);
  conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
  conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
  testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip", getMockedResultSet());
  // validate rows
  List<String> actual = readZipOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8");
  Assert.assertEquals(actual, getExpectedCSVRowsWithMultiple());
}
From source file:org.apache.mahout.classifier.df.mapreduce.Builder.java
License:Apache License
/**
 * Sets the random seed value
 *
 * @param conf configuration
 * @param seed random seed
 */
private static void setRandomSeed(Configuration conf, long seed) {
  conf.setLong("mahout.rf.random.seed", seed);
}
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainJob.java
License:Apache License
/**
 * Sets the parameters related to this mapper.
 *
 * @param conf
 * @param maxIteration
 * @param samplesSize the size of training samples.
 * @param classNum the number of classes
 * @param classificationType
 * @param startingClassIndex
 */
public static void setMapperParameters(Configuration conf, int maxIteration, long samplesSize, int classNum,
    int classificationType, int startingClassIndex) {
  // set the columns to be updated
  conf.setInt(SVMParameters.HADOOP_MAX_ITERATION, maxIteration);
  conf.setLong(SVMParameters.HADOOP_SAMPLE_NUMBER, samplesSize);
  conf.setInt(SVMParameters.HADOOP_CLASS_NUMBER, classNum);
  conf.setInt(SVMParameters.HADOOP_MULTI_CLASS_TYPE, classificationType);
  conf.setInt(SVMParameters.HADOOP_STARTING_CLASS_INDEX, startingClassIndex);
}
From source file:org.apache.mahout.clustering.lda.cvb.CVBConfig.java
License:Apache License
public void write(Configuration conf) {
  conf.setInt(NUM_TOPICS_PARAM, numTopics);
  conf.setInt(NUM_TERMS_PARAM, numTerms);
  conf.setFloat(DOC_TOPIC_SMOOTHING_PARAM, alpha);
  conf.setFloat(TERM_TOPIC_SMOOTHING_PARAM, eta);
  conf.setLong(RANDOM_SEED_PARAM, randomSeed);
  conf.setFloat(TEST_SET_FRACTION_PARAM, testFraction);
  conf.setInt(NUM_TRAIN_THREADS_PARAM, numTrainThreads);
  conf.setInt(NUM_UPDATE_THREADS_PARAM, numUpdateThreads);
  conf.setInt(MAX_ITERATIONS_PER_DOC_PARAM, maxItersPerDoc);
  conf.setFloat(MODEL_WEIGHT_PARAM, modelWeight);
  conf.setBoolean(ONLY_LABELED_DOCS_PARAM, useOnlyLabeledDocs);
  conf.setFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, minRelPreplexityDiff);
  conf.setInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, maxInferenceItersPerDoc);
}
From source file:org.apache.mahout.df.mapreduce.partial.Step0JobTest.java
License:Apache License
/**
 * Computes the "mapred.max.split.size" that will generate the desired number
 * of input splits
 *
 * @param conf
 * @param inputPath
 * @param numMaps desired number of input splits
 * @throws IOException
 */
public static void setMaxSplitSize(Configuration conf, Path inputPath, int numMaps) throws IOException {
  FileSystem fs = inputPath.getFileSystem(conf);
  FileStatus status = fs.getFileStatus(inputPath);
  long goalSize = status.getLen() / numMaps;
  conf.setLong("mapred.max.split.size", goalSize);
}
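A usage sketch with hypothetical numbers and path: for a 1,000,000-byte input file and numMaps = 4, the helper stores 250,000 as the maximum split size. Note that mapred.max.split.size is the pre-Hadoop-2 property name; newer releases use mapreduce.input.fileinputformat.split.maxsize.

Configuration conf = new Configuration();
Path input = new Path("/data/input.seq");   // hypothetical path
setMaxSplitSize(conf, input, 4);            // stores fileLength / 4 under "mapred.max.split.size"
long maxSplit = conf.getLong("mapred.max.split.size", Long.MAX_VALUE); // read the value back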
From source file:org.apache.mahout.fpm.bigfim.BigFIMDriver.java
License:Apache License
private static void setConfigurationValues(Configuration conf, Config config) {
  conf.setInt(MIN_SUP_KEY, config.getMinSup());
  conf.setInt(NUMBER_OF_MAPPERS_KEY, config.getNumberOfMappers());
  conf.setInt(PREFIX_LENGTH_KEY, config.getPrefixLength());
  conf.setLong(MAPRED_TASK_TIMEOUT_KEY, config.getMapredTaskTimeout());
  conf.setBoolean(CLOSED_SETS_OPTIMIZATION_KEY, config.getClosedSetsOptimization());
  conf.setBoolean(WRITE_SETS_KEY, config.getWriteSets());
}
From source file:org.apache.mahout.fpm.bigfim.BigFIMDriver.java
License:Apache License
private static long startAprioriPhase(String inputFile, String outputFile, Config config)
    throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
  long nrLines = -1;
  int prefixSize = config.getPrefixLength();
  for (int i = 1; i <= prefixSize; i++) {
    String outputDir = outputFile + separator + "ap" + i;
    String cacheFile = outputFile + separator + "ap" + (i - 1) + separator + "part-r-00000";
    System.out.println("[AprioriPhase]: Phase: " + i + " input: " + inputFile + ", output: " + outputFile);
    Configuration conf = new Configuration();
    setConfigurationValues(conf, config);
    if (nrLines != -1) {
      conf.setLong(Config.NUMBER_OF_LINES_KEY, nrLines);
    }
    Job job = new Job(conf, "Apriori Phase" + i);
    job.setJarByClass(BigFIMDriver.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(AprioriPhaseMapper.class);
    job.setReducerClass(AprioriPhaseReducer.class);
    job.setInputFormatClass(SplitByNumberOfMappersTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    if (i > 1) {
      DistributedCache.addCacheFile(new URI(cacheFile), job.getConfiguration());
    }
    long start = System.currentTimeMillis();
    job.waitForCompletion(true);
    long end = System.currentTimeMillis();
    System.out.println("Job Apriori Phase " + i + " took " + (end - start) / 1000 + "s");
    if (i == 1) {
      nrLines = job.getCounters().findCounter(Task.Counter.MAP_INPUT_RECORDS).getValue();
    }
  }
  return nrLines;
}
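Note the feedback loop here: the MAP_INPUT_RECORDS counter from phase 1 becomes the nrLines value that later phases publish via setLong. On the task side such a value is typically recovered with the matching getLong; a hedged one-line sketch (the real AprioriPhaseMapper may differ, and the -1L default is an assumption):

long nrLines = context.getConfiguration().getLong(Config.NUMBER_OF_LINES_KEY, -1L);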
From source file:org.apache.mahout.fpm.bigfim.BigFIMDriver.java
License:Apache License
private static void startCreatePrefixGroups(String inputFile, String outputDir, Config config, long nrLines)
    throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
  String cacheFile = outputDir + separator + "ap" + config.getPrefixLength() + separator + "part-r-00000";
  String outputFile = outputDir + separator + "pg";
  System.out.println("[CreatePrefixGroups]: input: " + inputFile + ", output: " + outputDir);
  Configuration conf = new Configuration();
  setConfigurationValues(conf, config);
  int subDbSize = (int) Math.ceil(1.0 * nrLines / config.getNumberOfMappers());
  conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
  conf.setInt(Config.SUBDB_SIZE, subDbSize);
  Job job = new Job(conf, "Create Prefix Groups");
  job.setJarByClass(BigFIMDriver.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntArrayWritable.class);
  job.setOutputKeyClass(IntArrayWritable.class);
  job.setOutputValueClass(IntArrayWritable.class);
  job.setMapperClass(ComputeTidListMapper.class);
  job.setReducerClass(ComputeTidListReducer.class);
  job.setInputFormatClass(SplitByNumberOfMappersTextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(1);
  FileInputFormat.addInputPath(job, new Path(inputFile));
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  DistributedCache.addCacheFile(new URI(cacheFile), job.getConfiguration());
  long start = System.currentTimeMillis();
  job.waitForCompletion(true);
  long end = System.currentTimeMillis();
  System.out.println("Job Prefix Creation took " + (end - start) / 1000 + "s");
}
From source file:org.apache.mahout.fpm.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Passes all configuration flags to the Hadoop Configuration framework
 *
 * @param conf the Hadoop configuration
 * @param config the configuration that has user-defined flags
 */
private static void setConfigurationValues(Configuration conf, Config config) {
  conf.setInt(MIN_SUP_KEY, config.getMinSup());
  conf.setInt(NUMBER_OF_MAPPERS_KEY, config.getNumberOfMappers());
  conf.setInt(PREFIX_LENGTH_KEY, config.getPrefixLength());
  conf.setLong(MAPRED_TASK_TIMEOUT_KEY, config.getMapredTaskTimeout());
  conf.setBoolean(CLOSED_SETS_OPTIMIZATION_KEY, config.getClosedSetsOptimization());
  conf.setBoolean(WRITE_SETS_KEY, config.getWriteSets());
}
From source file:org.apache.mahout.utils.nlp.collocations.llr.CollocDriver.java
License:Apache License
/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
    boolean emitUnigrams, float minLLRValue, int reduceTasks)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration(baseConf);
  conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
  conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
  Job job = new Job(conf);
  job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output);
  job.setJarByClass(CollocDriver.class);
  job.setMapOutputKeyClass(Gram.class);
  job.setMapOutputValueClass(Gram.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
  Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY);
  FileOutputFormat.setOutputPath(job, outPath);
  job.setMapperClass(Mapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setReducerClass(LLRReducer.class);
  job.setNumReduceTasks(reduceTasks);
  // note: new Job(conf) above already copied conf, so depending on the Hadoop version
  // this late setFloat may not reach the reducer; setting MIN_LLR before constructing
  // the Job avoids the ambiguity
  conf.setFloat(LLRReducer.MIN_LLR, minLLRValue);
  job.waitForCompletion(true);
}
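For reference, a value published this way is typically read back in the reducer's setup method; a hedged sketch, not the actual LLRReducer code, with the -1L sentinel as an assumption:

@Override
protected void setup(Context context) {
  // recover the total n-gram count stored by the driver via setLong
  long nGramTotal = context.getConfiguration().getLong(LLRReducer.NGRAM_TOTAL, -1L);
  if (nGramTotal == -1L) {
    throw new IllegalStateException(LLRReducer.NGRAM_TOTAL + " was not set by the driver");
  }
}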