List of usage examples for org.apache.hadoop.conf.Configuration.setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
Parameters: name – the property name; value – the long value of the property.
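Before the project examples, a minimal round-trip sketch (the property names are hypothetical): setLong stores the value as its string form in the configuration, and getLong parses it back, falling back to the supplied default when the property is absent.

import org.apache.hadoop.conf.Configuration;

public class SetLongRoundTrip {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.setLong("example.row.limit", 5000L);               // hypothetical property name
    long limit = conf.getLong("example.row.limit", 1000L);  // returns 5000L; 1000L is the fallback default
    long missing = conf.getLong("example.not.set", 42L);    // property absent, so the default 42L is returned
    System.out.println(limit + " " + missing);
  }
}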
From source file:org.apache.lens.lib.query.TestFileSerdeFormatter.java
License:Apache License
/**
 * Test csv with zip formatter.
 *
 * @throws IOException Signals that an I/O exception has occurred.
 */
@Test
public void testCSVWithZipFormatter() throws IOException {
  Configuration conf = new Configuration();
  setConf(conf);
  conf.setBoolean(LensConfConstants.RESULT_SPLIT_INTO_MULTIPLE, true);
  conf.setLong(LensConfConstants.RESULT_SPLIT_MULTIPLE_MAX_ROWS, 2L);
  testFormatter(conf, "UTF8", LensConfConstants.RESULT_SET_PARENT_DIR_DEFAULT, ".zip", getMockedResultSet());
  // validate rows
  List<String> actual = readZipOutputFile(new Path(formatter.getFinalOutputPath()), conf, "UTF-8");
  Assert.assertEquals(actual, getExpectedCSVRowsWithMultiple());
}
From source file:org.apache.mahout.classifier.df.mapreduce.Builder.java
License:Apache License
/**
 * Sets the random seed value
 *
 * @param conf configuration
 * @param seed random seed
 */
private static void setRandomSeed(Configuration conf, long seed) {
  conf.setLong("mahout.rf.random.seed", seed);
}
From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainJob.java
License:Apache License
/**
 * Sets the parameters related to this mapper.
 *
 * @param conf
 * @param maxIteration
 * @param samplesSize the size of training samples.
 * @param classNum the number of classes
 * @param classificationType
 * @param startingClassIndex
 */
public static void setMapperParameters(Configuration conf, int maxIteration, long samplesSize, int classNum,
    int classificationType, int startingClassIndex) {
  // set the columns to be updated
  conf.setInt(SVMParameters.HADOOP_MAX_ITERATION, maxIteration);
  conf.setLong(SVMParameters.HADOOP_SAMPLE_NUMBER, samplesSize);
  conf.setInt(SVMParameters.HADOOP_CLASS_NUMBER, classNum);
  conf.setInt(SVMParameters.HADOOP_MULTI_CLASS_TYPE, classificationType);
  conf.setInt(SVMParameters.HADOOP_STARTING_CLASS_INDEX, startingClassIndex);
}
From source file:org.apache.mahout.clustering.lda.cvb.CVBConfig.java
License:Apache License
public void write(Configuration conf) {
  conf.setInt(NUM_TOPICS_PARAM, numTopics);
  conf.setInt(NUM_TERMS_PARAM, numTerms);
  conf.setFloat(DOC_TOPIC_SMOOTHING_PARAM, alpha);
  conf.setFloat(TERM_TOPIC_SMOOTHING_PARAM, eta);
  conf.setLong(RANDOM_SEED_PARAM, randomSeed);
  conf.setFloat(TEST_SET_FRACTION_PARAM, testFraction);
  conf.setInt(NUM_TRAIN_THREADS_PARAM, numTrainThreads);
  conf.setInt(NUM_UPDATE_THREADS_PARAM, numUpdateThreads);
  conf.setInt(MAX_ITERATIONS_PER_DOC_PARAM, maxItersPerDoc);
  conf.setFloat(MODEL_WEIGHT_PARAM, modelWeight);
  conf.setBoolean(ONLY_LABELED_DOCS_PARAM, useOnlyLabeledDocs);
  conf.setFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, minRelPreplexityDiff);
  conf.setInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, maxInferenceItersPerDoc);
}
From source file:org.apache.mahout.df.mapreduce.partial.Step0JobTest.java
License:Apache License
/**
 * Computes the "mapred.max.split.size" that will generate the desired number
 * of input splits
 *
 * @param conf
 * @param inputPath
 * @param numMaps desired number of input splits
 * @throws IOException
 */
public static void setMaxSplitSize(Configuration conf, Path inputPath, int numMaps) throws IOException {
  FileSystem fs = inputPath.getFileSystem(conf);
  FileStatus status = fs.getFileStatus(inputPath);
  long goalSize = status.getLen() / numMaps;
  conf.setLong("mapred.max.split.size", goalSize);
}
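A usage sketch with hypothetical numbers and path: for a 1,000,000-byte input file and numMaps = 4, the helper stores 250,000 as the maximum split size. Note that mapred.max.split.size is the pre-Hadoop-2 property name; newer releases use mapreduce.input.fileinputformat.split.maxsize.

Configuration conf = new Configuration();
Path input = new Path("/data/input.seq");   // hypothetical path
setMaxSplitSize(conf, input, 4);            // stores fileLength / 4 under "mapred.max.split.size"
long maxSplit = conf.getLong("mapred.max.split.size", Long.MAX_VALUE); // read the value back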
From source file:org.apache.mahout.fpm.bigfim.BigFIMDriver.java
License:Apache License
private static void setConfigurationValues(Configuration conf, Config config) {
  conf.setInt(MIN_SUP_KEY, config.getMinSup());
  conf.setInt(NUMBER_OF_MAPPERS_KEY, config.getNumberOfMappers());
  conf.setInt(PREFIX_LENGTH_KEY, config.getPrefixLength());
  conf.setLong(MAPRED_TASK_TIMEOUT_KEY, config.getMapredTaskTimeout());
  conf.setBoolean(CLOSED_SETS_OPTIMIZATION_KEY, config.getClosedSetsOptimization());
  conf.setBoolean(WRITE_SETS_KEY, config.getWriteSets());
}
From source file:org.apache.mahout.fpm.bigfim.BigFIMDriver.java
License:Apache License
private static long startAprioriPhase(String inputFile, String outputFile, Config config)
    throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
  long nrLines = -1;
  int prefixSize = config.getPrefixLength();
  for (int i = 1; i <= prefixSize; i++) {
    String outputDir = outputFile + separator + "ap" + i;
    String cacheFile = outputFile + separator + "ap" + (i - 1) + separator + "part-r-00000";
    System.out.println("[AprioriPhase]: Phase: " + i + " input: " + inputFile + ", output: " + outputFile);
    Configuration conf = new Configuration();
    setConfigurationValues(conf, config);
    if (nrLines != -1) {
      conf.setLong(Config.NUMBER_OF_LINES_KEY, nrLines);
    }
    Job job = new Job(conf, "Apriori Phase" + i);
    job.setJarByClass(BigFIMDriver.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setMapperClass(AprioriPhaseMapper.class);
    job.setReducerClass(AprioriPhaseReducer.class);
    job.setInputFormatClass(SplitByNumberOfMappersTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    if (i > 1) {
      DistributedCache.addCacheFile(new URI(cacheFile), job.getConfiguration());
    }
    long start = System.currentTimeMillis();
    job.waitForCompletion(true);
    long end = System.currentTimeMillis();
    System.out.println("Job Apriori Phase " + i + " took " + (end - start) / 1000 + "s");
    if (i == 1) {
      nrLines = job.getCounters().findCounter(Task.Counter.MAP_INPUT_RECORDS).getValue();
    }
  }
  return nrLines;
}
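Note the feedback loop here: the MAP_INPUT_RECORDS counter from phase 1 becomes the nrLines value that later phases publish via setLong. On the task side such a value is typically recovered with the matching getLong; a hedged one-line sketch (the real AprioriPhaseMapper may differ, and the -1L default is an assumption):

long nrLines = context.getConfiguration().getLong(Config.NUMBER_OF_LINES_KEY, -1L);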
From source file:org.apache.mahout.fpm.bigfim.BigFIMDriver.java
License:Apache License
private static void startCreatePrefixGroups(String inputFile, String outputDir, Config config, long nrLines)
    throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
  String cacheFile = outputDir + separator + "ap" + config.getPrefixLength() + separator + "part-r-00000";
  String outputFile = outputDir + separator + "pg";
  System.out.println("[CreatePrefixGroups]: input: " + inputFile + ", output: " + outputDir);
  Configuration conf = new Configuration();
  setConfigurationValues(conf, config);
  int subDbSize = (int) Math.ceil(1.0 * nrLines / config.getNumberOfMappers());
  conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
  conf.setInt(Config.SUBDB_SIZE, subDbSize);
  Job job = new Job(conf, "Create Prefix Groups");
  job.setJarByClass(BigFIMDriver.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntArrayWritable.class);
  job.setOutputKeyClass(IntArrayWritable.class);
  job.setOutputValueClass(IntArrayWritable.class);
  job.setMapperClass(ComputeTidListMapper.class);
  job.setReducerClass(ComputeTidListReducer.class);
  job.setInputFormatClass(SplitByNumberOfMappersTextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(1);
  FileInputFormat.addInputPath(job, new Path(inputFile));
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  DistributedCache.addCacheFile(new URI(cacheFile), job.getConfiguration());
  long start = System.currentTimeMillis();
  job.waitForCompletion(true);
  long end = System.currentTimeMillis();
  System.out.println("Job Prefix Creation took " + (end - start) / 1000 + "s");
}
From source file:org.apache.mahout.fpm.disteclat.DistEclatDriver.java
License:Apache License
/**
 * Passes all configuration flags to the Hadoop Configuration framework
 *
 * @param conf the Hadoop configuration
 * @param config the configuration that has user-defined flags
 */
private static void setConfigurationValues(Configuration conf, Config config) {
  conf.setInt(MIN_SUP_KEY, config.getMinSup());
  conf.setInt(NUMBER_OF_MAPPERS_KEY, config.getNumberOfMappers());
  conf.setInt(PREFIX_LENGTH_KEY, config.getPrefixLength());
  conf.setLong(MAPRED_TASK_TIMEOUT_KEY, config.getMapredTaskTimeout());
  conf.setBoolean(CLOSED_SETS_OPTIMIZATION_KEY, config.getClosedSetsOptimization());
  conf.setBoolean(WRITE_SETS_KEY, config.getWriteSets());
}
From source file:org.apache.mahout.utils.nlp.collocations.llr.CollocDriver.java
License:Apache License
/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
    boolean emitUnigrams, float minLLRValue, int reduceTasks)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration(baseConf);
  conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
  conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
  Job job = new Job(conf);
  job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output);
  job.setJarByClass(CollocDriver.class);
  job.setMapOutputKeyClass(Gram.class);
  job.setMapOutputValueClass(Gram.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(DoubleWritable.class);
  FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
  Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY);
  FileOutputFormat.setOutputPath(job, outPath);
  job.setMapperClass(Mapper.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setReducerClass(LLRReducer.class);
  job.setNumReduceTasks(reduceTasks);
  // note: new Job(conf) above already copied conf, so depending on the Hadoop version
  // this late setFloat may not reach the reducer; setting MIN_LLR before constructing
  // the Job avoids the ambiguity
  conf.setFloat(LLRReducer.MIN_LLR, minLLRValue);
  job.waitForCompletion(true);
}
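For reference, a value published this way is typically read back in the reducer's setup method; a hedged sketch, not the actual LLRReducer code, with the -1L sentinel as an assumption:

@Override
protected void setup(Context context) {
  // recover the total n-gram count stored by the driver via setLong
  long nGramTotal = context.getConfiguration().getLong(LLRReducer.NGRAM_TOTAL, -1L);
  if (nGramTotal == -1L) {
    throw new IllegalStateException(LLRReducer.NGRAM_TOTAL + " was not set by the driver");
  }
}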