public void setFloat(String name, float value) 

Source Link


Sets the value of the `name` property to a `float`.


From source file:org.apache.hcatalog.mapreduce.TestHCatInputFormat.java

License:Apache License

/**
 * Configures and runs a job with zero reduce tasks over the "default" / "test_bad_records"
 * input, using the supplied bad-record threshold.
 *
 * <p>NOTE(review): this excerpt looks truncated. The closing braces of the {@code if} block
 * and of the method are not present, and other lines may have been dropped. Confirm against
 * the original file before relying on it.
 *
 * @param badRecordThreshold value stored in the job configuration under
 *        {@code HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}
 * @return the result of {@code job.waitForCompletion(true)}
 * @throws Exception declared by the signature; the visible code does not narrow it
 */
private boolean runJob(float badRecordThreshold) throws Exception {
    Configuration conf = new Configuration();

    // The threshold must be in the configuration before the Job is created from it.
    conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);

    Job job = new Job(conf);


    // presumably (job, database name, table name) -- verify against HCatInputFormat.setInput
    HCatInputFormat.setInput(job, "default", "test_bad_records");


    job.setNumReduceTasks(0);

    // Remove any output left at this path (recursive delete) before pointing the job at it.
    Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
    if (path.getFileSystem(conf).exists(path)) {
        path.getFileSystem(conf).delete(path, true);

    TextOutputFormat.setOutputPath(job, path);

    return job.waitForCompletion(true);

From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java

License:Apache License

/**
 * Builds a {@code RecordWriter} that keeps one {@code WriterLength} entry per column family
 * (in the {@code writers} map) and places family directories under the committer's work path.
 *
 * <p>NOTE(review): this listing is visibly truncated. Braces are unclosed, several builder
 * chains are cut off mid-expression, and some branch bodies are missing. The comments below
 * describe only what the visible lines show; confirm everything against the original file.
 *
 * @param context task attempt context; its configuration supplies all settings read here
 * @param committer output committer, cast to {@code FileOutputCommitter} to obtain the work path
 * @return an anonymous {@code RecordWriter} implementation
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 */
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    // An explicit override, when configured, takes precedence over the per-family encoding map
    // (see getNewWriter below).
    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        // NOTE(review): the TreeMap constructor argument list is cut off in this excerpt.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        // Handles one cell: tracks bytes written per family and requests a roll once a family's
        // writer would reach maxsize.
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // NOTE(review): the body of this null check is missing from the excerpt.
            if (row == null && kv == null) {
            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            // First cell seen for this family: make sure its directory exists.
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            // Adding this cell would reach the size limit, so flag a roll.
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            // The row-key comparison means a requested roll is only acted on when the row changes.
            // NOTE(review): the branch body is missing; presumably it calls rollWriters() -- confirm.
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            wl.written += length;
            this.previousRow = rowKey;

        // Resets every family's writer reference and byte counter, then clears the roll flag.
        // NOTE(review): no close of the old writer is visible here; lines may be missing.
        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                wl.writer = null;
                wl.written = 0;
            this.rollRequested = false;

        // Creates and registers a WriterLength for the family, resolving compression, bloom type,
        // block size and data block encoding from the per-family maps with fallbacks.
        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            // Encoding precedence: explicit override, then per-family setting, then NONE.
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            // A copy of the configuration with the block cache size set to 0.0f is used only for
            // the CacheConfig handed to the writer builder.
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            // NOTE(review): both builder chains below are cut off in this excerpt.
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
            HFileContext hFileContext = contextBuilder.build();

            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)

            this.writers.put(family, wl);
            return wl;

        // Appends bulk-load time, major-compaction and exclude-from-minor-compaction file info
        // to a non-null writer.
        // NOTE(review): the actual close call is not visible in this excerpt.
        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));

        // Iterates over all per-family writers.
        // NOTE(review): the loop body is missing from this excerpt.
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {

From source file:org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesTrainer.java

License:Apache License

/**
 * Runs the naive Bayes training steps in sequence: stores the smoothing value, creates the
 * label map file, then runs the by-label summer, the weight summer and finally one of the two
 * theta summers.
 *
 * <p>NOTE(review): the closing braces of the {@code else} block and of the method are missing
 * from this excerpt.
 *
 * @param input input path passed to the by-label summer
 * @param conf configuration shared by all steps; {@code alphaI} is stored in it under {@code ALPHA_I}
 * @param inputLabels labels passed to {@code createLabelMapFile}
 * @param output base directory under which the label map, class vectors, sum vectors and
 *        theta sum paths are created
 * @param numReducers passed through to every step that is run
 * @param alphaI value stored under {@code ALPHA_I}
 * @param trainComplementary selects the complementary theta summer when {@code true},
 *        otherwise the standard theta summer
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 * @throws ClassNotFoundException declared by the signature
 */
public static void trainNaiveBayes(Path input, Configuration conf, Iterable<String> inputLabels, Path output,
        int numReducers, float alphaI, boolean trainComplementary)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setFloat(ALPHA_I, alphaI);
    Path labelMapPath = createLabelMapFile(inputLabels, conf, new Path(output, LABEL_MAP));
    // Each step below receives the path handed to the previous step as its input location.
    Path classVectorPath = new Path(output, CLASS_VECTORS);
    runNaiveBayesByLabelSummer(input, conf, labelMapPath, classVectorPath, numReducers);
    Path weightFilePath = new Path(output, SUM_VECTORS);
    runNaiveBayesWeightSummer(classVectorPath, conf, labelMapPath, weightFilePath, numReducers);
    Path thetaFilePath = new Path(output, THETA_SUM);
    if (trainComplementary) {
        runNaiveBayesThetaComplementarySummer(classVectorPath, conf, weightFilePath, thetaFilePath,
                numReducers);
    } else {
        runNaiveBayesThetaSummer(classVectorPath, conf, weightFilePath, thetaFilePath, numReducers);

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainJob.java

License:Apache License

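/**
 * Sets the parameters related to the reducer in the given configuration.
 * <p>
 * <ol>
 * <li></li>
 * </ol>
 * @param conf
 *          configuration that receives the values
 * @param lambda
 *          stored as a float under SVMParameters.HADOOP_LAMBDA
 * @param k
 *          stored under SVMParameters.HADOOP_K
 * @param modelFile
 *          model files store path
 * @param hdfsServer
 *          hdfs server address
 */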
public static void setReducerParameters(Configuration conf, double lambda, int k, String modelFile,
        String hdfsServer) {
    // NOTE(review): the method's closing brace is missing from this excerpt.
    // Store each argument in the configuration under its SVMParameters key.
    // lambda is narrowed from double to float, so precision beyond float is lost.
    conf.setFloat(SVMParameters.HADOOP_LAMBDA, (float) lambda);
    conf.setInt(SVMParameters.HADOOP_K, k);
    conf.set(SVMParameters.HADOOP_MODLE_PATH, modelFile);
    conf.set(SVMParameters.HDFS_SERVER, hdfsServer);

From source file:org.apache.mahout.clustering.classify.ClusterClassificationDriver.java

License:Apache License

/**
 * Configures and runs a cluster classification job with zero reduce tasks, throwing if the
 * job does not complete successfully.
 *
 * <p>NOTE(review): this excerpt looks truncated. {@code clusterClassificationThreshold} is
 * never used in the visible lines (the statement that stores it appears to have been
 * dropped), and the closing braces of the {@code if} block and of the method are missing.
 *
 * @param conf configuration that receives the classification settings and backs the job
 * @param input input path added to the job
 * @param clustersIn path whose URI string is stored under
 *        {@code ClusterClassificationConfigKeys.CLUSTERS_IN}
 * @param output output path set on the job
 * @param clusterClassificationThreshold not referenced in the visible code
 * @param emitMostLikely stored under {@code ClusterClassificationConfigKeys.EMIT_MOST_LIKELY}
 * @throws IOException declared by the signature
 * @throws InterruptedException also thrown explicitly when {@code waitForCompletion} returns false
 * @throws ClassNotFoundException declared by the signature
 */
private static void classifyClusterMR(Configuration conf, Path input, Path clustersIn, Path output,
        Double clusterClassificationThreshold, boolean emitMostLikely)
        throws IOException, InterruptedException, ClassNotFoundException {

    conf.setBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, emitMostLikely);
    conf.set(ClusterClassificationConfigKeys.CLUSTERS_IN, clustersIn.toUri().toString());

    Job job = new Job(conf, "Cluster Classification Driver running over input: " + input);


    job.setNumReduceTasks(0);


    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    // A false result from waitForCompletion is reported as an InterruptedException.
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Cluster Classification Driver Job failed processing " + input);

From source file:org.apache.mahout.clustering.lda.cvb.CVBConfig.java

License:Apache License

/**
 * Copies this object's settings into the given configuration, one key per field.
 *
 * <p>NOTE(review): the method's closing brace is missing from this excerpt.
 *
 * @param conf configuration that receives the values
 */
public void write(Configuration conf) {
    conf.setInt(NUM_TOPICS_PARAM, numTopics);
    conf.setInt(NUM_TERMS_PARAM, numTerms);
    conf.setFloat(DOC_TOPIC_SMOOTHING_PARAM, alpha);
    conf.setFloat(TERM_TOPIC_SMOOTHING_PARAM, eta);
    conf.setLong(RANDOM_SEED_PARAM, randomSeed);
    conf.setFloat(TEST_SET_FRACTION_PARAM, testFraction);
    conf.setInt(NUM_TRAIN_THREADS_PARAM, numTrainThreads);
    conf.setInt(NUM_UPDATE_THREADS_PARAM, numUpdateThreads);
    conf.setInt(MAX_ITERATIONS_PER_DOC_PARAM, maxItersPerDoc);
    conf.setFloat(MODEL_WEIGHT_PARAM, modelWeight);
    conf.setBoolean(ONLY_LABELED_DOCS_PARAM, useOnlyLabeledDocs);
    conf.setFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, minRelPreplexityDiff);
    conf.setInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, maxInferenceItersPerDoc);

From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansDriver.java

License:Apache License

/**
 * Checks the parameters for a StreamingKMeans job and prepares a Configuration with them.
 * @param conf the Configuration to populate
 * @param numClusters k, the number of clusters at the end
 * @param estimatedNumMapClusters O(k log n), the number of clusters requested from each mapper
 * @param estimatedDistanceCutoff an estimate of the minimum distance that separates two clusters (can be smaller and
 *                                will be increased dynamically)
 * @param maxNumIterations the maximum number of iterations of BallKMeans
 * @param trimFraction the fraction of the points to be considered in updating a ball k-means
 * @param randomInit whether to initialize the ball k-means seeds randomly
 * @param ignoreWeights whether to ignore the invalid final ball k-means weights
 * @param testProbability the percentage of vectors assigned to the test set for selecting the best final centers
 * @param numBallKMeansRuns the number of BallKMeans runs in the reducer that determine the centroids to return
 *                          (clusters are computed for the training set and the error is computed on the test set)
 * @param measureClass string, name of the distance measure class; theory works for Euclidean-like distances
 * @param searcherClass string, name of the searcher that will be used for nearest neighbor search
 * @param searchSize the number of closest neighbors to look at for selecting the closest one in approximate nearest
 *                   neighbor searches
 * @param numProjections the number of projected vectors to use for faster searching (only useful for ProjectionSearch
 *                       or FastProjectionSearch); @see org.apache.mahout.math.neighborhood.ProjectionSearch
 * @param method value stored in the Configuration under DefaultOptionCreator.METHOD_OPTION
 * @param reduceStreamingKMeans value stored in the Configuration under REDUCE_STREAMING_KMEANS
 */
public static void configureOptionsForWorkers(Configuration conf, int numClusters,
        /* StreamingKMeans */
        int estimatedNumMapClusters, float estimatedDistanceCutoff,
        /* BallKMeans */
        int maxNumIterations, float trimFraction, boolean randomInit, boolean ignoreWeights,
        float testProbability, int numBallKMeansRuns,
        /* Searcher */
        String measureClass, String searcherClass, int searchSize, int numProjections, String method,
        boolean reduceStreamingKMeans) throws ClassNotFoundException {
    // NOTE(review): this excerpt is truncated. Several closing braces are missing, and two
    // argument lists below have lost the call they belong to. Confirm against the original file.

    // Checking preconditions for the parameters.
    Preconditions.checkArgument(numClusters > 0,
            "Invalid number of clusters requested: " + numClusters + ". Must be: numClusters > 0!");

    // StreamingKMeans
    Preconditions.checkArgument(estimatedNumMapClusters > numClusters, "Invalid number of estimated map "
            + "clusters; There must be more than the final number of clusters (k log n vs k)");
            // NOTE(review): the next two lines are arguments whose opening call is missing
            // (presumably another Preconditions.checkArgument). The message also reads
            // "greater then"; it is a runtime string and is left untouched here.
            estimatedDistanceCutoff == INVALID_DISTANCE_CUTOFF || estimatedDistanceCutoff > 0,
            "estimatedDistanceCutoff must be equal to -1 or must be greater then 0!");

    // BallKMeans
    Preconditions.checkArgument(maxNumIterations > 0, "Must have at least one BallKMeans iteration");
    Preconditions.checkArgument(trimFraction > 0, "trimFraction must be positive");
    Preconditions.checkArgument(testProbability >= 0 && testProbability < 1,
            "test probability is not in the " + "interval [0, 1)");
    // NOTE(review): the check rejects 0 as well, but the message only says "cannot be negative".
    Preconditions.checkArgument(numBallKMeansRuns > 0, "numBallKMeans cannot be negative");

    // Searcher
    if (!searcherClass.contains("Brute")) {
        // These tests only make sense when a relevant searcher is being used.
        Preconditions.checkArgument(searchSize > 0, "Invalid searchSize. Must be positive.");
        if (searcherClass.contains("Projection")) {
            Preconditions.checkArgument(numProjections > 0, "Invalid numProjections. Must be positive");

    // Setting the parameters in the Configuration.
    conf.setInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, numClusters);
    /* StreamingKMeans */
    conf.setInt(ESTIMATED_NUM_MAP_CLUSTERS, estimatedNumMapClusters);
    // The cutoff is written only when the caller supplied one, i.e. it is not the
    // INVALID_DISTANCE_CUTOFF sentinel.
    if (estimatedDistanceCutoff != INVALID_DISTANCE_CUTOFF) {
        conf.setFloat(ESTIMATED_DISTANCE_CUTOFF, estimatedDistanceCutoff);
    /* BallKMeans */
    conf.setInt(MAX_NUM_ITERATIONS, maxNumIterations);
    conf.setFloat(TRIM_FRACTION, trimFraction);
    conf.setBoolean(RANDOM_INIT, randomInit);
    conf.setBoolean(IGNORE_WEIGHTS, ignoreWeights);
    conf.setFloat(TEST_PROBABILITY, testProbability);
    conf.setInt(NUM_BALLKMEANS_RUNS, numBallKMeansRuns);
    /* Searcher */
    // Checks if the measureClass is available, throws exception otherwise.
    // NOTE(review): the statement performing that check is not visible in this excerpt.
    conf.set(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, measureClass);
    // Checks if the searcherClass is available, throws exception otherwise.
    // NOTE(review): the statement performing that check is not visible in this excerpt.
    conf.set(SEARCHER_CLASS_OPTION, searcherClass);
    conf.setInt(SEARCH_SIZE_OPTION, searchSize);
    conf.setInt(NUM_PROJECTIONS_OPTION, numProjections);
    conf.set(DefaultOptionCreator.METHOD_OPTION, method);

    conf.setBoolean(REDUCE_STREAMING_KMEANS, reduceStreamingKMeans);

            // NOTE(review): the call that takes this format string and its arguments is missing
            // (presumably a logger call using {} placeholders) -- confirm.
            "Parameters are: [k] numClusters {}; "
                    + "[SKM] estimatedNumMapClusters {}; estimatedDistanceCutoff {} "
                    + "[BKM] maxNumIterations {}; trimFraction {}; randomInit {}; ignoreWeights {}; "
                    + "testProbability {}; numBallKMeansRuns {}; "
                    + "[S] measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; "
                    + "method {}; reduceStreamingKMeans {}",
            numClusters, estimatedNumMapClusters, estimatedDistanceCutoff, maxNumIterations, trimFraction,
            randomInit, ignoreWeights, testProbability, numBallKMeansRuns, measureClass, searcherClass,
            searchSize, numProjections, method, reduceStreamingKMeans);

From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansTestMR.java

License:Apache License

/**
 * Populates the given configuration with the StreamingKMeans test settings held in this
 * class's constants and fields.
 *
 * <p>NOTE(review): this excerpt is truncated. One statement has lost its opening line, and
 * the method's closing brace is missing.
 *
 * @param configuration configuration that receives the values
 */
private void configure(Configuration configuration) {
    configuration.set(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, distanceMeasureClassName);
    configuration.setInt(StreamingKMeansDriver.SEARCH_SIZE_OPTION, SEARCH_SIZE);
    configuration.setInt(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION, NUM_PROJECTIONS);
    configuration.set(StreamingKMeansDriver.SEARCHER_CLASS_OPTION, searcherClassName);
    // The cluster count is 2^NUM_DIMENSIONS.
    configuration.setInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1 << NUM_DIMENSIONS);
            // NOTE(review): the start of this statement is missing (presumably a setInt for the
            // estimated number of map clusters). The (int) cast applies to Math.log(...) alone,
            // so the logarithm is truncated before the multiplication.
            (1 << NUM_DIMENSIONS) * (int) Math.log(NUM_DATA_POINTS));
    configuration.setFloat(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF, (float) DISTANCE_CUTOFF);
    configuration.setInt(StreamingKMeansDriver.MAX_NUM_ITERATIONS, MAX_NUM_ITERATIONS);

    // Collapse the Centroids in the reducer.
    configuration.setBoolean(StreamingKMeansDriver.REDUCE_STREAMING_KMEANS, true);

From source file:org.apache.mahout.knn.experimental.StreamingKMeansDriver.java

License:Apache License

/**
 * Stores the StreamingKMeans worker options in the given configuration.
 *
 * <p>NOTE(review): this excerpt is truncated. Both {@code try} bodies are empty, the
 * {@code catch} blocks and the method are never closed, and the final format string has lost
 * the call it belongs to. Confirm against the original file.
 *
 * @param conf configuration that receives the values
 * @param numClusters stored under {@code DefaultOptionCreator.NUM_CLUSTERS_OPTION}
 * @param estimatedNumMapClusters stored under {@code ESTIMATED_NUM_MAP_CLUSTERS}
 * @param estimatedDistanceCutoff stored under {@code ESTIMATED_DISTANCE_CUTOFF}
 * @param measureClass stored under {@code DefaultOptionCreator.DISTANCE_MEASURE_OPTION}
 * @param searcherClass stored under {@code SEARCHER_CLASS_OPTION}
 * @param searchSize stored under {@code SEARCH_SIZE_OPTION}
 * @param numProjections stored under {@code NUM_PROJECTIONS_OPTION}
 * @param maxNumIterations stored under {@code MAX_NUM_ITERATIONS}
 */
public static void configureOptionsForWorkers(Configuration conf, int numClusters, int estimatedNumMapClusters,
        float estimatedDistanceCutoff, String measureClass, String searcherClass, int searchSize,
        int numProjections, int maxNumIterations) {
    conf.setInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, numClusters);
    conf.setInt(ESTIMATED_NUM_MAP_CLUSTERS, estimatedNumMapClusters);
    conf.setFloat(ESTIMATED_DISTANCE_CUTOFF, estimatedDistanceCutoff);
    // NOTE(review): the try body is missing; presumably it loads measureClass -- confirm.
    try {
    } catch (ClassNotFoundException e) {
        // A missing class is only logged; the value is still written to the configuration below.
        log.error("Measure class not found " + measureClass, e);
    conf.set(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, measureClass);
    // NOTE(review): the try body is missing; presumably it loads searcherClass -- confirm.
    try {
    } catch (ClassNotFoundException e) {
        // NOTE(review): this message is about the searcher class but concatenates measureClass.
        // It looks like a copy-paste slip; searcherClass is probably intended.
        log.error("Searcher class not found " + measureClass, e);
    conf.set(SEARCHER_CLASS_OPTION, searcherClass);
    conf.setInt(SEARCH_SIZE_OPTION, searchSize);
    conf.setInt(NUM_PROJECTIONS_OPTION, numProjections);
    conf.setInt(MAX_NUM_ITERATIONS, maxNumIterations);
            // NOTE(review): the call that takes this format string and its arguments is missing
            // (presumably a logger call using {} placeholders) -- confirm.
            "Parameters are: numClusters {}; estimatedNumMapClusters {}; estimatedDistanceCutoff"
                    + " {}; measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; "
                    + "maxNumIterations {}",
            numClusters, estimatedNumMapClusters, estimatedDistanceCutoff, measureClass, searcherClass,
            searchSize, numProjections, maxNumIterations);

From source file:org.apache.mahout.regression.penalizedlinear.LinearCrossValidation.java

License:Apache License

/**
 * Runs the penalized linear regression job with a single reduce task, then initializes a
 * {@code PenalizedLinearSolver} from the job's output directory and prints the result.
 *
 * <p>NOTE(review): this excerpt looks truncated. The closing braces of the {@code if} block
 * and of the method are missing, and job setup lines may have been dropped.
 *
 * @throws IOException declared by the signature
 * @throws InterruptedException also thrown explicitly when {@code waitForCompletion} returns false
 * @throws ClassNotFoundException declared by the signature
 */
private void runPenalizedLinear() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.setInt(PenalizedLinearKeySet.NUM_CV, parameter.numOfCV);
    conf.setFloat(PenalizedLinearKeySet.ALPHA, parameter.alpha);
    // lambda is written with set(...), not setFloat(...).
    // NOTE(review): presumably parameter.lambda is a String; verify.
    conf.set(PenalizedLinearKeySet.LAMBDA, parameter.lambda);
    conf.setBoolean(PenalizedLinearKeySet.INTERCEPT, parameter.intercept);

    Job job = new Job(conf, "Penalized Linear Regression Driver running over input: " + input);
    job.setNumReduceTasks(1);

    // The job reads the converted input directory under output and writes to output/"output".
    FileInputFormat.addInputPath(job, new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(output, "output"));
    // A false result from waitForCompletion is reported as an InterruptedException.
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Penalized Linear Regression Job failed processing " + input);
    // The solver is initialized from the directory the job just wrote.
    solver = new PenalizedLinearSolver();
    solver.initSolver(new Path(output, "output"), getConf());
    printInfo(parameter, solver);