Example usage for org.apache.hadoop.conf Configuration setFloat

List of usage examples for org.apache.hadoop.conf Configuration setFloat

Introduction

On this page you can find usage examples of the org.apache.hadoop.conf.Configuration method setFloat.

Prototype

public void setFloat(String name, float value) 

Document

Set the value of the name property to a float.
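
Before the full examples, here is a minimal sketch of the basic call, assuming nothing beyond the Hadoop Configuration API itself; the property name "example.sampling.rate" is a made-up placeholder, not a key defined by Hadoop:

import org.apache.hadoop.conf.Configuration;

public class SetFloatSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical property name, used only for illustration.
        conf.setFloat("example.sampling.rate", 0.25f);
        // getFloat returns the stored value, or the supplied default if the property is unset.
        float rate = conf.getFloat("example.sampling.rate", 1.0f);
        System.out.println("example.sampling.rate = " + rate);
    }
}

The usage examples below follow the same pattern to pass thresholds, smoothing constants, and other tuning parameters from driver code into map and reduce tasks.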

Usage

From source file:org.apache.hcatalog.mapreduce.TestHCatInputFormat.java

License:Apache License

private boolean runJob(float badRecordThreshold) throws Exception {
    Configuration conf = new Configuration();

    conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);

    Job job = new Job(conf);
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(job, "default", "test_bad_records");

    job.setMapOutputKeyClass(HCatRecord.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
    if (path.getFileSystem(conf).exists(path)) {
        path.getFileSystem(conf).delete(path, true);
    }

    TextOutputFormat.setOutputPath(job, path);

    return job.waitForCompletion(true);
}

From source file:org.apache.kylin.storage.hbase.steps.HFileOutputFormat3.java

License:Apache License

static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
        final TaskAttemptContext context, final OutputCommitter committer)
        throws IOException, InterruptedException {

    // Get the path of the temporary output file
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    LOG.debug("Task output path: " + outputdir);
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter.compressionByName(defaultCompressionStr);
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
        overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
        overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        @Override
        public void write(ImmutableBytesWritable row, V cell) throws IOException {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            if (row == null && kv == null) {
                rollWriters();
                return;
            }
            byte[] rowKey = CellUtil.cloneRow(kv);
            long length = kv.getLength();
            byte[] family = CellUtil.cloneFamily(kv);
            WriterLength wl = this.writers.get(family);
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", justification = "Not important")
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            Algorithm compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            BloomType bloomType = bloomTypeMap.get(family);
            bloomType = bloomType == null ? BloomType.NONE : bloomType;
            Integer blockSize = blockSizeMap.get(family);
            blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
            DataBlockEncoding encoding = overriddenEncoding;
            encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
            encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf)).withBlockSize(blockSize);
            contextBuilder.withDataBlockEncoding(encoding);
            HFileContext hFileContext = contextBuilder.build();

            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
                    .withFileContext(hFileContext).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        @Override
        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

From source file:org.apache.mahout.classifier.naivebayes.trainer.NaiveBayesTrainer.java

License:Apache License

public static void trainNaiveBayes(Path input, Configuration conf, Iterable<String> inputLabels, Path output,
        int numReducers, float alphaI, boolean trainComplementary)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.setFloat(ALPHA_I, alphaI);
    Path labelMapPath = createLabelMapFile(inputLabels, conf, new Path(output, LABEL_MAP));
    Path classVectorPath = new Path(output, CLASS_VECTORS);
    runNaiveBayesByLabelSummer(input, conf, labelMapPath, classVectorPath, numReducers);
    Path weightFilePath = new Path(output, SUM_VECTORS);
    runNaiveBayesWeightSummer(classVectorPath, conf, labelMapPath, weightFilePath, numReducers);
    Path thetaFilePath = new Path(output, THETA_SUM);
    if (trainComplementary) {
        runNaiveBayesThetaComplementarySummer(classVectorPath, conf, weightFilePath, thetaFilePath,
                numReducers);
    } else {
        runNaiveBayesThetaSummer(classVectorPath, conf, weightFilePath, thetaFilePath, numReducers);
    }
}

From source file:org.apache.mahout.classifier.svm.algorithm.parallelalgorithms.ParallelMultiClassifierTrainJob.java

License:Apache License

/**
 * Sets the reducer-related parameters in the configuration.
 *
 * @param conf
 * @param lambda
 * @param k
 * @param modelFile
 *          path under which model files are stored
 * @param hdfsServer
 *          HDFS server address
 */
public static void setReducerParameters(Configuration conf, double lambda, int k, String modelFile,
        String hdfsServer) {
    // store the SVM training parameters in the configuration
    conf.setFloat(SVMParameters.HADOOP_LAMBDA, (float) lambda);
    conf.setInt(SVMParameters.HADOOP_K, k);
    conf.set(SVMParameters.HADOOP_MODLE_PATH, modelFile);
    conf.set(SVMParameters.HDFS_SERVER, hdfsServer);
}

From source file:org.apache.mahout.clustering.classify.ClusterClassificationDriver.java

License:Apache License

private static void classifyClusterMR(Configuration conf, Path input, Path clustersIn, Path output,
        Double clusterClassificationThreshold, boolean emitMostLikely)
        throws IOException, InterruptedException, ClassNotFoundException {

    conf.setFloat(ClusterClassificationConfigKeys.OUTLIER_REMOVAL_THRESHOLD,
            clusterClassificationThreshold.floatValue());
    conf.setBoolean(ClusterClassificationConfigKeys.EMIT_MOST_LIKELY, emitMostLikely);
    conf.set(ClusterClassificationConfigKeys.CLUSTERS_IN, clustersIn.toUri().toString());

    Job job = new Job(conf, "Cluster Classification Driver running over input: " + input);
    job.setJarByClass(ClusterClassificationDriver.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(ClusterClassificationMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WeightedPropertyVectorWritable.class);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Cluster Classification Driver Job failed processing " + input);
    }
}

From source file:org.apache.mahout.clustering.lda.cvb.CVBConfig.java

License:Apache License

public void write(Configuration conf) {
    conf.setInt(NUM_TOPICS_PARAM, numTopics);
    conf.setInt(NUM_TERMS_PARAM, numTerms);
    conf.setFloat(DOC_TOPIC_SMOOTHING_PARAM, alpha);
    conf.setFloat(TERM_TOPIC_SMOOTHING_PARAM, eta);
    conf.setLong(RANDOM_SEED_PARAM, randomSeed);
    conf.setFloat(TEST_SET_FRACTION_PARAM, testFraction);
    conf.setInt(NUM_TRAIN_THREADS_PARAM, numTrainThreads);
    conf.setInt(NUM_UPDATE_THREADS_PARAM, numUpdateThreads);
    conf.setInt(MAX_ITERATIONS_PER_DOC_PARAM, maxItersPerDoc);
    conf.setFloat(MODEL_WEIGHT_PARAM, modelWeight);
    conf.setBoolean(ONLY_LABELED_DOCS_PARAM, useOnlyLabeledDocs);
    conf.setFloat(MIN_RELATIVE_PERPLEXITY_DIFF_PARAM, minRelPreplexityDiff);
    conf.setInt(MAX_INFERENCE_ITERATIONS_PER_DOC_PARAM, maxInferenceItersPerDoc);
}

From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansDriver.java

License:Apache License

/**
 * Checks the parameters for a StreamingKMeans job and prepares a Configuration with them.
 *
 * @param conf the Configuration to populate
 * @param numClusters k, the number of clusters at the end
 * @param estimatedNumMapClusters O(k log n), the number of clusters requested from each mapper
 * @param estimatedDistanceCutoff an estimate of the minimum distance that separates two clusters (can be smaller and
 *                                will be increased dynamically)
 * @param maxNumIterations the maximum number of iterations of BallKMeans
 * @param trimFraction the fraction of the points to be considered in updating a ball k-means
 * @param randomInit whether to initialize the ball k-means seeds randomly
 * @param ignoreWeights whether to ignore the invalid final ball k-means weights
 * @param testProbability the percentage of vectors assigned to the test set for selecting the best final centers
 * @param numBallKMeansRuns the number of BallKMeans runs in the reducer that determine the centroids to return
 *                          (clusters are computed for the training set and the error is computed on the test set)
 * @param measureClass string, name of the distance measure class; theory works for Euclidean-like distances
 * @param searcherClass string, name of the searcher that will be used for nearest neighbor search
 * @param searchSize the number of closest neighbors to look at for selecting the closest one in approximate nearest
 *                   neighbor searches
 * @param numProjections the number of projected vectors to use for faster searching (only useful for ProjectionSearch
 *                       or FastProjectionSearch); @see org.apache.mahout.math.neighborhood.ProjectionSearch
 */
public static void configureOptionsForWorkers(Configuration conf, int numClusters,
        /* StreamingKMeans */
        int estimatedNumMapClusters, float estimatedDistanceCutoff,
        /* BallKMeans */
        int maxNumIterations, float trimFraction, boolean randomInit, boolean ignoreWeights,
        float testProbability, int numBallKMeansRuns,
        /* Searcher */
        String measureClass, String searcherClass, int searchSize, int numProjections, String method,
        boolean reduceStreamingKMeans) throws ClassNotFoundException {
    // Checking preconditions for the parameters.
    Preconditions.checkArgument(numClusters > 0,
            "Invalid number of clusters requested: " + numClusters + ". Must be: numClusters > 0!");

    // StreamingKMeans
    Preconditions.checkArgument(estimatedNumMapClusters > numClusters, "Invalid number of estimated map "
            + "clusters; There must be more than the final number of clusters (k log n vs k)");
    Preconditions.checkArgument(
            estimatedDistanceCutoff == INVALID_DISTANCE_CUTOFF || estimatedDistanceCutoff > 0,
            "estimatedDistanceCutoff must be equal to -1 or must be greater then 0!");

    // BallKMeans
    Preconditions.checkArgument(maxNumIterations > 0, "Must have at least one BallKMeans iteration");
    Preconditions.checkArgument(trimFraction > 0, "trimFraction must be positive");
    Preconditions.checkArgument(testProbability >= 0 && testProbability < 1,
            "test probability is not in the " + "interval [0, 1)");
    Preconditions.checkArgument(numBallKMeansRuns > 0, "numBallKMeansRuns must be positive");

    // Searcher
    if (!searcherClass.contains("Brute")) {
        // These tests only make sense when a relevant searcher is being used.
        Preconditions.checkArgument(searchSize > 0, "Invalid searchSize. Must be positive.");
        if (searcherClass.contains("Projection")) {
            Preconditions.checkArgument(numProjections > 0, "Invalid numProjections. Must be positive");
        }
    }

    // Setting the parameters in the Configuration.
    conf.setInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, numClusters);
    /* StreamingKMeans */
    conf.setInt(ESTIMATED_NUM_MAP_CLUSTERS, estimatedNumMapClusters);
    if (estimatedDistanceCutoff != INVALID_DISTANCE_CUTOFF) {
        conf.setFloat(ESTIMATED_DISTANCE_CUTOFF, estimatedDistanceCutoff);
    }
    /* BallKMeans */
    conf.setInt(MAX_NUM_ITERATIONS, maxNumIterations);
    conf.setFloat(TRIM_FRACTION, trimFraction);
    conf.setBoolean(RANDOM_INIT, randomInit);
    conf.setBoolean(IGNORE_WEIGHTS, ignoreWeights);
    conf.setFloat(TEST_PROBABILITY, testProbability);
    conf.setInt(NUM_BALLKMEANS_RUNS, numBallKMeansRuns);
    /* Searcher */
    // Checks if the measureClass is available, throws exception otherwise.
    Class.forName(measureClass);
    conf.set(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, measureClass);
    // Checks if the searcherClass is available, throws exception otherwise.
    Class.forName(searcherClass);
    conf.set(SEARCHER_CLASS_OPTION, searcherClass);
    conf.setInt(SEARCH_SIZE_OPTION, searchSize);
    conf.setInt(NUM_PROJECTIONS_OPTION, numProjections);
    conf.set(DefaultOptionCreator.METHOD_OPTION, method);

    conf.setBoolean(REDUCE_STREAMING_KMEANS, reduceStreamingKMeans);

    log.info(
            "Parameters are: [k] numClusters {}; "
                    + "[SKM] estimatedNumMapClusters {}; estimatedDistanceCutoff {} "
                    + "[BKM] maxNumIterations {}; trimFraction {}; randomInit {}; ignoreWeights {}; "
                    + "testProbability {}; numBallKMeansRuns {}; "
                    + "[S] measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; "
                    + "method {}; reduceStreamingKMeans {}",
            numClusters, estimatedNumMapClusters, estimatedDistanceCutoff, maxNumIterations, trimFraction,
            randomInit, ignoreWeights, testProbability, numBallKMeansRuns, measureClass, searcherClass,
            searchSize, numProjections, method, reduceStreamingKMeans);
}

From source file:org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansTestMR.java

License:Apache License

private void configure(Configuration configuration) {
    configuration.set(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, distanceMeasureClassName);
    configuration.setInt(StreamingKMeansDriver.SEARCH_SIZE_OPTION, SEARCH_SIZE);
    configuration.setInt(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION, NUM_PROJECTIONS);
    configuration.set(StreamingKMeansDriver.SEARCHER_CLASS_OPTION, searcherClassName);
    configuration.setInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, 1 << NUM_DIMENSIONS);
    configuration.setInt(StreamingKMeansDriver.ESTIMATED_NUM_MAP_CLUSTERS,
            (1 << NUM_DIMENSIONS) * (int) Math.log(NUM_DATA_POINTS));
    configuration.setFloat(StreamingKMeansDriver.ESTIMATED_DISTANCE_CUTOFF, (float) DISTANCE_CUTOFF);
    configuration.setInt(StreamingKMeansDriver.MAX_NUM_ITERATIONS, MAX_NUM_ITERATIONS);

    // Collapse the Centroids in the reducer.
    configuration.setBoolean(StreamingKMeansDriver.REDUCE_STREAMING_KMEANS, true);
}

From source file:org.apache.mahout.knn.experimental.StreamingKMeansDriver.java

License:Apache License

public static void configureOptionsForWorkers(Configuration conf, int numClusters, int estimatedNumMapClusters,
        float estimatedDistanceCutoff, String measureClass, String searcherClass, int searchSize,
        int numProjections, int maxNumIterations) {
    conf.setInt(DefaultOptionCreator.NUM_CLUSTERS_OPTION, numClusters);
    conf.setInt(ESTIMATED_NUM_MAP_CLUSTERS, estimatedNumMapClusters);
    conf.setFloat(ESTIMATED_DISTANCE_CUTOFF, estimatedDistanceCutoff);
    try {
        Class.forName(measureClass);
    } catch (ClassNotFoundException e) {
        log.error("Measure class not found " + measureClass, e);
    }
    conf.set(DefaultOptionCreator.DISTANCE_MEASURE_OPTION, measureClass);
    try {
        Class.forName(searcherClass);
    } catch (ClassNotFoundException e) {
        log.error("Searcher class not found " + measureClass, e);
    }
    conf.set(SEARCHER_CLASS_OPTION, searcherClass);
    conf.setInt(SEARCH_SIZE_OPTION, searchSize);
    conf.setInt(NUM_PROJECTIONS_OPTION, numProjections);
    conf.setInt(MAX_NUM_ITERATIONS, maxNumIterations);
    log.info(
            "Parameters are: numClusters {}; estimatedNumMapClusters {}; estimatedDistanceCutoff"
                    + " {}; measureClass {}; searcherClass {}; searcherSize {}; numProjections {}; "
                    + "maxNumIterations {}",
            numClusters, estimatedNumMapClusters, estimatedDistanceCutoff, measureClass, searcherClass,
            searchSize, numProjections, maxNumIterations);
}

From source file:org.apache.mahout.regression.penalizedlinear.LinearCrossValidation.java

License:Apache License

private void runPenalizedLinear() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = getConf();
    conf.setInt(PenalizedLinearKeySet.NUM_CV, parameter.numOfCV);
    conf.setFloat(PenalizedLinearKeySet.ALPHA, parameter.alpha);
    conf.set(PenalizedLinearKeySet.LAMBDA, parameter.lambda);
    conf.setBoolean(PenalizedLinearKeySet.INTERCEPT, parameter.intercept);

    Job job = new Job(conf, "Penalized Linear Regression Driver running over input: " + input);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(PenalizedLinearMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setReducerClass(PenalizedLinearReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setCombinerClass(PenalizedLinearReducer.class);
    job.setNumReduceTasks(1);
    job.setJarByClass(LinearRegularizePath.class);

    FileInputFormat.addInputPath(job, new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(output, "output"));
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Penalized Linear Regression Job failed processing " + input);
    }
    solver = new PenalizedLinearSolver();
    solver.setAlpha(parameter.alpha);
    solver.setIntercept(parameter.intercept);
    solver.setLambdaString(parameter.lambda);
    solver.initSolver(new Path(output, "output"), getConf());
    solver.crossValidate();
    printInfo(parameter, solver);
}