List of usage examples for org.apache.hadoop.conf.Configuration.getFloat
public float getFloat(String name, float defaultValue)
Gets the value of the name property as a float. If no such property exists, the provided defaultValue is returned.
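Before the per-project examples, a minimal self-contained sketch of the call may help. The property name sample.ratio and all values here are illustrative, not drawn from any of the projects below.

import org.apache.hadoop.conf.Configuration;

public class GetFloatExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setFloat("sample.ratio", 0.25f); // illustrative property name

        // The property is set, so its value is parsed and returned: 0.25
        float ratio = conf.getFloat("sample.ratio", 0.01f);

        // "missing.key" was never set, so the default (1.0) comes back instead
        float fallback = conf.getFloat("missing.key", 1.0f);

        System.out.println(ratio + " / " + fallback);
    }
}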
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
/**
 * Create a partitioner for a particular job
 * @param ins
 * @param out
 * @param job
 * @param partitionerName
 * @return
 * @throws IOException
 */
public static Partitioner createPartitioner(Path[] ins, Path out, Configuration job, String partitionerName)
        throws IOException {
    try {
        Partitioner partitioner = null;
        Class<? extends Partitioner> partitionerClass = PartitionerClasses.get(partitionerName.toLowerCase());
        if (partitionerClass == null) {
            // Try to parse the name as a class name
            try {
                partitionerClass = Class.forName(partitionerName).asSubclass(Partitioner.class);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Unknown index type '" + partitionerName + "'");
            }
        }
        if (PartitionerReplicate.containsKey(partitionerName.toLowerCase())) {
            boolean replicate = PartitionerReplicate.get(partitionerName.toLowerCase());
            job.setBoolean("replicate", replicate);
        }
        partitioner = partitionerClass.newInstance();

        long t1 = System.currentTimeMillis();
        final Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");
        // Determine number of partitions
        long inSize = 0;
        for (Path in : ins) {
            inSize += FileUtil.getPathSize(in.getFileSystem(job), in);
        }
        long estimatedOutSize = (long) (inSize * (1.0 + job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f)));
        FileSystem outFS = out.getFileSystem(job);
        long outBlockSize = outFS.getDefaultBlockSize(out);
        int numPartitions = Math.max(1, (int) Math.ceil((float) estimatedOutSize / outBlockSize));
        LOG.info("Partitioning the space into " + numPartitions + " partitions");

        final Vector<Point> sample = new Vector<Point>();
        float sample_ratio = job.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
        long sample_size = job.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);
        LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
        ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
            @Override
            public void collect(Point p) {
                sample.add(p.clone());
            }
        };
        OperationsParams params2 = new OperationsParams(job);
        params2.setFloat("ratio", sample_ratio);
        params2.setLong("size", sample_size);
        params2.setClass("outshape", Point.class, Shape.class);
        Sampler.sample(ins, resultCollector, params2);
        long t2 = System.currentTimeMillis();
        System.out.println("Total time for sampling in millis: " + (t2 - t1));
        LOG.info("Finished reading a sample of " + sample.size() + " records");

        partitioner.createFromPoints(inMBR, sample.toArray(new Point[sample.size()]), numPartitions);
        return partitioner;
    } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
    } catch (IllegalAccessException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
/**
 * Calculates number of partitions required to index the given file.
 * @param conf The current configuration which can contain user-defined parameters
 * @param inFileSize The size of the input file in bytes
 * @param outFs The output file system where the index will be written
 * @param outFile The path of the output file which is used to get the output block size.
 * @param blockSize If set, this will override the default output block size.
 * @return The number of blocks needed to write the index file
 */
public static int calculateNumberOfPartitions(Configuration conf, long inFileSize, FileSystem outFs,
        Path outFile, long blockSize) {
    final float IndexingOverhead = conf.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f);
    long indexedFileSize = (long) (inFileSize * (1 + IndexingOverhead));
    if (blockSize == 0)
        blockSize = outFs.getDefaultBlockSize(outFile);
    return (int) Math.ceil((float) indexedFileSize / blockSize);
}
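To make the arithmetic concrete with made-up numbers (not values from the project): for a 1 GiB input (1073741824 bytes) and the default 0.1 indexing overhead, indexedFileSize comes to 1181116006 bytes; with a 128 MiB block size (134217728 bytes), Math.ceil(1181116006f / 134217728) yields 9 partitions.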
From source file:external.nutch.scoring.initial.InitialStaticScoringFilter.java
License:Apache License
public void setConf(Configuration conf) {
    this.conf = conf;
    initialScore = conf.getFloat("initial.static.score", 1.00f);
}
From source file:gov.jgi.meta.exec.BlastCommand.java
License:Open Source License
/**
 * New blast command based on values stored in the configuration.
 *
 * Looks for the following config values: blast.commandline,
 * blast.commandpath, blast.tmpdir, and blast.cleanup
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if executable can not be found
 */
public BlastCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;
    log.info("initializing new blast command");

    if ((c = config.get("blast.commandline")) != null) {
        commandLine = c;
    }
    if ((c = config.get("blast.commandpath")) != null) {
        commandPath = c;
    }
    if ((c = config.get("formatdb.commandline")) != null) {
        formatdbCommandLine = c;
    }
    if ((c = config.get("formatdb.commandpath")) != null) {
        formatdbCommandPath = c;
    }
    if ((c = config.get("blast.tmpdir")) != null) {
        tmpDir = c;
    }
    docleanup = config.getBoolean("blast.cleanup", true);
    effectiveSize = config.getLong("blast.effectivedatabasesize", 0);
    useScaledEValue = config.getBoolean("blast.usescaledevalue", false);
    useEffectiveSize = config.getBoolean("blast.useeffectivesize", false);
    useEValue = config.getFloat("blast.useevalue", 10F);

    /*
     * do sanity check to make sure all paths exist
     */
    checkFileExists(commandLine);
    checkFileExists(commandPath);
    checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */
    tmpDirFile = MetaUtils.createTempDir("blast_", tmpDir);
    log.info("done initializing: tmp dir = " + tmpDirFile);
}
From source file:idgs.ConfVar.java
License:Open Source License
public static Float getFloatVar(Configuration conf, ConfVar variable) {
    require(variable.valClass == Float.class);
    return conf.getFloat(variable.varname, variable.defaultFloatVal);
}
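This is a typed-accessor pattern: the require(...) check asserts at call time that the ConfVar was declared with a Float value class, so requesting a variable through the wrong typed getter fails fast instead of quietly returning a default.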
From source file:io.hops.common.IDsMonitor.java
License:Apache License
public void setConfiguration(Configuration conf) {
    setConfiguration(
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_INODEID_BATCH_SIZE,
                    DFSConfigKeys.DFS_NAMENODE_INODEID_BATCH_SIZE_DEFAULT),
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_BLOCKID_BATCH_SIZE,
                    DFSConfigKeys.DFS_NAMENODE_BLOCKID_BATCH_SIZE_DEFAULT),
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_BATCH_SIZE,
                    DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_BATCH_SIZ_DEFAULT),
            conf.getFloat(DFSConfigKeys.DFS_NAMENODE_INODEID_UPDATE_THRESHOLD,
                    DFSConfigKeys.DFS_NAMENODE_INODEID_UPDATE_THRESHOLD_DEFAULT),
            conf.getFloat(DFSConfigKeys.DFS_NAMENODE_BLOCKID_UPDATE_THRESHOLD,
                    DFSConfigKeys.DFS_NAMENODE_BLOCKID_UPDATE_THRESHOLD_DEFAULT),
            conf.getFloat(DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_UPDATE_THRESHOLD,
                    DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_UPDATE_THRESHOLD_DEFAULT),
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_IDSMONITOR_CHECK_INTERVAL_IN_MS,
                    DFSConfigKeys.DFS_NAMENODE_IDSMONITOR_CHECK_INTERVAL_IN_MS_DEFAULT));
}
From source file:ir.co.bayan.simorq.zal.extractor.nutch.OPICScoringFilter.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    scorePower = conf.getFloat("indexer.score.power", 0.5f);
    internalScoreFactor = conf.getFloat("db.score.link.internal", 1.0f);
    externalScoreFactor = conf.getFloat("db.score.link.external", 1.0f);
    countFiltered = conf.getBoolean("db.score.count.filtered", false);
}
From source file:it.crs4.seal.recab.RecabTableReducer.java
License:Open Source License
public void setup(Configuration conf) {
    smoothing = conf.getFloat(CONF_SMOOTHING, CONF_SMOOTHING_DEFAULT);
    if (smoothing < 0.0)
        throw new IllegalArgumentException(CONF_SMOOTHING + " can't be less than 0");

    maxQscore = conf.getInt(CONF_MAX_QSCORE, CONF_MAX_QSCORE_DEFAULT);
    if (maxQscore <= 0)
        throw new IllegalArgumentException(CONF_MAX_QSCORE + " must be greater than 0");
}
From source file:ivory.core.index.BuildLPInvertedIndexDocSorted.java
License:Apache License
public int runTool() throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get(Constants.IndexPath);
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    String collectionName = env.readCollectionName();
    int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
    int minSplitSize = conf.getInt(Constants.MinSplitSize, 0);
    int collectionDocCount = env.readCollectionDocumentCount();

    String postingsType = conf.get(Constants.PostingsListsType,
            PostingsListDocSortedPositional.class.getCanonicalName());
    @SuppressWarnings("unchecked")
    Class<? extends PostingsList> postingsClass = (Class<? extends PostingsList>) Class.forName(postingsType);

    // These are the default values for the LP algorithm.
    float mapMemoryThreshold = conf.getFloat(Constants.IndexingMapMemoryThreshold, 0.9f);
    float reduceMemoryThreshold = conf.getFloat(Constants.IndexingReduceMemoryThreshold, 0.9f);
    int maxHeap = conf.getInt(Constants.MaxHeap, 2048);
    int maxNDocsBeforeFlush = conf.getInt(Constants.MaxNDocsBeforeFlush, 50000);

    LOG.info("PowerTool: " + BuildLPInvertedIndexDocSorted.class.getSimpleName());
    LOG.info(String.format(" - %s: %s", Constants.IndexPath, indexPath));
    LOG.info(String.format(" - %s: %s", Constants.CollectionName, collectionName));
    LOG.info(String.format(" - %s: %s", Constants.CollectionDocumentCount, collectionDocCount));
    LOG.info(String.format(" - %s: %s", Constants.PostingsListsType, postingsClass.getCanonicalName()));
    LOG.info(String.format(" - %s: %s", Constants.NumReduceTasks, reduceTasks));
    LOG.info(String.format(" - %s: %s", Constants.MinSplitSize, minSplitSize));
    LOG.info(String.format(" - %s: %s", Constants.IndexingMapMemoryThreshold, mapMemoryThreshold));
    LOG.info(String.format(" - %s: %s", Constants.IndexingReduceMemoryThreshold, reduceMemoryThreshold));
    LOG.info(String.format(" - %s: %s", Constants.MaxHeap, maxHeap));
    LOG.info(String.format(" - %s: %s", Constants.MaxNDocsBeforeFlush, maxNDocsBeforeFlush));

    if (!fs.exists(new Path(indexPath))) {
        fs.mkdirs(new Path(indexPath));
    }

    Path inputPath = new Path(env.getIntDocVectorsDirectory());
    Path postingsPath = new Path(env.getPostingsDirectory());

    if (fs.exists(postingsPath)) {
        LOG.info("Postings already exist: no indexing will be performed.");
        return 0;
    }

    conf.setInt(Constants.CollectionDocumentCount, collectionDocCount);
    conf.setInt("mapred.min.split.size", minSplitSize);
    //conf.set("mapred.child.java.opts", "-Xmx" + maxHeap + "m");
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");

    Job job = Job.getInstance(conf,
            BuildLPInvertedIndexDocSorted.class.getSimpleName() + ":" + collectionName);
    job.setJarByClass(BuildLPInvertedIndexDocSorted.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, postingsPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PostingsListDocSortedPositional.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PostingsListDocSortedPositional.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    env.writePostingsType("ivory.data.PostingsListDocSortedPositional");

    return 0;
}
From source file:mlbench.bayes.BayesUtils.java
License:Apache License
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {
    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);

    // read feature sums and label sums
    Vector scoresPerLabel = null;
    Vector scoresPerFeature = null;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
        String key = record.getFirst().toString();
        VectorWritable value = record.getSecond();
        if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            scoresPerFeature = value.get();
        } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            scoresPerLabel = value.get();
        }
    }

    // Preconditions.checkNotNull(scoresPerFeature);
    // Preconditions.checkNotNull(scoresPerLabel);

    Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
    for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(),
            conf)) {
        scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
    }

    Vector perlabelThetaNormalizer = scoresPerLabel.like();
    /*
     * for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
     *         new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
     *     if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
     *         perlabelThetaNormalizer = entry.getSecond().get();
     *     }
     * }
     *
     * Preconditions.checkNotNull(perlabelThetaNormalizer);
     */
    return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
            perlabelThetaNormalizer, alphaI, false);
}