List of usage examples for org.apache.hadoop.conf.Configuration.getFloat
public float getFloat(String name, float defaultValue)
Gets the value of the name property as a float. If no such property exists, the provided defaultValue is returned.
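Before the per-project examples, a minimal self-contained sketch of the call may help. The property name sample.ratio and all values here are illustrative, not drawn from any of the projects below.

import org.apache.hadoop.conf.Configuration;

public class GetFloatExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setFloat("sample.ratio", 0.25f); // illustrative property name

        // The property is set, so its value is parsed and returned: 0.25
        float ratio = conf.getFloat("sample.ratio", 0.01f);

        // "missing.key" was never set, so the default (1.0) comes back instead
        float fallback = conf.getFloat("missing.key", 1.0f);

        System.out.println(ratio + " / " + fallback);
    }
}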
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
/**
 * Create a partitioner for a particular job
 * @param ins
 * @param out
 * @param job
 * @param partitionerName
 * @return
 * @throws IOException
 */
public static Partitioner createPartitioner(Path[] ins, Path out, Configuration job, String partitionerName)
        throws IOException {
    try {
        Partitioner partitioner = null;
        Class<? extends Partitioner> partitionerClass = PartitionerClasses.get(partitionerName.toLowerCase());
        if (partitionerClass == null) {
            // Try to parse the name as a class name
            try {
                partitionerClass = Class.forName(partitionerName).asSubclass(Partitioner.class);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Unknown index type '" + partitionerName + "'");
            }
        }
        if (PartitionerReplicate.containsKey(partitionerName.toLowerCase())) {
            boolean replicate = PartitionerReplicate.get(partitionerName.toLowerCase());
            job.setBoolean("replicate", replicate);
        }
        partitioner = partitionerClass.newInstance();

        long t1 = System.currentTimeMillis();
        final Rectangle inMBR = (Rectangle) OperationsParams.getShape(job, "mbr");
        // Determine number of partitions
        long inSize = 0;
        for (Path in : ins) {
            inSize += FileUtil.getPathSize(in.getFileSystem(job), in);
        }
        long estimatedOutSize = (long) (inSize * (1.0 + job.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f)));
        FileSystem outFS = out.getFileSystem(job);
        long outBlockSize = outFS.getDefaultBlockSize(out);
        int numPartitions = Math.max(1, (int) Math.ceil((float) estimatedOutSize / outBlockSize));
        LOG.info("Partitioning the space into " + numPartitions + " partitions");

        final Vector<Point> sample = new Vector<Point>();
        float sample_ratio = job.getFloat(SpatialSite.SAMPLE_RATIO, 0.01f);
        long sample_size = job.getLong(SpatialSite.SAMPLE_SIZE, 100 * 1024 * 1024);
        LOG.info("Reading a sample of " + (int) Math.round(sample_ratio * 100) + "%");
        ResultCollector<Point> resultCollector = new ResultCollector<Point>() {
            @Override
            public void collect(Point p) {
                sample.add(p.clone());
            }
        };
        OperationsParams params2 = new OperationsParams(job);
        params2.setFloat("ratio", sample_ratio);
        params2.setLong("size", sample_size);
        params2.setClass("outshape", Point.class, Shape.class);
        Sampler.sample(ins, resultCollector, params2);
        long t2 = System.currentTimeMillis();
        System.out.println("Total time for sampling in millis: " + (t2 - t1));
        LOG.info("Finished reading a sample of " + sample.size() + " records");

        partitioner.createFromPoints(inMBR, sample.toArray(new Point[sample.size()]), numPartitions);
        return partitioner;
    } catch (InstantiationException e) {
        e.printStackTrace();
        return null;
    } catch (IllegalAccessException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
/**
 * Calculates number of partitions required to index the given file.
 * @param conf The current configuration which can contain user-defined parameters
 * @param inFileSize The size of the input file in bytes
 * @param outFs The output file system where the index will be written
 * @param outFile The path of the output file which is used to get the output block size.
 * @param blockSize If set, this will override the default output block size.
 * @return The number of blocks needed to write the index file
 */
public static int calculateNumberOfPartitions(Configuration conf, long inFileSize, FileSystem outFs,
        Path outFile, long blockSize) {
    final float IndexingOverhead = conf.getFloat(SpatialSite.INDEXING_OVERHEAD, 0.1f);
    long indexedFileSize = (long) (inFileSize * (1 + IndexingOverhead));
    if (blockSize == 0)
        blockSize = outFs.getDefaultBlockSize(outFile);
    return (int) Math.ceil((float) indexedFileSize / blockSize);
}
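To make the arithmetic concrete with made-up numbers (not values from the project): for a 1 GiB input (1073741824 bytes) and the default 0.1 indexing overhead, indexedFileSize comes to 1181116006 bytes; with a 128 MiB block size (134217728 bytes), Math.ceil(1181116006f / 134217728) yields 9 partitions.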
From source file:external.nutch.scoring.initial.InitialStaticScoringFilter.java
License:Apache License
public void setConf(Configuration conf) {
    this.conf = conf;
    initialScore = conf.getFloat("initial.static.score", 1.00f);
}
From source file:gov.jgi.meta.exec.BlastCommand.java
License:Open Source License
/**
 * New blast command based on values stored in the configuration.
 *
 * Looks for the following config values: blast.commandline,
 * blast.commandpath, blast.tmpdir, and blast.cleanup
 *
 * @param config is the hadoop configuration with overriding values
 *               for commandline options and paths
 * @throws IOException if executable can not be found
 */
public BlastCommand(Configuration config) throws IOException {
    log.info("initializing");
    String c;
    log.info("initializing new blast command");

    if ((c = config.get("blast.commandline")) != null) {
        commandLine = c;
    }
    if ((c = config.get("blast.commandpath")) != null) {
        commandPath = c;
    }
    if ((c = config.get("formatdb.commandline")) != null) {
        formatdbCommandLine = c;
    }
    if ((c = config.get("formatdb.commandpath")) != null) {
        formatdbCommandPath = c;
    }
    if ((c = config.get("blast.tmpdir")) != null) {
        tmpDir = c;
    }
    docleanup = config.getBoolean("blast.cleanup", true);
    effectiveSize = config.getLong("blast.effectivedatabasesize", 0);
    useScaledEValue = config.getBoolean("blast.usescaledevalue", false);
    useEffectiveSize = config.getBoolean("blast.useeffectivesize", false);
    useEValue = config.getFloat("blast.useevalue", 10F);

    /*
     * do sanity check to make sure all paths exist
     */
    checkFileExists(commandLine);
    checkFileExists(commandPath);
    checkDirExists(tmpDir);

    /*
     * if all is good, create a working space inside tmpDir
     */
    tmpDirFile = MetaUtils.createTempDir("blast_", tmpDir);
    log.info("done initializing: tmp dir = " + tmpDirFile);
}
From source file:idgs.ConfVar.java
License:Open Source License
public static Float getFloatVar(Configuration conf, ConfVar variable) {
    require(variable.valClass == Float.class);
    return conf.getFloat(variable.varname, variable.defaultFloatVal);
}
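This is a typed-accessor pattern: the require(...) check asserts at call time that the ConfVar was declared with a Float value class, so requesting a variable through the wrong typed getter fails fast instead of quietly returning a default.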
From source file:io.hops.common.IDsMonitor.java
License:Apache License
public void setConfiguration(Configuration conf) {
    setConfiguration(
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_INODEID_BATCH_SIZE,
                    DFSConfigKeys.DFS_NAMENODE_INODEID_BATCH_SIZE_DEFAULT),
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_BLOCKID_BATCH_SIZE,
                    DFSConfigKeys.DFS_NAMENODE_BLOCKID_BATCH_SIZE_DEFAULT),
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_BATCH_SIZE,
                    DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_BATCH_SIZ_DEFAULT),
            conf.getFloat(DFSConfigKeys.DFS_NAMENODE_INODEID_UPDATE_THRESHOLD,
                    DFSConfigKeys.DFS_NAMENODE_INODEID_UPDATE_THRESHOLD_DEFAULT),
            conf.getFloat(DFSConfigKeys.DFS_NAMENODE_BLOCKID_UPDATE_THRESHOLD,
                    DFSConfigKeys.DFS_NAMENODE_BLOCKID_UPDATE_THRESHOLD_DEFAULT),
            conf.getFloat(DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_UPDATE_THRESHOLD,
                    DFSConfigKeys.DFS_NAMENODE_QUOTA_UPDATE_ID_UPDATE_THRESHOLD_DEFAULT),
            conf.getInt(DFSConfigKeys.DFS_NAMENODE_IDSMONITOR_CHECK_INTERVAL_IN_MS,
                    DFSConfigKeys.DFS_NAMENODE_IDSMONITOR_CHECK_INTERVAL_IN_MS_DEFAULT));
}
From source file:ir.co.bayan.simorq.zal.extractor.nutch.OPICScoringFilter.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    scorePower = conf.getFloat("indexer.score.power", 0.5f);
    internalScoreFactor = conf.getFloat("db.score.link.internal", 1.0f);
    externalScoreFactor = conf.getFloat("db.score.link.external", 1.0f);
    countFiltered = conf.getBoolean("db.score.count.filtered", false);
}
From source file:it.crs4.seal.recab.RecabTableReducer.java
License:Open Source License
public void setup(Configuration conf) {
    smoothing = conf.getFloat(CONF_SMOOTHING, CONF_SMOOTHING_DEFAULT);
    if (smoothing < 0.0)
        throw new IllegalArgumentException(CONF_SMOOTHING + " can't be less than 0");

    maxQscore = conf.getInt(CONF_MAX_QSCORE, CONF_MAX_QSCORE_DEFAULT);
    if (maxQscore <= 0)
        throw new IllegalArgumentException(CONF_MAX_QSCORE + " must be greater than 0");
}
From source file:ivory.core.index.BuildLPInvertedIndexDocSorted.java
License:Apache License
public int runTool() throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get(Constants.IndexPath);
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    String collectionName = env.readCollectionName();
    int reduceTasks = conf.getInt(Constants.NumReduceTasks, 0);
    int minSplitSize = conf.getInt(Constants.MinSplitSize, 0);
    int collectionDocCount = env.readCollectionDocumentCount();

    String postingsType = conf.get(Constants.PostingsListsType,
            PostingsListDocSortedPositional.class.getCanonicalName());
    @SuppressWarnings("unchecked")
    Class<? extends PostingsList> postingsClass = (Class<? extends PostingsList>) Class.forName(postingsType);

    // These are the default values for the LP algorithm.
    float mapMemoryThreshold = conf.getFloat(Constants.IndexingMapMemoryThreshold, 0.9f);
    float reduceMemoryThreshold = conf.getFloat(Constants.IndexingReduceMemoryThreshold, 0.9f);
    int maxHeap = conf.getInt(Constants.MaxHeap, 2048);
    int maxNDocsBeforeFlush = conf.getInt(Constants.MaxNDocsBeforeFlush, 50000);

    LOG.info("PowerTool: " + BuildLPInvertedIndexDocSorted.class.getSimpleName());
    LOG.info(String.format(" - %s: %s", Constants.IndexPath, indexPath));
    LOG.info(String.format(" - %s: %s", Constants.CollectionName, collectionName));
    LOG.info(String.format(" - %s: %s", Constants.CollectionDocumentCount, collectionDocCount));
    LOG.info(String.format(" - %s: %s", Constants.PostingsListsType, postingsClass.getCanonicalName()));
    LOG.info(String.format(" - %s: %s", Constants.NumReduceTasks, reduceTasks));
    LOG.info(String.format(" - %s: %s", Constants.MinSplitSize, minSplitSize));
    LOG.info(String.format(" - %s: %s", Constants.IndexingMapMemoryThreshold, mapMemoryThreshold));
    LOG.info(String.format(" - %s: %s", Constants.IndexingReduceMemoryThreshold, reduceMemoryThreshold));
    LOG.info(String.format(" - %s: %s", Constants.MaxHeap, maxHeap));
    LOG.info(String.format(" - %s: %s", Constants.MaxNDocsBeforeFlush, maxNDocsBeforeFlush));

    if (!fs.exists(new Path(indexPath))) {
        fs.mkdirs(new Path(indexPath));
    }

    Path inputPath = new Path(env.getIntDocVectorsDirectory());
    Path postingsPath = new Path(env.getPostingsDirectory());

    if (fs.exists(postingsPath)) {
        LOG.info("Postings already exist: no indexing will be performed.");
        return 0;
    }

    conf.setInt(Constants.CollectionDocumentCount, collectionDocCount);
    conf.setInt("mapred.min.split.size", minSplitSize);
    //conf.set("mapred.child.java.opts", "-Xmx" + maxHeap + "m");
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");

    Job job = Job.getInstance(conf,
            BuildLPInvertedIndexDocSorted.class.getSimpleName() + ":" + collectionName);
    job.setJarByClass(BuildLPInvertedIndexDocSorted.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, postingsPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PostingsListDocSortedPositional.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PostingsListDocSortedPositional.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    env.writePostingsType("ivory.data.PostingsListDocSortedPositional");

    return 0;
}
From source file:mlbench.bayes.BayesUtils.java
License:Apache License
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) {
    float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f);

    // read feature sums and label sums
    Vector scoresPerLabel = null;
    Vector scoresPerFeature = null;
    for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) {
        String key = record.getFirst().toString();
        VectorWritable value = record.getSecond();
        if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) {
            scoresPerFeature = value.get();
        } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) {
            scoresPerLabel = value.get();
        }
    }

    // Preconditions.checkNotNull(scoresPerFeature);
    // Preconditions.checkNotNull(scoresPerLabel);

    Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size());
    for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(),
            conf)) {
        scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get());
    }

    Vector perlabelThetaNormalizer = scoresPerLabel.like();
    /*
     * for (Pair<Text,VectorWritable> entry : new SequenceFileDirIterable<Text,VectorWritable>(
     *         new Path(base, TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), conf)) {
     *     if (entry.getFirst().toString().equals(TrainNaiveBayesJob.LABEL_THETA_NORMALIZER)) {
     *         perlabelThetaNormalizer = entry.getSecond().get();
     *     }
     * }
     *
     * Preconditions.checkNotNull(perlabelThetaNormalizer);
     */
    return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel,
            perlabelThetaNormalizer, alphaI, false);
}