Example usage for org.apache.hadoop.conf.Configuration.setFloat

Introduction

This page collects usage examples of org.apache.hadoop.conf.Configuration.setFloat drawn from open-source projects.

Prototype

public void setFloat(String name, float value) 

Document

Set the value of the name property to a float.
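
Before the project examples, here is a minimal, self-contained sketch of setting a float property and reading it back; the property name "example.smoothing.factor" is invented for illustration and not part of any real project.

import org.apache.hadoop.conf.Configuration;

public class SetFloatExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a float under the (hypothetical) property name.
        conf.setFloat("example.smoothing.factor", 0.75f);

        // Read it back; the second argument is the default returned when the property is unset.
        float smoothing = conf.getFloat("example.smoothing.factor", 0.0f);
        System.out.println("smoothing = " + smoothing);
    }
}

Configuration stores property values as strings internally, so getFloat parses the stored text back into a float on each read.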

Usage

From source file:org.clueweb.clueweb12.app.RMRetrieval.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access", "deprecation" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path (pfor format expected, add * to retrieve files)")
            .create(DOCVECTOR_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("queries").create(QUERIES_OPTION));
    options.addOption(
            OptionBuilder.withArgName("float").hasArg().withDescription("smoothing").create(SMOOTHING));
    options.addOption(OptionBuilder.withArgName("int").hasArg().withDescription("topk").create(TOPK));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("rmmodel file").create(RMMODEL));
    options.addOption(
            OptionBuilder.withArgName("float").hasArg().withDescription("queryLambda").create(QUERY_LAMBDA));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(DOCVECTOR_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(QUERIES_OPTION)
            || !cmdline.hasOption(SMOOTHING) || !cmdline.hasOption(TOPK) || !cmdline.hasOption(QUERY_LAMBDA)
            || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String docvector = cmdline.getOptionValue(DOCVECTOR_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION);
    String queries = cmdline.getOptionValue(QUERIES_OPTION);
    String smoothing = cmdline.getOptionValue(SMOOTHING);
    String topk = cmdline.getOptionValue(TOPK);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);
    String rmmodel = cmdline.getOptionValue(RMMODEL);
    String queryLambda = cmdline.getOptionValue(QUERY_LAMBDA);

    LOG.info("Tool name: " + RMRetrieval.class.getSimpleName());
    LOG.info(" - docvector: " + docvector);
    LOG.info(" - output: " + output);
    LOG.info(" - dictionary: " + dictionary);
    LOG.info(" - queries: " + queries);
    LOG.info(" - smoothing: " + smoothing);
    LOG.info(" - topk: " + topk);
    LOG.info(" - preprocessing: " + preprocessing);
    LOG.info(" - rmmodel: " + rmmodel);
    LOG.info(" - queryLambda: " + queryLambda);

    Configuration conf = getConf();
    conf.set(DICTIONARY_OPTION, dictionary);
    conf.set(QUERIES_OPTION, queries);
    conf.setFloat(SMOOTHING, Float.parseFloat(smoothing));
    conf.setInt(TOPK, Integer.parseInt(topk));
    conf.set(PREPROCESSING, preprocessing);
    conf.set(RMMODEL, rmmodel);
    conf.setFloat(QUERY_LAMBDA, Float.parseFloat(queryLambda));

    conf.set("mapreduce.map.memory.mb", "10048");
    conf.set("mapreduce.map.java.opts", "-Xmx10048m");
    conf.set("mapreduce.reduce.memory.mb", "10048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx10048m");
    conf.set("mapred.task.timeout", "6000000");// default is 600000

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(output)))
        fs.delete(new Path(output));

    Job job = new Job(conf, RMRetrieval.class.getSimpleName() + ":" + docvector);
    job.setJarByClass(RMRetrieval.class);

    FileInputFormat.setInputPaths(job, docvector);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setMapOutputKeyClass(PairOfIntString.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}

From source file:org.lilyproject.hadooptestfw.HBaseTestingUtilityFactory.java

License:Apache License

/**
 * Creates an HBaseTestingUtility with settings applied such that everything will be stored below the
 * supplied directory and makes (to some extent) use of standard port numbers.
 *
 * @param conf HBase conf to use, as created by HBaseConfiguration.create().
 * @param tmpDir directory under which data of dfs, zookeeper, mr, ... will be stored
 * @param clearData can data be cleared (at startup or shutdown), use true unless you need the data from a previous
 *                  run
 */
public static HBaseTestingUtility create(Configuration conf, File tmpDir, boolean clearData)
        throws IOException {

    // This location will be used for dfs, zookeeper, ...
    conf.set(TEST_DIR_KEY, createSubDir(tmpDir, "hbase-test-util"));

    // This property is picked up by our fork of MiniMRCluster (the default implementation was hardcoded
    // to use build/test/mapred/local)
    System.setProperty("mapred.local.dir", createSubDir(tmpDir, "mapred-local"));

    conf.set("mapred.local.dir", createSubDir(tmpDir, "mapred-local"));

    // Properties used for MiniMRCluster
    conf.set("hadoop.log.dir", createSubDir(tmpDir, "hadoop-logs"));
    conf.set("hadoop.tmp.dir", createSubDir(tmpDir, "mapred-output"));

    conf.set("mapred.system.dir", "/tmp/hadoop/mapred/system");
    conf.set("mapreduce.jobtracker.staging.root.dir", "/tmp/hadoop/mapred/staging");

    // Only use one MR child VM, should be lighter on developer machines
    conf.set("mapred.tasktracker.map.tasks.maximum", "1");

    // Force default port numbers
    conf.set("hbase.master.info.port", "60010");
    conf.set("hbase.regionserver.info.port", "60030");

    // Allow more clients to connect concurrently (HBase default is 10)
    conf.set("hbase.regionserver.handler.count", "30");

    // Allow more clients to connect concurrently to hdfs (default is 3)
    conf.set("dfs.datanode.handler.count", "6");

    // Generic performance related settings
    conf.set("io.file.buffer.size", "65536");
    conf.set("hbase.hregion.memstore.flush.size", "268435456");

    // Disable the automatic closing of Hadoop FileSystem objects by its shutdown hook.
    // Otherwise, when stopping 'launch-test-lily' (LilyLauncher), the shutdown hook closes the filesystem
    // before HBase had the opportunity to flush its data. This then leads to (possibly long) recoveries
    // on the next startup (and even then, I've seen data loss, maybe sync is not active for the mini cluster?).
    conf.set("fs.automatic.close", "false");

    // Replication parameters needed for the SEP
    conf.set("hbase.replication", "true");
    conf.setFloat("replication.source.ratio", 1.0f);
    conf.set("replication.source.nb.capacity", "200");
    conf.set("replication.replicationsource.implementation", "com.ngdata.sep.impl.SepReplicationSource");

    // make replication react a little quicker
    conf.setLong("replication.source.sleepforretries", 200);

    // make retries in ZooKeeper a little quicker
    // This was added with CDH 4.2, where on shutdown HBase's snapshot manager closed a zookeeper
    // connection which later on was still used by another component, which then got into a retry loop,
    // leading to a slow shutdown.
    conf.setInt("zookeeper.recovery.retry.intervalmill", 100);

    return new HBaseTestingUtility(conf, clearData);
}

From source file:org.mrgeo.data.tile.TiledInputFormatContext.java

License:Apache License

public void save(final Configuration conf) {
    conf.set(INPUTS, StringUtils.join(inputs, ","));
    conf.setInt(ZOOM_LEVEL, zoomLevel);
    conf.setInt(TILE_SIZE, tileSize);
    if (bounds != null) {
        conf.set(BOUNDS, bounds.toDelimitedString());
    }
    conf.setBoolean(INCLUDE_EMPTY_TILES, includeEmptyTiles);
    if (includeEmptyTiles) {
        conf.setFloat(FILL_VALUE, (float) fillValue);
    }
    conf.setInt(PROVIDER_PROPERTY_COUNT,
            ((inputProviderProperties == null) ? 0 : inputProviderProperties.size()));
    if (inputProviderProperties != null) {
        Set<String> keySet = inputProviderProperties.stringPropertyNames();
        String[] keys = new String[keySet.size()];
        keySet.toArray(keys);
        for (int i = 0; i < keys.length; i++) {
            conf.set(PROVIDER_PROPERTY_KEY + i, keys[i]);
            String v = inputProviderProperties.getProperty(keys[i]);
            if (v != null) {
                conf.set(PROVIDER_PROPERTY_VALUE + i, v);
            }
        }
    }
}

From source file:org.mrgeo.ingest.IngestImageDriver.java

License:Apache License

private static boolean runJob(final String[] inputs, final String output, final Configuration config,
        final TiledInputFormatProvider<RasterWritable> formatProvider, final Bounds bounds, final Number nodata,
        final boolean categorical, final int zoomlevel, final int tilesize, final int bands,
        final Map<String, String> tags, final String protectionLevel, final Properties providerProperties)
        throws Exception {

    Configuration conf = config;
    if (conf == null) {
        conf = HadoopUtils.createConfiguration();
    }

    final Job job = new Job(conf, "IngestImage");
    conf = job.getConfiguration();

    HadoopUtils.setJar(job, IngestImageDriver.class);

    job.setMapperClass(IngestImageMapper.class);
    job.setReducerClass(IngestImageReducer.class);

    for (final String input : inputs) {
        // using FileInputFormat for convenience. It creates "mapred.input.dir" in the config
        FileInputFormat.addInputPath(job, new Path(input));
    }

    formatProvider.setupJob(job, providerProperties);

    // getInputFormat takes an image name, but we don't need it here, so we'll just send an empty string
    job.setInputFormatClass(formatProvider.getInputFormat("").getClass());

    final AdHocDataProvider metadataProvider = DataProviderFactory.createAdHocDataProvider(providerProperties);
    final AdHocDataProvider statsProvider = DataProviderFactory.createAdHocDataProvider(providerProperties);

    // get the ad hoc providers set up for map/reduce
    metadataProvider.setupJob(job);
    statsProvider.setupJob(job);

    conf.set("metadata.provider", metadataProvider.getResourceName());
    conf.set("stats.provider", statsProvider.getResourceName());
    conf.setInt("zoomlevel", zoomlevel);
    conf.setInt("tilesize", tilesize);
    conf.setFloat("nodata", nodata.floatValue());
    conf.setInt("bands", bands);

    if (categorical) {
        conf.set("classification", Classification.Categorical.name());
    } else {
        conf.set("classification", Classification.Continuous.name());
    }

    String useProtectionLevel = protectionLevel;
    {
        MrsImageDataProvider dp = DataProviderFactory.getMrsImageDataProvider(output, AccessMode.OVERWRITE,
                conf);
        useProtectionLevel = ProtectionLevelUtils.getAndValidateProtectionLevel(dp, protectionLevel);
    }

    MrsImageOutputFormatProvider provider = MrsImageDataProvider.setupMrsPyramidOutputFormat(job, output,
            bounds, zoomlevel, tilesize, useProtectionLevel, providerProperties);

    try {
        job.submit();

        final boolean success = job.waitForCompletion(true);
        if (success) {
            provider.teardown(job);

            ImageStats[] stats = ImageStats.readStats(statsProvider);
            aggregateMetadata(metadataProvider, provider, output, stats, tags, useProtectionLevel,
                    providerProperties);
        }

        return success;
    } catch (final ClassNotFoundException e) {
        throw new IOException("Error running ingest map/reduce", e);
    } catch (final InterruptedException e) {
        throw new IOException("Error running ingest map/reduce", e);
    } finally {
        statsProvider.delete();
        metadataProvider.delete();
    }
}

From source file:org.mrgeo.ingest.IngestImageMapperTest.java

License:Apache License

@Test
@Category(UnitTest.class)
public void map() throws Exception {
    Job job = new Job();
    Configuration config = job.getConfiguration();

    // only need to set the metadata params we'll use in the splitter
    MrsImagePyramidMetadata metadata = new MrsImagePyramidMetadata();
    metadata.setTilesize(512);

    try {
        HadoopUtils.setMetadata(config, metadata);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Catestrophic exception");
    }

    final AdHocDataProvider metadataProvider = DataProviderFactory
            .createAdHocDataProvider(HadoopUtils.createConfiguration());
    metadataProvider.setupJob(job);
    config.set("metadata.provider", metadataProvider.getResourceName());
    config.setInt("zoomlevel", metadata.getMaxZoomLevel());
    config.setInt("tilesize", metadata.getTilesize());
    config.setFloat("nodata", (float) metadata.getDefaultValueShort(0));
    config.setInt("bands", metadata.getBands());
    config.set("classification", Classification.Continuous.name());

    TileIdWritable key = new TileIdWritable(100);
    RasterWritable value;
    try {
        value = RasterWritable.toWritable(raster);

        MapDriver<TileIdWritable, RasterWritable, TileIdWritable, RasterWritable> driver = new MapDriver<TileIdWritable, RasterWritable, TileIdWritable, RasterWritable>()
                .withConfiguration(config).withMapper(new IngestImageMapper()).withInputKey(key)
                .withInputValue(value);

        java.util.List<Pair<TileIdWritable, RasterWritable>> results = driver.run();

        // Test the results
        Assert.assertEquals("Bad number of maps returned", 1, results.size());

        java.util.ListIterator<Pair<TileIdWritable, RasterWritable>> iter = results.listIterator();

        Assert.assertTrue("Map iterator doesn't have a next item", iter.hasNext());

        Pair<TileIdWritable, RasterWritable> item = iter.next();
        Assert.assertEquals("Input tileid doesn't match output", key.get(), item.getFirst().get());
        TestUtils.compareRasters(RasterWritable.toRaster(value), RasterWritable.toRaster(item.getSecond()));

        // test the counters
        Assert.assertEquals("Tile count (counter) incorrect.", 1,
                driver.getCounters().findCounter("Ingest Mapper", "Mapper Tiles Processed").getValue());
        metadataProvider.delete();
    } catch (Exception e) {
        Assert.fail("Catastrophic Exception" + e);
    }

}

From source file:org.mrgeo.mapreduce.FillRasterDriver.java

License:Apache License

public static void run(final Job job, final MrsImagePyramid input, final String output, final double value,
        final String fillType, final Bounds bounds, final Progress progress, final JobListener jobListener,
        final String protectionLevel, final Properties providerProperties)
        throws IOException, JobFailedException, JobCancelledException {
    // create a new unique job name
    final String now = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss").format(new Date());

    final String jobName = "FillRaster_" + now + "_" + UUID.randomUUID().toString();
    job.setJobName(jobName);

    MapReduceUtils.setupTiledJob(job);

    final Configuration conf = job.getConfiguration();

    HadoopUtils.setJar(job, FillRasterDriver.class);

    final MrsImagePyramidMetadata metadata = input.getMetadata();

    final int zoomlevel = metadata.getMaxZoomLevel();
    final int tilesize = metadata.getTilesize();
    final double nodata = metadata.getDefaultValue(0);

    // set some constants
    conf.set(FILL_TYPE, fillType);
    conf.setFloat(FILL_VALUE, (float) value);
    conf.set(BOUNDS, bounds.toDelimitedString());
    conf.setFloat(NODATA, (float) nodata);
    conf.setInt(ZOOM, zoomlevel);

    MrsImageDataProvider.setupMrsImagePyramidAllTilesSingleInputFormat(job, metadata.getPyramid(), zoomlevel,
            tilesize, bounds, value, providerProperties);

    job.setMapperClass(FillRasterMapper.class);

    job.setOutputKeyClass(TileIdWritable.class);
    job.setOutputValueClass(RasterWritable.class);

    HadoopUtils.setMetadata(job, metadata);

    final AdHocDataProvider statsProvider = DataProviderFactory.createAdHocDataProvider(providerProperties);
    // get the ad hoc provider set up for map/reduce
    statsProvider.setupJob(job);
    conf.set(STATS_PROVIDER, statsProvider.getResourceName());
    MrsImageOutputFormatProvider ofProvider = MrsImageDataProvider.setupMrsPyramidOutputFormat(job, output,
            bounds, zoomlevel, tilesize, metadata.getTileType(), metadata.getBands(), protectionLevel,
            providerProperties);

    if (MapReduceUtils.runJob(job, progress, jobListener)) {
        ofProvider.teardown(job);

        // save the metadata
        MrsImagePyramid.calculateMetadataWithProvider(output, zoomlevel, ofProvider.getImageProvider(),
                statsProvider, metadata.getDefaultValues(), bounds, conf, protectionLevel, providerProperties);
    }
    statsProvider.delete();
}

From source file:org.mrgeo.mapreduce.formats.EmptyTileInputFormat.java

License:Apache License

public static void setRasterInfo(final Job job, final int tilesize, final int bands, final int datatype,
        final double nodata) {
    job.setInputFormatClass(EmptyTileInputFormat.class);

    final Configuration conf = job.getConfiguration();

    conf.setInt(TILESIZE, tilesize);
    conf.setInt(BANDS, bands);
    conf.setInt(DATATYPE, datatype);
    conf.setFloat(NODATA, (float) nodata); // there is no setDouble!?
}

From source file:org.qcri.pca.NormalizeJob.java

public void run(Configuration conf, Path matrixInputPath, String meanSpanFileName, Path matrixOutputPath,
        double sampleRate) throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MEANSPANOPTION, meanSpanFileName);
    conf.setFloat(SAMPLERATE, (float) sampleRate);
    Job job = new Job(conf);
    job.setJobName("Normalize");
    job.setJarByClass(NormalizeJob.class);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(NormalizeMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}

From source file:root.hap.availability.HierarchicalAvailabilityDriver.java

License:Apache License

/**
 * This method allows {@link HierarchicalAvailabilityDriver} to act as a 
 * {@link ToolRunner} and interface properly with any Driver.
 *
 * @param args Configuration arguments
 * @return Exit status
 * @see ToolRunner
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    addArguments();

    if (parseArguments(args) == null) {
        return -1;
    }

    initArguments();

    conf.setInt("matrixN", Integer.valueOf(matrixN));
    conf.setFloat("lambda", Float.valueOf(lambda));
    conf.setInt("numLevels", Integer.valueOf(numLevels));
    conf.setInt("numIteration", Integer.valueOf(numIteration));

    Job job = new Job(conf, "HierarchicalAvailability");
    job.setJarByClass(HierarchicalAvailabilityDriver.class);

    job.setMapperClass(AvailabilityMapper.class);
    job.setReducerClass(AvailabilityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputDirectory));
    FileOutputFormat.setOutputPath(job, new Path(outputDirectory));

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:root.hap.responsibility.HierarchicalResponsibilityDriver.java

License:Apache License

/**
 * This method allows {@link HierarchicalResponsibilityDriver} to act as a 
 * {@link ToolRunner} and interface properly with any Driver.
 *
 * @param args Configuration arguments
 * @return Exit status
 * @see ToolRunner
 */
@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();

    addArguments();

    if (parseArguments(args) == null) {
        return -1;
    }

    initArguments();

    conf.setInt("matrixN", Integer.valueOf(matrixN));
    conf.setFloat("lambda", Float.valueOf(lambda));
    conf.setInt("numLevels", Integer.valueOf(numLevels));
    conf.setInt("numIteration", Integer.valueOf(numIteration));

    Job job = new Job(conf, "HierarchicalResponsibility");
    job.setJarByClass(HierarchicalResponsibilityDriver.class);

    job.setMapperClass(ResponsibilityMapper.class);
    job.setReducerClass(ResponsibilityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputDirectory));
    FileOutputFormat.setOutputPath(job, new Path(outputDirectory));

    return job.waitForCompletion(true) ? 0 : 1;

}