List of usage examples for org.apache.hadoop.conf Configuration setFloat
public void setFloat(String name, float value)
Set the value of the name property to a float.

Parameters:
name - property name
value - property value
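Before the collected project examples below, here is a minimal, self-contained sketch of the typical round trip: a driver stores a float with setFloat, and a task reads it back with getFloat plus a default. The property key "my.job.threshold" and the class names SetFloatRoundTrip and ThresholdMapper are illustrative only, not taken from any of the projects quoted on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SetFloatRoundTrip {

    // Hypothetical property name, used only for this sketch.
    private static final String THRESHOLD_KEY = "my.job.threshold";

    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Driver side: store the float (Configuration keeps it internally as a String).
        conf.setFloat(THRESHOLD_KEY, 0.75f);

        // Read it back; the second argument is the default returned when the property is unset.
        float threshold = conf.getFloat(THRESHOLD_KEY, 0.5f);
        System.out.println("threshold = " + threshold);
    }

    // Task side: the same lookup usually happens in Mapper.setup().
    public static class ThresholdMapper extends Mapper<LongWritable, Text, Text, Text> {
        private float threshold;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            threshold = context.getConfiguration().getFloat(THRESHOLD_KEY, 0.5f);
        }
    }
}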
From source file:org.clueweb.clueweb12.app.RMRetrieval.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access", "deprecation" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path (pfor format expected, add * to retrieve files)")
            .create(DOCVECTOR_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("queries").create(QUERIES_OPTION));
    options.addOption(
            OptionBuilder.withArgName("float").hasArg().withDescription("smoothing").create(SMOOTHING));
    options.addOption(OptionBuilder.withArgName("int").hasArg().withDescription("topk").create(TOPK));
    options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg()
            .withDescription("preprocessing").create(PREPROCESSING));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("rmmodel file").create(RMMODEL));
    options.addOption(
            OptionBuilder.withArgName("float").hasArg().withDescription("queryLambda").create(QUERY_LAMBDA));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(DOCVECTOR_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(QUERIES_OPTION)
            || !cmdline.hasOption(SMOOTHING) || !cmdline.hasOption(TOPK) || !cmdline.hasOption(QUERY_LAMBDA)
            || !cmdline.hasOption(PREPROCESSING)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String docvector = cmdline.getOptionValue(DOCVECTOR_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION);
    String queries = cmdline.getOptionValue(QUERIES_OPTION);
    String smoothing = cmdline.getOptionValue(SMOOTHING);
    String topk = cmdline.getOptionValue(TOPK);
    String preprocessing = cmdline.getOptionValue(PREPROCESSING);
    String rmmodel = cmdline.getOptionValue(RMMODEL);
    String queryLambda = cmdline.getOptionValue(QUERY_LAMBDA);

    LOG.info("Tool name: " + RMRetrieval.class.getSimpleName());
    LOG.info(" - docvector: " + docvector);
    LOG.info(" - output: " + output);
    LOG.info(" - dictionary: " + dictionary);
    LOG.info(" - queries: " + queries);
    LOG.info(" - smoothing: " + smoothing);
    LOG.info(" - topk: " + topk);
    LOG.info(" - preprocessing: " + preprocessing);
    LOG.info(" - rmmodel: " + rmmodel);
    LOG.info(" - queryLambda: " + queryLambda);

    Configuration conf = getConf();
    conf.set(DICTIONARY_OPTION, dictionary);
    conf.set(QUERIES_OPTION, queries);
    conf.setFloat(SMOOTHING, Float.parseFloat(smoothing));
    conf.setInt(TOPK, Integer.parseInt(topk));
    conf.set(PREPROCESSING, preprocessing);
    conf.set(RMMODEL, rmmodel);
    conf.setFloat(QUERY_LAMBDA, Float.parseFloat(queryLambda));

    conf.set("mapreduce.map.memory.mb", "10048");
    conf.set("mapreduce.map.java.opts", "-Xmx10048m");
    conf.set("mapreduce.reduce.memory.mb", "10048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx10048m");
    conf.set("mapred.task.timeout", "6000000"); // default is 600000

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(new Path(output)))
        fs.delete(new Path(output));

    Job job = new Job(conf, RMRetrieval.class.getSimpleName() + ":" + docvector);
    job.setJarByClass(RMRetrieval.class);

    FileInputFormat.setInputPaths(job, docvector);
    FileOutputFormat.setOutputPath(job, new Path(output));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(PairOfIntString.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(MyMapper.class);
    job.setPartitionerClass(MyPartitioner.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:org.lilyproject.hadooptestfw.HBaseTestingUtilityFactory.java
License:Apache License
/**
 * Creates an HBaseTestingUtility with settings applied such that everything will be stored below the
 * supplied directory and makes (to some extent) use of standard port numbers.
 *
 * @param conf HBase conf to use, as created by HBaseConfiguration.create().
 * @param tmpDir directory under which data of dfs, zookeeper, mr, ... will be stored
 * @param clearData can data be cleared (at startup or shutdown), use true unless you need the data from a
 *                  previous run
 */
public static HBaseTestingUtility create(Configuration conf, File tmpDir, boolean clearData)
        throws IOException {
    // This location will be used for dfs, zookeeper, ...
    conf.set(TEST_DIR_KEY, createSubDir(tmpDir, "hbase-test-util"));

    // This property is picked up by our fork of MiniMRCluster (the default implementation was hardcoded
    // to use build/test/mapred/local)
    System.setProperty("mapred.local.dir", createSubDir(tmpDir, "mapred-local"));
    conf.set("mapred.local.dir", createSubDir(tmpDir, "mapred-local"));

    // Properties used for MiniMRCluster
    conf.set("hadoop.log.dir", createSubDir(tmpDir, "hadoop-logs"));
    conf.set("hadoop.tmp.dir", createSubDir(tmpDir, "mapred-output"));
    conf.set("mapred.system.dir", "/tmp/hadoop/mapred/system");
    conf.set("mapreduce.jobtracker.staging.root.dir", "/tmp/hadoop/mapred/staging");

    // Only use one MR child VM, should be lighter on developer machines
    conf.set("mapred.tasktracker.map.tasks.maximum", "1");

    // Force default port numbers
    conf.set("hbase.master.info.port", "60010");
    conf.set("hbase.regionserver.info.port", "60030");

    // Allow more clients to connect concurrently (HBase default is 10)
    conf.set("hbase.regionserver.handler.count", "30");

    // Allow more clients to connect concurrently to hdfs (default is 3)
    conf.set("dfs.datanode.handler.count", "6");

    // Generic performance related settings
    conf.set("io.file.buffer.size", "65536");
    conf.set("hbase.hregion.memstore.flush.size", "268435456");

    // Disable the automatic closing of Hadoop FileSystem objects by its shutdown hook.
    // Otherwise, when stopping 'launch-test-lily' (LilyLauncher), the shutdown hook closes the filesystem
    // before HBase had the opportunity to flush its data. This then leads to (possibly long) recoveries
    // on the next startup (and even then, I've seen data loss, maybe sync is not active for the mini cluster?).
    conf.set("fs.automatic.close", "false");

    // Replication parameters needed for the SEP
    conf.set("hbase.replication", "true");
    conf.setFloat("replication.source.ratio", 1.0f);
    conf.set("replication.source.nb.capacity", "200");
    conf.set("replication.replicationsource.implementation", "com.ngdata.sep.impl.SepReplicationSource");

    // make replication react a little quicker
    conf.setLong("replication.source.sleepforretries", 200);

    // make retries in ZooKeeper a little quicker
    // This was added with CDH 4.2, where on shutdown HBase's snapshot manager closed a zookeeper
    // connection which later on was still used by another component, which then got into a retry loop,
    // leading to a slow shutdown.
    conf.setInt("zookeeper.recovery.retry.intervalmill", 100);

    return new HBaseTestingUtility(conf, clearData);
}
From source file:org.mrgeo.data.tile.TiledInputFormatContext.java
License:Apache License
public void save(final Configuration conf) {
    conf.set(INPUTS, StringUtils.join(inputs, ","));
    conf.setInt(ZOOM_LEVEL, zoomLevel);
    conf.setInt(TILE_SIZE, tileSize);
    if (bounds != null) {
        conf.set(BOUNDS, bounds.toDelimitedString());
    }
    conf.setBoolean(INCLUDE_EMPTY_TILES, includeEmptyTiles);
    if (includeEmptyTiles) {
        conf.setFloat(FILL_VALUE, (float) fillValue);
    }
    conf.setInt(PROVIDER_PROPERTY_COUNT,
            ((inputProviderProperties == null) ? 0 : inputProviderProperties.size()));
    if (inputProviderProperties != null) {
        Set<String> keySet = inputProviderProperties.stringPropertyNames();
        String[] keys = new String[keySet.size()];
        keySet.toArray(keys);
        for (int i = 0; i < keys.length; i++) {
            conf.set(PROVIDER_PROPERTY_KEY + i, keys[i]);
            String v = inputProviderProperties.getProperty(keys[i]);
            if (v != null) {
                conf.set(PROVIDER_PROPERTY_VALUE + i, v);
            }
        }
    }
}
From source file:org.mrgeo.ingest.IngestImageDriver.java
License:Apache License
private static boolean runJob(final String[] inputs, final String output, final Configuration config,
        final TiledInputFormatProvider<RasterWritable> formatProvider, final Bounds bounds,
        final Number nodata, final boolean categorical, final int zoomlevel, final int tilesize,
        final int bands, final Map<String, String> tags, final String protectionLevel,
        final Properties providerProperties) throws Exception {
    Configuration conf = config;
    if (conf == null) {
        conf = HadoopUtils.createConfiguration();
    }

    final Job job = new Job(conf, "IngestImage");
    conf = job.getConfiguration();

    HadoopUtils.setJar(job, IngestImageDriver.class);

    job.setMapperClass(IngestImageMapper.class);
    job.setReducerClass(IngestImageReducer.class);

    for (final String input : inputs) {
        // using FileInputFormat for convenience. It creates "mapred.input.dir" in the config
        FileInputFormat.addInputPath(job, new Path(input));
    }

    formatProvider.setupJob(job, providerProperties);
    // getInputFormat takes an image name, but we don't need it here, so we'll just send an empty string
    job.setInputFormatClass(formatProvider.getInputFormat("").getClass());

    final AdHocDataProvider metadataProvider = DataProviderFactory.createAdHocDataProvider(providerProperties);
    final AdHocDataProvider statsProvider = DataProviderFactory.createAdHocDataProvider(providerProperties);

    // get the ad hoc providers set up for map/reduce
    metadataProvider.setupJob(job);
    statsProvider.setupJob(job);

    conf.set("metadata.provider", metadataProvider.getResourceName());
    conf.set("stats.provider", statsProvider.getResourceName());
    conf.setInt("zoomlevel", zoomlevel);
    conf.setInt("tilesize", tilesize);
    conf.setFloat("nodata", nodata.floatValue());
    conf.setInt("bands", bands);

    if (categorical) {
        conf.set("classification", Classification.Categorical.name());
    } else {
        conf.set("classification", Classification.Continuous.name());
    }

    String useProtectionLevel = protectionLevel;
    {
        MrsImageDataProvider dp = DataProviderFactory.getMrsImageDataProvider(output, AccessMode.OVERWRITE,
                conf);
        useProtectionLevel = ProtectionLevelUtils.getAndValidateProtectionLevel(dp, protectionLevel);
    }

    MrsImageOutputFormatProvider provider = MrsImageDataProvider.setupMrsPyramidOutputFormat(job, output,
            bounds, zoomlevel, tilesize, useProtectionLevel, providerProperties);

    try {
        job.submit();

        final boolean success = job.waitForCompletion(true);
        if (success) {
            provider.teardown(job);

            ImageStats[] stats = ImageStats.readStats(statsProvider);
            aggregateMetadata(metadataProvider, provider, output, stats, tags, useProtectionLevel,
                    providerProperties);
        }

        return success;
    } catch (final ClassNotFoundException e) {
        throw new IOException("Error running ingest map/reduce", e);
    } catch (final InterruptedException e) {
        throw new IOException("Error running ingest map/reduce", e);
    } finally {
        statsProvider.delete();
        metadataProvider.delete();
    }
}
From source file:org.mrgeo.ingest.IngestImageMapperTest.java
License:Apache License
@Test
@Category(UnitTest.class)
public void map() throws Exception {
    Job job = new Job();
    Configuration config = job.getConfiguration();

    // only need to set the metadata params we'll use in the splitter
    MrsImagePyramidMetadata metadata = new MrsImagePyramidMetadata();
    metadata.setTilesize(512);

    try {
        HadoopUtils.setMetadata(config, metadata);
    } catch (IOException e) {
        e.printStackTrace();
        Assert.fail("Catestrophic exception");
    }

    final AdHocDataProvider metadataProvider = DataProviderFactory
            .createAdHocDataProvider(HadoopUtils.createConfiguration());
    metadataProvider.setupJob(job);
    config.set("metadata.provider", metadataProvider.getResourceName());
    config.setInt("zoomlevel", metadata.getMaxZoomLevel());
    config.setInt("tilesize", metadata.getTilesize());
    config.setFloat("nodata", (float) metadata.getDefaultValueShort(0));
    config.setInt("bands", metadata.getBands());
    config.set("classification", Classification.Continuous.name());

    TileIdWritable key = new TileIdWritable(100);
    RasterWritable value;

    try {
        value = RasterWritable.toWritable(raster);

        MapDriver<TileIdWritable, RasterWritable, TileIdWritable, RasterWritable> driver =
                new MapDriver<TileIdWritable, RasterWritable, TileIdWritable, RasterWritable>()
                        .withConfiguration(config).withMapper(new IngestImageMapper()).withInputKey(key)
                        .withInputValue(value);

        java.util.List<Pair<TileIdWritable, RasterWritable>> results = driver.run();

        // Test the results
        Assert.assertEquals("Bad number of maps returned", 1, results.size());

        java.util.ListIterator<Pair<TileIdWritable, RasterWritable>> iter = results.listIterator();
        Assert.assertTrue("Map iterator doesn't have a next item", iter.hasNext());

        Pair<TileIdWritable, RasterWritable> item = iter.next();
        Assert.assertEquals("Input tileid doesn't match output", key.get(), item.getFirst().get());

        TestUtils.compareRasters(RasterWritable.toRaster(value), RasterWritable.toRaster(item.getSecond()));

        // test the counters
        Assert.assertEquals("Tile count (counter) incorrect.", 1,
                driver.getCounters().findCounter("Ingest Mapper", "Mapper Tiles Processed").getValue());

        metadataProvider.delete();
    } catch (Exception e) {
        Assert.fail("Catastrophic Exception" + e);
    }
}
From source file:org.mrgeo.mapreduce.FillRasterDriver.java
License:Apache License
public static void run(final Job job, final MrsImagePyramid input, final String output, final double value,
        final String fillType, final Bounds bounds, final Progress progress, final JobListener jobListener,
        final String protectionLevel, final Properties providerProperties)
        throws IOException, JobFailedException, JobCancelledException {
    // create a new unique job name
    final String now = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss").format(new Date());

    final String jobName = "FillRaster_" + now + "_" + UUID.randomUUID().toString();
    job.setJobName(jobName);

    MapReduceUtils.setupTiledJob(job);

    final Configuration conf = job.getConfiguration();

    HadoopUtils.setJar(job, FillRasterDriver.class);

    final MrsImagePyramidMetadata metadata = input.getMetadata();
    final int zoomlevel = metadata.getMaxZoomLevel();
    final int tilesize = metadata.getTilesize();
    final double nodata = metadata.getDefaultValue(0);

    // set some constants
    conf.set(FILL_TYPE, fillType);
    conf.setFloat(FILL_VALUE, (float) value);
    conf.set(BOUNDS, bounds.toDelimitedString());
    conf.setFloat(NODATA, (float) nodata);
    conf.setInt(ZOOM, zoomlevel);

    MrsImageDataProvider.setupMrsImagePyramidAllTilesSingleInputFormat(job, metadata.getPyramid(), zoomlevel,
            tilesize, bounds, value, providerProperties);

    job.setMapperClass(FillRasterMapper.class);
    job.setOutputKeyClass(TileIdWritable.class);
    job.setOutputValueClass(RasterWritable.class);

    HadoopUtils.setMetadata(job, metadata);

    final AdHocDataProvider statsProvider = DataProviderFactory.createAdHocDataProvider(providerProperties);

    // get the ad hoc provider set up for map/reduce
    statsProvider.setupJob(job);
    conf.set(STATS_PROVIDER, statsProvider.getResourceName());

    MrsImageOutputFormatProvider ofProvider = MrsImageDataProvider.setupMrsPyramidOutputFormat(job, output,
            bounds, zoomlevel, tilesize, metadata.getTileType(), metadata.getBands(), protectionLevel,
            providerProperties);

    if (MapReduceUtils.runJob(job, progress, jobListener)) {
        ofProvider.teardown(job);

        // save the metadata
        MrsImagePyramid.calculateMetadataWithProvider(output, zoomlevel, ofProvider.getImageProvider(),
                statsProvider, metadata.getDefaultValues(), bounds, conf, protectionLevel,
                providerProperties);
    }

    statsProvider.delete();
}
From source file:org.mrgeo.mapreduce.formats.EmptyTileInputFormat.java
License:Apache License
public static void setRasterInfo(final Job job, final int tilesize, final int bands, final int datatype,
        final double nodata) {
    job.setInputFormatClass(EmptyTileInputFormat.class);

    final Configuration conf = job.getConfiguration();
    conf.setInt(TILESIZE, tilesize);
    conf.setInt(BANDS, bands);
    conf.setInt(DATATYPE, datatype);
    conf.setFloat(NODATA, (float) nodata); // there is no setDouble!?
}
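The comment above points at a real precision trade-off: casting a double nodata value to float before calling setFloat loses precision. As a hedged sketch (not part of the MrGeo code above), more recent Hadoop releases do provide Configuration.setDouble/getDouble, and on any version the string form can be stored directly; the key name "nodata" below is reused only for illustration.

import org.apache.hadoop.conf.Configuration;

public class NoDataPrecision {
    private static final String NODATA = "nodata"; // illustrative key, not a MrGeo constant

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        double nodata = 1.23456789012345; // would lose digits if narrowed to float

        // Option 1: on Hadoop versions that have setDouble/getDouble, keep full double precision.
        conf.setDouble(NODATA, nodata);
        double restored = conf.getDouble(NODATA, Double.NaN);

        // Option 2: works on any version - store the string form and parse it back.
        conf.set(NODATA, Double.toString(nodata));
        double parsed = Double.parseDouble(conf.get(NODATA));

        System.out.println(restored + " " + parsed);
    }
}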
From source file:org.qcri.pca.NormalizeJob.java
public void run(Configuration conf, Path matrixInputPath, String meanSpanFileName, Path matrixOutputPath,
        double sampleRate) throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MEANSPANOPTION, meanSpanFileName);
    conf.setFloat(SAMPLERATE, (float) sampleRate);
    Job job = new Job(conf);
    job.setJobName("Normalize");
    job.setJarByClass(NormalizeJob.class);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(NormalizeMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}
From source file:root.hap.availability.HierarchicalAvailabilityDriver.java
License:Apache License
/**
 * This method allows {@link HierarchicalAvailabilityDriver} to act as a
 * {@link ToolRunner} and interface properly with any Driver.
 *
 * @param args Configuration arguments
 * @return Exit status
 * @see ToolRunner
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    addArguments();

    if (parseArguments(args) == null) {
        return -1;
    }

    initArguments();

    conf.setInt("matrixN", Integer.valueOf(matrixN));
    conf.setFloat("lambda", Float.valueOf(lambda));
    conf.setInt("numLevels", Integer.valueOf(numLevels));
    conf.setInt("numIteration", Integer.valueOf(numIteration));

    Job job = new Job(conf, "HierarchicalAvailability");
    job.setJarByClass(HierarchicalAvailabilityDriver.class);

    job.setMapperClass(AvailabilityMapper.class);
    job.setReducerClass(AvailabilityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputDirectory));
    FileOutputFormat.setOutputPath(job, new Path(outputDirectory));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:root.hap.responsibility.HierarchicalResponsibilityDriver.java
License:Apache License
/**
 * This method allows {@link HierarchicalResponsibilityDriver} to act as a
 * {@link ToolRunner} and interface properly with any Driver.
 *
 * @param args Configuration arguments
 * @return Exit status
 * @see ToolRunner
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    addArguments();

    if (parseArguments(args) == null) {
        return -1;
    }

    initArguments();

    conf.setInt("matrixN", Integer.valueOf(matrixN));
    conf.setFloat("lambda", Float.valueOf(lambda));
    conf.setInt("numLevels", Integer.valueOf(numLevels));
    conf.setInt("numIteration", Integer.valueOf(numIteration));

    Job job = new Job(conf, "HierarchicalResponsibility");
    job.setJarByClass(HierarchicalResponsibilityDriver.class);

    job.setMapperClass(ResponsibilityMapper.class);
    job.setReducerClass(ResponsibilityReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    FileInputFormat.addInputPath(job, new Path(inputDirectory));
    FileOutputFormat.setOutputPath(job, new Path(outputDirectory));

    return job.waitForCompletion(true) ? 0 : 1;
}