List of usage examples for org.apache.hadoop.conf.Configuration.setLong
public void setLong(String name, long value)
Sets the value of the name property to a long.
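Before the project-specific examples below, here is a minimal round-trip sketch of the call; the property name, value, and default are illustrative and not taken from any of the listed projects:

import org.apache.hadoop.conf.Configuration;

public class SetLongExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Store a long under an arbitrary property name.
        conf.setLong("example.block.size", 128L * 1024 * 1024);

        // Read it back; the second argument is the fallback used when the key is absent.
        long blockSize = conf.getLong("example.block.size", 64L * 1024 * 1024);
        System.out.println("block size = " + blockSize); // prints 134217728
    }
}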
From source file:ParascaleFsTestCase.java
License:Apache License
/**
 * Creates a new Hadoop Configuration object.
 *
 * @return a new Hadoop configuration object
 *
 * @see Configuration
 */
protected Configuration getConf() {
    final Configuration conf = new Configuration();
    if (setDefaultBlockSize) {
        conf.setLong(RawParascaleFileSystem.PS_DEFAULT_BLOCKSIZE, defaultBlockSize);
    }
    if (setDefaultReplication) {
        conf.setLong(RawParascaleFileSystem.PS_DEFAULT_REPLICATION, defaultReplication);
    }
    if (setMountPoint) {
        conf.set(RawParascaleFileSystem.PS_MOUNT_POINT, String.format("%s/%s", getTempDir(), mountPoint));
    }
    if (setDefaultFsName) {
        conf.set(FS_DEFAULT_NAME, String.format("%s%s@%s", fsScheme, virtualFs, controlNode));
    }
    return conf;
}
From source file:PairsPMI_M.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    // First MapReduce Job
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - tmp path: " + outputPath + "/tmp");
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the tmp directory if it exists already
    Path tmpDir = new Path("tmp_wj");
    FileSystem.get(getConf()).delete(tmpDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path("tmp_wj"));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time1 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + time1 + " seconds");

    numRecords = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
            .getValue();

    /*
     * Second MapReduce Job
     */
    LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName());
    LOG.info("second stage of MapReduce");
    LOG.info(" - input from tmp path: " + outputPath + "/tmp_wj");
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    // set the global variable
    Configuration conf = getConf();
    conf.setLong("numRec", numRecords);

    job = Job.getInstance(getConf());
    job.setJobName(PairsPMI_M.class.getSimpleName());
    job.setJarByClass(PairsPMI_M.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path("tmp_wj/part*"));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(FloatWritable.class);
    // job.setOutputKeyClass(PairOfStrings.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapperSecond.class);
    // job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducerSecond.class);
    job.setPartitionerClass(MyPartitioner.class);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    double time2 = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Second job finished in " + time2 + " seconds");
    System.out.println("Total time: " + (time1 + time2) + " seconds");

    return 0;
}
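The driver above only stores the record count; the interesting part is the task-side read. A hedged sketch of how that value is typically recovered in the second stage (the helper name and the default of 1 are assumptions, not part of PairsPMI_M):

import org.apache.hadoop.conf.Configuration;

// Hypothetical helper: recovers the total record count that the driver stored
// with conf.setLong("numRec", numRecords). The 1L default is an assumption for
// this sketch; inside MyMapperSecond it would typically be called from setup()
// as totalRecords(context.getConfiguration()).
final class PmiConfHelper {
    static long totalRecords(Configuration conf) {
        return conf.getLong("numRec", 1L);
    }
}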
From source file:at.illecker.hama.hybrid.examples.kmeans.KMeansHybridBSP.java
License:Apache License
public static void main(String[] args) throws Exception {

    // Defaults
    int numBspTask = 1;
    int numGpuBspTask = 1;
    int blockSize = BLOCK_SIZE;
    int gridSize = GRID_SIZE;
    long n = 10; // input vectors
    int k = 3; // start vectors
    int vectorDimension = 2;
    int maxIteration = 10;
    boolean useTestExampleInput = false;
    boolean isDebugging = false;
    boolean timeMeasurement = false;
    int GPUPercentage = 80;

    Configuration conf = new HamaConfiguration();
    FileSystem fs = FileSystem.get(conf);

    // Set numBspTask to maxTasks
    // BSPJobClient jobClient = new BSPJobClient(conf);
    // ClusterStatus cluster = jobClient.getClusterStatus(true);
    // numBspTask = cluster.getMaxTasks();

    if (args.length > 0) {
        if (args.length == 12) {
            numBspTask = Integer.parseInt(args[0]);
            numGpuBspTask = Integer.parseInt(args[1]);
            blockSize = Integer.parseInt(args[2]);
            gridSize = Integer.parseInt(args[3]);
            n = Long.parseLong(args[4]);
            k = Integer.parseInt(args[5]);
            vectorDimension = Integer.parseInt(args[6]);
            maxIteration = Integer.parseInt(args[7]);
            useTestExampleInput = Boolean.parseBoolean(args[8]);
            GPUPercentage = Integer.parseInt(args[9]);
            isDebugging = Boolean.parseBoolean(args[10]);
            timeMeasurement = Boolean.parseBoolean(args[11]);
        } else {
            System.out.println("Wrong argument size!");
            System.out.println(" Argument1=numBspTask");
            System.out.println(" Argument2=numGpuBspTask");
            System.out.println(" Argument3=blockSize");
            System.out.println(" Argument4=gridSize");
            System.out.println(" Argument5=n | Number of input vectors (" + n + ")");
            System.out.println(" Argument6=k | Number of start vectors (" + k + ")");
            System.out.println(" Argument7=vectorDimension | Dimension of each vector (" + vectorDimension + ")");
            System.out.println(" Argument8=maxIterations | Number of maximal iterations (" + maxIteration + ")");
            System.out.println(" Argument9=testExample | Use testExample input (true|false=default)");
            System.out.println(" Argument10=GPUPercentage (percentage of input)");
            System.out.println(" Argument11=isDebugging (true|false=default)");
            System.out.println(" Argument12=timeMeasurement (true|false=default)");
            return;
        }
    }

    // Set config variables
    conf.setBoolean(CONF_DEBUG, isDebugging);
    conf.setBoolean("hama.pipes.logging", false);
    conf.setBoolean(CONF_TIME, timeMeasurement);
    // Set CPU tasks
    conf.setInt("bsp.peers.num", numBspTask);
    // Set GPU tasks
    conf.setInt("bsp.peers.gpu.num", numGpuBspTask);
    // Set GPU blockSize and gridSize
    conf.set(CONF_BLOCKSIZE, "" + blockSize);
    conf.set(CONF_GRIDSIZE, "" + gridSize);
    // Set maxIterations for KMeans
    conf.setInt(CONF_MAX_ITERATIONS, maxIteration);
    // Set n for KMeans
    conf.setLong(CONF_N, n);
    // Set GPU workload
    conf.setInt(CONF_GPU_PERCENTAGE, GPUPercentage);

    LOG.info("NumBspTask: " + conf.getInt("bsp.peers.num", 0));
    LOG.info("NumGpuBspTask: " + conf.getInt("bsp.peers.gpu.num", 0));
    LOG.info("bsp.tasks.maximum: " + conf.get("bsp.tasks.maximum"));
    LOG.info("GPUPercentage: " + conf.get(CONF_GPU_PERCENTAGE));
    LOG.info("BlockSize: " + conf.get(CONF_BLOCKSIZE));
    LOG.info("GridSize: " + conf.get(CONF_GRIDSIZE));
    LOG.info("isDebugging: " + conf.get(CONF_DEBUG));
    LOG.info("timeMeasurement: " + conf.get(CONF_TIME));
    LOG.info("useTestExampleInput: " + useTestExampleInput);
    LOG.info("inputPath: " + CONF_INPUT_DIR);
    LOG.info("centersPath: " + CONF_CENTER_DIR);
    LOG.info("outputPath: " + CONF_OUTPUT_DIR);
    LOG.info("n: " + n);
    LOG.info("k: " + k);
    LOG.info("vectorDimension: " + vectorDimension);
    LOG.info("maxIteration: " + maxIteration);

    Path centerIn = new Path(CONF_CENTER_DIR, "center_in.seq");
    Path centerOut = new Path(CONF_CENTER_DIR, "center_out.seq");
    conf.set(CONF_CENTER_IN_PATH, centerIn.toString());
    conf.set(CONF_CENTER_OUT_PATH, centerOut.toString());

    // prepare Input
    if (useTestExampleInput) {
        // prepareTestInput(conf, fs, input, centerIn);
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
                vectorDimension, null, GPUPercentage);
    } else {
        prepareInputData(conf, fs, CONF_INPUT_DIR, centerIn, numBspTask, numGpuBspTask, n, k,
                vectorDimension, new Random(3337L), GPUPercentage);
    }

    BSPJob job = createKMeansHybridBSPConf(conf, CONF_INPUT_DIR, CONF_OUTPUT_DIR);

    long startTime = System.currentTimeMillis();
    if (job.waitForCompletion(true)) {
        LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

        if (isDebugging) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
            printOutput(conf, fs, ".log", new IntWritable(), new PipesVectorWritable());
        }

        if (k < 50) {
            printFile(conf, fs, centerOut, new PipesVectorWritable(), NullWritable.get());
        }
    }
}
From source file:backup.integration.MiniClusterTestBase.java
License:Apache License
private Configuration setupConfig(File hdfsDir) throws Exception {
    Configuration conf = new Configuration();
    File backup = new File(tmpHdfs, "backup");
    backup.mkdirs();
    conf.set(DFS_BACKUP_NAMENODE_LOCAL_DIR_KEY, backup.getAbsolutePath());
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, hdfsDir.getAbsolutePath());
    conf.set(DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY, BackupFsDatasetSpiFactory.class.getName());
    conf.set(DFSConfigKeys.DFS_DATANODE_PLUGINS_KEY, DataNodeBackupServicePlugin.class.getName());
    conf.set(DFSConfigKeys.DFS_NAMENODE_PLUGINS_KEY, NameNodeBackupServicePlugin.class.getName());

    conf.setInt(BackupConstants.DFS_BACKUP_DATANODE_RPC_PORT_KEY, 0);
    conf.setInt(BackupConstants.DFS_BACKUP_NAMENODE_HTTP_PORT_KEY, 0);

    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 2); // 3
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_MINIMUM_INTERVAL_KEY, 2); // 3
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 6000); // 30000
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 6000); // 5*60*1000

    org.apache.commons.configuration.Configuration configuration = BackupUtil.convert(conf);
    setupBackupStore(configuration);

    @SuppressWarnings("unchecked")
    Iterator<String> keys = configuration.getKeys();
    while (keys.hasNext()) {
        String key = keys.next();
        conf.set(key, configuration.getString(key));
    }
    return conf;
}
From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java
License:Apache License
private boolean runAprioriOncPhaseOnce(FIMOptions opt, long nrLines, int i, String info, String outputDir,
        String cacheFile) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    int prefixSize = opt.prefixLength;
    System.out
            .println("[AprioriPhase]: Phase: " + i + " input: " + opt.inputFile + ", output: " + opt.outputDir);

    Job job = prepareJob(new Path(opt.inputFile), new Path(outputDir), SplitByKTextInputFormat.class,
            AprioriPhaseMapper.class, Text.class, Text.class, AprioriPhaseReducer.class, Text.class,
            IntWritable.class, TextOutputFormat.class);

    job.setJobName(info);
    job.setJarByClass(BigFIMDriver.class);
    job.setNumReduceTasks(1);

    Configuration conf = job.getConfiguration();
    setConfigurationValues(conf, opt);
    if (nrLines != -1) {
        conf.setLong(NUMBER_OF_LINES_KEY, nrLines);
    }

    if (cacheFile != null) {
        addCacheFile(new URI(cacheFile.replace(" ", "%20")), conf);
    }

    runJob(job, info);

    if (prefixSize <= i
            && job.getCounters().findCounter(COUNTER_GROUPNAME, COUNTER_NRLARGEPREFIXGROUPS).getValue() == 0) {
        return false;
    }
    if (prefixSize < i) {
        System.out.println(
                "[AprioriPhase]: Prefix group length updated! Now " + (i) + " instead of " + prefixSize);
    }
    return true;
}
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
private Configuration createConfiguration(int... numberOfLines) {
    Configuration conf = new Configuration();
    if (numberOfLines.length > 0) {
        conf.setLong(NUMBER_OF_LINES_KEY, numberOfLines[0]);
    }
    conf.set("fs.default.name", "file:///");
    conf.setBoolean("fs.file.impl.disable.cache", false);
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
    return conf;
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static void setRefSize(Configuration conf, long val) {
    conf.setLong(refSize, val);
}
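A matching read side is the usual companion to a setter like this; a hedged sketch follows (the method name and the 0L default are assumptions, not necessarily HalvadeConf's actual API):

// Hypothetical companion to setRefSize: the standard getLong round trip using
// the same refSize key. Method name and the 0L default are assumptions.
public static long getRefSize(Configuration conf) {
    return conf.getLong(refSize, 0L);
}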
From source file:boa.runtime.BoaRunner.java
License:Apache License
/**
 * Create a {@link Job} describing the work to be done by this Boa job.
 *
 * @param ins
 *            An array of {@link Path} containing the locations of the input files
 *
 * @param out
 *            A {@link Path} containing the location of the output file
 *
 * @param robust
 *            A boolean representing whether the job should ignore most exceptions
 *
 * @return A {@link Job} describing the work to be done by this Boa job
 * @throws IOException
 */
public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException {
    final Configuration configuration = getConf();

    configuration.setBoolean("boa.runtime.robust", robust);

    // faster local reads
    configuration.setBoolean("dfs.client.read.shortcircuit", true);
    configuration.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);

    // by default our MapFile's index every key, which takes up
    // a lot of memory - this lets you skip keys in the index and
    // control the memory requirements (as a tradeoff of slower gets)
    //configuration.setLong("io.map.index.skip", 128);

    // map output compression
    configuration.setBoolean("mapred.compress.map.output", true);
    configuration.set("mapred.map.output.compression.type", "BLOCK");
    configuration.setClass("mapred.map.output.compression.codec", SnappyCodec.class, CompressionCodec.class);

    configuration.setBoolean("mapred.map.tasks.speculative.execution", false);
    configuration.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    configuration.setLong("mapred.job.reuse.jvm.num.tasks", -1);

    final Job job = new Job(configuration);

    if (ins != null)
        for (final Path in : ins)
            FileInputFormat.addInputPath(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(BoaPartitioner.class);

    job.setMapOutputKeyClass(EmitKey.class);
    job.setMapOutputValueClass(EmitValue.class);

    job.setOutputFormatClass(BoaOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    return job;
}
From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java
License:Apache License
/** Create and setup a job */
@SuppressWarnings("deprecation")
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:co.cask.cdap.data.stream.AbstractStreamInputFormat.java
License:Apache License
/**
 * Sets the TTL for the stream events.
 *
 * @param conf The configuration to modify
 * @param ttl TTL of the stream in milliseconds.
 */
public static void setTTL(Configuration conf, long ttl) {
    Preconditions.checkArgument(ttl >= 0, "TTL must be >= 0");
    conf.setLong(STREAM_TTL, ttl);
}
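Readers of the configuration would retrieve the TTL with the matching getter; a hedged sketch follows (the Long.MAX_VALUE fallback is an assumption, not CDAP's documented default):

// Hypothetical read side of setTTL: returns the stream TTL in milliseconds.
// Falling back to Long.MAX_VALUE (effectively "never expire") is an assumption.
public static long getTTL(Configuration conf) {
    return conf.getLong(STREAM_TTL, Long.MAX_VALUE);
}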