List of usage examples for org.apache.hadoop.conf Configuration setInt
public void setInt(String name, int value)
name
property to an int
. From source file:clustering.inverted_index.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s tf_idf_result_dir output_dir" + "[decimal_number] [pruning_threshold]\n", getClass().getSimpleName()); System.exit(1);// w w w. j a v a 2 s. c om } Path normDir = new Path(args[1] + "/normed"); Path resultDir = new Path(args[1] + "/result"); Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 2) { conf.setInt("deci.number", Integer.valueOf(args[2])); } else { conf.setInt("deci.number", 4); } if (args.length > 3) { conf.setBoolean("pruning", true); conf.setDouble("pruning.threshold", Double.valueOf(args[3])); } else { conf.setBoolean("pruning", false); } JobControl jobControl = new JobControl("inverted-index jobs"); /* step 1, normalize the vector lenth of each document */ Job job1 = Job.getInstance(conf, "tf idf normalizer job"); job1.setJarByClass(Driver.class); FileInputFormat.addInputPath(job1, new Path(args[0])); job1.setInputFormatClass(KeyValueTextInputFormat.class); job1.setMapperClass(Mapper.class); job1.setReducerClass(NormalizerReducer.class); job1.setOutputKeyClass(Text.class); job1.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job1, normDir); ControlledJob controlledJob1 = new ControlledJob(conf); controlledJob1.setJob(job1); jobControl.addJob(controlledJob1); /* step 2, calculate inverted index */ Job job2 = Job.getInstance(conf, "inverted index job"); job2.setJarByClass(Driver.class); FileInputFormat.addInputPath(job2, normDir); job2.setInputFormatClass(KeyValueTextInputFormat.class); job2.setMapperClass(Mapper.class); job2.setReducerClass(InvertedIndexReducer.class); job2.setOutputKeyClass(Text.class); job2.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job2, resultDir); ControlledJob controlledJob2 = new ControlledJob(conf); controlledJob2.setJob(job2); controlledJob2.addDependingJob(controlledJob1); jobControl.addJob(controlledJob2); MapReduceUtils.runJobs(jobControl); return job2.waitForCompletion(true) ? 0 : 1; }
From source file:clustering.mst.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 3) { System.err.printf("usage: %s similarity_result_dir document_count_file output_dir " + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName()); System.exit(1);/* www . j av a 2s. co m*/ } Path step1_OutputDir = new Path(args[2] + "/step1"); Path resultDir = new Path(args[2] + "/result"); URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt"); Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 3) { conf.setDouble("final.threshold", Double.valueOf(args[3])); } else { conf.setDouble("final.threshold", 0.2d); } if (args.length > 4) { conf.setInt("reduce.task.num", Integer.valueOf(args[4])); } else { conf.setInt("reduce.task.num", 5); } JobControl jobControl = new JobControl("mst jobs"); /* step 1, split and calculate the child msts */ Job childJob = Job.getInstance(conf, "mst child job"); childJob.setJarByClass(Driver.class); childJob.addCacheFile(docCntFile); if (args.length > 5 && args[5].equals("0")) { FileInputFormat.addInputPath(childJob, new Path(args[0])); childJob.setInputFormatClass(KeyValueTextInputFormat.class); } else { SequenceFileInputFormat.addInputPath(childJob, new Path(args[0])); childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class); } FileOutputFormat.setOutputPath(childJob, step1_OutputDir); childJob.setMapperClass(ChildMapper.class); childJob.setMapOutputKeyClass(DoubleWritable.class); childJob.setMapOutputValueClass(Text.class); childJob.setPartitionerClass(ChildPartitioner.class); childJob.setReducerClass(ChildReducer.class); childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1)); childJob.setOutputKeyClass(DoubleWritable.class); childJob.setOutputValueClass(Text.class); ControlledJob controlledChildJob = new ControlledJob(conf); controlledChildJob.setJob(childJob); jobControl.addJob(controlledChildJob); /* step 2, merge step 1's output and calculate final mst */ Job finalJob = Job.getInstance(conf, "mst final job"); finalJob.setJarByClass(FinalReducer.class); finalJob.addCacheFile(docCntFile); FileInputFormat.addInputPath(finalJob, step1_OutputDir); finalJob.setInputFormatClass(KeyValueTextInputFormat.class); finalJob.setMapperClass(FinalMapper.class); finalJob.setMapOutputKeyClass(DoubleWritable.class); finalJob.setMapOutputValueClass(Text.class); finalJob.setReducerClass(FinalReducer.class); finalJob.setOutputKeyClass(IntWritable.class); finalJob.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(finalJob, resultDir); ControlledJob finalControlledJob = new ControlledJob(conf); finalControlledJob.setJob(finalJob); finalControlledJob.addDependingJob(controlledChildJob); jobControl.addJob(finalControlledJob); // run jobs MapReduceUtils.runJobs(jobControl); return finalJob.waitForCompletion(true) ? 0 : 1; }
From source file:clustering.simhash.Driver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s init_result_dir output_dir [simhash_threshold]\n", getClass().getSimpleName()); System.exit(1);/* w w w.j a v a 2 s .co m*/ } Path step1_outputDir = new Path(args[1] + "/step1"); Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); if (args.length > 2) { conf.setInt("simhash.threshold", Integer.valueOf(args[2])); } else { conf.setInt("simhash.threshold", 3); } JobControl jobControl = new JobControl("simhash jobs"); Job job1 = Job.getInstance(conf, "simhash step1 job"); job1.setJarByClass(Driver.class); FileInputFormat.addInputPath(job1, new Path(args[0])); job1.setInputFormatClass(KeyValueTextInputFormat.class); job1.setMapperClass(Step1Mapper.class); job1.setMapOutputKeyClass(LongWritable.class); job1.setMapOutputValueClass(Text.class); job1.setReducerClass(Step1Reducer.class); job1.setOutputKeyClass(IntWritable.class); job1.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job1, step1_outputDir); ControlledJob controlledJob1 = new ControlledJob(conf); controlledJob1.setJob(job1); jobControl.addJob(controlledJob1); Job job2 = Job.getInstance(conf, "simhash step2 job"); job2.setJarByClass(Driver.class); FileInputFormat.addInputPath(job2, step1_outputDir); job2.setInputFormatClass(KeyValueTextInputFormat.class); job2.setMapperClass(Step2Mapper.class); job2.setMapOutputKeyClass(IntWritable.class); job2.setMapOutputValueClass(Text.class); job2.setReducerClass(Step2Reducer.class); job2.setOutputKeyClass(IntWritable.class); job2.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job2, new Path(args[1] + "/result")); ControlledJob controlledJob2 = new ControlledJob(conf); controlledJob2.setJob(job2); controlledJob2.addDependingJob(controlledJob1); jobControl.addJob(controlledJob2); long starttime = System.currentTimeMillis(); clustering.Utils.MapReduceUtils.runJobs(jobControl); boolean complete = job2.waitForCompletion(true); long endtime = System.currentTimeMillis(); System.out.println("simhash job finished in: " + (endtime - starttime) / 1000 + " seconds"); return complete ? 0 : 1; }
From source file:clustering.similarity.ISimDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf("usage: %s simpre_dir output_dir " + "[compression_or_not] [reduce_task_number]\n", getClass().getSimpleName()); System.exit(1);//from w w w . j a v a 2 s.co m } Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); Job job = Job.getInstance(conf, "isim job"); job.setJarByClass(ISimDriver.class); if (args.length > 2 && args[2].equals("0")) { FileInputFormat.addInputPath(job, new Path(args[0])); job.setInputFormatClass(KeyValueTextInputFormat.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); } else { job.setInputFormatClass(SequenceFileAsTextInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(args[0])); conf.setBoolean("mapreduce.map.output.compress", true); conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.GzipCodec"); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class); SequenceFileOutputFormat.setOutputPath(job, new Path(args[1])); } if (args.length > 3) { conf.setInt("reduce.num", Integer.valueOf(args[3])); } else { conf.setInt("reduce.num", 5); } job.setMapperClass(ISimMapper.class); job.setMapOutputKeyClass(IntIntTupleWritable.class); job.setMapOutputValueClass(DoubleWritable.class); job.setCombinerClass(ISimCombiner.class); job.setPartitionerClass(HashPartitioner.class); job.setNumReduceTasks(conf.getInt("reduce.num", 1)); job.setReducerClass(ISimReducer.class); job.setOutputKeyClass(IntIntTupleWritable.class); job.setOutputValueClass(DoubleWritable.class); long starttime = System.currentTimeMillis(); boolean complete = job.waitForCompletion(true); long endtime = System.currentTimeMillis(); System.out.println("inverted similarity job finished in: " + (endtime - starttime) / 1000 + " seconds"); return complete ? 0 : 1; }
From source file:clustering.similarity.PreDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length < 2) { System.err.printf(/*from w w w . j av a2 s . c o m*/ "usage: %s inverted_index_result_dir output_dir" + " [compress_or_not] [reducer_number] [deci_number]\n", this.getClass().getSimpleName()); System.exit(1); } Configuration conf = getConf(); conf = MapReduceUtils.initConf(conf); conf.set("mapreduce.reduce.speculative", "false"); // TODO: 17-4-24 calculate split number from reducer number conf.setInt("split.num", 8); if (args.length > 3) { conf.setInt("reducer.num", Integer.valueOf(args[3])); } else { conf.setInt("reducer.num", 29); } if (args.length > 4) { conf.setInt("deci.number", Integer.valueOf(args[4])); } else { conf.setInt("deci.number", 3); } Job job = Job.getInstance(conf, "pre job"); job.setJarByClass(PreDriver.class); FileInputFormat.addInputPath(job, new Path(args[0])); job.setInputFormatClass(KeyValueTextInputFormat.class); job.setMapperClass(PreMapper.class); job.setMapOutputKeyClass(IntIntTupleWritable.class); job.setMapOutputValueClass(Text.class); job.setPartitionerClass(PrePartitioner.class); job.setNumReduceTasks(conf.getInt("reducer.num", 29)); job.setReducerClass(PreReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // set default compression if (args.length > 2 && args[2].equals("0")) { FileOutputFormat.setOutputPath(job, new Path(args[1])); } else { job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class); SequenceFileOutputFormat.setOutputPath(job, new Path(args[1])); } long starttime = System.currentTimeMillis(); boolean complete = job.waitForCompletion(true); long endtime = System.currentTimeMillis(); System.out.println("inverted similarity pre job finished in: " + (endtime - starttime) / 1000 + " seconds"); return complete ? 0 : 1; }
From source file:cn.easyhbase.client.hbase.HBaseAsyncOperationFactory.java
License:Apache License
public static HBaseAsyncOperation create(Configuration configuration) throws IOException { boolean enableAsyncMethod = configuration.getBoolean(ENABLE_ASYNC_METHOD, DEFAULT_ENABLE_ASYNC_METHOD); LOGGER.info("hbase.client.async.enable: " + enableAsyncMethod); if (!enableAsyncMethod) { return DisabledHBaseAsyncOperation.INSTANCE; }/*ww w. j ava 2 s .c o m*/ int queueSize = configuration.getInt(ASYNC_IN_QUEUE_SIZE, DEFAULT_ASYNC_IN_QUEUE_SIZE); if (configuration.get(ASYNC_PERIODIC_FLUSH_TIME, null) == null) { configuration.setInt(ASYNC_PERIODIC_FLUSH_TIME, DEFAULT_ASYNC_PERIODIC_FLUSH_TIME); } if (configuration.get(ASYNC_RETRY_COUNT, null) == null) { configuration.setInt(ASYNC_RETRY_COUNT, DEFAULT_ASYNC_RETRY_COUNT); } return new HBaseAsyncTemplate(configuration, queueSize); }
From source file:cn.easyhbase.client.hbase.HBaseAsyncOperationFactory.java
License:Apache License
public static HBaseAsyncOperation create(Connection connection, Configuration configuration) throws IOException { boolean enableAsyncMethod = configuration.getBoolean(ENABLE_ASYNC_METHOD, DEFAULT_ENABLE_ASYNC_METHOD); if (!enableAsyncMethod) { return DisabledHBaseAsyncOperation.INSTANCE; }/*from ww w . j ava2 s .c o m*/ int queueSize = configuration.getInt(ASYNC_IN_QUEUE_SIZE, DEFAULT_ASYNC_IN_QUEUE_SIZE); if (configuration.get(ASYNC_PERIODIC_FLUSH_TIME, null) == null) { configuration.setInt(ASYNC_PERIODIC_FLUSH_TIME, DEFAULT_ASYNC_PERIODIC_FLUSH_TIME); } if (configuration.get(ASYNC_RETRY_COUNT, null) == null) { configuration.setInt(ASYNC_RETRY_COUNT, DEFAULT_ASYNC_RETRY_COUNT); } return new HBaseAsyncTemplate(connection, configuration, queueSize); }
From source file:co.cask.cdap.hbase.wd.RowKeyDistributorTestBase.java
License:Apache License
@BeforeClass public static void beforeClass() throws Exception { if (!runBefore) { return;/*from www .ja va 2s .co m*/ } testingUtility = new HBaseTestingUtility(); Configuration hConf = testingUtility.getConfiguration(); hConf.set("yarn.is.minicluster", "true"); // Tune down the connection thread pool size hConf.setInt("hbase.hconnection.threads.core", 5); hConf.setInt("hbase.hconnection.threads.max", 10); // Tunn down handler threads in regionserver hConf.setInt("hbase.regionserver.handler.count", 10); // Set to random port hConf.setInt("hbase.master.port", Networks.getRandomPort()); hConf.setInt("hbase.master.info.port", Networks.getRandomPort()); hConf.setInt("hbase.regionserver.port", Networks.getRandomPort()); hConf.setInt("hbase.regionserver.info.port", Networks.getRandomPort()); testingUtility.startMiniCluster(); hTable = testingUtility.createTable(TABLE, CF); }
From source file:co.cask.cdap.hbase.wd.RowKeyDistributorTestBase.java
License:Apache License
private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue, int seekIntervalMinValue, int seekIntervalMaxValue) throws IOException, InterruptedException, ClassNotFoundException { int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue, seekIntervalMinValue, seekIntervalMaxValue); // Reading data Configuration conf = new Configuration(testingUtility.getConfiguration()); conf.set("fs.defaultFS", "file:///"); conf.set("fs.default.name", "file:///"); conf.setInt("mapreduce.local.map.tasks.maximum", 16); conf.setInt("mapreduce.local.reduce.tasks.maximum", 16); Job job = Job.getInstance(conf, "testMapReduceInternal()-Job"); TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); // Substituting standard TableInputFormat which was set in TableMapReduceUtil.initTableMapperJob(...) job.setInputFormatClass(WdTableInputFormat.class); keyDistributor.addInfo(job.getConfiguration()); job.setOutputFormatClass(NullOutputFormat.class); job.setNumReduceTasks(0);// w w w . j av a2s. c o m boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded); long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue(); Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords); // Need to kill the job after completion, after it could leave MRAppMaster running not terminated. // Not sure what causing this, but maybe problem in MiniYarnCluster job.killJob(); }
From source file:co.cask.cdap.internal.app.runtime.batch.inmemory.LocalClientProtocolProvider.java
License:Apache License
@Override public ClientProtocol create(Configuration conf) throws IOException { String framework = conf.get(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME); LOG.info("Using framework: " + framework); if (!MRConfig.LOCAL_FRAMEWORK_NAME.equals(framework)) { return null; }/*from w ww .j a v a 2 s . c om*/ // We have to use something unique like "clocal" to make sure Hadoop's LocalClientProtocolProvider will fail to // provide the ClientProtocol String tracker = conf.get(JTConfig.JT_IPC_ADDRESS, "clocal"); LOG.info("Using tracker: " + tracker); if ("clocal".equals(tracker)) { conf.setInt("mapreduce.job.maps", 1); return new LocalJobRunnerWithFix(conf); } else { throw new IOException("Invalid \"" + JTConfig.JT_IPC_ADDRESS + "\" configuration value for LocalJobRunner : \"" + tracker + "\""); } }