List of usage examples for org.apache.hadoop.conf Configuration getInt

public int getInt(String name, int defaultValue)

Gets the value of the name property as an int. If no such property exists, the supplied defaultValue is returned.
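Before the collected examples, a minimal standalone sketch of the default-value behavior; the property key my.example.buffer.size is hypothetical and used only for illustration:

import org.apache.hadoop.conf.Configuration;

public class GetIntSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // The hypothetical key has not been set, so getInt falls back to the default (4096).
        System.out.println(conf.getInt("my.example.buffer.size", 4096)); // 4096

        // After setInt stores a value, getInt parses and returns it instead of the default.
        conf.setInt("my.example.buffer.size", 8192);
        System.out.println(conf.getInt("my.example.buffer.size", 4096)); // 8192
    }
}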
From source file:cc.wikitools.lucene.hadoop.FileSystemDirectory.java
License:Apache License
/**
 * Constructor.
 *
 * @param fs the underlying file system
 * @param directory the directory to wrap
 * @param create whether to create the directory
 * @param conf the configuration to read the I/O buffer size from
 * @throws IOException if the path exists but is not a directory
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {
    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDir();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}
From source file:clustering.inverted_index.InvertedIndexReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    // Build a DecimalFormat pattern such as "0.0000" with 'deci.number' decimal places.
    int deciNum = conf.getInt("deci.number", 4);
    StringBuilder stringBuilder = new StringBuilder();
    stringBuilder.append("0.");
    for (int i = 0; i < deciNum; i++) {
        stringBuilder.append('0');
    }
    this.decimalFormat = new DecimalFormat(stringBuilder.toString());

    this.pruning = conf.getBoolean("pruning", false);
    this.pruningThreshold = conf.getDouble("pruning.threshold", 0.001d);
}
From source file:clustering.mst.ChildMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    // Read the total document count from the distributed cache file aliased as "docCnt".
    int docCnt = 0;
    if (context.getCacheFiles() != null && context.getCacheFiles().length > 0) {
        FileReader fileReader = new FileReader("./docCnt");
        BufferedReader bufferedReader = new BufferedReader(fileReader);
        docCnt = Integer.valueOf(bufferedReader.readLine());
        bufferedReader.close();
        fileReader.close();
    }

    Configuration conf = context.getConfiguration();
    int reduceTaskNum = conf.getInt("reduce.task.num", 3);
    // Ceiling division: documents per segment, rounded up.
    this.docsInSeg = docCnt / reduceTaskNum;
    if (docCnt % reduceTaskNum != 0) {
        this.docsInSeg++;
    }
}
From source file:clustering.mst.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s similarity_result_dir document_count_file output_dir "
                + "[cluster_threshold] [reduce_number] [compression]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Path step1_OutputDir = new Path(args[2] + "/step1");
    Path resultDir = new Path(args[2] + "/result");
    URI docCntFile = new URI(args[1] + "/part-r-00000#docCnt");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 3) {
        conf.setDouble("final.threshold", Double.valueOf(args[3]));
    } else {
        conf.setDouble("final.threshold", 0.2d);
    }
    if (args.length > 4) {
        conf.setInt("reduce.task.num", Integer.valueOf(args[4]));
    } else {
        conf.setInt("reduce.task.num", 5);
    }

    JobControl jobControl = new JobControl("mst jobs");

    /* step 1, split and calculate the child msts */
    Job childJob = Job.getInstance(conf, "mst child job");
    childJob.setJarByClass(Driver.class);
    childJob.addCacheFile(docCntFile);

    if (args.length > 5 && args[5].equals("0")) {
        FileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(KeyValueTextInputFormat.class);
    } else {
        SequenceFileInputFormat.addInputPath(childJob, new Path(args[0]));
        childJob.setInputFormatClass(SequenceFileAsTextInputFormat.class);
    }

    FileOutputFormat.setOutputPath(childJob, step1_OutputDir);
    childJob.setMapperClass(ChildMapper.class);
    childJob.setMapOutputKeyClass(DoubleWritable.class);
    childJob.setMapOutputValueClass(Text.class);
    childJob.setPartitionerClass(ChildPartitioner.class);
    childJob.setReducerClass(ChildReducer.class);
    childJob.setNumReduceTasks(conf.getInt("reduce.task.num", 1));
    childJob.setOutputKeyClass(DoubleWritable.class);
    childJob.setOutputValueClass(Text.class);

    ControlledJob controlledChildJob = new ControlledJob(conf);
    controlledChildJob.setJob(childJob);
    jobControl.addJob(controlledChildJob);

    /* step 2, merge step 1's output and calculate final mst */
    Job finalJob = Job.getInstance(conf, "mst final job");
    finalJob.setJarByClass(FinalReducer.class);
    finalJob.addCacheFile(docCntFile);

    FileInputFormat.addInputPath(finalJob, step1_OutputDir);
    finalJob.setInputFormatClass(KeyValueTextInputFormat.class);
    finalJob.setMapperClass(FinalMapper.class);
    finalJob.setMapOutputKeyClass(DoubleWritable.class);
    finalJob.setMapOutputValueClass(Text.class);
    finalJob.setReducerClass(FinalReducer.class);
    finalJob.setOutputKeyClass(IntWritable.class);
    finalJob.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(finalJob, resultDir);

    ControlledJob finalControlledJob = new ControlledJob(conf);
    finalControlledJob.setJob(finalJob);
    finalControlledJob.addDependingJob(controlledChildJob);
    jobControl.addJob(finalControlledJob);

    // run jobs
    MapReduceUtils.runJobs(jobControl);

    return finalJob.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.similarity.ISimDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s simpre_dir output_dir "
                + "[compression_or_not] [reduce_task_number]\n", getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    Job job = Job.getInstance(conf, "isim job");
    job.setJarByClass(ISimDriver.class);

    if (args.length > 2 && args[2].equals("0")) {
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        job.setInputFormatClass(SequenceFileAsTextInputFormat.class);
        SequenceFileInputFormat.addInputPath(job, new Path(args[0]));

        conf.setBoolean("mapreduce.map.output.compress", true);
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.GzipCodec");

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    if (args.length > 3) {
        conf.setInt("reduce.num", Integer.valueOf(args[3]));
    } else {
        conf.setInt("reduce.num", 5);
    }

    job.setMapperClass(ISimMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setCombinerClass(ISimCombiner.class);
    job.setPartitionerClass(HashPartitioner.class);

    job.setNumReduceTasks(conf.getInt("reduce.num", 1));

    job.setReducerClass(ISimReducer.class);
    job.setOutputKeyClass(IntIntTupleWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("inverted similarity job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}
From source file:clustering.similarity.PreDriver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s inverted_index_result_dir output_dir"
                + " [compress_or_not] [reducer_number] [deci_number]\n", this.getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);
    conf.set("mapreduce.reduce.speculative", "false");

    // TODO: 17-4-24 calculate split number from reducer number
    conf.setInt("split.num", 8);

    if (args.length > 3) {
        conf.setInt("reducer.num", Integer.valueOf(args[3]));
    } else {
        conf.setInt("reducer.num", 29);
    }
    if (args.length > 4) {
        conf.setInt("deci.number", Integer.valueOf(args[4]));
    } else {
        conf.setInt("deci.number", 3);
    }

    Job job = Job.getInstance(conf, "pre job");
    job.setJarByClass(PreDriver.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapperClass(PreMapper.class);
    job.setMapOutputKeyClass(IntIntTupleWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setPartitionerClass(PrePartitioner.class);
    job.setNumReduceTasks(conf.getInt("reducer.num", 29));
    job.setReducerClass(PreReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // set default compression
    if (args.length > 2 && args[2].equals("0")) {
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
    } else {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.GzipCodec.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    }

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("inverted similarity pre job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}
From source file:clustering.similarity.PreMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    this.splitNum = conf.getInt("split.num", 6);
    this.lengthThreshold = conf.getInt("length.threshold", 1000);
}
From source file:clustering.similarity.PreReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    // Build a DecimalFormat pattern such as "#0.000" with 'deci.number' decimal places.
    int deciNum = conf.getInt("deci.number", 3);
    StringBuilder stringBuilder = new StringBuilder();
    stringBuilder.append("#0.");
    for (int i = 0; i < deciNum; i++) {
        stringBuilder.append('0');
    }
    this.formatBase = stringBuilder.toString();
    this.decimalFormat = new DecimalFormat(this.formatBase);
}
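Both InvertedIndexReducer and PreReducer use the value read by getInt only to size a DecimalFormat pattern. A small standalone sketch of that pattern construction, assuming the default deci.number of 3:

import java.text.DecimalFormat;

public class DeciFormatSketch {
    public static void main(String[] args) {
        int deciNum = 3; // as obtained via conf.getInt("deci.number", 3)
        StringBuilder sb = new StringBuilder("#0.");
        for (int i = 0; i < deciNum; i++) {
            sb.append('0');
        }
        DecimalFormat decimalFormat = new DecimalFormat(sb.toString()); // pattern "#0.000"
        System.out.println(decimalFormat.format(0.123456)); // prints 0.123
    }
}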
From source file:cn.easyhbase.client.hbase.HBaseAsyncOperationFactory.java
License:Apache License
public static HBaseAsyncOperation create(Configuration configuration) throws IOException {
    boolean enableAsyncMethod = configuration.getBoolean(ENABLE_ASYNC_METHOD, DEFAULT_ENABLE_ASYNC_METHOD);
    LOGGER.info("hbase.client.async.enable: " + enableAsyncMethod);
    if (!enableAsyncMethod) {
        return DisabledHBaseAsyncOperation.INSTANCE;
    }

    int queueSize = configuration.getInt(ASYNC_IN_QUEUE_SIZE, DEFAULT_ASYNC_IN_QUEUE_SIZE);

    // Install defaults only for keys the user has not configured.
    if (configuration.get(ASYNC_PERIODIC_FLUSH_TIME, null) == null) {
        configuration.setInt(ASYNC_PERIODIC_FLUSH_TIME, DEFAULT_ASYNC_PERIODIC_FLUSH_TIME);
    }
    if (configuration.get(ASYNC_RETRY_COUNT, null) == null) {
        configuration.setInt(ASYNC_RETRY_COUNT, DEFAULT_ASYNC_RETRY_COUNT);
    }

    return new HBaseAsyncTemplate(configuration, queueSize);
}
From source file:cn.easyhbase.client.hbase.HBaseAsyncOperationFactory.java
License:Apache License
public static HBaseAsyncOperation create(Connection connection, Configuration configuration)
        throws IOException {
    boolean enableAsyncMethod = configuration.getBoolean(ENABLE_ASYNC_METHOD, DEFAULT_ENABLE_ASYNC_METHOD);
    if (!enableAsyncMethod) {
        return DisabledHBaseAsyncOperation.INSTANCE;
    }

    int queueSize = configuration.getInt(ASYNC_IN_QUEUE_SIZE, DEFAULT_ASYNC_IN_QUEUE_SIZE);

    if (configuration.get(ASYNC_PERIODIC_FLUSH_TIME, null) == null) {
        configuration.setInt(ASYNC_PERIODIC_FLUSH_TIME, DEFAULT_ASYNC_PERIODIC_FLUSH_TIME);
    }
    if (configuration.get(ASYNC_RETRY_COUNT, null) == null) {
        configuration.setInt(ASYNC_RETRY_COUNT, DEFAULT_ASYNC_RETRY_COUNT);
    }

    return new HBaseAsyncTemplate(connection, configuration, queueSize);
}
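The null-probe in both factory methods matters because getInt cannot distinguish an unset key from one explicitly set to the default value. A minimal sketch of the idiom, using a hypothetical key my.example.flush.time:

import org.apache.hadoop.conf.Configuration;

public class DefaultIfAbsentSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Only install the default when the user has not configured the key.
        if (conf.get("my.example.flush.time", null) == null) {
            conf.setInt("my.example.flush.time", 100);
        }

        // Later readers now always see a concrete value.
        System.out.println(conf.getInt("my.example.flush.time", -1)); // prints 100
    }
}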