Usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
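Before the collected sources, a minimal self-contained sketch (not taken from any of the projects below; the class name SetNumReduceTasksSketch and the identity Mapper/Reducer wiring are illustrative assumptions) of the two typical call patterns: a positive argument fixes the number of reduce tasks and therefore the number of output partitions, while 0 makes the job map-only. The call must happen before the job is submitted, otherwise IllegalStateException is thrown.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetNumReduceTasksSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setNumReduceTasks sketch");
        job.setJarByClass(SetNumReduceTasksSketch.class);

        // Identity mapper/reducer simply pass records through; a real job would
        // plug in its own classes here.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Must be called before the job is submitted; afterwards it throws IllegalStateException.
        job.setNumReduceTasks(4);   // 4 reduce tasks -> 4 output partitions (part-r-00000..part-r-00003)
        // job.setNumReduceTasks(0); // 0 reduce tasks -> map-only job, mapper output is written directly

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With a positive reducer count the output directory contains one part-r-* file per reduce task; with 0 it contains one part-m-* file per map task.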
From source file: DAAL.SVD.java
License: Open Source License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();

    /* Put shared libraries into the distributed cache */
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libJavaAPI.so#libJavaAPI.so"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libtbb.so.2#libtbb.so.2"), conf);
    DistributedCache.addCacheFile(new URI("/Hadoop/Libraries/libiomp5.so#libiomp5.so"), conf);

    Job job = new Job(conf, "SVD Job (step1 and step2)");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path("/Hadoop/SVD/step2"));
    job.setMapperClass(SVDStep1Mapper.class);
    job.setReducerClass(SVDStep2Reducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WriteableData.class);
    job.setJarByClass(SVD.class);
    job.waitForCompletion(true);

    Job job1 = new Job(conf, "SVD Job (step3)");
    FileInputFormat.setInputPaths(job1, new Path("/Hadoop/SVD/step2"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.setMapperClass(SVDStep3Mapper.class);
    job1.setNumReduceTasks(0); // map-only job: step 3 mapper output is written directly
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setJarByClass(SVD.class);
    return job1.waitForCompletion(true) ? 0 : 1;
}
From source file: DataCubeRefresh.Grep.java
License: Apache License

/**
 * Run function.
 * @param args arguments
 * @return error code
 * @throws Exception if an exception occurs
 */
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inUrl> <outUrl> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Job grepJob = new Job(getConf());
    Job sortJob = new Job(getConf());

    String tempStreamTag = UUID.randomUUID().toString();

    try {
        grepJob.setJobName("grep-search");
        TextHStreamingInputFormat.addInputStream(grepJob, 1000, 600, -1, "", false, args[0]);
        HStreamingJobConf.setIsStreamingJob(grepJob, true);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.getConfiguration().set("mapred.mapper.regex", args[2]);
        if (args.length == 4)
            grepJob.getConfiguration().set("mapred.mapper.regex.group", args[3]);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        grepJob.setInputFormatClass(TextHStreamingInputFormat.class);
        grepJob.setOutputFormatClass(TextHStreamingOutputFormat.class);
        HStreamingOutputFormat.setOutputStreamTag(grepJob, tempStreamTag);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.setJobName("grep-search");
        grepJob.setJarByClass(this.getClass());
        grepJob.submit();

        sortJob.setJobName("grep-sort");
        sortJob.setInputFormatClass(TextHStreamingInputFormat.class);
        HStreamingJobConf.setIsStreamingJob(sortJob, true);
        // add previous stream partition/reducer 0 as input.
        HStreamingInputFormat.addInputStreamTag(sortJob, tempStreamTag, 0);
        sortJob.setMapperClass(InverseTextMapper.class);
        sortJob.setNumReduceTasks(1); // single output stream
        sortJob.setOutputFormatClass(TextHStreamingOutputFormat.class);
        TextHStreamingOutputFormat.setOutputPath(sortJob, args[1]);
        // sort by decreasing frequency
        sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
        sortJob.setJarByClass(this.getClass());
        sortJob.submit();
        return sortJob.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
        try {
            grepJob.killJob();
        } catch (Exception e1) {
            // ignore
        }
        try {
            sortJob.killJob();
        } catch (Exception e2) {
            // ignore
        }
    }
    return 0;
}
From source file: de.bankmark.bigbench.queries.q18.MRlinearRegression.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    int NUMBER_REDUCERS = 1;

    Job job = Job.getInstance(getConf());
    job.setJarByClass(MRlinearRegression.class);
    if (args.length != 2) {
        usage(job);
        return 2;
    }
    System.out.println("input:");
    job.setJobName(MRlinearRegression.class.getSimpleName() + "::" + args[0] + "->" + args[1]);
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    System.out.println("Input: " + input + " out -> " + output);

    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(MRlinearRegression.LRmapper.class);
    job.setReducerClass(MRlinearRegression.LRreducer.class);
    job.setNumReduceTasks(NUMBER_REDUCERS);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleArrayWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.bankmark.bigbench.queries.q28.ToSequenceFile.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(ToSequenceFile.class);
    if (args.length != 2) {
        usage(job);
        return 2;
    }
    System.out.println("input:");
    job.setJobName(ToSequenceFile.class.getSimpleName() + "::" + args[0] + "->" + args[1]);
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    System.out.println("Input: " + input + " out -> " + output);

    FileInputFormat.addInputPath(job, input);
    SequenceFileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(Reducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0); // map-only: records go straight into the SequenceFile
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.gesundkrank.wikipedia.hadoop.util.RepackToMapFile.java
License: Open Source License

public int run(String basePath, String outputPath, boolean checkNew, boolean skipRedirect) throws Exception {
    Configuration configuration = getConf();
    configuration.setBoolean("skipRedirect", skipRedirect);

    LOGGER.info("Tool name: " + getClass().getSimpleName());

    Job job = Job.getInstance(configuration, getClass().getSimpleName());
    job.setJarByClass(getClass());

    job.setMapperClass(WikiMapper.class);
    job.setInputFormatClass(WikiInputFormat.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(WikiRevisionWritable.class);

    WikiDumpLoader wikiDumpLoader = new WikiDumpLoader(checkNew);
    wikiDumpLoader.addWikiDump(job, basePath);

    MapFileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.hpi.fgis.hdrs.mapreduce.examples.PredicateAnalysis.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(PredicateAnalysis.class);
    job.setJobName("Predicate Analysis");

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    //job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(0); // map-only job

    job.setInputFormatClass(TripleInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, "POS");
    TripleInputFormat.setPattern(job, Triple.newPattern(null, args[1], null));
    TripleInputFormat.setAggregationLevel2(job);

    SequenceFileOutputFormat.setOutputPath(job, new Path(args[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file: de.hpi.fgis.hdrs.mapreduce.examples.TripleCount.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(TripleCount.class);
    job.setJobName("TripleCount");

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    int argc = 0;
    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);

    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s))
            s = null;
        if ("*".equals(p))
            p = null;
        if ("*".equals(o))
            o = null;
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    } else {
        TextOutputFormat.setOutputPath(job, new Path(args[argc]));
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file: de.hpi.fgis.hdrs.mapreduce.IndexLoader.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (3 != args.length) {
        System.out.println("Usage: IndexLoader <StoreAddres> <SourceIndex> "
                + "<TargetIndex1>[,<TargetIndex2>...]");
        return 0;
    }

    Job job = new Job(getConf());
    job.setJarByClass(IndexLoader.class);
    job.setJobName("HDRS Index Loader");

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TripleOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TripleOutputFormat.class);

    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0); // map-only job

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TripleOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, args[1]);

    TripleOutputFormat.setStoreAddress(job, args[0]);
    TripleOutputFormat.setOutputIndexes(job, args[2]);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file: de.l3s.common.features.hadoop.TimeSeriesJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Options opts = new Options();

    Option jnameOpt = OptionBuilder.withArgName("job-name").hasArg(true)
            .withDescription("Timeseries analysis").create(JOB_NAME);
    Option inputOpt = OptionBuilder.withArgName("input-path").hasArg(true)
            .withDescription("Timeseries file path (required)").create(INPUT_OPT);
    Option outputOpt = OptionBuilder.withArgName("output-path").hasArg(true)
            .withDescription("output file path (required)").create(OUTPUT_OPT);
    Option reduceOpt = OptionBuilder.withArgName("reduce-no").hasArg(true)
            .withDescription("number of reducer nodes").create(REDUCE_NO);
    Option rmOpt = OptionBuilder.withArgName("remove-out").hasArg(false)
            .withDescription("remove the output then create again before writing files onto it")
            .create(REMOVE_OUTPUT);
    Option cOpt = OptionBuilder.withArgName("compress-option").hasArg(true)
            .withDescription("compression option").create(COMPRESS_OPT);

    opts.addOption(jnameOpt);
    opts.addOption(inputOpt);
    opts.addOption(reduceOpt);
    opts.addOption(outputOpt);
    opts.addOption(rmOpt);
    opts.addOption(cOpt);

    CommandLine cl;
    CommandLineParser parser = new GnuParser();
    try {
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.println("Error parsing command line: " + e.getMessage());
        return -1;
    }

    if (!cl.hasOption(INPUT_OPT) || !cl.hasOption(OUTPUT_OPT)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(getClass().getName(), opts);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    int reduceNo = DEFAULT_REDUCER_NO;
    if (cl.hasOption(REDUCE_NO)) {
        try {
            reduceNo = Integer.parseInt(cl.getOptionValue(REDUCE_NO));
        } catch (NumberFormatException e) {
            System.err.println("Error parsing reducer number: " + e.getMessage());
        }
    }

    String jobName = "Distributed timeseries [R] correlation";
    if (cl.hasOption(JOB_NAME)) {
        jobName = cl.getOptionValue(JOB_NAME);
        jobName = jobName.replace('-', ' ');
    }

    if (cl.hasOption(REMOVE_OUTPUT)) {
    }

    String input = cl.getOptionValue(INPUT_OPT);
    String output = cl.getOptionValue(OUTPUT_OPT);

    Configuration conf = getConf();
    //DistributedCache.createSymlink(conf);
    //DistributedCache.addCacheFile(new URI("hdfs://master.hadoop:8020/user/nguyen/lib/"), conf);

    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(TimeSeriesJob.class);
    job.setMapperClass(TimeSeriesMapper.class);
    job.setReducerClass(TimeSeriesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Timeseries.class);

    job.setNumReduceTasks(reduceNo);
    job.setInputFormatClass(WholeFileInputFormat.class);
    WholeFileInputFormat.setInputPaths(job, input);

    FileOutputFormat.setOutputPath(job, new Path(output));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file: de.l3s.content.timex.extracting.ClueWeb09Timex.java
License: Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("input").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("output").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("column").hasArg()
            .withDescription("column to store row data into (must exist)").create(COLUMN));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    cmdline = parser.parse(options, args);

    if (!cmdline.hasOption(INPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    if (!cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);
    // String column = cmdline.getOptionValue(COLUMN);

    LOG.info("Tool name: " + ClueWeb09Timex.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);
    // LOG.info(" - column: " + column);

    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "node05.ib,node03.ib,node04.ib");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    conf.set("hbase.master", "master.ib");
    // conf.set("conf.column", column);

    long milliSeconds = 10000 * 60 * 60; //x10 default
    conf.setLong("mapred.task.timeout", milliSeconds);

    Job job = Job.getInstance(conf, ClueWeb09Timex.class.getSimpleName()
            + " time-confident extraction + annotation + HBase import: " + input);
    //Configuration conf = new Configuration();
    //Job job = Job.getInstance(conf, "web pages count");
    job.setJarByClass(ClueWeb09Timex.class);
    job.setNumReduceTasks(0); // map-only: mappers write directly to HBase via TableOutputFormat

    job.setInputFormatClass(ClueWeb09InputFormat.class);
    job.setOutputFormatClass(TableOutputFormat.class);
    job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, output);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Writable.class);
    job.setMapperClass(TMapper.class);
    //job.setReducerClass(IntSumReducer.class);
    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(input));
    //FileOutputFormat.setOutputPath(job, new Path(output));

    job.waitForCompletion(true);
    return 0;
}