List of usage examples for org.apache.hadoop.mapreduce.Job.setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass) throws IllegalStateException
From source file:ExtractTopPersonalizedPageRankNodes.java
License:Apache License
/** * Runs this tool.//from w w w.j a v a 2 s. co m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP)); options.addOption(OptionBuilder.withArgName("src").hasArg().withDescription("source node").create(SRC)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(TOP)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = "abc";//cmdline.getOptionValue(OUTPUT); int n = Integer.parseInt(cmdline.getOptionValue(TOP)); //LOG.info("Tool name: " + ExtractTopPersonalizedPageRankNodes.class.getSimpleName()); //LOG.info(" - input: " + inputPath); //LOG.info(" - output: " + outputPath); //LOG.info(" - top: " + n); Configuration conf = getConf(); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); conf.setInt(TOP_PG, n); Job job = Job.getInstance(conf); job.setJobName(ExtractTopPersonalizedPageRankNodes.class.getName() + ":" + inputPath); job.setJarByClass(ExtractTopPersonalizedPageRankNodes.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(SequenceFileInputFormat.class); 
job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(PairOfIntFloat.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(FloatWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(MyMapper.class); job.setPartitionerClass(MyPartitioner.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); FileSystem fileSystem = FileSystem.get(conf); Path path = new Path(outputPath + "/part-r-00000"); ; //MapFile.Reader reader = new MapFile.Reader(new Path(outputPath+ "/part-r-00000"),conf); // InputStream fis=new FileInputStream(outputPath+"/part-r-00000"); BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(path))); String s; float key;//=new FloatWritable(); int value;//=new IntWritable(); while ((s = br.readLine()) != null) { String[] sources = s.split("\\s+"); key = Float.parseFloat(sources[0]); value = Integer.parseInt(sources[1]); if (key == 0.0f) { System.out.print("\n" + "Source: " + value + "\n"); } else { System.out.print(String.format("%.5f %d", key, value) + "\n"); } } //reader.close(); br.close(); //while(!SysOut.isEmpty()) //{ // System.out.print(SysOut.poll()); //} return 0; }
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception { ///start//from w w w .ja v a 2s .c o m final long startTime = System.currentTimeMillis(); String outputReducerType = "filesystem"; if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) { String[] s = args[0].split("="); if (s != null && s.length == 2) outputReducerType = s[1]; } logger.info("output reducer type: " + outputReducerType); // use a smaller page size that doesn't divide the row count evenly to exercise the paging logic better ConfigHelper.setRangeBatchSize(getConf(), 99); for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) { String columnName = "userId"; Job job = new Job(getConf(), "wordcount"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); //System.out.println("test"); if (outputReducerType.equalsIgnoreCase("filesystem")) { job.setReducerClass(ReducerToFilesystem.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i)); } else { job.setReducerClass(ReducerToCassandra.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(ByteBuffer.class); job.setOutputValueClass(List.class); job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); } job.setInputFormatClass(ColumnFamilyInputFormat.class); ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner"); ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY); SlicePredicate predicate = new SlicePredicate() .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName))); ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); // this will 
cause the predicate to be ignored in favor of scanning everything as a wide row //Son degisiklik // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner"); job.waitForCompletion(true); } //print final double duration = (System.currentTimeMillis() - startTime) / 1000.0; // after System.out.println(); System.out.println("Job Finished in " + duration + " seconds"); System.out.println(); return 0; }
From source file:GraphCompare.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job1 = Job.getInstance(conf, "graph compare 1"); job1.setJarByClass(GraphCompare.class); job1.setMapperClass(Map1.class); job1.setReducerClass(Reduce1.class); job1.setMapOutputKeyClass(Text.class); job1.setMapOutputValueClass(IntWritable.class); job1.setOutputKeyClass(IntWritable.class); job1.setOutputValueClass(IntWritable.class); job1.setNumReduceTasks(30);// w ww.j a v a 2 s .c om job1.getConfiguration().set("mapreduce.input.fileinputformat.split.maxsize", "25000"); FileInputFormat.addInputPath(job1, new Path(args[0])); FileInputFormat.addInputPath(job1, new Path(args[1])); FileOutputFormat.setOutputPath(job1, new Path(args[2])); job1.waitForCompletion(true); Job job2 = Job.getInstance(conf, "graph compare 2"); job2.setJarByClass(GraphCompare.class); job2.setMapperClass(Map2.class); job2.setReducerClass(Reduce2.class); job2.setMapOutputKeyClass(IntWritable.class); job2.setMapOutputValueClass(IntWritable.class); job2.setOutputKeyClass(IntWritable.class); job2.setOutputValueClass(IntWritable.class); job2.setNumReduceTasks(30); FileInputFormat.addInputPath(job2, new Path(args[2])); FileOutputFormat.setOutputPath(job2, new Path(args[3])); job2.waitForCompletion(true); }
From source file:BigramRelativeFrequencyJson.java
License:Apache License
/** * Runs this tool./* w w w.java 2 s . c o m*/ */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int reduceTasks = Integer.parseInt(args[2]); LOG.info("Tool name: " + BigramRelativeFrequencyJson.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BigramRelativeFrequencyJson.class.getSimpleName()); job.setJarByClass(BigramRelativeFrequencyJson.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(MyTuple.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(MyTuple.class); job.setOutputValueClass(FloatWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:TestBAM.java
License:Open Source License
/**
 * Configures and runs the job: {@code args[0]} is used both as the header source for
 * {@code MyOutputFormat} and as the job input path, and {@code args[1]} is the output path.
 *
 * @param args input path followed by output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();

    // The output format takes its header from the input file.
    conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);

    final Path bamJar = new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar");
    DistributedCache.addFileToClassPath(bamJar, conf);

    final Job job = new Job(conf);
    job.setJarByClass(TestBAM.class);

    // Map and reduce stages.
    job.setMapperClass(TestBAMMapper.class);
    job.setReducerClass(TestBAMReducer.class);

    // Key/value types for the intermediate and the final output.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    // Input and output formats.
    job.setInputFormatClass(AnySAMInputFormat.class);
    job.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.submit();

    final boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }

    System.err.println("sort :: Job failed.");
    return 1;
}
From source file:PairsPMI_M.java
License:Apache License
/** * Runs this tool./* www .j av a 2 s . c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } // First MapReduce Job String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - tmp path: " + outputPath + "/tmp"); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(PairsPMI_M.class.getSimpleName()); job.setJarByClass(PairsPMI_M.class); // Delete the tmp directory if it exists already Path tmpDir = new Path("tmp_wj"); FileSystem.get(getConf()).delete(tmpDir, true); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path("tmp_wj")); job.setMapOutputKeyClass(PairOfStrings.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(PairOfStrings.class); job.setOutputValueClass(FloatWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); // job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); double time1 = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Job Finished in " + time1 + " seconds"); numRecords = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS") .getValue(); /* * Second MapReduce Job */ LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName()); LOG.info("second stage of MapReduce"); LOG.info(" - input from tmp path: " + outputPath + "/tmp_wj"); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); // set the global variable Configuration conf = getConf(); conf.setLong("numRec", numRecords); job = Job.getInstance(getConf()); job.setJobName(PairsPMI_M.class.getSimpleName()); job.setJarByClass(PairsPMI_M.class); // Delete the output 
directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path("tmp_wj/part*")); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(PairOfStrings.class); job.setMapOutputValueClass(FloatWritable.class); // job.setOutputKeyClass(PairOfStrings.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FloatWritable.class); job.setInputFormatClass(SequenceFileInputFormat.class); // job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapperSecond.class); // job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducerSecond.class); job.setPartitionerClass(MyPartitioner.class); startTime = System.currentTimeMillis(); job.waitForCompletion(true); double time2 = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Second job finished in " + time2 + " seconds"); System.out.println("Total time: " + (time1 + time2) + " seconds"); return 0; }
From source file:ComputeCooccurrenceMatrixStripes.java
License:Apache License
/**
 * Runs this tool: configures and runs one job that reads the input path and writes to the
 * output path, passing the window size to the tasks through the {@code "window"}
 * configuration key.
 *
 * @param args command-line arguments; the {@code INPUT} and {@code OUTPUT} options are
 *             required, {@code WINDOW} defaults to 2 and {@code NUM_REDUCERS} to 1
 * @return -1 if the command line is invalid, otherwise 0
 * @throws Exception if option values cannot be parsed or job setup/execution fails
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2;

    LOG.info("Tool: " + ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - window: " + window);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(ComputeCooccurrenceMatrixStripes.class.getSimpleName());
    job.setJarByClass(ComputeCooccurrenceMatrixStripes.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Declare the intermediate (map output) types explicitly, then the final output types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(String2IntOpenHashMapWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    // The reducer class is also registered as the combiner.
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:BuildPageRankRecords.java
License:Apache License
/** * Runs this tool.//from w ww . j ava 2 s. c o m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption( OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES)); LOG.info("Tool name: " + BuildPageRankRecords.class.getSimpleName()); LOG.info(" - inputDir: " + inputPath); LOG.info(" - outputDir: " + outputPath); LOG.info(" - numNodes: " + n); Configuration conf = getConf(); conf.setInt(NODE_CNT_FIELD, n); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); Job job = Job.getInstance(conf); job.setJobName(BuildPageRankRecords.class.getSimpleName() + ":" + inputPath); job.setJarByClass(BuildPageRankRecords.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); 
job.setMapOutputValueClass(PageRankNode.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(PageRankNode.class); job.setMapperClass(MyMapper.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); return 0; }
From source file:BigBWA.java
License:Open Source License
@Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); for (String argumento : args) { LOG.info("Arg: " + argumento); }//from w ww . j a v a 2 s. c o m String inputPath = ""; String outputPath = ""; boolean useReducer = false; BwaOptions options = new BwaOptions(args); //We set the timeout and stablish the bwa library to call BWA methods conf.set("mapreduce.task.timeout", "0"); conf.set("mapreduce.map.env", "LD_LIBRARY_PATH=./bwa.zip/"); //==================Algorithm election================== //One of the algorithms is going to be in use, because tge default is always specified. if (options.isMemAlgorithm()) { //Case of the mem algorithm conf.set("mem", "true"); conf.set("aln", "false"); conf.set("bwasw", "false"); } else if (options.isAlnAlgorithm()) { // Case of aln algorithm conf.set("mem", "false"); conf.set("aln", "true"); conf.set("bwasw", "false"); } else if (options.isBwaswAlgorithm()) { // Case of bwasw algorithm conf.set("mem", "false"); conf.set("aln", "false"); conf.set("bwasw", "true"); } //==================Index election================== if (options.getIndexPath() != "") { conf.set("indexRoute", options.getIndexPath()); } else { System.err.println("No index has been found. 
Aborting."); System.exit(1); } //==================Type of reads election================== //There is always going to be a type of reads, because default is paired if (options.isPairedReads()) { conf.set("paired", "true"); conf.set("single", "false"); } else if (options.isSingleReads()) { conf.set("paired", "false"); conf.set("single", "true"); } //==================Use of reducer================== if (options.isUseReducer()) { useReducer = true; conf.set("useReducer", "true"); } else { conf.set("useReducer", "false"); } //==================Number of threads per map================== if (options.getNumThreads() != "0") { conf.set("bwathreads", options.getNumThreads()); } //==================RG Header=================== if (options.getReadgroupHeader() != "") { conf.set("rgheader", options.getReadgroupHeader()); } //==================Input and output paths================== inputPath = options.getInputPath(); outputPath = options.getOutputPath(); conf.set("outputGenomics", outputPath); //==================Partition number================== if (options.getPartitionNumber() != 0) { try { FileSystem fs = FileSystem.get(conf); Path inputFilePath = new Path(inputPath); ContentSummary cSummary = fs.getContentSummary(inputFilePath); long length = cSummary.getLength(); fs.close(); conf.set("mapreduce.input.fileinputformat.split.maxsize", String.valueOf((length) / options.getPartitionNumber())); conf.set("mapreduce.input.fileinputformat.split.minsize", String.valueOf((length) / options.getPartitionNumber())); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); LOG.error(e.toString()); System.exit(1); } } //Job job = new Job(conf,"BigBWA_"+outputPath); Job job = Job.getInstance(conf, "BigBWA_" + outputPath); job.setJarByClass(BigBWA.class); job.setMapperClass(BigBWAMap.class); //job.setCombinerClass(BigBWACombiner.class); if (useReducer) { job.setReducerClass(BigBWAReducer.class); job.setMapOutputKeyClass(IntWritable.class); 
job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(1); } else { job.setNumReduceTasks(0); } job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:BigramRelativeFrequencyTuple.java
License:Apache License
/** * Runs this tool.//from w ww.ja v a 2 s . c o m */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int reduceTasks = Integer.parseInt(args[2]); LOG.info("Tool name: " + BigramRelativeFrequencyTuple.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BigramRelativeFrequencyTuple.class.getSimpleName()); job.setJarByClass(BigramRelativeFrequencyTuple.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(BinSedesTuple.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(BinSedesTuple.class); job.setOutputValueClass(FloatWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }