List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf, String jobName) throws IOException
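For orientation, here is a minimal driver sketch for this overload. The WordCountMapper/WordCountReducer names are hypothetical placeholders, not classes from the examples below.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getInstance copies conf, so later changes to conf do not affect the job
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMapper.class);    // hypothetical mapper
        job.setReducerClass(WordCountReducer.class);  // hypothetical reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}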
From source file:Assignment3_P5_Top25Movies.Top25MovieRatingDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job1 = Job.getInstance(conf, "Movie Rating Count");
    job1.setJarByClass(Top25MovieRatingDriver.class);

    // standard job wiring
    job1.setMapperClass(Top25MovieRating_Mapper.class);
    job1.setCombinerClass(Top25MovieRating_Reducer.class);
    job1.setReducerClass(Top25MovieRating_Reducer.class);

    // job1 emits (movieId, average rating)
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(FloatWritable.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    boolean complete = job1.waitForCompletion(true);

    // the second job sorts job1's output
    Configuration conf2 = new Configuration();
    Job job2 = Job.getInstance(conf2, "Movie Rating Count");
    if (complete) {
        job2.setJarByClass(Top25MovieRatingDriver.class);

        // pass-through mapper that re-emits job1's output as (rating, movieId)
        job2.setMapperClass(Top25MovieRating_Mapper1.class);
        job2.setMapOutputKeyClass(FloatWritable.class);
        job2.setMapOutputValueClass(IntWritable.class);

        // sort ratings in descending order
        job2.setSortComparatorClass(Top25MovieRating_SortComparator.class);

        // a single reducer outputs the top 25
        job2.setNumReduceTasks(1);
        job2.setReducerClass(Top25MovieRating_Reducer1.class);
        job2.setOutputKeyClass(FloatWritable.class);
        job2.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job2, new Path(args[1]));
        FileOutputFormat.setOutputPath(job2, new Path(args[2]));
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}
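Top25MovieRating_SortComparator is not part of this listing; a descending comparator for FloatWritable keys is conventionally written by negating WritableComparator's compare. A sketch under that assumption (class name hypothetical):

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class DescendingFloatComparator extends WritableComparator {
    protected DescendingFloatComparator() {
        super(FloatWritable.class, true); // true: instantiate key objects for compare()
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        return -a.compareTo(b); // invert the natural (ascending) float order
    }
}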
From source file:Assignment4_P2_StockAverageWithCombiner.StockAverageDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Average Stock Price");
    job.setJarByClass(StockAverageDriver.class);
    job.setMapperClass(StockAverage_Mapper.class);
    job.setCombinerClass(StockAverage_Combiner.class);
    job.setReducerClass(StockAverage_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StockAverage_CompositeValueWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
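StockAverage_CompositeValueWritable is not shown; for a combiner-safe average it presumably carries a running (sum, count) pair, since an average of averages is not associative. A minimal sketch of such a value type (class and field names are assumptions):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

public class SumCountWritable implements Writable {
    private double sum;  // running sum of prices
    private long count;  // number of records folded into sum

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeDouble(sum);
        out.writeLong(count);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        sum = in.readDouble();
        count = in.readLong();
    }
    // getters, setters, and a merge helper omitted; the reducer would
    // divide the final sum by the final count to get the average
}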
From source file:Assignment4_P3_InMemoryStdDeviation.MovieRatingStdDevDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Movie Rating Standard Deviation");
    job.setJarByClass(MovieRatingStdDevDriver.class);
    job.setMapperClass(MovieRatingStdDev_Mapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setReducerClass(MovieRatingStdDev_Reducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:Assignment4_P4_MemoryConscious.MovieRatingMemConsciousDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Movie Rating Mem Conscious Standard Deviation");
    job.setJarByClass(MovieRatingMemConsciousDriver.class);
    job.setMapperClass(MovieRatingMemConscious_Mapper.class);
    job.setCombinerClass(MovingRatingMemConscious_Combiner.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SortedMapWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(MovieRatingMemConscious_Reducer.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:Assignment5_P2_DistinctIPAddress.DistinctIPAddressDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "IP Address Count");
    job.setJarByClass(DistinctIPAddressDriver.class);
    job.setMapperClass(DistinctIPAddress_Mapper.class);
    job.setCombinerClass(DistinctIPAddress_Reducer.class);
    job.setReducerClass(DistinctIPAddress_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:Assignment5_P3_PartitionPattern.Partition_IPAddress_By_MonthDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "IP Address By Date");
    job.setJarByClass(Partition_IPAddress_By_MonthDriver.class);
    job.setMapperClass(Partition_IPAddress_By_Month_Mapper.class);
    //job.setCombinerClass(Partition_IPAddress_By_Month_Reducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // plug in the custom partitioner
    job.setPartitionerClass(Partition_IPAddress_By_Month_Partitioner.class);
    // one reduce task per partition: 12, one for each month of the year
    job.setNumReduceTasks(12);

    job.setReducerClass(Partition_IPAddress_By_Month_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
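The partitioner itself is not part of this listing. Assuming the mapper emits the month number (1-12) as its IntWritable key, a matching implementation would look roughly like this (class name hypothetical):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class MonthPartitioner extends Partitioner<IntWritable, Text> {
    @Override
    public int getPartition(IntWritable month, Text value, int numPartitions) {
        // months 1..12 land in partitions 0..11, one output file per month
        return (month.get() - 1) % numPartitions;
    }
}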
From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "IP Address By Hour");
    job.setJarByClass(Binning_IPAddress_By_DayDriver.class);
    job.setMapperClass(Binning_IPAddress_By_Day_Mapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    MultipleOutputs.addNamedOutput(job, "textualBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "massaBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // set num of reduce tasks to 0
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
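In this map-only job, records reach the two named outputs registered above through MultipleOutputs calls inside the mapper, which is not shown here. A minimal sketch of that plumbing (the routing predicate is an assumption):

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class BinningMapper extends Mapper<Object, Text, NullWritable, Text> {
    private MultipleOutputs<NullWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // route each record to a named output; the condition is illustrative only
        if (value.toString().contains("massa")) {
            mos.write("massaBins", NullWritable.get(), value);
        } else {
            mos.write("textualBins", NullWritable.get(), value);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close(); // flush and close the named outputs
    }
}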
From source file:Assignment5_P6_StructureToHierarchyPattern.Structure_HierarchyDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Structure to Hierarchy");
    job.setJarByClass(Structure_HierarchyDriver.class);

    // pass file 1 to this mapper in Text format
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class,
            Structure_Hierarchy_Movie_Mapper.class);

    // pass file 2 to this mapper in Text format
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class,
            Structure_Hierarchy_Tag_Mapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(Structure_Hierarchy_Reducer.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 2);
}
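With two mappers feeding one reducer, the reducer needs to know which input each value came from; the usual convention is to tag values in the mappers. A sketch of that idea (the tags and the movieId/movieTitle/tag fields are assumptions about what the two mappers above do):

// in Structure_Hierarchy_Movie_Mapper: tag movie records with "M"
context.write(new Text(movieId), new Text("M" + movieTitle));

// in Structure_Hierarchy_Tag_Mapper: tag tag records with "T"
context.write(new Text(movieId), new Text("T" + tag));

// in the reducer, the leading character tells the sources apart
for (Text value : values) {
    String v = value.toString();
    if (v.startsWith("M")) {
        // this is the parent (movie) record
    } else {
        // this is a child (tag) record
    }
}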
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    HalvadeConf.setIsPass2(pass1Conf, false);
    HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf, HalvadeResourceManager.RNA_SHMEM_PASS1,
            true, halvadeOpts.useBamInput);
    Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
    pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
    try {
        if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
            // add every file in directory
            FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
            for (FileStatus file : files) {
                if (!file.isDirectory()) {
                    FileInputFormat.addInputPath(pass1Job, file.getPath());
                }
            }
        } else {
            FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
        }
    } catch (IOException | IllegalArgumentException e) {
        Logger.EXCEPTION(e);
    }

    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
    boolean skipPass1 = false;
    if (outFs.exists(new Path(tmpOutDir))) {
        // check if genome already exists
        skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
        if (skipPass1)
            Logger.DEBUG("pass1 genome already created, skipping pass 1");
        else {
            Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
            Logger.INFO("ERROR: Please remove this directory before trying again.");
            System.exit(-2);
        }
    }
    if (!skipPass1) {
        FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
        pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
        pass1Job.setMapOutputKeyClass(GenomeSJ.class);
        pass1Job.setMapOutputValueClass(Text.class);
        pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
        pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
        pass1Job.setNumReduceTasks(1);
        pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
        pass1Job.setOutputKeyClass(LongWritable.class);
        pass1Job.setOutputValueClass(Text.class);
        return runTimedJob(pass1Job, "Halvade pass 1 Job");
    } else
        return 0;
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String pipeline = "";
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput);
        pipeline = RNA_PASS2;
    } else if (jobType == HalvadeResourceManager.DNA) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput);
        pipeline = DNA;
    }
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    if (outFs.exists(new Path(tmpOutDir))) {
        Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput)
        setHeaderFile(halvadeOpts.in, halvadeConf);

    Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob);
    FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir));

    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (jobType == HalvadeResourceManager.DNA) {
        halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }

    halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
    halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
    halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class);
    halvadeJob.setOutputKeyClass(Text.class);
    if (halvadeOpts.mergeBam) {
        halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class);
        halvadeJob.setOutputValueClass(SAMRecordWritable.class);
    } else {
        halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
        halvadeJob.setSortComparatorClass(ChrRgSortComparator.class);
        halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        halvadeJob.setOutputValueClass(VariantContextWritable.class);
    }
    if (halvadeOpts.justAlign)
        halvadeJob.setNumReduceTasks(0);
    else if (halvadeOpts.mergeBam) {
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        halvadeJob.setNumReduceTasks(1);
    } else
        halvadeJob.setNumReduceTasks(halvadeOpts.reduces);
    if (halvadeOpts.useBamInput) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        halvadeJob.setInputFormatClass(BAMInputFormat.class);
    }
    return runTimedJob(halvadeJob, "Halvade Job");
}
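Both Halvade methods delegate to a runTimedJob helper that is not part of this listing; presumably it just brackets waitForCompletion with wall-clock timing, along these lines (a sketch, not Halvade's actual implementation):

// sketch only: assumes Halvade's Logger, as used in the methods above
private int runTimedJob(Job job, String jobName)
        throws IOException, InterruptedException, ClassNotFoundException {
    long start = System.currentTimeMillis();
    boolean succeeded = job.waitForCompletion(true);
    Logger.DEBUG(jobName + " took " + (System.currentTimeMillis() - start) / 1000 + " seconds");
    return succeeded ? 0 : -1; // return code convention is an assumption
}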