List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
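Every example below calls this two-argument constructor, passing a Configuration and a display name for the job. For orientation, here is a minimal, self-contained sketch of the constructor in use; the class name MinimalJob is a placeholder, and note that later Hadoop releases deprecate this constructor in favor of Job.getInstance(conf, jobName):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job(Configuration, String): the second argument is the job's display name.
        Job job = new Job(conf, "minimal job");
        job.setJarByClass(MinimalJob.class);
        // No mapper or reducer is set, so Hadoop falls back to the identity
        // Mapper and Reducer: each output record is a line of the text input
        // keyed by its byte offset.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples that follow all use the same skeleton, varying the mapper, reducer, formats, and input sources.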
From source file:CalculateHistogram.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: CalculateHistogram <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "MRDT - Generate Histogram");
    job.setJarByClass(CalculateHistogram.class);
    job.setMapperClass(HistogramMap.class);
    job.setReducerClass(HistogramReduce.class);
    //job.setOutputValueClass(HistogramBucket.class);
    //job.setMapOutputKeyClass(LongWritable.class);
    //job.setMapOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:MaleUsersBelow7Years.java
public static void main(String args[]) throws Exception {
    Configuration configuration = new Configuration();
    Job job = new Job(configuration, "CountMaleUsersLessThan7");
    job.setJarByClass(MaleUsersBelow7Years.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reducer.class);
    job.setCombinerClass(Reducer.class);
    // set input and output formats: the mapper consumes text input,
    // the reducer emits text output
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));   // input file path
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output file path
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:PrimeDivisor.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: PrimeDivisor <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(PrimeDivisor.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    final long startTime = System.currentTimeMillis();
    String outputReducerType = "filesystem";
    if (args != null && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    // Use a smaller page size that doesn't divide the row count evenly
    // to exercise the paging logic better.
    ConfigHelper.setRangeBatchSize(getConf(), 99);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "userId";

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
            job.getConfiguration().set(CONF_COLUMN_NAME, "sum");
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setInputPartitioner(job.getConfiguration(), "RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
        // Uncommenting the call below would cause the predicate to be ignored
        // in favor of scanning everything as a wide row:
        // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY, true);

        ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setOutputPartitioner(job.getConfiguration(), "RandomPartitioner");

        job.waitForCompletion(true);
    }

    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println();
    System.out.println("Job Finished in " + duration + " seconds");
    System.out.println();
    return 0;
}
From source file:Inlinks.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: inlinks <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "inlinks");
    job.setJarByClass(Inlinks.class);
    job.setMapperClass(TokenizerMapper.class);
    //job.setCombinerClass(IdentityReducer.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(10);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:TopFiveAverageMoviesRatedByFemales.java
public static void main(String[] args) throws Exception {
    JobConf conf1 = new JobConf();
    Job job1 = new Job(conf1, "TopFiveAverageMoviesRatedByFemales");
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[0]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapRatings.class);
    org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job1, new Path(args[1]),
            TextInputFormat.class, TopFiveAverageMoviesRatedByFemales.MapGender.class);
    job1.setReducerClass(TopFiveAverageMoviesRatedByFemales.ReduceToMovieIdAndRatings.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    job1.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
    job1.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job1, new Path(args[3]));

    boolean flag = job1.waitForCompletion(true);
    boolean flag1 = false;
    boolean flag2 = false;

    if (flag) {
        JobConf conf2 = new JobConf();
        Job job2 = new Job(conf2, "AverageCalculation");
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[2]), TextInputFormat.class, Map2_1.class);
        //org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job2, new Path(args[3]), TextInputFormat.class, Map2_2.class);
        job2.setMapperClass(MapAverage.class);
        job2.setReducerClass(ReduceAverage.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        job2.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job2, new Path(args[3]));
        FileOutputFormat.setOutputPath(job2, new Path(args[4]));
        flag1 = job2.waitForCompletion(true);
    }

    if (flag1) {
        JobConf conf3 = new JobConf();
        Job job3 = new Job(conf3, "AverageCalculation");
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[4]),
                TextInputFormat.class, MapAverageTop5.class);
        org.apache.hadoop.mapreduce.lib.input.MultipleInputs.addInputPath(job3, new Path(args[2]),
                TextInputFormat.class, MapMovieName.class);
        //job3.setMapperClass(MapAverageTop5.class);
        job3.setReducerClass(ReduceAverageTop5.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(Text.class);
        job3.setJarByClass(TopFiveAverageMoviesRatedByFemales.class);
        job3.setOutputFormatClass(TextOutputFormat.class);
        //FileInputFormat.addInputPath(job3, new Path(args[4]));
        FileOutputFormat.setOutputPath(job3, new Path(args[5]));
        flag2 = job3.waitForCompletion(true);
    }
}
From source file:GenIndex.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: GenIndex <in> <out>");
        System.exit(2);
    }
    String tmpPath = "/local_scratch/wordcount/tmp";
    String stopWord = "/local_scratch/wordcount/stopword";

    // Job to count the words
    Job count_job = new Job(conf, "word count");
    count_job.setJarByClass(GenIndex.class);
    count_job.setMapperClass(Mapper1_Count.class);
    count_job.setCombinerClass(Reducer1_Count.class);
    count_job.setReducerClass(Reducer1_Count.class);
    count_job.setOutputKeyClass(Text.class);
    count_job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(count_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(count_job, new Path(tmpPath));
    count_job.waitForCompletion(true);

    // Job to sort the words by count
    Job sort_job = new Job(conf, "word sort");
    sort_job.setJarByClass(GenIndex.class);
    sort_job.setMapperClass(Mapper2_Sort.class);
    sort_job.setCombinerClass(Reducer2_Sort.class);
    sort_job.setReducerClass(Reducer2_Sort.class);
    sort_job.setSortComparatorClass(SortReducerByValuesKeyComparator.class);
    sort_job.setOutputKeyClass(IntWritable.class);
    sort_job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(sort_job, new Path(tmpPath));
    FileOutputFormat.setOutputPath(sort_job, new Path(stopWord));
    sort_job.waitForCompletion(true);

    // Job to generate the index
    Job index_job = new Job(conf, "word index");
    index_job.setJarByClass(GenIndex.class);
    index_job.setMapperClass(Mapper3_index.class);
    index_job.setCombinerClass(Reducer3_index.class);
    index_job.setReducerClass(Reducer3_index.class);
    index_job.setOutputKeyClass(Text.class);
    index_job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(index_job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(index_job, new Path(otherArgs[1]));
    index_job.waitForCompletion(true);

    System.exit(0);
}
From source file:PopularURLs.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "popularurls");
    job.setJarByClass(PopularURLs.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}