List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
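All of the snippets below call new Job(conf, jobName), which maps to this constructor; in current Hadoop releases it is deprecated in favor of the Job.getInstance(Configuration, String) factory. The following is a minimal driver sketch using the non-deprecated factory; the class name WordCountDriver is illustrative, and TokenizerMapper/IntSumReducer are the word-count classes that appear in the WordCount examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(...) replaces the deprecated new Job(conf, jobName) used in the examples below
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenizerMapper.class);   // mapper/reducer classes borrowed from the WordCount examples
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}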
From source file:com.neu.cs6240.Xml2csvComments.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvPosts <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csvPosts.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(CommentsMapper.class);
    job.setReducerClass(CommentsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(10);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.neu.cs6240.Xml2csvPosts.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Setting up the xml tag configurator for splitter
    conf.set("xmlinput.start", "<row ");
    conf.set("xmlinput.end", " />");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Xml2csvPosts <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Converts Posts.xml to .csv");
    job.setJarByClass(Xml2csvPosts.class);
    job.setInputFormatClass(XmlInputFormat.class);
    job.setMapperClass(PostsMapper.class);
    job.setReducerClass(PostsReducer.class);
    job.setPartitionerClass(PostsPartitioner.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set as per your file size
    job.setNumReduceTasks(15);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.neusoft.hbase.test.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.nistfortunetellers.cleaning.NISTClean.java
License:Apache License
/** Runs a Job that is Text in and Out, and TextInput in and out, too! */
@SuppressWarnings({ "deprecation", "rawtypes" })
static void runTextJob(String jobName, Configuration jobConfig, String inputPath, String outputPath,
        Class<? extends Mapper> mapper, Class<? extends Reducer> reducer) {
    try {
        Job genericJob = new Job(jobConfig, jobName);
        // DEBUG
        //genericJob.setNumReduceTasks(0);
        // END DEBUG
        genericJob.setJarByClass(NISTClean.class);
        genericJob.setOutputKeyClass(Text.class);
        genericJob.setOutputValueClass(Text.class);
        genericJob.setMapperClass(mapper);
        genericJob.setReducerClass(reducer);
        genericJob.setInputFormatClass(TextInputFormat.class);
        genericJob.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(genericJob, new Path(inputPath));
        FileOutputFormat.setOutputPath(genericJob, new Path(outputPath));
        genericJob.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file:com.nnapz.hbaseexplorer.mr.TableStats.java
License:Apache License
/**
 * M/R Job setup. No reduce.
 *
 * @param conf      a suitable hadoop+hbase configuration
 * @param tableName the table we want to get stats from
 * @return the Job object, to be started
 * @throws java.io.IOException any hadoop IO problem
 */
public static Job createSubmittableJob(Configuration conf, String tableName) throws IOException {
    Job job = new Job(conf, NAME + "_" + tableName);
    if (job.getJar() == null) {
        job.setJarByClass(TableStats.class); // otherwise set in conf already
    }
    Scan scan = new Scan();
    scan.setMaxVersions(10000); // todo fixme
    TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCountMapper.class, Text.class, Result.class, job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    return job;
}
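createSubmittableJob only builds the Job; the caller still has to submit it. A minimal, hypothetical driver sketch (HBaseConfiguration.create() and the table-name argument are assumptions, not part of the original class) might look like:

// Hypothetical driver: build the per-table stats job and block until it finishes.
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();   // assumed HBase-aware configuration
    Job job = createSubmittableJob(conf, args[0]);       // args[0] = HBase table name
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}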
From source file:com.paperbook.test.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    //     System.err.println("Usage: wordcount <in> <out>");
    //     System.exit(2);
    // }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // The path is a local path on the local file system, not HDFS
    FileInputFormat.addInputPath(job, new Path("resources/words.txt"));
    FileOutputFormat.setOutputPath(job, new Path("output"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.phantom.hadoop.examples.BaileyBorweinPlouffe.java
License:Apache License
/** Create and setup a job */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);

    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);

    // setup input
    job.setInputFormatClass(BbpInputFormat.class);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:com.phantom.hadoop.examples.pi.DistSum.java
License:Apache License
/** Create a job */
private Job createJob(String name, Summation sigma) throws IOException {
    final Job job = new Job(getConf(), parameters.remoteDir + "/" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(DistSum.class);
    jobconf.setInt(N_PARTS, parameters.nParts);
    SummationWritable.write(sigma, DistSum.class, jobconf);

    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);

    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
From source file:com.phantom.hadoop.examples.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.phantom.hadoop.examples.WordMean.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}