List of usage examples for the org.apache.hadoop.mapreduce.Job constructor
Job(Configuration conf, String jobName) throws IOException
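Every example on this page constructs the job with the two-argument form new Job(conf, jobName), which Hadoop 2 deprecates in favor of the static factory Job.getInstance(Configuration, String). A minimal, self-contained sketch of both forms (the job names are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobConstructionExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Deprecated constructor form, as used in the examples below.
        @SuppressWarnings("deprecation")
        Job legacy = new Job(conf, "legacy-job");

        // Preferred since Hadoop 2: the static factory. The Job copies the
        // Configuration so its internal modifications do not affect 'conf'.
        Job modern = Job.getInstance(conf, "modern-job");

        System.out.println(legacy.getJobName() + " / " + modern.getJobName());
    }
}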
From source file:com.datasalt.utils.mapred.counter.MapRedCounter.java
License:Apache License
protected static Job buildMapRedCounterJobWithoutCombiner(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> outputFormat, String outPath,
        Configuration conf) throws IOException {

    Job job = new Job(conf, name);
    Path output = new Path(outPath);
    HadoopUtils.deleteIfExists(FileSystem.get(conf), output);
    job.setJarByClass(MapRedCounter.class);

    job.setReducerClass(MapRedCountReducer.class);
    job.setMapOutputKeyClass(CounterKey.class);
    job.setMapOutputValueClass(CounterValue.class);
    job.setOutputFormatClass(outputFormat);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Secondary sorting configuration.
    job.setGroupingComparatorClass(CounterKey.IdGroupComparator.class);
    job.setPartitionerClass(CounterKey.IdGroupPartitioner.class);

    FileOutputFormat.setOutputPath(job, output);

    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        // Serialize the real output format to the DistributedCache and let
        // ProxyOutputFormat delegate to it at runtime.
        DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
        job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
        job.setOutputFormatClass(ProxyOutputFormat.class);

        // Multiple-outputs configuration.
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterKey.class, LongWritable.class);
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTDISTINCTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterDistinctKey.class,
                LongPairWritable.class);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    return job;
}
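The secondary-sort wiring above depends on CounterKey.IdGroupComparator, whose source is not shown on this page. As a rough, hypothetical sketch of what such a grouping comparator usually looks like (the getGroupId() accessor on CounterKey is assumed for illustration), it compares only the grouping portion of the key, so every value sharing a group id reaches the reducer in a single reduce() call while the remaining key fields still control sort order:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Minimal sketch, not the actual Datasalt implementation.
public class IdGroupComparatorSketch extends WritableComparator {

    protected IdGroupComparatorSketch() {
        // 'true' tells WritableComparator to instantiate keys so the
        // object-based compare() below receives deserialized instances.
        super(CounterKey.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        // Group only by the id field, ignoring the secondary-sort fields.
        // CounterKey.getGroupId() is a hypothetical accessor.
        return Integer.compare(((CounterKey) a).getGroupId(), ((CounterKey) b).getGroupId());
    }
}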
From source file:com.datasalt.utils.mapred.joiner.MultiJoiner.java
License:Apache License
public Job getJob() throws IOException {
    if (job == null) {
        job = new Job(conf, name);
        HadoopUtils.deleteIfExists(FileSystem.get(conf), outputPath);
        job.setJarByClass((jarByClass != null) ? jarByClass : reducer);

        job.setReducerClass(reducer);
        job.setMapOutputValueClass(MultiJoinDatum.class);
        job.setMapOutputKeyClass(MultiJoinPair.class);
        job.setOutputFormatClass(outputFormat);
        job.setOutputKeyClass(outputKeyClass);
        job.setOutputValueClass(outputValueClass);
        job.setGroupingComparatorClass(MultiJoinPair.GroupComparator.class);
        job.setPartitionerClass(MultiJoinPair.GroupPartitioner.class);
        FileOutputFormat.setOutputPath(job, outputPath);
        setMultiJoinPairClass(MultiJoinPair.class);
    }
    return job;
}
From source file:com.dipwater.accountAnalyze.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "192.168.1.51:9001");
    conf.set("fs.default.name", "hdfs://192.168.1.51:9000");

    String[] ars = new String[] { "input", "newout" };
    String[] otherArgs = new GenericOptionsParser(conf, ars).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");

    // Build a temporary job jar from the bin/ directory so the job can be
    // submitted from an IDE without a pre-packaged jar.
    File jarFile = EJob.createTempJar("bin");
    EJob.addClasspath("/home/hadoop/hadoop-1.2.1/conf");
    ClassLoader classLoader = EJob.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);
    ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    //job.setJarByClass(WordCount.class);

    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.elephantscale.hbase.book.chapter1.SimpleMR.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: SimpleMR <in> <out>");
        return;
    }
    Job job = new Job(conf, "SimpleMR");
    job.setJarByClass(SimpleMR.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "Calculating perplexity for " + modelPath;
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CachingCVB0PerplexityMapper.class);
    job.setMapperClass(CachingCVB0PerplexityMapper.class);
    job.setCombinerClass(DualDoubleSumReducer.class);
    job.setReducerClass(DualDoubleSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, corpusPath);
    Path outputPath = perplexityPath(modelPath.getParent(), iteration);
    FileOutputFormat.setOutputPath(job, outputPath);
    setModelPaths(job, modelPath);
    HadoopUtil.delete(conf, outputPath);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
    }
    return readPerplexity(conf, modelPath.getParent(), iteration);
}
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
private static Job writeTopicModel(Configuration conf, Path modelInput, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CVB0Driver.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CVB0TopicTermVectorNormalizerMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, modelInput);
    FileOutputFormat.setOutputPath(job, output);
    job.submit();
    return job;
}
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setMapperClass(CVB0DocInferenceMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);

    FileSystem fs = FileSystem.get(corpus.toUri(), conf);
    if (modelInput != null && fs.exists(modelInput)) {
        FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
        URI[] modelUris = new URI[statuses.length];
        for (int i = 0; i < statuses.length; i++) {
            modelUris[i] = statuses[i].getPath().toUri();
        }
        DistributedCache.setCacheFiles(modelUris, conf);
    }
    setModelPaths(job, modelInput); // bug: MAHOUT-1147
    FileInputFormat.addInputPath(job, corpus);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(CVB0Driver.class);
    job.submit();
    return job;
}
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
public static void runIteration(Configuration conf, Path corpusInput, Path modelInput, Path modelOutput,
        int iterationNumber, int maxIterations, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = String.format("Iteration %d of %d, input path: %s", iterationNumber, maxIterations,
            modelInput);
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CVB0Driver.class);
    job.setMapperClass(CachingCVB0Mapper.class);
    job.setCombinerClass(VectorSumReducer.class);
    job.setReducerClass(VectorSumReducer.class);
    job.setNumReduceTasks(numReduceTasks);
    job.setOutputKeyClass(Text.class); // upstream Mahout 0.7 uses IntWritable here
    job.setOutputValueClass(VectorWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, corpusInput);
    FileOutputFormat.setOutputPath(job, modelOutput);
    setModelPaths(job, modelInput);
    HadoopUtil.delete(conf, modelOutput);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException(
                String.format("Failed to complete iteration %d stage 1", iterationNumber));
    }
}
From source file:com.facebook.hiveio.mapreduce.output.WritingTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    handleCommandLine(args, conf);
    HadoopUtils.setMapAttempts(conf, 1);
    adjustConfigurationForHive(conf);
    HiveTools.setupJob(conf);

    Job job = new Job(conf, "hive-io-writing");
    if (job.getJar() == null) {
        job.setJarByClass(getClass());
    }
    job.setMapperClass(SampleMapper.class);
    job.setInputFormatClass(SampleInputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(HiveWritableRecord.class);
    job.setOutputFormatClass(SampleOutputFormat.class);
    job.setNumReduceTasks(0);
    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.frdeso.app.Sleepy.java
License:Apache License
/**
 * Performs integer summation of all the values for each key.
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: wordmean <in> <out> <number second>");
        return 0;
    }
    Configuration conf = getConf();
    conf.set("mapSleepTime", args[2]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "joba");
    job.setJarByClass(Sleepy.class);
    job.setMapperClass(SleepyMapper.class);
    // The base Reducer class acts as an identity combiner/reducer here.
    job.setCombinerClass(Reducer.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    return result ? 0 : 1;
}