List of usage examples for org.apache.hadoop.mapreduce Job setOutputValueClass
public void setOutputValueClass(Class<?> theClass) throws IllegalStateException
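This sets the value class for the job's output; unless setMapOutputValueClass is called, it covers the map output value type as well, and it throws IllegalStateException if the job has already been submitted. A minimal sketch of typical usage follows; the ExampleDriver/ExampleMapper/ExampleReducer class names are illustrative, not taken from any of the sources below:
// Minimal sketch of where setOutputValueClass fits in job setup
// (ExampleDriver, ExampleMapper, ExampleReducer are hypothetical classes).
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example");
job.setJarByClass(ExampleDriver.class);
job.setMapperClass(ExampleMapper.class);
job.setReducerClass(ExampleReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class); // reduce output value type
// If the map output value type differs from the final output value type,
// set it explicitly; otherwise setOutputValueClass covers both:
// job.setMapOutputValueClass(LongWritable.class);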
From source file:com.goldsaxfoundation.bigdata.Module5.SimpleMapReduce.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "wordcount");
    job.setJarByClass(SimpleMapReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
}
From source file:com.google.cloud.bigtable.mapreduce.Export.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws java.io.IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    conf.setIfUnset("hbase.client.connection.impl", BigtableConfiguration.getConnectionClass().getName());
    conf.setIfUnset(BigtableOptionsFactory.BIGTABLE_RPC_TIMEOUT_MS_KEY, "60000");
    conf.setBoolean(TableInputFormat.SHUFFLE_MAPS, true);
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    Job job = Job.getInstance(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(Export.class);
    // Set optional scan parameters
    Scan s = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, s, IdentityTableMapper.class,
            ImmutableBytesWritable.class, Result.class, job, false);
    // No reducers. Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    // job conf doesn't contain the conf so doesn't have a default fs.
    return job;
}
From source file:com.gsinnovations.howdah.AbstractJob.java
License:Apache License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat) throws IOException {
    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();
    // Infer the user jar from whichever of mapper/reducer is a user class
    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }
    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);
    job.setJobName(getCustomJobName(job, mapper, reducer));
    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());
    return job;
}
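For context, a hypothetical call to this helper could look like the following; MyMapper/MyReducer and the paths are illustrative, and the call would run inside a subclass of AbstractJob:
// Hypothetical invocation of prepareJob from a subclass of AbstractJob;
// mapper/reducer classes and paths are illustrative.
Job job = prepareJob(new Path("/data/in"), new Path("/data/out"),
        SequenceFileInputFormat.class,
        MyMapper.class, Text.class, IntWritable.class,
        MyReducer.class, Text.class, IntWritable.class,
        SequenceFileOutputFormat.class);
job.waitForCompletion(true);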
From source file:com.gsinnovations.howdah.Driver.java
License:Apache License
public static void job(Path input, Path output, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(TikaMapper.class);
    //job.setCombinerClass(KMeansCombiner.class);
    //job.setReducerClass(KMeansReducer.class);
    job.setNumReduceTasks(numReduceTasks);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(Driver.class);
    HadoopUtil.overwriteOutput(output);
    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void generatePairs(String in, String out)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    path = out;
    Job job;
    Path input, output;
    input = new Path(in);
    output = new Path(path + "/CSMRPairs");
    job = new Job(conf);
    job.setJobName("CSMR Pairs Job");
    job.setJarByClass(CSMRBase.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.setMapperClass(CSMRMapper.class);
    job.setReducerClass(CSMRReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorArrayWritable.class);
    job.waitForCompletion(true);
}
From source file:com.gsvic.csmr.CSMRBase.java
License:Apache License
public static void StartCSMR() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job;
    job = new Job(conf);
    job.setJobName("CSMR Cosine Similarity Job");
    job.setJarByClass(CSMRBase.class);
    FileInputFormat.addInputPath(job, new Path(path + "/CSMRPairs/part-r-00000"));
    FileOutputFormat.setOutputPath(job, new Path(path + "/Results"));
    job.setMapperClass(Mapper.class);
    job.setReducerClass(CosineSimilarityReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VectorArrayWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);
    // Exit with 0 on success, 1 on failure
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hadoop.examples.secondSort.SecondarySort.java
License:Apache License
public static void main(String[] args) throws Exception {
    // Parse the Hadoop generic options
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }
    // Set up the job
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    // Mapper
    job.setMapperClass(MapClass.class);
    // No combiner: it would emit <Text, IntWritable> while the reducer expects <IntPair, IntWritable>
    //job.setCombinerClass(Reduce.class);
    // Reducer
    job.setReducerClass(Reduce.class);
    // Group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    // The sort order comes from IntPair.compareTo(), so no explicit sort comparator is set
    //job.setSortComparatorClass(cls);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // The map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);
    // The reduce output is Text, IntWritable, written by the default TextOutputFormat
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    // Submit the job and wait for completion
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hadoop.mapreduce.examples.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // Input/output paths are hardcoded here instead of taken from args
    String[] ioArgs = { "input", "output2" };
    String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    // Set up the job
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    // Map, combine, reduce
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    // Output types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.hadoop.mapreduce.TestLzoLazyLoading.java
License:Open Source License
private void runWordCount(Configuration cf, boolean compressIn, boolean compressOut)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration thisConf = new Configuration(cf);
    if (compressIn) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-map", true);
    }
    if (compressOut) {
        thisConf.setBoolean("mapred.compression.lzo.test.codec-checked-after-reduce", true);
    }
    Path pathIn = new Path(TEST_ROOT_DIR + "/in");
    Path pathOut = new Path(TEST_ROOT_DIR + "/out");
    localFs.delete(pathIn, true);
    localFs.delete(pathOut, true);
    writeFile(makeFileName("in/part1", compressIn), "this is a test\nof word count test\ntest\n");
    writeFile(makeFileName("in/part2", compressIn), "more test");
    Job job = new Job(thisConf, "word count");
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (compressOut) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzoCodec.class);
    }
    FileInputFormat.addInputPath(job, pathIn);
    FileOutputFormat.setOutputPath(job, pathOut);
    job.submit();
    assertEquals("IsLzoChecked (client)?", compressIn, LzoCodec.isNativeLzoChecked());
    assertTrue(job.waitForCompletion(false));
    String result = readFile(makeFileName("out/part-r-00000", compressOut));
    System.out.println(result);
    assertEquals("a\t1\ncount\t1\nis\t1\nmore\t1\nof\t1\ntest\t4\nthis\t1\nword\t1\n", result);
}
From source file:com.hadoop.secondarysort.SecondarySortDESC.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // if (otherArgs.length != 2) { // System.err.println("Usage: secondarysrot <in> <out>"); // System.exit(2); // }//from w ww .j a v a 2 s. co m // JobConf jobConf = new JobConf(); Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySortDESC.class); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); // group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); job.setGroupingComparatorClass(FirstGroupingComparator.class); // conf.setClass("mapred.output.key.comparator.class", // KeyComparator.class, RawComparator.class); // job.setSortComparatorClass(SecondGroupingComparator.class); // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); FileSystem fileSystem = FileSystem.get(conf); if (fileSystem.exists(new Path(outPath))) { fileSystem.delete(new Path(outPath)); } System.exit(job.waitForCompletion(true) ? 0 : 1); }