List of usage examples for org.apache.hadoop.mapreduce Job setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass) throws IllegalStateException
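Before the sourced examples, here is a minimal generic sketch (not taken from any of the files below; AverageJob, AvgMapper and AvgReducer are hypothetical names) showing the typical reason for calling setMapOutputValueClass: the mapper emits a different value type (IntWritable) than the reducer's final output (DoubleWritable), so the intermediate types must be declared separately from setOutputKeyClass/setOutputValueClass.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AverageJob {

    // Hypothetical mapper: parses "word<TAB>count" lines and emits (Text, IntWritable).
    public static class AvgMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context ctx)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split("\t");
            if (parts.length == 2) {
                ctx.write(new Text(parts[0]), new IntWritable(Integer.parseInt(parts[1])));
            }
        }
    }

    // Hypothetical reducer: averages the counts and emits (Text, DoubleWritable).
    public static class AvgReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context ctx)
                throws IOException, InterruptedException {
            long sum = 0;
            long n = 0;
            for (IntWritable v : values) {
                sum += v.get();
                n++;
            }
            ctx.write(key, new DoubleWritable((double) sum / n));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "average");
        job.setJarByClass(AverageJob.class);

        job.setMapperClass(AvgMapper.class);
        job.setReducerClass(AvgReducer.class);

        // Intermediate (map output) types: required here because they differ
        // from the final output types declared below.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final (reduce output) types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

If the map output types match the final output types, the setMapOutput*Class calls can be omitted; they default to the values set via setOutputKeyClass and setOutputValueClass.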
From source file:de.hpi.fgis.hdrs.mapreduce.examples.PredicateAnalysis.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(PredicateAnalysis.class);
    job.setJobName("Predicate Analysis");

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    //job.setOutputKeyClass(Text.class);
    //job.setOutputValueClass(Text.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    //job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TripleInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, "POS");
    TripleInputFormat.setPattern(job, Triple.newPattern(null, args[1], null));
    TripleInputFormat.setAggregationLevel2(job);

    SequenceFileOutputFormat.setOutputPath(job, new Path(args[2]));

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:de.hpi.fgis.hdrs.mapreduce.examples.TripleCount.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(TripleCount.class);
    job.setJobName("TripleCount");

    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    int argc = 0;
    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);

    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s))
            s = null;
        if ("*".equals(p))
            p = null;
        if ("*".equals(o))
            o = null;
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    } else {
        TextOutputFormat.setOutputPath(job, new Path(args[argc]));
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:de.hpi.fgis.hdrs.mapreduce.examples.TripleSize.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(TripleSize.class);
    job.setJobName("TripleSize");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    int argc = 0;
    TripleInputFormat.setStoreAddress(job, args[argc++]);
    TripleInputFormat.setIndex(job, args[argc++]);

    if ("-p".equals(args[argc])) {
        argc++;
        String s = args[argc++];
        String p = args[argc++];
        String o = args[argc++];
        if ("*".equals(s))
            s = null;
        if ("*".equals(p))
            p = null;
        if ("*".equals(o))
            o = null;
        TripleInputFormat.setPattern(job, Triple.newPattern(s, p, o));
    } else {
        TextOutputFormat.setOutputPath(job, new Path(args[argc]));
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:de.hpi.fgis.hdrs.mapreduce.IndexLoader.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (3 != args.length) {
        System.out.println("Usage: IndexLoader <StoreAddress> <SourceIndex> "
                + "<TargetIndex1>[,<TargetIndex2>...]");
        return 0;
    }

    Job job = new Job(getConf());
    job.setJarByClass(IndexLoader.class);
    job.setJobName("HDRS Index Loader");

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TripleOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TripleOutputFormat.class);

    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TripleInputFormat.class);
    job.setOutputFormatClass(TripleOutputFormat.class);

    TripleInputFormat.setStoreAddress(job, args[0]);
    TripleInputFormat.setIndex(job, args[1]);

    TripleOutputFormat.setStoreAddress(job, args[0]);
    TripleOutputFormat.setOutputIndexes(job, args[2]);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:de.l3s.common.features.hadoop.TimeSeriesJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options opts = new Options();

    Option jnameOpt = OptionBuilder.withArgName("job-name").hasArg(true)
            .withDescription("Timeseries analysis").create(JOB_NAME);
    Option inputOpt = OptionBuilder.withArgName("input-path").hasArg(true)
            .withDescription("Timeseries file path (required)").create(INPUT_OPT);
    Option outputOpt = OptionBuilder.withArgName("output-path").hasArg(true)
            .withDescription("output file path (required)").create(OUTPUT_OPT);
    Option reduceOpt = OptionBuilder.withArgName("reduce-no").hasArg(true)
            .withDescription("number of reducer nodes").create(REDUCE_NO);
    Option rmOpt = OptionBuilder.withArgName("remove-out").hasArg(false)
            .withDescription("remove the output then create again before writing files onto it")
            .create(REMOVE_OUTPUT);
    Option cOpt = OptionBuilder.withArgName("compress-option").hasArg(true)
            .withDescription("compression option").create(COMPRESS_OPT);

    opts.addOption(jnameOpt);
    opts.addOption(inputOpt);
    opts.addOption(reduceOpt);
    opts.addOption(outputOpt);
    opts.addOption(rmOpt);
    opts.addOption(cOpt);

    CommandLine cl;
    CommandLineParser parser = new GnuParser();
    try {
        cl = parser.parse(opts, args);
    } catch (ParseException e) {
        System.err.println("Error parsing command line: " + e.getMessage());
        return -1;
    }

    if (!cl.hasOption(INPUT_OPT) || !cl.hasOption(OUTPUT_OPT)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(getClass().getName(), opts);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    int reduceNo = DEFAULT_REDUCER_NO;
    if (cl.hasOption(REDUCE_NO)) {
        try {
            reduceNo = Integer.parseInt(cl.getOptionValue(REDUCE_NO));
        } catch (NumberFormatException e) {
            System.err.println("Error parsing reducer number: " + e.getMessage());
        }
    }

    String jobName = "Distributed timeseries [R] correlation";
    if (cl.hasOption(JOB_NAME)) {
        jobName = cl.getOptionValue(JOB_NAME);
        jobName = jobName.replace('-', ' ');
    }

    if (cl.hasOption(REMOVE_OUTPUT)) {
    }

    String input = cl.getOptionValue(INPUT_OPT);
    String output = cl.getOptionValue(OUTPUT_OPT);

    Configuration conf = getConf();
    //DistributedCache.createSymlink(conf);
    //DistributedCache.addCacheFile(new URI("hdfs://master.hadoop:8020/user/nguyen/lib/"), conf);

    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(TimeSeriesJob.class);
    job.setMapperClass(TimeSeriesMapper.class);
    job.setReducerClass(TimeSeriesReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Timeseries.class);

    job.setNumReduceTasks(reduceNo);
    job.setInputFormatClass(WholeFileInputFormat.class);

    WholeFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, new Path(output));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:de.tuberlin.dima.aim3.HadoopJob.java
License:Open Source License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat)
        throws IOException {

    Job job = new Job(new Configuration(getConf()));
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    } else {
        job.setJarByClass(mapper);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setJobName(getCustomJobName(job, mapper));

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
From source file:de.tudarmstadt.lt.wiki.statistics.ResourceInlinkCount.java
public static boolean runJob1(String inDir, String outDir) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.child.java.opts", "-Xmx1200M");
    conf.set("mapred.job.map.memory.mb", "1280");
    conf.set("mapreduce.job.queuename", "smalljob");

    Job job = Job.getInstance(conf);
    job.setJarByClass(ResourceInlinkCount.class);

    FileInputFormat.addInputPath(job, new Path(inDir));
    FileOutputFormat.setOutputPath(job, new Path(outDir));

    job.setMapperClass(Map.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(TextInputFormat.class);

    return job.waitForCompletion(true);
}
From source file:de.tudarmstadt.lt.wiki.statistics.ResourceInlinkCount.java
public static boolean runJob2(String inDir, String outDir) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapreduce.job.queuename", "smalljob");

    Job job = Job.getInstance(conf);
    job.setJarByClass(ResourceInlinkCount.class);

    FileInputFormat.addInputPath(job, new Path(inDir));
    FileOutputFormat.setOutputPath(job, new Path(outDir));

    job.setMapperClass(Map2.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(TextInputFormat.class);

    return job.waitForCompletion(true);
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/**
 * pass1: generate collocations, ngrams
 */
private static long generateCollocations(Path input, Path output, Configuration baseConf,
        boolean emitUnigrams, int maxNGramSize, int reduceTasks, int minSupport, Window mode,
        int winsize) throws IOException, ClassNotFoundException, InterruptedException {

    Configuration con = new Configuration(baseConf);
    con.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    con.setInt(CollocMapper.MAX_SHINGLE_SIZE, maxNGramSize);
    con.setInt(CollocReducer.MIN_SUPPORT, minSupport);
    con.set(WINDOW_TYPE, mode.toString());
    con.setInt(WINDOW_SIZE, winsize);

    if (mode.toString().equalsIgnoreCase("DOCUMENT")) {
        con.setInt("mapred.job.map.memory.mb", 3000);
        con.set("mapred.child.java.opts", "-Xmx2900M");
        con.set("mapred.reduce.child.java.opts", "-Xmx8000M");
        con.setInt("mapred.job.reduce.memory.mb", 8120);
    } else {
        con.setInt("mapred.job.map.memory.mb", 2000);
        con.set("mapred.child.java.opts", "-Xmx1900M");
        con.set("mapred.reduce.child.java.opts", "-Xmx2900M");
        con.setInt("mapred.job.reduce.memory.mb", 3000);
    }
    con.setBoolean("mapred.compress.map.output", true);
    con.setStrings("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    con.setBoolean("mapred.compress.output", true);
    con.setStrings("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    con.setInt("mapred.task.timeout", 6000000);
    con.setInt("io.sort.factor", 50);
    con.setInt("mapreduce.map.tasks", 256);
    con.setInt("dfs.replication", 1);

    Job job = new Job(con);
    job.setJobName(CollocDriver.class.getSimpleName() + ".generateCollocations:" + input);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(GramKey.class);
    job.setMapOutputValueClass(Gram.class);
    job.setPartitionerClass(GramKeyPartitioner.class);
    job.setGroupingComparatorClass(GramKeyGroupComparator.class);

    job.setOutputKeyClass(Gram.class);
    job.setOutputValueClass(Gram.class);

    job.setCombinerClass(CollocCombiner.class);

    FileInputFormat.setInputPaths(job, input);

    Path outputPath = new Path(output, SUBGRAM_OUTPUT_DIRECTORY);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(CollocMapper.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setReducerClass(CollocReducer.class);
    job.setNumReduceTasks(512);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CollocMapper.Count.NGRAM_TOTAL).getValue();
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.CollocDriver.java
License:Apache License
/**
 * pass2: perform the LLR calculation
 */
private static void computeNGramsPruneByLLR(Path output, Configuration baseConf, long nGramTotal,
        boolean emitUnigrams, float minValue, int reduceTasks)
        throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration(baseConf);
    conf.setLong(AssocReducer.NGRAM_TOTAL, nGramTotal);
    conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
    conf.setFloat(AssocReducer.MIN_VALUE, minValue);
    conf.setInt("mapred.job.map.memory.mb", 1280);
    conf.setInt("mapred.job.reduce.memory.mb", 2560);
    conf.set("mapred.reduce.child.java.opts", "-Xmx2G");
    conf.setInt("mapred.task.timeout", 6000000);
    conf.set(AssocReducer.ASSOC_METRIC, "llr");

    Job job = new Job(conf);
    job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " + output + " pruning: " + minValue);
    job.setJarByClass(CollocDriver.class);

    job.setMapOutputKeyClass(Gram.class);
    job.setMapOutputValueClass(Gram.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(job, new Path(output, SUBGRAM_OUTPUT_DIRECTORY));
    Path outPath = new Path(output, NGRAM_OUTPUT_DIRECTORY + "_llr");
    FileOutputFormat.setOutputPath(job, outPath);

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.setReducerClass(AssocReducer.class);
    job.setNumReduceTasks(reduceTasks);

    // Defines additional single text based output 'contingency' for the job
    MultipleOutputs.addNamedOutput(job, "contingency", TextOutputFormat.class, Text.class, Text.class);

    // Defines additional text based outputs for the individual association metrics
    MultipleOutputs.addNamedOutput(job, "llr", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "pmi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "chi", TextOutputFormat.class, Text.class, DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "dice", TextOutputFormat.class, Text.class, DoubleWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}