List of usage examples for org.apache.hadoop.mapreduce.Job.setJarByClass
public void setJarByClass(Class<?> cls)
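setJarByClass tells Hadoop which jar to ship to the cluster: it locates the jar that contains the given class and sets it as the job jar, so the mapper and reducer classes are available on the worker nodes. The driver class itself is the usual argument, as every example below illustrates. A minimal sketch of a typical driver (MyDriver, MyMapper, and MyReducer are hypothetical names for illustration, not classes from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "my job");
        // Locate the jar containing MyDriver and set it as the job jar,
        // so the framework can distribute it to the cluster nodes.
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);   // assumed to be defined elsewhere
        job.setReducerClass(MyReducer.class); // assumed to be defined elsewhere
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}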
From source file: chaohParse.huangWordCount.java
License: Open Source License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(huangWordCount.class);
    job.setMapperClass(WordMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setCombinerClass(WordCombiner.class);
    job.setReducerClass(WordReducer.class);
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: chaohParse.searchWord.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(searchWord.class); // was wordcount.class; the driver class in this file is searchWord
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class); // was Integer.class; map outputs must be Writable types
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class); // was FileInputFormat.class, which is abstract and cannot be instantiated
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: chaohParse.uniword.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(uniword.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    //job.setCombinerClass(myUniwordCombiner.class);
    job.setReducerClass(myUniwordReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(ZipFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: chapter7.src.InputDriver.java
License: Apache License

public static void runJob(Path input, Path output, String vectorClassName, Configuration config)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = config;
    conf.set("vector.implementation.class.name", vectorClassName);
    Job job = new Job(conf, "Input Driver running over input: " + input);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(InputMapper.class);
    job.setNumReduceTasks(0);
    job.setJarByClass(InputDriver.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    job.waitForCompletion(true);
}
From source file: cienciaCelularMR.Main.java

@Override
public int run(String[] args) throws Exception {
    for (int i = 0; i < args.length; i++) {
        System.out.println("Hadoop - arg[" + i + "] is: " + args[i]);
    }

    // YARN memory configuration
    Configuration conf = new Configuration();
    conf.set("mapreduce.map.memory.mb", "1400");
    conf.set("mapreduce.reduce.memory.mb", "2800");
    conf.set("mapreduce.map.java.opts", "-Xmx1120m");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2240m");
    conf.set("yarn.app.mapreduce.am.resource.mb", "2800");
    conf.set("yarn.app.mapreduce.am.command-opts", "-Xmx2240m");
    conf.set("yarn.nodemanager.resource.memory-mb", "5040");
    conf.set("yarn.scheduler.minimum-allocation-mb", "1400");
    conf.set("yarn.scheduler.maximum-allocation-mb", "5040");
    conf.set("mapreduce.task.timeout", "18000000"); // 5 hours

    // Create the job
    Job job = Job.getInstance(conf);
    job.setInputFormatClass(WholeFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[5]));
    FileOutputFormat.setOutputPath(job, new Path(args[6]));

    // Alternate mapper outputs used to report status information
    MultipleOutputs.addNamedOutput(job, "controloutput", TextOutputFormat.class, KeyMcell.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "errormcell", TextOutputFormat.class, KeyMcell.class, Text.class);

    // Files copied to the distributed cache on each node
    job.addCacheFile(new Path("wasb:///mcell.exe").toUri());
    job.addCacheFile(new Path("wasb:///fernet.exe").toUri());
    job.addCacheFile(new Path("wasb:///fernet.cfg").toUri());
    job.addCacheFile(new Path("wasb:///libconfig_d.dll").toUri());
    job.addCacheFile(new Path("wasb:///libtiff3.dll").toUri());
    job.addCacheFile(new Path("wasb:///jpeg62.dll").toUri());
    job.addCacheFile(new Path("wasb:///zlib1.dll").toUri());
    job.addCacheFile(new Path("wasb:///msvcr100d.dll").toUri());

    job.setJarByClass(Main.class);

    Configuration mapAConf = new Configuration(false);
    ChainMapper.addMapper(job, McellMapper.class, KeyMcell.class, BytesWritable.class, KeyMcell.class,
            Text.class, mapAConf);

    Configuration mapBConf = new Configuration(false);
    ChainMapper.addMapper(job, FernetMapper.class, KeyMcell.class, Text.class, KeyMcell.class,
            FernetOutput.class, mapBConf);

    job.setReducerClass(ResultReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    job.submit();
    return 0;
}
From source file: cityhub.CityHub.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "ReduceJoin");
    job.setJarByClass(CityHub.class);
    MultipleInputs.addInputPath(job, new Path(strings[0]), TextInputFormat.class, JoinMapper1.class);
    MultipleInputs.addInputPath(job, new Path(strings[1]), TextInputFormat.class, JoinMapper2.class);
    job.getConfiguration().set("join.type", "innerjoin");
    job.setReducerClass(JoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(strings[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    boolean complete = job.waitForCompletion(true);

    Configuration conf1 = new Configuration();
    Job job2 = Job.getInstance(conf1, "chaining");
    boolean success = false;
    if (complete) {
        job2.setJarByClass(CityHub.class);
        MultipleInputs.addInputPath(job2, new Path(strings[2]), TextInputFormat.class, JoinMapper3.class);
        MultipleInputs.addInputPath(job2, new Path(strings[3]), TextInputFormat.class, JoinMapper4.class);
        job2.getConfiguration().set("join.type", "innerjoin");
        job2.setReducerClass(JoinReducer1.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        TextOutputFormat.setOutputPath(job2, new Path(strings[4]));
        // Run the second job only if the first one succeeded; the original
        // called waitForCompletion unconditionally, which fails when job2
        // was never configured.
        success = job2.waitForCompletion(true);
    }
    return success ? 0 : 4;
}
From source file: cityhubpartitioningcountry.CityHubPartitioning.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "partitioner");
    job.setJarByClass(CityHubPartitioning.class);
    job.setMapperClass(PartitionMonthMapper.class);
    job.setReducerClass(countryReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(PartitionCountryPartitioner.class);
    job.setNumReduceTasks(27);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: cityhubtopten.CityHubTopTen.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Top10");
        job.setJarByClass(CityHubTopTen.class);
        job.setMapperClass(Top10Mapper.class);
        job.setReducerClass(Top10Reducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | InterruptedException | ClassNotFoundException ex) {
        System.out.println("Error message: " + ex.getMessage());
    }
}
From source file: cloud9.ComputeCooccurrenceMatrixStripesOOM.java
License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int window = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: ComputeCooccurrenceMatrixStripes");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - window: " + window);
    sLogger.info(" - number of reducers: " + reduceTasks);

    Job job = new Job(getConf(), "CooccurrenceMatrixStripes");

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    job.getConfiguration().setInt("window", window);
    job.setJarByClass(ComputeCooccurrenceMatrixStripesOOM.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(String2IntOpenHashMapWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    job.getConfiguration().setInt("io.sort.mb", 400);
    job.getConfiguration().set("mapred.child.java.opts",
            "-Xmx1000m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps");
    //job.getConfiguration().set("mapred.child.java.opts", "-Xmx1000m");
    job.getConfiguration().setInt("child.monitor.jstat.seconds", 2);

    job.getConfiguration().set("fs.default.name", "hdfs://master:9000");
    job.getConfiguration().set("mapred.job.tracker", "master:9001");
    //conf.set("user.name", "xulijie");
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", 1);

    //job.getConfiguration().setFloat("io.sort.record.percent", 0.2f);
    //job.getConfiguration().setFloat("io.sort.spill.percent", 0.95f);
    //conf.setFloat("mapred.job.shuffle.input.buffer.percent", 0.9f);
    //conf.setFloat("mapred.job.shuffle.merge.percent", 0.9f);
    //conf.setFloat("mapred.job.reduce.input.buffer.percent", 0.4f);
    //conf.set("mapred.job.tracker", "local");
    //conf.set("fs.default.name", "file:///");

    job.getConfiguration().setLong("mapred.min.split.size", 512 * 1024 * 1024L);
    job.getConfiguration().setLong("mapred.max.split.size", 512 * 1024 * 1024L);
    job.getConfiguration().setInt("mapred.map.max.attempts", 0);
    job.getConfiguration().setInt("mapred.reduce.max.attempts", 0);

    //job.getConfiguration().set("heapdump.reduce.input.groups", "3,897,853[5]");
    //job.getConfiguration().set("heapdump.reduce.input.records", "8407734;8407737;8407740;8407743;8407746;8407749;8407750");
    //job.getConfiguration().set("omit.reduce.input.records", "8407733;8407750");
    //job.getConfiguration().set("heapdump.reduce.input.records", "8407751");
    //job.getConfiguration().set("heapdump.reduce.output.records", "3897853");
    job.getConfiguration().set("heapdump.task.attempt.ids", "attempt_201404281552_0001_r_000000_0");

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file: clustering.init.Driver.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s input_dir output_dir [column_splitter] [dict_path]\n",
                this.getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.set("column.splitter", args[2]);
    } else {
        conf.set("column.splitter", ",");
    }

    if (args.length > 3) {
        conf.set("dict.path", args[3]);
    } else {
        conf.set("dict.path", "./dicts");
    }

    Job job = Job.getInstance(conf, "Initialization job");
    job.setJarByClass(Driver.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setMapperClass(WordSepMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("Initialization job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}