List of usage examples for org.apache.hadoop.mapreduce.Job.setJarByClass
public void setJarByClass(Class<?> cls)
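setJarByClass tells the MapReduce framework which JAR to ship to the cluster: Hadoop locates the JAR on the classpath that contains the given class and distributes it so that task JVMs on remote nodes can load the job's mapper, reducer, and other user classes. The following is a minimal driver sketch of the usual pattern; the class names (WordCountDriver, WordCountMapper, WordCountReducer) and the argument paths are hypothetical placeholders, not taken from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        // Ship the JAR containing this driver class to the cluster so that
        // task JVMs can resolve the mapper/reducer classes set below.
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMapper.class);   // hypothetical mapper
        job.setReducerClass(WordCountReducer.class); // hypothetical reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}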
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java
License:Open Source License
public int run1(String[] args) throws IOException {
    if (args.length != 3) {
        System.err.println("Usage: java " + getClass().getName() + " <inputDir> <outDir> <ntasks>");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Configuration conf = getConf();
    final Job job2 = new Job(conf, "ConsineMain cartesian product");
    job2.setJarByClass(CosineMain.class);

    job2.setInputFormatClass(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, args[0]);
    FileOutputFormat.setOutputPath(job2, new Path(args[1]));

    job2.setMapperClass(CartesianProductMapper.class);
    job2.setMapOutputKeyClass(DoubleWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setSortComparatorClass(DescendingKeyComparator.class);

    job2.setReducerClass(CartesianProductReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(DoubleWritable.class);
    job2.setNumReduceTasks(Integer.parseInt(args[2]));

    System.out.println("\nStarting Job-2 ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job2.waitForCompletion(true)) {
            System.out.println("Job-2 failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.HistogramMain.java
License:Open Source License
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.HistogramMain <input> <output>");
        System.exit(2);
    }
    Job job = new Job(conf, "HistogramMain");
    job.setJarByClass(HistogramMain.class);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            System.out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.PARMain.java
License:Open Source License
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.PARMain <input> <output>");
        System.exit(2);
    }
    Job job = new Job(conf, "PARMain");
    job.setJarByClass(PARMain.class);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            System.out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.ThreelMain.java
License:Open Source License
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.ThreelMain <input> <output>");
        System.exit(2);
    }
    Job job = new Job(conf, "ThreelMain");
    job.setJarByClass(ThreelMain.class);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(ArrayPrimitiveWritable.class);

    job.setCombinerClass(MyCombiner.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            System.out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}
From source file:CalculateSentiment.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Path tempDir = new Path("wordcount-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: wordcount <in> <out> <category>");
        System.exit(2);
    }
    conf.set("category", otherArgs[2]);
    // try {
    //     String filePath = otherArgs[0];
    //     BufferedReader br = new BufferedReader(new FileReader(filePath));
    //     String line = br.readLine();
    //     conf.set("category", line);
    // } catch (Exception e) {
    //     e.printStackTrace();
    // }
    // conf.set("category", WordCount.read(otherArgs[2]));

    DistributedCache.createSymlink(conf);
    String path = "CalculateSentiment.obj";
    Path filePath = new Path(path);
    String uriWithLink = filePath.toUri().toString() + "#" + "object";
    DistributedCache.addCacheFile(new URI(uriWithLink), conf);
    // DistributedCache.addCacheFile(new URI("/CalculateSentiment.obj"), conf);

    Job job = new Job(conf, "Test");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(DoubleSumReducer.class);
    job.setReducerClass(DoubleSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:capturer.valueMerge.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "so fast");
    job.setJarByClass(valueMerge.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:cassandra_mapreduce.MapReduceCassandraDB.java
License:GNU General Public License
public int run(String[] args) throws Exception {
    String columnName = "value";
    getConf().set(CONF_COLUMN_NAME, columnName);
    getConf().set("mapred.job.tracker", args[0] + ":8021");

    Job job = new Job(getConf(), "Phase1");
    job.setJarByClass(MapReduceCassandraDB.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(ReducerToCassandra.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(ByteBuffer.class);
    job.setOutputValueClass(List.class);
    job.setInputFormatClass(ColumnFamilyInputFormat.class);
    job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
    ConfigHelper.setRangeBatchSize(job.getConfiguration(), 800);
    ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
    ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job.getConfiguration(), args[0]);
    ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
    SlicePredicate predicate = new SlicePredicate()
            .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
    job.waitForCompletion(true);

    // Phase 2
    Job job2 = new Job(getConf(), "Phase2");
    job2.setJarByClass(MapReduceCassandraDB.class);
    job2.setMapperClass(Mapper2.class);
    job2.setReducerClass(Reducer2.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(ByteBuffer.class);
    job2.setOutputValueClass(List.class);
    job2.setInputFormatClass(ColumnFamilyInputFormat.class);
    job2.setOutputFormatClass(ColumnFamilyOutputFormat.class);
    ConfigHelper.setOutputColumnFamily(job2.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY2);
    ConfigHelper.setRpcPort(job2.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job2.getConfiguration(), args[0]);
    ConfigHelper.setPartitioner(job2.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job2.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
    SlicePredicate predicate2 = new SlicePredicate()
            .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
    ConfigHelper.setInputSlicePredicate(job2.getConfiguration(), predicate2);
    job2.waitForCompletion(true);

    // job.setCombinerClass(IntSumReducer.class);
    // job.setReducerClass(IntSumReducer.class);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(Text.class);
    //
    // job.setInputFormatClass(ColumnFamilyInputFormat.class);
    // FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));
    //
    // ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    // ConfigHelper.setInitialAddress(job.getConfiguration(), args[0]);
    // ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    // ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
    // SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
    // ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
    //
    // job.waitForCompletion(true);
    return 0;
}
From source file:cc.slda.AnnotateDocuments.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }

    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    job.setNumReduceTasks(reduceTasks);
    conf.setFloat(PCUTOFF, cutoff);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:cgl.hadoop.apps.runner.DataAnalysis.java
License:Open Source License
/**
 * Launch the MapReduce computation.
 * This method first removes any previous working directories and creates a new one,
 * then copies the data (file names) to this new directory and launches the
 * MapReduce (map-only, though) computation.
 * @param numReduceTasks - Number of reduce tasks = 0.
 * @param programDir - The directory where the Cap3 program is.
 * @param execName - Name of the executable.
 * @param dataDir - Directory where the data is located.
 * @param outputDir - Output directory to place the output.
 * @param cmdArgs - These are the command line arguments to the Cap3 program.
 * @throws Exception - Throws any exception that occurs in this program.
 */
void launch(int numReduceTasks, String programDir, String execName, String workingDir,
        String databaseArchive, String databaseName, String dataDir, String outputDir, String cmdArgs)
        throws Exception {

    Configuration conf = new Configuration();
    Job job = new Job(conf, execName);

    // First get the file system handler, delete any previous files, add the
    // files and write the data to it, then pass its name as a parameter to
    // the job.
    Path hdMainDir = new Path(outputDir);
    FileSystem fs = FileSystem.get(conf);
    fs.delete(hdMainDir, true);
    Path hdOutDir = new Path(hdMainDir, "out");

    // Starting the data analysis.
    Configuration jc = job.getConfiguration();
    jc.set(WORKING_DIR, workingDir);
    jc.set(EXECUTABLE, execName);
    jc.set(PROGRAM_DIR, programDir); // this is the name of the executable archive
    jc.set(DB_ARCHIVE, databaseArchive);
    jc.set(DB_NAME, databaseName);
    jc.set(PARAMETERS, cmdArgs);
    jc.set(OUTPUT_DIR, outputDir);

    // using distributed cache
    // flush it
    // DistributedCache.releaseCache(new URI(programDir), jc);
    // DistributedCache.releaseCache(new URI(databaseArchive), jc);
    // DistributedCache.purgeCache(jc);

    // re-put the data into the cache
    long startTime = System.currentTimeMillis();
    // DistributedCache.addCacheArchive(new URI(databaseArchive), jc);
    DistributedCache.addCacheArchive(new URI(programDir), jc);
    System.out.println(
            "Add Distributed Cache in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, hdOutDir);

    job.setJarByClass(DataAnalysis.class);
    job.setMapperClass(RunnerMap.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(DataFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReduceTasks);

    startTime = System.currentTimeMillis();
    int exitStatus = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    // clean the cache
    System.exit(exitStatus);
}
From source file:chaohBIM.BIMGetIndex.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "getTfidf");
    job.setJarByClass(BIMGetIndex.class);
    job.setMapperClass(tfidfMapper.class);
    job.setCombinerClass(tfidfCombiner.class);
    job.setReducerClass(tfidfdReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(ZipFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}