List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass
public void setJarByClass(Class<?> cls)
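Job.setJarByClass(Class<?> cls) does not take a jar path; it takes a class, and Hadoop locates the jar that contains that class and ships it to the cluster so the map and reduce classes can be loaded on the worker nodes. By convention the driver class itself is passed. Below is a minimal, self-contained driver sketch before the full examples; the class name SetJarByClassExample and the identity Mapper/Reducer setup are placeholders for illustration only, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJarByClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setJarByClass-example");

        // Hadoop finds the jar containing SetJarByClassExample.class and
        // distributes it to the cluster; passing the driver class is the usual idiom.
        job.setJarByClass(SetJarByClassExample.class);

        // Identity mapper/reducer just to make the sketch runnable; a real job
        // plugs in its own classes here, as the examples below do.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}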
From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java
/**
 * Invocation: {number of queens} {root directory} [-F]
 *
 * @param args command-line arguments
 * @return exit code
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();

    // Create a Job using the processed conf
    Job job = new Job(conf, "nqueens-counter");
    job.setJarByClass(NQueenCounter.class);

    int queensNumber = Integer.parseInt(args[0]);
    String workingFolder = args.length >= 2 ? args[1] : null;
    boolean isFinal = args.length >= 3 && "-F".equals(args[2]);

    Path sourcePath = this.setWorkingFolder(queensNumber, workingFolder, isFinal, job);
    job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    job.setOutputValueClass(org.apache.hadoop.io.Text.class);

    if (isFinal) {
        job.setMapperClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterResultMapper.class);
        job.setReducerClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterResultReducer.class);
    } else {
        job.setMapperClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterMapper.class);
        job.setReducerClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterReducer.class);
    }

    // Submit the job, then poll for progress until the job is complete
    boolean result = job.waitForCompletion(true);

    if (sourcePath != null) {
        FileSystem fs = FileSystem.get(conf);
        fs.delete(sourcePath, true);
    }

    return result ? 0 : 1;
}
From source file:br.ufpr.inf.hpath.HPath.java
License:Apache License
/**
 * Executes the XPath query as a Hadoop job.
 *
 * args[0] - XML input file which holds all the data.
 * args[1] - output directory where the query result is stored.
 * args[2] - XPath query submitted by the user via the CLI.
 *
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("USAGE: hpath [input_file] [output_dir] [xpath_query]");
        System.exit(-1);
    }
    System.out.println("***************");
    System.out.println(" Query  -> " + args[2]);
    System.out.println(" Input  -> " + args[0]);
    System.out.println(" Output -> " + args[1]);
    System.out.println("***************");

    String xpath_query = args[2];
    String inputFile = args[0];
    String outputFile = args[1];
    String tag = "";
    // tag = getFisrtQueryTag(xpath_query);
    tag = getLastQueryTag(xpath_query);

    Configuration conf = new Configuration();
    conf.set("xmlinput.start", "<" + tag);
    conf.set("xmlinput.end", "</" + tag + ">");
    conf.set("xpath.query", xpath_query);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "HPath");
    FileSystem fs = FileSystem.get(conf);
    Path inFile = new Path(inputFile);
    Path outFile = new Path(outputFile);

    if (!fs.exists(inFile)) {
        System.out.println("error: Input file not found.");
        System.exit(-1);
    }
    if (!fs.isFile(inFile)) {
        System.out.println("error: Input should be a file.");
        System.exit(-1);
    }
    if (fs.exists(outFile)) {
        System.out.println("error: Output already exists.");
        System.exit(-1);
    }

    job.setJarByClass(HPath.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(XmlItemInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, inFile);
    FileOutputFormat.setOutputPath(job, outFile);
    job.waitForCompletion(true);
}
From source file:bulkload.ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML-supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            TableName tableName = TableName.valueOf(args[0]);
            if (!admin.tableExists(tableName)) {
                String errorMsg = format("Table '%s' does not exist.", tableName);
                LOG.error(errorMsg);
                throw new TableNotFoundException(errorMsg);
            }

            Path inputDir = new Path(args[1]);
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(TsvImporter.class);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(TsvImporter.class);

            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            if (hfileOutPath != null) {
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    Path outputDir = new Path(hfileOutPath);
                    FileSystem fs = FileSystem.get(conf);
                    if (fs.exists(outputDir)) {
                        if (!fs.delete(outputDir, true)) {
                            throw new IllegalStateException("delete path:" + outputDir + " failed");
                        }
                    }
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    job.setMapOutputValueClass(Put.class);
                    job.setReducerClass(PutSortReducer.class);
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                // No reducers. Just write straight to the table.
                // Call initTableReducerJob to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
                // TableMapReduceUtil.addDependencyJars(job);
                // TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                //     com.google.common.base.Function.class /* Guava used by TsvParser */);
            }

            // Workaround to remove unnecessary hadoop dependencies
            String[] jars = job.getConfiguration().get("tmpjars").split(",", -1);
            StringBuilder filteredJars = new StringBuilder();
            for (String j : jars) {
                String[] parts = j.split("/", -1);
                String fileName = parts[parts.length - 1];
                if (fileName.indexOf("hadoop-") != 0) {
                    filteredJars.append(j);
                    filteredJars.append(",");
                }
            }
            job.getConfiguration().set("tmpjars", filteredJars.toString());
        }
    }
    return job;
}
From source file:Business.MapReduceOne.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "FirstJob");
    job.setJarByClass(MapReduceOne.class);

    final File f = new File(MapReduceOne.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/inFiles/";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outFiles/OutputOne";
    // Use the arguments instead, if provided.
    if (args.length > 2) {
        inFiles = args[1];
        outFiles = args[2];
    }

    Path in = new Path(inFiles);
    Path out = new Path(outFiles);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(Mapper1.class);
    job.setCombinerClass(Reducer1.class);
    job.setReducerClass(Reducer1.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
        //        new Path("/user/npapa/" + regions + "partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);

        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME + "_stats");
            //hadmin.deleteTable(TABLE_NAME + "_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);
    } catch (IOException e2) {
        e2.printStackTrace();
    }
    return job;
}
From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.PerfectX.java
License:Apache License
/**
 * Runs this tool.
 */
@Override
public int run(String[] argv) throws Exception {
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    LOG.info("Tool: " + PerfectX.class.getSimpleName());
    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);
    LOG.info(" - use in-mapper combining: " + args.imc);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(PerfectX.class.getSimpleName());
    job.setJarByClass(PerfectX.class);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.WordCount.java
License:Apache License
/**
 * Runs this tool.
 */
@Override
public int run(String[] argv) throws Exception {
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    LOG.info("Tool: " + WordCount.class.getSimpleName());
    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);
    LOG.info(" - use in-mapper combining: " + args.imc);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(WordCount.class.getSimpleName());
    job.setJarByClass(WordCount.class);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(
            OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sourcesString = cmdline.getOptionValue(SOURCES);
    String[] sources = sourcesString.split(",");
    for (int i = 0; i < sources.length; i++) {
        sources[i] = sources[i].trim();
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - use sources: " + sourcesString);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setStrings(SOURCES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain.java
License:Open Source License
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.meterperfile.ThreelMain <input> <output>");
        System.exit(2);
    }
    conf.set("mapreduce.input.fileinputformat.split.maxsize", "100");

    Job job = new Job(conf, "ThreelMain");
    job.setJarByClass(ThreelMain.class);

    job.setInputFormatClass(UnsplitableTextInputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    // job.setOutputKeyClass(LongWritable.class);
    // job.setOutputValueClass(Text.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            System.out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java
License:Open Source License
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain <input> <output>");
        System.exit(2);
    }

    Job job1 = new Job(conf, "CosineMain");
    job1.setJarByClass(CosineMain.class);

    job1.setMapperClass(AggregateReadingsMapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(DoubleWritable.class);
    job1.setReducerClass(AggregateReadingsReducer.class);
    job1.setOutputKeyClass(LongWritable.class);
    job1.setOutputValueClass(Text.class);

    FileInputFormat.setInputDirRecursive(job1, true);
    FileInputFormat.setInputPaths(job1, new Path(otherArgs[0]));
    int lastIdx = otherArgs[0].lastIndexOf("/");
    String tempOutput = otherArgs[0].substring(0, lastIdx) + "/temp";
    FileOutputFormat.setOutputPath(job1, new Path(tempOutput));

    System.out.println("\nStarting Job-1 ...");
    final long startTime = System.currentTimeMillis();
    try {
        final long startTimeJob1 = System.currentTimeMillis();
        if (!job1.waitForCompletion(true)) {
            System.out.println("Job-1 failed.");
        } else {
            System.out.println("Duration of Job1 "
                    + ((System.currentTimeMillis() - startTimeJob1) / 1000.0) + " seconds.");

            final Job job2 = new Job(conf, "CosineMain Aggregate");
            job2.setJarByClass(CosineMain.class);
            job2.setInputFormatClass(CartesianInputFormat.class);
            CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, tempOutput);
            CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, tempOutput);
            FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));

            job2.setMapperClass(CartesianProductMapper.class);
            job2.setMapOutputKeyClass(DoubleWritable.class);
            job2.setMapOutputValueClass(Text.class);
            job2.setSortComparatorClass(DescendingKeyComparator.class);
            job2.setReducerClass(CartesianProductReducer.class);
            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(DoubleWritable.class);
            job2.setNumReduceTasks(10);

            final long startTimeJob2 = System.currentTimeMillis();
            System.out.println("\nStarting Job-2 ...");
            if (!job2.waitForCompletion(true)) {
                System.out.println("Job-2 failed.");
            } else {
                System.out.println("Duration of Job2: "
                        + ((System.currentTimeMillis() - startTimeJob2) / 1000.0) + " seconds.");
            }
        }
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(tempOutput), true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Total Duration: " + duration + " seconds.");
    }
    return 0;
}