List of usage examples for org.apache.hadoop.mapreduce.Job
Job(Configuration conf, String jobName) throws IOException
From source file:com.asp.tranlog.ImportTsv.java
License:Apache License
/** * Sets up the actual job.//from w ww . j a v a 2 s . co m * * @param conf * The current configuration. * @param args * The command line parameters. * @return The newly created job. * @throws IOException * When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException { // Support non-XML supported characters // by re-encoding the passed separator as a Base64 string. String actualSeparator = conf.get(SEPARATOR_CONF_KEY); if (actualSeparator != null) { conf.set(SEPARATOR_CONF_KEY, new String(Base64.encodeBytes(actualSeparator.getBytes()))); } // See if a non-default Mapper was set String mapperClassName = conf.get(MAPPER_CONF_KEY); Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER; String tableName = args[0]; Path inputDir = new Path(args[1]); Job job = new Job(conf, NAME + "_" + tableName); job.setJarByClass(mapperClass); FileInputFormat.setInputPaths(job, inputDir); String inputCodec = conf.get(INPUT_LZO_KEY); if (inputCodec == null) { FileInputFormat.setMaxInputSplitSize(job, 67108864l); // max split // size = // 64m job.setInputFormatClass(TextInputFormat.class); } else { if (inputCodec.equalsIgnoreCase("lzo")) job.setInputFormatClass(LzoTextInputFormat.class); else { usage("not supported compression codec!"); System.exit(-1); } } job.setMapperClass(mapperClass); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); if (hfileOutPath != null) { HTable table = new HTable(conf, tableName); job.setReducerClass(PutSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(Put.class); HFileOutputFormat.configureIncrementalLoad(job, table); } else { // No reducers. Just write straight to table. Call // initTableReducerJob // to set up the TableOutputFormat. 
TableMapReduceUtil.initTableReducerJob(tableName, null, job); job.setNumReduceTasks(0); } TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /* * Guava used by TsvParser */); return job; }
From source file:com.bah.applefox.main.plugins.fulltextindex.FTLoader.java
License:Apache License
/** * run takes the comandline args as arguments (in this case from a * configuration file), creates a new job, configures it, initiates it, * waits for completion, and returns 0 if it is successful (1 if it is not) * /*from w w w .ja v a 2 s . c o m*/ * @param args * the commandline arguments (in this case from a configuration * file) * * @return 0 if the job ran successfully and 1 if it isn't */ public int run(String[] args) throws Exception { try { // Initialize variables FTLoader.articleFile = args[8]; FTLoader.maxNGrams = Integer.parseInt(args[9]); FTLoader.stopWords = getStopWords(); FTLoader.dTable = args[10]; FTLoader.urlCheckedTable = args[11]; FTLoader.divsFile = args[20]; FTLoader.exDivs = getExDivs(); // Give the job a name String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis(); // Create job and set the jar Job job = new Job(getConf(), jobName); job.setJarByClass(this.getClass()); String urlTable = args[5]; job.setInputFormatClass(AccumuloInputFormat.class); InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable, new Authorizations()); job.setMapperClass(MapperClass.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setReducerClass(ReducerClass.class); job.setNumReduceTasks(Integer.parseInt(args[4])); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable); job.waitForCompletion(true); return job.isSuccessful() ? 
0 : 1; } catch (IOException e) { if (e.getMessage() != null) { log.error(e.getMessage()); } else { log.error(e.getStackTrace()); } } catch (InterruptedException e) { if (e.getMessage() != null) { log.error(e.getMessage()); } else { log.error(e.getStackTrace()); } } catch (ClassNotFoundException e) { if (e.getMessage() != null) { log.error(e.getMessage()); } else { log.error(e.getStackTrace()); } } return 1; }
From source file:com.bah.applefox.main.plugins.imageindex.ImageLoader.java
License:Apache License
/** * run takes the comandline args as arguments (in this case from a * configuration file), creates a new job, configures it, initiates it, * waits for completion, and returns 0 if it is successful (1 if it is not) * // w ww . j av a 2s . c o m * @param args * the commandline arguments (in this case from a configuration * file) * * @return 0 if the job ran successfully and 1 if it isn't */ public int run(String[] args) throws Exception { checkedImages = args[18]; hashTable = args[17]; tagTable = args[19]; divsFile = args[20]; UserAgent = args[6]; // Create the table AccumuloUtils.setSplitSize(args[23]); AccumuloUtils.connectBatchWrite(checkedImages).close(); // Give the job a name String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis(); // Create the job and set its jar Job job = new Job(getConf(), jobName); job.setJarByClass(this.getClass()); // Set the url table to read from String urlTable = args[5]; job.setInputFormatClass(AccumuloInputFormat.class); InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable, new Authorizations()); job.setMapperClass(MapperClass.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setNumReduceTasks(Integer.parseInt(args[4])); job.setReducerClass(ReducerClass.class); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable); AccumuloUtils.setSplitSize(args[22]); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:com.bah.applefox.main.plugins.pageranking.utilities.CountURLs.java
License:Apache License
/**
 * Configures and runs the URL-counting job: reads the crawler's "From"
 * link table and writes counts into the table named by args[15].
 *
 * @param args the command-line arguments (from a configuration file)
 * @return 0 if the job ran successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    // Timestamped job name; set the jar from this class.
    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    // Table of outbound ("From") links produced by the crawler.
    mappedInput = args[12] + "From";

    // Input side: Accumulo table scan.
    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), mappedInput,
            new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setReducerClass(ReducerClass.class);

    // Output side: write into the table named by args[15].
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            args[15]);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:com.bah.applefox.main.plugins.pageranking.utilities.DampenTable.java
License:Apache License
/**
 * Configures and runs the dampening job over the "New" page-rank table
 * (reads and writes tablePrefix + "New").
 *
 * @param args the command-line arguments (from a configuration file)
 * @return 0 if the job ran successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    // Timestamped job name; set the jar from this class.
    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    // Plugin state from the argument list.
    tablePrefix = args[13];
    dampeningFactor = Double.parseDouble(args[14]);

    // Input side: scan the "New" rank table.
    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(),
            tablePrefix + "New", new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setReducerClass(ReducerClass.class);

    // Output side: write the dampened values back into the same table.
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "New");

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:com.bah.applefox.main.plugins.pageranking.utilities.InitializePRTables.java
License:Apache License
public int run(String[] args) throws Exception { tablePrefix = args[13];// w w w . j ava2 s . c o m String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis(); Job job = new Job(getConf(), jobName); job.setJarByClass(this.getClass()); job.setInputFormatClass(AccumuloInputFormat.class); InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), args[12] + "To", new Authorizations()); job.setMapperClass(MapperClass.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setReducerClass(ReducerClass.class); job.setOutputFormatClass(AccumuloOutputFormat.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]); AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, tablePrefix + "Old"); AccumuloUtils.connectBatchWrite(tablePrefix + "New"); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:com.bah.applefox.main.plugins.pageranking.utilities.MRPageRanking.java
License:Apache License
/**
 * Configures and runs one page-rank iteration: reads the "To" link table
 * and writes updated ranks into tablePrefix + "New".
 *
 * @param args the command-line arguments (from a configuration file)
 * @return 0 if the job ran successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    // Timestamped job name; set the jar from this class.
    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    // Plugin state from the argument list.
    tablePrefix = args[13];
    outboundLinks = args[15];

    // Input side: scan the inbound ("To") link table.
    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloInputFormat.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(),
            args[12] + "To", new Authorizations());

    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setReducerClass(ReducerClass.class);

    // Output side: write the updated ranks into the "New" table.
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            tablePrefix + "New");

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:com.bah.applefox.main.plugins.webcrawler.WebCrawler.java
License:Apache License
/**
 * Takes the command-line args (in this case from a configuration file),
 * creates a new crawler job, configures it, runs it, and waits for
 * completion. The job is map-only (no reducers) and discards its output
 * via NullOutputFormat.
 *
 * @param args the command-line arguments (from a configuration file)
 * @return 0 if the job ran successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
    // Plugin state from the argument list.
    userAgent = args[6];

    // Timestamped job name; set the jar from this class.
    Job job = new Job(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    String clone = args[5];
    String clone2 = args[12];
    table = clone;

    AccumuloUtils.setSplitSize(args[24]);

    // Outbound and inbound link tables derived from the base name.
    table2 = clone2 + "From";
    table3 = clone2 + "To";

    // Input side: scan the url table.
    job.setInputFormatClass(AccumuloInputFormat.class);
    InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), clone,
            new Authorizations());

    // Map-only job: the mapper writes directly, output is discarded.
    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
    AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true,
            clone);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:com.baidu.cloud.bmr.mapreduce.AccessLogAnalyzer.java
License:Open Source License
/**
 * Entry point: runs the access-log analysis job.
 * Usage: AccessLogAnalyzer &lt;input path&gt; &lt;output path&gt;
 *
 * @param args args[0] is the input path, args[1] the output path
 */
public static void main(String[] args) {
    Configuration conf = new Configuration();
    if (args.length != 2) {
        System.err.println("Usage: AccessLogAnalyzer <input path> <output path>");
        System.exit(-1);
    }
    String inputPath = args[0];
    String outputPath = args[1];
    try {
        Job job = new Job(conf, "AccessLogAnalyzer");
        job.setJarByClass(AccessLogAnalyzer.class);
        job.setMapperClass(AccessLogAnalyzerMapper.class);
        job.setReducerClass(AccessLogAnalyzerReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | ClassNotFoundException | InterruptedException e) {
        // The original swallowed the exception and fell off main with exit
        // code 0, making failures look like success. Report and fail.
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.basho.riak.hadoop.RiakWordCount.java
License:Apache License
public int run(String[] args) throws Exception { String[] keys = new String[10000]; for (int i = 0; i < 10000; i++) { keys[i] = String.valueOf(i + 1000); }//from w w w . ja v a 2s . co m Configuration conf = getConf(); conf = RiakConfig.setKeyLister(conf, new BucketKeyLister("wordcount")); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 11087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 12087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 13087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 14087)); conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 15087)); conf = RiakConfig.setOutputBucket(conf, "wordcount_out"); conf = RiakConfig.setHadoopClusterSize(conf, 4); Job job = new Job(conf, "Riak-WordCount"); job.setJarByClass(RiakWordCount.class); job.setInputFormatClass(RiakInputFormat.class); job.setMapperClass(TokenCounterMapper.class); job.setReducerClass(TokenCounterReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(RiakOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(WordCountResult.class); job.setNumReduceTasks(4); job.submit(); return job.waitForCompletion(true) ? 0 : 1; }