List of usage examples for org.apache.hadoop.mapreduce.Job.waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
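waitForCompletion(verbose) submits the job to the cluster, blocks until it finishes (printing progress to the console when verbose is true), and returns true only if the job succeeded. The following is a minimal driver sketch of the typical call pattern; MyDriver, MyMapper, MyReducer, and the input/output paths are hypothetical placeholders, not taken from the examples below.

// Minimal sketch of a driver that blocks on waitForCompletion.
// MyDriver, MyMapper, MyReducer and the paths are hypothetical placeholders.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example job");
job.setJarByClass(MyDriver.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path("/tmp/example/input"));
FileOutputFormat.setOutputPath(job, new Path("/tmp/example/output"));
// true = report progress while waiting; the return value reflects job success
System.exit(job.waitForCompletion(true) ? 0 : 1);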
From source file:co.nubetech.hiho.merge.MergeJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);
    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);
    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);
    if (mergeBy.equals("key")) {
        job.setMapperClass(MergeKeyMapper.class);
        job.setReducerClass(MergeKeyReducer.class);
    } else if (mergeBy.equals("value")) {
        job.setMapperClass(MergeValueMapper.class);
        job.setReducerClass(MergeValueReducer.class);
    }
    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);
    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        // Run without verbose progress output, then read the counters once the job completes.
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
            totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
            badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue();
            logger.info("Total old records read are: " + totalRecordsOld);
            logger.info("Total new records read are: " + totalRecordsNew);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file:co.nubetech.hiho.similarity.ngram.NGramJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("NGram job");
    job.setJarByClass(NGramJob.class);
    Class inputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat");
    Class outputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class.forName("org.apache.hadoop.io.Text");
    Class inputValueClass = Class.forName("org.apache.hadoop.io.Text");
    Class outputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class.forName("org.apache.hadoop.io.IntWritable");
    job.setMapperClass(NGramMapper.class);
    job.setReducerClass(NGramReducer.class);
    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path("outputOfNGramJob"));
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:co.nubetech.hiho.similarity.ngram.ScoreJob.java
License:Apache License
@Override
public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("Score job");
    job.setJarByClass(ScoreJob.class);
    Class inputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat");
    Class outputFormatClass = Class.forName("org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat");
    // org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
    // org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
    Class inputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class inputValueClass = Class.forName("org.apache.hadoop.io.IntWritable");
    Class outputKeyClass = Class.forName("co.nubetech.hiho.similarity.ngram.ValuePair");
    Class outputValueClass = Class.forName("org.apache.hadoop.io.LongWritable");
    job.setMapperClass(ScoreMapper.class);
    job.setReducerClass(ScoreReducer.class);
    job.setInputFormatClass(inputFormatClass);
    job.setMapOutputKeyClass(inputKeyClass);
    job.setMapOutputValueClass(inputValueClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    job.setOutputFormatClass(outputFormatClass);
    FileInputFormat.setInputPaths(job, "outputOfNGramJob");
    FileOutputFormat.setOutputPath(job, new Path("outputOfScoreJob"));
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:code.DemoWordCount.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;
    LOG.info("Tool: " + DemoWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(DemoWordCount.class.getSimpleName());
    job.setJarByClass(DemoWordCount.class);
    job.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}
From source file:com.accumulobook.advanced.mapreduce.MapReduceFilesExample.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(this.getConf());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(WordCount.WordCountMapper.class);
    job.setCombinerClass(WordCount.WordCountCombiner.class);
    job.setReducerClass(WordCount.WordCountReducer.class);
    // clone the articles table
    ZooKeeperInstance inst = new ZooKeeperInstance(args[0], args[1]);
    Connector conn = inst.getConnector(args[2], new PasswordToken(args[3]));
    conn.tableOperations().clone(WikipediaConstants.ARTICLES_TABLE, WikipediaConstants.ARTICLES_TABLE_CLONE,
            true, Collections.EMPTY_MAP, Collections.EMPTY_SET);
    // take cloned table offline, waiting until the operation is complete
    boolean wait = true;
    conn.tableOperations().offline(WikipediaConstants.ARTICLES_TABLE_CLONE, wait);
    ClientConfiguration zkiConfig = new ClientConfiguration().withInstance(args[0]).withZkHosts(args[1]);
    // input
    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setInputTableName(job, WikipediaConstants.ARTICLES_TABLE_CLONE);
    List<Pair<Text, Text>> columns = new ArrayList<>();
    columns.add(new Pair(WikipediaConstants.CONTENTS_FAMILY_TEXT, new Text("")));
    AccumuloInputFormat.fetchColumns(job, columns);
    AccumuloInputFormat.setZooKeeperInstance(job, zkiConfig);
    AccumuloInputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3]));
    // configure to use underlying RFiles
    AccumuloInputFormat.setOfflineTableScan(job, true);
    // output
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    BatchWriterConfig bwConfig = new BatchWriterConfig();
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);
    AccumuloOutputFormat.setZooKeeperInstance(job, zkiConfig);
    AccumuloOutputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3]));
    AccumuloOutputFormat.setDefaultTableName(job, WikipediaConstants.WORD_COUNT_TABLE);
    AccumuloOutputFormat.setCreateTables(job, true);
    job.setJarByClass(WordCount.class);
    // block until the job finishes (true = report progress), rather than submitting asynchronously
    job.waitForCompletion(true);
    //job.submit();
    return 0;
}
From source file:com.aerospike.hadoop.examples.aggregateintinput.AggregateIntInput.java
License:Apache License
public int run(final String[] args) throws Exception {
    final Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    final Job job = new Job(conf, "AerospikeAggregateIntInput");
    log.info("run starting on bin " + binName);
    job.setJarByClass(AggregateIntInput.class);
    job.setInputFormatClass(AerospikeInputFormat.class);
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    // job.setCombinerClass(Reduce.class); // no combiner
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[0]));
    int status = job.waitForCompletion(true) ? 0 : 1;
    log.info("run finished, status=" + status);
    return status;
}
From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    HBaseConfiguration.addHbaseResources(getConf());
    Configuration conf = getConf();
    String quorum = conf.get("hbase.zookeeper.quorum");
    String clientPort = conf.get("hbase.zookeeper.property.clientPort");
    LOG.info("hbase.zookeeper.quorum=" + quorum);
    LOG.info("hbase.zookeeper.property.clientPort=" + clientPort);
    LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat"));
    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
        LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort));
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort));
    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String qualifiedTableName = getQualifiedTableName(schemaName, tableName);
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);
    LOG.info("tableName=" + tableName);
    LOG.info("schemaName=" + schemaName);
    LOG.info("qualifiedTableName=" + qualifiedTableName);
    configureOptions(cmdLine, importColumns, getConf());
    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }
    Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt()));
    Path outputPath = null;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    LOG.info("Configuring HFile output path to {}", outputPath);
    Job job = new Job(getConf(),
            "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY));
    // Allow overriding the job jar setting by using a -D system property at startup
    if (job.getJar() == null) {
        job.setJarByClass(PhoenixCsvToKeyValueMapper.class);
    }
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileSystem.get(getConf());
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setMapperClass(PhoenixCsvToKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    HTable htable = new HTable(getConf(), qualifiedTableName);
    // Auto configure partitioner and reducer according to the Main Data table
    HFileOutputFormat.configureIncrementalLoad(job, htable);
    LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        LOG.error("Import job failed, check JobTracker for details");
        return 1;
    }
    LOG.info("Loading HFiles from {}", outputPath);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf());
    loader.doBulkLoad(outputPath, htable);
    htable.close();
    LOG.info("Incremental load complete");
    LOG.info("Removing output directory {}", outputPath);
    if (!FileSystem.get(getConf()).delete(outputPath, true)) {
        LOG.error("Removing output directory {} failed", outputPath);
    }
    return 0;
}
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
private static Object[] runJob(Configuration conf, String tableName, String inputPath, String tmpOutputPath)
        throws Exception {
    Long everyJobInputLine = 0L;
    Long everyJobOutputLine = 0L;
    Long everyJobBadLine = 0L;
    Job job = null;
    try {
        job = createSubmittableJob(conf, tableName, inputPath, tmpOutputPath);
    } catch (Exception e) {
        System.err.println(
                "ERROR:mutiplecolumn bulkload when create submittableJob error is :" + e.fillInStackTrace());
        return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
    }
    boolean completion = false;
    try {
        if (job == null)
            return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
        completion = job.waitForCompletion(true);
        everyJobBadLine = job.getCounters().getGroup("ImportTsv").findCounter("Bad Lines").getValue();
        everyJobInputLine = job.getCounters().getGroup("ImportTsv").findCounter("total Lines").getValue();
        everyJobOutputLine = everyJobInputLine - everyJobBadLine;
    } catch (Exception e) {
        System.err.println("ERROR:mutiplecolumn bulkload when execute Job error is :" + e.fillInStackTrace());
        return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
    }
    try {
        if (completion && !StringUtils.isEmpty(tmpOutputPath)) {
            String[] toolRunnerArgs = new String[] { tmpOutputPath, tableName };
            int ret = ToolRunner.run(new LoadIncrementalHFiles(conf), toolRunnerArgs);
            return new Object[] { ret == 0, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
        } else {
            return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
        }
    } catch (Exception e) {
        System.err.println(
                "ERROR:mutiplecolumn bulkload when LoadIncrementalHFiles error is :" + e.fillInStackTrace());
        return new Object[] { false, everyJobInputLine, everyJobOutputLine, everyJobBadLine };
    }
}
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
private static boolean runJob(Configuration conf, String tableName, String inputPath, String tmpOutputPath) {
    Job job = null;
    try {
        job = createSubmittableJob(conf, tableName, inputPath, tmpOutputPath);
    } catch (Exception e) {
        System.err.println(
                "ERROR:singlecolumn bulkload when create submittableJob error is :" + e.fillInStackTrace());
        return false;
    }
    boolean completion = false;
    try {
        if (job == null)
            return false;
        completion = job.waitForCompletion(true);
    } catch (Exception e) {
        System.err.println("ERROR:singlecolumn bulkload when execute Job error is :" + e.fillInStackTrace());
        return false;
    }
    try {
        if (completion && !StringUtils.isEmpty(tmpOutputPath)) {
            String[] toolRunnerArgs = new String[] { tmpOutputPath, tableName };
            int ret = ToolRunner.run(new LoadIncrementalHFiles(conf), toolRunnerArgs);
            return ret == 0;
        } else {
            return false;
        }
    } catch (Exception e) {
        System.err.println(
                "ERROR:singlecolumn bulkload when LoadIncrementalHFiles error is :" + e.fillInStackTrace());
        return false;
    }
}
From source file:com.airline.analytics.AirlineDelayAnalytics.java
@Override
public int run(String[] strings) throws Exception {
    Job job = Job.getInstance(getConf(), "Hadoop Airline Delay Analytics");
    job.setJarByClass(AirlineDelayAnalytics.class);
    job.setMapperClass(AirlineMapper.class);
    // job.setCombinerClass(AirlineReducer.class);
    job.setReducerClass(AirlineReducer.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(strings[0]));
    FileOutputFormat.setOutputPath(job, new Path(strings[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}