Usage examples for org.apache.hadoop.mapreduce.Job#waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
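waitForCompletion(verbose) submits the job to the cluster if it has not been submitted yet, blocks until the job finishes, and returns true only if the job succeeded; with verbose set to true it also prints task progress while the job runs. A minimal driver sketch follows (MyDriver, MyMapper, and MyReducer are placeholder names, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "my-job");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);     // hypothetical mapper
        job.setReducerClass(MyReducer.class);   // hypothetical reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit, print progress while running, and exit with the job's status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}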
From source file:com.cloudera.recordservice.examples.terasort.TeraGen.java
License:Apache License
/**
 * @param args the cli arguments
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.recordservice.examples.terasort.TeraSort.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    if (args.length != 2 && args.length != 3) {
        usage();
        return 1;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
        useSimplePartitioner = true;
    } else {
        Path inputDir = new Path(args[0]);
        TeraInputFormat.setInputPaths(job, inputDir);
        job.setInputFormatClass(TeraInputFormat.class);
    }
    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            LOG.error(e.getMessage());
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }
    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}
From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    boolean useRecordService = false;
    if (args.length != 2 && args.length != 3) {
        usage();
        return 1;
    }
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    }
    Job job = Job.getInstance(getConf());
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    if (useRecordService) {
        RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]);
        job.setInputFormatClass(RecordServiceTeraInputFormat.class);
    } else {
        TeraInputFormat.setInputPaths(job, new Path(args[0]));
        job.setInputFormatClass(TeraInputFormat.class);
    }
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver.java
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();
    if (args.length < 6) {
        /*System.out.println("hadoop jar HBASEBulkLoad.jar "
                + "com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver"
                + " <inputpath> <outputpath> <hbaseTable> <hbaseColumnFamily"
                + " \"<hbaseColumns (delimiter seperated)>\" <column delimiter>");*/
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    String hbaseTab = args[2];
    String hbaseColumnFamily = args[3];
    String hbaseColumns = args[4];
    String hbaseColumnSeperator = args[5];
    config.set(HBASEBulkLoadConstants.HBASE_TABLE_KEY, hbaseTab.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_FAMILY_KEY, hbaseColumnFamily);
    config.set(HBASEBulkLoadConstants.HBASE_COLUMNS_KEY, hbaseColumns.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_SEPERATOR_KEY, hbaseColumnSeperator);
    System.out.println(2);
    Job job = Job.getInstance(config, this.getClass().getName() + "-" + hbaseTab);
    HBaseConfiguration.addHbaseResources(config);
    job.setInputFormatClass(TextInputFormat.class);
    job.setJarByClass(HBASEBulkLoadDriver.class);
    job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);
    Connection connection = ConnectionFactory.createConnection(config);
    Table hTab = connection.getTable(TableName.valueOf(hbaseTab));
    FileSystem.get(getConf()).delete(new Path(args[1]), true);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //job.setOutputFormatClass(HFileOutputFormat2.class);
    TableMapReduceUtil.initTableReducerJob(hTab.getName().getNameAsString(), null, job);
    //job.setNumReduceTasks(0);
    TableMapReduceUtil.addDependencyJars(job);
    HFileOutputFormat2.configureIncrementalLoadMap(job, hTab);
    int exitCode = job.waitForCompletion(true) ? HBASEBulkLoadConstants.SUCCESS : HBASEBulkLoadConstants.FAILURE;
    System.out.println(8);
    if (HBASEBulkLoadConstants.SUCCESS == exitCode) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        loader.doBulkLoad(new Path(args[1]), (HTable) hTab);
        connection.close();
    }
    return exitCode;
}
From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();
    if (args.length < 2) {
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Job job = Job.getInstance(config, this.getClass().getName() + "-wordcount");
    job.setJarByClass(SecureWordCountDriver.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java
License:Apache License
public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        try {
            // delete the table if it already exists.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored; proceed regardless of whether we deleted the table;
            // it may have simply not existed.
        }
        // Create the table.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");
        // commit this tx.
        connection.commit();
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(new Path(OUT_DIR), true);
        // now do a dd import
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);
        boolean ret = job.waitForCompletion(true);
        assertTrue("job failed", ret);
        // Check to see that we imported as much as we thought we did.
        assertEquals("Did not get all the records", 4, job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue());
    } finally {
        s.close();
    }
}
From source file:com.cloudera.test.UseHCat.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Get the input and output table names as arguments
    String inputTableName = args[0];
    String outputTableName = args[1];
    // Assume the default database
    String dbName = null;
    Job job = new Job(conf, "UseHCat");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setJarByClass(UseHCat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    // An HCatalog record as input
    job.setInputFormatClass(HCatInputFormat.class);
    // Mapper emits a string as key and an integer as value
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Ignore the key for the reducer output; emitting an HCatalog record as value
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatOutputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing:" + s);
    HCatOutputFormat.setSchema(job, s);
    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.cloudera.traffic.AveragerRunner.java
License:Apache License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf);
    job.setJarByClass(AveragerRunner.class);
    job.setMapperClass(AveragerMapper.class);
    job.setReducerClass(AveragerReducer.class);
    job.setCombinerClass(AveragerReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
}
From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf());
    // Resolve the stat date from the command-line arguments
    String statDate = DateUtil.getFilterDate(args);
    if (statDate == null) {
        System.exit(1);
    }
    conf.set(STAT_DAY, statDate);
    // Create the job
    Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate);
    job.setJarByClass(ScoreInfoDayJob.class);
    String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH);
    Path scoreInfoPath = new Path(scoreInfoInput);
    String acctPhoneMapInfoInput = conf.get(ACCT_PHONE_MAP_INPUT_PATH);
    Path accPhoneMapInfoPath = new Path(acctPhoneMapInfoInput);
    // Add the score-info input path if it exists
    if (FileSystemUtil.exists(scoreInfoPath)) {
        MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class, ScoreInfoDayMapper.class);
        logger.info("SocreInfoPath is " + scoreInfoInput);
    } else {
        logger.error("Path [{}] not exist!", scoreInfoInput);
    }
    // Account-to-phone mapping input (currently disabled)
    // if (FileSystemUtil.exists(accPhoneMapInfoPath)) {
    //     MultipleInputs.addInputPath(job, accPhoneMapInfoPath, TextInputFormat.class,
    //             AcctPhoneMapper.class);
    //     logger.info("AccPhoneMapInfoPath is " + acctPhoneMapInfoInput);
    // } else {
    //     logger.error("Path [{}] not exist!", acctPhoneMapInfoInput);
    // }
    // Configure map output, reducer count, and output format
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScoreInfo.class);
    job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40));
    job.setOutputFormatClass(NullOutputFormat.class);
    // TableMapReduceUtil.initTableReducerJob(HBaseTableSchema.USER_INFO_TABLE2,
    //         ScoreInfoDayReducer.class, job);
    return (job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.conversantmedia.mapreduce.example.WordCount.java
License:Apache License
public static void main(String[] args) {
    try {
        Job job = Job.getInstance(new Configuration(), "WordCount v2");
        // Use concrete formats; the abstract FileInputFormat/FileOutputFormat cannot be instantiated.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setCombinerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    } catch (IOException | ClassNotFoundException | InterruptedException e) {
        e.printStackTrace();
    }
}