List of usage examples for org.apache.hadoop.mapreduce Job waitForCompletion
public boolean waitForCompletion(boolean verbose) throws IOException, InterruptedException, ClassNotFoundException
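The method submits the job to the cluster, blocks until it finishes, and returns true if the job succeeded; when verbose is true it also reports progress to the console. As a minimal, self-contained sketch of the typical driver pattern (not taken from any example on this page), the following identity pass-through job uses Hadoop's base Mapper and Reducer classes and reads the input and output paths from the command line:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IdentityJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "identity pass-through");
        job.setJarByClass(IdentityJobDriver.class);
        // The base Mapper and Reducer classes simply pass records through unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        // With the default TextInputFormat, the identity mapper emits <LongWritable, Text>.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));      // input path from args
        FileOutputFormat.setOutputPath(job, new Path(args[1]));    // output path from args
        // Submit the job, block until it completes, and print progress (verbose == true).
        // waitForCompletion returns true on success, so exit with 0 or 1 accordingly.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}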
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
@Override
protected void execute() throws Exception {
    lastProcessedFile.clear();
    List<AuditMessage> auditMsgList = new ArrayList<AuditMessage>();
    try {
        FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());
        // Cleanup tmpPath before everyRun to avoid
        // any old data being used in this run if the old run was aborted
        cleanUpTmp(fs);
        LOG.info("TmpPath is [" + tmpPath + "]");
        long commitTime = srcCluster.getCommitTime();
        publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess);
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        /* checkpointPaths table contains streamname as rowkey, source(collector) name as
           column key and checkpoint value as value */
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        long totalSize = createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        if (fileListing.size() == 0) {
            LOG.info("Nothing to do!");
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                        lastProcessedFile.get(eachStream));
                }
            }
            return;
        }
        Job job = createJob(tmpJobInputPath, totalSize);
        long jobStartTime = System.nanoTime();
        job.waitForCompletion(true);
        long jobExecutionTimeInSecs = (System.nanoTime() - jobStartTime) / (NANO_SECONDS_IN_SECOND);
        LOG.info("Time taken to complete " + job.getJobID() + " job : " + jobExecutionTimeInSecs + "secs");
        updateJobTimeCounter(jobExecutionTimeInSecs);
        if (job.isSuccessful()) {
            commitTime = srcCluster.getCommitTime();
            LOG.info("Commiting mvPaths and ConsumerPaths");
            commit(prepareForCommit(commitTime), false, auditMsgList, commitTime);
            updatePathsTobeRegisteredWithLatestDir(commitTime);
            checkPoint(checkpointPaths);
            LOG.info("Commiting trashPaths");
            commit(populateTrashCommitPaths(trashSet), true, null, commitTime);
            LOG.info("Committed successfully at " + getLogDateString(commitTime));
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                        lastProcessedFile.get(eachStream));
                }
            }
        } else {
            throw new IOException("LocaStreamService job failure: Job " + job.getJobID() + " has failed. ");
        }
    } catch (Exception e) {
        LOG.warn("Error in running LocalStreamService ", e);
        throw e;
    } finally {
        publishAuditMessages(auditMsgList);
        try {
            registerPartitions();
        } catch (Exception e) {
            LOG.warn("Got exception while registering partitions. ", e);
        }
    }
}
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;
    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
            new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }
    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null)
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);
        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);
        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
            "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                        + "..skipping the line", e);
                }
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);
        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
            service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
            service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null)
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));
        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }
}
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
@Override
protected void execute() throws Exception {
    try {
        FileSystem fs = FileSystem.get(cluster.getHadoopConf());
        // Cleanup tmpPath before everyRun to avoid
        // any old data being used in this run if the old run was aborted
        cleanUpTmp(fs);
        LOG.info("TmpPath is [" + tmpPath + "]");
        publishMissingPaths(fs, cluster.getLocalFinalDestDirRoot());
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Map<String, FileStatus> checkpointPaths = new TreeMap<String, FileStatus>();
        createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        if (fileListing.size() == 0) {
            LOG.info("Nothing to do!");
            return;
        }
        Job job = createJob(tmpJobInputPath);
        job.waitForCompletion(true);
        if (job.isSuccessful()) {
            long commitTime = cluster.getCommitTime();
            LOG.info("Commiting mvPaths and ConsumerPaths");
            commit(prepareForCommit(commitTime, fileListing));
            checkPoint(checkpointPaths);
            LOG.info("Commiting trashPaths");
            commit(populateTrashCommitPaths(trashSet));
            LOG.info("Committed successfully at " + getLogDateString(commitTime));
        }
    } catch (Exception e) {
        LOG.warn("Error in running LocalStreamService " + e);
        throw e;
    }
}
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {
    this.conf.set(KEY.INPUT_TABLE, mrTableName);
    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);
    FileOutputFormat.setOutputPath(job, path);
    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);
    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);
    TableMapReduceUtil.initCredentials(job);
    job.waitForCompletion(true);
}
From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java
License:Apache License
/**
 * Main entry point.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }
    // Make sure columns are specified
    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        System.exit(-1);
    }
    // Make sure they specify exactly one column as the row key
    int rowkeysFound = 0;
    for (String col : columns) {
        if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC))
            rowkeysFound++;
    }
    if (rowkeysFound != 1) {
        usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
        System.exit(-1);
    }
    // Make sure one or more columns are specified
    if (columns.length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    hbaseAdmin = new HBaseAdmin(conf);
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.intel.hadoop.hbase.dot.TestHiveIntegration.java
License:Apache License
@Test
public void importtsv() {
    String[] args = new String[] {
        "-D" + "importtsv.mapper.class" + "=com.intel.hadoop.hbase.dot.mapreduce.DotTsvImporterMapper",
        "-D" + "importtsv.separator" + "=|",
        "-D" + "importtsv.bulk.output" + "=/bulkload",
        "-D" + "importtsv.columns"
            + "=HBASE_ROW_KEY,f1:doc1.field1,f1:doc1.field2,f1:doc1.field3,f1:doc1.field4",
        "-D" + "hbase.dot.enable" + "=true",
        "-D" + "hbase.dot.type" + "=ANALYTICAL",
        new String(name), "/tsvfile" };
    boolean success = true;
    try {
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        LOG.info("remaining args: " + otherArgs[0] + " " + otherArgs[1]);
        DotImportTsv.createHbaseAdmin(conf);
        Job job = DotImportTsv.createSubmittableJob(conf, otherArgs);
        job.waitForCompletion(true);
        assertTrue("DotImportTSV job failed", job.isSuccessful());
    } catch (IOException e) {
        success = false;
    } catch (ClassNotFoundException e) {
        success = false;
    } catch (InterruptedException e) {
        success = false;
    }
    assertTrue("DotImportTSV operation failed", success);
}
From source file:com.j.distributed.counter.CounterJob.java
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(CounterReducer.class);
    job.setReducerClass(CounterReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(options[0]));
    FileOutputFormat.setOutputPath(job, new Path(options[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.j.distributed.sorter.SorterJob.java
@Override
public int run(String... options) throws Exception {
    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());
    job.setMapperClass(SorterMapper.class);
    job.setCombinerClass(SorterReducer.class);
    job.setReducerClass(SorterReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
    FileInputFormat.addInputPath(job, new Path(options[1]));
    FileOutputFormat.setOutputPath(job, new Path(options[2]));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.javiertordable.mrif.MapReduceQuadraticSieve.java
License:Apache License
/**
 * Setup the MapReduce parameters and run it.
 *
 * Tool parses the command line arguments for us.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // Check the arguments. we need the integer to attempt to factor.
    if (args.length < 1) {
        System.out.println("Please indicate the integer to factor");
        LOGGER.severe("No integer to factor. Exit.");
        System.exit(1);
    }
    // Parse N and add it to the job configuration, so that the workers can
    // access it as well.
    BigInteger N = new BigInteger(args[0]);
    LOGGER.info("Attempting factorization of: " + N.toString());
    conf.set(INTEGER_TO_FACTOR_NAME, N.toString());
    // Obtain the factor base for the integer N.
    FactorBaseArray factorBase = SieveInput.factorBase(N);
    LOGGER.info("Factor base of size: " + factorBase.size());
    conf.set(FACTOR_BASE_NAME, factorBase.toString());
    // Prepare the input of the mapreduce.
    LOGGER.info("Sieve of size: " + SieveInput.fullSieveIntervalSize(N));
    try {
        // Write the full sieve interval to disk.
        SieveInput.writeFullSieveInterval(N, "input/" + INPUT_FILE_NAME);
    } catch (FileNotFoundException e) {
        System.out.println("Unable to open the file for writing.");
    } catch (IOException e) {
        System.out.println("Unable to write to the output file.");
    }
    // Configure the classes of the mapreducer
    Job job = new Job(conf, "QuadraticSieve");
    job.setJarByClass(MapReduceQuadraticSieve.class);
    job.setMapperClass(SieveMapper.class);
    job.setReducerClass(FindSquaresReducer.class);
    // Output will be two pairs of strings:
    // <"Factor1", "59">
    // <"Factor2", "101">
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path("input/"));
    FileOutputFormat.setOutputPath(job, new Path("output/"));
    // Submit the job.
    job.waitForCompletion(true);
    return 0;
}
From source file:com.jbw.mutioutputformat.PatitionByStation.java
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path input = new Path(conf.get("input"));
    Path output = new Path(conf.get("output"));
    Job job = Job.getInstance();
    job.setJarByClass(PatitionByStation.class);
    job.setJobName("papapa");
    job.setMapperClass(StationMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setReducerClass(StationReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    return job.waitForCompletion(true) ? 0 : 1;
}