Example usage for org.apache.hadoop.mapreduce Job waitForCompletion

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#waitForCompletion, collected from real-world source files.

Prototype

public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and wait for it to finish.
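
Before the project examples below, here is a minimal, self-contained sketch of the usual call pattern: build a Job, call waitForCompletion(true) so that progress is printed while the call blocks, and use the boolean result (true only if the job succeeded) as the process exit status. The class name WaitForCompletionExample, the reliance on the identity map/reduce defaults, and the argument handling are illustrative assumptions, not taken from any of the projects listed under Usage.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WaitForCompletionExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "waitForCompletion example");
        job.setJarByClass(WaitForCompletionExample.class);
        // No mapper or reducer is set, so Hadoop's identity Mapper and Reducer
        // defaults apply and input records pass through unchanged.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job to the cluster, print progress (verbose = true) and
        // block until it finishes; the return value is true if the job succeeded.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}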

Usage

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

@Override
protected void execute() throws Exception {
    lastProcessedFile.clear();
    List<AuditMessage> auditMsgList = new ArrayList<AuditMessage>();
    try {
        FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());
        // Clean up tmpPath before every run to avoid
        // any old data being used in this run if the old run was aborted
        cleanUpTmp(fs);
        LOG.info("TmpPath is [" + tmpPath + "]");
        long commitTime = srcCluster.getCommitTime();
        publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess);
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        /* The checkpointPaths table uses the stream name as the row key,
           the source (collector) name as the column key, and the checkpoint value as the value */
        Table<String, String, String> checkpointPaths = HashBasedTable.create();

        long totalSize = createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);

        if (fileListing.size() == 0) {
            LOG.info("Nothing to do!");
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                            lastProcessedFile.get(eachStream));
                }
            }
            return;
        }
        Job job = createJob(tmpJobInputPath, totalSize);
        long jobStartTime = System.nanoTime();
        job.waitForCompletion(true);
        long jobExecutionTimeInSecs = (System.nanoTime() - jobStartTime) / (NANO_SECONDS_IN_SECOND);
        LOG.info("Time taken to complete " + job.getJobID() + " job : " + jobExecutionTimeInSecs + "secs");
        updateJobTimeCounter(jobExecutionTimeInSecs);
        if (job.isSuccessful()) {
            commitTime = srcCluster.getCommitTime();
            LOG.info("Commiting mvPaths and ConsumerPaths");

            commit(prepareForCommit(commitTime), false, auditMsgList, commitTime);
            updatePathsTobeRegisteredWithLatestDir(commitTime);
            checkPoint(checkpointPaths);
            LOG.info("Commiting trashPaths");
            commit(populateTrashCommitPaths(trashSet), true, null, commitTime);
            LOG.info("Committed successfully at " + getLogDateString(commitTime));
            for (String eachStream : streamsToProcess) {
                if (lastProcessedFile.get(eachStream) != null) {
                    ConduitMetrics.updateAbsoluteGauge(getServiceType(), LAST_FILE_PROCESSED, eachStream,
                            lastProcessedFile.get(eachStream));
                }
            }
        } else {
            throw new IOException("LocaStreamService job failure: Job " + job.getJobID() + " has failed. ");
        }
    } catch (Exception e) {
        LOG.warn("Error in running LocalStreamService ", e);
        throw e;
    } finally {
        publishAuditMessages(auditMsgList);
        try {
            registerPartitions();
        } catch (Exception e) {
            LOG.warn("Got exception while registering partitions. ", e);
        }
    }
}

From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java

License:Apache License

private void testClusterName(String configName, String currentClusterName) throws Exception {
    ConduitConfigParser parser = new ConduitConfigParser(configName);
    ConduitConfig config = parser.getConfig();
    Set<String> streamsToProcess = new HashSet<String>();
    streamsToProcess.addAll(config.getSourceStreams().keySet());
    Set<String> clustersToProcess = new HashSet<String>();
    Set<TestLocalStreamService> services = new HashSet<TestLocalStreamService>();
    Cluster currentCluster = null;
    for (SourceStream sStream : config.getSourceStreams().values()) {
        for (String cluster : sStream.getSourceClusters()) {
            clustersToProcess.add(cluster);
        }
    }
    if (currentClusterName != null) {
        currentCluster = config.getClusters().get(currentClusterName);
    }
    for (String clusterName : clustersToProcess) {
        Cluster cluster = config.getClusters().get(clusterName);
        cluster.getHadoopConf().set("mapred.job.tracker", super.CreateJobConf().get("mapred.job.tracker"));
        TestLocalStreamService service = new TestLocalStreamService(config, cluster, currentCluster,
                new NullCheckPointProvider(), streamsToProcess);
        services.add(service);
    }

    for (TestLocalStreamService service : services) {
        FileSystem fs = service.getFileSystem();
        service.preExecute();
        if (currentClusterName != null)
            Assert.assertEquals(service.getCurrentCluster().getName(), currentClusterName);
        // creating a job with empty input path
        Path tmpJobInputPath = new Path("/tmp/job/input/path");
        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Table<String, String, String> checkpointPaths = HashBasedTable.create();
        service.createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);
        Job testJobConf = service.createJob(tmpJobInputPath, 1000);
        testJobConf.waitForCompletion(true);

        int numberOfCountersPerFile = 0;
        long sumOfCounterValues = 0;
        Path outputCounterPath = new Path(new Path(service.getCluster().getTmpPath(), service.getName()),
                "counters");
        FileStatus[] statuses = fs.listStatus(outputCounterPath, new PathFilter() {
            public boolean accept(Path path) {
                return path.toString().contains("part");
            }
        });
        for (FileStatus fileSt : statuses) {
            Scanner scanner = new Scanner(fs.open(fileSt.getPath()));
            while (scanner.hasNext()) {
                String counterNameValue = null;
                try {
                    counterNameValue = scanner.next();
                    String tmp[] = counterNameValue.split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
                    Assert.assertEquals(4, tmp.length);
                    Long numOfMsgs = Long.parseLong(tmp[3]);
                    numberOfCountersPerFile++;
                    sumOfCounterValues += numOfMsgs;
                } catch (Exception e) {
                    LOG.error("Counters file has malformed line with counter name =" + counterNameValue
                            + "..skipping the line", e);
                }
            }
        }
        // Should have 2 counters for each file
        Assert.assertEquals(NUMBER_OF_FILES * 2, numberOfCountersPerFile);
        // sum of all counter values should be equal to total number of messages
        Assert.assertEquals(NUMBER_OF_FILES * 3, sumOfCounterValues);

        Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                service.getCurrentCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        Assert.assertEquals(testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY),
                service.getCluster().getHadoopConf().get(FS_DEFAULT_NAME_KEY));
        if (currentCluster == null)
            Assert.assertEquals(testJobConf.getConfiguration().get(FS_DEFAULT_NAME_KEY),
                    testJobConf.getConfiguration().get(SRC_FS_DEFAULT_NAME_KEY));
        service.getFileSystem().delete(new Path(service.getCluster().getRootDir()), true);
    }

}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

@Override
protected void execute() throws Exception {
    try {

        FileSystem fs = FileSystem.get(cluster.getHadoopConf());
        // Clean up tmpPath before every run to avoid
        // any old data being used in this run if the old run was aborted
        cleanUpTmp(fs);
        LOG.info("TmpPath is [" + tmpPath + "]");

        publishMissingPaths(fs, cluster.getLocalFinalDestDirRoot());

        Map<FileStatus, String> fileListing = new TreeMap<FileStatus, String>();
        Set<FileStatus> trashSet = new HashSet<FileStatus>();
        // checkpointKey, CheckPointPath
        Map<String, FileStatus> checkpointPaths = new TreeMap<String, FileStatus>();

        createMRInput(tmpJobInputPath, fileListing, trashSet, checkpointPaths);

        if (fileListing.size() == 0) {
            LOG.info("Nothing to do!");
            return;
        }
        Job job = createJob(tmpJobInputPath);
        job.waitForCompletion(true);
        if (job.isSuccessful()) {
            long commitTime = cluster.getCommitTime();
            LOG.info("Commiting mvPaths and ConsumerPaths");
            commit(prepareForCommit(commitTime, fileListing));
            checkPoint(checkpointPaths);
            LOG.info("Commiting trashPaths");
            commit(populateTrashCommitPaths(trashSet));
            LOG.info("Committed successfully at " + getLogDateString(commitTime));
        }
    } catch (Exception e) {
        LOG.warn("Error in running LocalStreamService " + e);
        throw e;
    }
}

From source file:com.intel.hadoop.hbase.dot.KEY.java

License:Apache License

private void doMapReduce(Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass,
        String mrTableName) throws IOException, ClassNotFoundException, InterruptedException {

    this.conf.set(KEY.INPUT_TABLE, mrTableName);
    Job job = new Job(this.conf);
    job.setJobName("Generate Data for [" + mrTableName + "]");
    job.setJarByClass(GenerateTestTable.class);

    job.setInputFormatClass(inputFormatClass);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp", "tempout");
    fs.delete(path, true);

    FileOutputFormat.setOutputPath(job, path);

    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);

    TableMapReduceUtil.addDependencyJars(job);
    // Add a Class from the hbase.jar so it gets registered too.
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), org.apache.hadoop.hbase.util.Bytes.class);

    TableMapReduceUtil.initCredentials(job);

    job.waitForCompletion(true);

}

From source file:com.intel.hadoop.hbase.dot.mapreduce.DotImportTsv.java

License:Apache License

/**
 * Main entry point.
 *
 * @param args  The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        usage("Wrong number of arguments: " + otherArgs.length);
        System.exit(-1);
    }

    // Make sure columns are specified
    String columns[] = conf.getStrings(COLUMNS_CONF_KEY);
    if (columns == null) {
        usage("No columns specified. Please specify with -D" + COLUMNS_CONF_KEY + "=...");
        System.exit(-1);
    }

    // Make sure they specify exactly one column as the row key
    int rowkeysFound = 0;
    for (String col : columns) {
        if (col.equals(TsvParser.ROWKEY_COLUMN_SPEC))
            rowkeysFound++;
    }
    if (rowkeysFound != 1) {
        usage("Must specify exactly one column as " + TsvParser.ROWKEY_COLUMN_SPEC);
        System.exit(-1);
    }

    // Make sure one or more columns are specified
    if (columns.length < 2) {
        usage("One or more columns in addition to the row key are required");
        System.exit(-1);
    }
    hbaseAdmin = new HBaseAdmin(conf);
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.intel.hadoop.hbase.dot.TestHiveIntegration.java

License:Apache License

@Test
public void importtsv() {

    String[] args = new String[] {
            "-D" + "importtsv.mapper.class" + "=com.intel.hadoop.hbase.dot.mapreduce.DotTsvImporterMapper",
            "-D" + "importtsv.separator" + "=|", "-D" + "importtsv.bulk.output" + "=/bulkload",
            "-D" + "importtsv.columns"
                    + "=HBASE_ROW_KEY,f1:doc1.field1,f1:doc1.field2,f1:doc1.field3,f1:doc1.field4",
            "-D" + "hbase.dot.enable" + "=true", "-D" + "hbase.dot.type" + "=ANALYTICAL", new String(name),
            "/tsvfile" };

    boolean success = true;
    try {
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        LOG.info("remaining args: " + otherArgs[0] + " " + otherArgs[1]);
        DotImportTsv.createHbaseAdmin(conf);
        Job job = DotImportTsv.createSubmittableJob(conf, otherArgs);
        job.waitForCompletion(true);
        assertTrue("DotImportTSV job failed", job.isSuccessful());
    } catch (IOException e) {
        success = false;
    } catch (ClassNotFoundException e) {
        success = false;
    } catch (InterruptedException e) {
        success = false;
    }

    assertTrue("DotImportTSV operation failed", success);

}

From source file:com.j.distributed.counter.CounterJob.java

@Override
public int run(String... options) throws Exception {

    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());

    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(CounterReducer.class);
    job.setReducerClass(CounterReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(options[0]));
    FileOutputFormat.setOutputPath(job, new Path(options[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.j.distributed.sorter.SorterJob.java

@Override
public int run(String... options) throws Exception {

    Job job = Job.getInstance(getConf(), getClass().toString());
    job.setJarByClass(getClass());

    job.setMapperClass(SorterMapper.class);
    job.setCombinerClass(SorterReducer.class);
    job.setReducerClass(SorterReducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    FileInputFormat.addInputPath(job, new Path(options[1]));
    FileOutputFormat.setOutputPath(job, new Path(options[2]));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.javiertordable.mrif.MapReduceQuadraticSieve.java

License:Apache License

/**
 * Setup the MapReduce parameters and run it.
 *
 * Tool parses the command line arguments for us.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    // Check the arguments. we need the integer to attempt to factor.
    if (args.length < 1) {
        System.out.println("Please indicate the integer to factor");
        LOGGER.severe("No integer to factor. Exit.");
        System.exit(1);
    }

    // Parse N and add it to the job configuration, so that the workers can
    // access it as well.
    BigInteger N = new BigInteger(args[0]);
    LOGGER.info("Attempting factorization of: " + N.toString());
    conf.set(INTEGER_TO_FACTOR_NAME, N.toString());

    // Obtain the factor base for the integer N.
    FactorBaseArray factorBase = SieveInput.factorBase(N);
    LOGGER.info("Factor base of size: " + factorBase.size());
    conf.set(FACTOR_BASE_NAME, factorBase.toString());

    // Prepare the input of the mapreduce.
    LOGGER.info("Sieve of size: " + SieveInput.fullSieveIntervalSize(N));
    try {
        // Write the full sieve interval to disk.
        SieveInput.writeFullSieveInterval(N, "input/" + INPUT_FILE_NAME);
    } catch (FileNotFoundException e) {
        System.out.println("Unable to open the file for writing.");
    } catch (IOException e) {
        System.out.println("Unable to write to the output file.");
    }

    // Configure the classes of the mapreducer
    Job job = new Job(conf, "QuadraticSieve");
    job.setJarByClass(MapReduceQuadraticSieve.class);
    job.setMapperClass(SieveMapper.class);
    job.setReducerClass(FindSquaresReducer.class);

    // Output will be two pairs of strings:
    // <"Factor1", "59">
    // <"Factor2", "101">
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path("input/"));
    FileOutputFormat.setOutputPath(job, new Path("output/"));

    // Submit the job.
    job.waitForCompletion(true);

    return 0;
}

From source file:com.jbw.mutioutputformat.PatitionByStation.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    Path input = new Path(conf.get("input"));
    Path output = new Path(conf.get("output"));
    Job job = Job.getInstance();
    job.setJarByClass(PatitionByStation.class);
    job.setJobName("papapa");
    job.setMapperClass(StationMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setReducerClass(StationReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    return job.waitForCompletion(true) ? 0 : 1;
}