Example usage for org.apache.hadoop.mapreduce Job submit

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job#submit().

Prototype

public void submit() throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and return immediately.
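
Because submit() only hands the job off to the cluster and returns right away, the caller has to track the job itself, either by polling isComplete()/isSuccessful() (as several of the examples below do) or by calling waitForCompletion() on the submitted job later. A minimal, self-contained sketch of the polling pattern (the map-only identity job and the SubmitExample class name are placeholders, not taken from any of the projects below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SubmitExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "submit example");
        job.setJarByClass(SubmitExample.class);
        job.setMapperClass(Mapper.class);       // identity mapper keeps the example minimal
        job.setNumReduceTasks(0);               // map-only job
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.submit();                           // returns as soon as the job is handed to the cluster
        while (!job.isComplete()) {             // poll the cluster for status
            Thread.sleep(5000);
        }
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}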

Usage

From source file: org.apache.kylin.engine.mr.common.AbstractHadoopJob.java

License: Apache License

protected int waitForCompletion(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    int retVal = 0;
    long start = System.nanoTime();
    if (isAsync) {
        job.submit();
    } else {
        job.waitForCompletion(true);
        retVal = job.isSuccessful() ? 0 : 1;
        logger.debug("Job '" + job.getJobName() + "' finished "
                + (job.isSuccessful() ? "successfully in " : "with failures.  Time taken ")
                + formatTime((System.nanoTime() - start) / 1000000L));
    }
    return retVal;
}

From source file: org.apache.mahout.clustering.lda.cvb.CVB0Driver.java

License: Apache License

private Job writeTopicModel(Configuration conf, Path modelInput, Path output)
        throws IOException, InterruptedException, ClassNotFoundException {
    String jobName = String.format("Writing final topic/term distributions from %s to %s", modelInput, output);
    log.info("About to run: {}", jobName);

    Job job = prepareJob(modelInput, output, SequenceFileInputFormat.class,
            CVB0TopicTermVectorNormalizerMapper.class, IntWritable.class, VectorWritable.class,
            SequenceFileOutputFormat.class, jobName);
    job.submit();
    return job;
}

From source file: org.apache.mahout.clustering.lda.cvb.CVB0Driver.java

License: Apache License

private Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
    log.info("About to run: {}", jobName);

    Job job = prepareJob(corpus, output, SequenceFileInputFormat.class, CVB0DocInferenceMapper.class,
            IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class, jobName);

    FileSystem fs = FileSystem.get(corpus.toUri(), conf);
    if (modelInput != null && fs.exists(modelInput)) {
        FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
        URI[] modelUris = new URI[statuses.length];
        for (int i = 0; i < statuses.length; i++) {
            modelUris[i] = statuses[i].getPath().toUri();
        }
        DistributedCache.setCacheFiles(modelUris, conf);
        setModelPaths(job, modelInput);
    }
    job.submit();
    return job;
}
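
Both CVB0Driver helpers above return the Job right after submit(), so the driver can keep the two output jobs running concurrently and block on them afterwards; calling waitForCompletion() on an already-submitted job simply monitors it until it finishes. A sketch of that calling pattern (variable and path names are illustrative, not taken from CVB0Driver):

Job topicModelJob = writeTopicModel(conf, modelPath, topicTermOutputPath);
Job docInferenceJob = writeDocTopicInference(conf, corpusPath, modelPath, docTopicOutputPath);

// Both jobs are already running on the cluster; block until each one finishes.
if (!topicModelJob.waitForCompletion(true) || !docInferenceJob.waitForCompletion(true)) {
    throw new IOException("Topic model or doc/topic inference job failed");
}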

From source file: org.apache.mahout.freqtermsets.PFPGrowth.java

License: Apache License

/**
 * Count the frequencies of various features in parallel using Map/Reduce
 */
public static void startParallelCounting(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(PFP_PARAMETERS, params.toString());

    conf.set("mapred.compress.map.output", "true");
    conf.set("mapred.output.compression.type", "BLOCK");

    // if(Boolean.parseBoolean(params.get(PFPGrowth.PSEUDO, "false"))){
    // conf.set("mapred.tasktracker.map.tasks.maximum", "3");
    // conf.set("mapred.tasktracker.reduce.tasks.maximum", "3");
    // conf.set("mapred.map.child.java.opts", "-Xmx777M");
    // conf.set("mapred.reduce.child.java.opts", "-Xmx777M");
    // conf.setInt("mapred.max.map.failures.percent", 0);
    // }
    conf.set("mapred.child.java.opts", "-XX:-UseGCOverheadLimit -XX:+HeapDumpOnOutOfMemoryError");

    // String input = params.get(INPUT);
    // Job job = new Job(conf, "Parallel Counting Driver running over input: " + input);
    long startTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_START));
    // Long.toString(PFPGrowth.TREC2011_MIN_TIMESTAMP)); //GMT23JAN2011));
    long endTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_END));
    // Long.toString(Long.MAX_VALUE));

    long windowSize = Long
            .parseLong(params.get(PFPGrowth.PARAM_WINDOW_SIZE, Long.toString(endTime - startTime)));
    long stepSize = Long.parseLong(params.get(PFPGrowth.PARAM_STEP_SIZE, Long.toString(windowSize)));

    endTime = Math.min(endTime, startTime + windowSize);

    FileSystem fs = FileSystem.get(conf); // TODONE: do I need getLocal(conf)?

    Job[] jobArr = new Job[(int) Math.ceil(windowSize / (double) stepSize)];
    for (int j = 0; startTime < endTime; startTime += stepSize, ++j) {
        long jobEnd = startTime + stepSize;
        Job job = new Job(conf, "Parallel counting running over inerval " + startTime + "-" + jobEnd); // endTime);

        // Path outPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);
        Path outRoot = new Path(params.get(OUTROOT));
        Path stepOutput = new Path(outRoot, startTime + "");
        stepOutput = new Path(stepOutput, jobEnd + "");
        if (fs.exists(stepOutput)) {
            continue;
        }
        jobArr[j] = job;
        Path outPath = new Path(stepOutput, PARALLEL_COUNTING);
        FileOutputFormat.setOutputPath(job, outPath);
        // HadoopUtil.delete(conf, outPath);

        job.setJarByClass(PFPGrowth.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        PartitionByTimestamp.setInputPaths(job, params, conf);
        // FileInputFormat.addInputPath(job, new Path(input));

        // job.setInputFormatClass(HtmlTweetInputFormat.class);
        job.setInputFormatClass(CSVTweetInputFormat.class);
        job.setMapperClass(ParallelCountingMapper.class);
        job.setCombinerClass(ParallelCountingReducer.class);
        job.setReducerClass(ParallelCountingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.submit();

        // boolean succeeded = job.waitForCompletion(true);
        // if (!succeeded) {
        // throw new IllegalStateException("Job failed!");
        // }
    }

    boolean allCompleted;
    do {
        Thread.sleep(1000);
        allCompleted = true;
        for (int j = 0; j < jobArr.length; ++j) {
            if (jobArr[j] == null) {
                continue;
            }
            boolean complete = jobArr[j].isComplete();
            allCompleted &= complete;
            if (!complete) {
                String report = (j + " (" + jobArr[j].getJobName() + "): map "
                        + StringUtils.formatPercent(jobArr[j].mapProgress(), 0) + " reduce "
                        + StringUtils.formatPercent(jobArr[j].reduceProgress(), 0) + " - Tracking: "
                        + jobArr[j].getTrackingURL());
                LOG.info(report);
            }
        }
    } while (!allCompleted);

    boolean allSuccess = true;
    for (int j = 0; j < jobArr.length; ++j) {
        if (jobArr[j] == null) {
            continue;
        }
        boolean success = jobArr[j].isSuccessful();
        allSuccess &= success;
        if (!success) {
            String report = (j + " (" + jobArr[j].getJobName() + "): FAILED - Tracking: "
                    + jobArr[j].getTrackingURL());
            LOG.info(report);
        }
    }
    if (!allSuccess) {
        throw new IllegalStateException("Job failed!");
    }
}

From source file: org.apache.mahout.math.hadoop.MatrixColumnMeansJob.java

License: Apache License

/**
 * Job for calculating column-wise mean of a DistributedRowMatrix
 *
 * @param initialConf
 * @param inputPath
 *          path to DistributedRowMatrix input
 * @param outputVectorTmpPath
 *          path for temporary files created during job
 * @param vectorClass
 *          String of desired class for returned vector e.g. DenseVector,
 *          RandomAccessSparseVector (may be null for {@link DenseVector} )
 * @return Vector containing column-wise mean of DistributedRowMatrix
 */
public static Vector run(Configuration initialConf, Path inputPath, Path outputVectorTmpPath,
        String vectorClass) throws IOException {

    try {
        initialConf.set(VECTOR_CLASS, vectorClass == null ? DenseVector.class.getName() : vectorClass);

        Job job = new Job(initialConf, "MatrixColumnMeansJob");
        job.setJarByClass(MatrixColumnMeansJob.class);

        FileOutputFormat.setOutputPath(job, outputVectorTmpPath);

        outputVectorTmpPath.getFileSystem(job.getConfiguration()).delete(outputVectorTmpPath, true);
        job.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(job, outputVectorTmpPath);
        FileInputFormat.addInputPath(job, inputPath);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, outputVectorTmpPath);

        job.setMapperClass(MatrixColumnMeansMapper.class);
        job.setReducerClass(MatrixColumnMeansReducer.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(VectorWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(VectorWritable.class);
        job.submit();
        job.waitForCompletion(true);

        Path tmpFile = new Path(outputVectorTmpPath, "part-r-00000");
        SequenceFileValueIterator<VectorWritable> iterator = new SequenceFileValueIterator<VectorWritable>(
                tmpFile, true, initialConf);
        try {
            if (iterator.hasNext()) {
                return iterator.next().get();
            } else {
                return (Vector) Class.forName(vectorClass).getConstructor(int.class).newInstance(0);
            }
        } finally {
            Closeables.close(iterator, true);
        }
    } catch (IOException ioe) {
        throw ioe;
    } catch (Throwable thr) {
        throw new IOException(thr);
    }
}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.ABtDenseOutJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputAPaths, Path inputBtGlob, Path xiPath, Path sqPath,
        Path sbPath, Path outputPath, int aBlockRows, int minSplitSize, int k, int p, int outerProdBlockHeight,
        int numReduceTasks, boolean broadcastBInput)
        throws ClassNotFoundException, InterruptedException, IOException {

    JobConf oldApiJob = new JobConf(conf);

    Job job = new Job(oldApiJob);
    job.setJobName("ABt-job");
    job.setJarByClass(ABtDenseOutJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, inputAPaths);
    if (minSplitSize > 0) {
        FileInputFormat.setMinInputSplitSize(job, minSplitSize);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(SplitPartitionedWritable.class);
    job.setMapOutputValueClass(DenseBlockWritable.class);

    job.setOutputKeyClass(SplitPartitionedWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(ABtMapper.class);
    job.setReducerClass(QRReducer.class);

    job.getConfiguration().setInt(QJob.PROP_AROWBLOCK_SIZE, aBlockRows);
    job.getConfiguration().setInt(BtJob.PROP_OUTER_PROD_BLOCK_HEIGHT, outerProdBlockHeight);
    job.getConfiguration().setInt(QRFirstStep.PROP_K, k);
    job.getConfiguration().setInt(QRFirstStep.PROP_P, p);
    job.getConfiguration().set(PROP_BT_PATH, inputBtGlob.toString());

    /*
     * PCA-related options, MAHOUT-817
     */
    if (xiPath != null) {
        job.getConfiguration().set(PROP_XI_PATH, xiPath.toString());
        job.getConfiguration().set(PROP_SB_PATH, sbPath.toString());
        job.getConfiguration().set(PROP_SQ_PATH, sqPath.toString());
    }

    job.setNumReduceTasks(numReduceTasks);

    // broadcast Bt files if required.
    if (broadcastBInput) {
        job.getConfiguration().set(PROP_BT_BROADCAST, "y");

        FileSystem fs = FileSystem.get(inputBtGlob.toUri(), conf);
        FileStatus[] fstats = fs.globStatus(inputBtGlob);
        if (fstats != null) {
            for (FileStatus fstat : fstats) {
                /*
                 * new api is not enabled yet in our dependencies at this time, still
                 * using deprecated one
                 */
                DistributedCache.addCacheFile(fstat.getPath().toUri(), job.getConfiguration());
            }
        }
    }

    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("ABt job unsuccessful.");
    }

}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.ABtJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputAPaths, Path inputBtGlob, Path outputPath,
        int aBlockRows, int minSplitSize, int k, int p, int outerProdBlockHeight, int numReduceTasks,
        boolean broadcastBInput) throws ClassNotFoundException, InterruptedException, IOException {

    JobConf oldApiJob = new JobConf(conf);

    // MultipleOutputs
    // .addNamedOutput(oldApiJob,
    // QJob.OUTPUT_QHAT,
    // org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
    // SplitPartitionedWritable.class,
    // DenseBlockWritable.class);
    //
    // MultipleOutputs
    // .addNamedOutput(oldApiJob,
    // QJob.OUTPUT_RHAT,
    // org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
    // SplitPartitionedWritable.class,
    // VectorWritable.class);

    Job job = new Job(oldApiJob);
    job.setJobName("ABt-job");
    job.setJarByClass(ABtJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, inputAPaths);
    if (minSplitSize > 0) {
        FileInputFormat.setMinInputSplitSize(job, minSplitSize);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(SplitPartitionedWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);

    job.setOutputKeyClass(SplitPartitionedWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(ABtMapper.class);
    job.setCombinerClass(BtJob.OuterProductCombiner.class);
    job.setReducerClass(QRReducer.class);

    job.getConfiguration().setInt(QJob.PROP_AROWBLOCK_SIZE, aBlockRows);
    job.getConfiguration().setInt(BtJob.PROP_OUTER_PROD_BLOCK_HEIGHT, outerProdBlockHeight);
    job.getConfiguration().setInt(QRFirstStep.PROP_K, k);
    job.getConfiguration().setInt(QRFirstStep.PROP_P, p);
    job.getConfiguration().set(PROP_BT_PATH, inputBtGlob.toString());

    // number of reduce tasks doesn't matter. we don't actually
    // send anything to reducers.

    job.setNumReduceTasks(numReduceTasks);

    // broadcast Bt files if required.
    if (broadcastBInput) {
        job.getConfiguration().set(PROP_BT_BROADCAST, "y");

        FileSystem fs = FileSystem.get(inputBtGlob.toUri(), conf);
        FileStatus[] fstats = fs.globStatus(inputBtGlob);
        if (fstats != null) {
            for (FileStatus fstat : fstats) {
                /*
                 * new api is not enabled yet in our dependencies at this time, still
                 * using deprecated one
                 */
                DistributedCache.addCacheFile(fstat.getPath().toUri(), conf);
            }
        }
    }

    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("ABt job unsuccessful.");
    }

}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.BBtJob.java

License: Apache License

public static void run(Configuration conf, Path btPath, Path outputPath, int numReduceTasks)
        throws IOException, ClassNotFoundException, InterruptedException {

    Job job = new Job(conf);
    job.setJobName("BBt-job");
    job.setJarByClass(BBtJob.class);

    // input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, btPath);

    // map
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setMapperClass(BBtMapper.class);
    job.setReducerClass(BBtReducer.class);

    // combiner and reducer
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    // output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    job.getConfiguration().set("mapreduce.output.basename", OUTPUT_BBT);

    // run
    job.submit();
    job.waitForCompletion(false);
    if (!job.isSuccessful()) {
        throw new IOException("BBt job failed.");
    }
}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.BtJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputPathA, Path inputPathQJob, Path xiPath, Path outputPath,
        int minSplitSize, int k, int p, int btBlockHeight, int numReduceTasks, boolean broadcast,
        Class<? extends Writable> labelClass, boolean outputBBtProducts)
        throws ClassNotFoundException, InterruptedException, IOException {

    JobConf oldApiJob = new JobConf(conf);

    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_Q, org.apache.hadoop.mapred.SequenceFileOutputFormat.class,
            labelClass, VectorWritable.class);

    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_BBT,
                org.apache.hadoop.mapred.SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
        /*
         * MAHOUT-1067: if we are asked to output BBT products then named vector
         * names should be propagated to Q too so that UJob could pick them up
         * from there.
         */
        oldApiJob.setBoolean(PROP_NV, true);
    }
    if (xiPath != null) {
        // compute pca -related stuff as well
        MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_SQ,
                org.apache.hadoop.mapred.SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
        MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_SB,
                org.apache.hadoop.mapred.SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }

    /*
     * HACK: we use old api multiple outputs since they are not available in the
     * new api of either 0.20.2 or 0.20.203 but wrap it into a new api job so we
     * can use new api interfaces.
     */

    Job job = new Job(oldApiJob);
    job.setJobName("Bt-job");
    job.setJarByClass(BtJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, inputPathA);
    if (minSplitSize > 0) {
        FileInputFormat.setMinInputSplitSize(job, minSplitSize);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // WARN: tight hadoop integration here:
    job.getConfiguration().set("mapreduce.output.basename", OUTPUT_BT);

    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    job.getConfiguration().setInt(QJob.PROP_K, k);
    job.getConfiguration().setInt(QJob.PROP_P, p);
    job.getConfiguration().set(PROP_QJOB_PATH, inputPathQJob.toString());
    job.getConfiguration().setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.getConfiguration().setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, btBlockHeight);

    job.setNumReduceTasks(numReduceTasks);

    /*
     * PCA-related options, MAHOUT-817
     */
    if (xiPath != null) {
        job.getConfiguration().set(PROP_XI_PATH, xiPath.toString());
    }

    /*
     * we can broadcast Rhat files since all of them are required by each job,
     * but not Q files, which correspond to splits of A (so each split of A will
     * require only a particular Q file, a different one each time).
     */

    if (broadcast) {
        job.getConfiguration().set(PROP_RHAT_BROADCAST, "y");

        FileSystem fs = FileSystem.get(inputPathQJob.toUri(), conf);
        FileStatus[] fstats = fs.globStatus(new Path(inputPathQJob, QJob.OUTPUT_RHAT + "-*"));
        if (fstats != null) {
            for (FileStatus fstat : fstats) {
                /*
                 * new api is not enabled yet in our dependencies at this time, still
                 * using deprecated one
                 */
                DistributedCache.addCacheFile(fstat.getPath().toUri(), job.getConfiguration());
            }
        }
    }

    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("Bt job unsuccessful.");
    }
}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.QJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputPaths, Path sbPath, Path outputPath, int aBlockRows,
        int minSplitSize, int k, int p, long seed, int numReduceTasks)
        throws ClassNotFoundException, InterruptedException, IOException {

    JobConf oldApiJob = new JobConf(conf);
    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_QHAT,
            org.apache.hadoop.mapred.SequenceFileOutputFormat.class, SplitPartitionedWritable.class,
            DenseBlockWritable.class);
    MultipleOutputs.addNamedOutput(oldApiJob, OUTPUT_RHAT,
            org.apache.hadoop.mapred.SequenceFileOutputFormat.class, SplitPartitionedWritable.class,
            VectorWritable.class);

    Job job = new Job(oldApiJob);
    job.setJobName("Q-job");
    job.setJarByClass(QJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, inputPaths);
    if (minSplitSize > 0) {
        FileInputFormat.setMinInputSplitSize(job, minSplitSize);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(SplitPartitionedWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setOutputKeyClass(SplitPartitionedWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(QMapper.class);

    job.getConfiguration().setInt(PROP_AROWBLOCK_SIZE, aBlockRows);
    job.getConfiguration().setLong(PROP_OMEGA_SEED, seed);
    job.getConfiguration().setInt(PROP_K, k);
    job.getConfiguration().setInt(PROP_P, p);
    if (sbPath != null) {
        job.getConfiguration().set(PROP_SB_PATH, sbPath.toString());
    }

    /*
     * number of reduce tasks doesn't matter. we don't actually send anything to
     * reducers.
     */

    job.setNumReduceTasks(0 /* numReduceTasks */);

    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("Q job unsuccessful.");
    }

}