org.broadinstitute.sting.gatk.hadoop.GATKJobClient.java Source code

Introduction

Here is the source code for org.broadinstitute.sting.gatk.hadoop.GATKJobClient.java
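
GATKJobClient appears to be the job driver of the SeqInCloud port of the GATK pipeline to Hadoop. It parses the command-line options and then submits one MapReduce job per enabled stage: BWA alignment, splitting-index creation, coordinate sorting, indel realignment, duplicate marking, base-quality recalibration with optional UnifiedGenotyper variant calling, and a final merge of the per-task outputs. Individual stages can be skipped with the no-* options (-na, -nra, -nmd, -nqr, -nv, -nfv, -nm). Going by the option definitions and the built-in help string, an invocation might look roughly like the following (the angle-bracketed values are illustrative placeholders, not defaults):

hadoop jar /local/path/to/SeqInCloud.jar -djarloc <gatk_dep_jar> -bwaloc <bwa_binary> -r1 <reads_1.fastq> -r2 <reads_2.fastq> -rsize <bytes_per_read> -ref <reference_dir> -dbfile <knownsites_dir> -o <output_dir> -p Linux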

Source

/* Copyright (c) 2012-2013 by Virginia Polytechnic Institute and State
 * University
 * All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT REPRESENTATIONS 
 * OR WARRANTIES OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT 
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT, OR THE 
 * ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT 
 * DISCOVERABLE. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 * HOLDERS BE LIABLE FOR ANY CLAIM, INCIDENTAL OR CONSEQUENTIAL 
 * DAMAGES OF ANY KIND, OR OTHER LIABILITY, WHETHER IN AN ACTION 
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
 * IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.hadoop;

import java.io.*;
import java.net.*;
import java.lang.Integer;
import java.util.List;
import java.util.Map;

import org.broadinstitute.sting.gatk.hadoop.SplittingBAMIndexer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import org.broadinstitute.sting.gatk.hadoop.NLineXInputFormat;
import org.broadinstitute.sting.gatk.hadoop.hadoopsrc.InputSampler;
import org.broadinstitute.sting.gatk.hadoop.hadoopsrc.TotalOrderPartitioner;
import org.broadinstitute.sting.gatk.hadoop.hbamsrc.SAMRecordWritable;
import org.broadinstitute.sting.gatk.hadoop.IndelMapReduce.*;
import org.broadinstitute.sting.gatk.hadoop.RecalMapReduce.*;
import org.broadinstitute.sting.gatk.hadoop.RecalCovMapReduce.*;
import org.broadinstitute.sting.gatk.hadoop.VariantMapReduce.*;
import org.broadinstitute.sting.gatk.hadoop.BWAMapReduce.*;
import org.broadinstitute.sting.gatk.hadoop.ContigMapReduce.*;
import org.broadinstitute.sting.gatk.hadoop.IndexMapReduce.*;
import org.broadinstitute.sting.gatk.hadoop.SortOutputFormat;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.util.GenericOptionsParser;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import org.apache.commons.cli.*;

import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.BAMIndexer;
import net.sf.samtools.util.BlockCompressedStreamConstants;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;

public class GATKJobClient extends Configured implements Tool {

    static {
        Configuration.addDefaultResource("hdfs-default.xml");
        Configuration.addDefaultResource("hdfs-site.xml");
        Configuration.addDefaultResource("mapred-default.xml");
        Configuration.addDefaultResource("mapred-site.xml");
    }

    public static String BWAOutPath, SortBWAOutPath, BAMInputPath, IndelOutPath, RmdupOutPath, RecalOutPath,
            FinalBAMPath, readFile1, readFile2, outputDir, refFileName, gatk_binary_loc, bwa_binary_loc, refFileLoc,
            knownSitesLoc, fqInput, bamIndexFile, platform;

    public static boolean is_azure = true, printUsage = true, noalign = false, norealign = false, nomarkdup = false,
            noqrecab = false, novariant = false, nofvariant = false, nomresults = false, xVariantCall = false;

    public static int fq_read_size, nReducers = 0, nThreads = 0;

    public static long reads_per_split = 0; // If 0, calculated automatically

    static void validatePath(String path, Configuration conf) throws ParseException, IOException {
        Path checkPath = new Path(path);
        if (!checkPath.getFileSystem(conf).exists(checkPath)) {
            printUsage = false;
            throw new ParseException("File " + checkPath + " does not exist");
        }
    }

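    /**
     * Parses the command-line options with commons-cli, validates the
     * referenced HDFS paths, and records the pipeline settings in the static
     * fields above and in the supplied Configuration.
     */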
    static void parseCommandLineArgs(String[] argv, Configuration conf) {

        CommandLineParser parser = new PosixParser();

        Options options = new Options();

        Option gatkdLocOpt = OptionBuilder.withArgName("depjar_loc").hasArg()
                .withDescription("Complete HDFS path of gatk dependency jar").create("djarloc");
        options.addOption(gatkdLocOpt);
        Option bwaLocOpt = OptionBuilder.withArgName("bwa_loc").hasArg()
                .withDescription("Complete HDFS path of bwa binary or bwa.exe file").create("bwaloc");
        options.addOption(bwaLocOpt);
        Option fq1Opt = OptionBuilder.withArgName("fastq_file1").hasArg()
                .withDescription("Complete HDFS path or path relative to user directory for 1st fastq file")
                .create("r1");
        options.addOption(fq1Opt);
        Option fq2Opt = OptionBuilder.withArgName("fastq_file2").hasArg()
                .withDescription("Complete HDFS path or path relative to user directory for 2nd fastq file")
                .create("r2");
        options.addOption(fq2Opt);
        Option bamOpt = OptionBuilder.withArgName("bam_directory").hasArg()
                .withDescription(
                        "Complete HDFS directory path or path relative to user directory for input BAM file")
                .create("b");
        options.addOption(bamOpt);
        Option outOpt = OptionBuilder.withArgName("output_directory").hasArg()
                .withDescription("Complete HDFS path or path relative to user directory for output directory")
                .create("o");
        options.addOption(outOpt);
        Option rSizeOpt = OptionBuilder.withArgName("fastq_read_size").hasArg()
                .withDescription("Number of bytes of a read sequence in input FastQ file").create("rsize");
        options.addOption(rSizeOpt);
        Option rPSplitOpt = OptionBuilder.withArgName("reads_per_map_split").hasArg()
                .withDescription("Optional number of reads to be processed by a mapper").create("reads_per_split");
        options.addOption(rPSplitOpt);
        Option nRedOpt = OptionBuilder.withArgName("number_of_reducers").hasArg()
                .withDescription("Optional number of reducers").create("nred");
        options.addOption(nRedOpt);
        Option nThreadOpt = OptionBuilder.withArgName("number_of_threads").hasArg()
                .withDescription("Optional number of threads").create("nthreads");
        options.addOption(nThreadOpt);
        Option refFileOpt = OptionBuilder.withArgName("path_to_reference_dir").hasArg()
                .withDescription("Complete HDFS path of reference directory").create("ref");
        options.addOption(refFileOpt);
        Option kSiteFileOpt = OptionBuilder.withArgName("path_to_knownsites_dir").hasArg()
                .withDescription("Complete HDFS path of known-sites db directory").create("dbfile");
        options.addOption(kSiteFileOpt);

        Option platformOpt = OptionBuilder.withArgName("Linux/Windows").hasArg()
                .withDescription("Platform to run on").create("p");
        options.addOption(platformOpt);

        Option noAlignOpt = new Option("na", "noalign", false, "Don't run Alignment stage");
        options.addOption(noAlignOpt);

        Option noReAlignOpt = new Option("nra", "norealign", false, "Do not run Local Realignment stage");
        options.addOption(noReAlignOpt);

        Option noMarkDupOpt = new Option("nmd", "nomarkdup", false, "Do not run Mark Duplicates stage");
        options.addOption(noMarkDupOpt);

        Option noQRecabOpt = new Option("nqr", "noqrecab", false, "Do not run Quality Recalibration stage");
        options.addOption(noQRecabOpt);

        Option noVarOpt = new Option("nv", "novariant", false, "Do not run Structural Variant stage");
        options.addOption(noVarOpt);

        Option noFVarOpt = new Option("nfv", "nofvariant", false, "Do not run Filter Variant stage");
        options.addOption(noFVarOpt);

        Option noMerOpt = new Option("nm", "nomresults", false, "Do not Merge Results");
        options.addOption(noMerOpt);

        Option isXVariantOpt = new Option("xv", "xvariant", false,
                "Enable this flag if variant calling should be done independently for INDELs and SNPs");
        options.addOption(isXVariantOpt);

        try {
            // parse the command line arguments
            String[] args = new GenericOptionsParser(conf, options, argv).getRemainingArgs();
            CommandLine line = parser.parse(options, args);

            if (line.hasOption(noAlignOpt.getOpt()))
                noalign = true;
            if (line.hasOption(noReAlignOpt.getOpt()))
                norealign = true;
            if (line.hasOption(noMarkDupOpt.getOpt()))
                nomarkdup = true;
            if (line.hasOption(noQRecabOpt.getOpt()))
                noqrecab = true;
            if (line.hasOption(noVarOpt.getOpt()))
                novariant = true;
            if (line.hasOption(noFVarOpt.getOpt()))
                nofvariant = true;
            if (line.hasOption(noMerOpt.getOpt()))
                nomresults = true;

            if (line.hasOption(fq1Opt.getOpt()) && line.hasOption(bamOpt.getOpt())) {
                throw new ParseException(
                        "Invalid Usage: fastq file and BAM file cannot be given together as input");
            }
            if (line.hasOption(fq2Opt.getOpt()) && !line.hasOption(fq1Opt.getOpt())) {
                throw new ParseException("Invalid Usage: fastq file2 is invalid without fastq file1");
            }
            if (!line.hasOption(fq2Opt.getOpt()) && !line.hasOption(fq1Opt.getOpt())
                    && !line.hasOption(bamOpt.getOpt())) {
                throw new ParseException(
                        "Invalid Usage: Either the fastq file or BAM file has to be provided as input");
            }
            if (line.hasOption(gatkdLocOpt.getOpt())) {
                gatk_binary_loc = line.getOptionValue(gatkdLocOpt.getOpt());
                validatePath(gatk_binary_loc, conf);
            } else {
                throw new ParseException(
                        "Invalid Usage: GATK dependency jar location (-djarloc) is mandatory for running the pipeline");
            }

            if (!noalign) {
                if (line.hasOption(fq1Opt.getOpt())) {
                    readFile1 = line.getOptionValue(fq1Opt.getOpt());
                    validatePath(readFile1, conf);
                    fqInput = (new Path(readFile1).getParent()).toString();
                }
                if (line.hasOption(fq2Opt.getOpt())) {
                    readFile2 = line.getOptionValue(fq2Opt.getOpt());
                    conf.setBoolean("gatk.hadoop.pairedend", true);
                    validatePath(readFile2, conf);
                    conf.set("gatk.hadoop.readfile2", readFile2);
                    ;
                }
                if (line.hasOption(rSizeOpt.getOpt())) {
                    fq_read_size = Integer.parseInt(line.getOptionValue(rSizeOpt.getOpt()));
                } else {
                    throw new ParseException("Invalid Usage: read size (-rsize) is mandatory for Alignment");
                }
                if (line.hasOption(bwaLocOpt.getOpt())) {
                    bwa_binary_loc = line.getOptionValue(bwaLocOpt.getOpt());
                    validatePath(bwa_binary_loc, conf);
                } else {
                    throw new ParseException(
                            "Invalid Usage: bwa binary/exe location (-bwaloc) is mandatory for Alignment");
                }
                if (line.hasOption(rPSplitOpt.getOpt())) {
                    reads_per_split = Integer.parseInt(line.getOptionValue(rPSplitOpt.getOpt()));
                }
            }
            if (line.hasOption(nRedOpt.getOpt())) {
                nReducers = Integer.parseInt(line.getOptionValue(nRedOpt.getOpt()));
            }
            if (line.hasOption(nThreadOpt.getOpt())) {
                nThreads = Integer.parseInt(line.getOptionValue(nThreadOpt.getOpt()));
                conf.setInt("gatk.hadoop.nthreads", nThreads);
            }
            if (line.hasOption(bamOpt.getOpt())) {
                int rcount = 0;
                BAMInputPath = line.getOptionValue(bamOpt.getOpt());
                validatePath(BAMInputPath, conf);
                Path BAMPath = new Path(BAMInputPath);
                FileSystem fs = BAMPath.getFileSystem(conf);
                FileStatus[] content = fs.listStatus(BAMPath);
                for (int i = 0; i < content.length; i++) {
                    String filename = content[i].getPath().getName();
                    if (filename.endsWith(".bam")) {
                        String prefix = filename.substring(0, 6);
                        try {
                            Long value = Long.valueOf(prefix);
                        } catch (NumberFormatException e) {
                            String tmpFile = BAMInputPath + Path.SEPARATOR + String.format("%06d", rcount) + "-"
                                    + filename;
                            boolean rename = fs.rename(content[i].getPath(), new Path(tmpFile));
                        }
                        rcount++;
                    }
                }
            }
            if (line.hasOption(outOpt.getOpt())) {
                outputDir = line.getOptionValue(outOpt.getOpt());
                if (!(new Path(outputDir).getFileSystem(conf).mkdirs(new Path(outputDir)))) {
                    throw new Exception("Failed to create output directory " + outputDir);
                }
                if (!noalign) {
                    BWAOutPath = outputDir + Path.SEPARATOR + "AlignerOut";
                    SortBWAOutPath = outputDir + Path.SEPARATOR + "SortedAlignerOut";
                    BAMInputPath = outputDir + Path.SEPARATOR + "BAMInput";
                }
                IndelOutPath = outputDir + Path.SEPARATOR + "IndelRealignOut";
                RmdupOutPath = outputDir + Path.SEPARATOR + "DedupOut";
                RecalOutPath = outputDir + Path.SEPARATOR + "RecalibrationOut";
                FinalBAMPath = outputDir + Path.SEPARATOR + "FinalBAMOut";
            } else {
                throw new ParseException("Invalid Usage: output directory is mandatory");
            }
            if (line.hasOption(refFileOpt.getOpt())) {
                Path refFileDir = new Path(line.getOptionValue(refFileOpt.getOpt()));
                FileSystem fs = refFileDir.getFileSystem(conf);
                FileStatus[] content = fs.listStatus(refFileDir);
                for (int i = 0; i < content.length; i++) {
                    if ((content[i].getPath().getName()).endsWith(".fa")
                            || (content[i].getPath().getName()).endsWith(".fasta")) {
                        refFileLoc = content[i].getPath().toString();
                    }
                }
                if (refFileLoc == null) {
                    throw new ParseException("No .fa/.fasta file found in reference directory " + refFileDir);
                }
                validatePath(refFileLoc, conf);
                refFileName = refFileLoc.substring(0, refFileLoc.lastIndexOf("."));
            } else {
                throw new ParseException("Invalid Usage: reference fasta file is mandatory");
            }
            if (line.hasOption(kSiteFileOpt.getOpt())) {
                Path knownSitesDir = new Path(line.getOptionValue(kSiteFileOpt.getOpt()));
                FileSystem fs = knownSitesDir.getFileSystem(conf);
                FileStatus[] content = fs.listStatus(knownSitesDir);
                for (int i = 0; i < content.length; i++) {
                    if ((content[i].getPath().getName()).endsWith(".vcf")) {
                        knownSitesLoc = content[i].getPath().toString();
                    }
                }
                if (knownSitesLoc == null) {
                    throw new ParseException("No .vcf file found in known-sites directory " + knownSitesDir);
                }
                validatePath(knownSitesLoc, conf);
            }
            if (line.hasOption(platformOpt.getOpt())) {
                platform = line.getOptionValue(platformOpt.getOpt());
                if (platform.equalsIgnoreCase("Linux")) {
                    is_azure = false;
                    conf.setBoolean("gatk.hadoop.isazure", false);
                }
            }
            if (line.hasOption(isXVariantOpt.getOpt())) {
                xVariantCall = true;
            }
        } catch (ParseException exp) {
            System.out.println(exp.getMessage());
            if (printUsage) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("hadoop jar {/local/path/to/SeqInCloud.jar} {options}", options);
            }
            System.exit(-1);
        } catch (Exception exp) {
            System.out.println("Command line parsing error: " + exp.getMessage());
            System.exit(-1);
        }
    }

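    /**
     * Drives the pipeline: each enabled stage is submitted as a separate
     * MapReduce job, and the driver waits for one job to complete before
     * submitting the next.
     */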
    @Override
    public int run(String[] argv) throws Exception {
        try {
            Configuration conf;
            FileSystem srcFs, outFs, fs;
            Path inputPath = null, mergeOutFile, inputDir, partition = null, outputPath;
            int maxMapTasks, maxReduceTasks, max_splits = Integer.MAX_VALUE, granularity = 100;
            FileStatus[] content;
            ClusterStatus status;
            int numNodes, mapSlotsPerNode;
            long mapOutputBytes, iMBytesPerRed, mapOutBufSize, inputSize, cacheSize, startTime, blockSize, endTime,
                    splitSize;
            float inputBufpcnt;
            FSDataOutputStream out;
            FSDataInputStream in;
            SAMFileReader fileReader;
            InputSampler.Sampler<LongWritable, SAMRecordWritable> sampler;
            double sampling_frequency = 0.01;

            // Job object can be used for Aligner job if enabled
            conf = getConf();
            Job job = new Job(conf);

            parseCommandLineArgs(argv, conf);

            maxMapTasks = new JobClient(new JobConf(conf)).getClusterStatus().getMaxMapTasks();

            maxReduceTasks = new JobClient(new JobConf(conf)).getClusterStatus().getMaxReduceTasks();
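
            /*
             * Alignment stage: the FASTQ input is split with NLineXInputFormat
             * (reads_per_split * 4 lines per split), the reference index files
             * and the bwa binary are shipped through the DistributedCache, and
             * BWAPartitioner / BWAReducer write the aligned output as BAM
             * files under AlignerOut.
             */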
            if (!noalign) {
                System.out.println("Starting Alignment Job");
                startTime = System.currentTimeMillis();

                status = new JobClient(new JobConf(conf)).getClusterStatus();
                numNodes = status.getTaskTrackers();
                // Job specific setting of number of Reducers..
                if (nReducers == 0)
                    nReducers = numNodes;
                conf.setInt("mapred.reduce.tasks", nReducers);

                Path refPath = new Path(refFileLoc);
                fs = refPath.getFileSystem(conf);
                blockSize = fs.getFileStatus(refPath).getBlockSize();
                splitSize = blockSize;

                if (reads_per_split == 0) {
                    inputPath = new Path(readFile1);
                    long readSize = (inputPath.getFileSystem(conf)).getFileStatus(inputPath).getLen();
                    long numSplits = Math.round(readSize / splitSize);

                    if (numSplits < maxMapTasks)
                        numSplits = maxMapTasks;

                    if (numSplits < nReducers)
                        numSplits = nReducers;

                    long numReads = Math.round(readSize / (long) fq_read_size);
                    reads_per_split = numReads / numSplits;

                    // Total Order Partitioner
                    if ((double) reads_per_split <= (1 / sampling_frequency)) {
                        sampling_frequency = 1;
                        granularity = 1;
                    } else if (((double) reads_per_split > (1 / sampling_frequency))
                            && ((double) reads_per_split <= (1 / sampling_frequency * 100))) {
                        sampling_frequency = 0.1;
                        granularity = 10;
                    }
                }

                job.setJarByClass(GATKJobClient.class);
                job.setInputFormatClass(NLineXInputFormat.class);
                FileInputFormat.addInputPath(job, new Path(fqInput));
                FileOutputFormat.setOutputPath(job, new Path(BWAOutPath));

                DistributedCache.addCacheFile(new URI(refFileLoc + "#" + "ref.fa"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".amb#" + "ref.fa.amb"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".ann#" + "ref.fa.ann"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".bwt#" + "ref.fa.bwt"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".pac#" + "ref.fa.pac"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".sa#" + "ref.fa.sa"), job.getConfiguration());
                if (!is_azure) {
                    DistributedCache.addCacheFile(new URI(bwa_binary_loc + "#" + "bwa"), job.getConfiguration());
                    DistributedCache.addCacheFile(new URI(refFileLoc + ".rbwt#" + "ref.fa.rbwt"),
                            job.getConfiguration());
                    DistributedCache.addCacheFile(new URI(refFileLoc + ".rpac#" + "ref.fa.rpac"),
                            job.getConfiguration());
                    DistributedCache.addCacheFile(new URI(refFileLoc + ".rsa#" + "ref.fa.rsa"),
                            job.getConfiguration());
                } else {
                    DistributedCache.addCacheFile(new URI(bwa_binary_loc + "#" + "bwa.exe"),
                            job.getConfiguration());
                }
                DistributedCache.createSymlink(job.getConfiguration());

                // Setting local.cache.size - Add up the size of the files
                // distributed through the cache

                cacheSize = fs.getFileStatus(new Path(refFileLoc)).getLen()
                        + fs.getFileStatus(new Path(refFileLoc + ".amb")).getLen()
                        + fs.getFileStatus(new Path(refFileLoc + ".ann")).getLen()
                        + fs.getFileStatus(new Path(refFileLoc + ".bwt")).getLen()
                        + fs.getFileStatus(new Path(refFileLoc + ".pac")).getLen()
                        + fs.getFileStatus(new Path(refFileLoc + ".sa")).getLen();
                if (!is_azure) {
                    cacheSize = cacheSize + fs.getFileStatus(new Path(refFileLoc + ".rbwt")).getLen()
                            + fs.getFileStatus(new Path(refFileLoc + ".rpac")).getLen()
                            + fs.getFileStatus(new Path(refFileLoc + ".rsa")).getLen();
                }

                // Use long arithmetic: 8 * 1024 * 1024 * 1024 overflows int.
                if (cacheSize > 8L * 1024 * 1024 * 1024) {
                    conf.setLong("local.cache.size", cacheSize + (1024L * 1024 * 1024));
                }

                conf.setLong("mapred.task.timeout", 86400000L); // 24 hrs..
                conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                conf.setLong("mapred.line.input.format.linespermap", reads_per_split * 4);
                conf.setInt("dfs.datanode.socket.write.timeout", 600000);
                conf.setInt("dfs.socket.timeout", 600000);
                // conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                // conf.setBoolean("mapred.compress.map.output", true); // Default compression ratio 3.5:1

                job.setMapOutputKeyClass(LongWritable.class);
                job.setMapOutputValueClass(Text.class);
                job.setPartitionerClass(BWAPartitioner.class);
                job.setReducerClass(BWAReducer.class);
                job.setOutputKeyClass(NullWritable.class);
                job.setOutputValueClass(NullWritable.class);

                if (job.waitForCompletion(true)) {
                    System.out.println("BWA Alignment done");
                }

                content = fs.listStatus(new Path(BWAOutPath));

                for (int i = 0; i < content.length; i++) {
                    if (!((content[i].getPath().getName()).endsWith(".bam"))
                            && !((content[i].getPath().getName()).startsWith("_"))) {
                        fs.delete(content[i].getPath(), false);
                    }
                }
                endTime = System.currentTimeMillis();
                System.out.println("BWA Alignment took: " + (endTime - startTime));
                startTime = System.currentTimeMillis();
                System.out.println("Starting Splitting BAM Indexing Job");
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();
                inputPath = new Path(BWAOutPath);
                FileInputFormat.addInputPath(job, inputPath);
                job.setInputFormatClass(WholeFileInputFormat.class);

                Path output = new Path(outputDir + Path.SEPARATOR + "DeleteThisDir1");
                FileOutputFormat.setOutputPath(job, output);
                conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                conf.setInt("mapred.reduce.tasks", 0);
                conf.setInt("gatk.hadoop.granularity", granularity);
                conf.setBoolean("gatk.hadoop.issindex", true);
                conf.setBoolean("gatk.hadoop.isindex", false);
                conf.setBoolean("gatk.hadoop.ismarkdup", false);

                job.setMapperClass(IndexMapper.class);
                job.setMapOutputKeyClass(NullWritable.class);
                job.setMapOutputValueClass(NullWritable.class);

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());

                if (job.waitForCompletion(true)) {
                    System.out.println("SplittingBAM Indexing job done");
                }
                output.getFileSystem(conf).delete(output, true);

                endTime = System.currentTimeMillis();
                System.out.println("Splitting BAM Indexing took: " + (endTime - startTime));

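                /*
                 * Sort stage: ContigInputFormat / ContigPartitioner route
                 * records by contig, with one reducer per sequence in the BAM
                 * header, and SortReducer / SortOutputFormat write sorted BAM
                 * files to SortedAlignerOut.
                 */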
                startTime = System.currentTimeMillis();
                System.out.println("Starting Sort Job");
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();

                if (norealign && nomarkdup && noqrecab && novariant && !nomresults)
                    conf.setBoolean("gatk.hadoop.ismerge", true);
                inputPath = new Path(BWAOutPath);
                FileInputFormat.addInputPath(job, inputPath);
                FileOutputFormat.setOutputPath(job, new Path(SortBWAOutPath));
                job.setInputFormatClass(ContigInputFormat.class);
                job.setPartitionerClass(ContigPartitioner.class);

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());

                fs = inputPath.getFileSystem(conf);
                content = fs.listStatus(inputPath);
                for (int i = 0; i < content.length; i++) {
                    if (content[i].getPath().getName().endsWith(".bam")) {
                        in = fs.open(content[i].getPath());
                        fileReader = new SAMFileReader(in);
                        List<SAMSequenceRecord> sequences = fileReader.getFileHeader()
                                .getSequenceDictionary().getSequences();
                        // One reducer per reference sequence (contig).
                        conf.setInt("mapred.reduce.tasks", sequences.size());
                        fileReader.close();

                        break;
                    }
                }

                conf.setLong("mapred.task.timeout", 86400000L);
                conf.setInt("dfs.datanode.socket.write.timeout", 600000);
                conf.setInt("dfs.socket.timeout", 600000);
                //conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                //conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                //conf.setBoolean("mapred.compress.map.output", true); // Default compression ratio 3.5:1

                job.setReducerClass(SortReducer.class);
                job.setMapOutputKeyClass(LongWritable.class);
                job.setOutputKeyClass(NullWritable.class);
                job.setOutputValueClass(SAMRecordWritable.class);
                job.setOutputFormatClass(SortOutputFormat.class);

                if (job.waitForCompletion(true)) {
                    System.out.println("Sort completed successfully");
                }
                endTime = System.currentTimeMillis();
                System.out.println("Sort job took: " + (endTime - startTime));
            }

            if (!norealign) {
                if (!noalign)
                    BAMInputPath = SortBWAOutPath;

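                /*
                 * Indexing stage: another map-only IndexMapper job that builds
                 * both the regular BAM index (gatk.hadoop.isindex) and the
                 * splitting index (gatk.hadoop.issindex) for the realigner
                 * input; the placeholder output (DeleteThisDir2) is deleted
                 * afterwards.
                 */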
                startTime = System.currentTimeMillis();
                System.out.println("Starting Indexing Job");
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();
                inputPath = new Path(BAMInputPath);
                FileInputFormat.addInputPath(job, inputPath);
                job.setInputFormatClass(WholeFileInputFormat.class);
                Path output = new Path(outputDir + Path.SEPARATOR + "DeleteThisDir2");
                FileOutputFormat.setOutputPath(job, output);

                conf.setLong("mapred.task.timeout", 86400000L);
                conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                conf.setInt("mapred.reduce.tasks", 0);
                conf.setBoolean("gatk.hadoop.isindex", true);
                conf.setBoolean("gatk.hadoop.issindex", true);
                conf.setBoolean("gatk.hadoop.ismarkdup", false);

                job.setMapperClass(IndexMapper.class);
                job.setOutputKeyClass(NullWritable.class);
                job.setOutputValueClass(NullWritable.class);

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());
                if (job.waitForCompletion(true)) {
                    System.out.println("Indexing job done");
                }
                output.getFileSystem(conf).delete(output, true);

                endTime = System.currentTimeMillis();
                System.out.println("Indexing job took: " + (endTime - startTime));

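                /*
                 * Realignment stage: IndelMapper (apparently wrapping GATK's
                 * local realignment) processes loci-based splits; the output is
                 * globally ordered via TotalOrderPartitioner, whose partition
                 * file comes from sampling the input with
                 * InputSampler.IntervalSampler, and SortReducer /
                 * SortOutputFormat write the realigned BAMs to IndelRealignOut.
                 */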
                startTime = System.currentTimeMillis();
                System.out.println("Starting Realigner Job");
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();

                inputPath = new Path(BAMInputPath);
                FileInputFormat.addInputPath(job, inputPath);

                job.setInputFormatClass(BAMInputFormat.class);

                srcFs = new Path(outputDir).getFileSystem(conf);
                if (!srcFs.mkdirs(new Path(outputDir + Path.SEPARATOR + "Partition")))
                    System.out.println("mkdir failed");
                inputDir = new Path(outputDir + Path.SEPARATOR + "Partition");
                inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
                partition = new Path(inputDir, "_partition");
                job.setPartitionerClass(TotalOrderPartitioner.class);
                TotalOrderPartitioner.setPartitionFile(conf, partition);

                try {
                    URI partitionURI = new URI(partition.toString() + "#_partition");
                    DistributedCache.addCacheFile(partitionURI, conf);
                } catch (URISyntaxException e) {
                    assert false;
                }

                if (nReducers == 0) {
                    if (!nomarkdup || !noqrecab || !novariant) {
                        conf.setInt("mapred.reduce.tasks", maxMapTasks);
                    } else {
                        conf.setInt("mapred.reduce.tasks", Math.max(1, maxReduceTasks * 9 / 10));
                    }
                } else {
                    conf.setInt("mapred.reduce.tasks", nReducers);
                }

                conf.setLong("mapred.task.timeout", 86400000L);
                conf.setInt("dfs.datanode.socket.write.timeout", 600000);
                conf.setInt("dfs.socket.timeout", 600000);
                conf.setBoolean("mapred.compress.map.output", true); // Default compression ratio 3.5:1

                if (nomarkdup && noqrecab && novariant && !nomresults)
                    conf.setBoolean("gatk.hadoop.ismerge", true);
                conf.setBoolean("gatk.hadoop", true);
                conf.setBoolean("gatk.hadoop.isazure", is_azure);
                job.setMapperClass(IndelMapper.class);
                job.setReducerClass(SortReducer.class);
                job.setMapOutputKeyClass(LongWritable.class);
                job.setOutputKeyClass(NullWritable.class);
                job.setOutputValueClass(SAMRecordWritable.class);
                job.setOutputFormatClass(SortOutputFormat.class);
                FileOutputFormat.setOutputPath(job, new Path(IndelOutPath));

                sampler = new InputSampler.IntervalSampler<LongWritable, SAMRecordWritable>(sampling_frequency,
                        max_splits);
                InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job, sampler);
                job.setInputFormatClass(LociInputFormat.class);

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + "#" + "ref.fa"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".fai#" + "ref.fa.fai"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileName + ".dict#" + "ref.dict"), job.getConfiguration());
                DistributedCache.createSymlink(job.getConfiguration());

                if (job.waitForCompletion(true)) {
                    System.out.println("Indel realignment done");
                }
                endTime = System.currentTimeMillis();
                System.out.println("Indel Realigner took: " + (endTime - startTime));
            }

            if (!nomarkdup || !noqrecab || !novariant) {
                /* 
                 * MarkDuplicate and Indexing Job 
                 * FixMateInformation is not required as it is handled
                 * automatically by GATK after IndelRealignment.
                 */
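                /*
                 * Depending on the flags, this single map-only IndexMapper job
                 * marks duplicates (gatk.hadoop.ismarkdup, output under
                 * DedupOut) and/or builds the BAM and splitting indexes needed
                 * by the recalibration and genotyping stages.
                 */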
                System.out.println("Starting MarkDup/Indexing job");
                startTime = System.currentTimeMillis();
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();
                if (!norealign)
                    inputPath = new Path(IndelOutPath);
                else if (!noalign)
                    inputPath = new Path(SortBWAOutPath);
                else
                    inputPath = new Path(BAMInputPath);
                FileInputFormat.addInputPath(job, inputPath);
                job.setInputFormatClass(WholeFileInputFormat.class);

                conf.setLong("mapred.task.timeout", 86400000L);
                conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                conf.setInt("mapred.reduce.tasks", 0);
                if (!nomarkdup) {
                    System.out.println("Starting MarkDuplicates job");
                    conf.setBoolean("gatk.hadoop.ismarkdup", true);
                    FileOutputFormat.setOutputPath(job, new Path(RmdupOutPath));
                }
                if (!noqrecab || !novariant) {
                    conf.setBoolean("gatk.hadoop.issindex", true);
                    conf.setBoolean("gatk.hadoop.isindex", true);
                    if (nomarkdup) {
                        System.out.println("Starting Indexing job");
                        FileOutputFormat.setOutputPath(job,
                                new Path(outputDir + Path.SEPARATOR + "DeleteThisDir3"));
                    }
                }
                job.setMapperClass(IndexMapper.class);
                job.setOutputKeyClass(NullWritable.class);
                job.setOutputValueClass(NullWritable.class);

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());
                if (job.waitForCompletion(true)) {
                    System.out.println("Markdup/Indexing job done !!!");
                }
                Path toDelete = new Path(outputDir + Path.SEPARATOR + "DeleteThisDir3");
                fs = toDelete.getFileSystem(conf);
                if (fs.exists(toDelete)) {
                    fs.delete(toDelete, true);
                }

                if (!nomarkdup) {
                    Path rmdupOutPath = new Path(RmdupOutPath);
                    fs = rmdupOutPath.getFileSystem(conf);
                    content = fs.listStatus(rmdupOutPath);

                    for (int i = 0; i < content.length; i++) {
                        if ((content[i].getPath().getName()).startsWith("part")) {
                            fs.delete(content[i].getPath(), false);
                        }
                    }
                    endTime = System.currentTimeMillis();
                    System.out.println("MarkDuplicates took: " + (endTime - startTime));
                } else {
                    endTime = System.currentTimeMillis();
                    System.out.println("Indexing took: " + (endTime - startTime));
                }
            }

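            /*
             * Recalibration pass 1 (CountCovariates): RecalCovMapper emits
             * covariate counts, RecalCovCombiner / RecalCovReducer aggregate
             * them into a single recalibration table under CovariateOut, with
             * the known-sites VCF and the reference supplied through the
             * DistributedCache.
             */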
            if (!noqrecab) {
                startTime = System.currentTimeMillis();
                System.out.println("Starting Recal - Count Covariates Job");
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();
                if (!nomarkdup)
                    inputPath = new Path(RmdupOutPath);
                else if (!norealign)
                    inputPath = new Path(IndelOutPath);
                else if (!noalign)
                    inputPath = new Path(SortBWAOutPath);
                else
                    inputPath = new Path(BAMInputPath);
                FileInputFormat.addInputPath(job, inputPath);
                job.setInputFormatClass(LociInputFormat.class);

                conf.setLong("local.cache.size", 20106127360L);
                conf.setInt("mapred.reduce.tasks", 1);
                conf.setLong("mapred.task.timeout", 86400000L);
                conf.set("gatk.hadoop.outputpath", outputDir);
                // conf.setInt("mapred.tasktracker.map.tasks.maximum", 1);
                // conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 1);
                // conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                // conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                // conf.setBoolean("mapred.compress.map.output", true); // Default compression ration 3.5:1

                conf.setBoolean("gatk.hadoop", true);
                conf.setBoolean("gatk.hadoop.isazure", is_azure);
                job.setMapperClass(RecalCovMapper.class);
                job.setCombinerClass(RecalCovCombiner.class);
                job.setReducerClass(RecalCovReducer.class);
                job.setMapOutputKeyClass(Text.class);
                job.setOutputKeyClass(NullWritable.class);
                job.setOutputValueClass(Text.class);
                FileOutputFormat.setOutputPath(job, new Path(outputDir + Path.SEPARATOR + "CovariateOut"));

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());
                // Standard inputs
                DistributedCache.addCacheFile(new URI(knownSitesLoc + "#" + "ref.vcf"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(knownSitesLoc + ".idx#" + "ref.vcf.idx"),
                        job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + "#" + "ref.fa"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".fai#" + "ref.fa.fai"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileName + ".dict#" + "ref.dict"), job.getConfiguration());

                DistributedCache.createSymlink(job.getConfiguration());

                if (job.waitForCompletion(true)) {
                    System.out.println("CountCovariates done");
                }
                endTime = System.currentTimeMillis();
                System.out.println("CountCovariates took: " + (endTime - startTime));
            }

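            /*
             * Recalibration pass 2 (TableRecalibration) and, when enabled,
             * UnifiedGenotyper variant calling in the same pass
             * (gatk.hadoop.variant / gatk.hadoop.fvariant). Recalibrated reads
             * are re-sorted with a TotalOrderPartitioner and written to
             * RecalibrationOut; variant output apparently lands under
             * VariantOut / IVariantOut for the merge jobs below.
             */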
            if (!noqrecab || !novariant) {
                startTime = System.currentTimeMillis();
                System.out.println("Starting Table Recalibration / Unified Genotyper Job");
                if (!nomarkdup)
                    inputPath = new Path(RmdupOutPath);
                else if (!norealign)
                    inputPath = new Path(IndelOutPath);
                else if (!noalign)
                    inputPath = new Path(SortBWAOutPath);
                else
                    inputPath = new Path(BAMInputPath);
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();
                FileInputFormat.addInputPath(job, inputPath);

                if (!noqrecab) {
                    conf.setBoolean("gatk.hadoop.recab", true);
                    if (norealign) {
                        job.setInputFormatClass(BAMInputFormat.class);
                        srcFs = new Path(outputDir).getFileSystem(conf);
                        if (!srcFs.mkdirs(new Path(outputDir + "/" + "Partition")))
                            System.out.println("mkdir failed");
                    } else {
                        job.setInputFormatClass(LociInputFormat.class);
                    }
                    inputDir = new Path(outputDir + "/" + "Partition");
                    inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
                    partition = new Path(inputDir, "_partition");
                    job.setPartitionerClass(TotalOrderPartitioner.class);
                    TotalOrderPartitioner.setPartitionFile(conf, partition);
                    try {
                        URI partitionURI = new URI(partition.toString() + "#_partition");
                        DistributedCache.addCacheFile(partitionURI, conf);
                    } catch (URISyntaxException e) {
                        assert false;
                    }

                    if (nReducers == 0) {
                        conf.setInt("mapred.reduce.tasks", maxMapTasks);
                    } else {
                        conf.setInt("mapred.reduce.tasks", nReducers);
                    }
                    conf.setBoolean("mapred.compress.map.output", true); // Default compression ratio 3.5:1
                    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
                    if (!nomresults)
                        conf.setBoolean("gatk.hadoop.ismerge", true);
                    job.setReducerClass(SortReducer.class);
                    job.setMapOutputKeyClass(LongWritable.class);
                    job.setOutputKeyClass(NullWritable.class);
                    job.setOutputValueClass(SAMRecordWritable.class);
                    job.setOutputFormatClass(SortOutputFormat.class);
                    FileOutputFormat.setOutputPath(job, new Path(RecalOutPath));
                } else {
                    job.setInputFormatClass(LociInputFormat.class);
                    conf.setInt("mapred.reduce.tasks", 0);
                    FileOutputFormat.setOutputPath(job, new Path(outputDir + Path.SEPARATOR + "DeleteThisDir4"));
                }

                job.setMapperClass(RecalMapper.class);
                conf.setLong("mapred.task.timeout", 86400000L);
                conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                conf.setInt("dfs.datanode.socket.write.timeout", 600000);
                conf.setInt("dfs.socket.timeout", 600000);

                conf.set("gatk.hadoop.outputpath", outputDir);
                conf.setBoolean("gatk.hadoop", true);
                conf.setBoolean("gatk.hadoop.isazure", is_azure);
                if (!novariant) {
                    conf.setBoolean("gatk.hadoop.variant", true);
                    if (!nofvariant)
                        conf.setBoolean("gatk.hadoop.fvariant", true);
                    conf.setInt("gatk.hadoop.nthreads", nThreads);
                    conf.setBoolean("gatk.hadoop.xvariant", xVariantCall);
                }

                if (!noqrecab && norealign) {
                    sampler = new InputSampler.IntervalSampler<LongWritable, SAMRecordWritable>(sampling_frequency,
                            max_splits);
                    InputSampler.<LongWritable, SAMRecordWritable>writePartitionFile(job, sampler);
                    job.setInputFormatClass(LociInputFormat.class);
                }

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());
                // Standard inputs
                DistributedCache.addCacheFile(new URI(refFileLoc + "#" + "ref.fa"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".fai#" + "ref.fa.fai"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileName + ".dict#" + "ref.dict"), job.getConfiguration());

                DistributedCache.createSymlink(job.getConfiguration());

                if (job.waitForCompletion(true)) {
                    System.out.println("TableRecalibration Job done !!");
                }
                endTime = System.currentTimeMillis();
                Path toDelete = new Path(outputDir + Path.SEPARATOR + "DeleteThisDir4");
                fs = toDelete.getFileSystem(conf);
                if (fs.exists(toDelete)) {
                    fs.delete(toDelete, true);
                }
                System.out.println("TableRecalibraion / UnifiedGenotyper job took: " + (endTime - startTime));
            }
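
            /*
             * Variant merge stage: a single VariantReducer combines the
             * per-task VCF fragments under VariantOut into FinalVariantOut;
             * with -xv enabled, a second identical job merges the INDEL calls
             * from IVariantOut into FinalIVariantOut.
             */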
            if (!novariant && !nomresults) {
                startTime = System.currentTimeMillis();
                System.out.println("Merge Variant Job");
                job = new Job();
                job.setJarByClass(GATKJobClient.class);
                conf = job.getConfiguration();
                inputPath = new Path(outputDir + Path.SEPARATOR + "VariantOut");
                FileInputFormat.addInputPath(job, inputPath);
                job.setInputFormatClass(WholeFileInputFormat.class);

                conf.setInt("mapred.reduce.tasks", 1);
                conf.setLong("mapred.task.timeout", 86400000L);
                conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

                conf.setBoolean("gatk.hadoop", true);
                conf.setBoolean("gatk.hadoop.isazure", is_azure);
                job.setReducerClass(VariantReducer.class);
                job.setMapOutputKeyClass(LongWritable.class);
                job.setMapOutputValueClass(Text.class);
                job.setOutputKeyClass(NullWritable.class);
                job.setOutputValueClass(NullWritable.class);
                FileOutputFormat.setOutputPath(job, new Path(outputDir + Path.SEPARATOR + "FinalVariantOut"));

                DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());
                // Standard inputs
                DistributedCache.addCacheFile(new URI(refFileLoc + "#" + "ref.fa"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileLoc + ".fai#" + "ref.fa.fai"), job.getConfiguration());
                DistributedCache.addCacheFile(new URI(refFileName + ".dict#" + "ref.dict"), job.getConfiguration());

                DistributedCache.createSymlink(job.getConfiguration());

                if (job.waitForCompletion(true)) {
                    System.out.println("Merge Variants done");
                }
                endTime = System.currentTimeMillis();
                System.out.println("MergeVariant job took: " + (endTime - startTime));

                if (xVariantCall && !novariant && !nomresults) {
                    startTime = System.currentTimeMillis();

                    System.out.println("Merge INDEL Variant Job");
                    job = new Job();
                    job.setJarByClass(GATKJobClient.class);
                    conf = job.getConfiguration();
                    inputPath = new Path(outputDir + Path.SEPARATOR + "IVariantOut");
                    FileInputFormat.addInputPath(job, inputPath);
                    job.setInputFormatClass(WholeFileInputFormat.class);

                    conf.setInt("mapred.reduce.tasks", 1);
                    conf.setLong("mapred.task.timeout", 86400000L);
                    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
                    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

                    conf.setBoolean("gatk.hadoop", true);
                    conf.setBoolean("gatk.hadoop.isazure", is_azure);
                    job.setReducerClass(VariantReducer.class);
                    job.setMapOutputKeyClass(LongWritable.class);
                    job.setMapOutputValueClass(Text.class);
                    job.setOutputKeyClass(NullWritable.class);
                    job.setOutputValueClass(NullWritable.class);
                    FileOutputFormat.setOutputPath(job, new Path(outputDir + Path.SEPARATOR + "FinalIVariantOut"));

                    DistributedCache.addArchiveToClassPath(new Path(gatk_binary_loc), job.getConfiguration());
                    // Standard inputs
                    DistributedCache.addCacheFile(new URI(refFileLoc + "#" + "ref.fa"), job.getConfiguration());
                    DistributedCache.addCacheFile(new URI(refFileLoc + ".fai#" + "ref.fa.fai"),
                            job.getConfiguration());
                    DistributedCache.addCacheFile(new URI(refFileName + ".dict#" + "ref.dict"),
                            job.getConfiguration());

                    DistributedCache.createSymlink(job.getConfiguration());

                    if (job.waitForCompletion(true)) {
                        System.out.println("Merge INDEL Variants done");
                    }
                    endTime = System.currentTimeMillis();
                    System.out.println("MergeINDELVariant job took: " + (endTime - startTime));
                }
            }

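            /*
             * BAM merge stage: the per-task BAM outputs of the last enabled
             * stage are concatenated byte-for-byte into
             * FinalBAMOut/GATKAnalysisResult.bam and terminated with an empty
             * BGZF block (valid because the BAM parts are BGZF-compressed).
             */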
            if (!nomresults) {
                startTime = System.currentTimeMillis();
                System.out.println("Starting Merge BAM Job");

                outputPath = new Path(FinalBAMPath);
                outFs = outputPath.getFileSystem(conf);

                if (!outFs.mkdirs(outputPath))
                    System.out.println("mkdir failed");
                // There is currently no support for merging MarkDuplicates output
                // from the job client; that would need a separate MR job.
                if (!noqrecab)
                    inputPath = new Path(RecalOutPath);
                else if (!norealign)
                    inputPath = new Path(IndelOutPath);
                else if (!noalign)
                    inputPath = new Path(SortBWAOutPath);
                else if (!nomarkdup)
                    throw new Exception("Merge not implemented MarkDuplicates output.");
                else if (noqrecab && noalign && norealign && novariant && nomarkdup && nofvariant)
                    inputPath = new Path(BAMInputPath);

                fs = inputPath.getFileSystem(conf);

                content = fs.listStatus(inputPath);
                mergeOutFile = new Path(FinalBAMPath, "GATKAnalysisResult.bam");

                Path p = null;
                int nfiles = 0;
                for (int i = 0; i < content.length; i++) {
                    p = content[i].getPath();
                    ++nfiles;
                }

                if (nfiles == 1) {
                    boolean rename = fs.rename(p, mergeOutFile);
                } else {
                    out = outFs.create(mergeOutFile, true);

                    for (int i = 0; i < content.length; i++) {
                        p = content[i].getPath();
                        if ((p.getName()).endsWith(".bam")) {
                            in = fs.open(p);
                            IOUtils.copyBytes(in, out, conf, false);
                            in.close();
                        }
                    }

                    out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
                    out.close();
                }

                endTime = System.currentTimeMillis();
                System.out.println("Final Merge took: " + (endTime - startTime));
            }
            System.out.println("JobCompleted");
        } catch (IOException e) {
            System.err.printf("Hadoop Error : %s\n", e);
            return -1;
        } catch (InterruptedException e) {
            System.err.printf("Hadoop Error : %s\n", e);
            return -1;
        } catch (ClassNotFoundException e) {
            System.err.printf("Hadoop Error : %s\n", e);
            return -1;
        } catch (Exception e) {
            System.err.printf("Hadoop Error : %s\n", e);
            return -1;
        }
        return 0;
    }

    public static void main(String[] argv) throws Exception {
        int exitCode = ToolRunner.run(new GATKJobClient(), argv);
        System.exit(exitCode);
    }
}