BwaPairedAlignment.java Source code

Java tutorial

Introduction

Here is the source code for BwaPairedAlignment.java

Source

/**
 * Copyright 2016 Jos Manuel Abun Mosquera <josemanuel.abuin@usc.es>
 * 
 * This file is part of SparkBWA.
 *
 * SparkBWA is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SparkBWA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with SparkBWA. If not, see <http://www.gnu.org/licenses/>.
 */

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.function.Function2;

import scala.Tuple2;

/**
 * Class to perform the alignment over a split from the RDD
 * @author Jos M. Abun
 * @return A RDD containing the resulting Sam files from the alignment.
 */
public class BwaPairedAlignment extends BwaAlignmentBase
        implements Function2<Integer, Iterator<Tuple2<String, String>>, Iterator<String>> {

    public BwaPairedAlignment(SparkContext context, Bwa bwaInterpreter) {
        super(context, bwaInterpreter);
    }

    /**
     * Code to run in each one of the mappers. This is, the alignment with the corresponding entry data
     * The entry data has to be written into the local filesystem
     */
    @Override
    public Iterator<String> call(Integer arg0, Iterator<Tuple2<String, String>> arg1) throws Exception {

        // STEP 1: Input fastq reads tmp file creation
        LOG.info("JMAbuin:: Tmp dir: " + this.tmpDir);
        String fastqFileName1 = this.tmpDir + this.appId + "-RDD" + arg0 + "_1";
        String fastqFileName2 = this.tmpDir + this.appId + "-RDD" + arg0 + "_2";

        LOG.info("JMAbuin:: Writing file: " + fastqFileName1);
        LOG.info("JMAbuin:: Writing file: " + fastqFileName2);

        File FastqFile1 = new File(fastqFileName1);
        File FastqFile2 = new File(fastqFileName2);

        FileOutputStream fos1;
        FileOutputStream fos2;

        BufferedWriter bw1;
        BufferedWriter bw2;

        ArrayList<String> returnedValues = new ArrayList<String>();

        //We write the data contained in this split into the two tmp files
        try {
            fos1 = new FileOutputStream(FastqFile1);
            fos2 = new FileOutputStream(FastqFile2);

            bw1 = new BufferedWriter(new OutputStreamWriter(fos1));
            bw2 = new BufferedWriter(new OutputStreamWriter(fos2));

            Tuple2<String, String> newFastqRead;

            while (arg1.hasNext()) {
                newFastqRead = arg1.next();

                bw1.write(newFastqRead._1.toString());
                bw1.newLine();

                bw2.write(newFastqRead._2.toString());
                bw2.newLine();
            }

            bw1.close();
            bw2.close();

            arg1 = null;

            returnedValues = this.runAlignmentProcess(arg0, fastqFileName1, fastqFileName2);

            // Delete temporary files, as they have now been copied to the
            // output directory
            LOG.info("JMAbuin:: Deleting file: " + fastqFileName1);
            FastqFile1.delete();
            LOG.info("JMAbuin:: Deleting file: " + fastqFileName2);
            FastqFile2.delete();

        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            LOG.error(e.toString());
        }

        return returnedValues.iterator();
    }
}