savant.data.sources.BAMDataSource.java Source code

Java tutorial

Introduction

Here is the source code for savant.data.sources.BAMDataSource.java

Source

/**
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package savant.data.sources;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import net.sf.samtools.*;
import net.sf.samtools.util.SeekableStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import savant.api.adapter.BAMDataSourceAdapter;
import savant.api.adapter.RangeAdapter;
import savant.api.adapter.RecordFilterAdapter;
import savant.api.data.DataFormat;
import savant.api.util.Resolution;
import savant.controller.LocationController;
import savant.data.types.BAMIntervalRecord;
import savant.util.IndexCache;
import savant.util.MiscUtils;
import savant.util.NetworkUtils;

/**
 * Class to represent a track of BAM intervals. Uses SAMTools to read data
 * within a range.
 *
 * @author vwilliams
 */
public class BAMDataSource extends DataSource<BAMIntervalRecord> implements BAMDataSourceAdapter {

    private static final Log LOG = LogFactory.getLog(BAMDataSource.class);
    private SAMFileReader samFileReader;
    private SAMFileHeader samFileHeader;
    private URI uri;

    public BAMDataSource(URI uri) throws IOException {
        this.uri = uri.normalize();

        File indexFile = IndexCache.getIndexFile(uri, "bai", "bam");
        SeekableStream stream = NetworkUtils.getSeekableStreamForURI(uri);
        samFileReader = new SAMFileReader(stream, indexFile, false);
        samFileReader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
        samFileHeader = samFileReader.getFileHeader();
    }

    @Override
    public List<BAMIntervalRecord> getRecords(String reference, RangeAdapter range, Resolution resolution,
            RecordFilterAdapter filt) throws InterruptedException {

        SAMRecordIterator recordIterator = null;
        List<BAMIntervalRecord> result = new ArrayList<BAMIntervalRecord>();
        try {
            // todo: actually use the given reference

            String ref;
            if (getReferenceNames().contains(reference)) {
                ref = reference;
            } else if (getReferenceNames().contains(MiscUtils.homogenizeSequence(reference))) {
                ref = MiscUtils.homogenizeSequence(reference);
            } else {
                ref = guessSequence();
            }

            recordIterator = samFileReader.query(ref, range.getFrom(), range.getTo(), false);

            SAMRecord samRecord;
            BAMIntervalRecord record;
            while (recordIterator.hasNext()) {

                samRecord = recordIterator.next();

                // Don't keep unmapped reads
                if (samRecord.getReadUnmappedFlag()) {
                    continue;
                }

                record = BAMIntervalRecord.valueOf(samRecord);
                if (filt == null || filt.accept(record)) {
                    result.add(record);
                }
                if (Thread.interrupted()) {
                    throw new InterruptedException();
                }
            }
        } finally {
            if (recordIterator != null) {
                recordIterator.close();
            }
        }

        return result;
    }

    /*
     * Use the length of the reference genome to guess which sequence from the dictionary
     * we should search for reads.
     */
    private String guessSequence() {

        // Find out what sequence we're using, by reading the header for sequences and lengths
        LocationController locationController = LocationController.getInstance();
        int referenceSequenceLength = locationController.getMaxRangeEnd() - locationController.getMaxRangeStart();
        assert Math.abs(referenceSequenceLength) < Integer.MAX_VALUE;

        String sequenceName = null;
        SAMSequenceDictionary sequenceDictionary = samFileHeader.getSequenceDictionary();
        // find the first sequence with the smallest difference in length from our reference sequence
        int leastDifferenceInSequenceLength = Integer.MAX_VALUE;
        int closestSequenceIndex = Integer.MAX_VALUE;
        int i = 0;
        for (SAMSequenceRecord sequenceRecord : sequenceDictionary.getSequences()) {
            int lengthDelta = Math.abs(sequenceRecord.getSequenceLength() - (int) referenceSequenceLength);
            if (lengthDelta < leastDifferenceInSequenceLength) {
                leastDifferenceInSequenceLength = lengthDelta;
                closestSequenceIndex = i;
            }
            i++;
        }
        if (closestSequenceIndex != Integer.MAX_VALUE) {
            sequenceName = sequenceDictionary.getSequence(closestSequenceIndex).getSequenceName();
        }
        return sequenceName;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void close() {

        if (samFileReader != null) {
            samFileReader.close();
        }
    }

    Set<String> referenceNames;

    @Override
    public Set<String> getReferenceNames() {

        if (referenceNames == null) {
            SAMSequenceDictionary ssd = samFileHeader.getSequenceDictionary();

            List<SAMSequenceRecord> seqs = ssd.getSequences();

            referenceNames = new HashSet<String>();
            for (SAMSequenceRecord ssr : seqs) {
                referenceNames.add(ssr.getSequenceName());
            }
        }
        return referenceNames;
    }

    @Override
    public URI getURI() {
        return uri;
    }

    @Override
    public final DataFormat getDataFormat() {
        return DataFormat.ALIGNMENT;
    }

    @Override
    public final String[] getColumnNames() {
        return new String[] { "Read Name", "Sequence", "Length", "First of Pair", "Position", "Strand +",
                "Mapping Quality", "Base Qualities", "CIGAR", "Mate Position", "Strand +", "Inferred Insert Size" };
    }

    /**
     * For use by the Data Table plugin, which needs to know the header for
     * export purposes.
     */
    @Override
    public SAMFileHeader getHeader() {
        return samFileHeader;
    }
}