org.mskcc.shenkers.control.alignment.chain.ChainGFFAlignmentSource1.java Source code

Java tutorial

Introduction

Here is the source code for org.mskcc.shenkers.control.alignment.chain.ChainGFFAlignmentSource1.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.mskcc.shenkers.control.alignment.chain;

import htsjdk.samtools.liftover.ChainGFFConverter1;
import htsjdk.samtools.liftover.ChainParser;
import htsjdk.samtools.tabix.AlignmentContext;
import htsjdk.samtools.tabix.ChainContext;
import htsjdk.samtools.tabix.GFFAlignmentContextCodec;
import htsjdk.samtools.tabix.GFFAlignmentContextEncoder;
import htsjdk.samtools.tabix.GFFChainContextCodec;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.samtools.util.Interval;
import htsjdk.tribble.CloseableTribbleIterator;
import htsjdk.tribble.TabixFeatureReader;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.index.tabix.TabixIndexCreator;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.util.LittleEndianOutputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.logging.Level;
import javafx.util.Pair;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.mskcc.shenkers.control.alignment.AlignmentSource;
import org.mskcc.shenkers.control.alignment.LocalAlignment;
import org.mskcc.shenkers.model.datatypes.GenomeSpan;

/**
 *
 * @author sol
 */
public class ChainGFFAlignmentSource1 implements AlignmentSource {

    final static GFFChainContextCodec codec = new GFFChainContextCodec();

    private static final Logger logger = LogManager.getLogger();

    TabixFeatureReader<ChainContext, LineIterator> alignments;

    public ChainGFFAlignmentSource1(String uri, boolean force_convert) throws IOException {

        File chain_gff_bgz = new File(uri.concat(".gff.bgz"));
        File chain_gff_bgz_tbi = new File(uri.concat(".gff.bgz.tbi"));

        logger.info("converting {} to {}", uri, chain_gff_bgz.getAbsoluteFile());
        logger.info("{} already exists? {}", chain_gff_bgz.getAbsoluteFile(), chain_gff_bgz.exists());
        if (force_convert || !chain_gff_bgz.exists()) {
            logger.info("converting chain to gff");
            convertChainToGffBgz(new File(uri), chain_gff_bgz);
            logger.info("creating tabix index");
            createTabixIndex(chain_gff_bgz);
        }
        logger.info("{} already exists? {}", chain_gff_bgz_tbi.getAbsoluteFile(), chain_gff_bgz_tbi.exists());
        if (!chain_gff_bgz_tbi.exists()) {
            logger.info("creating tabix index");
            createTabixIndex(chain_gff_bgz);
        }
        alignments = new TabixFeatureReader(chain_gff_bgz.getAbsolutePath(), codec);
    }

    private void convertChainToGffBgz(File f, File chain_gff_bgz) throws IOException {
        OutputStream gffOutputStream = new BlockCompressedOutputStream(chain_gff_bgz);
        ChainGFFConverter1 converter = new ChainGFFConverter1(codec);
        converter.convert(f, gffOutputStream);
    }

    private void createTabixIndex(File chain_gff_bgz) throws IOException {
        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.GFF);
        BlockCompressedInputStream inputStream = new BlockCompressedInputStream(chain_gff_bgz);

        long p = 0;
        String line = inputStream.readLine();

        while (line != null) {
            //add the feature to the index
            ChainContext decode = codec.decode(line);
            indexCreator.addFeature(decode, p);
            // read the next line if available
            p = inputStream.getFilePointer();
            line = inputStream.readLine();
        }
        // write the index to a file
        Index index = indexCreator.finalizeIndex(inputStream.getFilePointer());
        index.writeBasedOnFeatureFile(chain_gff_bgz);
    }

    @Override
    public List<LocalAlignment> getAlignment(GenomeSpan span1, GenomeSpan span2) {
        try {

            logger.info("querying alignment file");
            CloseableTribbleIterator<ChainContext> itt = alignments.query(span1.getChr(), span1.getStart(),
                    span1.getEnd());

            List<LocalAlignment> chainIntersections = new ArrayList<>();

            // find all overlapping chains
            logger.info("iterating alignments");
            while (itt.hasNext()) {
                ChainContext next = itt.next();
                logger.info("alignment from {}:{}-{} to {}:{}-{}", next.getChr(), next.getStart(), next.getEnd(),
                        next.getTargetChr(), next.getTargetStart(), next.getTargetEnd());
                // check for intersections with the displayed interval
                // retaining only those blocks that intersect
                Optional<LocalAlignment> alignedBlocks = getAlignedBlocks(next, span1);
                logger.info("aligned blocks? {}", alignedBlocks.isPresent());
                alignedBlocks.ifPresent(l -> chainIntersections.add(l));
            }

            return chainIntersections;
        } catch (IOException ex) {
            logger.error("error parsing alignment file", ex);
            throw new RuntimeException(ex);
        }
    }

    /**
     *
     * @return a local alignment restricted to the given query interval
     */
    public static Optional<LocalAlignment> getAlignedBlocks(final ChainContext chain, GenomeSpan interval) {

        List<Integer> queryGaps = chain.getQueryGaps();
        List<Integer> targetGaps = chain.getTargetGaps();
        List<Integer> blockLengths = chain.getBlockLengths();

        List<Pair<Integer, Integer>> fromBlocks = new ArrayList<>();
        List<Pair<Integer, Integer>> toBlocks = new ArrayList<>();

        boolean hasIntersection = false;

        int qPos = chain.getStart();
        int tPos = chain.getToNegativeStrand() ? chain.getTargetEnd() : chain.getTargetStart();

        int start = interval.getStart();
        int end = interval.getEnd();

        int nBlocks = blockLengths.size();
        for (int i = 0; i < nBlocks; ++i) {

            int length = blockLengths.get(i);

            // get the boundaries of the current block in the query
            int qStart = qPos;
            int qEnd = qStart + length - 1;

            // get the boundaries of the current block in the target
            int tStart = chain.getToNegativeStrand() ? tPos - length + 1 : tPos;
            int tEnd = chain.getToNegativeStrand() ? tPos : tPos + length - 1;

            if (qStart > end) {
                logger.info("breaking");
                break;
            }
            if (qEnd >= start) {
                logger.info("overlaps");
                hasIntersection = true;

                // determine if the query interval is offset from the block
                int fromStart = start > qStart ? start : qStart;
                int fromEnd = qEnd > end ? end : qEnd;

                int startOffset = (start > qStart) ? start - qStart : 0;

                int offsetFromEnd = (qEnd > end) ? qEnd - end : 0;

                // apply the offset to the interval in the target genome
                int toStart = tStart + (chain.getToNegativeStrand() ? offsetFromEnd : startOffset);
                int toEnd = tEnd - (chain.getToNegativeStrand() ? startOffset : offsetFromEnd);

                fromBlocks.add(new Pair<>(fromStart, fromEnd));
                toBlocks.add(new Pair<>(toStart, toEnd));
            }

            if (i < nBlocks - 1) {
                int qGap = queryGaps.get(i);
                int tGap = targetGaps.get(i);

                qPos += length + qGap;
                tPos += chain.getToNegativeStrand() ? -(length + tGap) : length + tGap;
            }

        }

        Optional<LocalAlignment> alignment = hasIntersection ? Optional.of(new LocalAlignment(chain.getChr(),
                chain.getTargetChr(), chain.getToNegativeStrand(), fromBlocks, toBlocks)) : Optional.empty();

        return alignment;
    }

}