org.mskcc.shenkers.control.alignment.ChainParserNGTest.java Source code

Java tutorial

Introduction

Here is the source code for org.mskcc.shenkers.control.alignment.ChainParserNGTest.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package org.mskcc.shenkers.control.alignment;

import org.mskcc.shenkers.control.alignment.LocalAlignment;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.liftover.ChainParser;
import htsjdk.samtools.util.Interval;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;
import javafx.util.Pair;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.mskcc.shenkers.model.datatypes.Genome;
import org.mskcc.shenkers.model.datatypes.GenomeSpan;
import org.mskcc.shenkers.util.IntervalTools;
import static org.testng.Assert.*;
import org.testng.annotations.AfterClass;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

/**
 *
 * @author Soma
 */
public class ChainParserNGTest {

    public ChainParserNGTest() {
    }

    @BeforeClass
    public static void setUpClass() throws Exception {
    }

    @AfterClass
    public static void tearDownClass() throws Exception {
    }

    @BeforeMethod
    public void setUpMethod() throws Exception {
    }

    @AfterMethod
    public void tearDownMethod() throws Exception {
    }

    public static LocalAlignment trim(LocalAlignment blocks, GenomeSpan query_i, GenomeSpan target_i) {

        List<Pair<Integer, Integer>> fromBlocks = new ArrayList<>();
        List<Pair<Integer, Integer>> toBlocks = new ArrayList<>();

        for (int i = 0; i < blocks.fromBlocks.size(); i++) {

            Pair<Integer, Integer> fromBlock = blocks.fromBlocks.get(i);
            Pair<Integer, Integer> toBlock = blocks.toBlocks.get(i);

            assert IntervalTools.isContained(fromBlock.getKey(), fromBlock.getValue(), query_i.getStart(), query_i
                    .getEnd()) : "it is assumed that all blocks in query will be contained in the query interval";

            // if this block overlaps it is either OK as is, or needs to be trimmed
            if (IntervalTools.overlaps(toBlock.getKey(), toBlock.getValue(), target_i.getStart(),
                    target_i.getEnd())) {
                if (IntervalTools.isContained(toBlock.getKey(), toBlock.getValue(), target_i.getStart(),
                        target_i.getEnd())) {
                    fromBlocks.add(fromBlock);
                    toBlocks.add(toBlock);
                } else {
                    int offsetTargetStart = toBlock.getKey() < target_i.getStart()
                            ? target_i.getStart() - toBlock.getKey()
                            : 0;
                    int offsetTargetEnd = toBlock.getValue() > target_i.getEnd()
                            ? toBlock.getKey() - target_i.getEnd()
                            : 0;

                    Pair<Integer, Integer> offsetToBlock = new Pair<>(toBlock.getKey() + offsetTargetStart,
                            toBlock.getValue() - offsetTargetEnd);
                    Pair<Integer, Integer> offsetFromBlock = blocks.toNegativeStrand
                            ? new Pair<>(fromBlock.getKey() + offsetTargetEnd,
                                    fromBlock.getValue() - offsetTargetStart)
                            : new Pair<>(fromBlock.getKey() + offsetTargetStart,
                                    fromBlock.getValue() - offsetTargetEnd);

                    fromBlocks.add(offsetFromBlock);
                    toBlocks.add(offsetToBlock);
                }
            }
        }

        return new LocalAlignment(blocks.fromSequenceName, blocks.toSequenceName, blocks.toNegativeStrand,
                fromBlocks, toBlocks);
    }

    /**
     * Test of getChainIntersections method, of class ChainParser.
     */
    @Test
    public void testGetChainIntersections() throws FileNotFoundException {
        //        File f = new File("/home/sol/Downloads/dm3.droYak2.chain");
        File f = new File("/home/sol/Downloads/dm3.droYak2.chain");

        //        Scanner scan = new Scanner(f);
        //        int i=0;
        //        int j=0;
        //        PrintStream out = new PrintStream(new File("C:/Users/sol/Downloads/dm3.droYak2.chain"));
        //        while(scan.hasNext()){
        //            String line = scan.nextLine();
        //            System.out.println(i+"\t"+line);
        //            i++;
        //            
        //            if(i==185681)
        //                break;
        //            
        //            out.println(line);
        //            
        //        }
        //        out.close();
        //        System.exit(0);
        //        chrX:411720-413080
        //        chr3R 27905053 28832112
        //        chain 747933599 chr3R 27905053 + 46 27900188 chr3R 28832112 + 366441 28829329 2
        //        chr3R:9536-9791
        //        90   chain 970691389 chr3L 24543557 + 43412 23775837 chr3L 24197627 + 750 24183090 1
        //chr3L:74548-74869
        Interval interval = new Interval("chr3L", 74548, 74869);

        System.out.println("loading chain file");
        ChainParser instance = new ChainParser(f);
        System.out.println("calculating intersections");
        List<LocalAlignment> chainIntersections = instance.getChainIntersections(interval);
        for (LocalAlignment blocks : chainIntersections) {
            //            chr3L:74548-74869
            //chr3L:42085-42438
            blocks = trim(blocks, new GenomeSpan("chr3L", 74548, 74869, false),
                    new GenomeSpan("chr3L", 42088, 42438, false));
            StringBuilder gapped1 = new StringBuilder();
            StringBuilder gapped2 = new StringBuilder();

            gapped1.append(StringUtils.repeat("X", blocks.blockSizes.get(0)));
            gapped2.append(StringUtils.repeat("X", blocks.blockSizes.get(0)));
            int last1 = blocks.fromBlocks.get(0).getValue();
            int last2 = blocks.toNegativeStrand ? blocks.toBlocks.get(0).getKey()
                    : blocks.toBlocks.get(0).getValue();
            for (int i = 1; i < blocks.fromBlocks.size(); i++) {
                System.out.println(String.format("%s:%d-%d %d", blocks.fromSequenceName,
                        blocks.fromBlocks.get(i).getKey(), blocks.fromBlocks.get(i).getValue(),
                        blocks.fromBlocks.get(i).getValue() - blocks.fromBlocks.get(i).getKey()));
                System.out.println(String.format("%s:%d-%d %d", blocks.toSequenceName,
                        blocks.toBlocks.get(i).getKey(), blocks.toBlocks.get(i).getValue(),
                        blocks.toBlocks.get(i).getValue() - blocks.toBlocks.get(i).getKey()));
                System.err.println("");

                gapped1.append(StringUtils.repeat("X", blocks.fromBlocks.get(i).getKey() - last1 - 1));
                gapped2.append(StringUtils.repeat("-", blocks.fromBlocks.get(i).getKey() - last1 - 1));
                int gap2 = blocks.toNegativeStrand ? last2 - blocks.toBlocks.get(i).getValue() - 1
                        : blocks.toBlocks.get(i).getKey() - last2 - 1;
                gapped2.append(StringUtils.repeat("X", gap2));
                gapped1.append(StringUtils.repeat("-", gap2));

                gapped1.append(StringUtils.repeat("X", blocks.blockSizes.get(i)));
                gapped2.append(StringUtils.repeat("X", blocks.blockSizes.get(i)));

                last1 = blocks.fromBlocks.get(i).getValue();
                last2 = blocks.toNegativeStrand ? blocks.toBlocks.get(i).getKey()
                        : blocks.toBlocks.get(i).getValue();

            }
            System.out.println(gapped1.toString());
            System.out.println(gapped2.toString());
            System.out.println(gapped1.toString().replaceAll("-", "").length());
            System.out.println(gapped2.toString().replaceAll("-", "").length());
            System.out.println(74869 - 74548 + 1);
            System.out.println(42438 - 42085 + 1);
        }

        //        chr3L:74548-74869
        //chr3L:42085-42438
        System.out.println(chainIntersections.size());
        //        List expResult = null;
        //        List result = instance.getChainIntersections(interval);
        //        assertEquals(result, expResult);
        //        // TODO review the generated test code and remove the default call to fail.
        //        fail("The test case is a prototype.");
        ;
    }

    Logger logger = LogManager.getLogger();

    @Test
    public void testChainWeaving() throws FileNotFoundException {
        //        File f = new File("/home/sol/Downloads/dm3.droYak2.chain");
        logger.info("test: chain weaving");

        Genome g1 = new Genome("dm3", "mel");
        Genome g2 = new Genome("yak", "yak");
        Genome g3 = new Genome("vir", "vir");

        GenomeSpan melInterval = new GenomeSpan("3L", 74548, 74869, false);
        GenomeSpan yakInterval = new GenomeSpan("3L", 42088, 42438, false);
        GenomeSpan virInterval = new GenomeSpan("scaffold_13049", 2917506, 2917933, false);

        AlignmentWeaver weaver = new AlignmentWeaver(g1, melInterval);
        {
            File f = new File("/home/sol/lailab/sol/genomes/chains/netChainSubset/dm3.droYak3.net.chain");

            System.out.println("loading chain file");
            ChainParser instance = new ChainParser(f);
            System.out.println("calculating intersections");
            List<LocalAlignment> chainIntersections = instance.getChainIntersections(melInterval);

            NucleotideMapping mapping = new NucleotideMapping(melInterval, yakInterval);
            for (LocalAlignment a : chainIntersections) {
                logger.info("{}:{}-{}", a.fromSequenceName, a.getFromStart(), a.getFromEnd());
                logger.info("{}:{}-{}", a.toSequenceName, a.getToStart(), a.getToEnd());
                LocalAlignment trimmed = trim(a, melInterval, yakInterval);
                mapping.add(trimmed);
            }
            weaver.add(yakInterval, g1, g2, false, mapping);
        }

        {
            File f = new File("/home/sol/lailab/sol/genomes/chains/netChainSubset/dm3.droVir3.net.chain");

            System.out.println("loading chain file");
            ChainParser instance = new ChainParser(f);
            System.out.println("calculating intersections");
            List<LocalAlignment> chainIntersections = instance.getChainIntersections(melInterval);
            logger.info("vir intersections size {}", chainIntersections.size());
            NucleotideMapping mapping = new NucleotideMapping(melInterval, virInterval);
            for (LocalAlignment a : chainIntersections) {
                logger.info("{}:{}-{}", a.toSequenceName, a.getToStart(), a.getToEnd());
                LocalAlignment trimmed = trim(a, melInterval, virInterval);
                mapping.add(trimmed);
            }
            weaver.add(virInterval, g1, g3, true, mapping);
        }

        weaver.printAli2(Arrays.asList(g1, g2, g3));
    }

}