com.milaboratory.core.motif.BitapPatternTest.java Source code

Java tutorial

Introduction

Here is the source code for com.milaboratory.core.motif.BitapPatternTest.java, a JUnit test class that exercises Bitap-based motif matching on nucleotide sequences:

Source

/*
 * Copyright 2015 MiLaboratory.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.milaboratory.core.motif;

import com.milaboratory.core.mutations.MutationType;
import com.milaboratory.core.mutations.generator.UniformMutationsGenerator;
import com.milaboratory.core.sequence.NucleotideSequence;
import com.milaboratory.core.sequence.SequencesUtils;
import com.milaboratory.test.TestUtil;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.junit.Assert;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

/**
 * Tests for the Bitap matchers produced by {@link Motif#toBitapPattern()}.
 *
 * <p>The hand-written cases pin exact positions and error counts for small inputs. The randomized
 * cases plant a mutated copy of a random motif inside random flanking sequence and check that the
 * matcher reports it. Conventions these tests rely on, as asserted below:
 * <ul>
 * <li>{@code findNext()} returns {@code -1} once no more matches are available;</li>
 * <li>the {@code ...First} matchers are expected to report the position where the match starts;</li>
 * <li>{@code substitutionAndIndelMatcherLast} is expected to report the position of the last
 * letter of the match.</li>
 * </ul>
 */
public class BitapPatternTest {
    /** Draws a fresh seed so that every randomized run is different but can be reported on failure. */
    private static long newSeed() {
        return new Well19937c().nextLong();
    }

    /** Builds the message attached to assertions in the randomized tests. */
    private static String failureContext(int iteration, long seed) {
        return "On iteration = " + iteration + " with seed " + seed;
    }

    /**
     * Advances the matcher until it reports a match with zero errors.
     *
     * @param matcher matcher to advance
     * @return position reported for the first zero-error match, or {@code -1} if the matcher is
     * exhausted before one is found
     */
    private static int findExactMatch(BitapMatcher matcher) {
        int pos;
        // -1 is the only "no more matches" value; position 0 is a legitimate match and must not stop the scan
        while ((pos = matcher.findNext()) >= 0) {
            if (matcher.getNumberOfErrors() == 0) {
                return pos;
            }
        }
        return -1;
    }

    /** Exact search for a motif without wildcards. */
    @Test
    public void testExact1() throws Exception {
        Motif<NucleotideSequence> motif = new Motif<>(NucleotideSequence.ALPHABET, "ATTAGACA");
        NucleotideSequence seq = new NucleotideSequence("ACTGCGATAAATTAGACAGTACGTA");
        assertEquals(10, motif.toBitapPattern().exactSearch(seq));
    }

    /** Exact search for a motif with a wildcard ({@code R}); both A and G at that position must match. */
    @Test
    public void testExact2() throws Exception {
        Motif<NucleotideSequence> motif = new Motif<>(NucleotideSequence.ALPHABET, "ATTRGACA");
        NucleotideSequence seq = new NucleotideSequence("ACTGCGATAAATTAGACAGTACGTA");
        assertEquals(10, motif.toBitapPattern().exactSearch(seq));
        seq = new NucleotideSequence("ACTGCGATAAATTGGACAGTACGTA");
        assertEquals(10, motif.toBitapPattern().exactSearch(seq));
    }

    /**
     * {@code substitutionAndIndelMatcherLast} with at most one error, on an exact occurrence and on
     * occurrences carrying a single deletion, insertion or mismatch.
     */
    @Test
    public void testMismatchIndel1() throws Exception {
        Motif<NucleotideSequence> motif = new Motif<>(NucleotideSequence.ALPHABET, "ATTAGACA");
        NucleotideSequence seq;
        BitapMatcher bitapMatcher;

        // Exact: the zero-error hit must be reported at the last letter of the occurrence
        seq = new NucleotideSequence("ACTGCGATAAATTAGACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().substitutionAndIndelMatcherLast(1, seq);
        assertEquals(17, findExactMatch(bitapMatcher));

        // Deletion
        seq = new NucleotideSequence("ACTGCGATAAATAGACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().substitutionAndIndelMatcherLast(1, seq);
        assertEquals(16, bitapMatcher.findNext());
        assertEquals(1, bitapMatcher.getNumberOfErrors());
        assertEquals(-1, bitapMatcher.findNext());

        // Insertion
        seq = new NucleotideSequence("ACTGCGATAAATTATGACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().substitutionAndIndelMatcherLast(1, seq);
        assertEquals(18, bitapMatcher.findNext());
        assertEquals(1, bitapMatcher.getNumberOfErrors());
        assertEquals(-1, bitapMatcher.findNext());

        // Mismatch
        seq = new NucleotideSequence("ACTGCGATAAATTACACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().substitutionAndIndelMatcherLast(1, seq);
        assertEquals(17, bitapMatcher.findNext());
        assertEquals(1, bitapMatcher.getNumberOfErrors());
        assertEquals(-1, bitapMatcher.findNext());
    }

    /**
     * {@code mismatchAndIndelMatcherFirst} with at most one error, on an exact occurrence and on
     * occurrences carrying a single deletion, insertion or mismatch.
     */
    @Test
    public void testMismatchIndel2() throws Exception {
        Motif<NucleotideSequence> motif = new Motif<>(NucleotideSequence.ALPHABET, "ATTAGACA");
        NucleotideSequence seq;
        BitapMatcher bitapMatcher;

        // Exact: the zero-error hit must be reported at the first letter of the occurrence
        seq = new NucleotideSequence("ACTGCGATAAATTAGACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().mismatchAndIndelMatcherFirst(1, seq);
        assertEquals(10, findExactMatch(bitapMatcher));

        // Deletion: two one-error hits are expected, at 10 and then at 9
        seq = new NucleotideSequence("ACTGCGATAAATAGACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().mismatchAndIndelMatcherFirst(1, seq);
        assertEquals(10, bitapMatcher.findNext());
        assertEquals(1, bitapMatcher.getNumberOfErrors());
        assertEquals(9, bitapMatcher.findNext());
        assertEquals(1, bitapMatcher.getNumberOfErrors());
        assertEquals(-1, bitapMatcher.findNext());

        // Insertion
        seq = new NucleotideSequence("ACTGCGATAAATTATGACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().mismatchAndIndelMatcherFirst(1, seq);
        assertEquals(10, bitapMatcher.findNext());
        assertEquals(1, bitapMatcher.getNumberOfErrors());
        assertEquals(-1, bitapMatcher.findNext());

        // Mismatch
        seq = new NucleotideSequence("ACTGCGATAAATTACACAGTACGTA");
        bitapMatcher = motif.toBitapPattern().mismatchAndIndelMatcherFirst(1, seq);
        assertEquals(10, bitapMatcher.findNext());
        assertEquals(1, bitapMatcher.getNumberOfErrors());
        assertEquals(-1, bitapMatcher.findNext());
    }

    /** Substitution-only matcher on a sequence that matches the wildcard motif exactly. */
    @Test
    public void testMismatch1() throws Exception {
        Motif<NucleotideSequence> motif = new Motif<>(NucleotideSequence.ALPHABET, "ATTRGACA");
        NucleotideSequence seq = new NucleotideSequence("ACTGCGATAAATTAGACAGTACGTA");
        BitapMatcher matcher = motif.toBitapPattern().substitutionOnlyMatcherFirst(1, seq);
        assertEquals(10, matcher.findNext());
        assertEquals(0, matcher.getNumberOfErrors());
    }

    /** Substitution-only matcher on a sequence that differs from the wildcard motif in one letter. */
    @Test
    public void testMismatch2() throws Exception {
        Motif<NucleotideSequence> motif = new Motif<>(NucleotideSequence.ALPHABET, "ATTRGACA");
        NucleotideSequence seq = new NucleotideSequence("ACTGCGATAAATCAGACAGTACGTA");
        BitapMatcher matcher = motif.toBitapPattern().substitutionOnlyMatcherFirst(1, seq);
        assertEquals(10, matcher.findNext());
        assertEquals(1, matcher.getNumberOfErrors());
    }

    /**
     * Randomized check of the unfiltered substitution-only matcher: the planted occurrence must be
     * reported with the true mismatch count, and every reported hit must be within the error budget.
     */
    @Test
    public void testRandomMM1() throws Exception {
        long seed = newSeed();
        RandomGenerator rg = new Well19937c(seed);
        int its = TestUtil.its(1000, 100000);

        for (int i = 0; i < its; ++i) {
            String ctx = failureContext(i, seed);

            // NOTE(review): randomSequence is not given rg; if it draws from its own random source,
            // the seed printed on failure may not be enough to reproduce the run - confirm.
            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 5, 60);

            // Apply mms substitutions one at a time
            NucleotideSequence seqM = seq;
            int mms = 1 + rg.nextInt(Math.min(10, seq.size()));
            for (int j = 0; j < mms; ++j) {
                seqM = UniformMutationsGenerator.createUniformMutationAsObject(seqM, rg, MutationType.Substitution)
                        .mutate(seqM);
            }

            // Measured rather than assumed to equal mms
            int realMMs = SequencesUtils.mismatchCount(seq, 0, seqM, 0, seqM.size());

            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence fullSeq = SequencesUtils.concatenate(seqLeft, seqM, seqRight);

            BitapPattern bitapPattern = new Motif<>(seq).toBitapPattern();

            // Not filtered
            BitapMatcher bitapMatcher = bitapPattern.substitutionOnlyMatcherFirst(mms, fullSeq);

            boolean found = false;
            int pos;
            while ((pos = bitapMatcher.findNext()) >= 0) {
                if (pos == seqLeft.size()) {
                    found = true;
                    assertEquals(ctx, realMMs, bitapMatcher.getNumberOfErrors());
                }
                assertTrue(ctx, SequencesUtils.mismatchCount(fullSeq, pos, seq, 0, seq.size()) <= mms);
            }

            assertTrue(ctx, found);
        }
    }

    /**
     * Randomized check of the substitution-only matcher wrapped in {@link BitapMatcherFilter}.
     * Unlike {@link #testRandomMM1()}, a miss of the planted occurrence is only counted; the test
     * fails if the number of misses exceeds {@code max(5E-5 * its, 1)}.
     */
    @Test
    public void testRandomMM2() throws Exception {
        long seed = newSeed();
        RandomGenerator rg = new Well19937c(seed);
        int its = TestUtil.its(1000, 100000);

        // Number of iterations in which the planted occurrence was not reported
        int missed = 0;

        for (int i = 0; i < its; ++i) {
            String ctx = failureContext(i, seed);

            // NOTE(review): randomSequence is not given rg; if it draws from its own random source,
            // the seed printed on failure may not be enough to reproduce the run - confirm.
            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 10, 60);

            NucleotideSequence seqM = seq;
            int mms = 1 + rg.nextInt(3);
            for (int j = 0; j < mms; ++j) {
                seqM = UniformMutationsGenerator.createUniformMutationAsObject(seqM, rg, MutationType.Substitution)
                        .mutate(seqM);
            }

            int realMMs = SequencesUtils.mismatchCount(seq, 0, seqM, 0, seqM.size());

            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence fullSeq = SequencesUtils.concatenate(seqLeft, seqM, seqRight);

            BitapPattern bitapPattern = new Motif<>(seq).toBitapPattern();

            // Filtered
            BitapMatcherFilter bitapMatcher = new BitapMatcherFilter(
                    bitapPattern.substitutionOnlyMatcherFirst(mms, fullSeq));

            boolean found = false;
            int pos;
            while ((pos = bitapMatcher.findNext()) >= 0) {
                if (pos == seqLeft.size()) {
                    found = true;
                    assertEquals(ctx, realMMs, bitapMatcher.getNumberOfErrors());
                }
                assertTrue(ctx, SequencesUtils.mismatchCount(fullSeq, pos, seq, 0, seq.size()) <= mms);
            }

            if (!found) {
                ++missed;
            }
        }

        assertTrue("Missed " + missed + " of " + its + " planted occurrences with seed " + seed,
                missed <= Math.max(5E-5 * its, 1.0));
    }

    /**
     * Randomized check of {@code substitutionAndIndelMatcherLast}: after applying {@code muts}
     * uniform mutations, a hit must be reported at the last letter of the planted occurrence.
     */
    @Test
    public void testRandomMMIndelLast1() throws Exception {
        long seed = newSeed();
        RandomGenerator rg = new Well19937c(seed);
        int its = TestUtil.its(1000, 100000);

        for (int i = 0; i < its; ++i) {
            // NOTE(review): randomSequence is not given rg; if it draws from its own random source,
            // the seed printed on failure may not be enough to reproduce the run - confirm.
            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 5, 60);

            NucleotideSequence seqM = seq;
            int muts = 1 + rg.nextInt(Math.min(10, seq.size()));
            for (int j = 0; j < muts; ++j) {
                seqM = UniformMutationsGenerator.createUniformMutationAsObject(seqM, rg).mutate(seqM);
            }

            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence fullSeq = SequencesUtils.concatenate(seqLeft, seqM, seqRight);

            BitapPattern bitapPattern = new Motif<>(seq).toBitapPattern();
            BitapMatcher bitapMatcher = bitapPattern.substitutionAndIndelMatcherLast(muts, fullSeq);

            // Index of the last letter of the planted (mutated) occurrence inside fullSeq
            int expectedPos = seqLeft.size() + seqM.size() - 1;

            boolean found = false;
            int pos;
            // Drain the matcher completely, as the original loop did, rather than stopping at the first hit
            while ((pos = bitapMatcher.findNext()) >= 0) {
                if (pos == expectedPos) {
                    found = true;
                }
            }

            assertTrue(failureContext(i, seed), found);
        }
    }

    /**
     * Randomized check of {@code mismatchAndIndelMatcherFirst}: after applying {@code muts}
     * uniform mutations, a hit must be reported at the first letter of the planted occurrence.
     */
    @Test
    public void testRandomMMIndelFirst1() throws Exception {
        long seed = newSeed();
        RandomGenerator rg = new Well19937c(seed);
        int its = TestUtil.its(1000, 100000);

        for (int i = 0; i < its; ++i) {
            // NOTE(review): randomSequence is not given rg; if it draws from its own random source,
            // the seed printed on failure may not be enough to reproduce the run - confirm.
            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 5, 60);

            NucleotideSequence seqM = seq;
            int muts = 1 + rg.nextInt(Math.min(10, seq.size()));
            for (int j = 0; j < muts; ++j) {
                seqM = UniformMutationsGenerator.createUniformMutationAsObject(seqM, rg).mutate(seqM);
            }

            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, 0, 40);
            NucleotideSequence fullSeq = SequencesUtils.concatenate(seqLeft, seqM, seqRight);

            BitapPattern bitapPattern = new Motif<>(seq).toBitapPattern();
            BitapMatcher bitapMatcher = bitapPattern.mismatchAndIndelMatcherFirst(muts, fullSeq);

            // Index of the first letter of the planted (mutated) occurrence inside fullSeq
            int expectedPos = seqLeft.size();

            boolean found = false;
            int pos;
            // Drain the matcher completely, as the original loop did, rather than stopping at the first hit
            while ((pos = bitapMatcher.findNext()) >= 0) {
                if (pos == expectedPos) {
                    found = true;
                }
            }

            assertTrue(failureContext(i, seed), found);
        }
    }
}