edu.cornell.med.icb.goby.modes.TestDiscoverSVMethylationRatesMode.java Source code

Java tutorial

Introduction

Here is the source code for edu.cornell.med.icb.goby.modes.TestDiscoverSVMethylationRatesMode.java

Source

/*
 * Copyright (C) 2009-2011 Institute for Computational Biomedicine,
 *                    Weill Medical College of Cornell University
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package edu.cornell.med.icb.goby.modes;

import com.google.protobuf.ByteString;
import com.martiansoftware.jsap.JSAPException;
import edu.cornell.med.icb.goby.algorithmic.data.GroupComparison;
import edu.cornell.med.icb.goby.alignments.*;
import edu.cornell.med.icb.goby.reads.RandomAccessSequenceTestSupport;
import edu.cornell.med.icb.goby.reads.ReadsWriter;
import edu.cornell.med.icb.goby.reads.ReadsWriterImpl;
import edu.cornell.med.icb.goby.util.TestFiles;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.lang.MutableString;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.*;
import java.util.ArrayList;
import java.util.Collections;

import static org.junit.Assert.assertTrue;

/**
 * @author Fabien Campagne
 *         Date: Mar 12, 2011
 *         Time: 12:15:11 PM
 */
public class TestDiscoverSVMethylationRatesMode extends TestFiles {

    private static final Log LOG = LogFactory.getLog(TestDiscoverSVMethylationRatesMode.class);
    private static final String BASE_TEST_DIR = "test-results/discover-variants-methylation";
    private static final int NUM_BASENAME_PER_GROUP = 5;
    private static final String[] basenames = new String[NUM_BASENAME_PER_GROUP * 2];
    private static final int NUM_TRIALS = 10;
    private static String statsFilename = BASE_TEST_DIR + "/variations-stats2.tsv";
    private static String[] specificAlignments;
    private String header = "##fileformat=VCFv4.1\n" + "##Goby=UNKNOWN\n"
            + "##INFO=<ID=BIOMART_COORDS,Number=1,Type=String,Description=\"Coordinates for use with Biomart.\">\n"
            + "##INFO=<ID=Strand,Number=1,Type=String,Description=\"Strand of the cytosine site on the reference sequence.\">\n"
            + "##INFO=<ID=LOD[methylated/not-so],Number=1,Type=Float,Description=\"Log2 of the odds-ratio of observing methylation in  group methylated versus group not-so\">\n"
            + "##INFO=<ID=LOD_SE[methylated/not-so],Number=1,Type=Float,Description=\"Standard Error of the log2 of the odds-ratio between group methylated and group not-so\">\n"
            + "##INFO=<ID=LOD_Z[methylated/not-so],Number=1,Type=Float,Description=\"Z value of the odds-ratio between group methylated and group not-so\">\n"
            + "##INFO=<ID=FisherP[methylated/not-so],Number=1,Type=Float,Description=\"Fisher exact P-value of observing as large a difference by chance between group methylated and group not-so.\">\n"
            + "##INFO=<ID=#C Group[methylated],Number=1,Type=Integer,Description=\"Number of unmethylated Cytosines at site in group methylated.\">\n"
            + "##INFO=<ID=#Cm Group[methylated],Number=1,Type=Integer,Description=\"Number of methylated Cytosines at site in group methylated.\">\n"
            + "##INFO=<ID=#C Group[not-so],Number=1,Type=Integer,Description=\"Number of unmethylated Cytosines at site in group not-so.\">\n"
            + "##INFO=<ID=#Cm Group[not-so],Number=1,Type=Integer,Description=\"Number of methylated Cytosines at site in group not-so.\">\n"
            + "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total depth of sequencing across groups at this site\">\n"
            + "##FORMAT=<ID=MR,Number=1,Type=Integer,Description=\"Methylation rate. 0-100%, 100% indicate fully methylated.\">\n"
            + "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n"
            + "##FORMAT=<ID=BC,Number=1,Type=String,Description=\"Base counts in format A=?;T=?;C=?;G=?;N=?.\">\n"
            + "##FORMAT=<ID=GB,Number=1,Type=String,Description=\"Number of bases that pass base filters in this sample.\">\n"
            + "##FORMAT=<ID=FB,Number=1,Type=String,Description=\"Number of bases that failed base filters in this sample.\">\n"
            + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tmethylated\tnot-so\n";

    RandomAccessSequenceTestSupport genome;

    private String[] sequences = { "ATTTACCGG", //CpG
            "TAGATACGGAT", //CpG
            "ACTCTAGACTA", //CpT
            "CATTTTGCAAC", //CpA
            "ATCTATGCCTA", //CpG - negative
    };

    @Test
    public void testMethylationFormatSwap() throws IOException, JSAPException {
        final MethylationRateVCFOutputFormat outputFormat = new MethylationRateVCFOutputFormat();
        DiscoverVariantIterateSortedAlignments iterator = new DiscoverVariantIterateSortedAlignments(outputFormat) {
            @Override
            public CharSequence getReferenceId(int targetIndex) {
                return "ref-id";
            }
        };
        DiscoverSequenceVariantsMode mode = new DiscoverSequenceVariantsMode() {
            @Override
            public String[] getSamples() {
                return new String[] { "not-so", "methylated" };
            }

            @Override
            public String[] getGroups() {
                return new String[] { "not-so", "methylated", };
            }

            @Override
            public int[] getReaderIndexToGroupIndex() {
                return new int[] { 0, 1 }; // sample index is group index;
            }

            @Override
            public ArrayList<GroupComparison> getGroupComparisons() {
                ArrayList<GroupComparison> list = new ArrayList<GroupComparison>();
                list.add(new GroupComparison("methylated", "not-so", 0, 1, 0));
                return list;
            }
        };
        StringWriter writer = new StringWriter();
        outputFormat.allocateStorage(2, 2);
        outputFormat.defineColumns(new PrintWriter(writer), mode);
        outputFormat.setGenome(genome);

        outputFormat.setMinimumEventThreshold(0);
        outputFormat.writeRecord(iterator, makeTwoSampleCounts(), 0, 0, list4(), 0, 1);
        String stringB = writer.getBuffer().toString();
        assertTrue(stringB, stringB.contains("1/2:A=10,T=4,N=2:16:0:33"));
        assertTrue(stringB, stringB.contains("0/1/2:A=5,T=1,C=9,N=1:16:0:0"));
        assertTrue(stringB, stringB.contains("#Cm_Group[methylated]=10;"));
        assertTrue(stringB, stringB.contains("#C_Group[methylated]=20;"));
        // check biomart-span included in result:
        assertTrue("biomart span must be included in result: " + stringB, stringB.contains("ref-id:1:1"));
    }

    @Test
    public void testMethylationFormat() throws IOException, JSAPException {
        final MethylationRateVCFOutputFormat outputFormat = new MethylationRateVCFOutputFormat();
        DiscoverVariantIterateSortedAlignments iterator = new DiscoverVariantIterateSortedAlignments(outputFormat) {
            @Override
            public CharSequence getReferenceId(int targetIndex) {
                return "ref-id";
            }
        };
        DiscoverSequenceVariantsMode mode = new DiscoverSequenceVariantsMode() {
            @Override
            public String[] getSamples() {
                return new String[] { "methylated", "not-so" };
            }

            @Override
            public String[] getGroups() {
                return new String[] { "methylated", "not-so" };
            }

            @Override
            public int[] getReaderIndexToGroupIndex() {
                return new int[] { 0, 1 }; // sample index is group index;
            }

            @Override
            public ArrayList<GroupComparison> getGroupComparisons() {
                ArrayList<GroupComparison> list = new ArrayList<GroupComparison>();
                list.add(new GroupComparison("methylated", "not-so", 0, 1, 0));
                return list;
            }
        };
        StringWriter writer = new StringWriter();
        outputFormat.allocateStorage(2, 2);
        outputFormat.setGenome(genome);
        outputFormat.defineColumns(new PrintWriter(writer), mode);
        outputFormat.setMinimumEventThreshold(0);
        outputFormat.writeRecord(iterator, makeTwoSampleCounts(), 0, 0, list1(), 0, 1);
        writer.flush();

        String stringB = writer.getBuffer().toString();
        assertTrue(stringB, stringB.contains("#Cm_Group[methylated]=1;"));
        assertTrue(stringB, stringB.contains("#Cm_Group[not-so]=0;"));

        writer = new StringWriter();
        outputFormat.allocateStorage(2, 2);
        outputFormat.defineColumns(new PrintWriter(writer), mode);

        final SampleCountInfo[] sampleCounts = makeTwoSampleCounts();
        sampleCounts[0].referenceBase = 'G';
        outputFormat.writeRecord(iterator, sampleCounts, 0, 0, list2(), 0, 1);
        writer.flush();
        stringB = writer.getBuffer().toString();
        assertTrue(stringB, stringB.contains("#Cm_Group[methylated]=1;"));
        assertTrue(stringB, stringB.contains("#Cm_Group[not-so]=0;"));
        assertTrue(stringB, stringB.contains("MR[methylated]=100.0;"));
        assertTrue(stringB, stringB.contains("MR[not-so]=NaN;"));

        writer = new StringWriter();

        outputFormat.allocateStorage(2, 2);
        outputFormat.defineColumns(new PrintWriter(writer), mode);

        outputFormat.writeRecord(iterator, makeTwoSampleCounts(), 0, 0, list3(), 0, 1);
        stringB = writer.getBuffer().toString();
        assertTrue(stringB, stringB.contains("#Cm_Group[methylated]=1;"));
        assertTrue(stringB, stringB.contains("#C_Group[methylated]=1;"));
        assertTrue(stringB, stringB.contains("#Cm_Group[not-so]=0;"));
        assertTrue(stringB, stringB.contains("#C_Group[not-so]=0;"));

        /*  org.junit.Assert.assertEquals("content must match", header+
            "ref-id\t1\t\tC\tA,T,N\t\t\tBIOMART_COORDS=ref-id:1:1;Strand=+;LOD[methylated/not-so]=NaN;LOD_SE[methylated/not-so]=NaN;LOD_Z[methylated/not-so]=NaN;FisherP[methylated/not-so]=NaN;#C Group[methylated]=0;#Cm Group[methylated]=1;#C Group[not-so]=0;#Cm Group[not-so]=0;DP=1\tMR:GT:BC:GB:FB\t100:0/1/2/3:A=5,T=1,C=9,G=0,N=1:16:0\t0:1/2/3:A=10,T=4,C=0,G=0,N=2:16:0",
            buffer.toString());
        */
    }

    @Test
    public void testMethylationFormatGenomicContext() throws IOException, JSAPException {
        final MethylationRateVCFOutputFormat outputFormat = new MethylationRateVCFOutputFormat();

        DiscoverVariantIterateSortedAlignments iterator = new DiscoverVariantIterateSortedAlignments(outputFormat) {
            @Override
            public CharSequence getReferenceId(int targetIndex) {
                return Integer.toString(targetIndex);
            }
        };
        DiscoverSequenceVariantsMode mode = new DiscoverSequenceVariantsMode() {
            @Override
            public String[] getSamples() {
                return new String[] { "methylated", "not-so" };
            }

            @Override
            public String[] getGroups() {
                return new String[] { "methylated", "not-so" };
            }

            @Override
            public int[] getReaderIndexToGroupIndex() {
                return new int[] { 0, 1 }; // sample index is group index;
            }

            @Override
            public ArrayList<GroupComparison> getGroupComparisons() {
                ArrayList<GroupComparison> list = new ArrayList<GroupComparison>();
                list.add(new GroupComparison("methylated", "not-so", 0, 1, 0));
                return list;
            }
        };
        outputFormat.setGenome(genome);

        StringWriter writer = new StringWriter();
        outputFormat.allocateStorage(2, 2);
        outputFormat.defineColumns(new PrintWriter(writer), mode);
        outputFormat.setMinimumEventThreshold(0);
        outputFormat.writeRecord(iterator, makeTwoSampleCounts(), 1, 6, list1(), 0, 1);
        writer.flush();

        String stringB = writer.getBuffer().toString();
        assertTrue(stringB, stringB.contains("Context=CpG"));

        writer = new StringWriter();
        outputFormat.allocateStorage(2, 2);
        outputFormat.defineColumns(new PrintWriter(writer), mode);
        outputFormat.setMinimumEventThreshold(0);

        final int referenceIndex = 2;
        outputFormat.setGenomeReferenceIndex(referenceIndex);
        outputFormat.writeRecord(iterator, makeTwoSampleCounts(), referenceIndex, 1, list1(), 0, 1);
        writer.flush();

        String stringC = writer.getBuffer().toString();
        assertTrue(stringC, stringC.contains("Context=CpT"));

        writer = new StringWriter();
        outputFormat.allocateStorage(2, 2);
        outputFormat.defineColumns(new PrintWriter(writer), mode);
        outputFormat.setMinimumEventThreshold(0);
        final int referenceIndex1 = 1;
        outputFormat.setGenomeReferenceIndex(referenceIndex1);
        outputFormat.writeRecord(iterator, makeTwoSampleCounts(), referenceIndex1, 2, list1(), 0, 1);
        stringB = writer.getBuffer().toString();
        assertTrue(stringB, stringB.contains("CpT"));

        writer = new StringWriter();
        outputFormat.allocateStorage(2, 2);
        outputFormat.defineColumns(new PrintWriter(writer), mode);
        outputFormat.setMinimumEventThreshold(0);
        final int referenceIndex4 = 4;
        outputFormat.setGenomeReferenceIndex(referenceIndex4);
        outputFormat.writeRecord(iterator, makeTwoSampleCounts(), referenceIndex4, 7, list1(), 0, 1);
        stringB = writer.getBuffer().toString();
        assertTrue(stringB, stringB.contains("CpC"));

    }

    private DiscoverVariantPositionData list4() {
        DiscoverVariantPositionData result = new DiscoverVariantPositionData();
        PositionBaseInfo info;
        // make 33% methylation in sampleIndex=1 "methylated"
        for (int i = 0; i < 10; i++) {
            info = new PositionBaseInfo();
            info.matchesReference = true;
            info.matchesForwardStrand = true;
            info.from = 'C';
            info.to = 'C';
            info.readerIndex = 1;
            result.add(info);
        }
        for (int i = 0; i < 20; i++) {
            info = new PositionBaseInfo();
            info.matchesReference = false; // not methylated
            info.matchesForwardStrand = true;
            info.from = 'C';
            info.to = 'T'; // not methylated
            info.readerIndex = 1;
            result.add(info);
        }
        return result;
    }

    private DiscoverVariantPositionData list3() {
        DiscoverVariantPositionData result = new DiscoverVariantPositionData();
        PositionBaseInfo info = new PositionBaseInfo();
        info.matchesReference = true;
        info.matchesForwardStrand = true;
        info.from = 'C';
        info.to = 'C';

        result.add(info);

        info = new PositionBaseInfo();
        info.matchesReference = false;
        info.matchesForwardStrand = true;
        info.from = 'C';
        info.to = 'T';

        result.add(info);

        return result;
    }

    private DiscoverVariantPositionData list2() {
        DiscoverVariantPositionData result = new DiscoverVariantPositionData();
        PositionBaseInfo info = new PositionBaseInfo();
        info.matchesReference = true;
        info.matchesForwardStrand = false;
        info.from = 'G';
        info.to = 'G';

        result.add(info);

        return result;
    }

    private DiscoverVariantPositionData list1() {
        DiscoverVariantPositionData result = new DiscoverVariantPositionData();
        final PositionBaseInfo info = new PositionBaseInfo();
        info.matchesReference = true;
        info.matchesForwardStrand = true;
        info.from = 'C';
        info.to = 'C';

        result.add(info);
        return result;
    }

    private SampleCountInfo[] makeTwoSampleCounts() {
        SampleCountInfo[] sampleCounts = new SampleCountInfo[2];
        sampleCounts[0] = new SampleCountInfo();
        sampleCounts[0].counts[SampleCountInfo.BASE_A_INDEX] = 5;
        sampleCounts[0].counts[SampleCountInfo.BASE_T_INDEX] = 1;
        sampleCounts[0].counts[SampleCountInfo.BASE_C_INDEX] = 9;
        sampleCounts[0].counts[SampleCountInfo.BASE_OTHER_INDEX] = 1;
        sampleCounts[0].referenceBase = 'C';
        sampleCounts[0].refCount = 5;
        sampleCounts[0].varCount = 11;
        sampleCounts[0].sampleIndex = 0;

        sampleCounts[1] = new SampleCountInfo();
        sampleCounts[1].counts[SampleCountInfo.BASE_A_INDEX] = 10;
        sampleCounts[1].counts[SampleCountInfo.BASE_T_INDEX] = 4;
        sampleCounts[1].counts[SampleCountInfo.BASE_C_INDEX] = 0;
        sampleCounts[1].counts[SampleCountInfo.BASE_OTHER_INDEX] = 2;
        sampleCounts[1].referenceBase = 'C';
        sampleCounts[1].refCount = 10;
        sampleCounts[1].varCount = 6;
        sampleCounts[1].sampleIndex = 1;
        return sampleCounts;
    }

    private ObjectArrayList<ReadIndexStats> makeReadIndexStats() {
        ObjectArrayList<ReadIndexStats> result = new ObjectArrayList<ReadIndexStats>();
        ReadIndexStats stat = new ReadIndexStats();

        stat.basename = "basename1";
        stat.readerIndex = 0;
        stat.countReferenceBases = new long[] { 1000, 1000, 1000 };
        stat.countVariationBases = new long[] { 100, 500, 900 };
        result.add(stat);

        stat = new ReadIndexStats();
        stat.basename = "basename2";
        stat.readerIndex = 0;
        stat.countReferenceBases = new long[] { 1000, 1000, 1000 };
        stat.countVariationBases = new long[] { 10, 10, 10 };
        result.add(stat);
        return result;
    }

    private DiscoverVariantPositionData makeList(SampleCountInfo[] sampleCounts, IntArrayList readIndices) {
        IntIterator nextReadIndexIterator = readIndices.iterator();
        DiscoverVariantPositionData list = new DiscoverVariantPositionData();
        for (SampleCountInfo sampleInfo : sampleCounts) {
            for (int baseIndex = 0; baseIndex < SampleCountInfo.BASE_MAX_INDEX; baseIndex++) {

                for (int i = 0; i < sampleInfo.counts[baseIndex]; i++) {

                    PositionBaseInfo info = new PositionBaseInfo();
                    final char base = sampleInfo.base(baseIndex);
                    info.to = base;
                    if (base == 'A') {
                        info.matchesReference = true;
                        info.from = base;
                        info.matchesForwardStrand = true;
                    }
                    info.readerIndex = sampleInfo.sampleIndex;
                    if (!nextReadIndexIterator.hasNext()) {

                        // wrap back to the start of read indices:
                        nextReadIndexIterator = readIndices.iterator();
                    }
                    info.readIndex = nextReadIndexIterator.nextInt();
                    list.add(info);
                    System.out.println("info: " + info);
                }

            }
        }
        return list;
    }

    private String[] shuffle(String[] strings) {

        final ObjectArrayList<String> list = ObjectArrayList.wrap(strings);
        Collections.shuffle(list);
        return list.toArray(new String[list.size()]);
    }

    private String[] add(String[] basenames, String[] specificAlignments) {
        String result[] = new String[basenames.length + specificAlignments.length];
        int i = 0;
        for (String s : basenames) {
            result[i++] = s;
        }
        for (String s : specificAlignments) {
            result[i++] = s;
        }
        return result;
    }

    @AfterClass
    public static void cleanupTestDirectory() throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Deleting base test directory: " + BASE_TEST_DIR);
        }
        //    FileUtils.forceDeleteOnExit(new File(BASE_TEST_DIR));
    }

    @Before
    public void setUp() throws IOException {

        final File dir = new File(BASE_TEST_DIR);
        dir.mkdirs();

        genome = new RandomAccessSequenceTestSupport(sequences);
        final ReadsWriter referenceWriter = new ReadsWriterImpl(FileUtils.openOutputStream(
                new File("test-results/alignments/last-to-compact/last-reference.compact-reads")));
        referenceWriter.setIdentifier("0");
        referenceWriter.appendEntry();
        referenceWriter.close();

    }

    private static String constructArgumentString(String[] basenames, Object outputfilename, Object evalString) {
        MutableString basenamesString = new MutableString();
        for (String basename : basenames) {
            basenamesString.append(basename).append(" ");
        }

        MutableString groups = new MutableString();
        groups.append("A=");
        for (int i = 0; i < NUM_BASENAME_PER_GROUP; i++) {
            groups.append(FilenameUtils.getBaseName(basenames[i]));
            if (i != NUM_BASENAME_PER_GROUP - 1) {
                groups.append(",");
            }
        }
        groups.append("/B=");
        for (int i = NUM_BASENAME_PER_GROUP; i < NUM_BASENAME_PER_GROUP * 2; i++) {
            groups.append(FilenameUtils.getBaseName(basenames[i]));
            if (i != NUM_BASENAME_PER_GROUP * 2 - 1) {
                groups.append(",");
            }
        }

        return String.format(
                "--mode discover-sequence-variants " + "--variation-stats %s " + "--groups %s " + "--compare A/B "
                        + "--eval %s " + "--vcf " + "--minimum-variation-support 1 "
                        + "--threshold-distinct-read-indices 1 " + "--output %s " + "%s",
                statsFilename, groups, evalString, outputfilename, basenamesString);
    }

    @BeforeClass
    public static void initializeTestDirectory() throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Creating base test directory: " + BASE_TEST_DIR);
        }

        FileUtils.forceMkdir(new File(BASE_TEST_DIR));
        PrintWriter outTSV = new PrintWriter(statsFilename);
        outTSV.println(
                "basename\tread-index\tcount-variation-bases\tbases-at-index/all-variations-bases\tbases-at-index/all-reference-bases\tcount-reference-bases\tcount-reference-bases-at-index");
        for (int i = 0; i < NUM_BASENAME_PER_GROUP * 2; i++) {
            basenames[i] = BASE_TEST_DIR + "/basen" + i;
            writeAlignment(basenames, i, 'G');
            appendToStats(basenames, i, outTSV);
        }
        specificAlignments = new String[1];
        // write a new alignment that is different from all others:
        specificAlignments[0] = BASE_TEST_DIR + "/align-specific-sample";
        writeAlignment(specificAlignments, 0, 'C');
        appendToStats(specificAlignments, 0, outTSV);

        outTSV.close();

    }

    private static void appendToStats(String[] basenames, int basenameIndex, PrintWriter outTSV)
            throws IOException {
        String tokens = "basename\tread-index\tcount-variation-bases\tbases-at-index/all-variations-bases\tbases-at-index/all-reference-bases\tcount-reference-bases\tcount-reference-bases-at-index\n"
                + "basename0\t1\t2924830\t0.097711641\t-0.004026\t-726568406\t68804818\n"
                + "basename0\t2\t1683737\t0.056249664\t-0.002317\t-726568406\t70050107\n"
                + "basename0\t3\t973088\t0.032508565\t-0.001339\t-726568406\t70793744\n"
                + "basename0\t4\t624546\t0.020864602\t-0.00086\t-726568406\t71124447\n"
                + "basename0\t5\t458581\t0.015320105\t-0.000631\t-726568406\t71275227\n"
                + "basename0\t6\t455194\t0.015206953\t-0.000626\t-726568406\t71268937\n"
                + "basename0\t7\t338682\t0.011314563\t-0.000466\t-726568406\t71383137\n"
                + "basename0\t8\t342731\t0.011449831\t-0.000472\t-726568406\t71377688\n"
                + "basename0\t9\t305846\t0.01021759\t-0.000421\t-726568406\t71414272\n"
                + "basename0\t10\t391497\t0.013078987\t-0.000539\t-726568406\t71328565\n"
                + "basename0\t11\t285087\t0.009524081\t-0.000392\t-726568406\t71434921\n"
                + "basename0\t12\t448654\t0.014988467\t-0.000617\t-726568406\t71271330\n"
                + "basename0\t13\t263024\t0.008787009\t-0.000362\t-726568406\t71456737\n"
                + "basename0\t14\t259276\t0.008661797\t-0.000357\t-726568406\t71460537\n"
                + "basename0\t15\t255673\t0.008541429\t-0.000352\t-726568406\t71463985\n"
                + "basename0\t16\t265280\t0.008862376\t-0.000365\t-726568406\t71454330\n"
                + "basename0\t17\t286275\t0.00956377\t-0.000394\t-726568406\t71433717\n"
                + "basename0\t18\t253905\t0.008482364\t-0.000349\t-726568406\t71466066\n"
                + "basename0\t19\t242706\t0.008108232\t-0.000334\t-726568406\t71477255\n"
                + "basename0\t20\t255099\t0.008522253\t-0.000351\t-726568406\t71464680\n"
                + "basename0\t21\t244071\t0.008153834\t-0.000336\t-726568406\t71475793\n"
                + "basename0\t22\t243361\t0.008130114\t-0.000335\t-726568406\t71476488\n"
                + "basename0\t23\t237500\t0.007934312\t-0.000327\t-726568406\t71482128\n"
                + "basename0\t24\t393818\t0.013156526\t-0.000542\t-726568406\t71325748\n"
                + "basename0\t25\t290470\t0.009703915\t-0.0004\t-726568406\t71429370\n"
                + "basename0\t26\t283203\t0.009461141\t-0.00039\t-726568406\t71436438\n"
                + "basename0\t27\t398203\t0.013303019\t-0.000548\t-726568406\t71321346\n"
                + "basename0\t28\t244253\t0.008159914\t-0.000336\t-726568406\t71475340\n"
                + "basename0\t29\t250658\t0.00837389\t-0.000345\t-726568406\t71469218\n"
                + "basename0\t30\t252561\t0.008437465\t-0.000348\t-726568406\t71466726\n"
                + "basename0\t31\t270395\t0.009033256\t-0.000372\t-726568406\t71449213\n"
                + "basename0\t32\t255815\t0.008546173\t-0.000352\t-726568406\t71463771\n"
                + "basename0\t33\t264515\t0.008836819\t-0.000364\t-726568406\t71455248\n"
                + "basename0\t34\t300673\t0.010044773\t-0.000414\t-726568406\t71419253\n"
                + "basename0\t35\t297241\t0.009930118\t-0.000409\t-726568406\t71422416\n"
                + "basename0\t36\t295019\t0.009855886\t-0.000406\t-726568406\t71424664\n"
                + "basename0\t37\t307167\t0.010261722\t-0.000423\t-726568406\t71412201\n"
                + "basename0\t38\t310107\t0.01035994\t-0.000427\t-726568406\t71409148\n"
                + "basename0\t39\t491224\t0.01641063\t-0.000676\t-726568406\t71228249\n"
                + "basename0\t40\t339371\t0.011337581\t-0.000467\t-726568406\t71380591\n"
                + "basename0\t41\t447670\t0.014955594\t-0.000616\t-726568406\t71273454\n"
                + "basename0\t42\t381186\t0.012734521\t-0.000525\t-726568406\t71343713\n"
                + "basename0\t43\t445198\t0.01487301\t-0.000613\t-726568406\t71287803\n"
                + "basename0\t44\t482766\t0.016128068\t-0.000664\t-726568406\t71264200\n"
                + "basename0\t45\t656445\t0.021930272\t-0.000903\t-726568406\t71075738\n"
                + "basename0\t46\t704999\t0.023552346\t-0.00097\t-726568406\t71016151\n"
                + "basename0\t47\t1049645\t0.035066153\t-0.001445\t-726568406\t70666569\n"
                + "basename0\t48\t1674529\t0.055942047\t-0.002305\t-726568406\t70040028\n"
                + "basename0\t49\t2453779\t0.081974943\t-0.003377\t-726568406\t69260573\n"
                + "basename0\t50\t3343931\t0.111712812\t-0.004602\t-726568406\t68370412\n"
                + "basename0\t51\t1009797\t0.033734925\t-0.00139\t-726568406\t11172400\n";
        StringReader reader = new StringReader(tokens);
        BufferedReader br = new BufferedReader(reader);
        String line;

        // skip header line:
        br.readLine();

        while ((line = br.readLine()) != null) {
            String[] toks = line.split("[\\t]");

            outTSV.print(FilenameUtils.getBaseName(basenames[basenameIndex]));

            for (int i = 1; i < toks.length; i++) {

                outTSV.print("\t");
                outTSV.print(toks[i]);

            }
            outTSV.println();

        }

    }

    private static void writeAlignment(String[] basenames, int basenameIndex, char toBase) throws IOException {

        AlignmentWriter writer = new AlignmentWriterImpl(basenames[basenameIndex]);
        writer.setTargetLengths(new int[] { 10000 });
        writer.setSorted(true);
        writer.setTargetIdentifiersArray(new String[] { "target1" });

        int numAlignmentEntries = 10;
        int readIndex = 0;
        int referencePosition = 1;
        int positionStart = 100;
        writeAlignmentEntries(toBase, writer, numAlignmentEntries, readIndex, referencePosition, positionStart);
        if (toBase == 'C') {
            referencePosition = 1;
            writeAlignmentEntries(toBase, writer, numAlignmentEntries, readIndex, referencePosition, positionStart);
        }
        writer.close();
    }

    private static void writeAlignmentEntries(char toBase, AlignmentWriter writer, int numAlignmentEntries,
            int readIndex, int referencePosition, int positionStart) throws IOException {
        for (int j = 0; j < numAlignmentEntries; j++) {
            final Alignments.AlignmentEntry.Builder builder = Alignments.AlignmentEntry.newBuilder();

            builder.setQueryIndex(readIndex++);
            builder.setTargetIndex(0);

            builder.setQueryLength(50);
            builder.setPosition(referencePosition++ + positionStart);
            builder.setMatchingReverseStrand(j % 2 == 0);
            //  builder.setMatchingReverseStrand(false);

            builder.setScore(50);
            builder.setNumberOfIndels(0);

            builder.setQueryAlignedLength(50);
            int multiplicity = 1;
            builder.setMultiplicity(multiplicity);
            builder.setTargetAlignedLength(50);
            Alignments.SequenceVariation.Builder varBuilder = Alignments.SequenceVariation.newBuilder();
            varBuilder.setFrom("A");
            varBuilder.setTo(Character.toString(toBase));

            varBuilder.setToQuality(ByteString.copyFrom(new byte[] { 40 }));
            varBuilder.setPosition(25);
            varBuilder.setReadIndex(25);

            //   System.out.printf("%s j=%d var A/G at %d%n", basenames[basenameIndex], j, 25 + referencePosition + positionStart - 1);
            builder.addSequenceVariations(varBuilder);
            final Alignments.AlignmentEntry entry = builder.build();
            writer.appendEntry(entry);

        }
    }

}