edu.cornell.med.icb.goby.reads.TestReadsWriter.java Source code

Java tutorial

Introduction

Here is the source code for edu.cornell.med.icb.goby.reads.TestReadsWriter.java

Source

/*
 * Copyright (C) 2009-2010 Institute for Computational Biomedicine,
 *                         Weill Medical College of Cornell University
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package edu.cornell.med.icb.goby.reads;

import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.lang.MutableString;
import org.apache.commons.io.FileUtils;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * @author Fabien Campagne
 *         Date: Apr 28, 2009
 *         Time: 12:46:25 PM
 */
public class TestReadsWriter {
    @BeforeClass
    public static void setUp() throws IOException {
        FileUtils.forceMkdir(new File("test-results/reads"));
    }

    @Test
    public void testReadWriteSequences() throws IOException {
        testReadWriteSequences(false);
        testReadWriteSequences(true);
    }

    public void testReadWriteSequences(boolean writeMetaData) throws IOException {
        final String[] sequences = { "ACTGCGCGCG", "AAAAATTTTGGGGGCCCCCCC",
                "AAAAATTTTGGGGGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" };
        final String[] descriptions = { "hello world", "descr2", "description 3" };

        final ReadsWriter writer = new ReadsWriterImpl(
                new FileOutputStream(new File("test-results/reads/written-101.bin")));

        int expectedCount = 0;
        writer.setNumEntriesPerChunk(9);
        final int totalNumSeqs = 13;
        for (int i = 0; i < totalNumSeqs; i++) {
            int j = 0;
            for (final String sequence : sequences) {
                writer.setSequence(sequence);
                writer.setDescription(descriptions[j]);
                if (writeMetaData) {
                    writer.appendMetaData("key1", "value1");
                    writer.appendMetaData("key2", "value2");
                }
                writer.appendEntry();
                ++j;
                expectedCount++;
            }
        }

        writer.close();
        writer.printStats(System.out);

        final ReadsReader reader = new ReadsReader(
                new FastBufferedInputStream(new FileInputStream(new File("test-results/reads/written-101.bin"))));
        final MutableString sequence = new MutableString();
        int count = 0;
        while (reader.hasNext()) {
            final Reads.ReadEntry entry = reader.next();
            ReadsReader.decodeSequence(entry, sequence);
            // check that the sequence matches what was encoded:
            assertEquals(sequences[count % sequences.length], sequence.toString());
            assertEquals(descriptions[count % descriptions.length], entry.getDescription());
            assertEquals(count, entry.getReadIndex());
            count++;
        }

        assertEquals(expectedCount, count);
    }

    @Test
    public void testReadFastBufferredOneChunk() throws IOException {
        final String[] sequences = { "ACTGCGCGCG", "AAAAATTTTGGGGGCCCCCCC",
                "AAAAATTTTGGGGGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" };
        final String[] descriptions = { "hello world", "descr2", "description 3" };
        final String filename = "test-results/reads/written-101.bin";

        final ReadsWriter writer = new ReadsWriterImpl(new FileOutputStream(new File(filename)));
        ReadsReader reader;

        int expectedCount = 0;
        writer.setNumEntriesPerChunk(9);
        final int totalNumSeqs = 13;
        for (int i = 0; i < totalNumSeqs; i++) {
            int j = 0;
            for (final String sequence : sequences) {
                writer.setSequence(sequence);
                writer.setDescription(descriptions[j]);
                writer.appendEntry();
                ++j;
                expectedCount++;
            }
        }
        writer.close();

        FastBufferedInputStream stream = new FastBufferedInputStream(FileUtils.openInputStream(new File(filename)));
        // start at 10 and end at 15 should return no sequences at all.
        reader = new ReadsReader(10, 15, stream);
        MutableString sequence = new MutableString();
        assertFalse(reader.hasNext());

        stream = new FastBufferedInputStream(FileUtils.openInputStream(new File(filename)));
        // start at 138 (precise start of a chunk) and end at 139 should return 9 sequences exactly.
        reader = new ReadsReader(138, 139, stream);
        sequence = new MutableString();
        int count = 0;

        while (reader.hasNext()) {
            final Reads.ReadEntry entry = reader.next();
            ReadsReader.decodeSequence(entry, sequence);
            // check that the sequence matches what was encoded:
            assertEquals(sequences[count % sequences.length], sequence.toString());
            assertEquals(descriptions[count % descriptions.length], entry.getDescription());
            //  assertEquals(count+9, entry.getReadIndex());
            //        System.out.println("desc: " +entry.getDescription() );
            count++;
        }
        // should have skipped 2:
        assertEquals(9, count);
    }

    @Test
    public void testReadFastBufferedInChunks() throws IOException {
        final String[] sequences = { "ACTGCGCGCG", "AAAAATTTTGGGGGCCCCCCC",
                "AAAAATTTTGGGGGCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" };
        final String[] descriptions = { "hello world", "descr2", "description 3" };
        final String filename = "test-results/reads/written-101.bin";
        final ReadsWriter writer = new ReadsWriterImpl(new FileOutputStream(new File(filename)));

        int expectedCount = 0;
        writer.setNumEntriesPerChunk(9);
        final int totalNumSeqs = 13;
        for (int i = 0; i < totalNumSeqs; i++) {
            int j = 0;
            for (final String sequence : sequences) {
                writer.setSequence(sequence);
                writer.setDescription(descriptions[j]);
                writer.appendEntry();
                ++j;
                expectedCount++;
            }
        }
        writer.close();

        FastBufferedInputStream stream = new FastBufferedInputStream(FileUtils.openInputStream(new File(filename)));
        writer.printStats(System.out);

        final MutableString sequence = new MutableString();
        int count = 0;

        ReadsReader reader = new ReadsReader(0, Long.MAX_VALUE, stream);

        while (reader.hasNext()) {
            final Reads.ReadEntry entry = reader.next();
            ReadsReader.decodeSequence(entry, sequence);
            // check that the sequence matches what was encoded:
            assertEquals(sequences[count % sequences.length], sequence.toString());
            assertEquals(descriptions[count % descriptions.length], entry.getDescription());
            assertEquals(count, entry.getReadIndex());
            count++;
        }

        assertEquals(expectedCount, count);

        stream.close();

        // now start at position 10, will skip the first collection.
        stream = new FastBufferedInputStream(FileUtils.openInputStream(new File(filename)));
        reader = new ReadsReader(10, Long.MAX_VALUE, stream);

        count = 0;
        while (reader.hasNext()) {
            final Reads.ReadEntry entry = reader.next();
            ReadsReader.decodeSequence(entry, sequence);
            // check that the sequence matches what was encoded:
            assertEquals(sequences[count % sequences.length], sequence.toString());
            assertEquals(descriptions[count % descriptions.length], entry.getDescription());
            //  assertEquals(count+9, entry.getReadIndex());
            count++;
        }

        assertEquals(expectedCount - 9, count);

        // start at 150 is past the start of the second read collection, so this collection is not returned:
        reader = new ReadsReader(150, Long.MAX_VALUE, stream);

        count = 0;
        while (reader.hasNext()) {
            final Reads.ReadEntry entry = reader.next();
            ReadsReader.decodeSequence(entry, sequence);
            // check that the sequence matches what was encoded:
            assertEquals(sequences[count % sequences.length], sequence.toString());
            assertEquals(descriptions[count % descriptions.length], entry.getDescription());
            //  assertEquals(count+9, entry.getReadIndex());
            count++;
        }
        // should have skipped 2:
        assertEquals(expectedCount - 9 * 2, count);

        // start at 0 and end at 150 should return only the second read collection, 9 sequences exactly.
        reader = new ReadsReader(10, 150, stream);

        count = 0;
        while (reader.hasNext()) {
            final Reads.ReadEntry entry = reader.next();
            ReadsReader.decodeSequence(entry, sequence);
            // check that the sequence matches what was encoded:
            assertEquals(sequences[count % sequences.length], sequence.toString());
            assertEquals(descriptions[count % descriptions.length], entry.getDescription());
            //  assertEquals(count+9, entry.getReadIndex());
            count++;
        }

        // should have skipped 2:
        assertEquals(9, count);
    }
}