Java tutorial
/* * Copyright (C) 2009-2012 Institute for Computational Biomedicine, * Weill Medical College of Cornell University * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package edu.cornell.med.icb.goby.alignments; import edu.cornell.med.icb.goby.modes.SamHelper; import edu.cornell.med.icb.goby.readers.FastXEntry; import edu.cornell.med.icb.goby.readers.FastXReader; import edu.cornell.med.icb.goby.reads.QualityEncoding; import edu.cornell.med.icb.goby.reads.RandomAccessSequenceCache; import edu.cornell.med.icb.identifier.DoubleIndexedIdentifier; import it.unimi.dsi.fastutil.bytes.ByteArrayList; import it.unimi.dsi.fastutil.bytes.ByteList; import it.unimi.dsi.fastutil.chars.CharArrayList; import it.unimi.dsi.fastutil.chars.CharList; import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.lang.MutableString; import org.apache.commons.io.FileUtils; import org.junit.Test; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; /** * Test ExportableAlignmentEntryData, a class which assists with exporting from Compact to other formats such * as SAM. */ public class TestExportableAlignmentEntryData { @Test public void testRandomAccess() throws IOException, ClassNotFoundException { final RandomAccessSequenceCache genome = new RandomAccessSequenceCache(); genome.load("test-data/seq-var-test/small-synth.random-access-genome"); final List<Character> refBases = new ArrayList<Character>(); for (int i = 10; i <= 15; i++) { final char base = genome.get(0, i); refBases.add(base); } assertTrue("Incorrect base read from randomSequenceCache", 'G' == refBases.get(0)); assertTrue("Incorrect base read from randomSequenceCache", 'C' == refBases.get(1)); assertTrue("Incorrect base read from randomSequenceCache", 'T' == refBases.get(2)); assertTrue("Incorrect base read from randomSequenceCache", 'G' == refBases.get(3)); assertTrue("Incorrect base read from randomSequenceCache", 'G' == refBases.get(4)); assertTrue("Incorrect base read from randomSequenceCache", 'A' == refBases.get(5)); } @Test public void testMakeExportable() throws IOException, ClassNotFoundException { final RandomAccessSequenceCache genome = new RandomAccessSequenceCache(); genome.load("test-data/seq-var-test/small-synth.random-access-genome"); final Int2ObjectMap<Map<String, String>> samDetailsMap = readSamFileToMap(genome, "test-data/seq-var-test/seq-var-reads-gsnap.sam"); final FastXReader fastqReader = new FastXReader("test-data/seq-var-test/seq-var-reads.fq"); final Map<Integer, ReadsDataEntry> reads = new LinkedHashMap<Integer, ReadsDataEntry>(); int readIndex = 0; for (final FastXEntry fastqEntry : fastqReader) { final ReadsDataEntry entry = new ReadsDataEntry(readIndex++, fastqEntry.getEntryHeader(), fastqEntry.getSequence(), fastqEntry.getQuality(), -64); reads.put(entry.readIndex, entry); } final AlignmentReader reader = new AlignmentReaderImpl( "test-data/seq-var-test/sorted-seq-var-reads-gsnap.entries"); reader.readHeader(); final DoubleIndexedIdentifier targetIdentifiers = new DoubleIndexedIdentifier( reader.getTargetIdentifiers()); final ExportableAlignmentEntryData exportData = new ExportableAlignmentEntryData(genome, QualityEncoding.PHRED, targetIdentifiers); while (reader.hasNext()) { final Alignments.AlignmentEntry alignmentEntry = reader.next(); final ReadsDataEntry actualReadsEntry = reads.get(alignmentEntry.getQueryIndex()); if (actualReadsEntry == null) { fail("Couldn't find actual read for alignmentEntry.queryIndex=" + alignmentEntry.getQueryIndex()); } System.out.printf("Processing queryIndex=%d with description '%s'%n", alignmentEntry.getQueryIndex(), actualReadsEntry.readName); exportData.buildFrom(alignmentEntry, actualReadsEntry.readBases, actualReadsEntry.readQuals); assertFalse(exportData.getInvalidMessage(), exportData.isInvalid()); validateEntry(exportData, samDetailsMap.get(exportData.getQueryIndex())); } } private void validateEntry(final ExportableAlignmentEntryData exportData, final Map<String, String> samData) { final int qi = exportData.getQueryIndex(); if (samData == null) { fail("Couldn't find sam data for queryIndex=" + qi); } assertEquals("Pair flags are incorrect for qi=" + qi, (int) Integer.valueOf(samData.get("pairFlags")), exportData.getPairFlags()); assertEquals("targetIndex incorrect for qi=" + qi, (int) Integer.valueOf(samData.get("targetIndex")), exportData.getTargetIndex()); assertEquals("position incorrect for qi=" + qi, (int) Integer.valueOf(samData.get("position")), exportData.getStartPosition()); assertEquals("mapq incorrect for qi=" + qi, (int) Integer.valueOf(samData.get("mapq")), exportData.getMappingQuality()); assertEquals("cigar incorrect for qi=" + qi, samData.get("cigar"), exportData.getCigarString()); validateSequence(qi, samData.get("read"), exportData.getReadBasesOriginal()); assertEquals("mismatches incorrect for qi=" + qi, samData.get("mismatches"), exportData.getMismatchString()); assertEquals("read length and qual length must match for qi=" + qi, exportData.getReadBasesOriginal().length(), exportData.getReadQualities().size()); } private void validateSequence(final int qi, final String expected, final String actual) { assertEquals("Read lengths don't match qi=" + qi, expected.length(), actual.length()); for (int i = 0; i < expected.length(); i++) { final char actualBase = actual.charAt(i); final char expectedBase = expected.charAt(i); if (actualBase != expectedBase && actualBase != 'N') { assertEquals("Base incorrect at qi=" + qi + " i=" + i, expectedBase, actualBase); } } } private Int2ObjectMap<Map<String, String>> readSamFileToMap(final RandomAccessSequenceCache genome, final String filename) throws IOException { final List<String> lines = FileUtils.readLines(new File(filename)); final Int2ObjectMap<Map<String, String>> result = new Int2ObjectArrayMap<Map<String, String>>(); for (final String line : lines) { if (line.startsWith("@")) { continue; } final String[] parts = line.split("\t"); final String target = parts[2]; if ("*".equals(target)) { continue; } final int queryIndex = Integer.valueOf(parts[0]); final Map<String, String> entry = new HashMap<String, String>(); result.put(queryIndex, entry); entry.put("pairFlags", parts[1]); entry.put("targetIndex", String.valueOf(genome.getReferenceIndex(target))); entry.put("position", parts[3]); entry.put("mapq", parts[4]); entry.put("cigar", parts[5]); entry.put("read", parts[9]); for (int i = 10; i < parts.length; i++) { if (parts[i].startsWith("MD:Z")) { entry.put("mismatches", SamHelper.canonicalMdz(parts[i].substring(5))); break; } } } return result; } private static class ReadsDataEntry { final int readIndex; final String readName; final CharList readBases; final ByteList readQuals; private ReadsDataEntry(final int readIndex, final MutableString readName, final MutableString readBases, final MutableString readQuals, final int qualityOffset) { this.readIndex = readIndex; this.readName = readName.toString(); this.readBases = new CharArrayList(readBases.length()); for (int i = 0; i < readBases.length(); i++) { this.readBases.add(readBases.charAt(i)); } this.readQuals = new ByteArrayList(readQuals.length()); for (int i = 0; i < readQuals.length(); i++) { this.readQuals.add((byte) ((byte) readQuals.charAt(i) + qualityOffset)); } } } }