edu.berkeley.compbio.sequtils.sequencefragmentiterator.SequentialMatePairSFI.java Source code

Java tutorial

Introduction

Here is the source code for edu.berkeley.compbio.sequtils.sequencefragmentiterator.SequentialMatePairSFI.java

Source

/*
 * Copyright (c) 2007-2013  David Soergel  <dev@davidsoergel.com>
 * Licensed under the Apache License, Version 2.0
 * http://www.apache.org/licenses/LICENSE-2.0
 */

package edu.berkeley.compbio.sequtils.sequencefragmentiterator;

import edu.berkeley.compbio.sequtils.NotEnoughSequenceException;
import edu.berkeley.compbio.sequtils.SequenceFragmentMetadata;
import edu.berkeley.compbio.sequtils.sequencereader.SectionList;
import edu.berkeley.compbio.sequtils.strings.SequenceFragment;
import org.apache.commons.lang.NotImplementedException;
import org.apache.log4j.Logger;
import org.jetbrains.annotations.NotNull;

import java.io.IOException;
import java.util.NoSuchElementException;

/**
 * Provides a SequenceFragmentIterator that reads sequential mate-pairs
 *
 * @author David Tulga
 * @version $Id: SequentialMatePairSFI.java 1324 2010-03-08 22:44:49Z soergel $
 */
public class SequentialMatePairSFI extends ScanningSFI {
    // ------------------------------ FIELDS ------------------------------

    private static final Logger logger = Logger.getLogger(SequentialMatePairSFI.class);

    /**
     * Number of trailing characters of a sequence name that make up the mate-pair suffix
     * <code>.&lt;end&gt;&lt;clone&gt;</code>: a '.' separator, one end-marker character, and one clone-number
     * character.
     */
    private static final int MATE_SUFFIX_LENGTH = 3;

    /** Running total of the lengths of all joined fragments returned by {@link #next()}. */
    private long charactersRead = 0;

    // -------------------------- STATIC METHODS --------------------------

    /**
     * Tests whether two sections are the two ends of one mate pair, judging only by their sequence names.
     * <p/>
     * A name is expected to end in <code>.&lt;end&gt;&lt;clone&gt;</code>.  Two names denote a mate pair when
     * everything before that suffix is equal, the clone characters are equal, and the end markers are 'b' and 'g'
     * or 'x' and 'y' (in either order).
     *
     * @param k1 metadata of the first section; may be null
     * @param k2 metadata of the second section; may be null
     * @return true if the two names form a mate pair; false if either argument is null, if either name is null,
     *         too short, or lacks the '.' separator (a warning is logged for the first offending name), or if the
     *         names simply do not match
     */
    public static boolean isMatePair(SequenceFragmentMetadata k1, SequenceFragmentMetadata k2) {
        if (k1 == null || k2 == null) {
            return false;
        }

        String id1 = k1.getSequenceName();
        String id2 = k2.getSequenceName();

        // Validate before indexing: names that are null or shorter than the suffix used to
        // blow up with NullPointerException / StringIndexOutOfBoundsException instead of being rejected.
        if (!hasMatePairSuffix(id1) || !hasMatePairSuffix(id2)) {
            return false;
        }

        int len1 = id1.length();
        int len2 = id2.length();

        String base1 = id1.substring(0, len1 - MATE_SUFFIX_LENGTH);
        String base2 = id2.substring(0, len2 - MATE_SUFFIX_LENGTH);

        char end1 = id1.charAt(len1 - 2);
        char end2 = id2.charAt(len2 - 2);

        char cloneNumber1 = id1.charAt(len1 - 1);
        char cloneNumber2 = id2.charAt(len2 - 1);

        return base1.equals(base2) && cloneNumber1 == cloneNumber2 && isComplementaryEndPair(end1, end2);
    }

    /**
     * Checks that a sequence name is long enough to carry the mate-pair suffix and has the '.' separator in the
     * expected place, logging a warning if it does not.
     *
     * @param id the sequence name to check; may be null
     * @return true if the name can be parsed as <code>&lt;base&gt;.&lt;end&gt;&lt;clone&gt;</code>
     */
    private static boolean hasMatePairSuffix(String id) {
        if (id == null || id.length() < MATE_SUFFIX_LENGTH || id.charAt(id.length() - MATE_SUFFIX_LENGTH) != '.') {
            logger.warn("Unknown id format (not mate pair): " + id);
            return false;
        }
        return true;
    }

    /**
     * Tells whether two end-marker characters belong together: 'b' with 'g', or 'x' with 'y', in either order.
     */
    private static boolean isComplementaryEndPair(char end1, char end2) {
        return (end1 == 'b' && end2 == 'g') || (end1 == 'g' && end2 == 'b')
                || (end1 == 'x' && end2 == 'y') || (end1 == 'y' && end2 == 'x');
    }

    /**
     * Joins the two fragments of a mate pair into one fragment via <code>k1.plus(k2)</code>.
     * <p/>
     * The result is named after the first fragment with the end-marker character removed, i.e.
     * <code>&lt;base&gt;.&lt;clone&gt;</code>.  The name of <code>k1</code> is not validated here; callers are
     * expected to have checked the pair with {@link #isMatePair} first.
     *
     * @param k1 the first fragment of the pair; its name determines the name of the result
     * @param k2 the second fragment of the pair
     * @return the joined fragment
     */
    @NotNull
    public static SequenceFragment joinMatePair(SequenceFragment k1, SequenceFragment k2) {
        // re-parsing the name is redundant with isMatePair, but it's cheap
        String id1 = k1.getSequenceName();
        int len1 = id1.length();
        String base1 = id1.substring(0, len1 - MATE_SUFFIX_LENGTH);
        char sep1 = id1.charAt(len1 - MATE_SUFFIX_LENGTH);
        char cloneNumber1 = id1.charAt(len1 - 1);

        SequenceFragment result = k1.plus(k2);
        result.setSequenceName(base1 + sep1 + cloneNumber1);
        // note the length is already set

        return result;
    }

    // --------------------------- CONSTRUCTORS ---------------------------

    /**
     * Creates an iterator over the mate pairs found among consecutive sections of the given list.
     *
     * @param input the section list to read from; passed straight to the superclass constructor
     * @throws IOException if the superclass constructor fails
     */
    public SequentialMatePairSFI(SectionList input) throws IOException {
        super(input);
    }

    // --------------------- GETTER / SETTER METHODS ---------------------

    /**
     * @return the summed length of all joined fragments returned by {@link #next()} so far
     */
    @Override
    public long getCharactersRead() {
        return charactersRead;
    }

    // ------------------------ INTERFACE METHODS ------------------------

    // --------------------- Interface Iterator ---------------------

    /**
     * Advances through the section list until two consecutive sections form a mate pair, then reads both sections
     * and returns them joined into a single fragment.
     * <p/>
     * Sections that do not end up in a mate pair are skipped without being read.
     *
     * @return the next joined mate-pair fragment
     * @throws NoSuchElementException if reading fails with an IOException (which is also logged) or with a
     *                                NotEnoughSequenceException; the original exception is attached as the cause.
     *                                Presumably sectionList.next() also throws it when the list is exhausted.
     */
    public SequenceFragment next() {
        // we don't use a SectionSequenceFragmentIterator here, because that way we'd unnecessarily count
        // every section whether or not it ends up in a mate pair

        try {
            SequenceFragmentMetadata previous;
            SequenceFragmentMetadata current = sectionList.next();

            // Slide a two-section window along the list.  There is no explicit end test here: the loop is left
            // either by returning a pair or by an exception from sectionList.next() or from reading a fragment.
            while (true) {
                previous = current;
                current = sectionList.next();

                if (isMatePair(previous, current)) {
                    SequenceFragment k1 = readFragment(previous);
                    // The second mate must be described by its own section.  Previously it was built from the
                    // first section's metadata, name and start position even though the reader had been moved
                    // to the second one.
                    SequenceFragment k2 = readFragment(current);

                    SequenceFragment result = joinMatePair(k1, k2);
                    charactersRead += result.getLength();
                    return result;
                }
            }
        } catch (IOException e) {
            logger.error("Error", e);
            throw noSuchElement(e);
        } catch (NotEnoughSequenceException e) {
            // no problem, end of sequence
            throw noSuchElement(e);
        }
    }

    /**
     * Positions the section list at the given section and builds a fragment of unknown length for it, then calls
     * checkAvailable() on the fragment.
     *
     * @param section the section to read
     * @return the fragment for that section
     */
    private SequenceFragment readFragment(SequenceFragmentMetadata section)
            throws IOException, NotEnoughSequenceException {
        sectionList.seek(section);
        SequenceFragment fragment = new SequenceFragment(section.getParentMetadata(), section.getSequenceName(),
                section.getStartPosition(), sectionList, SequenceFragment.UNKNOWN_LENGTH, spectrumScanner);
        fragment.checkAvailable();
        return fragment;
    }

    /**
     * Builds a NoSuchElementException that keeps the underlying failure as its cause, so the reason the
     * iteration stopped is not lost.
     */
    private static NoSuchElementException noSuchElement(Throwable cause) {
        NoSuchElementException result = new NoSuchElementException();
        result.initCause(cause);
        return result;
    }

    // -------------------------- OTHER METHODS --------------------------

    /**
     * Closes the underlying section list.
     */
    @Override
    public void close() {
        sectionList.close();
    }

    /**
     * Not supported by this iterator.
     *
     * @throws NotImplementedException always
     */
    @Override
    public int estimatedTotalSamples() {
        throw new NotImplementedException();
    }

    /**
     * @return the total sequence as reported by the underlying section list
     */
    @Override
    public long getTotalSequence() {
        return sectionList.getTotalSequence();
    }

    /**
     * Resets the underlying section list.  Note that the characters-read counter is left unchanged.
     */
    public void reset() //throws IOException
    {
        sectionList.reset();
    }
}