uk.ac.babraham.SeqMonk.ProbeGenerators.MacsPeakCaller.java Source code

Introduction

Here is the source code for uk.ac.babraham.SeqMonk.ProbeGenerators.MacsPeakCaller.java. MacsPeakCaller is a SeqMonk probe generator which calls ChIP-Seq peaks using a MACS-style algorithm: it estimates the offset between forward- and reverse-strand reads from a set of high-confidence regions, derives a binomial threshold for trimming over-represented duplicate reads, selects candidate windows with a Poisson test, and finally validates each candidate against the highest local background density taken from the input samples (or, when no input is supplied, from the surrounding ChIP signal).
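
Before the full listing, a minimal sketch of how the generator might be driven programmatically. It only calls methods defined in this class; the dataCollection() accessor on SeqMonkApplication is assumed, and how the enclosing ProbeGenerator reports its results back to a caller is not shown in this file.

// A minimal sketch, not part of the original file.
DataCollection collection = SeqMonkApplication.getInstance().dataCollection(); // assumed accessor

MacsPeakCaller caller = new MacsPeakCaller(collection);
JPanel optionsPanel = caller.getOptionsPanel();   // show this so ChIP/input stores can be selected

// After the user has picked stores, a p-value cutoff and a fragment size...
if (caller.isReady()) {
    caller.generateProbes();   // validates the options, then runs run() on a background thread
}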

Source

/**
 * Copyright 2012-15 Simon Andrews
 *
 *    This file is part of SeqMonk.
 *
 *    SeqMonk is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    SeqMonk is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with SeqMonk; if not, write to the Free Software
 *    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
package uk.ac.babraham.SeqMonk.ProbeGenerators;

import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.util.Arrays;
import java.util.Collections;
import java.util.Vector;

import javax.swing.JCheckBox;
import javax.swing.JLabel;
import javax.swing.JList;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextField;
import javax.swing.event.ListSelectionEvent;
import javax.swing.event.ListSelectionListener;

import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.distribution.PoissonDistribution;

import uk.ac.babraham.SeqMonk.SeqMonkApplication;
import uk.ac.babraham.SeqMonk.SeqMonkException;
import uk.ac.babraham.SeqMonk.Analysis.Statistics.SimpleStats;
import uk.ac.babraham.SeqMonk.DataTypes.DataCollection;
import uk.ac.babraham.SeqMonk.DataTypes.DataStore;
import uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome;
import uk.ac.babraham.SeqMonk.DataTypes.Genome.Location;
import uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe;
import uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet;
import uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceRead;
import uk.ac.babraham.SeqMonk.Dialogs.Renderers.TypeColourRenderer;
import uk.ac.babraham.SeqMonk.Utilities.LongVector;
import uk.ac.babraham.SeqMonk.Utilities.NumberKeyListener;

public class MacsPeakCaller extends ProbeGenerator implements Runnable, ListSelectionListener {

    public boolean cancel = false;

    /** The options panel. */
    private JPanel optionPanel = null;

    private JList chipStoresList;
    private JList inputStoresList;
    private DataStore[] selectedChIPStores;
    private DataStore[] selectedInputStores;
    private JTextField pValueField;
    private double pValue = 0.00001d;
    private JCheckBox skipDeduplicationBox;

    private JTextField fragmentSizeField;
    private int fragmentSize = 600; // Average selected fragment size

    private double minFoldEnrichment = 20;
    private double maxFoldEnrichment = 100;

    public MacsPeakCaller(DataCollection collection) {
        super(collection);
    }

    public void cancel() {
        cancel = true;
    }

    public boolean requiresExistingProbeSet() {
        return false;
    }

    public JPanel getOptionsPanel() {
        if (optionPanel != null) {
            // We've done this already
            return optionPanel;
        }

        optionPanel = new JPanel();
        optionPanel.setLayout(new GridBagLayout());
        GridBagConstraints gbc = new GridBagConstraints();
        gbc.gridx = 1;
        gbc.gridy = 1;
        gbc.weightx = 0.5;
        gbc.weighty = 0.1;
        gbc.fill = GridBagConstraints.BOTH;

        optionPanel.add(new JLabel("ChIP Stores to use"), gbc);

        gbc.gridy++;
        gbc.weighty = 0.5;

        DataStore[] stores = collection.getAllDataStores();

        chipStoresList = new JList(stores);
        chipStoresList.getSelectionModel().addListSelectionListener(this);
        chipStoresList.setCellRenderer(new TypeColourRenderer());
        optionPanel.add(new JScrollPane(chipStoresList), gbc);

        gbc.gridy++;
        gbc.weighty = 0.1;

        optionPanel.add(new JLabel("Input Stores to use"), gbc);

        gbc.gridy++;
        gbc.weighty = 0.5;

        inputStoresList = new JList(stores);
        inputStoresList.getSelectionModel().addListSelectionListener(this);
        inputStoresList.setCellRenderer(new TypeColourRenderer());
        optionPanel.add(new JScrollPane(inputStoresList), gbc);

        gbc.gridy++;

        JPanel bottomPanel = new JPanel();
        bottomPanel.setLayout(new GridBagLayout());
        GridBagConstraints bgbc = new GridBagConstraints();
        bgbc.gridx = 0;
        bgbc.gridy = 0;
        bgbc.weightx = 0.5;
        bgbc.weighty = 0.5;
        bgbc.fill = GridBagConstraints.HORIZONTAL;

        bottomPanel.add(new JLabel("P-value cutoff "), bgbc);
        pValueField = new JTextField("" + pValue);
        pValueField.addKeyListener(new NumberKeyListener(true, false, 1));
        bgbc.gridx = 1;
        bottomPanel.add(pValueField, bgbc);

        bgbc.gridx = 0;
        bgbc.gridy++;

        bottomPanel.add(new JLabel("Sonicated fragment size "), bgbc);
        fragmentSizeField = new JTextField("300");
        fragmentSizeField.addKeyListener(new NumberKeyListener(false, false));
        bgbc.gridx = 1;
        bottomPanel.add(fragmentSizeField, bgbc);

        bgbc.gridx = 0;
        bgbc.gridy++;

        bottomPanel.add(new JLabel("Skip deduplication step "), bgbc);
        skipDeduplicationBox = new JCheckBox();
        bgbc.gridx = 1;
        bottomPanel.add(skipDeduplicationBox, bgbc);

        optionPanel.add(bottomPanel, gbc);
        return optionPanel;
    }

    public boolean isReady() {
        try {
            pValue = Double.parseDouble(pValueField.getText());
            fragmentSize = Integer.parseInt(fragmentSizeField.getText());

            Object[] o = chipStoresList.getSelectedValues();
            if (o == null || o.length == 0) {
                throw new SeqMonkException("No selected ChIP stores");
            }
            selectedChIPStores = new DataStore[o.length];
            for (int i = 0; i < o.length; i++) {
                selectedChIPStores[i] = (DataStore) o[i];
            }

            o = inputStoresList.getSelectedValues();
            // An empty input selection is allowed (peaks are then validated against
            // the local ChIP background); a null return should never happen.
            if (o == null) {
                throw new SeqMonkException("No selected input stores");
            }
            selectedInputStores = new DataStore[o.length];
            for (int i = 0; i < o.length; i++) {
                selectedInputStores[i] = (DataStore) o[i];
            }

        } catch (Exception ex) {
            optionsNotReady();
            return false;
        }

        optionsReady();
        return true;
    }

    public String toString() {
        return "MACS peak caller";
    }

    public void generateProbes() {
        if (!isReady()) {
            // Don't start the generation thread if the options aren't valid
            generationCancelled();
            return;
        }
        cancel = false;
        Thread t = new Thread(this);
        t.start();
    }

    /**
     * Gets a text description of the current set of options.
     * 
     * @return Text describing the current options
     */
    private String getDescription() {
        StringBuffer b = new StringBuffer();
        b.append("MACS peaks using chip samples ");

        for (int i = 0; i < selectedChIPStores.length; i++) {
            b.append(selectedChIPStores[i].name());
            if (i < selectedChIPStores.length - 1) {
                b.append(",");
            }
        }

        if (selectedInputStores.length > 0) {
            b.append(" and input samples ");
            for (int i = 0; i < selectedInputStores.length; i++) {
                b.append(selectedInputStores[i].name());
                if (i < selectedInputStores.length - 1) {
                    b.append(",");
                }
            }

        } else {
            b.append(" and no input samples");
        }

        b.append(". Fragment size was ");
        b.append(fragmentSize);
        b.append(". Significance threshold was ");
        b.append(pValue);
        //b.append(".");
        if (skipDeduplicationBox.isSelected()) {
            b.append(". Deduplication step skipped");
        }

        return b.toString();
    }

    public void run() {

        //      for (int i=0;i<selectedChIPStores.length;i++) {
        //         System.err.println("Selcted ChIP="+selectedChIPStores[i]);
        //      }
        //      for (int i=0;i<selectedInputStores.length;i++) {
        //         System.err.println("Selcted Input="+selectedInputStores[i]);
        //      }

        // First find the tag offset between the Watson and Crick strands

        // Work out the total average coverage for all of the combined ChIP samples
        long totalChIPCoverage = 0;

        for (int i = 0; i < selectedChIPStores.length; i++) {
            totalChIPCoverage += selectedChIPStores[i].getTotalReadLength();
        }

        if (cancel) {
            generationCancelled();
            return;
        }

        double averageChIPCoveragePerBase = totalChIPCoverage / (double) collection.genome().getTotalGenomeLength();

        double lowerCoverage = averageChIPCoveragePerBase * minFoldEnrichment;
        double upperCoverage = averageChIPCoveragePerBase * maxFoldEnrichment;

        System.err.println("Coverage range for high confidence peaks is " + lowerCoverage + " - " + upperCoverage);

        // Now we go through the data to find locations for our high confidence peaks so we can
        // randomly select 1000 of these to use to find the offset between the two strands

        Chromosome[] chromosomes = collection.genome().getAllChromosomes();

        Vector<Probe> potentialHighConfidencePeaks = new Vector<Probe>();

        for (int c = 0; c < chromosomes.length; c++) {

            if (cancel) {
                generationCancelled();
                return;
            }

            // Time for an update
            updateGenerationProgress("Finding high confidence peaks on chromosome " + chromosomes[c].name(), c,
                    chromosomes.length);

            Probe lastValidProbe = null;

            for (int startPosition = 1; startPosition < chromosomes[c].length()
                    - fragmentSize; startPosition += fragmentSize / 2) {

                // See if we need to quit
                if (cancel) {
                    generationCancelled();
                    return;
                }

                long totalLength = 0;
                Probe probe = new Probe(chromosomes[c], startPosition, startPosition + fragmentSize);

                for (int s = 0; s < selectedChIPStores.length; s++) {
                    long[] reads = selectedChIPStores[s].getReadsForProbe(probe);
                    for (int j = 0; j < reads.length; j++) {
                        totalLength += SequenceRead.length(reads[j]);
                    }
                }

                if (totalLength >= (lowerCoverage * probe.length())
                        && totalLength <= upperCoverage * probe.length()) {

                    // We have a potential high quality peak.

                    // See if we can merge it with the last valid probe

                    if (lastValidProbe != null
                            && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {

                        lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());

                    } else if (lastValidProbe != null) {

                        // Check that the overall density over the region falls within our limits
                        totalLength = 0;
                        for (int s = 0; s < selectedChIPStores.length; s++) {
                            long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
                            for (int j = 0; j < reads.length; j++) {
                                totalLength += SequenceRead.length(reads[j]);
                            }
                        }

                        if (totalLength >= (lowerCoverage * lastValidProbe.length())
                                && totalLength <= upperCoverage * lastValidProbe.length()) {
                            potentialHighConfidencePeaks.add(lastValidProbe);
                        }

                        lastValidProbe = probe;
                    } else {
                        lastValidProbe = probe;
                    }
                }

            }

            if (lastValidProbe != null) {
                long totalLength = 0;
                for (int s = 0; s < selectedChIPStores.length; s++) {
                    long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
                    for (int j = 0; j < reads.length; j++) {
                        totalLength += SequenceRead.length(reads[j]);
                    }
                }

                if (totalLength >= (lowerCoverage * lastValidProbe.length())
                        && totalLength <= upperCoverage * lastValidProbe.length()) {
                    potentialHighConfidencePeaks.add(lastValidProbe);
                }
            }
        }

        if (potentialHighConfidencePeaks.size() == 0) {

            JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No high confidence peaks found",
                    "Quitting generator", JOptionPane.INFORMATION_MESSAGE);
            generationCancelled();
            return;
        }

        //      System.err.println("Found "+potentialHighConfidencePeaks.size()+" high confidence peaks");

        // Now we select up to 1000 random probes from this set
        Probe[] highConfidencePeaks = potentialHighConfidencePeaks.toArray(new Probe[0]);

        Collections.shuffle(Arrays.asList(highConfidencePeaks));

        Probe[] randomHighConfidenceProbes = new Probe[Math.min(highConfidencePeaks.length, 1000)];

        for (int i = 0; i < randomHighConfidenceProbes.length; i++) {
            randomHighConfidenceProbes[i] = highConfidencePeaks[i];
        }

        // Now find the average distance between forward / reverse reads in the candidate peaks

        // Size the array to the randomly selected subset: only those probes are
        // measured, and unmeasured zero entries would otherwise drag the median down.
        int[] distances = new int[randomHighConfidenceProbes.length];

        // Sort the candidates so that the data store cache is used efficiently
        Arrays.sort(randomHighConfidenceProbes);

        for (int p = 0; p < randomHighConfidenceProbes.length; p++) {

            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }

            distances[p] = getInterStrandDistance(randomHighConfidenceProbes[p], selectedChIPStores);
        }

        int medianInterStrandDistance = (int) SimpleStats.median(distances);

        if (medianInterStrandDistance < 0)
            medianInterStrandDistance = 0;
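
        // The peak of the forward-strand pileup sits upstream of the peak of the
        // reverse-strand pileup by roughly the sonicated fragment length, so this
        // median offset estimates that length.  Later calls to
        // getReadsFromDataStoreCollection() shift forward reads downstream and
        // reverse reads upstream by this amount so that both strands pile up over
        // the presumed binding site (the MACS-style tag shift).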

        //      System.err.println("Median inter strand difference = "+medianInterStrandDistance);

        // Now we find the depth cutoff for overrepresented single tags using a binomial distribution
        int totalReadCount = 0;
        for (int i = 0; i < selectedChIPStores.length; i++) {
            totalReadCount += selectedChIPStores[i].getTotalReadCount();
        }

        BinomialDistribution bin = new BinomialDistribution(totalReadCount,
                1d / collection.genome().getTotalGenomeLength());

        // We want to know what depth has a chance of less than 1 in 10^5 (p < 1e-5)
        int redundantThreshold = bin.inverseCumulativeProbability(1 - 0.00001d);

        if (redundantThreshold < 1)
            redundantThreshold = 1;
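
        // In outline: with N total reads spread over a genome of length G, the number
        // of reads starting at any single position is roughly Binomial(N, 1/G).
        // inverseCumulativeProbability(1 - 1e-5) returns the smallest depth whose
        // cumulative probability is at least 1 - 1e-5, i.e. a depth that fewer than
        // about 1 in 100,000 positions should reach by chance.  Positions deeper than
        // this are treated as duplicates and trimmed back in deduplicateReads().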

        //      System.err.println("Redundancy threshold is "+redundantThreshold);

        // Now we construct a poisson distribution to work out the threshold to use for
        // constructing a full candidate peak set.

        updateGenerationProgress("Counting non-redundant reads", 0, 1);

        // To do this we need the total non-redundant read count from the whole set
        int totalNonRedCount = getNonRedundantReadCount(selectedChIPStores, redundantThreshold);

        //      System.err.println("Total non-redundant sequences is "+totalNonRedCount);

        // We need to know the average read length for the data
        int readLength = 0;
        for (int i = 0; i < selectedChIPStores.length; i++) {
            readLength += selectedChIPStores[i].getTotalReadLength() / selectedChIPStores[i].getTotalReadCount();
        }
        readLength /= selectedChIPStores.length;

        double expectedCountsPerWindow = getExpectedCountPerWindow(totalNonRedCount,
                collection.genome().getTotalGenomeLength(), fragmentSize, readLength);

        PoissonDistribution poisson = new PoissonDistribution(expectedCountsPerWindow);

        int readCountCutoff = poisson.inverseCumulativeProbability(1 - pValue);
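
        // In outline: the number of shifted, deduplicated reads overlapping a window of
        // fragmentSize bases is modelled as Poisson(lambda), where lambda comes from
        // getExpectedCountPerWindow().  inverseCumulativeProbability(1 - pValue) gives
        // the smallest read count whose chance of occurring in a background window is
        // below the chosen p-value; windows exceeding it become candidate peaks.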

        //      System.err.println("Threshold for enrichment in a window is "+readCountCutoff+" reads using a p-value of "+pValue+" and a mean of "+(totalNonRedCount/(collection.genome().getTotalGenomeLength()/(double)fragmentSize)));

        // Now we go back through the whole dataset to do a search for all possible candidate probes 

        // We re-use the peak vector we came up with before.
        potentialHighConfidencePeaks.clear();

        for (int c = 0; c < chromosomes.length; c++) {
            // Time for an update
            updateGenerationProgress("Finding candidate peaks on chromosome " + chromosomes[c].name(), c,
                    chromosomes.length);

            Probe lastValidProbe = null;

            for (int startPosition = 1; startPosition < chromosomes[c].length()
                    - fragmentSize; startPosition += fragmentSize / 2) {

                // See if we need to quit
                if (cancel) {
                    generationCancelled();
                    return;
                }

                // The fetch below expands this region by the inter-strand distance
                // (inside getReadsFromDataStoreCollection) because the read positions
                // are shifted before they are counted against this window.
                Probe probe = new Probe(chromosomes[c], startPosition, (startPosition + fragmentSize - 1));

                long[] mergedProbeReads = getReadsFromDataStoreCollection(probe, selectedChIPStores,
                        medianInterStrandDistance);

                mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);

                SequenceRead.sort(mergedProbeReads);

                int thisProbeOverlapCount = 0;
                for (int i = 0; i < mergedProbeReads.length; i++) {
                    if (SequenceRead.overlaps(mergedProbeReads[i], probe.packedPosition())) {
                        ++thisProbeOverlapCount;
                    }
                }

                if (thisProbeOverlapCount > readCountCutoff) {

                    // We have a potential high quality peak.

                    // See if we can merge it with the last valid probe

                    if (lastValidProbe != null
                            && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {

                        lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());

                    } else if (lastValidProbe != null) {
                        potentialHighConfidencePeaks.add(lastValidProbe);
                        lastValidProbe = probe;
                    } else {
                        lastValidProbe = probe;
                    }
                }

            }

            if (lastValidProbe != null) {
                potentialHighConfidencePeaks.add(lastValidProbe);
            }
        }

        // Finally we re-filter the peaks we have using local poisson distributions with densities taken
        // from either the input samples (if there are any), or the local region.  The densities are 
        // estimated over 1,5 and 10kb around the peak and genome wide and the max of these is taken.
        // If there is no input then the 1kb region is not used.

        Probe[] allCandidateProbes = potentialHighConfidencePeaks.toArray(new Probe[0]);

        // Work out which stores we're using to validate against.
        DataStore[] validationStores;
        boolean useInput = false;
        double inputCorrection = 1;

        int validationNonRedCount;

        if (selectedInputStores != null && selectedInputStores.length > 0) {

            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }

            validationStores = selectedInputStores;
            useInput = true;

            // We also need to work out the total number of non-redundant sequences
            // in the input so we can work out a scaling factor so that the densities
            // for input and ChIP are comparable.
            validationNonRedCount = getNonRedundantReadCount(validationStores, redundantThreshold);

            inputCorrection = totalNonRedCount / (double) validationNonRedCount;

            System.err.println("From chip=" + totalNonRedCount + " input=" + validationNonRedCount
                    + " correction is " + inputCorrection);

        } else {
            validationStores = selectedChIPStores;
            validationNonRedCount = totalNonRedCount;
        }

        Vector<Probe> finalValidatedProbes = new Vector<Probe>();

        for (int p = 0; p < allCandidateProbes.length; p++) {

            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }

            if (p % 100 == 0) {
                updateGenerationProgress("Validated " + p + " out of " + allCandidateProbes.length + " raw peaks",
                        p, allCandidateProbes.length);
            }

            //         System.err.println("Validating "+allCandidateProbes[p].chromosome()+":"+allCandidateProbes[p].start()+"-"+allCandidateProbes[p].end());

            // We now need to find the maximum read density per 2*bandwidth against which
            // we're going to validate this peak

            // We're going to get all reads within 10kb of the peak, and then we can subselect from there

            int midPoint = allCandidateProbes[p].middle();

            Probe region10kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 5000, 1),
                    Math.min(midPoint + 4999, allCandidateProbes[p].chromosome().length()),
                    allCandidateProbes[p].strand());
            Probe region5kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 2500, 1),
                    Math.min(midPoint + 2499, allCandidateProbes[p].chromosome().length()),
                    allCandidateProbes[p].strand());
            Probe region1kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 500, 1),
                    Math.min(midPoint + 499, allCandidateProbes[p].chromosome().length()),
                    allCandidateProbes[p].strand());

            // Get the probes for the largest region
            long[] thisRegionReads = getReadsFromDataStoreCollection(region10kb, validationStores, 0);

            // Deduplicate so it's a fair comparison
            thisRegionReads = deduplicateReads(thisRegionReads, redundantThreshold); // Should we recalculate the redundant threshold based on the input coverage?

            int region10kbcount = thisRegionReads.length;
            int region5kbcount = 0;
            int region1kbcount = 0;

            // Go through the reads seeing if they fit into the 5 or 1kb regions
            for (int r = 0; r < thisRegionReads.length; r++) {
                if (SequenceRead.overlaps(region5kb.packedPosition(), thisRegionReads[r]))
                    ++region5kbcount;
                if (SequenceRead.overlaps(region1kb.packedPosition(), thisRegionReads[r]))
                    ++region1kbcount;
            }

            //         System.err.println("Input counts 10kb="+region10kbcount+" 5kb="+region5kbcount+" 1kb="+region1kbcount);

            // Convert to densities per window and adjust for global coverage

            double globalDensity = getExpectedCountPerWindow(validationNonRedCount,
                    collection.genome().getTotalGenomeLength(), allCandidateProbes[p].length(), readLength)
                    * inputCorrection;
            double density10kb = getExpectedCountPerWindow(region10kbcount, region10kb.length(),
                    allCandidateProbes[p].length(), readLength) * inputCorrection;
            double density5kb = getExpectedCountPerWindow(region5kbcount, region5kb.length(),
                    allCandidateProbes[p].length(), readLength) * inputCorrection;
            double density1kb = getExpectedCountPerWindow(region1kbcount, region1kb.length(),
                    allCandidateProbes[p].length(), readLength) * inputCorrection;

            // Find the highest density to use for the validation
            double highestDensity = globalDensity;
            if (density10kb > highestDensity)
                highestDensity = density10kb;
            if (density5kb > highestDensity)
                highestDensity = density5kb;
            if (useInput && density1kb > highestDensity)
                highestDensity = density1kb;

            //         System.err.println("Global="+globalDensity+" 10kb="+density10kb+" 5kb="+density5kb+" 1kb="+density1kb+" using="+highestDensity);

            // Construct a poisson distribution with this density
            PoissonDistribution localPoisson = new PoissonDistribution(highestDensity);

            //         System.err.println("Cutoff from global="+(new PoissonDistribution(globalDensity)).inverseCumulativeProbability(1-pValue)+" 10kb="+(new PoissonDistribution(density10kb)).inverseCumulativeProbability(1-pValue)+" 5kb="+(new PoissonDistribution(density5kb)).inverseCumulativeProbability(1-pValue)+" 1kb="+(new PoissonDistribution(density1kb)).inverseCumulativeProbability(1-pValue));
            // Now check to see if the actual count from this peak is enough to still pass
            long[] mergedProbeReads = getReadsFromDataStoreCollection(allCandidateProbes[p], selectedChIPStores,
                    medianInterStrandDistance);
            mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);

            SequenceRead.sort(mergedProbeReads);

            int thisProbeOverlapCount = 0;
            for (int i = 0; i < mergedProbeReads.length; i++) {
                if (SequenceRead.overlaps(mergedProbeReads[i], allCandidateProbes[p].packedPosition())) {
                    ++thisProbeOverlapCount;
                }
            }

            //         System.err.println("Read count in ChIP is "+thisProbeOverlapCount);

            if (thisProbeOverlapCount > localPoisson.inverseCumulativeProbability(1 - pValue)) {
                finalValidatedProbes.add(allCandidateProbes[p]);
                //            System.err.println("Adding probe to final set");
            }

        }

        //      System.err.println("From "+allCandidateProbes.length+" candidates "+finalValidatedProbes.size()+" peaks were validated");

        ProbeSet finalSet = new ProbeSet(getDescription(), finalValidatedProbes.toArray(new Probe[0]));

        generationComplete(finalSet);

    }

    private double getExpectedCountPerWindow(int totalCount, long totalLength, int windowSize, int readLength) {

        // Basic count is simply the count divided by the number of
        // windows

        double expectedCount = totalCount / (totalLength / (double) windowSize);

        // We now need to account for how many reads are going to span more than
        // one window
        double readOverlap = (readLength - 1d) / windowSize;

        // We can now multiply this by the expected count and add that to the expected
        // value to get the true expected value

        expectedCount += (expectedCount * readOverlap);
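
        // i.e. expected = totalCount / (totalLength / windowSize) * (1 + (readLength - 1) / windowSize),
        // which is the per-window Poisson mean used by the callers above.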

        return expectedCount;
    }

    private long[] deduplicateReads(long[] reads, int threshold) {

        if (skipDeduplicationBox.isSelected())
            return reads;

        //      System.err.println("Threshold is "+threshold);

        LongVector passed = new LongVector();

        int lastReadCount = 0;

        if (reads.length > 0) {
            lastReadCount = 1;
        }

        for (int r = 1; r < reads.length; r++) {
            if (reads[r] == reads[r - 1]) {
                ++lastReadCount;
            } else {
                for (int i = 0; i < Math.min(lastReadCount, threshold); i++) {
                    passed.add(reads[r - 1]);
                }
                lastReadCount = 1;
            }
        }

        for (int i = 0; i < Math.min(lastReadCount, threshold); i++) {
            passed.add(reads[reads.length - 1]);
        }

        return passed.toArray();
    }

    private int getNonRedundantReadCount(DataStore[] stores, int threshold) {

        int totalCount = 0;

        Chromosome[] chromosomes = collection.genome().getAllChromosomes();

        for (int c = 0; c < chromosomes.length; c++) {

            long[] mergedChrReads = getReadsFromDataStoreCollection(chromosomes[c], stores, 0);

            //         System.err.println("Found "+mergedChrReads.length+" raw reads on "+chromosomes[c]);
            SequenceRead.sort(mergedChrReads);

            mergedChrReads = deduplicateReads(mergedChrReads, threshold);
            //         System.err.println("Found "+mergedChrReads.length+" deduplicated reads on "+chromosomes[c]);

            totalCount += mergedChrReads.length;
        }

        return totalCount;

    }

    private long[] getReadsFromDataStoreCollection(Probe probe, DataStore[] stores, int strandOffset) {

        Probe p = probe;
        if (strandOffset > 0) {
            p = new Probe(probe.chromosome(), Math.max(probe.start() - strandOffset, 1),
                    Math.min(probe.end() + strandOffset, probe.chromosome().length()));
        }
        long[][] allProbeReads = new long[stores.length][];
        for (int d = 0; d < stores.length; d++) {
            allProbeReads[d] = stores[d].getReadsForProbe(p);
        }

        int totalProbeReadCount = 0;
        for (int i = 0; i < allProbeReads.length; i++) {
            totalProbeReadCount += allProbeReads[i].length;
        }
        long[] mergedChrReads = new long[totalProbeReadCount];

        int index = 0;
        for (int i = 0; i < allProbeReads.length; i++) {
            for (int j = 0; j < allProbeReads[i].length; j++) {
                if (strandOffset == 0 || SequenceRead.strand(allProbeReads[i][j]) == Location.UNKNOWN) {
                    mergedChrReads[index] = allProbeReads[i][j];
                } else if (SequenceRead.strand(allProbeReads[i][j]) == Location.FORWARD) {
                    mergedChrReads[index] = SequenceRead.packPosition(
                            Math.min(SequenceRead.start(allProbeReads[i][j]) + strandOffset,
                                    p.chromosome().length()),
                            Math.min(SequenceRead.end(allProbeReads[i][j]) + strandOffset, p.chromosome().length()),
                            SequenceRead.strand(allProbeReads[i][j]));
                } else if (SequenceRead.strand(allProbeReads[i][j]) == Location.REVERSE) {
                    mergedChrReads[index] = SequenceRead.packPosition(
                            Math.max(SequenceRead.start(allProbeReads[i][j]) - strandOffset, 1),
                            Math.max(SequenceRead.end(allProbeReads[i][j]) - strandOffset, 1),
                            SequenceRead.strand(allProbeReads[i][j]));
                }
                index++;
            }
            allProbeReads[i] = null;
        }

        return mergedChrReads;

    }

    private long[] getReadsFromDataStoreCollection(Chromosome c, DataStore[] stores, int strandOffset) {

        long[][] allChrReads = new long[stores.length][];
        for (int d = 0; d < stores.length; d++) {
            allChrReads[d] = stores[d].getReadsForChromosome(c);
        }

        int totalProbeReadCount = 0;
        for (int i = 0; i < allChrReads.length; i++) {
            totalProbeReadCount += allChrReads[i].length;
        }
        long[] mergedChrReads = new long[totalProbeReadCount];

        int index = 0;
        for (int i = 0; i < allChrReads.length; i++) {
            for (int j = 0; j < allChrReads[i].length; j++) {
                if (strandOffset == 0 || SequenceRead.strand(allChrReads[i][j]) == Location.UNKNOWN) {
                    mergedChrReads[index] = allChrReads[i][j];
                } else if (SequenceRead.strand(allChrReads[i][j]) == Location.FORWARD) {
                    mergedChrReads[index] = SequenceRead.packPosition(
                            Math.min(SequenceRead.start(allChrReads[i][j]) + strandOffset, c.length()),
                            Math.min(SequenceRead.end(allChrReads[i][j]) + strandOffset, c.length()),
                            SequenceRead.strand(allChrReads[i][j]));
                } else if (SequenceRead.strand(allChrReads[i][j]) == Location.REVERSE) {
                    mergedChrReads[index] = SequenceRead.packPosition(
                            Math.max(SequenceRead.start(allChrReads[i][j]) - strandOffset, 1),
                            Math.max(SequenceRead.end(allChrReads[i][j]) - strandOffset, 1),
                            SequenceRead.strand(allChrReads[i][j]));
                }
                index++;
            }
            allChrReads[i] = null;
        }

        return mergedChrReads;

    }

    private int getInterStrandDistance(Probe p, DataStore[] stores) {

        int[] forwardCounts = new int[p.length()];
        int[] reverseCounts = new int[p.length()];
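
        // In outline: build a per-base coverage profile for each strand across the
        // candidate peak, then report the distance between the position of maximum
        // forward-strand coverage and the position of maximum reverse-strand coverage.
        // Over many peaks the median of this distance approximates the sonicated
        // fragment length used for the tag shift.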

        //      System.err.println("Getting reads for region "+p.chromosome()+":"+p.start()+"-"+p.end());

        for (int d = 0; d < stores.length; d++) {
            long[] reads = stores[d].getReadsForProbe(p);
            for (int r = 0; r < reads.length; r++) {

                //            System.err.println("Looking at read from "+SequenceRead.start(reads[r])+"-"+SequenceRead.end(reads[r]));

                if (SequenceRead.strand(reads[r]) == Location.FORWARD) {
                    for (int pos = SequenceRead.start(reads[r]); pos <= SequenceRead.end(reads[r]); pos++) {

                        int index = pos - p.start();

                        //                  System.err.println("For pos="+pos+" index="+index+" from start="+p.start());

                        if (index >= 0 && index < forwardCounts.length) {
                            forwardCounts[index]++;
                        }
                        //                  else {
                        //                     System.err.println("Skipping pos "+index+" from "+pos+" with start "+p.start());
                        //                  }
                    }
                } else if (SequenceRead.strand(reads[r]) == Location.REVERSE) {
                    for (int pos = SequenceRead.start(reads[r]); pos <= SequenceRead.end(reads[r]); pos++) {
                        int index = pos - p.start();

                        //                  System.err.println("Rev pos="+pos+" index="+index+" from start="+SequenceRead.start(reads[r]));

                        if (index >= 0 && index < reverseCounts.length) {
                            reverseCounts[index]++;
                        }
                        //                  else {
                        //                     System.err.println("Skipping pos "+index+" from "+pos+" with start "+SequenceRead.start(reads[r]));
                        //                  }
                    }
                }
            }
        }

        // Now find the max depth for each of the forward and reverse sets
        int maxForIndex = 0;
        int maxForCount = 0;
        int maxRevIndex = 0;
        int maxRevCount = 0;

        for (int i = 0; i < forwardCounts.length; i++) {

            //         System.err.println("Position "+i+" for="+forwardCounts[i]+" rev="+reverseCounts[i]);

            if (forwardCounts[i] > maxForCount) {
                maxForCount = forwardCounts[i];
                maxForIndex = i;
            }
            if (reverseCounts[i] > maxRevCount) {
                maxRevCount = reverseCounts[i];
                maxRevIndex = i;
            }
        }

        //      System.err.println("For= "+maxForCount+" at "+maxForIndex+" Rev="+maxRevCount+" at "+maxRevIndex);

        return maxRevIndex - maxForIndex;
    }

    public void valueChanged(ListSelectionEvent arg0) {
        isReady();
    }

}