ubic.basecode.bio.geneset.GeneAnnotations.java Source code

Java tutorial

Introduction

Here is the source code for ubic.basecode.bio.geneset.GeneAnnotations.java

Source

/*
 * The baseCode project
 * 
 * Copyright (c) 2006 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.basecode.bio.geneset;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.table.AbstractTableModel;
import javax.swing.table.TableModel;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.basecode.util.CancellationException;
import ubic.basecode.util.FileTools;
import ubic.basecode.util.StatusViewer;
import ubic.basecode.util.StringUtil;

/**
 * Reads tab-delimited file to create maps of probes to classes, classes to probes, probes to genes, genes to probes.
 * <p>
 * Maintains the following important data structures, all derived from the input file:
 * <ol>
 * <li>probe-&gt;Classes -- each value is a Set of the Classes that a probe belongs to.
 * <li>Classes-&gt;probe -- each value is a Set of the probes that belong to a class
 * <li>probe-&gt;gene -- each value is the gene name corresponding to the probe.
 * <li>gene-&gt;list of probes -- each value is a list of probes corresponding to a gene
 * <li>probe-&gt;description -- each value is a text description of the probe (actually...of the gene)
 * </ol>
 * 
 * @author Paul Pavlidis
 * @author Shamhil Merchant
 * @author Homin Lee
 * @version $Id: GeneAnnotations.java,v 1.21 2010/05/07 00:07:10 paul Exp $
 */
public class GeneAnnotations {

    /**
     * 
     */
    public static final int AFFYCSV = 1;

    /**
     * 
     */
    public static final int AGILENT = 2;

    /**
     * 
     */
    public static final int DEFAULT = 0;

    /**
     * Whether to filter out probes that hit more than one gene
     */
    public static final boolean DEFAULT_FILTER_NONSPECIFIC = false;

    /**
     * The minimum size of a 'set' of genes.
     */
    private static final int ABSOLUTE_MINIMUM_GENESET_SIZE = 2;

    /**
     * String used to indicate a gene has no description associated with it.
     */
    private static final String NO_DESCRIPTION = "[No description]";

    /**
     * The maximum size of gene sets ever considered.
     */
    private static final int PRACTICAL_MAXIMUM_GENESET_SIZE = 5000;

    private static Log log = LogFactory.getLog(GeneAnnotations.class.getName());
    private Map<String, Collection<String>> geneSetToGeneMap; // stores Classes->genes map
    private Map<String, Collection<String>> geneSetToProbeMap; // stores Classes->probes map
    private Map<String, Collection<String>> geneSetToRedundantMap;
    private Map<String, Collection<String>> geneToGeneSetMap;
    private Map<String, Collection<String>> geneToProbeMap;
    private StatusViewer messenger;
    private Map<String, String> probeToDescription;
    private Map<String, String> probeToGeneName;
    private Map<String, Collection<String>> probeToGeneSetMap;

    private List<String> selectedProbes;

    private List<String> selectedSets;
    private List<String> sortedGeneSets;
    private Map<String, Collection<String>> oldGeneSets;
    private int tick = 0;

    private Pattern pipePattern = Pattern.compile("\\s*[\\s\\|,]\\s*");

    private Collection<String> activeProbes = null;

    private Map<String, Collection<String>> classToActiveProbeCache = new HashMap<String, Collection<String>>();

    private Collection<String> genesForActiveProbesCache;

    private Map<String, Collection<String>> geneToActiveProbesCache = new HashMap<String, Collection<String>>();

    private Map<String, Collection<String>> geneSetActiveGenesCache = new HashMap<String, Collection<String>>();

    private Map<String, Collection<String>> geneSetActiveProbesCache = new HashMap<String, Collection<String>>();

    private Collection<String> activeGeneSetCache;

    private boolean filterNonSpecific = DEFAULT_FILTER_NONSPECIFIC;

    public GeneAnnotations() {
        this.setUpdataStructures();
    }

    /**
     * @param goNames This is for creating GeneAnnotations by pruning a copy.
     * @param geneData GeneAnnotations copy to prune from
     * @param activeProbes Set only include these probes
     */
    public GeneAnnotations(GeneAnnotations geneData, Set<String> activeProbes) {

        if (activeProbes == null || geneData == null)
            throw new IllegalArgumentException("GeneAnnotations can't be constructed from null data");

        this.activeProbes = activeProbes;
        activeProbesDirty();

        // make a deep copy of the probeToGeneSetMap
        this.probeToGeneSetMap = new LinkedHashMap<String, Collection<String>>();
        for (Object element : geneData.probeToGeneSetMap.keySet()) {
            String key = (String) element;
            this.probeToGeneSetMap.put(key, new HashSet<String>(geneData.probeToGeneSetMap.get(key)));
        }

        // make a deep copy of the classToProbeMap, which is a map of sets. Shallow copy is BAD.
        this.geneSetToProbeMap = new LinkedHashMap<String, Collection<String>>();
        for (Object element : geneData.geneSetToProbeMap.keySet()) {
            String key = (String) element;
            this.geneSetToProbeMap.put(key, new HashSet<String>(geneData.geneSetToProbeMap.get(key)));
        }

        // make a deep copy of the old gene sets.
        this.oldGeneSets = new LinkedHashMap<String, Collection<String>>();
        for (Object element : geneData.oldGeneSets.keySet()) {
            String key = (String) element;
            this.oldGeneSets.put(key, new HashSet<String>(geneData.oldGeneSets.get(key)));
        }

        probeToGeneName = new HashMap<String, String>(geneData.probeToGeneName); // shallow copy, okay
        probeToDescription = new HashMap<String, String>(geneData.probeToDescription); // shallow copy, okay
        geneToProbeMap = new HashMap<String, Collection<String>>(geneData.geneToProbeMap); // shallow copy, okay?
        geneToGeneSetMap = new HashMap<String, Collection<String>>(geneData.geneToGeneSetMap); // shallow copy,
        // okay?
        geneSetToRedundantMap = new HashMap<String, Collection<String>>(geneData.geneSetToRedundantMap);
        filterNonSpecific = geneData.getFilterNonSpecific();

        List<String> allProbes = new Vector<String>(probeToGeneName.keySet());
        for (String probe : allProbes) {
            if (!activeProbes.contains(probe)) { // remove probes not in data set.
                removeProbeFromMaps(probe);
            }
        }
        setUp(null); // creates the classToGene map.
    }

    /**
     * 
     */
    private void activeProbesDirty() {
        genesForActiveProbesCache = null;
        activeGeneSetCache = null;
        if (geneToActiveProbesCache != null)
            geneToActiveProbesCache.clear();
        if (classToActiveProbeCache != null)
            classToActiveProbeCache.clear();
        if (geneSetActiveGenesCache != null)
            geneSetActiveGenesCache.clear();
        if (geneSetActiveProbesCache != null)
            geneSetActiveProbesCache.clear();
    }

    /**
     * @param stream
     * @param activeGenes
     * @param messenger
     * @param goNames
     * @throws IOException
     */
    public GeneAnnotations(InputStream stream, Set<String> activeGenes, StatusViewer messenger, GONames goNames,
            boolean filterNonSpecific) throws IOException {
        this.filterNonSpecific = filterNonSpecific;
        this.messenger = messenger;
        setUpdataStructures();
        this.read(stream, activeGenes);
        this.activeProbes = null; // using all.
        setUp(goNames);
    }

    public GeneAnnotations(InputStream stream, Set<String> activeGenes, StatusViewer messenger, GONames goNames)
            throws IOException {
        this(stream, activeGenes, messenger, goNames, DEFAULT_FILTER_NONSPECIFIC);

    }

    /**
     * Constructor designed for use when a file is not the immediate input of the data.
     * 
     * @param probes A List of probes
     * @param geneSymbols A List of gene symbols (e.g., ACTB), corresponding to the probes (in the same order)
     * @param geneNames A List of gene names (e.g., "Actin"), corresponding to the probes (in the same order). This can
     *        be null.
     * @param goTerms A List of Collections of Strings corresponding to the GO terms for each probe.
     * @throws IllegaArgumentException if any of the required arguments are null, don't have sizes that match, etc.
     */
    public GeneAnnotations(List<String> probes, List<String> geneSymbols, List<String> geneNames,
            List<Collection<String>> goTerms) {
        checkValidData(probes, geneSymbols, geneNames, goTerms);
        assert probes != null;
        setUpdataStructures();

        this.activeProbes = probes;
        activeProbesDirty();

        Collection<String> probeIds = new ArrayList<String>();
        for (int i = 0; i < probes.size(); i++) {
            String probe = probes.get(i);
            String geneSymbol = geneSymbols.get(i);
            String geneName = null;
            if (geneNames != null) {
                geneName = geneNames.get(i);
            }
            Collection<String> goTermsForProbe = goTerms.get(i);

            storeProbeAndGene(probeIds, probe, geneSymbol);

            if (geneName != null) {
                probeToDescription.put(probe.intern(), geneName.intern());
            } else {
                probeToDescription.put(probe.intern(), NO_DESCRIPTION);
            }

            for (String string : goTermsForProbe) {
                String go = string.intern();
                probeToGeneSetMap.get(probe).add(go);

                if (!geneSetToProbeMap.containsKey(go)) {
                    geneSetToProbeMap.put(go, new HashSet<String>());
                }
                geneSetToProbeMap.get(go).add(probe);
            }

            if (messenger != null && i % 500 == 0) {
                messenger.showStatus("Read " + i + " probes");
                try {
                    Thread.sleep(10);
                } catch (InterruptedException e) {
                    throw new RuntimeException("Interrupted");
                }
            }
        }
        resetSelectedProbes();
        this.setUp(null);
    }

    /**
     * @param goNames
     * @param fileName
     */
    public GeneAnnotations(String fileName, Set<String> activeGenes, StatusViewer messenger, GONames goNames,
            boolean filterNonSpecific) throws IOException {
        this.messenger = messenger;
        this.filterNonSpecific = filterNonSpecific;
        setUpdataStructures();
        InputStream i = FileTools.getInputStreamFromPlainOrCompressedFile(fileName);
        this.read(i, activeGenes);
        this.activeProbes = this.probeToGeneName.keySet();
        if (activeProbes != null)
            activeProbesDirty();
        setUp(goNames);
    }

    public GeneAnnotations(String fileName, Set<String> activeGenes, StatusViewer messenger, GONames goNames)
            throws IOException {
        this(fileName, activeGenes, messenger, goNames, DEFAULT_FILTER_NONSPECIFIC);
    }

    /**
     * Create GeneAnnotations by reading from a file
     * 
     * @param goNames
     * @param filename String
     * @param messenger StatusViewer to print status updates to.
     * @throws IOException
     */
    public GeneAnnotations(String filename, StatusViewer messenger, GONames goNames) throws IOException {
        this(filename, messenger, goNames, DEFAULT, DEFAULT_FILTER_NONSPECIFIC);
    }

    public GeneAnnotations(String filename, StatusViewer messenger, GONames goNames, boolean filterNonSpecific)
            throws IOException {
        this(filename, messenger, goNames, DEFAULT, filterNonSpecific);
    }

    /**
     * Create GeneAnnotations by reading from a file, with a selected input file format.
     * 
     * @param filename
     * @param messenger
     * @param goNames
     * @param format
     * @throws IOException
     */
    public GeneAnnotations(String filename, StatusViewer messenger, GONames goNames, int format,
            boolean filterNonSpecific) throws IOException {
        log.debug("Entering GeneAnnotations constructor");
        setUpdataStructures();
        this.messenger = messenger;
        this.filterNonSpecific = filterNonSpecific;

        if (format == DEFAULT) {
            this.read(filename);
        } else if (format == AFFYCSV) {
            this.readAffyCsv(filename);
        } else if (format == AGILENT) {
            this.readAgilent(filename);
        } else {
            throw new IllegalArgumentException("Unknown format");
        }
        this.activeProbes = null; // using all.
        setUp(goNames);
    }

    public GeneAnnotations(String filename, StatusViewer messenger, GONames goNames, int format)
            throws IOException {
        this(filename, messenger, goNames, format, DEFAULT_FILTER_NONSPECIFIC);
    }

    /**
     * Add a new gene set. Used to set up user-defined gene sets.
     * <p>
     * 
     * @param id String class to be added
     * @param probesForNew collection of members.
     */
    public void addGeneSet(String geneSetId, Collection<String> probesForNew) {

        if (probesForNew == null)
            throw new IllegalArgumentException("Null probes for new gene set");

        if (probesForNew.size() == 0) {
            log.debug("No probes to add for " + geneSetId);
            return;
        }

        if (geneSetToProbeMap.containsKey(geneSetId)) {
            // then we should save a backup.
            log.info("Saving backup version of " + geneSetId);
            oldGeneSets.put(geneSetId, new HashSet<String>(geneSetToProbeMap.get(geneSetId)));
        }

        geneSetToProbeMap.put(geneSetId, new HashSet<String>(probesForNew));
        Set<String> genes = new HashSet<String>();
        for (String probe : probesForNew) {
            if (!probeToGeneSetMap.containsKey(probe)) {
                probeToGeneSetMap.put(probe, new HashSet<String>());
            }
            probeToGeneSetMap.get(probe).add(geneSetId);

            if (!probeToGeneName.containsKey(probe))
                continue;
            genes.add(probeToGeneName.get(probe));
        }

        for (Object element : genes) {
            String gene = (String) element;
            if (!geneToGeneSetMap.containsKey(gene))
                geneToGeneSetMap.put(gene, new HashSet<String>());
            geneToGeneSetMap.get(gene).add(geneSetId);
        }
        geneSetToGeneMap.put(geneSetId, genes);

        log.debug("Added new gene set: " + genes.size() + " genes to gene set id " + geneSetId + " with "
                + probesForNew.size() + " probes");

        resetSelectedSets();
    }

    /**
     * Restore the previous version of a gene set. If no previous version is found, then nothing is done.
     * 
     * @param id
     */
    public void restoreGeneSet(String id) {
        if (!oldGeneSets.containsKey(id))
            return;
        log.info("Restoring " + id);
        removeClassFromMaps(id);
        addGeneSet(id, oldGeneSets.get(id));
    }

    /**
     * @param parents
     */
    public void addGoTermsToGene(String gene, Collection<String> parents) {

        for (String id : parents) {

            if (!geneSetToGeneMap.containsKey(id))
                geneSetToGeneMap.put(id, new HashSet<String>());
            if (!geneSetToProbeMap.containsKey(id))
                geneSetToProbeMap.put(id, new HashSet<String>());

            geneSetToGeneMap.get(id).add(gene);
            geneToGeneSetMap.get(gene).add(id);
            Collection<String> probes = geneToProbeMap.get(gene);

            geneSetToProbeMap.get(id).addAll(probes);

            for (Object element : probes) {
                String probe = (String) element;
                probeToGeneSetMap.get(probe).add(id);
            }
        }
    }

    /**
     * Returns true if the class is in the classToProbe map
     * 
     * @param id String a class id
     * @return boolean
     */
    public boolean geneSetExists(String id) {
        return geneSetToProbeMap.containsKey(id);
    }

    /**
     * @return Map
     */
    public Map<String, Collection<String>> geneSetToRedundantMap() {
        return geneSetToRedundantMap;
    }

    /**
     * @param id String class id
     * @return List list of probes in class
     */
    public Collection<String> getClassToProbes(String id) {
        if (activeProbes == null) {
            return geneSetToProbeMap.get(id);
        }

        assert classToActiveProbeCache != null;
        if (!classToActiveProbeCache.containsKey(id)) {
            Collection<String> finalList = new HashSet<String>();
            Collection<String> startingList = geneSetToProbeMap.get(id);
            if (startingList == null) {
                log.warn("No probes for class: " + id);
                return finalList;
            }
            for (Object element : startingList) {
                String probe = (String) element;
                if (activeProbes.contains(probe)) {
                    finalList.add(probe);
                }
            }
            classToActiveProbeCache.put(id, finalList);
            return finalList;
        }

        return classToActiveProbeCache.get(id);

    }

    /**
     * Return a collection of all currently active genes.
     * 
     * @return
     */
    public Collection<String> getGenes() {
        if (activeProbes == null)
            return geneToGeneSetMap.keySet();

        if (genesForActiveProbesCache == null) {
            Collection<String> finalList = new HashSet<String>();
            for (Object element : geneToGeneSetMap.keySet()) {
                String gene = (String) element;
                Collection<String> probes = this.getGeneProbeList(gene);
                if (probes != null && probes.size() > 0) {
                    finalList.add(gene);
                }
            }
            genesForActiveProbesCache = finalList;
            return finalList;
        }
        return genesForActiveProbesCache;
    }

    /**
     * Get a list of the probes that correspond to a particular gene.
     * 
     * @param g String a gene name
     * @return Collection of the probes for gene g
     */
    public Collection<String> getGeneProbeList(String gene) {
        if (activeProbes == null) {
            return geneToProbeMap.get(gene);
        }

        if (!geneToActiveProbesCache.containsKey(gene)) {
            Collection<String> finalList = new HashSet<String>();
            Collection<String> probes = geneToProbeMap.get(gene);
            if (probes == null) {
                log.debug("No probes for " + gene);
                return null;
            }
            for (Object element : probes) {
                String probe = (String) element;
                if (activeProbes.contains(probe)) {
                    finalList.add(probe);
                }
            }
            geneToActiveProbesCache.put(gene, finalList);
            return finalList;
        }
        return geneToActiveProbesCache.get(gene);
    }

    /**
     * Get a class by an integer index i from the sorted list.
     * 
     * @param i
     * @return
     */
    public String getGeneSetByIndex(int i) {
        return sortedGeneSets.get(i);
    }

    // /**
    // * @return Returns the classToGeneMap.
    // */
    // public Map getGeneSetToGeneMap() {
    // return geneSetToGeneMap;
    // }

    /**
     * 
     */
    public Collection<String> getActiveGeneSetGenes(String geneSetId) {
        if (activeProbes == null)
            return this.geneSetToGeneMap.get(geneSetId);

        if (!geneSetActiveGenesCache.containsKey(geneSetId)) {
            Collection<String> finalList = new HashSet<String>();
            Collection<String> genes = geneSetToGeneMap.get(geneSetId);
            for (String gene : genes) {
                Collection<String> probes = geneToProbeMap.get(gene);
                if (probes == null)
                    continue;
                for (String probe : probes) {
                    if (activeProbes.contains(probe)) {
                        finalList.add(gene);
                        break;
                    }
                }
            }
            geneSetActiveGenesCache.put(geneSetId, finalList);
            return finalList;
        }
        return geneSetActiveGenesCache.get(geneSetId);
    }

    /**
     * @param geneSetId
     * @return
     */
    public Collection<String> getGeneSetProbes(String geneSetId) {
        if (activeProbes == null)
            return this.geneSetToProbeMap.get(geneSetId);

        if (!geneSetActiveProbesCache.containsKey(geneSetId)) {
            Collection<String> finalList = new HashSet<String>();
            Collection<String> probes = geneSetToProbeMap.get(geneSetId);
            if (probes == null)
                return finalList;
            for (String probe : probes) {
                if (activeProbes.contains(probe)) {
                    finalList.add(probe);
                }
            }
            geneSetActiveProbesCache.put(geneSetId, finalList);
            return finalList;
        }
        return geneSetActiveProbesCache.get(geneSetId);
    }

    /**
     * Get the gene sets a gene belongs to.
     * 
     * @param gene
     * @return
     */
    public Collection<String> getGeneGeneSets(String gene) {
        return this.geneToGeneSetMap.get(gene);
    }

    /**
     * Get the description for a gene.
     * 
     * @param p String
     * @return String
     */
    public String getProbeDescription(String p) {
        return probeToDescription.get(p);
    }

    /**
     * Get the gene that a probe belongs to.
     * 
     * @param p String
     * @return String
     */
    public String getProbeGeneName(String p) {
        return probeToGeneName.get(p);
    }

    /**
     * @return Map
     */
    public Map<String, String> getProbeToGeneMap() {
        return probeToGeneName;
    }

    /** 
     */
    public Map<String, Collection<String>> getGeneToProbeMap() {
        return geneToProbeMap;
    }

    /**
     * @return Map
     */
    public Map<String, Collection<String>> getProbeToGeneSetMap() {
        return probeToGeneSetMap;
    }

    /**
     * @return the list of selected probes. Note that selected probes are distinct from active probes. Selected probes
     *         is more transient.
     */
    public List<String> getSelectedProbes() {
        return selectedProbes;
    }

    /**
     * @return list of selected gene sets.
     */
    public List<String> getSelectedSets() {
        return selectedSets;
    }

    /**
     * Redefine a class.
     * 
     * @param classId String class to be modified
     * @param probesForNew Collection current user-defined list of members. The gene set is recreated to look like this
     *        one.
     */
    public void modifyGeneSet(String classId, Collection<String> probesForNew) {
        if (!geneSetToProbeMap.containsKey(classId)) {
            log.warn("No such class to modify: " + classId);
            return;
        }

        log.debug("Saving backup version of " + classId + ", replacing with new version that has "
                + probesForNew.size() + " probes.");
        oldGeneSets.put(classId, new HashSet<String>(geneSetToProbeMap.get(classId)));

        removeClassFromMaps(classId);
        addGeneSet(classId, probesForNew);
    }

    /**
     * Compute how many genes have Gene set annotations.
     * 
     * @return
     */
    public int numAnnotatedGenes() {
        int count = 0;
        for (Object element2 : geneToGeneSetMap.keySet()) {
            Collection<String> element = geneToGeneSetMap.get(element2);
            if (element.size() > 0) {
                count++;
            }
        }
        return count;
    }

    /**
     * How many genes are currently available
     */
    public int numGenes() {
        if (activeProbes == null) {
            return geneToProbeMap.size();
        }
        return this.getGenes().size();
    }

    /**
     * Get a collection of all (active) gene sets.
     * 
     * @return
     */
    public Collection<String> getGeneSets() {
        if (activeProbes == null)
            return geneSetToGeneMap.keySet();

        if (activeGeneSetCache == null) {
            Collection<String> finalSet = new HashSet<String>();
            for (Object element : geneSetToGeneMap.keySet()) {
                String geneSet = (String) element;
                Collection<String> probes = getClassToProbes(geneSet);
                if (probes.size() > 0) {
                    finalSet.add(geneSet);
                }
            }
            activeGeneSetCache = finalSet;
            return finalSet;
        }
        return activeGeneSetCache;
    }

    /**
     * Get the number of gene sets currently available.
     * 
     * @return
     */
    public int numGeneSets() {
        return this.getGeneSets().size();
    }

    /**
     * Get the number of genes in a gene set, identified by id.
     * 
     * @param id String a class id
     * @return int number of genes in the class
     */
    public int numActiveGenesInGeneSet(String id) {
        if (!geneSetToGeneMap.containsKey(id)) {
            return 0;
        }
        return getActiveGeneSetGenes(id).size();
    }

    /**
     * @param id
     * @return
     */
    public int numGenesInGeneSet(String id) {
        if (!geneSetToGeneMap.containsKey(id)) {
            return 0;
        }
        return geneSetToGeneMap.get(id).size();
    }

    /**
     * Get how many probes point to the same gene. This is like the old "numReplicates".
     * 
     * @param g
     * @return
     */
    public int numProbesForGene(String g) {
        if (!geneToProbeMap.containsKey(g))
            return 0;
        if (activeProbes == null)
            return geneToProbeMap.get(g).size();

        return this.getGeneProbeList(g).size();
    }

    /**
     * Get the number of probes in a gene set, identified by id.
     * 
     * @param id String a class id
     * @return int number of probes in the class
     */
    public int numActiveProbesInGeneSet(String id) {
        if (!geneSetToProbeMap.containsKey(id)) {
            log.debug("No such gene set " + id);
            return 0;
        }
        if (activeProbes == null)
            return geneSetToProbeMap.get(id).size();

        int result = 0;
        Collection<String> startingList = geneSetToProbeMap.get(id);
        for (Iterator<String> iter = startingList.iterator(); iter.hasNext();) {
            String probe = iter.next();
            if (activeProbes.contains(probe)) {
                result++;
            }
        }
        return result;
    }

    /**
     * @param id
     * @return
     */
    public int numProbesInGeneSet(String id) {
        if (!geneSetToProbeMap.containsKey(id)) {
            log.debug("No such gene set " + id);
            return 0;
        }
        return this.geneSetToProbeMap.get(id).size();
    }

    /**
     * Print out the gene annotations in the same format we got them in, but if the gene sets have been modified, this
     * will be reflected.
     * 
     * @param out
     * @throws IOException
     */
    public void print(Writer out) throws IOException {
        out.write("Probe\tSymbol\tName\tGeneSets\n");
        out.flush();
        for (String probe : probeToGeneName.keySet()) {
            String gene = probeToGeneName.get(probe);
            String desc = getProbeDescription(probe);
            out.write(probe + "\t" + gene + "\t" + desc + "\t");
            Collection<String> geneSets = probeToGeneSetMap.get(probe);

            for (String element : geneSets) {
                out.write(element + "|");
            }
            out.write("\n");
        }
    }

    /**
     * Remove a gene set (class) from all the maps that reference it.
     * <p>
     * 
     * @param id
     */
    public void removeClassFromMaps(String id) {
        if (geneSetToProbeMap.containsKey(id)) {
            for (String probe : geneSetToProbeMap.get(id)) {
                if (probeToGeneSetMap.containsKey(probe) && probeToGeneSetMap.get(probe).contains(id)) {
                    if (!probeToGeneSetMap.get(probe).remove(id)) {
                        log.error("Couldn't remove " + id + " from probe to class map for" + probe);
                    }
                }
            }
            if (geneSetToProbeMap.remove(id) == null)
                log.error("Couldn't remove " + id + " from classToProbeMap");

            if (geneSetToGeneMap.remove(id) == null)
                log.error("Couldn't remove " + id + " from classToGeneMap");
        }
        if (geneSetToRedundantMap.containsKey(id))
            geneSetToRedundantMap.remove(id);
        if (this.getSelectedSets() != null)
            this.getSelectedSets().remove(id);
    }

    /**
     * Set the selected gene set to be the entire set.
     */
    public void resetSelectedProbes() {
        selectedProbes = new Vector<String>(probeToGeneName.keySet());
    }

    /**
     * Set the selected gene set to be the entire set.
     */
    public void resetSelectedSets() {
        selectedSets = new Vector<String>(geneSetToProbeMap.keySet());
    }

    /**
     * @return the number of probes currently on the 'selected' list.
     */
    public int numSelectedProbes() {
        return selectedProbes.size();
    }

    /**
     * @return the number of sets currently on the 'selected' list.
     */
    public int selectedSets() {
        return selectedSets.size();
    }

    /**
     * Create a selected probes list based on a search string.
     * 
     * @param searchOn A string to be searched.
     */
    public void selectProbesBySearch(String searchOn) {

        String searchOnUp = searchOn.toUpperCase();
        resetSelectedProbes();
        Set<String> removeUs = new HashSet<String>();
        for (String probe : probeToGeneName.keySet()) {
            String candidate = probeToGeneName.get((probe)).toUpperCase();

            // look in descriptions.
            String candidateD = probeToDescription.get((probe)).toUpperCase();

            if (!candidate.startsWith(searchOnUp) && candidateD.indexOf(searchOnUp) < 0) {
                removeUs.add(probe);
            }

        }

        for (Object element : removeUs) {
            selectedProbes.remove(element);
        }
    }

    /**
     * Select a given set of gene sets.
     * 
     * @param selectedGeneSets
     */
    public void setSelectedSets(Collection<String> selectedGeneSets) {
        this.selectedSets.clear();
        this.selectedSets.addAll(selectedGeneSets);
    }

    /**
     * @param searchOn
     * @param goData
     */
    public void selectSets(String searchOn, GONames goData) {

        String searchOnUp = searchOn.toUpperCase();
        resetSelectedSets();
        Set<String> removeUs = new HashSet<String>();
        for (Object element : geneSetToProbeMap.keySet()) {
            String candidate = (String) element;

            // look in the name too
            if (goData.getNameForId(candidate) == null)
                continue;
            String candidateN = goData.getNameForId(candidate).toUpperCase();

            if (!candidate.toUpperCase().startsWith(searchOnUp) && candidateN.indexOf(searchOnUp) < 0) {
                removeUs.add(candidate);
            }
        }

        for (Object element : removeUs) {
            selectedSets.remove(element);
        }
    }

    /**
     * Identify gene sets that contain a particular gene or probe.
     * 
     * @param searchOn
     * @param goData
     */
    public void selectSetsByGene(String searchOn) {

        String searchOnUp = searchOn.toUpperCase();
        resetSelectedSets();
        Set<String> removeUs = new HashSet<String>();

        for (String candidateGeneSet : geneSetToProbeMap.keySet()) {
            boolean found = false;
            Collection<String> probes = geneSetToProbeMap.get(candidateGeneSet);

            for (Object element : probes) {
                String candidate = (String) element;
                if (candidate.toUpperCase().startsWith(searchOnUp)) {
                    found = true;
                    log.debug("Found " + candidate + " in " + candidateGeneSet);
                    break;
                }
            }

            if (found)
                continue;

            Collection<String> genes = geneSetToGeneMap.get(candidateGeneSet);
            for (String candidate : genes) {
                if (candidate.toUpperCase().startsWith(searchOnUp)) {
                    found = true;
                    log.debug("Found " + candidate + " in " + candidateGeneSet);
                    break;
                }

            }

            if (!found) {
                removeUs.add(candidateGeneSet);
            }
        }

        for (Object element : removeUs) {
            selectedSets.remove(element);
        }
    }

    /**
     * Sort the gene sets, filling out the sortedGeneSets. This should be called after any changes have been made to the
     * classToProbeMap. The sort is just in order of id.
     */
    public void sortGeneSets() {

        if (this.geneSetToProbeMap.size() == 0) {
            throw new IllegalStateException(
                    "Could not sort because there are no gene sets in the geneSetToProbeMap");
        }

        if (this.sortedGeneSets == null) {
            this.sortedGeneSets = new Vector<String>();
        }

        List<String> vec = new Vector<String>(geneSetToProbeMap.keySet());
        Collections.sort(vec);
        for (String string : vec) {
            this.sortedGeneSets.add(string);
        }
    }

    /**
     * @return
     */
    public List<String> sortGeneSetsBySize() {

        List<GeneSet> sets = new Vector<GeneSet>();
        for (String name : geneSetToGeneMap.keySet()) {
            sets.add(new GeneSet(name, geneSetToGeneMap.get(name)));
        }

        Collections.sort(sets, new ClassSizeComparator());

        List<String> returnVal = new Vector<String>();
        for (GeneSet geneSet : sets) {
            returnVal.add(geneSet.getName());
        }

        return returnVal;
    }

    /**
     * @return
     */
    public TableModel toTableModel() {
        return new AbstractTableModel() {
            private static final long serialVersionUID = 1L;
            private String[] columnNames = { "Probe", "Gene", "Description" };

            public int getColumnCount() {
                return 3;
            }

            @Override
            public String getColumnName(int i) {
                return columnNames[i];
            }

            public int getRowCount() {
                return selectedProbes.size();
            }

            public Object getValueAt(int i, int j) {

                String probeid = selectedProbes.get(i);
                switch (j) {
                case 0:
                    return probeid;
                case 1:
                    return getProbeGeneName(probeid);
                case 2:
                    return getProbeDescription(probeid);
                default:
                    return null;
                }
            }

        };
    }

    /**
     * @param probes
     * @param geneSymbols
     * @param geneNames
     * @param goTerms
     */
    private void checkValidData(List<String> probes, List<String> geneSymbols, List<String> geneNames,
            List<Collection<String>> goTerms) {
        if (probes == null || geneSymbols == null || goTerms == null) {
            throw new IllegalArgumentException("Probes, gene symbols, GO terms and GO data must not be null");
        }
        int size = probes.size();
        if (size == 0) {
            throw new IllegalArgumentException("Empty list");
        }
        if (size != geneSymbols.size() && size != geneNames.size() && size != goTerms.size()) {
            throw new IllegalArgumentException("All lists must have same number of elements");
        }

    }

    /*******************************************************************************************************************
     * Private or protected methods
     ******************************************************************************************************************/

    /**
     * @param limit
     * @param header
     * @param pattern
     */
    private int findField(String header, String sep, String pattern) {
        String[] fields = header.split(sep);
        if (fields == null || fields.length == 0)
            throw new IllegalArgumentException("No header!");
        for (int i = 0; i < fields.length; i++) {
            if (fields[i].replaceAll("\"", "").compareToIgnoreCase(pattern) == 0) {
                return i;
            }
        }
        return -1;
    }

    /**
     * @throws IOException
     * @param header
     * @return
     */
    private int getAffyBpIndex(String header) throws IOException {
        String pattern = "Gene Ontology Biological Process";
        return findField(header, ",", pattern);
    }

    /**
     * @throws IOException
     * @param header
     * @return
     */
    private int getAffyCcIndex(String header) throws IOException {
        String pattern = "Gene Ontology Cellular Component";
        return findField(header, ",", pattern);
    }

    /**
     * @throws IOException
     * @param header
     * @return
     */
    private int getAffyGeneNameIndex(String header) throws IOException {
        String pattern = "Gene Title";
        return findField(header, ",", pattern);
    }

    private int getAffyAlternateGeneSymbolIndex(String header) {

        String[] alternates = new String[] { "Transcript ID", "Transcript ID(Array Design)", "UniGene ID" };
        for (String pattern : alternates) {
            int i = findField(header, ",", pattern);
            if (i >= 0)
                return i;
        }
        return -1;
    }

    /**
     * @throws IOException
     * @param header
     * @return
     */
    private int getAffyGeneSymbolIndex(String header) throws IOException {
        String pattern = "Gene Symbol";
        return findField(header, ",", pattern);
    }

    /**
     * @throws IOException
     * @param header
     * @return
     */
    private int getAffyMfIndex(String header) throws IOException {
        String pattern = "Gene Ontology Molecular Function";
        return findField(header, ",", pattern);
    }

    /**
     * @param header
     * @return
     */
    private int getAffyNumFields(String header) {
        String[] fields = header.split(",");
        return fields.length;
    }

    /**
     * @throws IOException
     * @param header
     * @return
     */
    private int getAffyProbeIndex(String header) throws IOException {
        String pattern = "Probe Set ID";
        return findField(header, ",", pattern);
    }

    /**
     * Fill in the classToGeneMap with information from the classToProbeMap.
     * 
     * @return mapping of gene sets to genes.
     */
    private Map<String, Collection<String>> makeClassToGeneMap() {
        Map<String, Collection<String>> gsToGeneMap = new HashMap<String, Collection<String>>();
        for (String geneSetId : geneSetToProbeMap.keySet()) {
            Collection<String> probesInSet = geneSetToProbeMap.get(geneSetId);

            Set<String> genesInSet = new HashSet<String>();
            for (String probe : probesInSet) {
                genesInSet.add(probeToGeneName.get(probe));
            }
            gsToGeneMap.put(geneSetId, genesInSet);
        }
        return gsToGeneMap;
    }

    /**
     * @param go
     * @return
     */
    private String padGoTerm(String go) {
        if (!go.startsWith("GO:")) {
            int needZeros = 7 - go.length();
            for (int j = 0; j < needZeros; j++) {
                go = "0" + go;
            }
            go = "GO:" + go;
        }
        return go;
    }

    /**
     * @param probe
     * @param pat
     * @param goi
     */
    private void parseGoTerm(String probe, Pattern pat, String goi) {
        Matcher mat = pat.matcher(goi);
        if (mat.find()) {
            int start = mat.start();
            int end = mat.end();
            String go = goi.substring(start, end);

            go = padGoTerm(go);

            assert go.startsWith("GO:");

            this.probeToGeneSetMap.get(probe).add(go);

            if (!geneSetToProbeMap.containsKey(go)) {
                geneSetToProbeMap.put(go, new HashSet<String>());
            }
            this.geneSetToProbeMap.get(go).add(probe);
        }
    }

    /**
     * Remove classes that have too few members
     * <p>
     * FIXME this overlaps with functionality in GeneSetMapTools
     * 
     * @param lowThreshold
     * @param highThreshold
     */
    private void prune(int lowThreshold, int highThreshold) {

        if (this.geneSetToProbeMap.isEmpty()) {
            throw new IllegalStateException("No gene sets!");
        }

        Set<String> removeUs = new HashSet<String>();
        for (String id : geneSetToProbeMap.keySet()) {
            int numActiveProbesInGeneSet = numActiveProbesInGeneSet(id);
            int numActiveGenesInGeneSet = numActiveGenesInGeneSet(id);
            if (numActiveProbesInGeneSet < lowThreshold || numActiveGenesInGeneSet < lowThreshold
                    || numActiveProbesInGeneSet > highThreshold || numActiveGenesInGeneSet > highThreshold) {
                log.debug("Pruning gene set : " + id + ", size =" + numActiveProbesInGeneSet + " probes, "
                        + numActiveGenesInGeneSet + " genes.");
                removeUs.add(id);
            }
        }

        for (Object element : removeUs) {
            String id = (String) element;
            removeClassFromMaps(id);
        }

        if (this.geneSetToProbeMap.isEmpty()) {
            throw new IllegalStateException(
                    "All gene sets were removed due to being too small or too big; size range=" + lowThreshold
                            + " - " + highThreshold + ". Your annotation file may contain too few GO terms.");
        }

        sortGeneSets();
    }

    private void read(InputStream bis) throws IOException {
        this.read(bis, null);
    }

    // read in from a file.
    private void read(String fileName) throws IOException {
        InputStream i = FileTools.getInputStreamFromPlainOrCompressedFile(fileName);
        read(i);
    }

    private void readAgilent(InputStream bis) throws IOException {
        this.readAgilent(bis, null);
    }

    /**
     * @param filename
     */
    private void readAgilent(String fileName) throws IOException {
        InputStream i = FileTools.getInputStreamFromPlainOrCompressedFile(fileName);
        readAgilent(i);
    }

    /**
     * @param bis
     */
    private void readAffyCsv(InputStream bis) throws IOException {
        this.readAffyCsv(bis, null);
    }

    /**
     * @param filename
     */
    private void readAffyCsv(String fileName) throws IOException {
        InputStream i = FileTools.getInputStreamFromPlainOrCompressedFile(fileName);
        readAffyCsv(i);
    }

    /**
     * @param probe
     */
    private void removeProbeFromMaps(String probe) {
        if (probeToGeneName.containsKey(probe)) {
            String gene = probeToGeneName.get(probe);
            probeToGeneName.remove(probe);
            if (geneToProbeMap.containsKey(gene)) {
                geneToProbeMap.get(gene).remove(probe);
            }
        }
        if (probeToGeneSetMap.containsKey(probe)) {
            Iterator<String> cit = probeToGeneSetMap.get(probe).iterator();
            while (cit.hasNext()) {
                String geneSet = cit.next();
                if (geneSetToProbeMap.containsKey(geneSet)) {
                    geneSetToProbeMap.get(geneSet).remove(probe);
                }
            }
            if (probeToGeneSetMap.remove(probe) == null) {
                System.err.println("Could not remove " + probe + " from probeToClassMap");
            }
        }
        if (probeToDescription.containsKey(probe))
            probeToDescription.remove(probe);
    }

    /**
     * 
     */
    private void setUpdataStructures() {
        probeToGeneSetMap = new LinkedHashMap<String, Collection<String>>();
        geneSetToProbeMap = new LinkedHashMap<String, Collection<String>>();
        probeToGeneName = new HashMap<String, String>();
        probeToDescription = new HashMap<String, String>();
        geneToProbeMap = new HashMap<String, Collection<String>>();
        geneToGeneSetMap = new HashMap<String, Collection<String>>();
        geneSetToRedundantMap = new HashMap<String, Collection<String>>();
        oldGeneSets = new HashMap<String, Collection<String>>();
        classToActiveProbeCache = new HashMap<String, Collection<String>>();
        geneToActiveProbesCache = new HashMap<String, Collection<String>>();
        geneSetActiveGenesCache = new HashMap<String, Collection<String>>();
        geneSetActiveProbesCache = new HashMap<String, Collection<String>>();
    }

    /**
     * @param probeIds
     * @param probe
     * @param geneSymbol
     */
    private void storeProbeAndGene(Collection<String> probeIds, String probe, String geneSymbol) {

        if (StringUtils.isBlank(geneSymbol)) {
            throw new IllegalArgumentException("Blank gene symbol");
        }
        if (StringUtils.isBlank(probe)) {
            throw new IllegalArgumentException("Blank probe name");
        }

        probeToGeneName.put(probe, geneSymbol);

        // create the list if need be.
        if (geneToProbeMap.get(geneSymbol) == null) {
            geneToProbeMap.put(geneSymbol, new HashSet<String>());
        }
        geneToProbeMap.get(geneSymbol).add(probe);

        probeIds.add(probe);
        if (!probeToGeneSetMap.containsKey(probe)) {
            probeToGeneSetMap.put(probe.intern(), new HashSet<String>());
        }
        geneToGeneSetMap.put(geneSymbol, probeToGeneSetMap.get(probe));
    }

    /**
     * @param bis
     * @param activeGenes
     * @throws IOException
     */
    protected void read(InputStream bis, Set<String> activeGenes) throws IOException {
        log.debug("Entering GeneAnnotations.read");
        if (bis == null) {
            throw new IOException("Inputstream was null");
        }

        if (bis.available() == 0) {
            throw new IOException("No bytes to read from the annotation file.");
        }

        BufferedReader dis = new BufferedReader(new InputStreamReader(bis));
        Collection<String> probeIds = new ArrayList<String>();
        String classIds = null;

        // loop through rows. Makes hash map of probes to go, and map of go to
        // probes.
        int n = 0;
        String line = "";
        tick();
        while ((line = dis.readLine()) != null) {

            if (line.startsWith("#"))
                continue;

            // String[] tokens = StringUtils.splitPreserveAllTokens( line, "\t" );
            String[] tokens = line.split("\t");
            int length = tokens.length;
            if (length < 2)
                continue;

            String probe = tokens[0].intern();
            String gene = tokens[1].intern();

            if (filterNonSpecific && (gene.contains("|") || gene.contains(","))) {
                continue;
            }

            if (activeGenes != null && !activeGenes.contains(probe)) {
                continue;
            }

            // if ( log.isDebugEnabled() ) log.debug( "probe: " + probe );

            storeProbeAndGene(probeIds, probe, gene);

            /* read gene description */
            if (length >= 3) {
                String description = tokens[2].intern();
                if (description.length() > 0) {
                    probeToDescription.put(probe.intern(), description.intern());
                } else {
                    probeToDescription.put(probe.intern(), NO_DESCRIPTION);
                }
            } else {
                probeToDescription.put(probe.intern(), NO_DESCRIPTION);
                continue;
            }

            /* read GO data */
            if (length >= 4) {
                classIds = tokens[3];
                extractPipeDelimitedGoIds(classIds, probe);
            }

            if (messenger != null && n % 500 == 0) {
                messenger.showStatus("Read " + n + " probes");
                try {
                    Thread.sleep(2);
                } catch (InterruptedException e) {
                    dis.close();
                    throw new CancellationException();
                }
            }
            n++;

        }

        /* Fill in the genegroupreader and the classmap */
        dis.close();
        resetSelectedProbes();

        if (probeToGeneName.size() == 0 || geneSetToProbeMap.size() == 0) {
            throw new IllegalArgumentException(
                    "The gene annotations had invalid information. Please check the format.");
        }

    }

    /**
     * @param classIds
     * @param probe
     */
    private void extractPipeDelimitedGoIds(String classIds, String probe) {
        String[] classIdAry = pipePattern.split(classIds);
        if (classIdAry.length == 0)
            return;

        Collection<String> probeCol = probeToGeneSetMap.get(probe);
        probeCol.addAll(Arrays.asList(classIdAry));

        for (String element : classIdAry) {
            String go = element.intern();

            if (!geneSetToProbeMap.containsKey(go)) {
                geneSetToProbeMap.put(go, new HashSet<String>());
            }
            geneSetToProbeMap.get(go).add(probe);
        }
    }

    /**
     * @param bis
     * @param object
     */
    protected void readAffyCsv(InputStream bis, Set<String> activeGenes) throws IOException {
        if (bis == null) {
            throw new IOException("Inputstream was null");
        }
        BufferedReader dis = new BufferedReader(new InputStreamReader(bis));
        Collection<String> probeIds = new ArrayList<String>();
        String classIds = null;

        String header = dis.readLine();

        if (header == null) {
            throw new IOException("File had no header");
        }

        int numFields = getAffyNumFields(header);
        int probeIndex = getAffyProbeIndex(header);
        int goBpIndex = getAffyBpIndex(header);
        int goCcIndex = getAffyCcIndex(header);
        int goMfIndex = getAffyMfIndex(header);
        int geneNameIndex = getAffyGeneNameIndex(header);
        int geneSymbolIndex = getAffyGeneSymbolIndex(header);

        int alternateGeneSymbolIndex = getAffyAlternateGeneSymbolIndex(header);

        if (probeIndex < 0) {
            throw new IllegalStateException("Invalid AFFY file format: could not find the probe set id column");
        }
        if (geneNameIndex < 0) {
            throw new IllegalStateException("Invalid AFFY file format: could not find the gene name column");
        }
        if (geneSymbolIndex < 0) {
            throw new IllegalStateException("Invalid AFFY file format: could not find the gene symbol column");
        }

        if (goBpIndex < 0) {
            throw new IllegalStateException("Invalid AFFY file format: No biological process data were found");
        } else if (goCcIndex < 0) {
            throw new IllegalStateException("Invalid AFFY file format: No cellular component data were found");
        } else if (goMfIndex < 0) {
            throw new IllegalStateException("Invalid AFFY file format: No molecular function data were found");
        }

        log.debug("Read header");

        tick();
        assert (numFields > probeIndex + 1 && numFields > geneSymbolIndex + 1);
        Pattern pat = Pattern.compile("[0-9]+");
        // loop through rows. Makes hash map of probes to go, and map of go to
        // probes.
        int n = 0;
        String line = "";

        log.debug("File opened okay, parsing Affy CSV");

        while ((line = dis.readLine()) != null) {

            if (Thread.currentThread().isInterrupted()) {
                dis.close();
                throw new CancellationException();
            }

            String[] fields = StringUtil.csvSplit(line);
            if (fields.length < probeIndex + 1 || fields.length < geneSymbolIndex + 1) {
                continue; // skip lines that don't meet criteria.
            }

            String probe = fields[probeIndex];
            String gene = fields[geneSymbolIndex];

            if (StringUtils.isBlank(probe) || probe.equals("---")) {
                throw new IllegalStateException("Probe name was missing or invalid at line " + n
                        + "; it is possible the file format is not readable; contact the developers.");
            }

            if (StringUtils.isBlank(gene) || gene.equals("---")) {
                gene = fields[alternateGeneSymbolIndex];
                if (StringUtils.isBlank(gene) || gene.equals("---")) {
                    throw new IllegalStateException("Gene name was missing or invalid at line " + n
                            + "; it is possible the file format is not readable; contact the developers.");
                }
            }

            if (activeGenes != null && !activeGenes.contains(gene)) {
                continue;
            }

            // log.debug("Probe=" + probe + " Gene=" + gene); // PP temporary for user problems.

            storeProbeAndGene(probeIds, probe, gene);

            /* read gene description */

            String description = fields[geneNameIndex].intern();
            if (!description.startsWith("GO:")) {
                this.probeToDescription.put(probe.intern(), description.intern());
            } else {
                this.probeToDescription.put(probe.intern(), NO_DESCRIPTION);
            }

            /*
             * Each field is like this: 0000166 // nucleotide binding // inferred from electronic annotation
             */

            classIds = " // " + fields[goBpIndex] + " // " + fields[goMfIndex] + " // " + fields[goCcIndex];
            String[] goinfo = classIds.split("/+");
            for (String element : goinfo) {
                if (StringUtils.isBlank(element)) {
                    continue;
                }
                element = StringUtils.strip(element);
                parseGoTerm(probe, pat, element);
            }

            if (messenger != null && n % 5000 == 0) {
                messenger.showStatus("Read " + n + " probes");
                try {
                    Thread.sleep(10);
                } catch (InterruptedException e) {
                    dis.close();
                    throw new RuntimeException("Interrupted");
                }
            }
            n++;

        }

        /* Fill in the genegroupreader and the classmap */
        dis.close();
        tick();
        resetSelectedProbes();

        if (this.probeToGeneName.size() == 0 || this.geneSetToProbeMap.size() == 0) {
            throw new IllegalArgumentException(
                    "The gene annotations had invalid information. Please check the format.");
        }

    }

    /**
     * @param bis
     * @param activeGenes
     * @throws IOException
     */
    protected void readAgilent(InputStream bis, Set<String> activeGenes) throws IOException {
        if (bis == null) {
            throw new IOException("Inputstream was null");
        }
        BufferedReader dis = new BufferedReader(new InputStreamReader(bis));
        Collection<String> probeIds = new ArrayList<String>();
        String classIds = null;

        String header = dis.readLine();
        int numFields = getAgilentNumFields(header);
        int probeIndex = getAgilentProbeIndex(header);
        int goIndex = getAgilentGoIndex(header);
        int geneNameIndex = getAgilentGeneNameIndex(header);
        int geneSymbolIndex = getAgilentGeneSymbolIndex(header);

        tick();
        assert (numFields > probeIndex + 1 && numFields > geneSymbolIndex + 1);
        Pattern pat = Pattern.compile("[0-9]+");
        // loop through rows. Makes hash map of probes to go, and map of go to
        // probes.
        int n = 0;
        String line = "";
        while ((line = dis.readLine()) != null) {

            if (Thread.currentThread().isInterrupted()) {
                dis.close();
                throw new CancellationException();
            }

            String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
            if (fields.length < probeIndex + 1 || fields.length < geneSymbolIndex + 1) {
                continue; // skip lines that don't meet criteria.
            }

            String probe = fields[probeIndex];
            String gene = fields[geneSymbolIndex];

            if (activeGenes != null && !activeGenes.contains(gene)) {
                continue;
            }

            storeProbeAndGene(probeIds, probe, gene);

            /* read gene description */

            String description = fields[geneNameIndex].intern();
            if (!description.startsWith("GO:")) {
                probeToDescription.put(probe.intern(), description.intern());
            } else {
                probeToDescription.put(probe.intern(), NO_DESCRIPTION);
            }

            if (fields.length < goIndex + 1) {
                continue;
            }

            classIds = fields[goIndex];

            if (StringUtils.isNotBlank(classIds)) {
                String[] goinfo = classIds.split("\\|");
                for (String element : goinfo) {
                    String goi = element.intern();
                    parseGoTerm(probe, pat, goi);
                }
            }

            if (messenger != null && n % 500 == 0) {
                messenger.showStatus("Read " + n + " probes");
                try {
                    Thread.sleep(10);
                } catch (InterruptedException e) {
                    dis.close();
                    throw new RuntimeException("Interrupted");
                }
            }
            n++;

        }

        /* Fill in the genegroupreader and the classmap */
        dis.close();
        tick();
        resetSelectedProbes();

        if (probeToGeneName.size() == 0 || geneSetToProbeMap.size() == 0) {
            throw new IllegalArgumentException(
                    "The gene annotations had invalid information. Please check the format.");
        }

    }

    /**
     * @param header
     * @return
     */
    private int getAgilentGeneSymbolIndex(String header) {
        String pattern = "GeneSymbol";
        return findField(header, "\t", pattern);
    }

    /**
     * @param header
     * @return
     */
    private int getAgilentGeneNameIndex(String header) {
        String pattern = "GeneName";
        return findField(header, "\t", pattern);
    }

    /**
     * @param header
     * @return
     */
    private int getAgilentGoIndex(String header) {
        String pattern = "GO";
        return findField(header, "\t", pattern);
    }

    /**
     * @param header
     * @return
     */
    private int getAgilentProbeIndex(String header) {
        String pattern = "ProbeID";
        return findField(header, "\t", pattern);
    }

    /**
     * @param header
     * @return
     */
    private int getAgilentNumFields(String header) {
        String[] fields = header.split("\t");
        return fields.length;
    }

    /**
     * 
     */
    private void tick() {
        tick++;
    }

    public int ticks() {
        return tick;
    }

    /**
     * Initialize the gene sets and other data structures that needs special handling before use.
     * 
     * @param goNames
     */
    protected void setUp(GONames goNames) {
        this.geneSetToGeneMap = makeClassToGeneMap();
        if (goNames != null)
            GeneSetMapTools.addParents(this, goNames, messenger);
        GeneSetMapTools.collapseGeneSets(this, messenger);
        prune(ABSOLUTE_MINIMUM_GENESET_SIZE, PRACTICAL_MAXIMUM_GENESET_SIZE);
        tick();
        resetSelectedProbes();
        resetSelectedSets();
        sortGeneSets();
    }

    /**
     * @return
     */
    public int numProbes() {
        assert activeProbes != null;
        return this.activeProbes.size();
    }

    /**
     * @return Returns the activeProbes.
     */
    public Collection<String> getActiveProbes() {
        return this.activeProbes;
    }

    /**
     * @param probeId
     * @return
     */
    public boolean hasProbe(String probeId) {
        return this.probeToGeneName.containsKey(probeId);
    }

    /**
     * @param geneSymbol
     * @return
     */
    public Collection<String> getGeneProbes(String geneSymbol) {
        if (activeProbes == null)
            return this.geneToProbeMap.get(geneSymbol);

        Collection<String> finalList = new HashSet<String>();
        Collection<String> probes = geneToProbeMap.get(geneSymbol);
        for (String probe : probes) {
            if (activeProbes.contains(probe)) {
                finalList.add(probe);
            }
        }
        return finalList;

    }

    /**
     * @param probe
     * @return
     */
    public String probeToGene(String probe) {
        return this.probeToGeneName.get(probe);
    }

    /**
     * @param id
     * @return
     */
    public boolean hasGeneSet(String id) {
        if (activeProbes == null)
            return this.geneSetToGeneMap.containsKey(id);

        return this.getGeneSets().contains(id);
    }

    /**
     * @param set
     */
    public void setActiveProbes(Collection<String> set) {
        this.activeProbes = set;
        activeProbesDirty();
    }

    public boolean getFilterNonSpecific() {
        return filterNonSpecific;
    }

    public void setFilterNonSpecific(boolean filterNonSpecific) {
        this.filterNonSpecific = filterNonSpecific;
    }

}

class ClassSizeComparator implements Comparator<GeneSet> {

    /*
     * (non-Javadoc)
     * 
     * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
     */
    public int compare(GeneSet a, GeneSet b) {

        int sizea = a.size();
        int sizeb = b.size();

        if (sizea > sizeb) {
            return 1;
        } else if (sizeb < sizea) {
            return -1;
        }

        return 0;
    }
}

// used for the comparator.

class GeneSet {
    private Collection<String> items;
    private String name;

    public GeneSet(String name, Collection<String> items) {
        this.name = name;
        this.items = items;
    }

    /**
     * @return Returns the items.
     */
    public Collection<String> getItems() {
        return items;
    }

    /**
     * @return Returns the name.
     */
    public String getName() {
        return name;
    }

    /**
     * @param items The items to set.
     */
    public void setItems(Set<String> items) {
        this.items = items;
    }

    /**
     * @param name The name to set.
     */
    public void setName(String name) {
        this.name = name;
    }

    public int size() {
        return items.size();
    }
}