chibi.gemmaanalysis.LinkEvalCli.java Source code

Java tutorial

Introduction

Here is the source code for chibi.gemmaanalysis.LinkEvalCli.java

Source

/*
 * The Gemma project
 * 
 * Copyright (c) 2007 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package chibi.gemmaanalysis;

import cern.colt.list.DoubleArrayList;
import cern.jet.stat.Descriptive;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.StopWatch;

import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.dataStructure.matrix.SparseRaggedDoubleMatrix;
import ubic.basecode.math.RandomChooser;
import ubic.basecode.ontology.model.OntologyTerm;
import ubic.gemma.apps.GemmaCLI.CommandGroup;
import ubic.gemma.genome.gene.service.GeneService;
import ubic.gemma.genome.taxon.service.TaxonService;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.designElement.CompositeSequenceService;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.ontology.GoMetric;
import ubic.gemma.ontology.GoMetric.Metric;
import ubic.gemma.ontology.providers.GeneOntologyService;
import ubic.gemma.ontology.providers.GeneOntologyServiceImpl;
import ubic.gemma.ontology.providers.GeneOntologyServiceImpl.GOAspect;
import ubic.gemma.util.AbstractCLIContextCLI;
import ubic.gemma.util.Settings;

import java.io.*;
import java.util.*;

/**
 * @author meeta
 * @version $Id$
 */
public class LinkEvalCli extends AbstractCLIContextCLI {

    private static class GeneComparator implements Comparator<Gene> {

        @Override
        public int compare(Gene g1, Gene g2) {

            Long i1 = g1.getId();
            Long i2 = g2.getId();

            if (i1 > i2)
                return 1;
            else if (i1 < i2)
                return -1;

            return 0;
        }

    }

    @Override
    public CommandGroup getCommandGroup() {
        return CommandGroup.ANALYSIS;
    }

    private static class GenePair extends ArrayList<List<Gene>> implements Comparable<GenePair> {

        /**
         * 
         */
        private static final long serialVersionUID = 1L;

        public GenePair() {
            super(0);
            this.add(new ArrayList<Gene>());
            this.add(new ArrayList<Gene>());
        }

        public void addFirstGene(Gene g) {
            List<Gene> firstGenes = this.get(0);
            for (Gene firstGene : firstGenes) {
                if (firstGene.getId().equals(g.getId())) {
                    return;
                }
            }
            this.get(0).add(g);
        }

        public void addSecondGene(Gene g) {
            List<Gene> secondGenes = this.get(1);
            for (Gene secondGene : secondGenes) {
                if (secondGene.getId().equals(g.getId())) {
                    return;
                }
            }
            this.get(1).add(g);
        }

        /*
         * (non-Javadoc)
         * 
         * @see java.lang.Comparable#compareTo(java.lang.Object)
         */
        @Override
        public int compareTo(GenePair o2) {
            return this.getFirstGenes().iterator().next().getName()
                    .compareTo(o2.getFirstGenes().iterator().next().getName());
        }

        /*
         * (non-Javadoc)
         * 
         * @see java.lang.Object#equals(java.lang.Object)
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj)
                return true;
            if (obj == null)
                return false;
            if (getClass() != obj.getClass())
                return false;
            final GenePair other = (GenePair) obj;

            boolean eqOrdered = true;
            boolean eqAlternate = true;

            List<Gene> thisFirst = this.getFirstGenes();
            List<Gene> thisSecond = this.getSecondGenes();
            List<Gene> otherFirst = other.getFirstGenes();
            List<Gene> otherSecond = other.getSecondGenes();

            // Check if first and second genes are exactly the same in both pairs
            if (thisFirst.size() == otherFirst.size() && thisSecond.size() == otherSecond.size()) {
                for (int i = 0; i < thisFirst.size(); i++) {
                    String s1 = thisFirst.get(i).toString();
                    String s2 = otherFirst.get(i).toString();
                    if (!s1.equals(s2)) {
                        eqOrdered = false;
                    }
                }
                for (int i = 0; i < thisSecond.size(); i++) {
                    String s1 = thisSecond.get(i).toString();
                    String s2 = otherSecond.get(i).toString();
                    if (!s1.equals(s2)) {
                        eqOrdered = false;
                    }
                }
            } else {
                eqOrdered = false;
            }

            // Check if first is same as second in the other, and vice versa
            if (thisFirst.size() == otherSecond.size() && thisSecond.size() == otherFirst.size()) {
                for (int i = 0; i < thisFirst.size(); i++) {
                    String s1 = thisFirst.get(i).toString();
                    String s2 = otherSecond.get(i).toString();
                    if (!s1.equals(s2)) {
                        eqAlternate = false;
                    }
                }
                for (int i = 0; i < thisSecond.size(); i++) {
                    String s1 = thisSecond.get(i).toString();
                    String s2 = otherFirst.get(i).toString();
                    if (!s1.equals(s2)) {
                        eqAlternate = false;
                    }
                }
            } else {
                eqAlternate = false;
            }

            // if gene pairs are equal in the same order or equal in alternate order, they are equal
            if (eqOrdered || eqAlternate) {
                return true;
            }
            return false;

        }

        /**
         * @return the firstGenes
         */
        public List<Gene> getFirstGenes() {
            return this.get(0);
        }

        /**
         * @return the secondGenes
         */
        public List<Gene> getSecondGenes() {
            return this.get(1);
        }

        /*
         * (non-Javadoc)
         * 
         * @see java.lang.Object#hashCode()
         */
        @Override
        public int hashCode() {
            final int PRIME = 31;
            int result1 = 1;
            int result2 = 1;
            int result3 = 1;
            int result4 = 1;
            for (int i = 0; i < getFirstGenes().size(); i++) {
                Gene g = getFirstGenes().get(i);
                result1 = PRIME * result1 + ((g == null) ? 0 : g.hashCode());
            }
            for (int i = getFirstGenes().size() - 1; i >= 0; i--) {
                Gene g = getFirstGenes().get(i);
                result2 = PRIME * result2 + ((g == null) ? 0 : g.hashCode());
            }
            for (int i = 0; i < getSecondGenes().size(); i++) {
                Gene g = getSecondGenes().get(i);
                result3 = PRIME * result3 + ((g == null) ? 0 : g.hashCode());
            }
            for (int i = getSecondGenes().size() - 1; i >= 0; i--) {
                Gene g = getSecondGenes().get(i);
                result4 = PRIME * result4 + ((g == null) ? 0 : g.hashCode());
            }
            return result1 * result2 * result3 * result4;
        }

        @Override
        public String toString() {
            StringBuilder buf = new StringBuilder();
            Collections.sort(this.get(0), new GeneComparator());
            Collections.sort(this.get(1), new GeneComparator());
            for (Iterator<Gene> it = this.get(0).iterator(); it.hasNext();) {
                buf.append(it.next().getName());
                if (it.hasNext())
                    buf.append(",");
            }
            buf.append("\t");
            for (Iterator<Gene> it = this.get(1).iterator(); it.hasNext();) {
                buf.append(it.next().getName());
                if (it.hasNext())
                    buf.append(",");
            }
            return buf.toString();
        }

    }

    private static class ProbeComparator implements Comparator<CompositeSequence> {

        @Override
        public int compare(CompositeSequence o1, CompositeSequence o2) {

            Long g1 = o1.getId();
            Long g2 = o2.getId();

            if (g1 > g2)
                return 1;
            else if (g1 < g2)
                return -1;
            return 0;
        }

    }

    private static final String GENE_CACHE = "geneCache";

    private static final String GENE_GO_MAP_FILE = "GeneGOMap";

    private static final String GO_PROB_MAP = "GoProbMap";

    private static final String HOME_DIR = Settings.getString("gemma.appdata.home");

    private static final String RANDOM_SUBSET = "RandomSubset1K";

    private static Integer SET_SIZE = 0;

    private static final String VECTOR_MATRIX = "VectorMatrix";

    public static void main(String[] args) {
        LinkEvalCli p = new LinkEvalCli();
        try {
            Exception ex = p.doWork(args);
            if (ex != null) {
                ex.printStackTrace();
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private DoubleMatrix<Long, String> geneVectorMatrix = new SparseRaggedDoubleMatrix<Long, String>();

    private Map<String, Integer> GOcountMap = new HashMap<String, Integer>();

    private Map<String, Double> GOProbMap = new HashMap<String, Double>();

    private String adShortName = "";// holds inputted string indicating array design short name

    private ArrayDesign arrayDesign = null; // holds actual array design used

    private ArrayDesignService arrayDesignService;

    private String termsOutPath = null;

    private int cCount = 0;

    private String component = "http://purl.org/obo/owl/GO#GO_0005575";

    private CompositeSequenceService css;

    private int fCount = 0;

    private String file_path = "";

    private int firstProbeColumn = 0;

    private List<CompositeSequence> firstProbes;

    private String function = "http://purl.org/obo/owl/GO#GO_0003674";

    private Map<String, Gene> geneCache = new HashMap<String, Gene>();

    private Map<Long, Collection<String>> geneGoMap = new HashMap<Long, Collection<String>>();

    // A list of service beans
    private GeneService geneService;

    private Collection<Gene> goMappedGenes;

    private GoMetric goMetric;

    private boolean max = false;

    private final int MAX_GO_SIM = 200;

    private Metric metric = GoMetric.Metric.simple;

    private boolean noZeros = false;// true when gene pairs with genes containing no GO terms wish to be excluded

    private int numberOfRandomRuns = 1;

    private GeneOntologyService goService;

    private String outFile;

    private String outRandLinksFile;

    // INCLUDE PARTOF OR CHANGE STRINGENCY
    private boolean partOf = true;

    private int pCount = 0;

    private boolean printRandomLinks = false;

    private String probenames_file_path = "";

    private String process = "http://purl.org/obo/owl/GO#GO_0008150";

    // inputted

    private boolean randomFromArray = false;// true when selecting random pairs from array design is desired

    private boolean randomFromSubset = false;// true when selecting random probe pairs from a given file is desired

    private boolean randomFromTaxon = false;// true when selecting random gene pairs from a particular genome(human,
    // mouse, rat) is desired

    private Map<String, Integer> rootMap = new HashMap<String, Integer>();

    private int secondProbeColumn = 1;

    private List<CompositeSequence> secondProbes;

    private Map<CompositeSequence, Collection<Gene>> probemap;

    private boolean selectSubset = false;

    private int subsetLinks = 0;

    private Double subsetSize = 0.0;

    private boolean subsetUsed = false;

    private Taxon taxon;

    private TaxonService taxonService;

    private boolean weight = false;

    private GOAspect goAspectToUse = null;

    /**
     * @param f
     * @return
     */
    public Serializable getCacheFromDisk(File f) {

        if (!f.exists())
            return null;
        Serializable returnObject = null;

        try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(f));) {

            returnObject = (Serializable) ois.readObject();

            return returnObject;
        } catch (IOException | ClassNotFoundException e) {
            return null;
        }
    }

    /**
     * @param toSave
     * @param filename
     */
    public void saveCacheToDisk(Object toSave, String filename) {

        log.info("Generating file ... ");
        File f = new File(HOME_DIR + File.separatorChar + filename);
        if (f.exists()) {
            f.delete();
        }
        try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f));) {
            oos.writeObject(toSave);
            oos.flush();
        } catch (Throwable e) {
            log.error("Cannot write to file.");
            return;
        }
        log.info("Done making report.");
    }

    @Override
    @SuppressWarnings("static-access")
    protected void buildOptions() {
        Option goMetricOption = OptionBuilder.hasArg().withArgName("Choice of GO Metric")
                .withDescription("resnik, lin, jiang, percent, cosine, kappa; default = simple")
                .withLongOpt("metric").create('m');
        addOption(goMetricOption);

        Option maxOption = OptionBuilder.hasArg().withArgName("Choice of using MAX calculation")
                .withDescription("MAX").withLongOpt("max").create('x');
        addOption(maxOption);

        Option weightedOption = OptionBuilder.hasArg().withArgName("Choice of using weighted matrix")
                .withDescription("weight").withLongOpt("weight").create('w');
        addOption(weightedOption);

        Option dataOption = OptionBuilder.hasArg()
                .withArgName("Choice of generating random gene pairs OR Input data file")
                .withDescription("dataType").isRequired().create('d');
        addOption(dataOption);

        Option taxonOption = OptionBuilder.hasArg().withArgName("Choice of taxon")
                .withDescription("human, rat, mouse").isRequired().create('t');
        addOption(taxonOption);

        Option firstGeneOption = OptionBuilder.hasArg().withArgName("colindex")
                .withDescription("Column index with gene1 (starting from 0; default=0)").create("g1col");
        Option secondGeneOption = OptionBuilder.hasArg().withArgName("colindex")
                .withDescription("Column index with gene2 (starting from 0; default=1)").create("g2col");
        addOption(firstGeneOption);
        addOption(secondGeneOption);

        Option arrayDesignOption = OptionBuilder.hasArg().isRequired().withArgName("Array Design")
                .withDescription("Short Name of Platform").create("array");
        addOption(arrayDesignOption);

        Option numberOfRandomRunsOption = OptionBuilder.hasArg().withArgName("Number of Random Runs")
                .withDescription(
                        "Number of runs for random gene pair selection from array design (starting from 1; default = 1)")
                .create("runs");
        addOption(numberOfRandomRunsOption);

        Option outFileOption = OptionBuilder.hasArg().isRequired().withArgName("outFile")
                .withDescription("Write output to this file").create('o');
        addOption(outFileOption);

        Option noZeroGo = OptionBuilder.withDescription("Exclude genes with no GO terms").create("noZeroGo");
        addOption(noZeroGo);

        Option print = OptionBuilder.hasArg().withArgName("Output file for random links")
                .withDescription("Print out randomly chosen probe pairs in a separate output file").create("print");
        addOption(print);

        Option probenames = OptionBuilder.hasArg().withArgName("File containing subset of probe names")
                .withDescription(
                        "File containing subset of probe names associated with selected array design from which to choose random pairs")
                .create("probenames");
        addOption(probenames);

        Option subsetOption = OptionBuilder.hasArg()
                .withArgName("Approximate number of probe pairs desired to score from full set")
                .withDescription(
                        "Take random subset of probe pairs approximated to given argument from input file to score")
                .create("subset");
        addOption(subsetOption);

        Option aspectOption = OptionBuilder.hasArg().withArgName("aspect")
                .withDescription("Limit to mf, bp or cc. Default=use all three").create("aspect");
        addOption(aspectOption);

        Option outputGoAnnots = OptionBuilder.hasArg().withArgName("path")
                .withDescription("Also rint out the Gene-GO relationships in a tabbed format").create("termsout");
        addOption(outputGoAnnots);

    }

    /*
     * (non-Javadoc)
     * 
     * @see ubic.gemma.util.AbstractCLI#doWork(java.lang.String[])
     */
    @Override
    protected Exception doWork(String[] args) {

        Exception err = processCommandLine(args);
        if (err != null)
            return err;

        StringBuilder buf = new StringBuilder();
        buf.append("");

        this.arrayDesign = arrayDesignService.findByShortName(adShortName);
        this.arrayDesign = arrayDesignService.thawLite(this.arrayDesign);
        if (this.arrayDesign == null) {
            System.out.println("Array design " + adShortName + " not found");
            System.exit(0);
        }

        Collection<CompositeSequence> probes = null;

        if (randomFromSubset) {// subset desired from file containing probe NAME pairs
            if (probenames_file_path.equals("")) {
                System.out.println("Must enter a valid filepath for probe names file");
                System.exit(0);
            }
            try {
                File fl = new File(probenames_file_path);
                probes = getProbesFromSubset(fl);
            } catch (IOException e) {
                return e;
            }
        } else {
            probes = this.arrayDesign.getCompositeSequences();
        }

        this.populateGeneGoMapForTaxon();

        probemap = css.getGenes(probes);

        if (this.randomFromArray) { // randomly select links from array design

            if (numberOfRandomRuns > 1) {
                int[][] results = new int[numberOfRandomRuns][];

                for (int i = 0; i < numberOfRandomRuns; i++) {
                    Collection<GenePair> randomPairs = getRandomPairsFromProbes(SET_SIZE);
                    Map<GenePair, Double> scoreMap = scorePairs(randomPairs);
                    addResults(i, results, scoreMap);
                }

                summarizeResults(results);

                if (this.printRandomLinks) {
                    outputRandomLinks();
                }
            } else {// if only 1 run, no need to generate histogram data, produce regular format for GO scores
                Collection<GenePair> randomPairs = getRandomPairsFromProbes(SET_SIZE);
                Map<GenePair, Double> scoreMap = scorePairs(randomPairs);
                writeGoSimilarityResults(scoreMap);

                if (this.printRandomLinks) {
                    outputRandomLinks();
                }
            }

        } else {
            Collection<GenePair> genePairs = getLinks();

            Map<GenePair, Double> scoreMap = scorePairs(genePairs);

            writeGoSimilarityResults(scoreMap);
        }

        if (termsOutPath != null) {
            try {
                writeGoMap();
            } catch (IOException e) {
                return e;
            }

        }

        return null;

    }

    protected void initBeans() {
        taxonService = getBean(TaxonService.class);
        geneService = getBean(GeneService.class);
        goService = getBean(GeneOntologyService.class);
        goMetric = getBean(GoMetric.class);
        arrayDesignService = getBean(ArrayDesignService.class);
        css = getBean(CompositeSequenceService.class);

    }

    /**
     * Opens a file for writing and adds the header for histogram data for selecting random gene pairs from an array
     * design
     * 
     * @param fileName if Null, output will be written to standard output.
     * @throws IOException
     */
    protected Writer initHistFile(String fileName) throws IOException {

        Writer writer;
        if (StringUtils.isBlank(fileName)) {
            log.info("Output to stdout");
            writer = new PrintWriter(System.out);
        } else {

            // write into file
            log.info("Creating new link eval file " + fileName + " \n");

            File f = new File(fileName);

            if (f.exists()) {
                log.warn("Will overwrite existing file " + f);
                f.delete();
            }

            f.createNewFile();
            writer = new FileWriter(f);
        }

        writer.write("Overlap");
        for (int j = 0; j < numberOfRandomRuns; j++) {
            writer.write("\tRun_" + (j + 1));
        }
        // writer.write( buf.toString() );
        writer.write("\tMean\tSDev\tMeanF\tStdevF\n");

        return writer;
    }

    /**
     * Opens a file for writing anda adds the header.
     * 
     * @param fileName if Null, output will be written to standard output.
     * @throws IOException
     */
    protected Writer initOutputFile(String fileName) throws IOException {

        Writer writer;
        if (StringUtils.isBlank(fileName)) {
            log.info("Output to stdout");
            writer = new PrintWriter(System.out);
        } else {

            // write into file
            log.info("Creating new annotation file " + fileName + " \n");

            File f = new File(fileName);

            if (f.exists()) {
                log.warn("Will overwrite existing file " + f);
                f.delete();
            }

            f.createNewFile();
            writer = new FileWriter(f);
        }
        // writer.write( buf.toString() );
        if (this.subsetUsed) {
            writer.write("# scoresGenerated:" + subsetLinks + "\n");
        }

        writer.write("Gene1\tGene2\tScore\tG1GOTerms\tG2GOTerms\tOverlapTerms\n");

        return writer;
    }

    /**
     * Opens a file for writing scores for the randomly selected probe pairs
     * 
     * @param fileName if Null, output will be written to standard output.
     * @throws IOException
     */
    protected Writer initRandLinksFile(String fileName) throws IOException {

        Writer writer;
        if (StringUtils.isBlank(fileName)) {
            log.info("Output to stdout");
            writer = new PrintWriter(System.out);
        } else {

            // write into file
            log.info("Creating new annotation file " + fileName + " \n");

            File f = new File(fileName);

            if (f.exists()) {
                log.warn("Will overwrite existing file " + f);
                f.delete();
            }

            f.createNewFile();
            writer = new FileWriter(f);
        }

        return writer;
    }

    @Override
    protected void processOptions() {
        super.processOptions();
        initBeans();
        String commonName = getOptionValue('t');
        if (StringUtils.isBlank(commonName)) {
            System.out.println("MUST enter a valid taxon!");
            System.exit(0);
        }
        if (!StringUtils.equalsIgnoreCase("mouse", commonName) && !StringUtils.equalsIgnoreCase("rat", commonName)
                && !StringUtils.equalsIgnoreCase("human", commonName)) {
            System.out.println("MUST enter a valid taxon!");
            System.exit(0);
        }

        this.taxon = taxonService.findByCommonName(commonName);
        if (taxon == null) {
            System.out.println("Taxon " + commonName + " not found.");
            System.exit(0);
        }
        String input = getOptionValue('d');
        if (StringUtils.isBlank(input)) {
            System.out.println("You MUST enter a valid filename OR size of dataset");
            System.exit(0);
        }
        this.adShortName = getOptionValue("array");
        if (StringUtils.isBlank(this.adShortName)) {
            System.out.println("You MUST enter the shortname for the associated platform");
            System.exit(0);
        }
        if (StringUtils.isNumeric(input)) {// set size entered
            if (this.adShortName.equals("n/a")) {
                this.randomFromTaxon = true;
            } else {
                this.randomFromArray = true;
            }

            SET_SIZE = Integer.parseInt(input);
            if (SET_SIZE < 1) {
                System.out.println("Random set size must be a positive integer");
                System.exit(0);
            }
            System.out.println("Will create a set of " + SET_SIZE + " random gene pairs!");
            if (this.hasOption("runs")) {
                String runs = getOptionValue("runs");
                if (StringUtils.isNumeric(runs)) {
                    numberOfRandomRuns = Integer.parseInt(runs);
                    if (numberOfRandomRuns < 1) {
                        System.out.println("Number of random runs must be a positive integer");
                        System.exit(0);
                    }
                } else {
                    System.out.println("Number of random runs must be a numeric value");
                    System.exit(0);
                }
            }
        } else {// path to input file entered
            File f = new File(input);
            if (f.canRead()) {
                this.file_path = input;
                if (this.hasOption("subset")) {
                    String size = getOptionValue("subset");
                    if (StringUtils.isNumeric(size)) {
                        this.subsetSize = Double.parseDouble(size);
                        this.selectSubset = true;
                        log.info("Approximate subset of scores desired");
                    } else {
                        System.out.println("Subset size must be a numeric value");
                        System.exit(0);
                    }
                }
            } else {
                System.out.println(
                        input + "is NOT a valid filename! You MUST enter a valid filename OR size of dataset");
                System.exit(0);
            }
        }
        if (hasOption('x')) {
            String maxs = getOptionValue('x');
            if (maxs.equalsIgnoreCase("MAX"))
                this.max = true;
            else
                this.max = false;
        }

        if (hasOption('w')) {
            String weights = getOptionValue('w');
            if (weights.equalsIgnoreCase("weight"))
                this.weight = true;
            else
                this.weight = false;
        }

        if (hasOption("termsout")) {
            this.termsOutPath = getOptionValue("termsout");
            log.info("GO mapping will be saved as tabbed text to " + this.termsOutPath);
        }

        if (hasOption('m')) {
            String metricName = getOptionValue('m');
            if (metricName.equalsIgnoreCase("resnik"))
                this.metric = GoMetric.Metric.resnik;
            else if (metricName.equalsIgnoreCase("lin"))
                this.metric = GoMetric.Metric.lin;
            else if (metricName.equalsIgnoreCase("jiang"))
                this.metric = GoMetric.Metric.jiang;
            else if (metricName.equalsIgnoreCase("percent"))
                this.metric = GoMetric.Metric.percent;
            else if (metricName.equalsIgnoreCase("cosine"))
                this.metric = GoMetric.Metric.cosine;
            else if (metricName.equalsIgnoreCase("kappa"))
                this.metric = GoMetric.Metric.kappa;
            else {
                this.metric = GoMetric.Metric.simple;
                this.max = false;
            }
        }

        if (this.hasOption("g1col")) {
            this.firstProbeColumn = getIntegerOptionValue("g1col");
        }

        if (this.hasOption("g2col")) {
            this.secondProbeColumn = getIntegerOptionValue("g2col");
        }

        if (this.hasOption("noZeroGo")) {
            this.noZeros = true;
        }

        if (this.hasOption("print")) {
            this.printRandomLinks = true;
            this.outRandLinksFile = getOptionValue("print");
            if (StringUtils.isBlank(outRandLinksFile)) {
                System.out.println("You must enter an output file path for printing random probe pairs");
                System.exit(0);
            }
        }

        if (this.hasOption("probenames")) {
            this.randomFromSubset = true;
            this.probenames_file_path = getOptionValue("probenames");
        }

        if (this.hasOption("aspect")) {
            String aspect = getOptionValue("aspect");
            if (aspect.equals("mf")) {
                this.goAspectToUse = GOAspect.MOLECULAR_FUNCTION;
            } else if (aspect.equals("bp")) {
                this.goAspectToUse = GOAspect.BIOLOGICAL_PROCESS;
            } else if (aspect.equals("cc")) {
                this.goAspectToUse = GOAspect.CELLULAR_COMPONENT;

            } else {
                System.out.println("Aspect must be one of bp, mf or cc");
                System.exit(0);
            }
        }

        outFile = getOptionValue('o');
        initGO();
    }

    /**
     * @param writer
     * @param pairString
     * @param score e.g. overlap
     * @param goTerms - terms in the overlap
     * @param masterGOTerms number of go terms for the first gene
     * @param coExpGOTerms number of go terms for the second gene
     * @throws IOException
     */
    protected void writeOverlapLine(Writer writer, String pairString, double score,
            Collection<OntologyTerm> goTerms, int masterGOTerms, int coExpGOTerms) throws IOException {

        if (log.isDebugEnabled())
            log.debug("Generating line for annotation file \n");

        writer.write(pairString + "\t" + score + "\t" + masterGOTerms + "\t" + coExpGOTerms + "\t");

        if (goTerms == null || goTerms.isEmpty()) {
            writer.write("\n");
            writer.flush();
            return;
        }

        boolean wrote = false;

        for (OntologyTerm oe : goTerms) {
            if (oe == null)
                continue;
            if (wrote)
                writer.write("|" + GeneOntologyServiceImpl.asRegularGoId(oe));
            else
                writer.write(GeneOntologyServiceImpl.asRegularGoId(oe));
            wrote = true;
        }

        writer.write("\n");
        writer.flush();
    }

    /**
     * Assumes using TO
     * 
     * @param currentRunIndex
     * @param results
     * @param scoreMap
     */
    private void addResults(int currentRunIndex, int[][] results, Map<GenePair, Double> scoreMap) {
        results[currentRunIndex] = new int[MAX_GO_SIM];
        for (int i = 0; i < MAX_GO_SIM; i++) {
            results[currentRunIndex][i] = 0; // init.
        }

        for (Double s : scoreMap.values()) {
            int sim = (int) Math.floor(s.doubleValue());

            if (sim >= MAX_GO_SIM) {
                throw new IllegalArgumentException("sim was " + sim + "; increase MAX_GO_SIM");
            }

            results[currentRunIndex][sim]++;
        }

    }

    /**
     * @param genePair
     * @return
     */
    private boolean checkGenePair(GenePair genePair) {
        boolean isAKeeper = true;
        if (genePair == null) {
            return false;
        } else if (genePair.getFirstGenes().isEmpty() || genePair.getSecondGenes().isEmpty()) {
            return false;
        } else {// check if genes are equal
            List<Gene> first = genePair.getFirstGenes();
            List<Gene> second = genePair.getSecondGenes();
            for (int i = 0; i < first.size() && isAKeeper == true; i++) {
                String s1 = first.get(i).toString();
                for (int j = 0; j < second.size() && isAKeeper == true; j++) {
                    String s2 = second.get(j).toString();
                    if (s1.equals(s2)) {
                        isAKeeper = false;
                    }
                }
            }
        }
        return isAKeeper;
    }

    /**
     * 
     */
    @SuppressWarnings("unchecked")
    private void computeTermProbabilities() {
        File f2 = new File(HOME_DIR + File.separatorChar + GO_PROB_MAP);
        if (f2.exists()) {
            GOProbMap = (HashMap<String, Double>) getCacheFromDisk(f2);
            log.info("Found probability file!");
        }

        else {
            log.info("Calculating probabilities... ");

            GOcountMap = goMetric.getTermOccurrence(geneGoMap);
            makeRootMap(GOcountMap.keySet());
            GOProbMap = makeProbMap();

            this.saveCacheToDisk(GOProbMap, GO_PROB_MAP);
        }
    }

    @SuppressWarnings("unchecked")
    private void createGene2TermMatrix() {

        File f3 = new File(HOME_DIR + File.separatorChar + VECTOR_MATRIX);
        if (f3.exists()) {
            geneVectorMatrix = (DoubleMatrix<Long, String>) getCacheFromDisk(f3);
            log.info("Found vector matrix file!");
        }

        else {
            log.info("Creating sparse matrix... ");

            geneVectorMatrix = goMetric.createVectorMatrix(geneGoMap, weight);

            this.saveCacheToDisk(geneVectorMatrix, VECTOR_MATRIX);
        }

    }

    /**
     * @param taxon
     * @return
     */
    private String getGeneGOMapFileName() {
        return GENE_GO_MAP_FILE + "." + taxon.getCommonName();
    }

    /**
     * @param probemap
     * @return
     */
    private Collection<GenePair> getLinks() {
        Collection<GenePair> genePairs = new HashSet<GenePair>();

        if (randomFromTaxon) {
            genePairs = loadRandomPairs();
        } else if (StringUtils.isNotBlank(file_path)) {
            try {
                File linkFile = new File(file_path);
                genePairs = loadLinks(linkFile);
                log.info("Done loading data...");
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

        } else {
            log.error("What should I do?");
            bail(ErrorCode.INVALID_OPTION);
        }
        return genePairs;
    }

    /**
     * @param merged1
     * @param merged2
     * @return
     */
    private Collection<OntologyTerm> getMergedTermOverlap(Set<String> merged1, Set<String> merged2) {

        Collection<OntologyTerm> overlapTerms = new HashSet<OntologyTerm>();

        if ((merged2 == null) || merged2.isEmpty())
            return null;

        if ((merged1 == null) || merged1.isEmpty())
            return null;

        for (String goTerm1 : merged1) {
            if (goTerm1.equalsIgnoreCase(process) || goTerm1.equalsIgnoreCase(function)
                    || goTerm1.equalsIgnoreCase(component))
                continue;
            for (String goTerm2 : merged2) {

                if (goTerm2.equalsIgnoreCase(process) || goTerm2.equalsIgnoreCase(function)
                        || goTerm2.equalsIgnoreCase(component))
                    continue;

                if (goTerm1.equalsIgnoreCase(goTerm2))
                    overlapTerms.add(GeneOntologyServiceImpl.getTermForURI(goTerm1));
            }
        }

        return overlapTerms;
    }

    /**
     * @param file containing probe names(not IDs) in first column
     * @return collection of composite sequences (probes) obtained from file
     */
    private Collection<CompositeSequence> getProbesFromSubset(File f) throws IOException {

        try (BufferedReader in = new BufferedReader(new FileReader(f));) {
            Collection<CompositeSequence> probeSubset = new HashSet<CompositeSequence>();

            String line;
            log.info("Loading probes from " + f);
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("#")) {
                    continue;
                }

                String[] fields = StringUtils.split(line, "\t");

                String prb = fields[0];
                CompositeSequence cs = css.findByName(arrayDesign, prb);
                probeSubset.add(cs);
            }
            return probeSubset;
        }
    }

    /**
     * @param take a collection of genes and size of susbset
     * @return a collection of random gene pairs that have GO annotations
     */
    @SuppressWarnings("unused")
    private Collection<GenePair> getRandomPairs(int size, Collection<Gene> genes) {

        Collection<GenePair> subsetPairs = new HashSet<GenePair>();
        int i = 0;

        while (i < size) {
            List<Gene> twoGenes = new ArrayList<Gene>(RandomChooser.chooseRandomSubset(2, genes));

            if (twoGenes.size() != 2) {
                log.warn("A pair consists of two objects. More than two is unacceptable. Fix it!!");
            }
            Collections.sort(twoGenes, new GeneComparator());

            GenePair genePair = new GenePair();

            Gene g1 = twoGenes.get(0);
            Gene g2 = twoGenes.get(1);

            genePair.addFirstGene(g1);
            genePair.addSecondGene(g2);

            // Collections.sort( genePair );

            if (subsetPairs.contains(genePair))
                continue;

            Iterator<Gene> iterator = twoGenes.iterator();
            boolean noGoTerms = false;
            while (iterator.hasNext()) {
                if (!geneGoMap.containsKey(iterator.next().getId())) {
                    noGoTerms = true;
                    break;
                }
            }

            if (noGoTerms)
                continue;
            subsetPairs.add(genePair);
            log.info("Added pair to subset!");
            i++;
        }

        return subsetPairs;
    }

    /**
     * @param take a collection of probes and size of susbset
     * @return a collection of random gene pairs that may or may not have GO annotations
     */
    private Collection<GenePair> getRandomPairsFromProbes(int size) {

        Collection<GenePair> subsetPairs = new HashSet<GenePair>();

        firstProbes = new ArrayList<CompositeSequence>();
        secondProbes = new ArrayList<CompositeSequence>();
        List<CompositeSequence> probes = new ArrayList<CompositeSequence>(probemap.keySet());
        Collections.sort(probes, new ProbeComparator());

        while (subsetPairs.size() < size) {

            List<CompositeSequence> twoProbes = new ArrayList<CompositeSequence>(
                    RandomChooser.chooseRandomSubset(2, probes));

            if (twoProbes.size() != 2) {
                log.debug("A pair consists of two objects. More than two is unacceptable. Fix it!!");
            }

            // Collections.sort( twoProbes, new ProbeComparator() );

            Collection<Gene> firstGenes = probemap.get(twoProbes.get(0));
            Collection<Gene> secondGenes = probemap.get(twoProbes.get(1));

            GenePair genePair = makeGenePair(firstGenes, secondGenes);

            boolean isAKeeper = checkGenePair(genePair);

            if (!isAKeeper) {
                continue;
            }

            subsetPairs.add(genePair);
            log.debug("Added pair to subset!");

            firstProbes.add(twoProbes.get(0));
            secondProbes.add(twoProbes.get(1));
        }

        return subsetPairs;
    }

    /**
     * @param g
     * @param coexpG
     * @return
     */
    private Collection<OntologyTerm> getTermOverlap(Gene g, Gene coexpG) {

        Collection<String> masterGO = geneGoMap.get(g.getId());
        Collection<String> coExpGO = geneGoMap.get(coexpG.getId());
        Collection<OntologyTerm> overlapTerms = new HashSet<OntologyTerm>();

        if ((coExpGO == null) || coExpGO.isEmpty())
            return null;

        if ((masterGO == null) || masterGO.isEmpty())
            return null;

        for (String ontologyEntry : masterGO) {
            if (ontologyEntry.equalsIgnoreCase(process) || ontologyEntry.equalsIgnoreCase(function)
                    || ontologyEntry.equalsIgnoreCase(component))
                continue;
            for (String ontologyEntryC : coExpGO) {

                if (ontologyEntryC.equalsIgnoreCase(process) || ontologyEntryC.equalsIgnoreCase(function)
                        || ontologyEntryC.equalsIgnoreCase(component))
                    continue;

                if (ontologyEntry.equalsIgnoreCase(ontologyEntryC))
                    overlapTerms.add(GeneOntologyServiceImpl.getTermForURI(ontologyEntry));
            }
        }

        return overlapTerms;
    }

    /**
     * 
     */
    private void initGO() {
        /*
         * Initialize the Gene Ontology.
         */
        this.goService.init(true);

        while (!goService.isReady()) {
            log.info("waiting for ontology load..");
            try {
                Thread.sleep(10000);
                if (!goService.isRunning())
                    break;
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }

        if (!goService.isReady()) {
            throw new RuntimeException("Gene Ontology was not loaded successfully.");
        }
    }

    private Collection<GenePair> loadLinks(File f) throws IOException {

        log.info("Loading data from " + f);
        try (BufferedReader in = new BufferedReader(new FileReader(f));) {

            Collection<GenePair> geneMap = new HashSet<GenePair>();
            String line;
            Double printedLinks = -1.0;
            Random generator = new Random();
            double rand = 0.0;
            double fraction = 0.0;
            boolean alreadyWarned = false;

            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("#")) {
                    if (line.contains("printedLinks")) {
                        int ind = line.indexOf(':');
                        printedLinks = Double.parseDouble(line.substring(ind + 1));
                        fraction = this.subsetSize / printedLinks;
                    }
                    continue;
                }
                if (printedLinks == -1.0) {
                    System.out.println("Printed link count not found in file header");
                    System.exit(0);
                }
                if (selectSubset && printedLinks > this.subsetSize) {
                    this.subsetUsed = true;
                    rand = generator.nextDouble();
                    if (rand > fraction)
                        continue;
                }
                String[] fields = StringUtils.split(line, "\t");

                if (fields.length < 2) {
                    if (!alreadyWarned) {
                        log.warn("Bad field on line: " + line + " (subsequent errors suppressed)");
                        alreadyWarned = true;
                    }
                    continue;
                }

                String g1 = fields[firstProbeColumn];
                String g2 = fields[secondProbeColumn];

                /*
                 * Use the probe field, get the gene mapping from the probemap
                 */
                CompositeSequence cs1 = css.load(Long.parseLong(g1));
                CompositeSequence cs2 = css.load(Long.parseLong(g2));

                Collection<Gene> genes1 = probemap.get(cs1);
                Collection<Gene> genes2 = probemap.get(cs2);

                GenePair genePair = null;

                if (genes1 == null) {
                    log.warn("No genes found for probe ID " + g1 + " in array design");
                } else if (genes2 == null) {
                    log.warn("No genes found for probe ID " + g2 + " in array design");
                } else {
                    genePair = makeGenePair(genes1, genes2);
                }

                if (!this.checkGenePair(genePair)) {
                    continue;
                }

                geneMap.add(genePair);

                if (geneMap.size() % 50000 == 0) {
                    log.info("Loaded " + geneMap.size() + " links");
                }
                // compute the median of gooverlaps and do something with the
                // result.

            }
            log.info("Loaded " + geneMap.size() + " links");
            saveCacheToDisk(geneCache, GENE_CACHE);
            return geneMap;
        }
    }

    /**
     * @return
     */
    private Collection<GenePair> loadRandomPairs() {
        Collection<GenePair> randomPairs = null;

        try {
            File f3 = new File(HOME_DIR + File.separatorChar + RANDOM_SUBSET);
            if (f3.exists()) {
                randomPairs = loadLinks(f3);
                log.info("Found cached subset file!");
            }

            else {
                populateGeneGoMapForTaxon();

                assert !this.goMappedGenes.isEmpty();

                // randomPairs = getRandomPairs( SET_SIZE, this.goMappedGenes );

                // Writer w = new FileWriter( f3 );
                // this.writeLinks( randomPairs, w );
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        return randomPairs;
    }

    /**
     * @param firstGenes
     * @param secondGenes
     * @return
     */
    private GenePair makeGenePair(Collection<Gene> firstGenes, Collection<Gene> secondGenes) {
        GenePair genePair = new GenePair();
        for (Gene firstGene : firstGenes) {
            if (noZeros
                    && (geneGoMap.get(firstGene.getId()) == null || geneGoMap.get(firstGene.getId()).isEmpty())) {// exclude
                // genes
                // with
                // zero
                // GO
                // terms
                // if
                // desired
                continue;
            }

            genePair.addFirstGene(firstGene);

        }

        for (Gene secondGene : secondGenes) {
            if (noZeros
                    && (geneGoMap.get(secondGene.getId()) == null || geneGoMap.get(secondGene.getId()).isEmpty())) {// exclude
                // genes
                // with
                // zero
                // GO
                // terms
                // if
                // desired
                continue;
            }

            genePair.addSecondGene(secondGene);

        }

        return genePair;
    }

    private Map<String, Double> makeProbMap() {

        for (String uri : GOcountMap.keySet()) {
            int total = 0;
            log.info("Counting children for " + uri);
            int count = goMetric.getChildrenOccurrence(GOcountMap, uri);
            if (rootMap.get(uri) == 1)
                total = pCount;
            if (rootMap.get(uri) == 2)
                total = fCount;
            if (rootMap.get(uri) == 3)
                total = cCount;

            GOProbMap.put(uri, (double) count / total);
        }

        return GOProbMap;
    }

    /**
     * @param take a collection of GOTerm URIs
     * @return Identify the root of each term and put it in the rootMap
     */
    private void makeRootMap(Collection<String> terms) {

        Collection<String> remove = new HashSet<String>();

        for (String t : terms) {
            Collection<OntologyTerm> parents = goService.getAllParents(GeneOntologyServiceImpl.getTermForURI(t),
                    partOf);

            for (OntologyTerm p : parents) {
                if (p.getUri().equalsIgnoreCase(process)) {
                    rootMap.put(t, 1);
                    pCount += GOcountMap.get(t);
                    break;
                }
                if (p.getUri().equalsIgnoreCase(function)) {
                    rootMap.put(t, 2);
                    fCount += GOcountMap.get(t);
                    break;
                }
                if (p.getUri().equalsIgnoreCase(component)) {
                    rootMap.put(t, 3);
                    cCount += GOcountMap.get(t);
                    break;
                }
            }
            if (!(rootMap.containsKey(t))) {
                log.warn("Couldn't get root for term: " + t);
                remove.add(t);
            }
        }

        for (String s : remove) {
            GOcountMap.remove(s);
        }
    }

    /**
     * Organizes a list of genes in to different bins, identified by gene name, with each bin containing duplicates
     * found of different genes with the same gene name.
     * 
     * @param genes list of genes to be organized
     * @return a map of organized genes with the value being the collection of duplicate genes and the key being the
     *         common gene name among them
     */
    private Map<String, List<Gene>> organizeDuplicates(List<Gene> genes) {
        Map<String, List<Gene>> organizedGeneBins = new HashMap<String, List<Gene>>();
        String tempName = "";
        Set<String> uniqueGeneNames = new HashSet<String>();
        for (Gene tempGene : genes) {
            tempName = tempGene.getName();
            if (uniqueGeneNames.contains(tempName)) {// gene already encountered; duplicate
                List<Gene> tempGeneBin = organizedGeneBins.get(tempName);
                tempGeneBin.add(tempGene);
                organizedGeneBins.put(tempName, tempGeneBin);
            } else {
                uniqueGeneNames.add(tempName);
                List<Gene> tempGeneBin = new ArrayList<Gene>();
                tempGeneBin.add(tempGene);
                organizedGeneBins.put(tempName, tempGeneBin);
            }
        }
        return organizedGeneBins;
    }

    private void outputRandomLinks() {
        assert firstProbes.size() == secondProbes.size() : "yikes";
        try (Writer w = initRandLinksFile(outRandLinksFile);) {

            for (int i = 0; i < firstProbes.size(); i++) {
                w.write(firstProbes.get(i).getId() + "\t" + secondProbes.get(i).getId() + "\t\n");
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * @param genes
     */
    private void populateGeneGoMap(Collection<Gene> genes) {
        if (genes.isEmpty()) {
            throw new IllegalArgumentException("No genes!");
        }
        this.goMappedGenes = genes;
        int hadTerms = 0;
        for (Gene gene : genes) {
            Collection<OntologyTerm> GOTerms = goService.getGOTerms(gene, partOf, goAspectToUse);

            if (GOTerms == null || GOTerms.isEmpty())
                continue;
            if (log.isDebugEnabled())
                log.debug("Got go terms for " + gene.getName());

            hadTerms++;

            Collection<String> termString = new HashSet<String>();
            for (OntologyTerm oe : GOTerms) {
                termString.add(oe.getUri());
            }
            geneGoMap.put(gene.getId(), termString);
        }

        log.info(hadTerms + " genes had GO terms");

        if (hadTerms == 0) {
            throw new IllegalStateException("NO genes had go terms");
        }
    }

    /**
     * 
     */
    @SuppressWarnings("unchecked")
    private void populateGeneGoMapForTaxon() {

        File f = new File(HOME_DIR + File.separatorChar + getGeneGOMapFileName());
        if (!f.exists()) {
            log.info("Reloading GO mapping");
            rebuildTaxonGeneGOMap();
        } else {
            log.info("Loading GO mapping from disk cache");
            this.geneGoMap = (Map<Long, Collection<String>>) getCacheFromDisk(f);
        }

        if (metric.equals(GoMetric.Metric.resnik) || metric.equals(GoMetric.Metric.lin)
                || metric.equals(GoMetric.Metric.jiang)) {
            log.info("Computing term probabilities for all GOterms ...");
            computeTermProbabilities();
        } else if (metric.equals(GoMetric.Metric.cosine) || metric.equals(GoMetric.Metric.kappa)) {
            createGene2TermMatrix();
        }

    }

    /**
     * @param taxon
     */
    private void rebuildTaxonGeneGOMap() {
        Collection<Gene> genes = geneService.loadAll(this.taxon);
        populateGeneGoMap(genes);
        saveCacheToDisk(geneGoMap, getGeneGOMapFileName());
    }

    /**
     * @param genePairs
     * @return
     */
    private Map<GenePair, Double> scorePairs(Collection<GenePair> genePairs) {
        Map<GenePair, Double> scoreMap = new HashMap<GenePair, Double>();
        log.info(genePairs.size() + " pairs to score");

        for (GenePair pair : genePairs) {

            List<Gene> genes1 = pair.getFirstGenes();
            List<Gene> genes2 = pair.getSecondGenes();

            List<Double> scores = new ArrayList<Double>();

            if (metric.equals(GoMetric.Metric.simple)) {
                Map<String, List<Gene>> orgGenes1 = organizeDuplicates(genes1);
                Map<String, List<Gene>> orgGenes2 = organizeDuplicates(genes2);

                // calculate scores
                for (String geneName1 : orgGenes1.keySet()) {
                    List<Gene> sameGenes1 = orgGenes1.get(geneName1);
                    for (String geneName2 : orgGenes2.keySet()) {
                        List<Gene> sameGenes2 = orgGenes2.get(geneName2);
                        if (sameGenes1.get(0).getId() == sameGenes2.get(0).getId())
                            continue;
                        double mergedScore = 0.0;
                        mergedScore = goMetric.computeMergedOverlap(sameGenes1, sameGenes2, geneGoMap,
                                goAspectToUse);
                        scores.add(mergedScore);
                    }
                }

            } else {

                if (this.goAspectToUse != null) {
                    throw new IllegalArgumentException(
                            "Sorry, you can't use go aspect filtering with that metric - supported for overlap only");
                }

                for (Gene g1 : genes1) {
                    for (Gene g2 : genes2) {
                        if (g1.getId().equals(g2.getId()))
                            continue;
                        double score = 0.0;

                        if (metric.equals(GoMetric.Metric.cosine) || metric.equals(GoMetric.Metric.kappa)) {
                            score = goMetric.computeMatrixSimilarity(g1, g2, geneVectorMatrix, metric);
                        } else if (max) {
                            log.info("getting MAX scores for " + metric);
                            score = goMetric.computeMaxSimilarity(g1, g2, GOProbMap, metric);
                        }
                        scores.add(score);
                    }
                }
            }

            DoubleArrayList dlist = new DoubleArrayList();
            dlist.addAllOf(scores);

            double median = Descriptive.median(dlist);
            log.debug("Adding pair " + pair + " with score " + median);
            scoreMap.put(pair, median);
        }
        return scoreMap;
    }

    private void summarizeResults(int[][] results) {
        try (Writer w = initHistFile(outFile);) {

            for (int i = 0; i < MAX_GO_SIM; i++) {
                double[] valsForBin = new double[numberOfRandomRuns];
                double[] freqForBin = new double[numberOfRandomRuns];
                w.write(i + "");
                for (int j = 0; j < numberOfRandomRuns; j++) {
                    valsForBin[j] = results[j][i];
                    freqForBin[j] = results[j][i] / (double) SET_SIZE;
                    w.write("\t" + results[j][i]);
                }

                DoubleArrayList valL = new DoubleArrayList(valsForBin);
                DoubleArrayList valFL = new DoubleArrayList(freqForBin);

                double mean = Descriptive.mean(valL);
                double meanF = Descriptive.mean(valFL);

                double stdev = 0.0;
                double stdevF = 0.0;

                if (numberOfRandomRuns > 1) {
                    stdev = Math.sqrt(Descriptive.sampleVariance(valL, mean));
                    stdevF = Math.sqrt(Descriptive.sampleVariance(valFL, meanF));
                }

                w.write("\t" + mean + "\t" + stdev + "\t" + meanF + "\t" + stdevF + "\n");
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

    }

    /**
     * Write the GO annotations in a tabbed format.
     */
    private void writeGoMap() throws IOException {

        try (Writer w = new FileWriter(new File(this.termsOutPath));) {
            w.write("# Go terms for probes used in linkeval\n");
            w.write("ProbeId\tProbeName\tGene\tGoTermCount\tGoTerms\n");
            Set<String> seenProbes = new HashSet<String>();
            for (CompositeSequence cs : this.probemap.keySet()) {

                if (seenProbes.contains(cs.getName())) {
                    log.info("Skipping duplicate probe name " + cs.getName());
                    continue;
                }
                seenProbes.add(cs.getName());

                w.write(cs.getId() + "\t" + cs.getName() + "\t");
                Collection<Gene> genes = this.probemap.get(cs);
                Set<String> goTerms = new HashSet<String>();
                Set<String> geneSymbs = new HashSet<String>();
                for (Gene gene : genes) {
                    if (geneGoMap.containsKey(gene.getId())) {
                        for (String go : geneGoMap.get(gene.getId())) {
                            goTerms.add(GeneOntologyServiceImpl.asRegularGoId(go));
                        }
                    }
                    geneSymbs.add(gene.getOfficialSymbol());
                }

                w.write(StringUtils.join(geneSymbs, "|") + "\t");
                w.write(goTerms.size() + "\t");
                w.write(StringUtils.join(goTerms, "|") + "\n");
            }

        }
    }

    /**
     * @param scoreMap
     * @param overallWatch
     */
    private void writeGoSimilarityResults(Map<GenePair, Double> scoreMap) {
        StopWatch overallWatch = new StopWatch();
        overallWatch.start();
        subsetLinks = scoreMap.size();
        try (Writer write = initOutputFile(outFile);) {
            for (GenePair pair : scoreMap.keySet()) {
                List<Gene> firstGenes = pair.getFirstGenes();
                List<Gene> secondGenes = pair.getSecondGenes();

                String pairString = pair.toString();
                Double score = scoreMap.get(pair);
                if (score == null) {
                    log.warn("Score is null for " + pair);
                    continue;
                }
                int masterGOTerms = 0;
                int coExpGOTerms = 0;
                Collection<OntologyTerm> overlappingGoTerms = null;
                if (firstGenes.size() == 1 && secondGenes.size() == 1) {
                    Gene mGene = firstGenes.iterator().next();
                    Gene cGene = secondGenes.iterator().next();

                    if (!geneGoMap.containsKey(mGene.getId()))
                        masterGOTerms = 0;
                    else
                        masterGOTerms = (geneGoMap.get(mGene.getId())).size();

                    if (!geneGoMap.containsKey(cGene.getId()))
                        coExpGOTerms = 0;
                    else
                        coExpGOTerms = (geneGoMap.get(cGene.getId())).size();

                    overlappingGoTerms = getTermOverlap(mGene, cGene);

                } else {
                    Map<String, List<Gene>> orgGenes1 = organizeDuplicates(firstGenes);
                    Map<String, List<Gene>> orgGenes2 = organizeDuplicates(secondGenes);

                    GenePair uniquePair = new GenePair();
                    List<Set<String>> mergedGoTerms1 = new ArrayList<Set<String>>();
                    List<Set<String>> mergedGoTerms2 = new ArrayList<Set<String>>();
                    for (String geneName1 : orgGenes1.keySet()) {
                        List<Gene> tempGenes = orgGenes1.get(geneName1);
                        uniquePair.addFirstGene(tempGenes.get(0));
                        Set<String> uniqGoTerms = new HashSet<String>();
                        for (Gene g1 : tempGenes) {
                            if (geneGoMap.containsKey(g1.getId())) {
                                uniqGoTerms.addAll(geneGoMap.get(g1.getId()));
                            }
                        }
                        mergedGoTerms1.add(uniqGoTerms);
                    }
                    for (String geneName2 : orgGenes2.keySet()) {
                        List<Gene> tempGenes = orgGenes2.get(geneName2);
                        uniquePair.addSecondGene(tempGenes.get(0));
                        Set<String> uniqGoTerms = new HashSet<String>();
                        for (Gene g2 : tempGenes) {
                            if (geneGoMap.containsKey(g2.getId())) {
                                uniqGoTerms.addAll(geneGoMap.get(g2.getId()));
                            }
                        }
                        mergedGoTerms2.add(uniqGoTerms);
                    }
                    pairString = uniquePair.toString();
                    if (orgGenes1.size() == 1 && orgGenes2.size() == 1) {
                        masterGOTerms = mergedGoTerms1.get(0).size();
                        coExpGOTerms = mergedGoTerms2.get(0).size();
                        overlappingGoTerms = getMergedTermOverlap(mergedGoTerms1.get(0), mergedGoTerms2.get(0));
                    }
                }
                writeOverlapLine(write, pairString, score, overlappingGoTerms, masterGOTerms, coExpGOTerms);
            }
            write.flush();
            overallWatch.stop();
            log.info("Compute GoOverlap takes " + overallWatch.getTime() + "ms");

            // printResults(masterTermCountMap);
        } catch (IOException ioe) {
            log.error("Couldn't write to file: " + ioe);

        }
    }

    /* (non-Javadoc)
     * @see ubic.gemma.util.AbstractCLI#getCommandName()
     */
    @Override
    public String getCommandName() {
        // TODO Auto-generated method stub
        return null;
    }

}