edu.msu.cme.rdp.unifrac.Unifrac.java Source code

Java tutorial

Introduction

Here is the source code for edu.msu.cme.rdp.unifrac.Unifrac.java

Source

/*
 * Copyright (C) 2012 Michigan State University <rdpstaff at msu.edu>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package edu.msu.cme.rdp.unifrac;

import edu.msu.cme.rdp.multicompare.MCSample;
import edu.msu.cme.rdp.multicompare.taxon.MCTaxon;
import edu.msu.cme.rdp.taxatree.UnifracTree;
import edu.msu.cme.rdp.taxatree.UnifracTree.UnifracResult;
import edu.msu.cme.rdp.taxatree.utils.NewickTreeBuilder;
import edu.msu.cme.rdp.taxatree.utils.NewickTreeBuilder.NewickTaxonFactory;
import edu.msu.cme.rdp.unifrac.UnifracTreeBuilder.UnifracSample;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.cli.UnrecognizedOptionException;

/**
 *
 * @author fishjord
 */
public class Unifrac {

    private static final DecimalFormat format = new DecimalFormat("0.0000");
    private static final Options options = new Options();

    static {
        options.addOption("t", "tree", true,
                "Newick tree file to preform unifrac calculations on, must specify a sample mapping file");
        options.addOption("s", "sample-mapping", true,
                "Sample mapping file specifying what sample sequences are in");
        options.addOption("f", "sequence-file", true, "Comma seperated list of sequence files");
        options.addOption("u", "unweighted", false, "Perform unweighted unifrac calculation");
        options.addOption("w", "weighted", false, "Perform weighted unifrac calculation");
        options.addOption("S", "significance", false, "Compute significance with unifrac metric");
        options.addOption("o", "outfile", true, "Write results to file instead of stdout");
    }

    private static void printUsage() {
        new HelpFormatter().printHelp("Unifrac", options, true);
    }

    private static UnifracTree parseNewickTree(String treeFile, Map<String, UnifracSample> sampleMap)
            throws IOException {
        UnifracTree unifracTree = new UnifracTree();

        NewickTreeBuilder builder = new NewickTreeBuilder(unifracTree, new FileInputStream(treeFile),
                new NewickTaxonFactory<MCTaxon>() {

                    public MCTaxon buildTaxon(int taxid, String name, float distance) {
                        return new UnifracTaxon(taxid, name, "", distance);
                    }
                });

        for (String seqName : sampleMap.keySet()) {
            UnifracSample sample = sampleMap.get(seqName);
            Integer taxid = builder.getTaxidByName(seqName);
            if (taxid == null) {
                System.err.println("Couldn't find " + seqName + " in the tree");
                continue;
            }

            unifracTree.getChildTaxon(taxid).addSampleCount(sample.sample, sample.count);
        }

        unifracTree.refreshInnerTaxa();

        return unifracTree;
    }

    public static void printResults(PrintStream out, UnifracResult unifracResult, String label) {

        List<MCSample> labels = new ArrayList(unifracResult.getSamples());
        Collections.sort(labels, new Comparator<MCSample>() {

            public int compare(MCSample o1, MCSample o2) {
                return o1.getSampleName().compareTo(o2.getSampleName());
            }
        });

        out.println(label);
        for (int i = 0; i < labels.size(); i++) {
            for (int j = i + 1; j < labels.size(); j++) {
                int row = unifracResult.getSamples().indexOf(labels.get(i));
                int col = unifracResult.getSamples().indexOf(labels.get(j));
                out.println(labels.get(i) + "-" + labels.get(j) + "\t"
                        + format.format(unifracResult.getUnifracMatrix()[row][col]) + "  ");
            }
        }
    }

    private static Map<String, UnifracSample> readSampleMap(String sampleFile) throws IOException {
        Map<String, UnifracSample> ret = new HashMap();
        Map<String, MCSample> sampleMap = new HashMap();

        int lineno = 1;
        Scanner s = new Scanner(new File(sampleFile)).useDelimiter("\n");
        while (s.hasNext()) {
            String line = s.next().trim();
            if (line.equals("")) {
                continue;
            }

            String[] tokens = line.split("\\s+");
            if (tokens.length < 2) {
                throw new IOException("Failed to parse sample mapping file (lineno=" + lineno + ")");
            }

            String sampleName = tokens[1];
            String seqName = tokens[0];
            int sampleCount = 1;

            try {
                sampleCount = Integer.parseInt(tokens[2]);
            } catch (Exception e) {
            }

            if (!sampleMap.containsKey(sampleName))
                sampleMap.put(sampleName, new MCSample(sampleName));

            UnifracSample unifracSample = new UnifracSample();
            unifracSample.sample = sampleMap.get(sampleName);
            unifracSample.count = sampleCount;

            ret.put(seqName, unifracSample);

            lineno++;
        }
        s.close();

        return ret;
    }

    public static void main(String[] args) throws Exception {
        CommandLineParser parser = new PosixParser();
        CommandLine line = null;
        try {
            line = parser.parse(options, args);
        } catch (UnrecognizedOptionException e) {
            System.err.println(e.getMessage());
            printUsage();
            return;
        }
        UnifracTree unifracTree = null;
        PrintStream out = System.out;

        if (line.hasOption("tree") && line.hasOption("sequence-files")) {
            printUsage();
        } else if (!(line.hasOption("weighted") || line.hasOption("unweighted")
                || line.hasOption("significance"))) {
            System.err.println("Must specify at least one calculation option");
            printUsage();
        } else if (line.hasOption("sample-mapping")) {
            Map<String, UnifracSample> sampleMap = readSampleMap(line.getOptionValue("sample-mapping"));

            if (line.hasOption("tree")) {
                unifracTree = parseNewickTree(line.getOptionValue("tree"), sampleMap);
            }
        } else {
            if (!line.hasOption("sample-mapping")) {
                System.err.println("A sample mapping file must be provided");
            }
            printUsage();
        }

        if (line.hasOption("outfile")) {
            out = new PrintStream(line.getOptionValue("outfile"));
        }

        if (unifracTree != null) {
            if (line.hasOption("unweighted")) {
                printResults(out, unifracTree.computeUnifrac(), "Unweighted Unifrac");
                if (line.hasOption("significance")) {
                    printResults(out, unifracTree.computeUnifracSig(1000, false),
                            "Unweighted Unifrac Significance");
                }
            }

            if (line.hasOption("weighted")) {
                printResults(out, unifracTree.computeWeightedUnifrac(), "Weighted Unifrac");
                if (line.hasOption("significance")) {
                    printResults(out, unifracTree.computeUnifracSig(1000, true), "Weighted Unifrac Significance");
                }
            }
        }
    }
}