Java tutorial
/* * Copyright (C) 2012 Michigan State University <rdpstaff at msu.edu> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package edu.msu.cme.rdp.unifrac; import edu.msu.cme.rdp.multicompare.MCSample; import edu.msu.cme.rdp.multicompare.taxon.MCTaxon; import edu.msu.cme.rdp.taxatree.UnifracTree; import edu.msu.cme.rdp.taxatree.UnifracTree.UnifracResult; import edu.msu.cme.rdp.taxatree.utils.NewickTreeBuilder; import edu.msu.cme.rdp.taxatree.utils.NewickTreeBuilder.NewickTaxonFactory; import edu.msu.cme.rdp.unifrac.UnifracTreeBuilder.UnifracSample; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PrintStream; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Scanner; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.PosixParser; import org.apache.commons.cli.UnrecognizedOptionException; /** * * @author fishjord */ public class Unifrac { private static final DecimalFormat format = new DecimalFormat("0.0000"); private static final Options options = new Options(); static { options.addOption("t", "tree", true, "Newick tree file to preform unifrac calculations on, must specify a sample mapping file"); options.addOption("s", "sample-mapping", true, "Sample mapping file specifying what sample sequences are in"); options.addOption("f", "sequence-file", true, "Comma seperated list of sequence files"); options.addOption("u", "unweighted", false, "Perform unweighted unifrac calculation"); options.addOption("w", "weighted", false, "Perform weighted unifrac calculation"); options.addOption("S", "significance", false, "Compute significance with unifrac metric"); options.addOption("o", "outfile", true, "Write results to file instead of stdout"); } private static void printUsage() { new HelpFormatter().printHelp("Unifrac", options, true); } private static UnifracTree parseNewickTree(String treeFile, Map<String, UnifracSample> sampleMap) throws IOException { UnifracTree unifracTree = new UnifracTree(); NewickTreeBuilder builder = new NewickTreeBuilder(unifracTree, new FileInputStream(treeFile), new NewickTaxonFactory<MCTaxon>() { public MCTaxon buildTaxon(int taxid, String name, float distance) { return new UnifracTaxon(taxid, name, "", distance); } }); for (String seqName : sampleMap.keySet()) { UnifracSample sample = sampleMap.get(seqName); Integer taxid = builder.getTaxidByName(seqName); if (taxid == null) { System.err.println("Couldn't find " + seqName + " in the tree"); continue; } unifracTree.getChildTaxon(taxid).addSampleCount(sample.sample, sample.count); } unifracTree.refreshInnerTaxa(); return unifracTree; } public static void printResults(PrintStream out, UnifracResult unifracResult, String label) { List<MCSample> labels = new ArrayList(unifracResult.getSamples()); Collections.sort(labels, new Comparator<MCSample>() { public int compare(MCSample o1, MCSample o2) { return o1.getSampleName().compareTo(o2.getSampleName()); } }); out.println(label); for (int i = 0; i < labels.size(); i++) { for (int j = i + 1; j < labels.size(); j++) { int row = unifracResult.getSamples().indexOf(labels.get(i)); int col = unifracResult.getSamples().indexOf(labels.get(j)); out.println(labels.get(i) + "-" + labels.get(j) + "\t" + format.format(unifracResult.getUnifracMatrix()[row][col]) + " "); } } } private static Map<String, UnifracSample> readSampleMap(String sampleFile) throws IOException { Map<String, UnifracSample> ret = new HashMap(); Map<String, MCSample> sampleMap = new HashMap(); int lineno = 1; Scanner s = new Scanner(new File(sampleFile)).useDelimiter("\n"); while (s.hasNext()) { String line = s.next().trim(); if (line.equals("")) { continue; } String[] tokens = line.split("\\s+"); if (tokens.length < 2) { throw new IOException("Failed to parse sample mapping file (lineno=" + lineno + ")"); } String sampleName = tokens[1]; String seqName = tokens[0]; int sampleCount = 1; try { sampleCount = Integer.parseInt(tokens[2]); } catch (Exception e) { } if (!sampleMap.containsKey(sampleName)) sampleMap.put(sampleName, new MCSample(sampleName)); UnifracSample unifracSample = new UnifracSample(); unifracSample.sample = sampleMap.get(sampleName); unifracSample.count = sampleCount; ret.put(seqName, unifracSample); lineno++; } s.close(); return ret; } public static void main(String[] args) throws Exception { CommandLineParser parser = new PosixParser(); CommandLine line = null; try { line = parser.parse(options, args); } catch (UnrecognizedOptionException e) { System.err.println(e.getMessage()); printUsage(); return; } UnifracTree unifracTree = null; PrintStream out = System.out; if (line.hasOption("tree") && line.hasOption("sequence-files")) { printUsage(); } else if (!(line.hasOption("weighted") || line.hasOption("unweighted") || line.hasOption("significance"))) { System.err.println("Must specify at least one calculation option"); printUsage(); } else if (line.hasOption("sample-mapping")) { Map<String, UnifracSample> sampleMap = readSampleMap(line.getOptionValue("sample-mapping")); if (line.hasOption("tree")) { unifracTree = parseNewickTree(line.getOptionValue("tree"), sampleMap); } } else { if (!line.hasOption("sample-mapping")) { System.err.println("A sample mapping file must be provided"); } printUsage(); } if (line.hasOption("outfile")) { out = new PrintStream(line.getOptionValue("outfile")); } if (unifracTree != null) { if (line.hasOption("unweighted")) { printResults(out, unifracTree.computeUnifrac(), "Unweighted Unifrac"); if (line.hasOption("significance")) { printResults(out, unifracTree.computeUnifracSig(1000, false), "Unweighted Unifrac Significance"); } } if (line.hasOption("weighted")) { printResults(out, unifracTree.computeWeightedUnifrac(), "Weighted Unifrac"); if (line.hasOption("significance")) { printResults(out, unifracTree.computeUnifracSig(1000, true), "Weighted Unifrac Significance"); } } } } }