lineage.LineageEngine.java Source code

Java tutorial

Introduction

Here is the source code for lineage.LineageEngine.java

Source

/*
 * Program LICHeE for multi-sample cancer phylogeny reconstruction
 * by Victoria Popic (viq@stanford.edu) 2014
 *
 * MIT License
 *
 * Copyright (c) 2014 Victoria Popic.
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/

package lineage;

import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.logging.ConsoleHandler;
import java.util.logging.Formatter;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

import lineage.AAFClusterer.Cluster;
import lineage.AAFClusterer.ClusteringAlgorithms;
import lineage.Parameters.Format;
import lineage.PHYTree;

/**
 * Main cell lineage reconstruction pipeline
 * @autor viq
 */
public class LineageEngine {
    protected static final Logger logger = Logger.getLogger("lineage.engine");

    /**
     * The main pipeline for reconstructing the cell lineage trees
     */
    public static void buildLineage(Args args) {

        // 1. load SNV data
        SNVDataStore db = new SNVDataStore(args.inputFileName, args.clustersFileName, args.normalSampleId);

        // 2. get the SNVs partitioned by group tag and create the appropriate SNV group objects
        HashMap<String, ArrayList<SNVEntry>> snvsByTag = db.getSomaticGroups();
        ArrayList<SNVGroup> groups = new ArrayList<SNVGroup>();
        for (String groupTag : snvsByTag.keySet()) {
            groups.add(new SNVGroup(groupTag, snvsByTag.get(groupTag), db.isRobustGroup(groupTag)));
        }
        if (groups.size() == 0) {
            logger.warning("All SNV groups have been filtered out.");
            return;
        }

        // 3. cluster SNVs in each group
        AAFClusterer clusterer = new AAFClusterer();
        for (SNVGroup group : groups) {
            if (args.clustersFileName == null) {
                Cluster[] clusters = clusterer.clusterSubPopulations(group, ClusteringAlgorithms.EM, 1);
                logger.fine("Clustering results for group: " + group.getTag());
                for (Cluster c : clusters) {
                    logger.fine(c.toString());
                }
                group.setSubPopulations(clusters);
            } else {
                ArrayList<Cluster> groupClusters = db.getClusters().get(group.getTag());
                group.subPopulations = new Cluster[groupClusters.size()];
                group.subPopulations = groupClusters.toArray(group.subPopulations);
            }
        }

        // 4. construct the constraint network
        PHYNetwork constrNetwork = new PHYNetwork(groups, db.getNumSamples());
        logger.fine(constrNetwork.toString());

        // 5. find all the lineage trees that pass the VAF constraints
        ArrayList<PHYTree> spanningTrees = constrNetwork.getLineageTrees();
        logger.info("Found " + spanningTrees.size() + " valid tree(s)");

        if (spanningTrees.size() == 0) {
            logger.info("Adjusting the network...");
            // if no valid trees were found, fix the network 
            // remove group nodes that are not robust, complete edges
            int delta = 0;
            do {
                int numNodes = constrNetwork.numNodes;
                constrNetwork = constrNetwork.fixNetwork();
                spanningTrees = constrNetwork.getLineageTrees();
                delta = numNodes - constrNetwork.numNodes;
            } while ((delta != 0) && (spanningTrees.size() <= 0));
            if (spanningTrees.size() <= 0) {
                Parameters.ALL_EDGES = true;
                constrNetwork = new PHYNetwork(groups, db.getNumSamples());
                spanningTrees = constrNetwork.getLineageTrees();
            }
            logger.info("Found " + spanningTrees.size() + " valid trees after network adjustments");
        }

        // 6. evaluate/rank the trees
        if (spanningTrees.size() > 0) {
            constrNetwork.evaluateLineageTrees();
            logger.fine("Top tree\nError score: " + spanningTrees.get(0).getErrorScore());
            logger.fine(spanningTrees.get(0).toString());
        }

        // 7. result visualization
        if (args.showNetwork) {
            constrNetwork.displayNetwork();
        }
        if (spanningTrees.size() > 0) {
            for (int i = 0; i < args.numShow; i++) {
                if (spanningTrees.size() > i) {
                    constrNetwork.displayTree(spanningTrees.get(i), db.getSampleNames(), null, null);
                } else {
                    break;
                }
            }
            // 8. persistent storage
            if (args.numSave > 0) {
                writeTreesToTxtFile(constrNetwork, spanningTrees, db.getSampleNames(), args);
            }
        }
    }

    ///// I/O /////

    private static void writeTreesToTxtFile(PHYNetwork net, ArrayList<PHYTree> trees, ArrayList<String> sampleNames,
            Args args) {
        String treeFileName = args.outputFileName;
        try {
            FileWriter fw = new FileWriter(treeFileName);
            fw.write("Nodes:\n" + net.getNodesWithMembersAsString() + "\n");
            for (int i = 0; i < args.numSave; i++) {
                if (trees.size() > i) {
                    fw.write("****Tree " + i + "****\n");
                    String edges = trees.get(i).toString();
                    fw.write(edges);
                    fw.write("Error score: " + trees.get(i).getErrorScore() + "\n\n");
                    fw.write("Sample decomposition: \n");
                    String lineage = "";
                    for (int j = 0; j < sampleNames.size(); j++) {
                        lineage += trees.get(i).getLineage(j, sampleNames.get(j));
                        lineage += "\n";
                    }
                    fw.write(lineage);
                    fw.write("\n");
                }
            }
            fw.write("SNV info:\n" + net.getNodeMembersOnlyAsString() + "\n");
            fw.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("Failed to write to the file: " + treeFileName);
            System.exit(-1);
        }
    }

    // ---- LAUNCH ----

    private static final String TREES_TXT_FILE_EXTENSION = ".trees.txt";

    public static void main(String[] args) {
        Options options = new Options();
        // Commands
        options.addOption("build", false, "Construct the sample lineage trees");

        // Input/Output/Display
        options.addOption("i", true, "Input file path [required]");
        options.addOption("o", true, "Output file path (default: input file with suffix .trees.txt)");
        options.addOption("cp", false, "Input data represents cell prevalaence (CP) values");
        options.addOption("sampleProfile", false,
                "Input file contains the SSNV sample presence-absence profile (this will disable the default SSNV calling step)");
        options.addOption("n", "normal", true,
                "Normal sample column id in the list of samples, 0-based (e.g 0 is the first column) [required without -sampleProfile]");
        options.addOption("clustersFile", true, "SSNV clusters file path");
        options.addOption("s", "save", true, "Maximum number of output trees to save (default: 1)");
        options.addOption("showNetwork", "net", false, "Display the constraint network");
        options.addOption("showTree", "tree", true, "Number of top-ranking trees to display (default: 0)");

        // SSNV filtering / calling
        options.addOption("maxVAFAbsent", "absent", true,
                "Maximum VAF to robustly consider an SSNV as absent from a sample [required without -sampleProfile]");
        options.addOption("minVAFPresent", "present", true,
                "Minimum VAF to robustly consider an SSNV as present in a sample [required without -sampleProfile]");
        options.addOption("maxVAFValid", true, "Maximum allowed VAF in a sample (default: 0.6)");
        options.addOption("minProfileSupport", true,
                "Minimum number of robust SSNVs required for a group presence-absence profile to be labeled robust (default: 2)");

        // Network Construction / Tree Search
        options.addOption("minClusterSize", true,
                "Minimum size a cluster must have to be a considered a node in the network (default: 2)");
        options.addOption("minPrivateClusterSize", true,
                "Minimum size a private mutation cluster must have to be a considered a node in the network (default: 1)");
        options.addOption("minRobustNodeSupport", true,
                "Minimum number of robust SSNVs required for a node to be labeled robust during tree search: non-robust nodes can be removed from the network when no valid lineage trees are found (default: 2)");
        options.addOption("maxClusterDist", true,
                "Maximum mean VAF difference up to which two clusters can be collapsed (default: 0.2)");
        options.addOption("c", "completeNetwork", false,
                "Add all possible edges to the constraint network (default: private nodes are connected only to closest level parents; only nodes with no other parents are descendants of root)");
        options.addOption("e", true, "VAF error margin (default: 0.1)");
        options.addOption("nTreeQPCheck", true,
                "Number of top-ranking trees on which the QP consistency check is run, we have not seen this check fail in practice (default: 0, for best performance)");

        options.addOption("v", "verbose", false, "Verbose mode");
        options.addOption("h", "help", false, "Print usage");

        // display order
        ArrayList<Option> optionsList = new ArrayList<Option>();
        optionsList.add(options.getOption("build"));

        optionsList.add(options.getOption("i"));
        optionsList.add(options.getOption("o"));
        optionsList.add(options.getOption("cp"));
        optionsList.add(options.getOption("sampleProfile"));
        optionsList.add(options.getOption("n"));
        optionsList.add(options.getOption("clustersFile"));
        optionsList.add(options.getOption("s"));
        optionsList.add(options.getOption("net"));
        optionsList.add(options.getOption("tree"));
        optionsList.add(options.getOption("maxVAFAbsent"));
        optionsList.add(options.getOption("minVAFPresent"));
        optionsList.add(options.getOption("maxVAFValid"));
        optionsList.add(options.getOption("minProfileSupport"));
        optionsList.add(options.getOption("minClusterSize"));
        optionsList.add(options.getOption("minPrivateClusterSize"));
        optionsList.add(options.getOption("minRobustNodeSupport"));
        optionsList.add(options.getOption("maxClusterDist"));
        optionsList.add(options.getOption("c"));
        optionsList.add(options.getOption("e"));
        optionsList.add(options.getOption("nTreeQPCheck"));
        optionsList.add(options.getOption("v"));
        optionsList.add(options.getOption("h"));

        CommandLineParser parser = new BasicParser();
        CommandLine cmdLine = null;
        HelpFormatter hf = new HelpFormatter();
        hf.setOptionComparator(new OptionComarator<Option>(optionsList));
        try {
            cmdLine = parser.parse(options, args);
        } catch (ParseException e) {
            System.out.println(e.getMessage());
            hf.printHelp("lichee", options);
            System.exit(-1);
        }

        // Set-up input args
        Args params = new Args();
        if (cmdLine.hasOption("i")) {
            params.inputFileName = cmdLine.getOptionValue("i");
        } else {
            System.out.println("Required parameter: input file path [-i]");
            hf.printHelp("lichee", options);
            System.exit(-1);
        }
        if (cmdLine.hasOption("o")) {
            params.outputFileName = cmdLine.getOptionValue("o");
        } else {
            params.outputFileName = params.inputFileName + TREES_TXT_FILE_EXTENSION;
        }
        if (cmdLine.hasOption("clustersFile")) {
            params.clustersFileName = cmdLine.getOptionValue("clustersFile");
        }
        if (cmdLine.hasOption("sampleProfile")) {
            Parameters.INPUT_FORMAT = Format.SNV_WITH_PROFILE;
        }

        if (cmdLine.hasOption("n")) {
            params.normalSampleId = Integer.parseInt(cmdLine.getOptionValue("n"));
        } else if (!cmdLine.hasOption("sampleProfile")) {
            System.out.println("Required parameter: normal sample id [-n]");
            hf.printHelp("lichee", options);
            System.exit(-1);
        }
        if (cmdLine.hasOption("showTree")) {
            params.numShow = Integer.parseInt(cmdLine.getOptionValue("showTree"));
        }
        if (cmdLine.hasOption("showNetwork")) {
            params.showNetwork = true;
        }
        if (cmdLine.hasOption("s")) {
            params.numSave = Integer.parseInt(cmdLine.getOptionValue("s"));
        }

        if (cmdLine.hasOption("maxVAFAbsent")) {
            Parameters.MAX_VAF_ABSENT = Double.parseDouble(cmdLine.getOptionValue("maxVAFAbsent"));
        } else if (!cmdLine.hasOption("sampleProfile")) {
            System.out.println("Required parameter: -maxVAFAbsent");
            hf.printHelp("lichee", options);
            System.exit(-1);
        }
        if (cmdLine.hasOption("minVAFPresent")) {
            Parameters.MIN_VAF_PRESENT = Double.parseDouble(cmdLine.getOptionValue("minVAFPresent"));
        } else if (!cmdLine.hasOption("sampleProfile")) {
            System.out.println("Required parameter: -minVAFPresent");
            hf.printHelp("lichee", options);
            System.exit(-1);
        }
        if (cmdLine.hasOption("maxVAFValid")) {
            Parameters.MAX_ALLOWED_VAF = Double.parseDouble(cmdLine.getOptionValue("maxVAFValid"));
        }
        if (cmdLine.hasOption("minProfileSupport")) {
            Parameters.MIN_GROUP_PROFILE_SUPPORT = Integer.parseInt(cmdLine.getOptionValue("minProfileSupport"));
        }
        if (cmdLine.hasOption("minClusterSize")) {
            Parameters.MIN_CLUSTER_SIZE = Integer.parseInt(cmdLine.getOptionValue("minClusterSize"));
        }
        if (cmdLine.hasOption("minPrivateClusterSize")) {
            Parameters.MIN_PRIVATE_CLUSTER_SIZE = Integer.parseInt(cmdLine.getOptionValue("minPrivateClusterSize"));
        }
        if (cmdLine.hasOption("minRobustNodeSupport")) {
            Parameters.MIN_ROBUST_CLUSTER_SUPPORT = Integer
                    .parseInt(cmdLine.getOptionValue("minRobustNodeSupport"));
        }
        if (cmdLine.hasOption("maxClusterDist")) {
            Parameters.MAX_COLLAPSE_CLUSTER_DIFF = Double.parseDouble(cmdLine.getOptionValue("maxClusterDist"));
        }
        if (cmdLine.hasOption("c")) {
            Parameters.ALL_EDGES = true;
        }
        if (cmdLine.hasOption("cp")) {
            Parameters.CP = true;
            Parameters.VAF_MAX = 1.0;
            Parameters.MAX_ALLOWED_VAF = 1.0;
        }
        if (cmdLine.hasOption("e")) {
            Parameters.VAF_ERROR_MARGIN = Double.parseDouble(cmdLine.getOptionValue("e"));
        }
        if (cmdLine.hasOption("nTreeQPCheck")) {
            Parameters.NUM_TREES_FOR_CONSISTENCY_CHECK = Integer.parseInt(cmdLine.getOptionValue("nTreeQPCheck"));
        }
        if (cmdLine.hasOption("h")) {
            new HelpFormatter().printHelp(" ", options);
        }
        // logger
        ConsoleHandler h = new ConsoleHandler();
        h.setFormatter(new LogFormatter());
        h.setLevel(Level.INFO);
        logger.setLevel(Level.INFO);
        if (cmdLine.hasOption("v")) {
            h.setLevel(Level.FINEST);
            logger.setLevel(Level.FINEST);
        }
        logger.addHandler(h);
        logger.setUseParentHandlers(false);

        if (cmdLine.hasOption("build")) {
            buildLineage(params);

        } else {
            new HelpFormatter().printHelp("lichee", options);
            System.exit(-1);
        }
    }

    protected static class Args {
        // --- 'build' command ---
        String inputFileName;
        String outputFileName;
        String clustersFileName;
        int normalSampleId = 0;
        String cnvFileName;
        String annFileName;
        String cosmicFileName;
        String tcgaFileName;

        // --- 'show' command ---
        String showFileNamePrefix;
        int numShow = 0;
        int numSave = 1;

        // flags
        boolean showNetwork = false;
        boolean verbose = false;
    }

    protected static class LogFormatter extends Formatter {
        public String format(LogRecord rec) {
            return rec.getMessage() + "\r\n";
        }
    }

    protected static class OptionComarator<T extends Option> implements Comparator<T> {
        protected ArrayList<Option> orderedOptions;

        public OptionComarator(ArrayList<Option> options) {
            orderedOptions = options;
        }

        public int compare(T o1, T o2) {
            return orderedOptions.indexOf(o1) - orderedOptions.indexOf(o2);
        }
    }
}