Java tutorial
/* * Copyright (c) 2009-2013 David Soergel <dev@davidsoergel.com> * Licensed under the Apache License, Version 2.0 * http://www.apache.org/licenses/LICENSE-2.0 */ package com.davidsoergel.trees; import com.davidsoergel.dsutils.collections.ConcurrentHashWeightedSet; import com.davidsoergel.dsutils.collections.DSCollectionUtils; import com.davidsoergel.dsutils.collections.MutableWeightedSet; import com.davidsoergel.stats.ContinuousDistribution1D; import com.google.common.collect.Multiset; import org.apache.commons.lang.NotImplementedException; import org.apache.log4j.Logger; import org.jetbrains.annotations.NotNull; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; /** * Abstract implementation of the RootedPhylogeny interface, providing all required functionality that is not * implementation-specific. * * @author <a href="mailto:dev@davidsoergel.com">David Soergel</a> * @version $Id: AbstractRootedPhylogeny.java 354 2009-10-19 19:01:44Z soergel $ */ public abstract class AbstractRootedPhylogeny<T extends Serializable> implements RootedPhylogeny<T> { private static final Logger logger = Logger.getLogger(AbstractRootedPhylogeny.class); protected transient RootedPhylogeny<T> basePhylogeny = null; /** * {@inheritDoc} */ @NotNull public T commonAncestor(Collection<T> knownMergeIds) throws NoSuchNodeException { return commonAncestor(knownMergeIds, 1.0); } public RootedPhylogeny<T> asRootedPhylogeny() { return this; } private String name; public String getName() { return name; } public void setName(final String name) { this.name = name; } /** * {@inheritDoc} */ @NotNull public T commonAncestor(Collection<T> knownMergeIds, double proportion) throws NoSuchNodeException { Set<List<PhylogenyNode<T>>> theDisposableAncestorLists = new HashSet<List<PhylogenyNode<T>>>(); for (T id : knownMergeIds) { try { PhylogenyNode<T> node = getNode(id); theDisposableAncestorLists.add(new ArrayList<PhylogenyNode<T>>(node.getAncestorPath())); } catch (NoSuchNodeException e) { logger.debug("Node not found with id " + id + " when looking for common ancestor; ignoring"); } } int numberThatMustAgree = (int) Math.ceil(theDisposableAncestorLists.size() * proportion); PhylogenyNode<T> commonAncestor = null; try { while (true) { commonAncestor = DSCollectionUtils.getDominantFirstElement(theDisposableAncestorLists, numberThatMustAgree); // throws NoSuchElementException theDisposableAncestorLists = DSCollectionUtils .filterByAndRemoveFirstElement(theDisposableAncestorLists, commonAncestor); } } catch (NoSuchElementException e) { // good, broke the loop, leaving commonAncestor and theAncestorLists in the most recent valid state } /* while (DSCollectionUtils.allFirstElementsEqual(theAncestorLists)) { commonAncestor = DSCollectionUtils.removeAllFirstElements(theAncestorLists); } */ if (commonAncestor == null) { throw new NoSuchNodeException("Nodes have no common ancestor"); //return null; } return commonAncestor.getPayload(); } /** * {@inheritDoc} */ @NotNull public T commonAncestor(T nameA, T nameB) throws NoSuchNodeException { //commonAncestorCache.get(nameA, nameB) PhylogenyNode<T> a = getNode(nameA); PhylogenyNode<T> b = getNode(nameB); return commonAncestor(a, b).getPayload(); } /** * {@inheritDoc} */ @NotNull public PhylogenyNode<T> commonAncestor(@NotNull PhylogenyNode<T> a, @NotNull PhylogenyNode<T> b) throws NoSuchNodeException { List<PhylogenyNode<T>> ancestorsA = new LinkedList<PhylogenyNode<T>>(a.getAncestorPath()); List<PhylogenyNode<T>> ancestorsB = new LinkedList<PhylogenyNode<T>>(b.getAncestorPath()); PhylogenyNode<T> commonAncestor = null; while (ancestorsA.size() > 0 && ancestorsB.size() > 0 && ancestorsA.get(0).equals(ancestorsB.get(0))) { commonAncestor = ancestorsA.remove(0); ancestorsB.remove(0); } if (commonAncestor == null) { throw new NoSuchNodeException("Nodes have no common ancestor"); } return commonAncestor; } public boolean isDescendant(T ancestor, T descendant) { try { //** depends on PhylogenyNode.equals() working right. Does it? List<T> ancestorPath = getAncestorPathIds(descendant); if (ancestorPath == null) { return false; } return ancestorPath.contains(ancestor); // lame //return ancestor.equals(commonAncestor(ancestor, descendant)); } catch (NoSuchNodeException e) { return false; } } public Set<T> selectAncestors(final Collection<T> labels, final T id) { try { List<T> ancestorPath = getAncestorPathIds(id); return DSCollectionUtils.intersectionSet(ancestorPath, labels); } catch (NoSuchNodeException e) { return new HashSet<T>(); } } public boolean isDescendant(PhylogenyNode<T> ancestor, PhylogenyNode<T> descendant) { try { return ancestor.equals(commonAncestor(ancestor, descendant)); } catch (NoSuchNodeException e) { return false; } } /** * {@inheritDoc} */ /* @NotNull public RootedPhylogeny<T> extractTreeWithLeafIDs(Collection<T> ids) throws NoSuchNodeException { return extractTreeWithLeafIDs(ids, false, false); }*/ /** * {@inheritDoc} */ @NotNull public BasicRootedPhylogeny<T> extractTreeWithLeafIDs(Set<T> ids, boolean ignoreAbsentNodes, boolean includeInternalBranches) throws NoSuchNodeException //, NodeNamer<T> namer { return extractTreeWithLeafIDs(ids, ignoreAbsentNodes, includeInternalBranches, MutualExclusionResolutionMode.EXCEPTION); } /** * {@inheritDoc} */ @NotNull public BasicRootedPhylogeny<T> extractTreeWithLeafIDs(Set<T> ids, boolean ignoreAbsentNodes, boolean includeInternalBranches, MutualExclusionResolutionMode mode) throws NoSuchNodeException //, NodeNamer<T> namer { /*try { if (getLeafValues().equals(ids) && includeInternalBranches) { return this; } } catch (TreeRuntimeException e) { // the actual tree is expensive to load (e.g. NcbiTaxonomyService) so getLeafValues is a bad idea // OK, just do the explicit extraction anyway then } */ /* List<PhylogenyNode<T>> theLeaves = idsToLeaves(ids, ignoreAbsentNodes); if (theLeaves.isEmpty()) { throw new NoSuchNodeException("No leaves found for ids: " + ids); } RootedPhylogeny<T> result = extractTreeWithLeaves(theLeaves, includeInternalBranches, mode); */ Set<List<? extends PhylogenyNode<T>>> theDisposableLeafPaths = idsToDisposableBasicLeafPaths(ids, ignoreAbsentNodes); if (theDisposableLeafPaths.isEmpty()) { throw new NoSuchNodeException("No leaves found for ids: " + ids); } BasicRootedPhylogeny<T> result = extractTreeWithLeafPaths(theDisposableLeafPaths, includeInternalBranches, mode); Collection<T> gotLeaves = result.getLeafValues(); Collection<T> gotNodes = result.getNodeValues(); // all the leaves that were found were leaves that were requested assert ids.containsAll(gotLeaves); // BAD confusing interaction between all three parameters //if (includeInternalBranches && !ignoreAbsentNodes) //(mode == MutualExclusionResolutionMode.LEAF || mode == MutualExclusionResolutionMode.BOTH)) if (!ignoreAbsentNodes) { // some requested leaves may turn out to be internal nodes, but at least they should all be accounted for assert gotNodes.containsAll(ids); } /* if (!ignoreAbsentNodes) { // any requested leaves that turned out to be internal nodes should have had a phantom leaf added assert gotLeaves.containsAll(ids); } */ return result; } /* private List<PhylogenyNode<T>> idsToLeaves(Set<T> ids, boolean ignoreAbsentNodes) throws NoSuchNodeException { // don't use HashSet, to avoid calling hashcode since that requires a transaction //Set<PhylogenyNode<T>> theLeaves = new HashSet<PhylogenyNode<T>>(); List<PhylogenyNode<T>> theLeaves = new ArrayList<PhylogenyNode<T>>(); for (T id : ids) { try { PhylogenyNode<T> n = getNode(id); theLeaves.add(n); } catch (NoSuchNodeException e) { if (!ignoreAbsentNodes) { throw new NoSuchNodeException("Can't extract tree; requested node " + id + " not found"); } } } return theLeaves; }*/ /** * @param ids * @param ignoreAbsentNodes * @return * @throws NoSuchNodeException */ protected Set<List<? extends PhylogenyNode<T>>> idsToDisposableBasicLeafPaths(Set<T> ids, boolean ignoreAbsentNodes) throws NoSuchNodeException { // don't use HashSet, to avoid calling hashcode since that requires a transaction //Set<PhylogenyNode<T>> theLeaves = new HashSet<PhylogenyNode<T>>(); Set<List<? extends PhylogenyNode<T>>> theLeafPaths = new HashSet<List<? extends PhylogenyNode<T>>>(); for (T id : ids) { try { List<? extends PhylogenyNode<T>> safecopy = new ArrayList<PhylogenyNode<T>>( getAncestorPathAsBasic(id)); theLeafPaths.add(safecopy); } catch (NoSuchNodeException e) { if (!ignoreAbsentNodes) { throw new NoSuchNodeException("Can't extract tree; requested node " + id + " not found"); } } } return theLeafPaths; } @NotNull public BasicRootedPhylogeny<T> extractTreeWithLeaves(Collection<? extends PhylogenyNode<T>> leaves, boolean includeInternalBranches, MutualExclusionResolutionMode mode) //, NodeNamer<T> namer) { // leaves must be unique, so we really wanted a Set, but we had to use a List in idsToLeaves above to avoid calling hashcode. // Still, the ids were in a Set to begin with, so uniqueness should be guaranteed anyway. // assert DSCollectionUtils.setOfLastElements(leaves).size() == leaves.size(); // we're going to destroy the ancestorlists in the process of extracting the tree, so make copies first final Set<List<? extends PhylogenyNode<T>>> theDisposableAncestorLists = new HashSet<List<? extends PhylogenyNode<T>>>( leaves.size()); for (final PhylogenyNode<T> leaf : leaves) { theDisposableAncestorLists.add(new ArrayList<PhylogenyNode<T>>(leaf.getAncestorPath())); } return extractTreeWithLeafPaths(theDisposableAncestorLists, includeInternalBranches, mode); } @NotNull public BasicRootedPhylogeny<T> extractTreeWithLeafPaths( final Set<List<? extends PhylogenyNode<T>>> theDisposableAncestorLists, final boolean includeInternalBranches, final MutualExclusionResolutionMode mode) { BasicPhylogenyNode<T> commonAncestor = null; try { commonAncestor = extractSubtreeWithLeafPaths(theDisposableAncestorLists, includeInternalBranches, mode); //, namer); } catch (NoSuchNodeException e) { logger.error("Error", e); throw new TreeRuntimeException(e); } // always use the same root, even if it has only one child BasicRootedPhylogeny<T> newTree = new BasicRootedPhylogeny<T>(this.getPayload()); if (!commonAncestor.getPayload().equals(this.getPayload())) { // add a single branch descending from the root to the common ancestor //** this will have a length of zero, I think, but that's OK ?? commonAncestor.setParent(newTree.getRoot()); //newRoot = new BasicPhylogenyNode<T>(newRoot, commonAncestor.getValue(), commonAncestor.getLength()); } else { newTree.setRoot(commonAncestor); } newTree.assignUniqueIds(new RequireExistingNodeNamer<T>(true)); newTree.setBasePhylogeny(this); // assert newTree.getNodes().containsAll(leaves); // assert CollectionUtils.isEqualCollection(newTree.getLeaves(),leaves); return newTree; } /* private void deepCopy(PhylogenyNode<T> from, BasicPhylogenyNode<T> to) { for (PhylogenyNode<T> fromChild : from.getChildren()) { BasicPhylogenyNode<T> toChild = new BasicPhylogenyNode<T>(to, fromChild);// may produce ClassCastException deepCopy(fromChild, toChild); //child.setParent(newRoot); } } */ @NotNull public List<T> getAncestorPathIds(final T id) throws NoSuchNodeException { return getNode(id).getAncestorPathPayloads(); } public List<? extends PhylogenyNode<T>> getAncestorPath(final T id) throws NoSuchNodeException { return getNode(id).getAncestorPath(); } // the computingMap thing doesn't work right over a Hessian proxy; just do it manually Map<PhylogenyNode<T>, BasicPhylogenyNode<T>> convertedNodes = new HashMap<PhylogenyNode<T>, BasicPhylogenyNode<T>>(); /* public final ConcurrentMap<PhylogenyNode<T>, BasicPhylogenyNode<T>> convertedNodes = new MapMaker().makeComputingMap(new Function<PhylogenyNode<T>, BasicPhylogenyNode<T>>() { public BasicPhylogenyNode<T> apply(final PhylogenyNode<T> origNode) { BasicPhylogenyNode<T> convertedNode; if (origNode instanceof BasicPhylogenyNode) { convertedNode = (BasicPhylogenyNode<T>) origNode; } else { BasicPhylogenyNode<T> parent = convertedNodes.get(origNode.getParent()); convertedNode = new BasicPhylogenyNode<T>(parent, origNode); } return convertedNode; } }); */ private BasicPhylogenyNode<T> convertToBasic(final PhylogenyNode<T> origNode) { BasicPhylogenyNode<T> convertedNode = convertedNodes.get(origNode); if (convertedNode == null) { if (origNode instanceof BasicPhylogenyNode) { convertedNode = (BasicPhylogenyNode<T>) origNode; } else { BasicPhylogenyNode<T> parent = convertToBasic(origNode.getParent()); convertedNode = new BasicPhylogenyNode<T>(parent, origNode); } convertedNodes.put(origNode, convertedNode); } return convertedNode; } @NotNull public List<BasicPhylogenyNode<T>> getAncestorPathAsBasic(final T id) throws NoSuchNodeException { PhylogenyNode<T> n = getNode(id); BasicPhylogenyNode<T> convertedNode = convertToBasic(n); return Collections.unmodifiableList(convertedNode.getAncestorPath()); /* List<? extends PhylogenyNode<T>> orig = getNode(id).getAncestorPath(); ArrayList<BasicPhylogenyNode<T>> result = new ArrayList<BasicPhylogenyNode<T>>(); BasicPhylogenyNode<T> parent = null; for (PhylogenyNode<T> origNode : orig) { BasicPhylogenyNode<T> convertedNode; if (origNode instanceof BasicPhylogenyNode) { convertedNode = (BasicPhylogenyNode<T>) origNode; } else { convertedNode = new BasicPhylogenyNode<T>(parent, origNode); } result.add(convertedNode); parent = convertedNode; } return Collections.unmodifiableList(result);*/ } /** * When we request extraction of a tree with a bunch of nodes, and one of those nodes is an ancestor of the other, do * we include only the leaf, only the ancestor, both, or throw an exception? BOTHNOBRANCHLENGTH is a compromise * between ANCESTOR and BOTH; both nodes are provided, but all branch lengths below the ancestor are set to zero. */ public enum MutualExclusionResolutionMode { LEAF, ANCESTOR, BOTH, EXCEPTION //BOTHNOBRANCHLENGTH, } /** * Builds a fresh tree containing all of the requested leaves, which are the last elements in the provided * AncestorLists. Each AncestorList describes the path from the root to one of the leaves. The roots (the first * element of each list) must be equal; a copy of that root provides the root of the newly built tree. If * includeInternalBranches is set, then all elements of the AncestorLists will be included in the resulting tree even * if there is no branching at that node. * * @param theDisposableAncestorLists * @return * @throws TreeException */ @NotNull protected BasicPhylogenyNode<T> extractSubtreeWithLeafPaths( Set<List<? extends PhylogenyNode<T>>> theDisposableAncestorLists, boolean includeInternalBranches, MutualExclusionResolutionMode mode) throws NoSuchNodeException //, NodeNamer<T> namer) { BasicPhylogenyNode<T> result; // this was spaghetti before when I tried to handle both modes together if (includeInternalBranches) { result = extractSubtreeWithLeafPathsIncludingInternal(theDisposableAncestorLists, mode); } else { result = extractSubtreeWithLeafPathsExcludingInternal(theDisposableAncestorLists, mode); } // currently we deal with MutualExclusionResolutionMode (in the form of allowRequestingInternal nodes) in the course of the tree-building recursion above, // but, would it be easier to build the whole tree and then postprocess? /* for (PhylogenyNode<T> node : result) { gah } */ return result; } private BasicPhylogenyNode<T> extractSubtreeWithLeafPathsExcludingInternal( Set<List<? extends PhylogenyNode<T>>> theDisposableAncestorLists, MutualExclusionResolutionMode mode) throws NoSuchNodeException { double accumulatedLength = 0; // use this as a marker to test that the provided lists were actually consistent PhylogenyNode<T> commonAncestor = null; BasicPhylogenyNode<T> bottomOfChain = null; // first consume any common prefix on the ancestor lists do { while (DSCollectionUtils.allFirstElementsEqual(theDisposableAncestorLists)) { commonAncestor = DSCollectionUtils.removeAllFirstElements(theDisposableAncestorLists); Double d = commonAncestor.getLength(); if (d == null) { //logger.warn("Ignoring null length at node " + commonAncestor); } else { accumulatedLength += d; } } } while (!resolveMutualExclusion(theDisposableAncestorLists, mode)); // that returns false only if we're in LEAF mode, i.e. so far we found a requested ancestor node but we want to ignore it // if includeInternalBranches is off, and BOTH mode is requested, and the requested ancestor node happens to be a branch point anyway, then that's OK. // But if BOTH mode requires including a node that is disallowed because includeInternalBranches is off... well, just include it anyway. // now the lists must differ in their first position, and commonAncestor is set to the immediate parent of whatever the list heads are if (commonAncestor == null) // only possible if allFirstElementsEqual == false on the first attempt { throw new NoSuchNodeException("Provided ancestor lists do not have a common root"); } // since we are not including internal branches, we now need to create the branching node BasicPhylogenyNode<T> node = new BasicPhylogenyNode<T>(); node.setLength(accumulatedLength); // the commonAncestor is now the most recent one, so that's the most sensible name for the new node node.setPayload(commonAncestor.getPayload()); node.setWeight(commonAncestor.getWeight()); bottomOfChain = node; // split the ancestor lists into sets with a common head Collection<Set<List<? extends PhylogenyNode<T>>>> childAncestorLists = separateFirstAncestorSets( theDisposableAncestorLists); assert childAncestorLists.size() != 1; // otherwise there should be no branch here // recurse for (Set<List<? extends PhylogenyNode<T>>> childAncestorList : childAncestorLists) { PhylogenyNode<T> child = extractSubtreeWithLeafPathsExcludingInternal(childAncestorList, mode); child.setParent(bottomOfChain); } return bottomOfChain.findRoot(); } private BasicPhylogenyNode<T> extractSubtreeWithLeafPathsIncludingInternal( Set<List<? extends PhylogenyNode<T>>> theDisposableAncestorLists, MutualExclusionResolutionMode mode) throws NoSuchNodeException { // use this as a marker to test that the provided lists were actually consistent PhylogenyNode<T> commonAncestor = null; BasicPhylogenyNode<T> bottomOfChain = null; // first consume any common prefix on the ancestor lists while (DSCollectionUtils.allFirstElementsEqual(theDisposableAncestorLists)) { commonAncestor = DSCollectionUtils.removeAllFirstElements(theDisposableAncestorLists); // copy the common ancestor to the new tree BasicPhylogenyNode<T> node = new BasicPhylogenyNode<T>(); node.setLength(commonAncestor.getLength()); node.setPayload(commonAncestor.getPayload()); //** avoid isLeaf due to ncbi lazy initialization issue //if (commonAncestor.isLeaf()) // { // don't bother with internal weights; they'll get recalculated on demand anyway if (bottomOfChain != null) { bottomOfChain.setWeight(null); // just to be sure } try { node.setWeight(commonAncestor.getWeight()); } catch (NotImplementedException e) { node.setWeight(1.0); } node.setParent(bottomOfChain); bottomOfChain = node; // we don't react to the result of resolveMutualExclusion, but we have to run it anyway in case we're in ANCESTOR mode and should prune a subtree resolveMutualExclusion(theDisposableAncestorLists, mode); /*if(resolveMutualExclusion(theAncestorLists, mode)) { // we wanted to include this node anyway since we're in ANCESTOR or BOTH mode; no problem } else { // we requested an ancestor but we are in LEAF mode. If we weren't including internal branches, we'd want to ignore this node. // since we are, though, we'll include it anyway (i.e., if we just ignored the "ancestor" path, this node would still be on the // "leaf" path, so we'd want to include it. } */ } // now the lists must differ in their first position, and commonAncestor is set to the immediate parent of whatever the list heads are if (commonAncestor == null) // only possible if allFirstElementsEqual == false on the first attempt { throw new NoSuchNodeException("Provided ancestor lists do not have a common root"); } // split the ancestor lists into sets with a common head Collection<Set<List<? extends PhylogenyNode<T>>>> childAncestorLists = separateFirstAncestorSets( theDisposableAncestorLists); // recurse for (Set<List<? extends PhylogenyNode<T>>> childAncestorList : childAncestorLists) { PhylogenyNode<T> child = extractSubtreeWithLeafPathsIncludingInternal(childAncestorList, mode); child.setParent(bottomOfChain); } return bottomOfChain.findRoot(); } /* private void addPhantomLeafIfNeeded(Set<List<PhylogenyNode<T>>> theAncestorLists, BasicPhylogenyNode<T> node, NodeNamer<T> namer) { // check if we need a leaf node here boolean needStubLeafNode = false; for (List<PhylogenyNode<T>> ancestorList : theAncestorLists) { if (ancestorList.isEmpty()) { needStubLeafNode = true; break; } } if (needStubLeafNode) { // an internal node was requested as a leaf. // add a phantom leaf to honor the request, and then continue with the other paths BasicPhylogenyNode<T> leaf = new BasicPhylogenyNode<T>(); leaf.setLength(0.0); leaf.setValue(node.getValue()); leaf.setParent(node); //** changing the ID of the internal node may cause trouble later, e.g. when trying to make an intersection tree node.setValue(namer.uniqueify(node.getValue())); // note we leave bottomOfChain intact } } */ /** * Returns true if the current node should be included; false if it should be ignored * * @param theDisposableAncestorLists * @param mode * @return */ private boolean resolveMutualExclusion(Set<List<? extends PhylogenyNode<T>>> theDisposableAncestorLists, MutualExclusionResolutionMode mode) { // if there is only one list left, and it's empty, that's OK, we just finished a branch if (theDisposableAncestorLists.size() == 1) { return true; } assert theDisposableAncestorLists.size() > 1; // but if there's more than one, and one of them is empty, then we asked for a node as a leaf that turns out to be an ancestor of another leaf. // if we give the same path twice, that causes a failure here. Note leaf id uniqueness constraints above. Iterator<List<? extends PhylogenyNode<T>>> iterator = theDisposableAncestorLists.iterator(); while (iterator.hasNext()) { List<? extends PhylogenyNode<T>> ancestorList = iterator.next(); // there can be at most one empty list here due to leaf id uniqueness, so it's safe to return immediately rather than testing the rest if (ancestorList.isEmpty()) { if (mode == MutualExclusionResolutionMode.LEAF) { // don't include this node (we're currently at the ancestor) iterator.remove(); return false; } if (mode == MutualExclusionResolutionMode.ANCESTOR) { // remove all paths extending below this. // this would cause ConcurrentModificationException except that we return and never touch the iterator again theDisposableAncestorLists.clear(); theDisposableAncestorLists.add(new ArrayList<PhylogenyNode<T>>()); // the ancestor itself. Maybe not strictly necessary, but for consistency anyway return true; } if (mode == MutualExclusionResolutionMode.BOTH) { iterator.remove(); return true; } else // if (mode == EXCEPTION) { throw new TreeRuntimeException("Requested extraction of an internal node as a leaf"); } } } /*for (List<PhylogenyNode<T>> ancestorList : theAncestorLists) { if (ancestorList.isEmpty()) { throw new TreeRuntimeException("Requested extraction of an internal node as a leaf"); } }*/ // if we got here, then there are multiple ancestor lists and none of them is empty, so this is a branch point. return true; } private Collection<Set<List<? extends PhylogenyNode<T>>>> separateFirstAncestorSets( Set<List<? extends PhylogenyNode<T>>> theAncestorLists) { // assert allFirstElementsEqual(theAncestorLists); Map<PhylogenyNode<T>, Set<List<? extends PhylogenyNode<T>>>> theSeparatedSets = new HashMap<PhylogenyNode<T>, Set<List<? extends PhylogenyNode<T>>>>(); for (List<? extends PhylogenyNode<T>> theAncestorList : theAncestorLists) { if (theAncestorList.isEmpty()) { //we've arrived at one of the originally requested nodes. // if it's a leaf, then theAncestorLists should contain only one (empty) list. // no problem, we just return an empty set since there are no children. // if it's an internal node, we can just ignore it since it's already accounted for in the subtree extraction. // we do want to process any descendants though. // in either case, we just ignore this situation. } else { PhylogenyNode<T> commonAncestor = theAncestorList.get(0); Set<List<? extends PhylogenyNode<T>>> theChildList = theSeparatedSets.get(commonAncestor); if (theChildList == null) { theChildList = new HashSet<List<? extends PhylogenyNode<T>>>(); theSeparatedSets.put(commonAncestor, theChildList); } theChildList.add(theAncestorList); } } return theSeparatedSets.values(); } /** * {@inheritDoc} */ public double distanceBetween(T nameA, T nameB) throws NoSuchNodeException { PhylogenyNode a = getNode(nameA); PhylogenyNode b = getNode(nameB); return distanceBetween(a, b); } /** * {@inheritDoc} */ public double distanceBetween(PhylogenyNode<T> a, PhylogenyNode<T> b) throws NoSuchNodeException { // PERF might be a better way to do this than copy + remove nodes? List<PhylogenyNode<T>> ancestorsA = new ArrayList<PhylogenyNode<T>>(a.getAncestorPath()); List<PhylogenyNode<T>> ancestorsB = new ArrayList<PhylogenyNode<T>>(b.getAncestorPath()); int commonAncestors = 0; while (!ancestorsA.isEmpty() && !ancestorsB.isEmpty() && ancestorsA.get(0).equals(ancestorsB.get(0))) { ancestorsA.remove(0); ancestorsB.remove(0); commonAncestors++; } if (commonAncestors == 0) { throw new NoSuchNodeException("Can't compute distance between nodes with no common ancestor"); } double dist = 0; for (PhylogenyNode<T> n : ancestorsA) { dist += n.getLength(); } for (PhylogenyNode<T> n : ancestorsB) { dist += n.getLength(); } return dist; } /** * {@inheritDoc} */ public double getTotalBranchLength() { double result = 0; for (PhylogenyNode<T> node : getUniqueIdToNodeMap().values()) { if (node.getLength() != null)// count null length as zero { result += node.getLength(); } } return result; } /** * {@inheritDoc} */ public void setAllBranchLengthsTo(Double d) { for (PhylogenyNode<T> node : getUniqueIdToNodeMap().values()) { node.setLength(d); } } /** * {@inheritDoc} */ public void setLeafWeightsRandom(ContinuousDistribution1D speciesAbundanceDistribution) //throws TreeException//throws DistributionException { for (PhylogenyNode<T> leaf : getLeaves()) { leaf.setWeight(speciesAbundanceDistribution.sample()); } try { normalizeWeights(); } catch (TreeException e) { logger.error("Error", e); throw new Error("Impossible"); } } /** * {@inheritDoc} */ public void setLeafWeightsUniform() // throws TreeException//throws DistributionException { for (PhylogenyNode<T> leaf : getLeaves()) { leaf.setWeight(1.); } try { normalizeWeights(); } catch (TreeException e) { logger.error("Error", e); throw new Error("Impossible"); } } public Map<T, Double> distributeInternalWeightsToLeaves(Map<T, Double> taxIdToWeightMap) throws NoSuchNodeException { MutableWeightedSet<T> result = new ConcurrentHashWeightedSet<T>(); for (Map.Entry<T, Double> entry : taxIdToWeightMap.entrySet()) { T id = entry.getKey(); Double weight = entry.getValue(); try { PhylogenyNode<T> n = getNode(id); distributeWeight(n, weight, result); } catch (NoSuchNodeException e) { // this can only happen if we already issued a warning about "node not found" logger.warn("Requested member weight dropped: " + id + " " + weight); } } return result.getItemNormalizedMap(); } private void distributeWeight(PhylogenyNode<T> n, Double weight, MutableWeightedSet<T> result) throws NoSuchNodeException { if (n.isLeaf()) { result.add(n.getPayload(), weight, 1); //result.incrementItems(); } else { List<? extends PhylogenyNode<T>> children = n.getChildren(); double childWeight = weight / children.size(); for (PhylogenyNode<T> child : children) { distributeWeight(child, childWeight, result); } } } /** * {@inheritDoc} */ public void setLeafWeights(Multiset<T> leafWeights) throws TreeException { for (PhylogenyNode<T> leaf : getLeaves()) { int value = leafWeights.count(leaf.getPayload()); leaf.setWeight(new Double(value)); } normalizeWeights(); } /** * {@inheritDoc} */ public void setLeafWeights(Map<T, Double> leafWeights) throws TreeException { for (PhylogenyNode<T> leaf : getLeaves()) { Double value = leafWeights.get(leaf.getPayload()); if (value == null) { throw new TreeException("No leaf weight provided for " + leaf); } leaf.setWeight(value); } normalizeWeights(); } public Map<T, Double> getLeafWeights() //throws TreeException { Map<T, Double> result = new HashMap<T, Double>(); for (PhylogenyNode<T> leaf : getLeaves()) { result.put(leaf.getPayload(), leaf.getWeight()); } return result; } public Map<T, Double> getNodeWeights() //throws TreeException { Map<T, Double> result = new HashMap<T, Double>(); for (PhylogenyNode<T> node : this) { result.put(node.getPayload(), node.getWeight()); } return result; } /** * {@inheritDoc} */ public void normalizeWeights() throws TreeException { // first normalize at the leaves double total = 0; for (PhylogenyNode<T> leaf : getLeaves()) { Double w = leaf.getWeight(); if (w == null) { throw new TreeException("Can't normalize when a leaf weight is null"); } total += w; } for (PhylogenyNode<T> leaf : getLeaves()) { leaf.setWeight(leaf.getWeight() / total); } // then propagate up //propagateWeightFromBelow(); } /** * {@inheritDoc} */ public RootedPhylogeny<T> getBasePhylogeny() { return basePhylogeny; } /** * {@inheritDoc} */ @NotNull public RootedPhylogeny<T> getBasePhylogenyRecursive() { if (basePhylogeny == null) { return this; } return basePhylogeny.getBasePhylogenyRecursive(); } public void setBasePhylogeny(RootedPhylogeny<T> basePhylogeny) { this.basePhylogeny = basePhylogeny; } /** * {@inheritDoc} */ public BasicRootedPhylogeny<T> extractIntersectionTree(Collection<T> leafIdsA, Collection<T> leafIdsB, NodeNamer<T> namer) throws NoSuchNodeException, TreeException { Set<PhylogenyNode<T>> allTreeNodesA = new HashSet<PhylogenyNode<T>>(); for (T id : leafIdsA) { allTreeNodesA.addAll(getNode(id).getAncestorPath()); } Set<PhylogenyNode<T>> allTreeNodesB = new HashSet<PhylogenyNode<T>>(); for (T id : leafIdsB) { allTreeNodesB.addAll(getNode(id).getAncestorPath()); } allTreeNodesA.retainAll(allTreeNodesB); // now allTreeNodesA contains all nodes that are in common between the two input leaf sets, including internal nodes // remove internal nodes for (PhylogenyNode<T> node : new HashSet<PhylogenyNode<T>>(allTreeNodesA)) { allTreeNodesA.remove(node.getParent()); } return extractTreeWithLeaves(allTreeNodesA, false, MutualExclusionResolutionMode.EXCEPTION); } /** * {@inheritDoc} */ public BasicRootedPhylogeny<T> mixWith(RootedPhylogeny<T> otherTree, double mixingProportion) throws TreeException //NoSuchNodeException { if (mixingProportion < 0 || mixingProportion > 1) { throw new TreeException("Mixing proportion must be between 0 and 1"); } //RootedPhylogeny<T> theBasePhylogeny = getBasePhylogeny(); if (basePhylogeny == null || basePhylogeny != otherTree.getBasePhylogeny()) { throw new TreeException( "Phylogeny mixtures can be computed only between trees extracted from the same underlying tree"); } try { Set<T> unionLeaves = new HashSet<T>(); unionLeaves.addAll(getLeafValues()); unionLeaves.addAll(otherTree.getLeafValues()); BasicRootedPhylogeny<T> unionTree = basePhylogeny.extractTreeWithLeafIDs(unionLeaves, false, false, MutualExclusionResolutionMode.EXCEPTION); for (PhylogenyNode<T> node : getLeaves()) { unionTree.getNode(node.getPayload()).setWeight(node.getWeight() * mixingProportion); } for (PhylogenyNode<T> node : otherTree.getLeaves()) { unionTree.getNode(node.getPayload()).incrementWeightBy(node.getWeight() * (1. - mixingProportion)); } unionTree.normalizeWeights(); return unionTree; } catch (NoSuchNodeException e) { logger.error("Error", e); throw new TreeRuntimeException(e); } } /** * {@inheritDoc} */ public void smoothWeightsFrom(RootedPhylogeny<T> otherTree, double smoothingFactor) throws TreeException //throws TreeException { /*RootedPhylogeny<T> theBasePhylogeny = getBasePhylogeny(); if (theBasePhylogeny != otherTree.getBasePhylogeny()) { throw new TreeException( "Phylogeny mixtures can be computed only between trees extracted from the same underlying tree"); } */ //** if the otherTree has leaves that are not present in this tree, we'll ignore them and never know. // That circumstance should probably throw an exception, but it's a bit of a drag to test for it. try { for (PhylogenyNode<T> leaf : getLeaves())//theBasePhylogeny.getLeaves()) { T leafId = leaf.getPayload(); PhylogenyNode<T> otherLeaf = null; final PhylogenyNode<T> node = getNode(leafId); try { otherLeaf = otherTree.getNode(leafId); node.setWeight(otherLeaf.getWeight() + smoothingFactor); } catch (NoSuchNodeException e) { node.setWeight(smoothingFactor); } } } catch (NoSuchNodeException e) { logger.error("Error", e); throw new TreeRuntimeException(e); } normalizeWeights(); } /** * {@inheritDoc} */ @Override public abstract RootedPhylogeny<T> clone(); /** * {@inheritDoc} */ @Override public String toString() { StringBuffer sb = new StringBuffer("\n"); appendSubtree(sb, ""); return sb.toString(); } /* public void saveState() { } */ @NotNull public T getShallowestLeaf() { try { T shallowestId = null; double shallowestDepth = Double.POSITIVE_INFINITY; for (PhylogenyNode<T> n : getLeaves()) { //PhylogenyNode<Integer> n = theIntegerTree.getNode(id); double depth = distanceBetween(getRoot(), n); T nId = n.getPayload(); // BAD if two depths are exactly equal, then the result is nondeterministic // try to impose a deterministic order using the id hashcodes if (depth < shallowestDepth || (depth == shallowestDepth && nId.hashCode() < shallowestId.hashCode())) { shallowestDepth = depth; shallowestId = nId; } } return shallowestId; } catch (NoSuchNodeException e) { throw new Error("Impossible"); } } public PhylogenyNode<T> getFirstBranchingNode() { PhylogenyNode<T> r = getRoot(); while (r.getChildren().size() == 1) { r = r.getChildren().iterator().next(); } return r; } public PhylogenyNode<T> getRandomLeafBelow() { return getRoot().getRandomLeafBelow(); } //private static final int MAX_SEARCH_ITERATIONS = 1000; public T getLeafAtApproximateDistance(final T aId, final double minDesiredTreeDistance, final double maxDesiredTreeDistance) throws NoSuchNodeException { // we want to select a bunch of random nodes and then pick the one closest to the desired distance // but doing this over the whole tree is inefficient; we can constrain the search to the subtree that can possibly be within that distance double distanceToSubtreeRoot = 0; final PhylogenyNode<T> queryNode = getNode(aId); PhylogenyNode<T> p = queryNode; PhylogenyNode<T> root = getRoot(); Map<PhylogenyNode<T>, Double> candidateRoots = new HashMap<PhylogenyNode<T>, Double>(); //new ConcurrentSkipListMap<PhylogenyNode<T>, Double>(); while (distanceToSubtreeRoot <= maxDesiredTreeDistance && p != root) { candidateRoots.put(p, distanceToSubtreeRoot); distanceToSubtreeRoot += p.getLength(); p = p.getParent(); } candidateRoots.remove(queryNode); // now p is the root of the subtree that can possibly contain the node we want, and candidateRoots contains all the nodes along the ancestor path to it Collection<PhylogenyNode<T>> candidates = new HashSet<PhylogenyNode<T>>(); //new ConcurrentSkipListSet<PhylogenyNode<T>>(); // PERF Parallel.forEach for (Map.Entry<PhylogenyNode<T>, Double> entry : candidateRoots.entrySet()) { PhylogenyNode<T> candidateRoot = entry.getKey(); double candidateRootHeight = entry.getValue(); candidateRoot.collectLeavesBelowAtApproximateDistance(minDesiredTreeDistance - candidateRootHeight, maxDesiredTreeDistance - candidateRootHeight, candidates); } // now all of the candidates meet the criteria. if (candidates.isEmpty()) { throw new NoSuchNodeException("No node found with distance between " + minDesiredTreeDistance + " and " + maxDesiredTreeDistance + " of " + aId); } // just pick one // ** could try to pick the closest to the request, i.e. the middle of the range? return DSCollectionUtils.chooseRandom(candidates).getPayload(); // for (int i = 0; i < MAX_SEARCH_ITERATIONS; i++) // { // //** Note getRandomLeafBelow is weighted by tree structure (uniform at each node on the path, not uniform over leaves) // PhylogenyNode<T> candidate = p.getRandomLeafBelow(); // double candidateDistance = distanceBetween(queryNode, candidate); // if (candidateDistance >= minDesiredTreeDistance && candidateDistance <= maxDesiredTreeDistance) // { // return candidate.getPayload(); // } // } // // throw new NoSuchNodeException( // "Could not find a node in the requested distance range (" + minDesiredTreeDistance + " - " // + maxDesiredTreeDistance + " from " + aId + ") after " + MAX_SEARCH_ITERATIONS + " attempts"); } public int countDescendantsIncludingThis() { int result = 1; for (PhylogenyNode<T> c : getChildren()) { result += c.countDescendantsIncludingThis(); } return result; } }