edu.berkeley.compbio.phyloutils.HugenholtzTaxonomyService.java Source code

Introduction

Here is the source code for edu.berkeley.compbio.phyloutils.HugenholtzTaxonomyService.java. The class provides an Integer-keyed view onto the Hugenholtz (greengenes) taxonomy: prokMSA_ids at the leaves, generated IDs at internal nodes, and lookup of nodes by String name, including semicolon-delimited multilevel classifications.
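
Before the full listing, here is a minimal usage sketch. It assumes plain hand wiring; the file names are placeholders taken from the commented-out defaults in the source, someSynonymService stands for any available TaxonomySynonymService implementation, and the classification string is only an example.

HugenholtzTaxonomyService taxonomy = new HugenholtzTaxonomyService();
taxonomy.setHugenholtzFilename("greengenes.all.tree.allids.gz");      // newick tree with prokMSA_ids at the leaves
taxonomy.setGreengenesRawFilename("greengenes16SrRNAgenes.txt.gz");   // raw greengenes records (optional)
taxonomy.setNameToProkMSAidFilename("overrideNameToProkMSAid.txt");   // name-to-prokMSA_id overrides (optional)
taxonomy.setSynonymService(someSynonymService);                       // any TaxonomySynonymService implementation
taxonomy.init();                                                      // builds or reloads the cached Integer tree

// Both calls throw NoSuchNodeException if the name or id cannot be resolved.
Integer id = taxonomy.findTaxidByName("Bacteria; Proteobacteria; Gammaproteobacteria");
double depthFromRoot = taxonomy.getDepthFromRoot(id);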

Source

/*
 * Copyright (c) 2006-2013  David Soergel  <dev@davidsoergel.com>
 * Licensed under the Apache License, Version 2.0
 * http://www.apache.org/licenses/LICENSE-2.0
 */

package edu.berkeley.compbio.phyloutils;

import com.davidsoergel.dsutils.CacheManager;
import com.davidsoergel.dsutils.DSStringUtils;
import com.davidsoergel.dsutils.collections.DSCollectionUtils;
import com.davidsoergel.dsutils.file.StringSetIntMapReader;
import com.davidsoergel.trees.AbstractRootedPhylogeny;
import com.davidsoergel.trees.BasicPhylogenyNode;
import com.davidsoergel.trees.BasicRootedPhylogeny;
import com.davidsoergel.trees.NoSuchNodeException;
import com.davidsoergel.trees.RequireExistingNodeNamer;
import com.davidsoergel.trees.RootedPhylogeny;
import com.google.common.collect.HashMultimap;
import org.apache.commons.lang.NotImplementedException;
import org.apache.log4j.Logger;
import org.jetbrains.annotations.NotNull;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/**
 * Provides a view onto the Hugenholtz taxonomy using Integer IDs.  The Hugenholtz taxonomy has integer ids
 * (prokMSA_ids) at the leaves, but not at internal nodes.  Some internal nodes have string names, but there is no
 * guarantee of uniqueness.  For various reasons we need a tree with unique Integer IDs throughout.  This tree uses the
 * prokMSA_ids at the leaves and generated IDs at internal nodes.  It also allows looking up nodes by String name,
 * including semicolon-delimited multilevel classifications.
 * <p/>
 * Uniform leaf weights are provided.
 *
 * @author <a href="mailto:dev@davidsoergel.com">David Soergel</a>
 * @version $Id$
 */
public class HugenholtzTaxonomyService implements TaxonomyService<Integer> //, TaxonomyService<String>
{
    private static final Logger logger = Logger.getLogger(HugenholtzTaxonomyService.class);

    //private String ciccarelliFilename = "tree_Feb15_unrooted.txt";
    //   private static final String hugenholtzFilename = "275K.nast.ft190.constrained.rooted.allids.gz";
    //private static final String hugenholtzFilename = "greengenes.all.tree.allids.gz";
    //   private static final String bigGreenGenesFilename = "greengenes16SrRNAgenes.txt.gz";
    //   private static final String overrideFilename = "overrideNameToProkMSAid.txt";

    private String hugenholtzFilename; // a newick tree file
    private String greengenesRawFilename; // = "greengenes16SrRNAgenes.txt.gz";
    private String nameToProkMSAidFilename; // = "overrideNameToProkMSAid.txt";

    public void setHugenholtzFilename(final String hugenholtzFilename) {
        this.hugenholtzFilename = hugenholtzFilename;
    }

    public void setGreengenesRawFilename(final String greengenesRawFilename) {
        this.greengenesRawFilename = greengenesRawFilename;
    }

    public void setNameToProkMSAidFilename(final String nameToProkMSAidFilename) {
        this.nameToProkMSAidFilename = nameToProkMSAidFilename;
    }

    private static HugenholtzTaxonomyService instance;// = new CiccarelliUtils();

    private TaxonomySynonymService synonymService;

    private final static Integer NO_VALID_ID = -1;

    public static HugenholtzTaxonomyService getInjectedInstance() {
        return instance;
    }

    public Map<Integer, String> getFriendlyLabelMap() {
        return null;
    }

    public static void setInjectedInstance(HugenholtzTaxonomyService instance) {
        HugenholtzTaxonomyService.instance = instance;
    }

    public synchronized void setSynonymService(@NotNull TaxonomySynonymService synonymService) {
        this.synonymService = synonymService;
    }

    // PERF use ConcurrentMaps and such here instead of synchronizing all the methods

    /*   private BasicRootedPhylogeny<Integer> theIntegerTree;
     private HashMultimap<String, Integer> extraNameToIdsMap;
     // when a node has multiple names separated by "==", store all those after the first here
        
     private HashMultimap<String, Integer> nameToIdsMap;// = new HashMap<String, Integer>();
     private ConcurrentHashMap<String, Integer> nameToUniqueIdMap; // = new HashMap<String, Integer>();
    */
    CacheManager.LazyStub theIntegerTreeStub;
    CacheManager.LazyStub extraNameToIdsMapStub;
    CacheManager.LazyStub nameToIdsMapStub;
    CacheManager.LazyStub nameToUniqueIdMapStub;

    //   BiMap<Integer, PhylogenyNode<String>> intToNodeMap = new HashBiMap<Integer, PhylogenyNode<String>>();
    //   Multimap<String, PhylogenyNode<String>> nameToNodeMap = new HashMultimap<String, PhylogenyNode<String>>();

    //   NewickTaxonomyService stringTaxonomyService;// = new NewickTaxonomyService(hugenholtzFilename);

    public Set<Integer> getLeafIds() {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();
        return theIntegerTree.getLeafValues();
    }

    // split init from constructor for the sake of a Jandy adapter (HugenholtzTaxonomyServiceAdapter)

    public HugenholtzTaxonomyService() //throws PhyloUtilsException
    {
    }

    public synchronized void init() {
        // don't bother keeping track of which caches are affected by which inputs; just reload them all if anything changes
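        // As used here, each LazyStub defers one persisted cache entry keyed by this instance and the name
        // given below: notCached() reports whether a stored value exists, get() returns it, and put() records
        // a freshly computed value.  If any of the four entries is missing, the tree and name maps are
        // recomputed below and stored via put().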
        final String allFilenames = hugenholtzFilename + ", " + greengenesRawFilename + ", "
                + nameToProkMSAidFilename;
        logger.info("Cache key: " + allFilenames);
        theIntegerTreeStub = CacheManager.getLazy(this, allFilenames + ".theIntegerTree");
        nameToIdsMapStub = CacheManager.getLazy(this, allFilenames + ".nameToIdsMap");
        extraNameToIdsMapStub = CacheManager.getLazy(this, allFilenames + ".extraNameToIdsMap");
        nameToUniqueIdMapStub = CacheManager.getLazy(this, allFilenames + ".nameToUniqueIdMap");

        if (theIntegerTreeStub.notCached() || nameToIdsMapStub.notCached() || extraNameToIdsMapStub.notCached()
                || nameToUniqueIdMapStub.notCached()) {
            HashMultimap<String, Integer> nameToIdsMap = HashMultimap.create();

            reloadFromNewick(nameToIdsMap);
            reloadNameToProkMSAidMap(nameToIdsMap);

            nameToIdsMapStub.put(nameToIdsMap);

            // ** Note we don't invalidate downstream caches, e.g. for StrainDirectoryLabelChooser and so forth
            // CacheManager.invalidate
            /*   CacheManager.put(this, allFilenames + ".theIntegerTree", theIntegerTree);
              CacheManager.put(this, allFilenames + ".nameToIdsMap", nameToIdsMap);
              CacheManager.put(this, allFilenames + ".extraNameToIdsMap", extraNameToIdsMap);
              CacheManager.put(this, allFilenames + ".nameToUniqueIdMap", nameToUniqueIdMap); */
        }

        /*if (!readStateIfAvailable())
           {
           reloadFromNewick();
           //invalidateDependentCaches();
           saveState();
           }*/
    }

    private synchronized void reloadFromNewick(HashMultimap<String, Integer> nameToIdsMap) {

        HashMultimap<String, Integer> extraNameToIdsMap = HashMultimap.create();

        //** here we assume that the tree has already been converted to have named nodes at leaves, using the NewickParser command-line tool
        // else we'd need new NewickTaxonomyService(hugenholtzFilename, true);
        NewickStringTaxonomyService stringTaxonomyService = new NewickStringTaxonomyService(hugenholtzFilename,
                false);

        RootedPhylogeny<String> theStringTree = stringTaxonomyService.getTree();

        //** because the node children are iterated in random order in the course of the depth-first copy,
        // the random IDs won't be consistently assigned from one run to the next.

        BasicRootedPhylogeny<Integer> theIntegerTree = PhylogenyTypeConverter.convertToIDTree(theStringTree,
                new RequireExistingNodeNamer(false), new TaxonStringIdMapper<Integer>() {
                    @NotNull
                    public Integer findTaxidByNameRelaxed(@NotNull String name) throws NoSuchNodeException {
                        return findTaxidByName(name);
                    }

                    public Integer findTaxidByName(@NotNull String name) throws NoSuchNodeException {
                        try {
                            return new Integer(name);
                        } catch (NumberFormatException e) {
                            throw new NoSuchNodeException("Can't convert node name to integer ID: " + name);
                        }
                    }

                    public Set<String> getCachedNamesForId(Integer id) {
                        return DSCollectionUtils.setOf("" + id);
                    }
                }, nameToIdsMap, extraNameToIdsMap);

        theIntegerTree.setLeafWeightsUniform();
        addStrainNamesToMap(nameToIdsMap);

        theIntegerTreeStub.put(theIntegerTree);
        extraNameToIdsMapStub.put(extraNameToIdsMap);
    }

    private synchronized void reloadNameToProkMSAidMap(HashMultimap<String, Integer> nameToIdsMap) {

        if (nameToProkMSAidFilename != null) {
            Map<String, Set<Integer>> nameToProkMSAidMap;
            try {
                ConcurrentHashMap<String, Integer> nameToUniqueIdMap = new ConcurrentHashMap<String, Integer>();

                nameToProkMSAidMap = StringSetIntMapReader.read(nameToProkMSAidFilename);

                for (Map.Entry<String, Set<Integer>> entry : nameToProkMSAidMap.entrySet()) {
                    String key = entry.getKey();
                    Set<Integer> valueSet = entry.getValue();

                    logger.info("Loaded mapping: " + key + " -> " + DSStringUtils.join(valueSet, ", "));

                    nameToIdsMap.removeAll(key);
                    nameToUniqueIdMap.remove(key);

                    nameToIdsMap.putAll(key, valueSet);

                    for (Integer id : valueSet) {
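                        // each put overwrites the previous one, so only the id iterated last survives in the unique-id map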
                        nameToUniqueIdMap.put(key, id);
                    }
                }

                nameToUniqueIdMapStub.put(nameToUniqueIdMap);
            } catch (IOException e) {
                throw new Error(e);
            }
        }
    }

    public BasicRootedPhylogeny<Integer> getRandomSubtree(int numTaxa, Double mergeThreshold) {
        throw new NotImplementedException();
    }

    public BasicRootedPhylogeny<Integer> getRandomSubtree(int numTaxa, Double mergeThreshold,
            Integer exceptDescendantsOf) {
        throw new NotImplementedException();
    }

    private synchronized static InputStream getInputStream(String filename)
            throws PhyloUtilsException, IOException {
        //ClassLoader classClassLoader = new NewickParser().getClass().getClassLoader();
        ClassLoader threadClassLoader = Thread.currentThread().getContextClassLoader();
        //ClassLoader systemClassLoader = ClassLoader.getSystemClassLoader();

        //URL res1 = classClassLoader.getResource(filename);
        URL res = threadClassLoader.getResource(filename);
        //URL res3 = systemClassLoader.getResource(filename);

        if (res == null) {
            File f = new File(filename);
            if (f.exists()) {
                res = f.toURI().toURL(); // new URL("file://" + filename);
            }
        }

        if (res == null) {
            logger.error("file not found: " + filename);
            //Get the System Classloader
            //ClassLoader.getSystemClassLoader();

            //Get the URLs
            URL[] urls = ((URLClassLoader) threadClassLoader).getURLs();

            for (int i = 0; i < urls.length; i++) {
                logger.warn(urls[i].getFile());
            }

            throw new PhyloUtilsException("file not found: " + filename);
        }

        InputStream is = res.openStream();
        is = filename.endsWith(".gz") ? new GZIPInputStream(is) : is;
        /*if (is == null)
            {
            is = new FileInputStream(filename);
            }*/
        return is;
    }

    /**
     * Parses the raw greengenes records file (one key=value field per line, with records terminated by a
     * line reading END) and adds the organism, prokMSAname, and source names to the name-to-ids multimap
     * for every prokMSA_id that is present in the tree, along with copies stripped of strain designations.
     */
    private synchronized void addStrainNamesToMap(HashMultimap<String, Integer> nameToIdsMap) {
        if (greengenesRawFilename != null) {
            BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

            // there are much cleaner ways to do this, I know.  I'm in a freaking hurry.

            String organism = null;
            String prokMSAname = null;
            String source = null;
            Integer prokMSA_id = null;
            //   Integer replaced_by = null;

            // for now we ignore replaced_by and put all the IDs (old and new) in the map.
            // the only consequence AFAIK is that the old IDs won't be in the current tree;
            // the benefit is that if an old ID turns up for some reason we can still map it.

            try {

                BufferedReader in = new BufferedReader(
                        new InputStreamReader(getInputStream(greengenesRawFilename)));
                String line;
                Pattern strainPattern = Pattern.compile("( str.? )|( strain )");
                int skipped = 0;
                int found = 0;
                while ((line = in.readLine()) != null) {
                    line = line.trim();
                    if (line.equals("END")) {
                        try {
                            theIntegerTree.getNode(prokMSA_id);
                        } catch (NoSuchNodeException e) {
                            if (logger.isTraceEnabled()) {
                                logger.trace("prokMSA_id " + prokMSA_id + " not in tree; " + organism + " "
                                        + prokMSAname + " " + source);
                            }
                            skipped++;
                            continue;
                        }
                        found++;
                        if (organism != null) {
                            nameToIdsMap.put(organism, prokMSA_id);
                            String cleanOrganism = strainPattern.matcher(organism).replaceAll(" ");
                            if (!cleanOrganism.equals(source)) {
                                nameToIdsMap.put(cleanOrganism, prokMSA_id);
                            }
                        }
                        if (prokMSAname != null) {
                            nameToIdsMap.put(prokMSAname, prokMSA_id);
                            String cleanProkMSAname = strainPattern.matcher(prokMSAname).replaceAll(" ");
                            if (!cleanProkMSAname.equals(source)) {
                                nameToIdsMap.put(cleanProkMSAname, prokMSA_id);
                            }
                        }
                        if (source != null) {
                            nameToIdsMap.put(source, prokMSA_id);
                            String cleanSource = strainPattern.matcher(source).replaceAll("");
                            if (!cleanSource.equals(source)) {
                                nameToIdsMap.put(cleanSource, prokMSA_id);
                            }
                        }

                        organism = null;
                        prokMSAname = null;
                        source = null;
                        prokMSA_id = null;
                    } else {
                        String[] sa = line.split("=");
                        if (sa[0].equals("organism")) {
                            organism = sa[1];
                        } else if (sa[0].equals("source")) {
                            source = sa[1];
                        } else if (sa[0].equals("prokMSA_id")) {
                            prokMSA_id = new Integer(sa[1]);
                        } else if (sa[0].equals("prokMSAname")) {
                            prokMSAname = sa[1];
                        }
                        //   else if (sa[0].equals("replaced_by"))
                        //         {
                        //         replaced_by = sa[1];
                        //         }
                        // else ignore
                    }
                }
                logger.info("Found " + found + " taxa in tree, skipped " + skipped);
            } catch (IOException e) {
                logger.error("Error", e);
                throw new PhyloUtilsRuntimeException(e);
            } catch (PhyloUtilsException e) {
                logger.error("Error", e);
                throw new PhyloUtilsRuntimeException(e);
            }

            //try
            //   {
            //   }
            //catch (PhyloUtilsException e)
            //   {
            //throw new
            //   }
            //super(hugenholtzFilename);

            // walk the entire tree, making an int->node map and a string->int multimap along the way

            // assume that all prokMSA_IDs are less than 10000000, so just start the generated IDs from there
        }
    }

    /*      int idGenerator = 10000000;
        
          for (PhylogenyNode<String> node : stringTaxonomyService.getRoot())
     {
     String stringName = node.getValue();
     Integer id;
     try
        {
    //
    //            if (stringName == null)
    //               {
    //               throw new NumberFormatException("");
    //               }
    //
    //            stringName = stringName.trim();
    //
        
        id = new Integer(stringName);
        if (intToNodeMap.containsKey(id))
           {
           throw new PhyloUtilsRuntimeException("Found duplicate prokMSA_id: " + id);
           }
        }
     catch (NumberFormatException e)
        {
        // ok, generate an ID instead
        id = idGenerator++;
        
        if (stringName != null && !stringName.trim().equals(""))
           {
           nameToNodeMap.put(stringName.trim(), node);
           }
        
        // note we don't put the string representation of the integer id in the string map
        }
        
     intToNodeMap.put(id, node);
     }*/

    /*
     String cacheFilename = "/phyloutils.hugenholtz.cache";
        
     public void saveState()
        {
        try
      {
      File cacheFile = new File(EnvironmentUtils.getCacheRoot() + cacheFilename);
      cacheFile.getParentFile().mkdirs();
      FileOutputStream fout = new FileOutputStream(cacheFile);
      ObjectOutputStream oos = new ObjectOutputStream(fout);
    //         oos.writeObject(stringTaxonomyService);
    //         oos.writeObject(intToNodeMap);
    //         oos.writeObject(nameToNodeMap);
      oos.writeObject(theIntegerTree);
      oos.writeObject(nameToIdsMap);
      oos.close();
      }
        catch (Exception e)
      {
      logger.error("Error", e);
      }
        }
        
     private boolean readStateIfAvailable()
        {
        try
      {
      FileInputStream fin = new FileInputStream(EnvironmentUtils.getCacheRoot() + cacheFilename);
      ObjectInputStream ois = new ObjectInputStream(fin);
    //         stringTaxonomyService = (NewickTaxonomyService) ois.readObject();
    //         intToNodeMap = (BiMap<Integer, PhylogenyNode<String>>) ois.readObject();
    //         nameToNodeMap = (Multimap<String, PhylogenyNode<String>>) ois.readObject();
      theIntegerTree = (BasicRootedPhylogeny<Integer>) ois.readObject();
      nameToIdsMap = (HashMultimap<String, Integer>) ois.readObject();
      ois.close();
      return true;
      }
        catch (IOException e)
      {// no problem
      logger.warn("Could not read Hugenholtz cache; rereading source files", e);
      }
        catch (ClassNotFoundException e)
      {// no problem
      logger.warn("Could not read Hugenholtz cache; rereading source files", e);
      }
        return false;
        }
    */

    public synchronized boolean isLeaf(Integer leafId) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.getNode(leafId).isLeaf();
    }

    public synchronized boolean isKnown(Integer leafId) //throws NoSuchNodeException
    {
        try {
            BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

            theIntegerTree.getNode(leafId);
            return true;
        } catch (NoSuchNodeException e) {
            return false;
        }
    }

    @NotNull
    public Integer findTaxidByNameRelaxed(String name) throws NoSuchNodeException {
        return findTaxidByName(name);
    }

    public synchronized Set<String> getCachedNamesForId(Integer id) {
        HashMultimap<String, Integer> nameToIdsMap = (HashMultimap<String, Integer>) nameToIdsMapStub.get();
        ConcurrentHashMap<String, Integer> nameToUniqueIdMap = (ConcurrentHashMap<String, Integer>) nameToUniqueIdMapStub
                .get();

        //PERF, need a BiMultiMap or something
        Set<String> result = new HashSet<String>();
        for (Map.Entry<String, Integer> entry : nameToUniqueIdMap.entrySet()) {
            if (entry.getValue().equals(id)) {
                result.add(entry.getKey());
            }
        }
        for (Map.Entry<String, Integer> entry : nameToIdsMap.entries()) {
            if (entry.getValue().equals(id)) {
                result.add(entry.getKey());
            }
        }
        return result;
    }
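
    // findTaxidByName (below) resolves a name in stages: first the cached name-to-unique-id map, then a
    // literal integer prokMSA_id present in the tree, then (for plain names) a relaxed subtree search whose
    // shallowest leaf is taken, and finally (for semicolon-delimited names) the bottom-up multilevel search.
    // Plain names that cannot be resolved are cached as NO_VALID_ID so the failure is remembered.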

    @NotNull
    public synchronized Integer findTaxidByName(String name) throws NoSuchNodeException {
        ConcurrentHashMap<String, Integer> nameToUniqueIdMap = (ConcurrentHashMap<String, Integer>) nameToUniqueIdMapStub
                .get();

        Integer result = nameToUniqueIdMap.get(name);

        if (result == null) {
            BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

            try {
                Integer id = new Integer(name);
                theIntegerTree.getNode(id); // throws exception if not present // intToNodeMap.containsKey(id))
                result = id;
            } catch (NumberFormatException e) {
                // ok, try the next thing
            } catch (NoSuchNodeException e) {
                // ok, try the next thing
            }

            if (result == null) {
                if (!name.contains(";")) {
                    try {
                        BasicRootedPhylogeny<Integer> bTree = findSubtreeByNameRelaxed(name);
                        result = bTree.getShallowestLeaf();
                    } catch (NoSuchNodeException e) {
                        result = NO_VALID_ID;
                    }

                    //result = getUniqueNodeForName(name);

                    // REVIEW for our present purposes we always want the worst-case node; but in other contexts that may be the wrong thing to do

                    //   result = getDeepestNodeForName(name);
                } else {
                    result = getUniqueNodeForMultilevelName(name.split("[; ]+"));
                }
            }
            nameToUniqueIdMap.put(name, result);
        }

        if (result.equals(NO_VALID_ID)) {
            throw new NoSuchNodeException("Taxon not found: " + name);
        }

        return result;
    }

    /*   private Integer getUniqueNodeForMultilevelName(String[] taxa) throws PhyloUtilsException
          {
        
          }
    */

    private synchronized Integer commonAncestor(Set<Deque<Integer>> paths) throws NoSuchNodeException {
        if (paths.size() == 1) {
            final Deque<Integer> path = paths.iterator().next();
            return path.getLast();
        } else {
            assert paths.size() > 1;
            //   throw new PhyloUtilsRuntimeException("Taxonomy path not unique : " + DSStringUtils.join(taxa, "; "));
            Set<Integer> leafIds = new HashSet<Integer>();
            for (Deque<Integer> path : paths) {
                leafIds.add(path.peekLast());
            }
            BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

            return theIntegerTree.commonAncestor(leafIds, 0.75);
        }
    }

    // bottom-up search
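    // Starting from the most specific rank, each id matching that name seeds a candidate path.  At each
    // higher rank, a path survives only if one of that rank's matching ids is an ancestor of the path's
    // current head, in which case the ancestor is prepended; ranks with no match at all are skipped.
    // As soon as a rank matches exactly one node, the search stops: a single surviving path yields its
    // most specific id, otherwise the common ancestor of the surviving paths' most specific ids is used.
    // For illustration only: resolving "Bacteria; Proteobacteria; Escherichia" starts from the ids named
    // "Escherichia" and, if that name is ambiguous, narrows them using "Proteobacteria" and then "Bacteria".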

    @NotNull
    private synchronized Integer getUniqueNodeForMultilevelName(String[] taxa) throws NoSuchNodeException {
        List<String> reverseTaxa = new ArrayList<String>(Arrays.asList(taxa.clone()));
        Collections.reverse(reverseTaxa);

        //final String firstS = reverseTaxa.remove(0);
        //Collection<Integer> trav = null; // = nameToIdMap.get(firstS);

        /*while (trav.isEmpty())
           {
           logger.warn("IGNORING Node " + s + " not found in " + DSStringUtils.join(taxa, "; "));
           continue;
           }
        */

        Set<Deque<Integer>> paths = null;
        HashMultimap<String, Integer> nameToIdsMap = (HashMultimap<String, Integer>) nameToIdsMapStub.get();
        HashMultimap<String, Integer> extraNameToIdsMap = (HashMultimap<String, Integer>) extraNameToIdsMapStub
                .get();

        for (String s : reverseTaxa) {
            Collection<Integer> matchingNodes = nameToIdsMap.get(s);
            if (matchingNodes.isEmpty()) {
                matchingNodes = extraNameToIdsMap.get(s);
            }
            if (matchingNodes.isEmpty()) {
                logger.debug("IGNORING Node " + s + " not found in " + DSStringUtils.join(taxa, "; "));
            } else {
                //   Set<Integer> nextTrav = new HashSet<Integer>();
                if (paths == null) {
                    paths = new HashSet<Deque<Integer>>(matchingNodes.size());
                    //nextTrav.addAll(matchingNodes);
                    for (Integer node : matchingNodes) {
                        Deque<Integer> l = new LinkedList<Integer>();
                        l.add(node);
                        paths.add(l);
                    }
                } else {
                    BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub
                            .get();

                    Set<Deque<Integer>> okPaths = new HashSet<Deque<Integer>>();
                    for (Deque<Integer> path : paths) {
                        Integer descendant = path.peek();
                        for (Integer ancestor : matchingNodes) {
                            if (theIntegerTree.isDescendant(ancestor, descendant)) {
                                path.addFirst(ancestor);
                                okPaths.add(path);
                            }
                        }
                    }
                    paths = okPaths; // ditch any paths that didn't have an ancestor added this round
                }

                if (paths.isEmpty()) {
                    // we get here only if
                    //  a) there was more than one live path on the last round
                    //  b) none of those paths are descendants of the matches at the current level
                    throw new NoSuchNodeException(
                            "Requested classification path does not match tree: " + DSStringUtils.join(taxa, "; "));
                }

                // if all the paths converge on exactly one node, call it a match, even if higher levels of the tree don't match.

                if (matchingNodes.size() == 1) {
                    return commonAncestor(paths);
                }
            }
        }
        throw new NoSuchNodeException("Multiple distinct matching paths: " + DSStringUtils.join(taxa, "; "));
        //return commonAncestor(paths);
    }

    // top-down search
    /*   private Integer getUniqueNodeForMultilevelName(String[] taxa) throws PhyloUtilsException
      {
      //List<Integer> intTaxa = new ArrayList<Integer>(taxa.length);
      Integer trav = theIntegerTree.getRoot().getValue();
      for (String s : taxa)
         {
         Collection<Integer> matchingNodes = nameToIdMap.get(s);
        
         if (matchingNodes.isEmpty())
            {
            throw new PhyloUtilsException("Node " + s + " not found in " + DSStringUtils.join(taxa, "; "));
            }
        
         for (Iterator<Integer> iter = matchingNodes.iterator(); iter.hasNext();)
            {
            Integer node = iter.next();
        
            try
               {
               if (!theIntegerTree.isDescendant(trav, node))
                  {
                  iter.remove();
                  }
               }
            catch (NoSuchElementException e)  // probably the requested node is not in the tree (i.e., it's unclassified, but had an organism name associated anyway)
               {
               iter.remove();
               }
            }
        
         if (matchingNodes.isEmpty())
            {
            throw new PhyloUtilsException(
                  "Requested classification path does not match tree: " + DSStringUtils.join(taxa, "; "));
            }
        
         if (matchingNodes.size() == 1)
            {
            trav = matchingNodes.iterator().next();
            }
         else
            {
            // check descendants pairwise
        
            for (Iterator<Integer> iter = matchingNodes.iterator(); iter.hasNext();)
               {
               Integer descendant = iter.next();
               for (Integer ancestor : matchingNodes)
                  {
                  if (ancestor != descendant && theIntegerTree.isDescendant(ancestor, descendant))
                     {
                     iter.remove();
                     break;
                     }
                  }
               }
            if (matchingNodes.size() == 1)
               {
               trav = matchingNodes.iterator().next();
               }
            else
               {
               // sadly this is too strict; there are 7 distinct "Bacteria" clades!
               // OK, don't parse the "organism" field, use "prokMSAname" instead.
        
               throw new PhyloUtilsException(
                     "Node " + s + " not unique at " + trav + " in " + DSStringUtils.join(taxa, "; "));
               }
            }
         }
        
      return trav; //intToNodeMap.inverse().get(trav);
      }
     */

    Pattern spaceSuffixPattern = Pattern.compile(" \\S*$");
    //Pattern strainSuffixPattern = Pattern.compile("( (sp.?)|(str.?)|(strain)).*$");

    /*   @NotNull
       private Integer getUniqueNodeForName(String name) throws NoSuchNodeException
          {
          return findSubtreeByName(name).getValue();
          }
        
       public RootedPhylogeny<Integer> findSubtreeByName(String name) throws NoSuchNodeException
          {
          Collection<Integer> matchingIds = findMatchingIds(name);
        
          if (matchingIds.size() == 0)
     {
     throw new NoSuchNodeException();
     }
        
          return findSubtreeWithIds(matchingIds, name);
          }
    */

    private synchronized BasicRootedPhylogeny<Integer> findSubtreeByNameRelaxed(String name)
            throws NoSuchNodeException {
        Set<Integer> matchingIds = findMatchingIdsRelaxed(name);

        if (matchingIds.size() == 0) {
            throw new NoSuchNodeException("Subtree not found even with relaxation: " + name);
        }

        return findCompactSubtreeWithIds(matchingIds, name);
    }

    /*
     @NotNull
     public Integer getDeepestNodeForName(String name) throws NoSuchNodeException
        {
        ///Integer result;
        Collection<Integer> matchingIds = findMatchingIdsRelaxed(name);
        
        //   PhylogenyNode<Integer> deepestNode;
        Integer deepestId = null;
        double deepestDepth = Double.NEGATIVE_INFINITY;
        
        for (Integer id : matchingIds)
      {
      //PhylogenyNode<Integer> n = theIntegerTree.getNode(id);
      double depth = getDepthFromRoot(id);
      if (depth > deepestDepth)
         {
         deepestDepth = depth;
         deepestId = id;
         }
      }
        
        //assert theIntegerTree.getNode(deepestId).isLeaf();
        
        return deepestId;
        }
        
     @NotNull
     public Integer getShallowestNodeForName(String name) throws NoSuchNodeException
        {
        ///Integer result;
        Collection<Integer> matchingIds = findMatchingIdsRelaxed(name);
        
        //   PhylogenyNode<Integer> deepestNode;
        Integer shallowestId = null;
        double shallowestDepth = Double.POSITIVE_INFINITY;
        
        for (Integer id : matchingIds)
      {
      //PhylogenyNode<Integer> n = theIntegerTree.getNode(id);
      double depth = getDepthFromRoot(id);
      if (depth < shallowestDepth)
         {
         shallowestDepth = depth;
         shallowestId = id;
         }
      }
        
        //assert theIntegerTree.getNode(deepestId).isLeaf();
        
        return shallowestId;
        }
    */

    public synchronized Set<Integer> findMatchingIds(String name) throws NoSuchNodeException {
        HashMultimap<String, Integer> nameToIdsMap = (HashMultimap<String, Integer>) nameToIdsMapStub.get();

        Set<Integer> matchingIds = nameToIdsMap.get(name);
        if (matchingIds.isEmpty()) {
            throw new NoSuchNodeException("Node not found: " + name);
        }
        return matchingIds;
    }

    public synchronized Set<Integer> findMatchingIdsRelaxed(String name) throws NoSuchNodeException {
        HashMultimap<String, Integer> nameToIdsMap = (HashMultimap<String, Integer>) nameToIdsMapStub.get();

        Set<Integer> matchingIds = nameToIdsMap.get(name);
        /*   if (matchingIds.isEmpty())
           {
           matchingIds = new HashSet<Integer>();
           for (String syn : synonymService.synonymsOf(name))
        {
        matchingIds.addAll(nameToIdMap.get(syn));
        }
           }
        if (matchingIds.isEmpty())
           {
           matchingIds = new HashSet<Integer>();
           for (String syn : synonymService.synonymsOfParent(name))
        {
        matchingIds.addAll(nameToIdMap.get(syn));
        }
           }*/
        if (matchingIds.isEmpty()) {
            matchingIds = new HashSet<Integer>();
            for (String syn : synonymService.synonymsOfRelaxed(name)) {
                matchingIds.addAll(nameToIdsMap.get(syn));
            }
        }
        String shortName = name;

        // even when we use synonymsOfRelaxed(shortName), we may not find any matching IDs in the nameToIdsMap.
        // that's why we do a second level of relaxing here.
        // the space-delimited relaxing should automatically incorporate the strainSuffixPattern.
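        // For illustration: a name like "Escherichia coli str. K-12" would be retried as
        // "Escherichia coli str." and then "Escherichia coli", stopping as soon as a synonym matches.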

        while (matchingIds.isEmpty() && shortName.contains(" ")) {
            shortName = spaceSuffixPattern.matcher(shortName).replaceFirst("");
            //shortName = strainSuffixPattern.matcher(shortName).replaceAll("");
            matchingIds = new HashSet<Integer>();
            for (String syn : synonymService.synonymsOfRelaxed(shortName)) {
                matchingIds.addAll(nameToIdsMap.get(syn));
            }
            if (!matchingIds.isEmpty()) {
                logger.warn("Relaxed name " + name + " to " + shortName);
            }
        }
        if (matchingIds.isEmpty()) {
            throw new NoSuchNodeException("Node not found: " + name + "; no id found even for " + shortName);
        }
        shortNames.put(name, shortName);
        return matchingIds;
    }

    //   WeightedSet<String> depthsBelow = new HashWeightedSet<String>(); // for debugging

    private Map<String, String> shortNames = new HashMap<String, String>();

    public synchronized String getRelaxedName(String name) {
        return shortNames.get(name);
    }

    /*   public void printDepthsBelow()
       {
       for (String name : depthsBelow.keysInDecreasingWeightOrder())
     {
     double depthBelow = depthsBelow.get(name);
     String shortName = shortNames.get(name);
        
     logger.info(String.format("Depth below = %.3f for %s relaxed from %s", depthBelow, shortName, name));
     }
       }*/

    public synchronized boolean isDescendant(Integer ancestor, Integer descendant) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.isDescendant(ancestor, descendant);
        //      return stringTaxonomyService.isDescendant(intToNodeMap.get(ancestor), intToNodeMap.get(descendant));
    }

    public Set<Integer> selectAncestors(final Collection<Integer> labels, final Integer id) {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.selectAncestors(labels, id);
    }

    /*public double exactDistanceBetween(Integer a, Integer b)
       {
       return stringTaxonomyService.distanceBetween(intToNodeMap(a), intToNodeMap(b));
       }
    */

    // assume that theIntegerTree is good to go, and that its distanceBetween method is threadsafe

    public double minDistanceBetween(Integer a, Integer b) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.distanceBetween(a, b);
        //return stringTaxonomyService.minDistanceBetween(intToNodeMap.get(a), intToNodeMap.get(b));
        //   return exactDistanceBetween(name1, name2);
    }

    public synchronized double getDepthFromRoot(Integer b) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.distanceBetween(theIntegerTree.getRoot().getPayload(), b);
        //return stringTaxonomyService.minDistanceBetween(intToNodeMap.get(a), intToNodeMap.get(b));
        //   return exactDistanceBetween(name1, name2);
    }

    public synchronized double getGreatestDepthBelow(Integer taxid) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.getNode(taxid).getGreatestBranchLengthDepthBelow();
    }

    public synchronized double getLargestLengthSpan(Integer taxid) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.getNode(taxid).getLargestLengthSpan();
    }

    private Double maxDistance = null;

    public synchronized double maxDistance() {
        if (maxDistance == null) {
            BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

            maxDistance = 2.0 * theIntegerTree.getRoot().getGreatestBranchLengthDepthBelow();
        }
        return maxDistance;
    }

    /**
     * Just return the argument, even if the branch length is zero.
     *
     * @param id the Integer identifying the starting node
     * @return the same id, after confirming that the node exists
     * @throws NoSuchNodeException if no node with the given id is present in the tree
     */
    public synchronized Integer nearestAncestorWithBranchLength(Integer id) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        //checkNodeExists(id);
        theIntegerTree.getNode(id); // test exists
        return id;

        //return theIntegerTree.nearestAncestorWithBranchLength(id);
        //      return intToNodeMap.inverse().get(intToNodeMap.get(id).nearestAncestorWithBranchLength());
    }

    @NotNull
    public List<Integer> getAncestorPathIds(final Integer id) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.getAncestorPathIds(id);
    }

    /*   public List<PhylogenyNode<Integer>> getAncestorPath(final Integer id) throws NoSuchNodeException
        {
        return theIntegerTree.getAncestorPath(id);
        }
    */

    @NotNull
    public List<BasicPhylogenyNode<Integer>> getAncestorPathAsBasic(final Integer id) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.getAncestorPathAsBasic(id);
    }

    /*   public Integer nearestAncestorAtRank(final String rankName, Integer leafId) throws NoSuchNodeException
          {
          if(synonymService)
          }
    */

    /*
     public RootedPhylogeny<Integer> extractTreeWithLeafIDs(Collection<Integer> ids, boolean ignoreAbsentNodes)
      throws PhyloUtilsException
        {
        return stringTaxonomyService
         .extractTreeWithLeaves(DSCollectionUtils.mapAll(intToNodeMap, ids), ignoreAbsentNodes);
        }
        
     public RootedPhylogeny<Integer> extractTreeWithLeafIDs(Collection<Integer> ids) throws PhyloUtilsException
        {
        return stringTaxonomyService.extractTreeWithLeaves(DSCollectionUtils.mapAll(intToNodeMap, ids));
        }
    */

    /*   public boolean isDescendant(PhylogenyNode<Integer> ancestor, PhylogenyNode<Integer> descendant)
      throws PhyloUtilsException
        {
        throw new NotImplementedException();
        }
        
     public Double minDistanceBetween(PhylogenyNode<Integer> node1, PhylogenyNode<Integer> node2)
      throws PhyloUtilsException
        {
        throw new NotImplementedException();
        }
        
     public PhylogenyNode<Integer> getRoot()
        {
        throw new NotImplementedException();
        }
        
     public PhylogenyNode<Integer> nearestAncestorWithBranchLength(PhylogenyNode<Integer> id) throws PhyloUtilsException
        {
        throw new NotImplementedException();
        }
    */

    /*   public RootedPhylogeny<Integer> extractTreeWithLeafIDs(Collection<Integer> ids) throws NoSuchNodeException
          {
          return extractTreeWithLeafIDs(ids, false, false);
          }*/

    /*
     public RootedPhylogeny<Integer> extractTreeWithLeaves(Collection<PhylogenyNode<Integer>> ids)
      throws PhyloUtilsException
        {
        throw new NotImplementedException();
        }
    */

    public synchronized BasicRootedPhylogeny<Integer> extractTreeWithLeafIDs(Set<Integer> ids,
            boolean ignoreAbsentNodes, boolean includeInternalBranches,
            AbstractRootedPhylogeny.MutualExclusionResolutionMode mode) throws NoSuchNodeException //, NodeNamer<Integer> namer
    {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.extractTreeWithLeafIDs(ids, ignoreAbsentNodes, includeInternalBranches, mode); //, namer);
    }

    public synchronized BasicRootedPhylogeny<Integer> extractTreeWithLeafIDs(Set<Integer> ids,
            boolean ignoreAbsentNodes, boolean includeInternalBranches) throws NoSuchNodeException //, NodeNamer<Integer> namer
    {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.extractTreeWithLeafIDs(ids, ignoreAbsentNodes, includeInternalBranches); //, namer);
    }

    @Override
    public synchronized String toString() {
        String shortname = getClass().getName();
        shortname = shortname.substring(shortname.lastIndexOf(".") + 1);
        return shortname;
    }
    /*
       public Integer findTaxIdOfShallowestLeaf(String name) throws NoSuchNodeException
          {
          RootedPhylogeny<Integer> bTree = findTreeForName(name);
          return bTree.getShallowestLeaf();
          }
    */
    /*   public int getNumNodesForName(String name)
        
          {
          int mappedBIds = 0;
          Set<Integer> idBSet = nameToIdsMap.get(name);
          if (idBSet != null)
     {
     mappedBIds = idBSet.size();
     }
          return mappedBIds;
          }
    */

    /*
     public RootedPhylogeny<Integer> findTreeForName(String name) throws NoSuchNodeException
        {
        Set<Integer> idBSet = nameToIdsMap.get(name);
        
        RootedPhylogeny<Integer> bTree;
        
        if (idBSet == null)
      {
      //logger.warn("No mapping for ID: " + idA);
      //System.err.printf("%s\t%d\tNOMAP\t0\t0\t0\t0\n", name, idA);
      try
         {
         bTree = findSubtreeByName(name);
         }
      catch (NoSuchNodeException e)
         {
         //logger.warn("No leaf IDs are classified on the tree: " + name);
         //System.err.printf("%s\t%d\tUNCLASSIFIED\t0\t0\t0\t0\n", name, idA);
         bTree = findSubtreeByNameRelaxed(name);
         }
      }
        else
      {
      try
         {
         bTree = extractTreeWithLeafIDs(idBSet, true, true);
         PhylogenyNode<Integer> r = bTree.getFirstBranchingNode();
         bTree = r.asRootedPhylogeny();
         }
      catch (NoSuchNodeException e)
         {
         //logger.warn("No leaf IDs are classified on the tree: " + name);
         //System.err.printf("%s\t%d\tUNCLASSIFIED\t0\t0\t0\t0\n", name, idA);
         bTree = findSubtreeByNameRelaxed(name);
         }
      }
        return bTree;
        }
    */

    public synchronized BasicRootedPhylogeny<Integer> findTreeForIds(Set<Integer> idBSet) {
        try {
            BasicRootedPhylogeny<Integer> bTree = extractTreeWithLeafIDs(idBSet, true, true,
                    AbstractRootedPhylogeny.MutualExclusionResolutionMode.BOTH);
            BasicPhylogenyNode<Integer> r = bTree.getFirstBranchingNode();
            bTree = r.asRootedPhylogeny();
            return bTree;
        } catch (NoSuchNodeException e) {
            logger.error("Error", e);
            throw new Error("Impossible");
        }
    }

    public synchronized BasicRootedPhylogeny<Integer> findCompactSubtreeWithIds(Set<Integer> matchingIds,
            String name) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> tree = extractTreeWithLeafIDs(matchingIds, true, true,
                AbstractRootedPhylogeny.MutualExclusionResolutionMode.BOTH);
        BasicPhylogenyNode<Integer> result = tree.getFirstBranchingNode();

        double span = result.getLargestLengthSpan();
        if (span > 0.1) {
            logger.warn("Subtree for " + name + " has span = " + span + ", trying 75% solution");
            Integer sub = tree.commonAncestor(matchingIds, 0.75);
            result = tree.getNode(sub);
            span = result.getLargestLengthSpan();
            logger.warn("75% subtree for " + name + " has span = " + span);
        }

        //result = tree.commonAncestor(matchingIds, 0.75);
        //throw new PhyloUtilsException("Name not unique: " + name);

        //double depthBelow = theIntegerTree.getNode(result).getGreatestBranchLengthDepthBelow();

        //depthsBelow.add(name, depthBelow);

        //logger.info("Node found for name " + name + " has depth below = " + jdepthBelow);

        return result.asRootedPhylogeny();
    }

    public Integer getLeafAtApproximateDistance(final Integer aId, final double minDesiredTreeDistance,
            final double maxDesiredTreeDistance) throws NoSuchNodeException {
        BasicRootedPhylogeny<Integer> theIntegerTree = (BasicRootedPhylogeny<Integer>) theIntegerTreeStub.get();

        return theIntegerTree.getLeafAtApproximateDistance(aId, minDesiredTreeDistance, maxDesiredTreeDistance);
    }

    public Collection<String> getAllNamesForIds(final Set<Integer> ids) {
        throw new NotImplementedException();
    }

    public String getScientificName(final Integer taxid) throws NoSuchNodeException {
        throw new NotImplementedException();
        //PhylogenyNode<String> node = basePhylogeny.getNode(name);  // not needed; nameToNode contains the primary ID too
        /*   String name = nameByTaxId.get(taxid);
        if (taxid == null)
           {
           throw new NoSuchNodeException("" + taxid);
           }
        return name;*/
    }
}