weka.core.neighboursearch.CoverTree.java Source code

Java tutorial

Introduction

Here is the source code for weka.core.neighboursearch.CoverTree.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * CoverTree.java
 * Copyright (C) 2006 Alina Beygelzimer and Sham Kakade and John Langford
 */

package weka.core.neighboursearch;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.Serializable;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Vector;

import weka.core.DistanceFunction;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.converters.CSVLoader;
import weka.core.neighboursearch.covertrees.Stack;

/**
 * <!-- globalinfo-start --> Class implementing the CoverTree datastructure.<br/>
 * The class is very much a translation of the c source code made available by
 * the authors.<br/>
 * <br/>
 * For more information and original source code see:<br/>
 * <br/>
 * Alina Beygelzimer, Sham Kakade, John Langford: Cover trees for nearest
 * neighbor. In: ICML'06: Proceedings of the 23rd international conference on
 * Machine learning, New York, NY, USA, 97-104, 2006.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;inproceedings{Beygelzimer2006,
 *    address = {New York, NY, USA},
 *    author = {Alina Beygelzimer and Sham Kakade and John Langford},
 *    booktitle = {ICML'06: Proceedings of the 23rd international conference on Machine learning},
 *    pages = {97-104},
 *    publisher = {ACM Press},
 *    title = {Cover trees for nearest neighbor},
 *    year = {2006},
 *    location = {Pittsburgh, Pennsylvania},
 *    HTTP = {http://hunch.net/\~jl/projects/cover_tree/cover_tree.html}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -B &lt;value&gt;
 *  Set base of the expansion constant
 *  (default = 1.3).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Alina Beygelzimer (original C++ code)
 * @author Sham Kakade (original C++ code)
 * @author John Langford (original C++ code)
 * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
 *         (Java port)
 * @version $Revision$
 */
public class CoverTree extends NearestNeighbourSearch implements TechnicalInformationHandler {

    /** for serialization. */
    private static final long serialVersionUID = 7617412821497807586L;

    /**
     * One node of the cover tree. A node refers to its instance by index and
     * keeps the bookkeeping values filled in while the tree is built.
     * 
     * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
     * @version $Revision$
     */
    public class CoverTreeNode implements Serializable, RevisionHandler {

        /** for serialization. */
        private static final long serialVersionUID = 1808760031169036512L;

        /** Position of this node's instance inside the instances held by the tree. */
        private Integer idx;

        /** Largest distance from this node's point to any of its descendants. */
        private double max_dist;

        /** Distance from this node's point to its parent's point. */
        private double parent_dist;

        /** Child nodes, or null when none were assigned. */
        private Stack<CoverTreeNode> children;

        /** How many children this node has; zero marks a leaf. */
        private int num_children;

        /**
         * Scale/level value of the node. Leaves are created with the value 100;
         * other nodes receive their value while the tree is being built.
         */
        private int scale;

        /** Creates an empty node; all fields keep their default values. */
        public CoverTreeNode() {
        }

        /**
         * Creates a fully initialised node.
         * 
         * @param i index of the instance this node stands for
         * @param md distance to the furthest descendant
         * @param pd distance to the parent node
         * @param childs stack holding the children of the node
         * @param numchilds number of children
         * @param s scale/level of the node
         */
        public CoverTreeNode(Integer i, double md, double pd, Stack<CoverTreeNode> childs, int numchilds, int s) {
            this.idx = i;
            this.max_dist = md;
            this.parent_dist = pd;
            this.children = childs;
            this.num_children = numchilds;
            this.scale = s;
        }

        /**
         * Looks up the instance this node stands for.
         * 
         * @return the instance stored at this node's index
         */
        public Instance p() {
            return m_Instances.instance(this.idx);
        }

        /**
         * Tells whether the node has no children.
         * 
         * @return true if the child count is zero
         */
        public boolean isALeaf() {
            return 0 == this.num_children;
        }

        /**
         * Returns the revision string.
         * 
         * @return the revision
         */
        @Override
        public String getRevision() {
            return RevisionUtils.extract("$Revision$");
        }
    }

    /**
     * Pairs an instance (by index) with the stack of distances measured from it
     * to the reference points examined so far during tree construction.
     * 
     * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
     * @version $Revision$
     */
    private class DistanceNode implements RevisionHandler {

        /**
         * Distances to reference points. The top of the stack belongs to the
         * current reference point (the potential parent); the entries below it
         * belong to reference points visited earlier (potential ancestors).
         */
        Stack<Double> dist;

        /** Position of the represented instance inside the instances held by the tree. */
        Integer idx;

        /**
         * Looks up the instance this entry stands for.
         * 
         * @return the instance stored at this entry's index
         */
        public Instance q() {
            return m_Instances.instance(this.idx);
        }

        /**
         * Returns the revision string.
         * 
         * @return the revision
         */
        @Override
        public String getRevision() {
            return RevisionUtils.extract("$Revision$");
        }
    }

    /**
     * The Euclidean distance function used by the tree. The initializer below
     * keeps this field and m_DistanceFunction pointing at one and the same
     * object.
     */
    protected EuclideanDistance m_EuclideanDistance;
    {
        // Install a fresh EuclideanDistance unless one is already in place,
        // then alias it so that only a single distance object exists.
        if (!(m_DistanceFunction instanceof EuclideanDistance)) {
            m_DistanceFunction = new EuclideanDistance();
        }
        m_EuclideanDistance = (EuclideanDistance) m_DistanceFunction;
    }

    /** The root node of the tree; assigned by buildCoverTree(). */
    protected CoverTreeNode m_Root;

    /**
     * Array holding the distances of the nearest neighbours. It is filled up both
     * by nearestNeighbour() and kNearestNeighbours().
     */
    protected double[] m_DistanceList;

    /**
     * Tree counters: total number of nodes, number of leaf nodes, and maximum
     * depth. m_NumNodes and m_NumLeaves are incremented by batch_insert() as
     * nodes are created.
     * NOTE(review): no code in this part of the file updates m_MaxDepth --
     * confirm where it is maintained.
     */
    protected int m_NumNodes, m_NumLeaves, m_MaxDepth;

    /** Tree performance statistics; only created when performance measuring is on. */
    protected TreePerformanceStats m_TreeStats = null;

    /**
     * The base of our expansion constant. In other words the 2 in 2^i used in
     * covering tree and separation invariants of a cover tree. P.S.: In paper
     * it's suggested the separation invariant is relaxed in batch construction.
     */
    protected double m_Base = 1.3;

    /**
     * Precomputed 1/ln(m_Base). get_scale() multiplies ln(d) by this value
     * instead of dividing by ln(m_Base) to obtain the logarithm of d to the
     * base m_Base (for base 2 this would be 1/ln(2)).
     * NOTE(review): the value is computed once here from the initial m_Base --
     * confirm that setBase() refreshes it when the base changes.
     */
    protected double il2 = 1.0 / Math.log(m_Base);

    /**
     * default constructor.
     */
    public CoverTree() {
        super();
        if (getMeasurePerformance()) {
            m_Stats = m_TreeStats = new TreePerformanceStats();
        }
    }

    /**
     * Describes this nearest neighbour search algorithm. The text ends with the
     * string form of the technical information for the class.
     * 
     * @return a description of the algorithm for displaying in the
     *         explorer/experimenter gui
     */
    @Override
    public String globalInfo() {
        final StringBuilder text = new StringBuilder();
        text.append("Class implementing the CoverTree datastructure.\n");
        text.append("The class is very much a translation of the c source code made ");
        text.append("available by the authors.\n\n");
        text.append("For more information and original source code see:\n\n");
        text.append(getTechnicalInformation().toString());
        return text.toString();
    }

    /**
     * Builds the bibliographic record (an INPROCEEDINGS entry) of the paper
     * this class is based on.
     * 
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        final TechnicalInformation paper = new TechnicalInformation(Type.INPROCEEDINGS);

        paper.setValue(Field.AUTHOR, "Alina Beygelzimer and Sham Kakade and John Langford");
        paper.setValue(Field.TITLE, "Cover trees for nearest neighbor");
        paper.setValue(Field.BOOKTITLE,
                "ICML'06: Proceedings of the 23rd international conference on Machine learning");
        paper.setValue(Field.PAGES, "97-104");
        paper.setValue(Field.YEAR, "2006");
        paper.setValue(Field.PUBLISHER, "ACM Press");
        paper.setValue(Field.ADDRESS, "New York, NY, USA");
        paper.setValue(Field.LOCATION, "Pittsburgh, Pennsylvania");
        paper.setValue(Field.HTTP, "http://hunch.net/~jl/projects/cover_tree/cover_tree.html");

        return paper;
    }

    /**
     * Lists the options understood by this class: the -B option first, followed
     * by every option of the superclass.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {
        final Vector<Option> available = new Vector<Option>();

        final Option baseOption = new Option("\tSet base of the expansion constant\n" + "\t(default = 1.3).",
                "B", 1, "-B <value>");
        available.add(baseOption);

        available.addAll(Collections.list(super.listOptions()));

        return available.elements();
    }

    /**
     * Parses a given list of options. The superclass options are handled
     * first; afterwards the base is taken from -B, or set to 1.3 when -B is
     * absent.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -B &lt;value&gt;
     *  Set base of the expansion constant
     *  (default = 1.3).
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        super.setOptions(options);

        final String baseValue = Utils.getOption('B', options);
        final double base = (baseValue.length() == 0) ? 1.3 : Double.parseDouble(baseValue);
        setBase(base);

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the cover tree: the superclass options
     * followed by -B and the current base.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {
        final Vector<String> settings = new Vector<String>();

        for (String inherited : super.getOptions()) {
            settings.add(inherited);
        }

        settings.add("-B");
        settings.add(String.valueOf(getBase()));

        return settings.toArray(new String[settings.size()]);
    }

    /**
     * Converts a scale/level into the distance it stands for, i.e. the base
     * raised to the power of the scale (e.g. 2^i for base 2).
     * 
     * @param s the level/scale
     * @return base^s
     */
    protected double dist_of_scale(int s) {
        final double base = m_Base;
        return Math.pow(base, s);
    }

    /**
     * Converts a distance into a scale/level, i.e. the "i" in base^i, rounded
     * up to the next integer.
     * 
     * @param d the value whose scale/level is to be determined.
     * @return the scale/level of the given value.
     */
    protected int get_scale(double d) {
        // il2 * ln(d) is the logarithm of d to the configured base.
        final double exponent = il2 * Math.log(d);
        return (int) Math.ceil(exponent);
    }

    /**
     * Makes an internal node for the instance with the given index. Only the
     * index is set; every other field keeps its default value.
     * 
     * @param idx The index of the instance the node represents.
     * @return Newly created CoverTreeNode.
     */
    protected CoverTreeNode new_node(Integer idx) {
        final CoverTreeNode node = new CoverTreeNode();
        node.idx = idx;
        return node;
    }

    /**
     * Makes a leaf node for the instance with the given index: zero distances,
     * no children, and a scale of 100.
     * 
     * @param idx The index of the instance this leaf node represents.
     * @return Newly created leaf CoverTreeNode.
     */
    protected CoverTreeNode new_leaf(Integer idx) {
        return new CoverTreeNode(idx, 0.0, 0.0, null, 0, 100);
    }

    /**
     * Returns the largest distance between the current reference point p and
     * the points in the given set. For every entry only the most recent
     * distance (top of its distance stack) is considered.
     * <p/>
     * The comparison is done in full double precision. Narrowing the stored
     * distances to float could return a maximum slightly smaller than the real
     * one, so that the scale derived from it would describe a ball that does
     * not contain the furthest point.
     * 
     * @param v The stack of DistanceNode objects.
     * @return Distance of the furthest point, or 0.0 if the stack is empty.
     */
    protected double max_set(Stack<DistanceNode> v) { // rename to
                                                      // maxChildDist
        double max = 0.0;
        for (int i = 0; i < v.length; i++) {
            DistanceNode n = v.element(i);
            // distance of n to the current reference point
            double current = n.dist.element(n.dist.length - 1).doubleValue();
            if (max < current) {
                max = current;
            }
        }
        return max;
    }

    /**
     * Partitions point_set by distance to the current reference point. Points
     * whose most recent distance is at most base^max_scale stay in point_set
     * (in their original order); all remaining points are pushed onto far_set.
     * 
     * @param point_set The supplied set from which all far points would be
     *          removed.
     * @param far_set The set in which all far points having distance >
     *          base^max_scale would be put into.
     * @param max_scale The given scale based on which the distances of points are
     *          judged to be far or near.
     */
    protected void split(Stack<DistanceNode> point_set, Stack<DistanceNode> far_set, int max_scale) {
        final double radius = dist_of_scale(max_scale);
        final List<DistanceNode> near = new java.util.LinkedList<DistanceNode>();

        for (int pos = 0; pos < point_set.length; pos++) {
            final DistanceNode candidate = point_set.element(pos);
            final double toReference = candidate.dist.element(candidate.dist.length - 1).doubleValue();
            if (toReference <= radius) {
                near.add(candidate);
            } else {
                far_set.push(candidate);
            }
        }

        // rebuild point_set so that it holds the near points only
        point_set.clear();
        point_set.addAll(near);
    }

    /**
     * Hands every point of point_set that lies within base^max_scale of
     * new_point over to new_point_set. For each point handed over, the distance
     * to new_point is pushed onto its distance stack. Points outside the ball
     * stay in point_set in their original order.
     * 
     * @param point_set The supplied set of instances from which all points
     *          covered by new_point will be removed.
     * @param new_point_set The set in which all points covered by new_point will
     *          be put into.
     * @param new_point The given new point.
     * @param max_scale The scale based on which distances are judged (radius of
     *          cover ball is calculated).
     */
    protected void dist_split(Stack<DistanceNode> point_set, Stack<DistanceNode> new_point_set,
            DistanceNode new_point, int max_scale) {
        final double radius = dist_of_scale(max_scale);
        final double cutoff = radius * radius; // third argument of the distance call
        final List<DistanceNode> uncovered = new java.util.LinkedList<DistanceNode>();

        for (int pos = 0; pos < point_set.length; pos++) {
            final DistanceNode candidate = point_set.element(pos);
            final double separation = Math.sqrt(m_DistanceFunction.distance(new_point.q(), candidate.q(), cutoff));
            if (separation <= radius) {
                candidate.dist.push(separation);
                new_point_set.push(candidate);
            } else {
                uncovered.add(candidate);
            }
        }

        // rebuild point_set so that it holds the uncovered points only
        point_set.clear();
        point_set.addAll(uncovered);
    }

    /**
     * Creates a cover tree recursively using batch insert method.
     * <p/>
     * The method handles three situations: (1) no points are left, so p becomes
     * a leaf; (2) all remaining points have distance 0 to p, so p and all of
     * them become leaf children of one node; (3) otherwise the points within
     * base^max_scale of p are processed at the next lower scale, first under p
     * itself and then under further points popped from point_set. On return,
     * point_set holds the points that were not used beneath the returned node.
     * m_NumNodes and m_NumLeaves are incremented for every node created.
     * 
     * @param p The index of the instance from which to create the first node. All
     *          other points will be inserted beneath this node for p.
     * @param max_scale The current scale/level where the node is to be created
     *          (Also determines the radius of the cover balls created at this
     *          level).
     * @param top_scale The max scale in the whole tree.
     * @param point_set The set of unprocessed points from which child nodes need
     *          to be created.
     * @param consumed_set The set of processed points from which child nodes have
     *          already been created. This would be used to find the radius of the
     *          cover ball of p.
     * @return the node of cover tree created with p.
     */
    protected CoverTreeNode batch_insert(Integer p, int max_scale, // current
            // scale/level
            int top_scale, // max scale/level for this dataset
            Stack<DistanceNode> point_set, // set of points that are nearer to p
            // [will also contain returned unused
            // points]
            Stack<DistanceNode> consumed_set) // to return the set of points that have
                                              // been used to calc. max_dist to a
                                              // descendant
    {
        if (point_set.length == 0) {
            // nothing left to place beneath p: p becomes a leaf
            CoverTreeNode leaf = new_leaf(p);
            m_NumNodes++; // incrementing node count
            m_NumLeaves++; // incrementing leaves count
            return leaf;
        } else {
            double max_dist = max_set(point_set); // O(|point_set|) the max dist
            // in point_set to point "p".
            int next_scale = Math.min(max_scale - 1, get_scale(max_dist));
            // get_scale(0) involves ln(0) = -Infinity, which the cast to int
            // turns into Integer.MIN_VALUE (assuming il2 is positive).
            if (next_scale == Integer.MIN_VALUE) { // We have points with distance
                // 0. if max_dist is 0.
                // p itself and every remaining point become leaf children of
                // a single node for p; all remaining points are consumed.
                Stack<CoverTreeNode> children = new Stack<CoverTreeNode>();
                CoverTreeNode leaf = new_leaf(p);
                children.push(leaf);
                m_NumLeaves++;
                m_NumNodes++; // incrementing node and leaf count
                while (point_set.length > 0) {
                    DistanceNode tmpnode = point_set.pop();
                    leaf = new_leaf(tmpnode.idx);
                    children.push(leaf);
                    m_NumLeaves++;
                    m_NumNodes++; // incrementing node and leaf count
                    consumed_set.push(tmpnode);
                }
                CoverTreeNode n = new_node(p); // make a new node out of p and assign
                // it the children created above
                m_NumNodes++; // incrementing node count
                n.scale = 100; // A magic number meant to be larger than all scales.
                n.max_dist = 0; // since all points have distance 0 to p
                n.num_children = children.length;
                n.children = children;
                return n;
            } else {
                // keep only the points within base^max_scale of p in
                // point_set; everything else is parked in far
                Stack<DistanceNode> far = new Stack<DistanceNode>();
                split(point_set, far, max_scale); // O(|point_set|)

                // first child: p itself, built at the next lower scale
                CoverTreeNode child = batch_insert(p, next_scale, top_scale, point_set, consumed_set);

                if (point_set.length == 0) { // not creating any node in this
                    // recursive call, since the child for p used up all near
                    // points; hand the far points back to the caller
                    point_set.replaceAllBy(far); // point_set=far;
                    return child;
                } else {
                    CoverTreeNode n = new_node(p);
                    m_NumNodes++; // incrementing node count
                    Stack<CoverTreeNode> children = new Stack<CoverTreeNode>();
                    children.push(child);

                    // every pass turns one leftover near point into a new
                    // child of n
                    while (point_set.length != 0) { // O(|point_set| * num_children)
                        Stack<DistanceNode> new_point_set = new Stack<DistanceNode>();
                        Stack<DistanceNode> new_consumed_set = new Stack<DistanceNode>();
                        DistanceNode tmpnode = point_set.pop();
                        // distance of the new child's point to p, read before
                        // any further distances are pushed
                        double new_dist = tmpnode.dist.last();
                        consumed_set.push(tmpnode);

                        // putting points closer to new_point into new_point_set (and
                        // removing them from point_set)
                        dist_split(point_set, new_point_set, tmpnode, max_scale); // O(|point_set|)
                        // putting points closer to new_point into new_point_set (and
                        // removing them from far)
                        dist_split(far, new_point_set, tmpnode, max_scale); // O(|far|)

                        CoverTreeNode new_child = batch_insert(tmpnode.idx, next_scale, top_scale, new_point_set,
                                new_consumed_set);
                        new_child.parent_dist = new_dist;

                        children.push(new_child);

                        // putting the unused points from new_point_set back into
                        // point_set and far
                        double fmax = dist_of_scale(max_scale);
                        tmpnode = null;
                        for (int i = 0; i < new_point_set.length; i++) { // O(|new_point_set|)
                            tmpnode = new_point_set.element(i);
                            // drop the distance to the new child's point so that
                            // the distance to p is on top again
                            tmpnode.dist.pop();
                            if (tmpnode.dist.last() <= fmax) {
                                point_set.push(tmpnode);
                            } else {
                                far.push(tmpnode);
                            }
                        }
                        // putting the points consumed while recursing for new_point
                        // into consumed_set
                        tmpnode = null;
                        for (int i = 0; i < new_consumed_set.length; i++) { // O(|new_consumed_set|)
                            tmpnode = new_consumed_set.element(i);
                            // drop the distance to the new child's point (see above)
                            tmpnode.dist.pop();
                            consumed_set.push(tmpnode);
                        }
                    } // end while(point_set.length!=0)
                    // all near points are used up; return the far ones to the caller
                    point_set.replaceAllBy(far); // point_set=far;
                    n.scale = top_scale - max_scale;
                    // largest distance from p to any point in consumed_set
                    n.max_dist = max_set(consumed_set);
                    n.num_children = children.length;
                    n.children = children;
                    return n;
                } // end else (points left after building the child for p)
            } // end else (next_scale != Integer.MIN_VALUE)
        } // end else (point_set not empty)
    }

    /**
     * Builds the tree on the given set of instances. P.S.: For internal use only.
     * Outside classes should call setInstances().
     * <p/>
     * The first instance becomes the root point. The distance from it to every
     * other instance is computed, and the resulting set is handed to
     * batch_insert() with the scale of the largest distance as both the current
     * and the top scale.
     * 
     * @param insts The instances on which to build the cover tree.
     * @throws Exception If the supplied set of Instances is empty, or if there
     *           are missing values.
     */
    protected void buildCoverTree(Instances insts) throws Exception {
        if (insts.numInstances() == 0) {
            throw new Exception("CoverTree: Empty set of instances. Cannot build tree.");
        }
        checkMissing(insts);
        if (m_EuclideanDistance == null) {
            m_DistanceFunction = m_EuclideanDistance = new EuclideanDistance(insts);
        } else {
            m_EuclideanDistance.setInstances(insts);
        }

        Stack<DistanceNode> point_set = new Stack<DistanceNode>();
        Stack<DistanceNode> consumed_set = new Stack<DistanceNode>();

        int p_idx = 0;
        Instance point_p = insts.instance(p_idx);

        // one DistanceNode per remaining instance, seeded with its distance to
        // the root point
        for (int i = 1; i < insts.numInstances(); i++) {
            DistanceNode temp = new DistanceNode();
            temp.dist = new Stack<Double>();
            temp.dist.push(
                    Math.sqrt(m_DistanceFunction.distance(point_p, insts.instance(i), Double.POSITIVE_INFINITY)));
            temp.idx = i;
            point_set.push(temp);
        }

        // the scale of the furthest point is the starting scale and the top scale
        int top_scale = get_scale(max_set(point_set));
        m_Root = batch_insert(p_idx, top_scale, top_scale, point_set, consumed_set);
    }

    /********************************* NNSearch related stuff ********************/

    /**
     * A class for a heap to store the nearest k neighbours to an instance. The
     * heap also takes care of cases where multiple neighbours are the same
     * distance away. i.e. the minimum size of the heap is k.
     * 
     * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
     * @version $Revision$
     */
    protected class MyHeap implements RevisionHandler {

        /** the heap. */
        MyHeapElement m_heap[] = null;

        /**
         * constructor.
         * 
         * @param maxSize the maximum size of the heap
         */
        public MyHeap(int maxSize) {
            if ((maxSize % 2) == 0) {
                maxSize++;
            }

            m_heap = new MyHeapElement[maxSize + 1];
            m_heap[0] = new MyHeapElement(-1);
        }

        /**
         * returns the size of the heap.
         * 
         * @return the size
         */
        public int size() {
            return m_heap[0].index;
        }

        /**
         * peeks at the first element.
         * 
         * @return the first element
         */
        public MyHeapElement peek() {
            return m_heap[1];
        }

        /**
         * returns the first element and removes it from the heap.
         * 
         * @return the first element
         * @throws Exception if no elements in heap
         */
        public MyHeapElement get() throws Exception {
            if (m_heap[0].index == 0) {
                throw new Exception("No elements present in the heap");
            }
            MyHeapElement r = m_heap[1];
            m_heap[1] = m_heap[m_heap[0].index];
            m_heap[0].index--;
            downheap();
            return r;
        }

        /**
         * adds the distance value to the heap.
         * 
         * @param d the distance value
         * @throws Exception if the heap gets too large
         */
        public void put(double d) throws Exception {
            if ((m_heap[0].index + 1) > (m_heap.length - 1)) {
                throw new Exception("the number of elements cannot exceed the " + "initially set maximum limit");
            }
            m_heap[0].index++;
            m_heap[m_heap[0].index] = new MyHeapElement(d);
            upheap();
        }

        /**
         * Puts an element by substituting it in place of the top most element.
         * 
         * @param d The distance value.
         * @throws Exception If distance is smaller than that of the head element.
         */
        public void putBySubstitute(double d) throws Exception {
            MyHeapElement head = get();
            put(d);
            if (head.distance == m_heap[1].distance) {
                putKthNearest(head.distance);
            } else if (head.distance > m_heap[1].distance) {
                m_KthNearest = null;
                m_KthNearestSize = 0;
                initSize = 10;
            } else if (head.distance < m_heap[1].distance) {
                throw new Exception("The substituted element is greater than the "
                        + "head element. put() should have been called " + "in place of putBySubstitute()");
            }
        }

        /** the kth nearest ones. */
        MyHeapElement m_KthNearest[] = null;

        /** The number of kth nearest elements. */
        int m_KthNearestSize = 0;

        /** the initial size of the heap. */
        int initSize = 10;

        /**
         * returns the number of k nearest.
         * 
         * @return the number of k nearest
         * @see #m_KthNearestSize
         */
        public int noOfKthNearest() {
            return m_KthNearestSize;
        }

        /**
         * Stores kth nearest elements (if there are more than one).
         * 
         * @param d the distance
         */
        public void putKthNearest(double d) {
            if (m_KthNearest == null) {
                m_KthNearest = new MyHeapElement[initSize];
            }
            if (m_KthNearestSize >= m_KthNearest.length) {
                initSize += initSize;
                MyHeapElement temp[] = new MyHeapElement[initSize];
                System.arraycopy(m_KthNearest, 0, temp, 0, m_KthNearest.length);
                m_KthNearest = temp;
            }
            m_KthNearest[m_KthNearestSize++] = new MyHeapElement(d);
        }

        /**
         * returns the kth nearest element or null if none there.
         * 
         * @return the kth nearest element
         */
        public MyHeapElement getKthNearest() {
            if (m_KthNearestSize == 0) {
                return null;
            }
            m_KthNearestSize--;
            return m_KthNearest[m_KthNearestSize];
        }

        /**
         * performs upheap operation for the heap to maintian its properties.
         */
        protected void upheap() {
            int i = m_heap[0].index;
            MyHeapElement temp;
            while (i > 1 && m_heap[i].distance > m_heap[i / 2].distance) {
                temp = m_heap[i];
                m_heap[i] = m_heap[i / 2];
                i = i / 2;
                m_heap[i] = temp; // this is i/2 done here to avoid another division.
            }
        }

        /**
         * Restores the heap property after the top element has been replaced by
         * moving the element at position 1 down while one of its children has a
         * greater distance.
         */
        protected void downheap() {
            // slot 0 only carries the heap size (in its index field); it is never moved
            final int size = m_heap[0].index;
            int pos = 1;
            for (;;) {
                final int left = 2 * pos;
                final int right = left + 1;
                final boolean hasRight = right <= size;
                final boolean mustSink = (left <= size && m_heap[pos].distance < m_heap[left].distance)
                        || (hasRight && m_heap[pos].distance < m_heap[right].distance);
                if (!mustSink) {
                    return;
                }
                // swap with the larger child; the right child wins unless the left one
                // is strictly greater, and the left one is used when there is no right
                final int target = (hasRight && !(m_heap[left].distance > m_heap[right].distance)) ? right
                        : left;
                final MyHeapElement sinking = m_heap[pos];
                m_heap[pos] = m_heap[target];
                m_heap[target] = sinking;
                pos = target;
            }
        }

        /**
         * Returns the total size, i.e. the sum of size() and noOfKthNearest().
         *
         * @return the total size
         */
        public int totalSize() {
            final int inHeap = size();
            final int kthNearest = noOfKthNearest();
            return inHeap + kthNearest;
        }

        /**
         * Returns the revision string extracted from the revision keyword.
         *
         * @return the revision
         */
        @Override
        public String getRevision() {
            final String revisionKeyword = "$Revision$";
            return RevisionUtils.extract(revisionKeyword);
        }
    }

    /**
     * Heap entry used for storing data about a neighboring instance: a distance
     * plus an index.
     *
     * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
     * @version $Revision$
     */
    protected class MyHeapElement implements RevisionHandler {

        /**
         * The index of this element. The element in the first slot of the heap
         * uses this field to hold the size of the heap instead.
         */
        int index = 0;

        /** The distance stored in this element. */
        public double distance;

        /**
         * Creates an element holding the given distance.
         *
         * @param d the distance
         */
        public MyHeapElement(double d) {
            this.distance = d;
        }

        /**
         * Returns the revision string extracted from the revision keyword.
         *
         * @return the revision
         */
        @Override
        public String getRevision() {
            final String revisionKeyword = "$Revision$";
            return RevisionUtils.extract(revisionKeyword);
        }
    }

    /**
     * Pairs a CoverTreeNode with its distance to the current query node.
     *
     * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
     * @version $Revision$
     */
    private class d_node implements RevisionHandler {

        /** The node. */
        CoverTreeNode n;

        /** The distance of the node's point to the query point. */
        double dist;

        /**
         * Creates a new pair.
         *
         * @param d The distance of the node to the query.
         * @param node The node.
         */
        public d_node(double d, CoverTreeNode node) {
            this.dist = d;
            this.n = node;
        }

        /**
         * Returns the revision string extracted from the revision keyword.
         *
         * @return the revision
         */
        @Override
        public String getRevision() {
            final String revisionKeyword = "$Revision$";
            return RevisionUtils.extract(revisionKeyword);
        }
    }

    /**
     * Initializes a heap with k values of the given upper_bound. A heap that
     * already holds values is emptied first.
     *
     * @param heap The heap to put values into.
     * @param upper_bound The value the heap should be initialized with.
     * @param k The number of times upper_bound should be put into the heap.
     * @throws Exception If there is some problem in initializing the heap (if k
     *           &gt; size of the heap).
     */
    protected void setter(MyHeap heap, double upper_bound, final int k) throws Exception {
        final boolean holdsValues = heap.size() > 0;
        if (holdsValues) {
            // the heap keeps its size in the index field of slot 0
            heap.m_heap[0].index = 0;
        }

        for (int filled = heap.size(); filled < k; filled = heap.size()) {
            heap.put(upper_bound);
        }
    }

    /**
     * Replaces the current top/max value of the heap with a new one by
     * delegating to the heap's putBySubstitute(). The new value should be &lt;=
     * the old top value.
     *
     * @param upper_bound The heap.
     * @param new_bound The new value that should replace the old top one.
     * @throws Exception if the new value is greater than the old value.
     */
    protected void update(MyHeap upper_bound, double new_bound) throws Exception {
        final MyHeap heap = upper_bound;
        heap.putBySubstitute(new_bound);
    }

    /**
     * Returns the cover set for a given level/scale. A cover set for a level
     * consists of the nodes whose Instances/centres are inside the query ball at
     * that level. If the given level does not exist yet (it is being used for the
     * first time), empty cover sets are pushed until it does.
     *
     * @param idx The level/scale for which the cover set is required.
     * @param cover_sets The cover sets; a stack of stacks of d_node objects.
     * @return The cover set for the given level/scale.
     */
    protected Stack<d_node> getCoverSet(int idx, Stack<Stack<d_node>> cover_sets) {
        // one push for every level between the current last one and idx
        for (int last = cover_sets.length - 1; last < idx; last++) {
            cover_sets.push(new Stack<d_node>());
        }
        return cover_sets.element(idx);
    }

    /**
     * Copies the contents of one zero set to another. This is required when a
     * child of some query node is going to be inspected (if the queries are given
     * in batch in the form of a cover tree). Only those nodes that are inside the
     * query ball of query_chi are copied. P.S.: A zero set is the set of all leaf
     * nodes that were found to be inside the query ball.
     *
     * @param query_chi The child node of our query node that we are going to
     *          inspect.
     * @param new_upper_k New heap that will store the distances of the k NNs for
     *          query_chi; it is tightened whenever a closer node is met.
     * @param zero_set The zero set of query_chi's parent that needs to be copied.
     * @param new_zero_set The zero set of query_chi; it is cleared first and then
     *          receives the copied nodes together with their distance to
     *          query_chi.
     * @throws Exception If there is some problem.
     */
    protected void copy_zero_set(CoverTreeNode query_chi, MyHeap new_upper_k, Stack<d_node> zero_set,
            Stack<d_node> new_zero_set) throws Exception {
        new_zero_set.clear();
        for (int pos = 0; pos < zero_set.length; pos++) {
            final d_node candidate = zero_set.element(pos);
            // recomputed for every node because the heap may have been tightened
            final double reach = new_upper_k.peek().distance + query_chi.max_dist;
            if (!shell(candidate.dist, query_chi.parent_dist, reach)) {
                continue; // ruled out without a distance computation
            }

            // the cut-off is handed over squared and the result is square-rooted
            final double raw = m_DistanceFunction.distance(query_chi.p(), candidate.n.p(), reach * reach);
            final double dist = Math.sqrt(raw);
            if (m_TreeStats != null) {
                m_TreeStats.incrPointCount();
            }
            if (!(dist <= reach)) {
                continue;
            }

            if (dist < new_upper_k.peek().distance) {
                update(new_upper_k, dist);
            }
            new_zero_set.push(new d_node(dist, candidate.n));
            if (m_TreeStats != null) {
                m_TreeStats.incrLeafCount();
            }
        }
    }

    /**
     * Copies the contents of one set of cover sets to another. It is required
     * when a child of some query node is going to be inspected (if the queries
     * are given in batch in the form of a cover tree). For each level, only those
     * nodes are copied which are inside the query ball of query_chi at that
     * level.
     *
     * @param query_chi The child node of our query node that we are going to
     *          inspect.
     * @param new_upper_k New heap that will store the distances of the k NNs for
     *          query_chi; it is tightened whenever a closer node is met.
     * @param cover_sets The cover_sets of query_chi's parent, which need to be
     *          copied to new_cover_sets.
     * @param new_cover_sets The new set of cover_sets; it is cleared first and
     *          then receives, level by level, the copied nodes together with
     *          their distance to query_chi. Levels are created on demand.
     * @param current_scale The scale/level we are inspecting in our cover tree.
     * @param max_scale The maximum level so far possible in our search (this is
     *          only updated as we descend and a deeper child is found inside the
     *          query ball).
     * @throws Exception If there is a problem.
     */
    protected void copy_cover_sets(CoverTreeNode query_chi, MyHeap new_upper_k, Stack<Stack<d_node>> cover_sets,
            Stack<Stack<d_node>> new_cover_sets, int current_scale, int max_scale) throws Exception {
        new_cover_sets.clear();
        for (; current_scale <= max_scale; current_scale++) {
            Stack<d_node> cover_set_currentscale = getCoverSet(current_scale, cover_sets);
            // new_cover_sets has just been emptied, so the destination level must be
            // fetched through getCoverSet(), which creates missing levels; indexing
            // it directly with element() would run past the end of the stack.
            Stack<d_node> new_cover_set_currentscale = getCoverSet(current_scale, new_cover_sets);
            for (int i = 0; i < cover_set_currentscale.length; i++) {
                d_node ele = cover_set_currentscale.element(i);
                // recomputed for every node because the heap may have been tightened
                double upper_dist = new_upper_k.peek().distance + query_chi.max_dist + ele.n.max_dist;
                if (shell(ele.dist, query_chi.parent_dist, upper_dist)) {
                    // the cut-off is handed over squared and the result is square-rooted
                    double d = Math
                            .sqrt(m_DistanceFunction.distance(query_chi.p(), ele.n.p(), upper_dist * upper_dist));
                    if (m_TreeStats != null) {
                        m_TreeStats.incrPointCount();
                    }
                    if (d <= upper_dist) {
                        if (d < new_upper_k.peek().distance) {
                            update(new_upper_k, d);
                        }
                        new_cover_set_currentscale.push(new d_node(d, ele.n));
                        if (m_TreeStats != null) {
                            m_TreeStats.incrIntNodeCount();
                        }
                    } // end if(d<=..
                } // end if(shell(...
            } // end for(coverset_i)
        } // end for(scales)
    }

    /**
     * Prints the given cover sets and zero set: for every scale its number
     * followed by the points of its nodes, then the zero set under the heading
     * "infinity".
     *
     * @param cover_sets The cover sets to print.
     * @param zero_set The zero set to print.
     * @param current_scale The scale/level to start printing the cover sets from.
     * @param max_scale The max scale/level to print the cover sets up to.
     */
    void print_cover_sets(Stack<Stack<d_node>> cover_sets, Stack<d_node> zero_set, int current_scale,
            int max_scale) {
        println("cover set = ");
        for (int scale = current_scale; scale <= max_scale; scale++) {
            println("" + scale);
            final Stack<d_node> level = cover_sets.element(scale);
            for (int pos = 0; pos < level.length; pos++) {
                println(level.element(pos).n.p());
            }
        }

        println("infinity");
        int pos = 0;
        while (pos < zero_set.length) {
            println(zero_set.element(pos).n.p());
            pos++;
        }
    }

    /**
     * Swaps two nodes in a cover set.
     *
     * @param a The index of the first node.
     * @param b The index of the second node.
     * @param cover_set The cover set in which the two nodes are.
     */
    protected void SWAP(int a, int b, Stack<d_node> cover_set) {
        final d_node first = cover_set.element(a);
        final d_node second = cover_set.element(b);
        cover_set.set(a, second);
        cover_set.set(b, first);
    }

    /**
     * Returns the difference between the query distances of two nodes. It is
     * used in half-sorting a cover set.
     *
     * @param p1 The index of the first node.
     * @param p2 The index of the second node.
     * @param cover_set The cover set containing the two given nodes.
     * @return dist_to_query_of_p1 - dist_to_query_of_p2
     */
    protected double compare(final int p1, final int p2, Stack<d_node> cover_set) {
        final double first = cover_set.element(p1).dist;
        final double second = cover_set.element(p2).dist;
        return first - second;
    }

    /**
     * Half-sorts a cover set, so that nodes nearer to the query are at the front.
     * Each pass puts the first, middle and last node of the active range in
     * order, partitions the range around the middle node and then continues with
     * the left partition only; the start of the range never moves.
     * 
     * @param cover_set The cover set to sort (it is reordered in place).
     */

    protected void halfsort(Stack<d_node> cover_set) {
        // nothing to reorder for an empty or single-node cover set
        if (cover_set.length <= 1) {
            return;
        }
        int start = 0; // fixed lower end of the active range
        int hi = cover_set.length - 1; // upper end of the active range, shrinks every pass
        int right = hi;
        int left;

        while (right > start) {
            // middle of the active range; its node serves as the pivot
            int mid = start + ((hi - start) >> 1);

            // order the nodes at start, mid and hi by distance; jumpover skips the
            // third comparison when hi and mid were already in order
            boolean jumpover = false;
            if (compare(mid, start, cover_set) < 0.0) {
                SWAP(mid, start, cover_set);
            }
            if (compare(hi, mid, cover_set) < 0.0) {
                SWAP(mid, hi, cover_set);
            } else {
                jumpover = true;
            }
            if (!jumpover && compare(mid, start, cover_set) < 0.0) {
                SWAP(mid, start, cover_set);
            }

            ;

            // start and hi are already on the correct side of the pivot
            left = start + 1;
            right = hi - 1;

            do {
                // skip nodes on the left that are nearer than the pivot
                while (compare(left, mid, cover_set) < 0.0) {
                    left++;
                }

                // skip nodes on the right that are farther than the pivot
                while (compare(mid, right, cover_set) < 0.0) {
                    right--;
                }

                if (left < right) {
                    SWAP(left, right, cover_set);
                    // the pivot itself may just have been moved; keep mid pointing at it
                    if (mid == left) {
                        mid = right;
                    } else if (mid == right) {
                        mid = left;
                    }
                    left++;
                    right--;
                } else if (left == right) {
                    left++;
                    right--;
                    break;
                }
            } while (left <= right);
            // continue with the left partition only
            hi = right;
        }
    }

    /**
     * Checks whether a child node can be inside a query ball without calculating
     * the child node's distance to the query. This avoids unnecessary distance
     * calculations.
     *
     * @param parent_query_dist The distance of the parent to the query.
     * @param child_parent_dist The distance of the child to the parent.
     * @param upper_bound The distance to the query of the best kth NN found so
     *          far.
     * @return true if the child can be inside the query ball, i.e. if
     *         parent_query_dist - child_parent_dist &lt;= upper_bound.
     */
    protected boolean shell(double parent_query_dist, double child_parent_dist, double upper_bound) {
        final double nearestPossible = parent_query_dist - child_parent_dist;
        return nearestPossible <= upper_bound;
    }

    /**
     * Adds nodes for inspection at the next level during NN search. For every
     * node in the cover set of the current scale, its children are examined:
     * internal nodes are added to the cover set at the level of the child node
     * which is added, and leaf nodes are added to the zero set.
     * 
     * An optimization to consider: Make all distance evaluations occur in
     * descend.
     * 
     * Instead of passing a cover_set, pass a stack of cover sets. The last
     * element holds d_nodes with your distance. The next lower element holds a
     * d_node with the distance to your query parent, next = query grand parent,
     * etc..
     * 
     * Compute distances in the presence of the tighter upper bound.
     * 
     * @param query The query (in shape of a cover tree node, as we are doing
     *          batch searching).
     * @param upper_k Heap containing distances of best k-NNs found so far; it is
     *          tightened whenever a closer child is found.
     * @param current_scale The current scale/level being looked at in the tree.
     * @param max_scale The max scale/level that has so far been looked at.
     * @param cover_sets The cover sets of tree nodes, one for each level of the
     *          tree.
     * @param zero_set The set containing leaf nodes.
     * @return A new max_scale, if we descend to a deeper level; otherwise the
     *         given max_scale.
     * @throws Exception If there is some problem (in updating the heap upper_k).
     */
    protected int descend(final CoverTreeNode query, MyHeap upper_k, int current_scale, int max_scale,
            // max_scale cannot be updated in place in Java, so the new value is
            // returned instead
            Stack<Stack<d_node>> cover_sets, // contains the children in set Q of the
            // paper
            Stack<d_node> zero_set) // contains the children at the lowest level,
            // i.e. -infinity
            throws Exception {
        d_node parent;
        Stack<d_node> cover_set_currentscale = getCoverSet(current_scale, cover_sets);
        for (int i = 0; i < cover_set_currentscale.length; i++) {
            parent = cover_set_currentscale.element(i);
            CoverTreeNode par = parent.n;
            // bound for this parent: top of the heap plus twice the query's max_dist
            double upper_dist = upper_k.peek().distance + query.max_dist + query.max_dist;
            if (parent.dist <= upper_dist + par.max_dist) {
                CoverTreeNode chi;
                if (par == m_Root && par.num_children == 0) {
                    // only one root (which is also a leaf) node
                    chi = par;
                } else {
                    chi = par.children.element(0);
                }
                // looking at child_0 (which is the parent itself), so the parent's
                // distance is reused and no distance computation is needed
                if (parent.dist <= upper_dist + chi.max_dist) {
                    if (chi.num_children > 0) {
                        if (max_scale < chi.scale) {
                            max_scale = chi.scale;
                        }
                        d_node temp = new d_node(parent.dist, chi);
                        getCoverSet(chi.scale, cover_sets).push(temp);
                        if (m_TreeStats != null) {
                            m_TreeStats.incrIntNodeCount();
                        }
                    } else if (parent.dist <= upper_dist) {
                        d_node temp = new d_node(parent.dist, chi);
                        zero_set.push(temp);
                        if (m_TreeStats != null) {
                            m_TreeStats.incrLeafCount();
                        }
                    }
                }
                // remaining children: these need their own distance to the query
                for (int c = 1; c < par.num_children; c++) {
                    chi = par.children.element(c);
                    // re-read from the heap on every pass, as update() below may have
                    // tightened the bound
                    double upper_chi = upper_k.peek().distance + chi.max_dist + query.max_dist + query.max_dist;
                    // parent_query_dist - child_parent_dist <= upper_chi, i.e. the child
                    // can be inside the shrunk query ball. NOT the same as the check
                    // parent.dist <= upper_dist + chi.max_dist above.
                    if (shell(parent.dist, chi.parent_dist, upper_chi)) {
                        // the cut-off is handed over squared and the result is square-rooted
                        double d = Math.sqrt(m_DistanceFunction.distance(query.p(), chi.p(), upper_chi * upper_chi,
                                m_TreeStats));
                        if (m_TreeStats != null) {
                            m_TreeStats.incrPointCount();
                        }
                        if (d <= upper_chi) { // if child is inside the shrunk query ball
                            if (d < upper_k.peek().distance) {
                                update(upper_k, d);
                            }
                            if (chi.num_children > 0) {
                                if (max_scale < chi.scale) {
                                    max_scale = chi.scale;
                                }
                                d_node temp = new d_node(d, chi);
                                getCoverSet(chi.scale, cover_sets).push(temp);
                                if (m_TreeStats != null) {
                                    m_TreeStats.incrIntNodeCount();
                                }
                            } else if (d <= upper_chi - chi.max_dist) {
                                d_node temp = new d_node(d, chi);
                                zero_set.push(temp);
                                if (m_TreeStats != null) {
                                    m_TreeStats.incrLeafCount();
                                }
                            }
                        } // end if(d<=upper_chi)
                    } // end if(shell(parent.dist,...
                } // end for(child_1 to n)
            } // end if(parent.dist<=upper_dist..
        } // end for(covers_sets[current_scale][i])
        return max_scale;
    }

    /**
     * Does a brute force NN search on the nodes in the given zero set. A zero set
     * might have some nodes added to it that were not k-NNs, so a brute-force
     * pass is needed to pick only the k-NNs (without calculating distances, as
     * each node in the zero set already had its distance calculated to the
     * query, which is stored with the node). If the query node has children, the
     * search recurses into each of them instead.
     *
     * @param k The k in kNN.
     * @param query The query.
     * @param zero_set The zero set on which the brute force NN search is
     *          performed.
     * @param upper_k The heap storing distances of k-NNs found during the search.
     * @param results The returned k-NNs; one NeighborList is pushed per query
     *          node without children.
     * @throws Exception If there is some problem.
     */
    protected void brute_nearest(final int k, final CoverTreeNode query, Stack<d_node> zero_set, MyHeap upper_k,
            Stack<NeighborList> results) throws Exception {
        if (!(query.num_children > 0)) {
            // query without children: keep the nodes that are within the bound
            final NeighborList neighbours = new NeighborList(k);
            for (int pos = 0; pos < zero_set.length; pos++) {
                final d_node candidate = zero_set.element(pos);
                if (candidate.dist <= upper_k.peek().distance) {
                    neighbours.insertSorted(candidate.dist, candidate.n.p());
                }
            }
            results.push(neighbours);
            return;
        }

        final Stack<d_node> childZeroSet = new Stack<d_node>();
        // child_0 is searched with the zero set and the heap of its parent
        brute_nearest(k, query.children.element(0), zero_set, upper_k, results);

        // the remaining children get a filtered copy of the zero set and a heap
        // of their own
        final MyHeap childUpperK = new MyHeap(k);
        for (int c = 1; c < query.children.length; c++) {
            final CoverTreeNode child = query.children.element(c);
            setter(childUpperK, upper_k.peek().distance + child.parent_dist, k);
            copy_zero_set(child, childUpperK, zero_set, childZeroSet);
            brute_nearest(k, child, childZeroSet, childUpperK, results);
        }
    }

    /**
     * Performs a recursive k-NN search for a given batch of queries provided in
     * the form of a cover tree. P.S.: This function should not be called from
     * outside. Outside classes should use kNearestNeighbours() instead.
     *
     * @param k The number of NNs to find.
     * @param query_node The node of the query tree to start the search from.
     * @param cover_sets The set of sets that contains internal nodes that were
     *          found to be inside the query ball at previous scales/levels
     *          (initially there would be just the root node at root level).
     * @param zero_set The set that'll contain the leaf nodes that are found to be
     *          inside the query ball.
     * @param current_scale The level/scale to do the search from (this value
     *          would be used to inspect the cover set in the provided set of
     *          cover sets).
     * @param max_scale The max scale/level that has so far been inspected.
     * @param upper_k The heap containing distances of the best k-NNs found so far
     *          (initialized to Double.POSITIVE_INFINITY).
     * @param results The list of returned k-NNs.
     * @throws Exception If there is some problem during the search.
     */
    protected void internal_batch_nearest_neighbor(final int k, final CoverTreeNode query_node,
            Stack<Stack<d_node>> cover_sets, Stack<d_node> zero_set, int current_scale, int max_scale,
            MyHeap upper_k, Stack<NeighborList> results) throws Exception {
        if (current_scale > max_scale) {
            // all remaining points are in the zero set
            brute_nearest(k, query_node, zero_set, upper_k, results);
            return;
        }

        // NOTE(review): 100 looks like a sentinel scale that is never reduced -
        // confirm against the tree construction code
        final boolean reduceQuery = query_node.scale <= current_scale && query_node.scale != 100;
        if (!reduceQuery) {
            // reduce the cover set scale (j < i in the paper)
            final Stack<d_node> currentLevel = getCoverSet(current_scale, cover_sets);
            halfsort(currentLevel);
            final int deepest = descend(query_node, upper_k, current_scale, max_scale, cover_sets, zero_set);
            currentLevel.clear();
            internal_batch_nearest_neighbor(k, query_node, cover_sets, zero_set, current_scale + 1, deepest,
                    upper_k, results);
            return;
        }

        // our query_node has too much scale: reduce it (j >= i in the paper)
        Stack<d_node> childZeroSet = new Stack<d_node>();
        Stack<Stack<d_node>> childCoverSets = new Stack<Stack<d_node>>();
        MyHeap childUpperK = new MyHeap(k);

        // child_1 and onwards each get filtered copies of the zero set and of the
        // cover sets, plus a heap of their own
        for (int c = 1; c < query_node.num_children; c++) {
            final CoverTreeNode child = query_node.children.element(c);
            setter(childUpperK, upper_k.peek().distance + child.parent_dist, k);
            copy_zero_set(child, childUpperK, zero_set, childZeroSet);
            copy_cover_sets(child, childUpperK, cover_sets, childCoverSets, current_scale, max_scale);
            // search for the child among the nodes nearer to it
            internal_batch_nearest_neighbor(k, child, childCoverSets, childZeroSet, current_scale, max_scale,
                    childUpperK, results);
        }
        // the copies are not needed any more
        childCoverSets = null;
        childZeroSet = null;
        childUpperK = null;

        // child_0 is the parent itself, so it is searched with the original sets
        // and heap
        internal_batch_nearest_neighbor(k, query_node.children.element(0), cover_sets, zero_set, current_scale,
                max_scale, upper_k, results);
    }

    /**
     * Performs k-NN search for a batch of queries provided in the form of a cover
     * tree. P.S.: Outside classes should call kNearestNeighbours().
     *
     * @param k The number of k-NNs to find.
     * @param tree_root The root of the cover tree on which k-NN search is to be
     *          performed.
     * @param query_root The root of the cover tree consisting of queries.
     * @param results The list of returned k-NNs.
     * @throws Exception If there is some problem during the search.
     */
    protected void batch_nearest_neighbor(final int k, CoverTreeNode tree_root, CoverTreeNode query_root,
            Stack<NeighborList> results) throws Exception {
        // the covering nodes at each level
        final Stack<Stack<d_node>> coverSets = new Stack<Stack<d_node>>(100);
        // the nodes thought to be nearest at the leaf level
        final Stack<d_node> zeroSet = new Stack<d_node>();

        // start with k infinite bounds
        final MyHeap upperK = new MyHeap(k);
        setter(upperK, Double.POSITIVE_INFINITY, k);

        // distance from the top query point to the top node point
        final double rootDist = Math
                .sqrt(m_DistanceFunction.distance(query_root.p(), tree_root.p(), Double.POSITIVE_INFINITY));
        update(upperK, rootDist);

        // the search starts from the tree root at level 0
        getCoverSet(0, coverSets).push(new d_node(rootDist, tree_root));

        // incrementing counts for the root node
        if (m_TreeStats != null) {
            m_TreeStats.incrPointCount();
            if (tree_root.num_children > 0) {
                m_TreeStats.incrIntNodeCount();
            } else {
                m_TreeStats.incrLeafCount();
            }
        }

        internal_batch_nearest_neighbor(k, query_root, coverSets, zeroSet, 0, 0, upperK, results);
    }

    /**
     * Performs k-NN search for a single given query/test Instance. The tree is
     * walked level by level, starting from the root; children without children
     * of their own are collected in a zero set, which is filtered against the
     * final bound at the end.
     *
     * @param target The query/test instance.
     * @param k Number of k-NNs to find.
     * @return List of k-NNs.
     * @throws Exception If there is some problem during the search for k-NNs.
     */
    protected NeighborList findKNearest(final Instance target, final int k) throws Exception {
        Stack<d_node> frontier = new Stack<d_node>();
        final Stack<d_node> zeroSet = new Stack<d_node>();
        final MyHeap upperK = new MyHeap(k);

        final double rootDist = Math
                .sqrt(m_DistanceFunction.distance(m_Root.p(), target, Double.POSITIVE_INFINITY, m_TreeStats));
        frontier.push(new d_node(rootDist, m_Root));
        setter(upperK, Double.POSITIVE_INFINITY, k);
        update(upperK, rootDist);

        // updating stats for the root node
        if (m_TreeStats != null) {
            if (m_Root.num_children > 0) {
                m_TreeStats.incrIntNodeCount();
            } else {
                m_TreeStats.incrLeafCount();
            }
            m_TreeStats.incrPointCount();
        }

        // if root is the only node, it is the only possible neighbour
        if (m_Root.num_children == 0) {
            final NeighborList rootOnly = new NeighborList(k);
            rootOnly.insertSorted(rootDist, m_Root.p());
            return rootOnly;
        }

        while (frontier.length > 0) {
            final Stack<d_node> nextFrontier = new Stack<d_node>();
            for (int pos = 0; pos < frontier.length; pos++) {
                final d_node entry = frontier.element(pos);
                final CoverTreeNode node = entry.n;
                for (int c = 0; c < node.num_children; c++) {
                    final CoverTreeNode child = node.children.element(c);
                    final double bound = upperK.peek().distance;
                    final double reach = bound + child.max_dist;

                    final double dist;
                    if (c == 0) {
                        // the parent's distance is reused for the first child
                        dist = entry.dist;
                    } else {
                        // the cut-off is handed over squared and the result is
                        // square-rooted
                        dist = Math.sqrt(m_DistanceFunction.distance(child.p(), target, reach * reach, m_TreeStats));
                        if (m_TreeStats != null) {
                            m_TreeStats.incrPointCount();
                        }
                    }
                    if (!(dist <= reach)) {
                        continue;
                    }

                    if (c > 0 && dist < bound) {
                        update(upperK, dist);
                    }
                    if (child.num_children > 0) {
                        nextFrontier.push(new d_node(dist, child));
                        if (m_TreeStats != null) {
                            m_TreeStats.incrIntNodeCount();
                        }
                    } else if (dist <= bound) {
                        zeroSet.push(new d_node(dist, child));
                        if (m_TreeStats != null) {
                            m_TreeStats.incrLeafCount();
                        }
                    }
                }
            }
            frontier = nextFrontier;
        }

        // the bound may have been tightened after a node entered the zero set, so
        // every collected node is checked against the final bound
        final NeighborList neighbours = new NeighborList(k);
        final double finalBound = upperK.peek().distance;
        for (int pos = 0; pos < zeroSet.length; pos++) {
            final d_node candidate = zeroSet.element(pos);
            if (candidate.dist <= finalBound) {
                neighbours.insertSorted(candidate.dist, candidate.n.p());
            }
        }

        if (neighbours.currentLength() <= 0) {
            throw new Exception("Error: No neighbour found. This cannot happen");
        }

        return neighbours;
    }

    /********************************* NNSearch related stuff above. ********************/

    /**
     * Finds the k nearest neighbours of the supplied target instance among the
     * training instances previously given to this tree (through the
     * setInstances method). The distances of the returned neighbours are stored
     * so that they can be fetched afterwards with getDistances(). P.S.: More
     * than k instances may be returned if several instances are tied with the
     * kth nearest neighbour in their distance to the target.
     * 
     * @param target The instance for which k-NNs are required.
     * @param k The number of k-NNs to find.
     * @return The k-NN instances of the given target instance.
     * @throws Exception If there is some problem finding the k-NNs.
     */
    @Override
    public Instances kNearestNeighbours(Instance target, int k) throws Exception {
        if (m_Stats != null) {
            m_Stats.searchStart();
        }

        // Build a cover tree over a dataset holding only the target; its root is
        // what gets passed to batch_nearest_neighbor as the query side.
        Instances querySet = new Instances(m_Instances, 0);
        querySet.add(target);
        CoverTree queryTree = new CoverTree();
        queryTree.setInstances(querySet);

        Stack<NeighborList> found = new Stack<NeighborList>();
        batch_nearest_neighbor(k, this.m_Root, queryTree.m_Root, found);

        if (m_Stats != null) {
            m_Stats.searchFinish();
        }

        // The neighbour list for the target is read from position 0 of the
        // results; it is copied into a fresh dataset and a parallel distance array.
        Instances neighbours = new Instances(m_Instances, 0);
        NeighborList nearest = found.element(0);
        NeighborNode current = nearest.getFirst();
        m_DistanceList = new double[nearest.currentLength()];
        for (int pos = 0; current != null; pos++, current = current.m_Next) {
            neighbours.add(current.m_Instance);
            m_DistanceList[pos] = current.m_Distance;
        }
        return neighbours;
    }

    /**
     * Finds the single nearest neighbour of the given target instance among the
     * previously supplied training instances. Delegates to kNearestNeighbours()
     * with k = 1 and returns the first instance of its result.
     * 
     * @param target The instance for which NN is required.
     * @throws Exception If there is some problem finding the nearest neighbour.
     * @return The NN instance of the target instance.
     */
    @Override
    public Instance nearestNeighbour(Instance target) throws Exception {
        Instances closest = kNearestNeighbours(target, 1);
        return closest.instance(0);
    }

    /**
     * Gives back the distances recorded by the most recent call to
     * kNearestNeighbours()/nearestNeighbour(). The stored array itself is
     * returned, not a copy.
     * 
     * @throws Exception If the tree hasn't been built (by calling
     *           setInstances()), or none of kNearestNeighbours() or
     *           nearestNeighbour() has been called before.
     * @return The distances (in the same order) of the k-NNs.
     */
    @Override
    public double[] getDistances() throws Exception {
        boolean ready = (m_Instances != null) && (m_DistanceList != null);
        if (ready) {
            return m_DistanceList;
        }
        throw new Exception("The tree has not been supplied with a set of "
                + "instances or getDistances() has been called " + "before calling kNearestNeighbours().");
    }

    /**
     * Checks if there is any instance with missing values. Throws an exception if
     * there is, as this CoverTree implementation rejects missing values. The
     * class attribute is skipped, so a missing class value is tolerated.
     * 
     * @param instances the instances to check
     * @throws Exception if a missing value is encountered in a non-class
     *           attribute
     */
    protected void checkMissing(Instances instances) throws Exception {
        for (int i = 0; i < instances.numInstances(); i++) {
            Instance ins = instances.instance(i);
            // the class index is the same for every stored value of this instance
            int classIdx = ins.classIndex();
            // iterate over the stored values only (sparse-aware access)
            for (int j = 0; j < ins.numValues(); j++) {
                if (ins.index(j) != classIdx && ins.isMissingSparse(j)) {
                    throw new Exception("ERROR: CoverTree can not deal with missing "
                            + "values. Please run ReplaceMissingValues filter "
                            + "on the dataset before passing it on to the CoverTree.");
                }
            }
        }
    }

    /**
     * Supplies the training instances and constructs the Cover Tree over them.
     * The instances are first handed to the superclass and then passed on to
     * buildCoverTree().
     * 
     * @param instances The insts on which the Cover Tree is to be built.
     * @throws Exception If some error occurs while building the Cover Tree
     */
    @Override
    public void setInstances(Instances instances) throws Exception {
        super.setInstances(instances);
        this.buildCoverTree(instances);
    }

    /**
     * Would add an instance to the cover tree. P.S.: The current version doesn't
     * allow addition of instances after batch construction, so this method
     * unconditionally fails.
     * 
     * @param ins The instance to add.
     * @throws Exception Always thrown, as the current implementation doesn't
     *           allow addition of instances after building.
     */
    @Override
    public void update(Instance ins) throws Exception {
        final String reason = "BottomUpConstruction method does not allow addition " + "of new Instances.";
        throw new Exception(reason);
    }

    /**
     * Adds the given instance info. This implementation only forwards the
     * instance to the distance function's update method (to update its range
     * datastructures). Nothing is changed in the built Cover Tree.
     * 
     * @param ins The instance to add the information of. Usually this is the test
     *          instance supplied to update the range of attributes in the
     *          distance function.
     * @throws IllegalStateException if no instances have been supplied yet
     */
    @Override
    public void addInstanceInfo(Instance ins) {
        if (m_Instances == null) {
            throw new IllegalStateException(
                    "No instances supplied yet. Cannot update without " + "supplying a set of instances first.");
        }
        try {
            m_DistanceFunction.update(ins);
        } catch (Exception ex) {
            // best effort: a failure of the distance function update is reported
            // but not propagated to the caller
            ex.printStackTrace();
        }
    }

    /**
     * Installs the distance function used for nearest neighbour search. Only an
     * EuclideanDistance instance is accepted; anything else (including null) is
     * rejected with an exception.
     * 
     * @param df the distance function to use
     * @throws Exception if not EuclideanDistance
     */
    @Override
    public void setDistanceFunction(DistanceFunction df) throws Exception {
        boolean isEuclidean = df instanceof EuclideanDistance;
        if (!isEuclidean) {
            throw new Exception("CoverTree currently only works with " + "EuclideanDistanceFunction.");
        }
        // keep both the typed and the general reference pointing at the same object
        EuclideanDistance euclidean = (EuclideanDistance) df;
        m_EuclideanDistance = euclidean;
        m_DistanceFunction = euclidean;
    }

    /**
     * Supplies the tip text describing the base property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String baseTipText() {
        final String tip = "The base for the expansion constant.";
        return tip;
    }

    /**
     * Gets the base currently used for the expansion constant.
     * 
     * @return base currently in use.
     */
    public double getBase() {
        return this.m_Base;
    }

    /**
     * Changes the base used for the expansion constant (the 2 in 2^i in the
     * paper). The value is stored as given, without validation.
     * 
     * @param b the new base;
     */
    public void setBase(double b) {
        this.m_Base = b;
    }

    /**
     * Reports the size of the tree (number of internal nodes + number of
     * leaves), as recorded in the node counter.
     * 
     * @return the size of the tree
     */
    public double measureTreeSize() {
        return this.m_NumNodes;
    }

    /**
     * Reports the number of leaves, as recorded in the leaf counter.
     * 
     * @return the number of leaves
     */
    public double measureNumLeaves() {
        return this.m_NumLeaves;
    }

    /**
     * Reports the depth of the tree, as recorded in the maximum-depth field.
     * 
     * @return the maximum depth of the tree
     */
    public double measureMaxDepth() {
        return this.m_MaxDepth;
    }

    /**
     * Lists the names of the additional measures: the three tree measures of
     * this class first, followed by those of the performance stats object when
     * one is set.
     * 
     * @return an enumeration of the measure names
     */
    @Override
    public Enumeration<String> enumerateMeasures() {
        Vector<String> names = new Vector<String>();
        for (String own : new String[] { "measureTreeSize", "measureNumLeaves", "measureMaxDepth" }) {
            names.add(own);
        }
        if (m_Stats != null) {
            Enumeration<String> statNames = m_Stats.enumerateMeasures();
            while (statNames.hasMoreElements()) {
                names.add(statNames.nextElement());
            }
        }
        return names.elements();
    }

    /**
     * Returns the value of the named measure. The three tree measures of this
     * class are matched case-insensitively; any other name is forwarded to the
     * performance stats object when one is set.
     * 
     * @param additionalMeasureName the name of the measure to query for its value
     * @return the value of the named measure
     * @throws IllegalArgumentException if the named measure is not one of the
     *           tree measures and no stats object is set to forward it to
     */
    @Override
    public double getMeasure(String additionalMeasureName) {
        if (additionalMeasureName.equalsIgnoreCase("measureMaxDepth")) {
            return measureMaxDepth();
        }
        if (additionalMeasureName.equalsIgnoreCase("measureTreeSize")) {
            return measureTreeSize();
        }
        if (additionalMeasureName.equalsIgnoreCase("measureNumLeaves")) {
            return measureNumLeaves();
        }
        if (m_Stats != null) {
            return m_Stats.getMeasure(additionalMeasureName);
        }
        throw new IllegalArgumentException(additionalMeasureName + " not supported (CoverTree)");
    }

    /******** Utility print functions.****** */
    /**
     * Writes the given string to stdout without a trailing newline.
     * 
     * @param s The string to print.
     */
    protected static void print(final String s) {
        System.out.print(s);
    }

    /**
     * Writes the given string to stdout and terminates the line.
     * 
     * @param s The string to print.
     */
    protected static void println(final String s) {
        System.out.println(s);
    }

    /**
     * Writes the string form of the given object to stdout without a trailing
     * newline.
     * 
     * @param o The object to print.
     */
    protected static void print(final Object o) {
        System.out.print(o);
    }

    /**
     * Writes the string form of the given object to stdout and terminates the
     * line.
     * 
     * @param o The object to print.
     */
    protected static void println(final Object o) {
        System.out.println(o);
    }

    /**
     * Writes the requested number of space characters to stdout, one at a time.
     * Nothing is written when the count is zero or negative.
     * 
     * @param s The number of space characters to print.
     */
    protected static void print_space(int s) {
        int remaining = s;
        while (remaining > 0) {
            System.out.print(" ");
            remaining--;
        }
    }

    /**
     * Recursively prints the cover tree rooted at the given node. Each node's
     * point is printed indented by its depth; for nodes that have children the
     * scale and the number of children follow, and then every child is printed
     * one level deeper.
     * 
     * @param depth The depth of top_node.
     * @param top_node The node to start printing from.
     */
    protected static void print(int depth, CoverTreeNode top_node) {
        print_space(depth);
        println(top_node.p());

        // a node without children has nothing more to show
        if (top_node.num_children <= 0) {
            return;
        }

        print_space(depth);
        print("scale = " + top_node.scale + "\n");
        print_space(depth);
        print("num children = " + top_node.num_children + "\n");
        System.out.flush();

        int childDepth = depth + 1;
        for (int idx = 0; idx < top_node.num_children; idx++) {
            print(childDepth, top_node.children.element(idx));
        }
    }

    /**
     * Gives the revision string, obtained by passing the revision keyword to
     * RevisionUtils.extract().
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        final String revisionKeyword = "$Revision$";
        return RevisionUtils.extract(revisionKeyword);
    }

    /**
     * Method for testing the class from command line. Loads the dataset named
     * by the single argument (through CSVLoader if the name ends with ".csv",
     * otherwise as an ARFF file), builds a cover tree on it and prints the tree
     * to stdout. Any exception raised on the way is printed as a stack trace.
     * 
     * @param args The supplied command line arguments; exactly one file name is
     *          expected, otherwise a usage message is printed and the JVM exits
     *          with status -1.
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: CoverTree <ARFF file>");
            System.exit(-1);
        }
        try {
            Instances insts = null;
            if (args[0].endsWith(".csv")) {
                CSVLoader csv = new CSVLoader();
                csv.setFile(new File(args[0]));
                insts = csv.getDataSet();
            } else {
                BufferedReader reader = new BufferedReader(new FileReader(args[0]));
                try {
                    insts = new Instances(reader);
                } finally {
                    // release the file handle whether or not parsing succeeded
                    reader.close();
                }
            }

            CoverTree tree = new CoverTree();
            tree.setInstances(insts);
            print("Created data tree:\n");
            print(0, tree.m_Root);
            println("");
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}