com.clust4j.algo.HDBSCAN.java Source code

Java tutorial

Introduction

Here is the source code for com.clust4j.algo.HDBSCAN.java

Source

/*******************************************************************************
 *    Copyright 2015, 2016 Taylor G Smith
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *******************************************************************************/
package com.clust4j.algo;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.Precision;

import com.clust4j.GlobalState;
import com.clust4j.utils.QuadTup;
import com.clust4j.algo.Neighborhood;
import com.clust4j.log.LogTimer;
import com.clust4j.log.Loggable;
import com.clust4j.log.Log.Tag.Algo;
import com.clust4j.metrics.pairwise.Distance;
import com.clust4j.metrics.pairwise.DistanceMetric;
import com.clust4j.metrics.pairwise.GeometricallySeparable;
import com.clust4j.metrics.pairwise.Pairwise;
import com.clust4j.utils.EntryPair;
import com.clust4j.utils.Series.Inequality;
import com.clust4j.utils.MatUtils;
import com.clust4j.utils.MatUtils.MatSeries;
import com.clust4j.utils.VecUtils;
import com.clust4j.utils.VecUtils.DoubleSeries;

/**
 * Hierarchical Density-Based Spatial Clustering of Applications with Noise. 
 * Performs {@link DBSCAN} over varying epsilon values and integrates the result to 
 * find a clustering that gives the best stability over epsilon. This allows 
 * HDBSCAN to find clusters of varying densities (unlike DBSCAN), and be more 
 * robust to parameter selection.
 * 
 * @author Taylor G Smith, adapted from the Python 
 * <a href="https://github.com/lmcinnes/hdbscan">HDBSCAN package</a>, inspired by
 * <a href="http://dl.acm.org/citation.cfm?id=2733381">the paper</a> by 
 * R. Campello, D. Moulavi, and J. Sander
 */
final public class HDBSCAN extends AbstractDBSCAN {
    private static final long serialVersionUID = -5112901322434131541L;
    public static final HDBSCAN_Algorithm DEF_ALGO = HDBSCAN_Algorithm.AUTO;
    public static final double DEF_ALPHA = 1.0;
    public static final boolean DEF_APPROX_MIN_SPAN = true;
    public static final int DEF_LEAF_SIZE = 40;
    public static final int DEF_MIN_CLUST_SIZE = 5;
    /** The number of features that should trigger a boruvka implementation */
    static final int boruvka_n_features_ = 60;
    static final Set<Class<? extends GeometricallySeparable>> fast_metrics_;

    /** Not final because can change if auto-enabled */
    protected HDBSCAN_Algorithm algo;
    private final double alpha;
    private final boolean approxMinSpanTree;
    private final int min_cluster_size;
    private final int leafSize;

    private volatile HDBSCANLinkageTree tree = null;
    private volatile double[][] dist_mat = null;
    private volatile int[] labels = null;
    private volatile int numClusters = -1;
    private volatile int numNoisey = -1;
    /** A copy of the data array inside the data matrix */
    private volatile double[][] dataData = null;

    private interface HInitializer extends MetricValidator {
        public HDBSCANLinkageTree initTree(HDBSCAN h);
    }

    public static enum HDBSCAN_Algorithm implements HInitializer {
        /**
         * Automatically selects the appropriate algorithm
         * given dimensions of the dataset.
         */
        AUTO {
            @Override
            public HDBSCANLinkageTree initTree(HDBSCAN h) {
                final Class<? extends GeometricallySeparable> clz = h.dist_metric.getClass();
                final int n = h.data.getColumnDimension();

                // rare situation... only if oddball dist
                if (!fast_metrics_.contains(clz)) {
                    return GENERIC.initTree(h);
                }

        else if (KDTree.VALID_METRICS.contains(clz)) {
                    return n > boruvka_n_features_ ? BORUVKA_KDTREE.initTree(h) : PRIMS_KDTREE.initTree(h);
                }

                // otherwise is valid balltree metric
                return n > boruvka_n_features_ ? BORUVKA_BALLTREE.initTree(h) : PRIMS_BALLTREE.initTree(h);
            }

            @Override
            public boolean isValidMetric(GeometricallySeparable g) {
                throw new UnsupportedOperationException("auto does not have supported metrics");
            }
        },

        /**
         * Generates a minimum spanning tree using a pairwise,
         * full distance matrix. Generally performs slower than
         * the other algorithms for larger datasets, but has less
         * setup overhead.
         * @see Pairwise
         */
        GENERIC {
            @Override
            public GenericTree initTree(HDBSCAN h) {
                // we set this in case it was called by auto
                h.algo = this;
                ensureMetric(h, this);
                return h.new GenericTree();
            }

            @Override
            public boolean isValidMetric(GeometricallySeparable g) {
                HashSet<Class<? extends GeometricallySeparable>> unsupported = new HashSet<>();

                for (DistanceMetric d : Distance.binaryDistances())
                    unsupported.add(d.getClass());

                // if we ever have MORE invalid ones, add them here...
                return !unsupported.contains(g.getClass());
            }
        },

        /**
         * Prim's algorithm is a greedy algorithm that finds a 
         * minimum spanning tree for a weighted undirected graph. 
         * This means it finds a subset of the edges that forms a 
         * tree that includes every vertex, where the total weight 
         * of all the edges in the tree is minimized. This implementation
         * internally uses a {@link KDTree} to handle the graph
         * linkage function.
         * @see KDTree
         */
        PRIMS_KDTREE {
            @Override
            public PrimsKDTree initTree(HDBSCAN h) {
                // we set this in case it was called by auto
                h.algo = this;
                ensureMetric(h, this);
                return h.new PrimsKDTree(h.leafSize);
            }

            @Override
            public boolean isValidMetric(GeometricallySeparable g) {
                return KDTree.VALID_METRICS.contains(g.getClass());
            }
        },

        /**
         * Prim's algorithm is a greedy algorithm that finds a 
         * minimum spanning tree for a weighted undirected graph. 
         * This means it finds a subset of the edges that forms a 
         * tree that includes every vertex, where the total weight 
         * of all the edges in the tree is minimized. This implementation
         * internally uses a {@link BallTree} to handle the graph
         * linkage function.
         * @see BallTree
         */
        PRIMS_BALLTREE {
            @Override
            public PrimsBallTree initTree(HDBSCAN h) {
                // we set this in case it was called by auto
                h.algo = this;
                ensureMetric(h, this);
                return h.new PrimsBallTree(h.leafSize);
            }

            @Override
            public boolean isValidMetric(GeometricallySeparable g) {
                return BallTree.VALID_METRICS.contains(g.getClass());
            }
        },

        /**
         * Uses Boruvka's algorithm to find a minimum spanning
         * tree. Internally uses a {@link KDTree} to handle the
         * linkage function.
         * @see BoruvkaAlgorithm
         * @see KDTree
         */
        BORUVKA_KDTREE {
            @Override
            public BoruvkaKDTree initTree(HDBSCAN h) {
                // we set this in case it was called by auto
                h.algo = this;
                ensureMetric(h, this);
                return h.new BoruvkaKDTree(h.leafSize);
            }

            @Override
            public boolean isValidMetric(GeometricallySeparable g) {
                return KDTree.VALID_METRICS.contains(g.getClass());
            }
        },

        /**
         * Uses Boruvka's algorithm to find a minimum spanning
         * tree. Internally uses a {@link BallTree} to handle the
         * linkage function.
         * @see BoruvkaAlgorithm
         * @see BallTree
         */
        BORUVKA_BALLTREE {
            @Override
            public BoruvkaBallTree initTree(HDBSCAN h) {
                // we set this in case it was called by auto
                h.algo = this;
                ensureMetric(h, this);
                return h.new BoruvkaBallTree(h.leafSize);
            }

            @Override
            public boolean isValidMetric(GeometricallySeparable g) {
                return BallTree.VALID_METRICS.contains(g.getClass())
                        // For some reason Boruvka hates Canberra...
                        && !g.equals(Distance.CANBERRA);
            }
        };

        private static void ensureMetric(HDBSCAN h, HDBSCAN_Algorithm a) {
            if (!a.isValidMetric(h.dist_metric)) {
                h.warn(h.dist_metric.getName() + " is not valid for " + a + ". Falling back to default Euclidean.");
                h.setSeparabilityMetric(DEF_DIST);
            }
        }
    }

    static {
        fast_metrics_ = new HashSet<Class<? extends GeometricallySeparable>>();
        fast_metrics_.addAll(KDTree.VALID_METRICS);
        fast_metrics_.addAll(BallTree.VALID_METRICS);
    }

    /**
     * Is the provided metric valid for this model?
     */
    @Override
    final public boolean isValidMetric(GeometricallySeparable geo) {
        return this.algo.isValidMetric(geo);
    }

    /**
     * Constructs an instance of HDBSCAN from the default values
     * @param data
     */
    protected HDBSCAN(final RealMatrix data) {
        this(data, DEF_MIN_PTS);
    }

    /**
     * Constructs an instance of HDBSCAN from the default values
     * @param eps
     * @param data
     */
    protected HDBSCAN(final RealMatrix data, final int minPts) {
        this(data, new HDBSCANParameters(minPts));
    }

    /**
     * Constructs an instance of HDBSCAN from the provided builder
     * @throws IllegalArgumentException if alpha is 0
     * @param builder
     * @param data
     */
    protected HDBSCAN(final RealMatrix data, final HDBSCANParameters planner) {
        super(data, planner);

        this.algo = planner.getAlgo();
        this.alpha = planner.getAlpha();
        this.approxMinSpanTree = planner.getApprox();
        this.min_cluster_size = planner.getMinClusterSize();
        this.leafSize = planner.getLeafSize();

        if (alpha <= 0.0)
            throw new IllegalArgumentException("alpha must be greater than 0");
        if (leafSize < 1)
            throw new IllegalArgumentException("leafsize must be greater than 0");

        logModelSummary();
    }

    @Override
    final protected ModelSummary modelSummary() {
        return new ModelSummary(
                new Object[] { "Num Rows", "Num Cols", "Metric", "Algo.", "Allow Par.", "Min Pts.",
                        "Min Clust. Size", "Alpha" },
                new Object[] { data.getRowDimension(), data.getColumnDimension(), getSeparabilityMetric(), algo,
                        parallel, minPts, min_cluster_size, alpha });
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o instanceof HDBSCAN) {
            HDBSCAN h = (HDBSCAN) o;

            /*
             * Has one been fit and not the other?
             */
            if (null == this.labels ^ null == h.labels)
                return false;

            return super.equals(o) // UUID test
                    && MatUtils.equalsExactly(this.data.getDataRef(), h.data.getDataRef())
                    && (null == this.labels ? true : VecUtils.equalsExactly(this.labels, h.labels))
                    && this.algo.equals(h.algo) && this.alpha == h.alpha && this.leafSize == h.leafSize
                    && this.min_cluster_size == h.min_cluster_size;
        }

        return false;
    }

    /**
     * This class extension is for the sake of testing; it restricts
     * types to a subclass of Number and adds the method 
     * {@link CompQuadTup#almostEquals(CompQuadTup)} to measure whether
     * values are equal within a margin of 1e-8.
     * @author Taylor G Smith
     * @param <ONE>
     * @param <TWO>
     * @param <THREE>
     * @param <FOUR>
     */
    protected final static class CompQuadTup<ONE extends Number, TWO extends Number, THREE extends Number, FOUR extends Number>
            extends QuadTup<ONE, TWO, THREE, FOUR> {
        private static final long serialVersionUID = -8699738868282635229L;

        public CompQuadTup(ONE one, TWO two, THREE three, FOUR four) {
            super(one, two, three, four);
        }

        /*
         * For testing
         */
        public boolean almostEquals(CompQuadTup<ONE, TWO, THREE, FOUR> other) {
            return Precision.equals(this.one.doubleValue(), other.one.doubleValue(), 1e-8)
                    && Precision.equals(this.two.doubleValue(), other.two.doubleValue(), 1e-8)
                    && Precision.equals(this.three.doubleValue(), other.three.doubleValue(), 1e-8)
                    && Precision.equals(this.four.doubleValue(), other.four.doubleValue(), 1e-8);
        }
    }

    /**
     * A simple extension of {@link HashSet} that takes
     * an array or varargs as a constructor arg
     * @author Taylor G Smith
     * @param <T>
     */
    protected final static class HSet<T> extends HashSet<T> {
        private static final long serialVersionUID = 5185550036712184095L;

        HSet(int size) {
            super(size);
        }

        HSet(Collection<? extends T> coll) {
            super(coll);
        }
    }

    /**
     * Constructs an {@link HSet} from the labels
     * @author Taylor G Smith
     */
    protected final static class LabelHSetFactory {
        static HSet<Integer> build(int[] labs) {
            HSet<Integer> res = new HSet<Integer>(labs.length);
            for (int i : labs)
                res.add(i);

            return res;
        }
    }

    /** Classes that will explicitly need to define 
     *  reachability will have to implement this interface */
    interface ExplicitMutualReachability {
        double[][] mutualReachability();
    }

    /**
     * Mutual reachability is implicit when using 
     * {@link BoruvkaAlgorithm},
     * thus we don't need these classes to implement 
     * {@link ExplicitMutualReachability#mutualReachability()} */
    interface Boruvka {
    }

    /**
     * Mutual reachability is implicit when using 
     * {@link LinkageTreeUtils#mstLinkageCore_cdist},
     * thus we don't need these classes to implement 
     * {@link ExplicitMutualReachability#mutualReachability()} */
    interface Prim {
    }

    /**
     * Util mst linkage methods
     * @author Taylor G Smith
     */
    protected static abstract class LinkageTreeUtils {

        /**
         * Perform a breadth first search on a tree
         * @param hierarchy
         * @param root
         * @return
         */
        // Tested: passing
        static ArrayList<Integer> breadthFirstSearch(final double[][] hierarchy, final int root) {
            ArrayList<Integer> toProcess = new ArrayList<>(), tmp;
            int dim = hierarchy.length, maxNode = 2 * dim, numPoints = maxNode - dim + 1;

            toProcess.add(root);
            ArrayList<Integer> result = new ArrayList<>();
            while (!toProcess.isEmpty()) {
                result.addAll(toProcess);

                tmp = new ArrayList<>();
                for (Integer x : toProcess)
                    if (x >= numPoints)
                        tmp.add(x - numPoints);
                toProcess = tmp;

                tmp = new ArrayList<>();
                if (!toProcess.isEmpty()) {
                    for (Integer row : toProcess)
                        for (int i = 0; i < 2; i++)
                            tmp.add((int) hierarchy[row][wraparoundIdxGet(hierarchy[row].length, i)]);

                    toProcess = tmp;
                }
            }

            return result;
        }

        // Tested: passing
        static TreeMap<Integer, Double> computeStability(
                ArrayList<CompQuadTup<Integer, Integer, Double, Integer>> condensed) {
            double[] resultArr, births, lambdas = new double[condensed.size()];
            int[] sizes = new int[condensed.size()], parents = new int[condensed.size()];
            int child, parent, childSize, resultIdx, currentChild = -1, idx = 0, row = 0;
            double lambda, minLambda = 0;

            /* Populate parents, sizes and lambdas pre-sort and get min/max parent info
             * ['parent', 'child', 'lambda', 'childSize']
             */
            int largestChild = Integer.MIN_VALUE, minParent = Integer.MAX_VALUE, maxParent = Integer.MIN_VALUE;
            for (CompQuadTup<Integer, Integer, Double, Integer> q : condensed) {
                parent = q.getFirst();
                child = q.getSecond();
                lambda = q.getThird();
                childSize = q.getFourth();

                if (child > largestChild)
                    largestChild = child;
                if (parent < minParent)
                    minParent = parent;
                if (parent > maxParent)
                    maxParent = parent;

                parents[idx] = parent;
                sizes[idx] = childSize;
                lambdas[idx] = lambda;
                idx++;
            }

            int numClusters = maxParent - minParent + 1;
            births = VecUtils.rep(Double.NaN, largestChild + 1);

            /*
             * Perform sort, then get sorted lambdas and children
             */
            Collections.sort(condensed, new Comparator<QuadTup<Integer, Integer, Double, Integer>>() {
                @Override
                public int compare(QuadTup<Integer, Integer, Double, Integer> q1,
                        QuadTup<Integer, Integer, Double, Integer> q2) {
                    int cmp = q1.getSecond().compareTo(q2.getSecond());

                    if (cmp == 0) {
                        cmp = q1.getThird().compareTo(q2.getThird());
                        return cmp;
                    }

                    return cmp;
                }
            });

            /*
             * Go through sorted list...
             */
            for (row = 0; row < condensed.size(); row++) {
                CompQuadTup<Integer, Integer, Double, Integer> q = condensed.get(row);
                child = q.getSecond();
                lambda = q.getThird();

                if (child == currentChild)
                    minLambda = FastMath.min(minLambda, lambda);
                else if (currentChild != -1) {
                    // Already been initialized
                    births[currentChild] = minLambda;
                    currentChild = child;
                    minLambda = lambda;
                } else {
                    // Initialize
                    currentChild = child;
                    minLambda = lambda;
                }
            }

            resultArr = new double[numClusters];

            // Second loop
            double birthParent;
            for (idx = 0; idx < condensed.size(); idx++) {
                parent = parents[idx];
                lambda = lambdas[idx];
                childSize = sizes[idx];
                resultIdx = parent - minParent;

                // the Cython exploits the C contiguous pointer array's
                // out of bounds allowance (2.12325E-314), but we have to
                // do a check for that...
                birthParent = parent >= births.length ? GlobalState.Mathematics.TINY : births[parent];
                resultArr[resultIdx] += (lambda - birthParent) * childSize;
            }

            double[] top = VecUtils.asDouble(VecUtils.arange(minParent, maxParent + 1));
            double[][] mat = MatUtils.transpose(VecUtils.vstack(top, resultArr));

            TreeMap<Integer, Double> result = new TreeMap<>();
            for (idx = 0; idx < mat.length; idx++)
                result.put((int) mat[idx][0], mat[idx][1]);

            return result;
        }

        // Tested: passing
        static ArrayList<CompQuadTup<Integer, Integer, Double, Integer>> condenseTree(final double[][] hierarchy,
                final int minSize) {
            final int m = hierarchy.length;
            int root = 2 * m, numPoints = root / 2 + 1 /*Integer division*/, nextLabel = numPoints + 1;

            // Get node list from BFS
            ArrayList<Integer> nodeList = breadthFirstSearch(hierarchy, root), tmpList;
            ArrayList<CompQuadTup<Integer, Integer, Double, Integer>> resultList = new ArrayList<>();

            // Indices needing relabeling -- cython code assigns this to nodeList.size()
            // but often times this is way too small and causes out of bounds exceptions...
            // Changed to root + 1 on 02/01/2016; this should be the max node ever in the resultList
            int[] relabel = new int[root + 1]; //nodeList.size()
            boolean[] ignore = new boolean[root + 1];
            double[] children;

            double lambda;
            int left, right, leftCount, rightCount;

            // Sanity check
            // System.out.println("Root: " + root + ", Relabel length: " + relabel.length + ", m: " + m + ", Relabel array: " + Arrays.toString(relabel));

            // The cython code doesn't check for bounds and sloppily 
            // assigns this even if root > relabel.length. 
            relabel[root] = numPoints;

            for (Integer node : nodeList) {

                if (ignore[node] || node < numPoints)
                    continue;

                children = hierarchy[wraparoundIdxGet(hierarchy.length, node - numPoints)];
                left = (int) children[0];
                right = (int) children[1];

                if (children[2] > 0)
                    lambda = 1.0 / children[2];
                else
                    lambda = Double.POSITIVE_INFINITY;

                if (left >= numPoints)
                    leftCount = (int) (hierarchy[wraparoundIdxGet(hierarchy.length, left - numPoints)][3]);
                else
                    leftCount = 1;

                if (right >= numPoints)
                    rightCount = (int) (hierarchy[wraparoundIdxGet(hierarchy.length, right - numPoints)][3]);
                else
                    rightCount = 1;

                if (leftCount >= minSize && rightCount >= minSize) {
                    relabel[left] = nextLabel++;
                    resultList.add(new CompQuadTup<Integer, Integer, Double, Integer>(
                            relabel[wraparoundIdxGet(relabel.length, node)],
                            relabel[wraparoundIdxGet(relabel.length, left)], lambda, leftCount));

                    relabel[wraparoundIdxGet(relabel.length, right)] = nextLabel++;
                    resultList.add(new CompQuadTup<Integer, Integer, Double, Integer>(
                            relabel[wraparoundIdxGet(relabel.length, node)],
                            relabel[wraparoundIdxGet(relabel.length, right)], lambda, rightCount));

                } else if (leftCount < minSize && rightCount < minSize) {
                    tmpList = breadthFirstSearch(hierarchy, left);
                    for (Integer subnode : tmpList) {
                        if (subnode < numPoints)
                            resultList.add(new CompQuadTup<Integer, Integer, Double, Integer>(
                                    relabel[wraparoundIdxGet(relabel.length, node)], subnode, lambda, 1));
                        ignore[subnode] = true;
                    }

                    tmpList = breadthFirstSearch(hierarchy, right);
                    for (Integer subnode : tmpList) {
                        if (subnode < numPoints)
                            resultList.add(new CompQuadTup<Integer, Integer, Double, Integer>(
                                    relabel[wraparoundIdxGet(relabel.length, node)], subnode, lambda, 1));
                        ignore[subnode] = true;
                    }

                } else if (leftCount < minSize) {
                    relabel[right] = relabel[node];
                    tmpList = breadthFirstSearch(hierarchy, left);

                    for (Integer subnode : tmpList) {
                        if (subnode < numPoints)
                            resultList.add(new CompQuadTup<Integer, Integer, Double, Integer>(
                                    relabel[wraparoundIdxGet(relabel.length, node)], subnode, lambda, 1));
                        ignore[subnode] = true;
                    }
                }

                else {
                    relabel[left] = relabel[node];
                    tmpList = breadthFirstSearch(hierarchy, right);
                    for (Integer subnode : tmpList) {
                        if (subnode < numPoints)
                            resultList.add(new CompQuadTup<Integer, Integer, Double, Integer>(
                                    relabel[wraparoundIdxGet(relabel.length, node)], subnode, lambda, 1));
                        ignore[subnode] = true;
                    }
                }
            }

            return resultList;
        }

        /**
         * Generic linkage core method
         * @param X
         * @param m
         * @return
         */
        static double[][] minSpanTreeLinkageCore(final double[][] X, final int m) { // Tested: passing
            int[] node_labels, current_labels, tmp_labels;
            double[] current_distances, left, right;
            boolean[] label_filter;
            boolean val;
            int current_node, new_node_index, new_node, i, j, trueCt, idx;
            DoubleSeries series;

            double[][] result = new double[m - 1][3];
            node_labels = VecUtils.arange(m);
            current_node = 0;
            current_distances = VecUtils.rep(Double.POSITIVE_INFINITY, m);
            current_labels = node_labels;

            for (i = 1; i < node_labels.length; i++) {

                // Create the boolean mask; takes 2N to create mask and then filter
                // however, creating the left vector concurrently 
                // trims off one N pass. This could be done using Vector.VecSeries
                // but that would add an extra pass of N
                idx = 0;
                trueCt = 0;
                label_filter = new boolean[current_labels.length];
                for (j = 0; j < label_filter.length; j++) {
                    val = current_labels[j] != current_node;
                    if (val)
                        trueCt++;

                    label_filter[j] = val;
                }

                tmp_labels = new int[trueCt];
                left = new double[trueCt];
                for (j = 0; j < current_labels.length; j++) {
                    if (label_filter[j]) {
                        tmp_labels[idx] = current_labels[j];
                        left[idx] = current_distances[j];
                        idx++;
                    }
                }

                current_labels = tmp_labels;
                right = new double[current_labels.length];
                for (j = 0; j < right.length; j++)
                    right[j] = X[current_node][current_labels[j]];

                // Build the current_distances vector
                series = new DoubleSeries(left, Inequality.LESS_THAN, right);
                current_distances = VecUtils.where(series, left, right);

                // Get next iter values
                new_node_index = VecUtils.argMin(current_distances);
                new_node = current_labels[new_node_index];
                result[i - 1][0] = (double) current_node;
                result[i - 1][1] = (double) new_node;
                result[i - 1][2] = current_distances[new_node_index];

                current_node = new_node;
            }

            return result;
        }

        static double[][] minSpanTreeLinkageCore_cdist(final double[][] raw, final double[] coreDistances,
                GeometricallySeparable sep, final double alpha) {
            double[] currentDists;
            int[] inTreeArr;
            double[][] resultArr;

            int currentNode = 0, newNode, i, j, dim = raw.length;
            double currentNodeCoreDist, rightVal, leftVal, coreVal, newDist;

            resultArr = new double[dim - 1][3];
            inTreeArr = new int[dim];
            currentDists = VecUtils.rep(Double.POSITIVE_INFINITY, dim);

            for (i = 1; i < dim; i++) {
                inTreeArr[currentNode] = 1;
                currentNodeCoreDist = coreDistances[currentNode];

                newDist = Double.MAX_VALUE;
                newNode = 0;

                for (j = 0; j < dim; j++) {
                    if (inTreeArr[j] != 0)
                        continue; // only skips currentNode idx

                    rightVal = currentDists[j];
                    leftVal = sep.getDistance(raw[currentNode], raw[j]);

                    if (alpha != 1.0)
                        leftVal /= alpha;

                    coreVal = coreDistances[j];
                    if (currentNodeCoreDist > rightVal || coreVal > rightVal || leftVal > rightVal) {
                        if (rightVal < newDist) { // Should always be the case?
                            newDist = rightVal;
                            newNode = j;
                        }

                        continue;
                    }

                    if (coreVal > currentNodeCoreDist) {
                        if (coreVal > leftVal)
                            leftVal = coreVal;
                    } else if (currentNodeCoreDist > leftVal) {
                        leftVal = currentNodeCoreDist;
                    }

                    if (leftVal < rightVal) {
                        currentDists[j] = leftVal;
                        if (leftVal < newDist) {
                            newDist = leftVal;
                            newNode = j;
                        }
                    } else if (rightVal < newDist) {
                        newDist = rightVal;
                        newNode = j;
                    }
                } // end for j

                resultArr[i - 1][0] = currentNode;
                resultArr[i - 1][1] = newNode;
                resultArr[i - 1][2] = newDist;
                currentNode = newNode;
            } // end for i

            return resultArr;
        }

        /**
         * The index may be -1; this will return 
         * the index of the length of the array minus
         * the absolute value of the index in the case
         * of negative indices, like the original Python
         * code.
         * @param array
         * @param idx
         * @throws ArrayIndexOutOfBoundsException if the absolute value of the index
         * exceeds the length of the array
         * @return the index to be queried in wrap-around indexing
         */
        static int wraparoundIdxGet(int array_len, int idx) {
            int abs;
            if ((abs = FastMath.abs(idx)) > array_len)
                throw new ArrayIndexOutOfBoundsException(idx);
            if (idx >= 0)
                return idx;
            return array_len - abs;
        }

        static double[][] mutualReachability(double[][] dist_mat, int minPts, double alpha) {
            final int size = dist_mat.length;
            minPts = FastMath.min(size - 1, minPts);

            final double[] core_distances = MatUtils.sortColsAsc(dist_mat)[minPts];

            if (alpha != 1.0)
                dist_mat = MatUtils.scalarDivide(dist_mat, alpha);

            final MatSeries ser1 = new MatSeries(core_distances, Inequality.GREATER_THAN, dist_mat);
            double[][] stage1 = MatUtils.where(ser1, core_distances, dist_mat);

            stage1 = MatUtils.transpose(stage1);
            final MatSeries ser2 = new MatSeries(core_distances, Inequality.GREATER_THAN, stage1);
            final double[][] result = MatUtils.where(ser2, core_distances, stage1);

            return MatUtils.transpose(result);
        }
    }

    /**
     * The top level class for all HDBSCAN linkage trees.
     * @author Taylor G Smith
     */
    abstract class HDBSCANLinkageTree {
        final HDBSCAN model;
        final GeometricallySeparable metric;
        final int m, n;

        HDBSCANLinkageTree() {
            model = HDBSCAN.this;
            metric = model.getSeparabilityMetric();
            m = model.data.getRowDimension();
            n = model.data.getColumnDimension();
        }

        abstract double[][] link();
    }

    /**
     * Algorithms that utilize {@link NearestNeighborHeapSearch} 
     * algorithms for mutual reachability
     * @author Taylor G Smith
     */
    abstract class HeapSearchAlgorithm extends HDBSCANLinkageTree {
        final int leafSize;

        HeapSearchAlgorithm(int leafSize) {
            super();
            this.leafSize = leafSize;
        }

        abstract NearestNeighborHeapSearch getTree(double[][] X);

        abstract String getTreeName();

        /**
         * The linkage function to be used for any classes
         * implementing the {@link Prim} interface.
         * @param dt
         * @return
         */
        final double[][] primTreeLinkageFunction(double[][] dt) {
            final int min_points = FastMath.min(m - 1, minPts);

            LogTimer timer = new LogTimer();
            model.info("building " + getTreeName() + " search tree...");
            NearestNeighborHeapSearch tree = getTree(dt);
            model.info("completed NearestNeighborHeapSearch construction in " + timer.toString());

            // Query for dists to k nearest neighbors -- no longer use breadth first!
            Neighborhood query = tree.query(dt, min_points, true, true);
            double[][] dists = query.getDistances();
            double[] coreDistances = MatUtils.getColumn(dists, dists[0].length - 1);

            double[][] minSpanningTree = LinkageTreeUtils.minSpanTreeLinkageCore_cdist(dt, coreDistances, metric,
                    alpha);

            return label(MatUtils.sortAscByCol(minSpanningTree, 2));
        }

        /**
         * The linkage function to be used for any classes
         * implementing the {@link Boruvka} interface.
         * @param dt
         * @return
         */
        final double[][] boruvkaTreeLinkageFunction(double[][] dt) {
            final int min_points = FastMath.min(m - 1, minPts);
            int ls = FastMath.max(leafSize, 3);

            model.info("building " + getTreeName() + " search tree...");

            LogTimer timer = new LogTimer();
            NearestNeighborHeapSearch tree = getTree(dt);
            model.info("completed NearestNeighborHeapSearch construction in " + timer.toString());

            // We can safely cast the metric to DistanceMetric at this point
            final BoruvkaAlgorithm alg = new BoruvkaAlgorithm(tree, min_points, (DistanceMetric) metric, ls / 3,
                    approxMinSpanTree, alpha, model);

            double[][] minSpanningTree = alg.spanningTree();
            return label(MatUtils.sortAscByCol(minSpanningTree, 2));
        }
    }

    /**
     * A class for HDBSCAN algorithms that utilize {@link KDTree}
     * search spaces for segmenting nearest neighbors
     * @author Taylor G Smith
     */
    abstract class KDTreeAlgorithm extends HeapSearchAlgorithm {
        KDTreeAlgorithm(int leafSize) {
            super(leafSize);
        }

        @Override
        String getTreeName() {
            return "KD";
        }

        @Override
        final KDTree getTree(double[][] X) {
            // We can safely cast the sep metric as DistanceMetric
            // after the check in the constructor
            return new KDTree(X, this.leafSize, (DistanceMetric) metric, model);
        }
    }

    /**
     * A class for HDBSCAN algorithms that utilize {@link BallTree}
     * search spaces for segmenting nearest neighbors
     * @author Taylor G Smith
     */
    abstract class BallTreeAlgorithm extends HeapSearchAlgorithm {
        BallTreeAlgorithm(int leafSize) {
            super(leafSize);
        }

        @Override
        String getTreeName() {
            return "Ball";
        }

        @Override
        final BallTree getTree(double[][] X) {
            // We can safely cast the sep metric as DistanceMetric
            // after the check in the constructor
            return new BallTree(X, this.leafSize, (DistanceMetric) metric, model);
        }
    }

    /**
     * Generic single linkage tree that uses an 
     * upper triangular distance matrix to compute
     * mutual reachability
     * @author Taylor G Smith
     */
    class GenericTree extends HDBSCANLinkageTree implements ExplicitMutualReachability {
        GenericTree() {
            super();

            // The generic implementation requires the computation of an UT dist mat
            final LogTimer s = new LogTimer();
            dist_mat = Pairwise.getDistance(data, getSeparabilityMetric(), false, false);
            info("completed distance matrix computation in " + s.toString());
        }

        @Override
        double[][] link() {
            final double[][] mutual_reachability = mutualReachability();
            double[][] min_spanning_tree = LinkageTreeUtils.minSpanTreeLinkageCore(mutual_reachability, m);

            // Sort edges of the min_spanning_tree by weight
            min_spanning_tree = MatUtils.sortAscByCol(min_spanning_tree, 2);
            return label(min_spanning_tree);
        }

        @Override
        public double[][] mutualReachability() {
            /*// this shouldn't be able to happen...
            if(null == dist_mat)
               throw new IllegalClusterStateException("dist matrix is null; "
                  + "this only can happen when the model attempts to invoke "
                  + "mutualReachability on a tree without proper initialization "
                  + "or after the model has already been fit.");
            */

            return LinkageTreeUtils.mutualReachability(dist_mat, minPts, alpha);
        }
    }

    /**
     * An implementation of HDBSCAN using the {@link Prim} algorithm
     * and leveraging {@link KDTree} search spaces
     * @author Taylor G Smith
     */
    class PrimsKDTree extends KDTreeAlgorithm implements Prim {
        PrimsKDTree(int leafSize) {
            super(leafSize);
        }

        @Override
        double[][] link() {
            return primTreeLinkageFunction(dataData);
        }
    }

    /**
     * An implementation of HDBSCAN using the {@link Prim} algorithm
     * and leveraging {@link BallTree} search spaces
     * @author Taylor G Smith
     */
    class PrimsBallTree extends BallTreeAlgorithm implements Prim {
        PrimsBallTree(int leafSize) {
            super(leafSize);
        }

        @Override
        double[][] link() {
            return primTreeLinkageFunction(dataData);
        }
    }

    class BoruvkaKDTree extends KDTreeAlgorithm implements Boruvka {
        BoruvkaKDTree(int leafSize) {
            super(leafSize);
        }

        @Override
        double[][] link() {
            return boruvkaTreeLinkageFunction(dataData);
        }
    }

    class BoruvkaBallTree extends BallTreeAlgorithm implements Boruvka {
        BoruvkaBallTree(int leafSize) {
            super(leafSize);
        }

        @Override
        double[][] link() {
            return boruvkaTreeLinkageFunction(dataData);
        }
    }

    /**
     * A base class for any unify finder classes
     * to extend. These should help join nodes and
     * branches from trees.
     * @author Taylor G Smith
     */
    abstract static class UnifiedFinder {
        final int SIZE;

        UnifiedFinder(int N) {
            this.SIZE = N;
        }

        /**
         * Wraps the index in a python way (-1 = last index).
         * Easier and more concise than having lots of references to 
         * {@link LinkageTreeUtils#wraparoundIdxGet(int, int)}
         * @param i
         * @param j
         * @return
         */
        static int wrap(int i, int j) {
            return LinkageTreeUtils.wraparoundIdxGet(i, j);
        }

        int wrap(int i) {
            return wrap(SIZE, i);
        }

        abstract void union(int m, int n);

        abstract int find(int x);
    }

    // Tested: passing
    static class TreeUnionFind extends UnifiedFinder {
        int[][] dataArr;
        boolean[] is_component;

        public TreeUnionFind(int size) {
            super(size);
            dataArr = new int[size][2];

            // First col should be arange to size
            for (int i = 0; i < size; i++)
                dataArr[i][0] = i;

            is_component = VecUtils.repBool(true, size);
        }

        @Override
        public void union(int x, int y) {
            int x_root = find(x);
            int y_root = find(y);

            int x1idx = wrap(x_root);
            int y1idx = wrap(y_root);

            int dx1 = dataArr[x1idx][1];
            int dy1 = dataArr[y1idx][1];

            if (dx1 < dy1)
                dataArr[x1idx][0] = y_root;
            else if (dx1 > dy1)
                dataArr[y1idx][0] = x_root;
            else {
                dataArr[y1idx][0] = x_root;
                dataArr[x1idx][1] += 1;
            }
        }

        @Override
        public int find(int x) {
            final int idx = wrap(x);
            if (dataArr[idx][0] != x) {
                dataArr[idx][0] = find(dataArr[idx][0]);
                is_component[idx] = false;
            }

            return dataArr[idx][0];
        }

        /**
         * Returns all non-zero indices in is_component
         * @return
         */
        int[] components() {
            final ArrayList<Integer> h = new ArrayList<>();
            for (int i = 0; i < is_component.length; i++)
                if (is_component[i])
                    h.add(i);

            int idx = 0;
            int[] out = new int[h.size()];
            for (Integer i : h)
                out[idx++] = i;

            return out;
        }
    }

    // Tested: passing
    static class UnionFind extends UnifiedFinder {
        int[] parent, size;
        int nextLabel;

        public UnionFind(int N) {
            super(N);
            parent = VecUtils.repInt(-1, 2 * N - 1);
            nextLabel = N;

            size = new int[2 * N - 1];
            for (int i = 0; i < size.length; i++)
                size[i] = i >= N ? 0 : 1; // if N == 5 [1,1,1,1,1,0,0,0,0]
        }

        int fastFind(int n) {
            int p = n //,tmp
            ;

            while (parent[wrap(parent.length, n)] != -1)
                n = parent[wrap(parent.length, n)];

            // Incredibly enraging to debug -- skeptics be warned
            while (parent[wrap(parent.length, p)] != n) {
                //System.out.println("First: {p:" + p + ", parent[p]:" +parent[wrap(parent.length, p)] +  ", n:" +n+"}");

                //tmp = p;
                p = parent[wrap(parent.length, p)];
                parent[wrap(parent.length, p)] = n;

                //System.out.println("Second: {p:" + p + ", parent[p]:" +parent[wrap(parent.length, p)] +  ", n:" +n+"}");
                //System.out.println(Arrays.toString(parent));
            }

            return n;
        }

        @Override
        public int find(int n) {
            while (parent[wrap(parent.length, n)] != -1)
                n = parent[wrap(parent.length, n)];
            return n;
        }

        @Override
        public void union(final int m, final int n) {
            int mWrap = wrap(size.length, m);
            int nWrap = wrap(size.length, n);

            size[nextLabel] = size[mWrap] + size[nWrap];
            parent[mWrap] = nextLabel;
            parent[nWrap] = nextLabel;
            size[nextLabel] = size[mWrap] + size[nWrap];
            nextLabel++;
            return;
        }

        @Override
        public String toString() {
            return "Parent arr: " + Arrays.toString(parent) + "; " + "Sizes: " + Arrays.toString(size) + "; "
                    + "Parent: " + Arrays.toString(parent);
        }
    }

    protected static int[] doLabeling(ArrayList<CompQuadTup<Integer, Integer, Double, Integer>> tree,
            ArrayList<Integer> clusters, TreeMap<Integer, Integer> clusterMap) {

        CompQuadTup<Integer, Integer, Double, Integer> quad;
        int rootCluster, parent, child, n = tree.size(), cluster, i;
        int[] resultArr, parentArr = new int[n], childArr = new int[n];
        UnifiedFinder unionFind;

        // [parent, child, lambda, size]
        int maxParent = Integer.MIN_VALUE;
        int minParent = Integer.MAX_VALUE;
        for (i = 0; i < n; i++) {
            quad = tree.get(i);
            parentArr[i] = quad.getFirst();
            childArr[i] = quad.getSecond();

            if (quad.getFirst() < minParent)
                minParent = quad.getFirst();
            if (quad.getFirst() > maxParent)
                maxParent = quad.getFirst();
        }

        rootCluster = minParent;
        resultArr = new int[rootCluster];
        unionFind = new TreeUnionFind(maxParent + 1);

        for (i = 0; i < n; i++) {
            child = childArr[i];
            parent = parentArr[i];
            if (!clusters.contains(child))
                unionFind.union(parent, child);
        }

        for (i = 0; i < rootCluster; i++) {
            cluster = unionFind.find(i);
            if (cluster <= rootCluster)
                resultArr[i] = NOISE_CLASS;
            else
                resultArr[i] = clusterMap.get(cluster);
        }

        return resultArr;
    }

    @Override
    protected HDBSCAN fit() {
        synchronized (fitLock) {
            if (null != labels) // Then we've already fit this...
                return this;

            // Meant to prevent multiple .getData() copy calls
            final LogTimer timer = new LogTimer();
            dataData = this.data.getData();

            // Build the tree
            info("constructing HDBSCAN single linkage dendrogram: " + algo);
            this.tree = algo.initTree(this);

            LogTimer treeTimer = new LogTimer();
            final double[][] lab_tree = tree.link(); // returns the result of the label(..) function
            info("completed tree building in " + treeTimer.toString());

            info("converting tree to labels (" + lab_tree.length + " x " + lab_tree[0].length + ")");
            LogTimer labTimer = new LogTimer();
            labels = treeToLabels(dataData, lab_tree, min_cluster_size, this);

            // Wrap up...
            info("completed cluster labeling in " + labTimer.toString());

            // Count missing
            numNoisey = 0;
            for (int lab : labels)
                if (lab == NOISE_CLASS)
                    numNoisey++;

            int nextLabel = LabelHSetFactory.build(labels).size() - (numNoisey > 0 ? 1 : 0);
            info((numClusters = nextLabel) + " cluster" + (nextLabel != 1 ? "s" : "") + " identified, " + numNoisey
                    + " record" + (numNoisey != 1 ? "s" : "") + " classified noise");

            // Need to encode labels to maintain order
            final NoiseyLabelEncoder encoder = new NoiseyLabelEncoder(labels).fit();
            labels = encoder.getEncodedLabels();

            /*
             * In this portion, we build the fit summary... HDBSCAN is hard
             * to iteratively update on status, so we will merely provide summary
             * statistics on the class labels. Since it's not a centroid-based model
             * it wouldn't make since to track any metrics such as WSS, so we'll
             * leave it at simple counts and pcts.
             */
            String label_rep;
            int[] ordered_label_classes = VecUtils.reorder(encoder.getClasses(),
                    VecUtils.argSort(encoder.getClasses()));
            for (int label : ordered_label_classes) {
                label_rep = label + (NOISE_CLASS == label ? " (noise)" : "");

                int count = VecUtils.sum(new VecUtils.IntSeries(labels, Inequality.EQUAL_TO, label).get());
                double pct = (double) count / (double) labels.length;

                // log the summary
                fitSummary.add(new Object[] { label_rep, count, pct, timer.wallTime() });
            }

            // Close this model out
            sayBye(timer);

            // Clean anything with big overhead..
            dataData = null;
            dist_mat = null;
            tree = null;

            return this;
        }
    }

    @Override
    public int[] getLabels() {
        return super.handleLabelCopy(labels);
    }

    @Override
    public Algo getLoggerTag() {
        return com.clust4j.log.Log.Tag.Algo.HDBSCAN;
    }

    @Override
    public String getName() {
        return "HDBSCAN";
    }

    @Override
    public int getNumberOfIdentifiedClusters() {
        return numClusters;
    }

    @Override
    public int getNumberOfNoisePoints() {
        return numNoisey;
    }

    /**
     * Break up the getLabels method 
     * into numerous smaller ones.
     * @author Taylor G Smith
     */
    abstract static class GetLabelUtils {
        /**
         * Descendingly sort the keys of the map and return
         * them in order, but eliminate the very smallest key
         * @param stability
         * @return
         */
        protected static <T, P> ArrayList<T> descSortedKeySet(TreeMap<T, P> stability) {
            int ct = 0;
            ArrayList<T> nodeList = new ArrayList<>();
            for (T d : stability.descendingKeySet())
                if (++ct < stability.size()) // exclude the root...
                    nodeList.add(d);

            return nodeList;
        }

        /**
         * Get tuples where child size is over one
         * @param tree
         * @return
         */
        protected static EntryPair<ArrayList<double[]>, Integer> childSizeGtOneAndMaxChild(
                ArrayList<CompQuadTup<Integer, Integer, Double, Integer>> tree) {
            ArrayList<double[]> out = new ArrayList<>();
            int max = Integer.MIN_VALUE;

            // [parent, child, lambda, size]
            for (CompQuadTup<Integer, Integer, Double, Integer> tup : tree) {
                if (tup.getFourth() > 1)
                    out.add(new double[] { tup.getFirst(), tup.getSecond(), tup.getThird(), tup.getFourth() });
                else if (tup.getFourth() == 1)
                    max = FastMath.max(max, tup.getSecond());
            }

            return new EntryPair<>(out, max + 1);
        }

        protected static TreeMap<Integer, Boolean> initNodeMap(ArrayList<Integer> nodes) {
            TreeMap<Integer, Boolean> out = new TreeMap<>();
            for (Integer i : nodes)
                out.put(i, true);
            return out;
        }

        protected static double subTreeStability(ArrayList<double[]> clusterTree, int node,
                TreeMap<Integer, Double> stability) {
            double sum = 0;

            // [parent, child, lambda, size]
            for (double[] d : clusterTree)
                if ((int) d[0] == node)
                    sum += stability.get((int) d[1]);

            return sum;
        }

        protected static ArrayList<Integer> breadthFirstSearchFromClusterTree(ArrayList<double[]> tree,
                Integer bfsRoot) {
            int child, parent;
            ArrayList<Integer> result = new ArrayList<>();
            ArrayList<Integer> toProcess = new ArrayList<Integer>();
            ArrayList<Integer> tmp;

            toProcess.add(bfsRoot);

            // [parent, child, lambda, size]
            while (toProcess.size() > 0) {
                result.addAll(toProcess);

                // python code: 
                // to_process = tree['child'][np.in1d(tree['parent'], to_process)]
                // For all tuples, if the parent is in toProcess, then
                // add the child to the new list
                tmp = new ArrayList<Integer>();
                for (double[] d : tree) {
                    parent = (int) d[0];
                    child = (int) d[1];

                    if (toProcess.contains(parent))
                        tmp.add(child);
                }

                toProcess = tmp;
            }

            return result;
        }
    }

    protected static int[] getLabels(ArrayList<CompQuadTup<Integer, Integer, Double, Integer>> condensed,
            TreeMap<Integer, Double> stability) {

        double subTreeStability;
        ArrayList<Integer> clusters = new ArrayList<Integer>();
        HSet<Integer> clusterSet;
        TreeMap<Integer, Integer> clusterMap = new TreeMap<>(), reverseClusterMap = new TreeMap<>();

        // Get descending sorted key set
        ArrayList<Integer> nodeList = GetLabelUtils.descSortedKeySet(stability);

        // Get tuples where child size > 1
        EntryPair<ArrayList<double[]>, Integer> entry = GetLabelUtils.childSizeGtOneAndMaxChild(condensed);
        ArrayList<double[]> clusterTree = entry.getKey();

        // Map of nodes to whether it's a cluster
        TreeMap<Integer, Boolean> isCluster = GetLabelUtils.initNodeMap(nodeList);

        // Get num points
        //int numPoints = entry.getValue();

        // Iter over nodes
        for (Integer node : nodeList) {
            subTreeStability = GetLabelUtils.subTreeStability(clusterTree, node, stability);

            if (subTreeStability > stability.get(node)) {
                isCluster.put(node, false);
                stability.put(node, subTreeStability);
            } else {
                for (Integer subNode : GetLabelUtils.breadthFirstSearchFromClusterTree(clusterTree, node))
                    if (subNode.intValue() != node)
                        isCluster.put(subNode, false);
            }

        }

        // Now add to clusters
        for (Map.Entry<Integer, Boolean> c : isCluster.entrySet())
            if (c.getValue())
                clusters.add(c.getKey());
        clusterSet = new HSet<Integer>(clusters);

        // Build cluster map
        int n = 0;
        for (Integer clust : clusterSet) {
            clusterMap.put(clust, n);
            reverseClusterMap.put(n, clust);
            n++;
        }

        return doLabeling(condensed, clusters, clusterMap);
    }

    // Tested: passing
    static double[][] label(final double[][] tree) {
        double[][] result;
        int a, aa, b, bb, index;
        final int m = tree.length, n = tree[0].length, N = m + 1;
        double delta;

        result = new double[m][n + 1];
        UnionFind U = new UnionFind(N);

        for (index = 0; index < m; index++) {

            a = (int) tree[index][0];
            b = (int) tree[index][1];
            delta = tree[index][2];

            aa = U.fastFind(a);
            bb = U.fastFind(b);

            result[index][0] = aa;
            result[index][1] = bb;
            result[index][2] = delta;
            result[index][3] = U.size[aa] + U.size[bb];

            U.union(aa, bb);
        }

        return result;
    }

    /*
    protected static double[][] singleLinkage(final double[][] dists) {
       final double[][] hierarchy = LinkageTreeUtils.minSpanTreeLinkageCore(dists, dists.length);
       return label(MatUtils.sortAscByCol(hierarchy, 2));
    }
    */

    protected static int[] treeToLabels(final double[][] X, final double[][] single_linkage_tree,
            final int min_size) {
        return treeToLabels(X, single_linkage_tree, min_size, null);
    }

    protected static int[] treeToLabels(final double[][] X, final double[][] single_linkage_tree,
            final int min_size, Loggable logger) {

        final ArrayList<CompQuadTup<Integer, Integer, Double, Integer>> condensed = LinkageTreeUtils
                .condenseTree(single_linkage_tree, min_size);
        final TreeMap<Integer, Double> stability = LinkageTreeUtils.computeStability(condensed);
        return getLabels(condensed, stability);
    }

    @Override
    final protected Object[] getModelFitSummaryHeaders() {
        return new Object[] { "Class Label", "Num. Instances", "Pct. Instances", "Wall" };
    }

    @Override
    public int[] predict(RealMatrix newData) {
        @SuppressWarnings("unused")
        final int[] fit_labels = getLabels(); // throws the exception if not fit
        final int n = newData.getColumnDimension();

        if (n != this.data.getColumnDimension())
            throw new DimensionMismatchException(n, newData.getColumnDimension());

        // TODO: how to predict these???
        throw new UnsupportedOperationException("HDBSCAN does not currently support predictions");
    }
}