Example usage for com.google.common.collect Iterables skip


Introduction

This page lists usage examples for com.google.common.collect.Iterables.skip.

Prototype

public static <T> Iterable<T> skip(final Iterable<T> iterable, final int numberToSkip) 

Document

Returns a view of iterable that skips its first numberToSkip elements.
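For orientation, here is a minimal, self-contained sketch of the call itself (plain Guava; the sample data is made up for illustration):

import com.google.common.collect.Iterables;

import java.util.Arrays;
import java.util.List;

public class SkipDemo {
    public static void main(String[] args) {
        List<String> letters = Arrays.asList("a", "b", "c", "d");
        // skip() returns a lazy view; nothing is copied, the first two
        // elements are simply bypassed when iterating.
        for (String s : Iterables.skip(letters, 2)) {
            System.out.println(s); // prints "c" then "d"
        }
    }
}

Because the result is a view, changes made to the backing collection before iterator() is called are reflected in the returned iterable.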

Usage

From source file:org.calrissian.mango.collect.CloseableIterables.java

/**
 * Returns a view of {@code iterable} that skips its first
 * {@code numberToSkip} elements. If {@code iterable} contains fewer than
 * {@code numberToSkip} elements, the returned {@code CloseableIterable} skips all of its
 * elements.
 *
 * <p>Modifications to the underlying {@link CloseableIterable} before a call to
 * {@code iterator()} are reflected in the returned iterator. That is, the
 * iterator skips the first {@code numberToSkip} elements that exist when the
 * {@code Iterator} is created, not when {@code skip()} is called.
 *
 * <p>The returned {@code CloseableIterable}'s iterator supports {@code remove()} if the
 * iterator of the underlying iterable supports it. Note that it is
 * <i>not</i> possible to delete the last skipped element by immediately
 * calling {@code remove()} on that iterator, as the {@code Iterator}
 * contract states that a call to {@code remove()} before a call to
 * {@code next()} will throw an {@link IllegalStateException}.
 */
public static <T> CloseableIterable<T> skip(final CloseableIterable<T> iterable, final int numberToSkip) {
    return wrap(Iterables.skip(iterable, numberToSkip), iterable);
}
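A hedged sketch of how the wrapper might be used (openLines() and process() are hypothetical, and this assumes the mango CloseableIterable is an Iterable that also exposes close(), possibly throwing IOException):

void printWithoutHeader() throws IOException {
    CloseableIterable<String> lines = openLines(); // hypothetical resource-backed iterable
    try {
        // Skip the first element (e.g. a header row); the underlying resource
        // can still be closed through the original reference afterwards.
        for (String line : CloseableIterables.skip(lines, 1)) {
            process(line); // hypothetical consumer
        }
    } finally {
        lines.close();
    }
}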

From source file:com.wrmsr.wava.yen.parser.ModuleFactory.java

public YModule create() {
    functionCounter = 0;
    for (Element e : Iterables.skip(root, 1)) {
        ListElement le = (ListElement) e;
        preParseFunctionType(le);
        preParseImports(le);
    }
    functionCounter = 0;
    for (Element e : Iterables.skip(root, 1)) {
        ListElement le = (ListElement) e;
        parseModuleElement(le);
    }
    return builder.build();
}

From source file:com.google.enterprise.connector.util.diffing.SnapshotStore.java

@VisibleForTesting
public void deleteOldSnapshots() {
    // Leave at least two snapshot files, even if oldestSnapshotToKeep
    // is too high.
    for (long k : Iterables.skip(getExistingSnapshots(), 2)) {
        if (k < oldestSnapshotToKeep) {
            File x = getSnapshotFile(snapshotDir, k);
            if (x.delete()) {
                LOG.fine("deleting snapshot file " + x.getAbsolutePath());
            } else {
                LOG.warning("failed to delete snapshot file " + x.getAbsolutePath());
            }
        }
    }
}

From source file:msi.gaml.factories.ModelAssembler.java

public ModelDescription assemble(final String projectPath, final String modelPath,
        final Iterable<ISyntacticElement> allModels, final ValidationContext collector, final boolean document,
        final Map<String, ModelDescription> mm) {
    final ImmutableList<ISyntacticElement> models = ImmutableList.copyOf(allModels);
    final TOrderedHashMap<String, ISyntacticElement> speciesNodes = new TOrderedHashMap();
    final TOrderedHashMap<String, TOrderedHashMap<String, ISyntacticElement>>[] experimentNodes = new TOrderedHashMap[1];
    final ISyntacticElement globalNodes = SyntacticFactory.create(GLOBAL, (EObject) null, true);
    final ISyntacticElement source = models.get(0);
    Facets globalFacets = null;
    if (source.hasFacet(IKeyword.PRAGMA)) {
        final Facets facets = source.copyFacets(null);
        final List<String> pragmas = (List<String>) facets.get(IKeyword.PRAGMA).getExpression().getConstValue();
        collector.resetInfoAndWarning();
        if (pragmas != null) {
            if (pragmas.contains(IKeyword.NO_INFO)) {
                collector.setNoInfo();
            }
            if (pragmas.contains(IKeyword.NO_WARNING)) {
                collector.setNoWarning();
            }
            if (pragmas.contains(IKeyword.NO_EXPERIMENT)) {
                collector.setNoExperiment();
            }
        }

    }
    final Map<String, SpeciesDescription> tempSpeciesCache = new THashMap<>();

    for (final ISyntacticElement cm : models.reverse()) {
        final SyntacticModelElement currentModel = (SyntacticModelElement) cm;
        if (currentModel != null) {
            if (currentModel.hasFacets()) {
                if (globalFacets == null) {
                    globalFacets = new Facets(currentModel.copyFacets(null));
                } else {
                    globalFacets.putAll(currentModel.copyFacets(null));
                }
            }
            currentModel.visitChildren(element -> globalNodes.addChild(element));
            SyntacticVisitor visitor = element -> addSpeciesNode(element, speciesNodes, collector);
            currentModel.visitSpecies(visitor);

            // We input the species so that grids are always the last ones
            // (see DiffusionStatement)
            currentModel.visitGrids(visitor);
            visitor = element -> {
                if (experimentNodes[0] == null) {
                    experimentNodes[0] = new TOrderedHashMap();
                }
                addExperimentNode(element, currentModel.getName(), experimentNodes[0], collector);

            };
            currentModel.visitExperiments(visitor);

        }
    }

    final String modelName = buildModelName(source.getName());

    // We build a list of working paths from which the composite model will
    // be able to look for resources. These working paths come from the
    // imported models

    Set<String> absoluteAlternatePathAsStrings = models.isEmpty() ? null
            : ImmutableSet.copyOf(
                    Iterables.transform(models.reverse(), each -> ((SyntacticModelElement) each).getPath()));

    if (mm != null) {
        for (final ModelDescription m1 : mm.values()) {
            for (final String im : m1.getAlternatePaths()) {
                absoluteAlternatePathAsStrings = Sets.union(absoluteAlternatePathAsStrings,
                        Collections.singleton(im));
            }
        }
    }

    final ModelDescription model = new ModelDescription(modelName, null, projectPath, modelPath,
            source.getElement(), null, ModelDescription.ROOT, null, globalFacets, collector,
            absoluteAlternatePathAsStrings);

    final Collection<String> allModelNames = models.size() == 1 ? null
            : ImmutableSet.copyOf(
                    Iterables.transform(Iterables.skip(models, 1), each -> buildModelName(each.getName())));
    model.setImportedModelNames(allModelNames);
    model.isDocumenting(document);

    // hqnghi add micro-models
    if (mm != null) {
        // model.setMicroModels(mm);
        model.addChildren(mm.values());
    }
    // end-hqnghi
    // recursively add user-defined species to world and down on to the
    // hierarchy
    speciesNodes.forEachValue(speciesNode -> {
        addMicroSpecies(model, speciesNode, tempSpeciesCache);
        return true;
    });
    if (experimentNodes[0] != null) {
        experimentNodes[0].forEachEntry((s, b) -> {
            b.forEachValue(experimentNode -> {
                addExperiment(s, model, experimentNode, tempSpeciesCache);
                return true;
            });
            return true;
        });
    }

    // Parent the species and the experiments of the model (all are now
    // known).
    speciesNodes.forEachValue(speciesNode -> {
        parentSpecies(model, speciesNode, model, tempSpeciesCache);
        return true;
    });

    if (experimentNodes[0] != null) {
        experimentNodes[0].forEachEntry((s, b) -> {
            b.forEachValue(experimentNode -> {
                parentExperiment(model, experimentNode);
                return true;
            });
            return true;
        });
    }

    // Initialize the hierarchy of types
    model.buildTypes();
    // hqnghi build micro-models as types
    if (mm != null) {
        for (final Entry<String, ModelDescription> entry : mm.entrySet()) {
            model.getTypesManager().alias(entry.getValue().getName(), entry.getKey());
        }
        // end-hqnghi
    }

    // Make species and experiments recursively create their attributes,
    // actions....
    complementSpecies(model, globalNodes);

    speciesNodes.forEachValue(speciesNode -> {
        complementSpecies(model.getMicroSpecies(speciesNode.getName()), speciesNode);
        return true;
    });

    if (experimentNodes[0] != null) {
        experimentNodes[0].forEachEntry((s, b) -> {
            b.forEachValue(experimentNode -> {
                complementSpecies(model.getExperiment(experimentNode.getName()), experimentNode);
                return true;
            });
            return true;
        });
    }

    // Complement recursively the different species (incl. the world). The
    // recursion is hierarchical

    model.inheritFromParent();

    for (final SpeciesDescription sd : getSpeciesInHierarchicalOrder(model)) {
        sd.inheritFromParent();
        if (sd.isExperiment()) {
            if (!sd.finalizeDescription()) {
                return null;
            }
        }
    }

    // Issue #1708 (put before the finalization)
    if (model.hasFacet(SCHEDULES) || model.hasFacet(FREQUENCY)) {
        createSchedulerSpecies(model);
    }

    if (!model.finalizeDescription()) {
        return null;
    }

    if (document) {
        collector.document(model);
    }
    return model;

}

From source file:org.apache.mahout.knn.cluster.BallKMeans.java

/**
 * Selects some of the original points according to the k-means++ algorithm.  The basic idea is that
 * points are selected with probability proportional to their distance from any selected point.  In
 * this version, points have weights which multiply their likelihood of being selected.  This is the
 * same as if there were as many copies of the same point as indicated by the weight.
 * <p/>
 * This is pretty expensive, but it vastly improves the quality and convergence of the k-means algorithm.
 * The basic idea can be made much faster by only processing a random subset of the original points.
 * In the context of streaming k-means, the total number of possible seeds will be about k log n so this
 * selection will cost O(k^2 (log n)^2) which isn't much worse than the random sampling idea.  At
 * n = 10^9, the cost of this initialization will be about 10x worse than a reasonable random sampling
 * implementation.
 * <p/>
 * The side effect of this method is to fill the centroids structure.
 *
 * @param datapoints The datapoints to select from.  These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeeds(List<? extends WeightedVector> datapoints) {
    Preconditions.checkArgument(datapoints.size() > 1,
            "Must have at least two datapoints points to cluster " + "sensibly");
    // Compute the centroid of all of the datapoints.  This is then used to compute the squared radius of the datapoints.
    Centroid center = new Centroid(datapoints.iterator().next());
    for (WeightedVector row : Iterables.skip(datapoints, 1)) {
        center.update(row);
    }
    // Given the centroid, we can compute \Delta_1^2(X), the total squared distance for the datapoints;
    // this accelerates seed selection.
    double radius = 0;
    DistanceMeasure l2 = new SquaredEuclideanDistanceMeasure();
    for (WeightedVector row : datapoints) {
        radius += l2.distance(row, center);
    }

    // Find the first seed c_1 (and conceptually the second, c_2) as might be done in the 2-means clustering so that
    // the probability of selecting c_1 and c_2 is proportional to || c_1 - c_2 ||^2.  This is done
    // by first selecting c_1 with probability:
    //
    // p(c_1) = sum_{c_1} || c_1 - c_2 ||^2 \over sum_{c_1, c_2} || c_1 - c_2 ||^2
    //
    // This can be simplified to:
    //
    // p(c_1) = \Delta_1^2(X) + n || c_1 - c ||^2 / (2 n \Delta_1^2(X))
    //
    // where c = \sum x / n and \Delta_1^2(X) = sum || x - c ||^2
    //
    // All subsequent seeds c_i (including c_2) can then be selected from the remaining points with probability
    // proportional to Pr(c_i == x_j) = min_{m < i} || c_m - x_j ||^2.

    // Multinomial distribution of vector indices for the selection seeds. These correspond to
    // the indices of the vectors in the original datapoints list.
    Multinomial<Integer> seedSelector = new Multinomial<Integer>();
    for (int i = 0; i < datapoints.size(); ++i) {
        double selectionProbability = radius + datapoints.size() * l2.distance(datapoints.get(i), center);
        seedSelector.add(i, selectionProbability);
    }

    Centroid c_1 = new Centroid(datapoints.get(seedSelector.sample()).clone());
    c_1.setIndex(0);
    // Construct a set of weighted things which can be used for random selection.  Initial weights are
    // set to the squared distance from c_1
    for (int i = 0; i < datapoints.size(); ++i) {
        WeightedVector row = datapoints.get(i);
        final double w = l2.distance(c_1, row) * row.getWeight();
        seedSelector.set(i, w);
    }

    // From here, seeds are selected with probability proportional to:
    //
    // r_i = min_{c_j} || x_i - c_j ||^2
    //
    // when we only have c_1, we have already set these distances and as we select each new
    // seed, we update the minimum distances.
    centroids.add(c_1);
    int clusterIndex = 1;
    while (centroids.size() < numClusters) {
        // Select according to weights.
        int seedIndex = seedSelector.sample();
        Centroid nextSeed = new Centroid(datapoints.get(seedIndex));
        // (WeightedVector)datapoints.get(seedIndex).clone());
        nextSeed.setIndex(clusterIndex++);
        centroids.add(nextSeed);
        // Don't select this one again.
        seedSelector.delete(seedIndex);
        // Re-weight everything according to the minimum distance to a seed.
        for (int currSeedIndex : seedSelector) {
            WeightedVector curr = datapoints.get(currSeedIndex);
            double newWeight = nextSeed.getWeight() * l2.distance(nextSeed, curr);
            if (newWeight < seedSelector.getWeight(currSeedIndex)) {
                seedSelector.set(currSeedIndex, newWeight);
            }
        }
    }
}

From source file:co.cask.cdap.logging.read.AvroFileReader.java

public Collection<LogEvent> readLogPrev(Location file, Filter logFilter, long fromTimeMs, final int maxEvents) {
    try {
        DataFileReader<GenericRecord> dataFileReader = createReader(file);

        try {
            if (!dataFileReader.hasNext()) {
                return ImmutableList.of();
            }

            GenericRecord datum;
            List<List<LogEvent>> logSegments = Lists.newArrayList();
            int count = 0;

            // Calculate skipLen based on fileLength
            long skipLen = file.length() / 10;
            if (skipLen > DEFAULT_SKIP_LEN) {
                skipLen = DEFAULT_SKIP_LEN;
            } else if (skipLen <= 0) {
                skipLen = DEFAULT_SKIP_LEN;
            }

            List<LogEvent> logSegment = Lists.newArrayList();

            long lastSeekPos;
            long seekPos = file.length();
            while (seekPos > 0) {
                lastSeekPos = seekPos;
                seekPos = seekPos < skipLen ? 0 : seekPos - skipLen;
                dataFileReader.sync(seekPos);

                logSegment = logSegment.isEmpty() ? logSegment : Lists.<LogEvent>newArrayList();
                // read all the elements in the current segment (seekPos up to lastSeekPos)
                while (dataFileReader.hasNext() && !dataFileReader.pastSync(lastSeekPos)) {
                    datum = dataFileReader.next();

                    ILoggingEvent loggingEvent = LoggingEvent.decode(datum);

                    // Stop when reached fromTimeMs
                    if (loggingEvent.getTimeStamp() > fromTimeMs) {
                        break;
                    }

                    if (logFilter.match(loggingEvent)) {
                        ++count;
                        logSegment.add(new LogEvent(loggingEvent,
                                new LogOffset(LogOffset.INVALID_KAFKA_OFFSET, loggingEvent.getTimeStamp())));
                    }
                }

                if (!logSegment.isEmpty()) {
                    logSegments.add(logSegment);
                }

                if (count > maxEvents) {
                    break;
                }
            }

            int skip = count >= maxEvents ? count - maxEvents : 0;
            return Lists.newArrayList(Iterables.skip(Iterables.concat(Lists.reverse(logSegments)), skip));
        } finally {
            try {
                dataFileReader.close();
            } catch (IOException e) {
                LOG.error(String.format("Got exception while closing log file %s", file.toURI()), e);
            }
        }
    } catch (Exception e) {
        LOG.error(String.format("Got exception while reading log file %s", file.toURI()), e);
        throw Throwables.propagate(e);
    }
}

From source file:co.cask.cdap.logging.read.AvroFileLogReader.java

public Collection<LogEvent> readLogPrev(Location file, Filter logFilter, long fromTimeMs, final int maxEvents) {
    try {
        DataFileReader<GenericRecord> dataFileReader = createReader(file);

        try {
            if (!dataFileReader.hasNext()) {
                return ImmutableList.of();
            }

            GenericRecord datum;
            List<List<LogEvent>> logSegments = Lists.newArrayList();
            int count = 0;

            // Calculate skipLen based on fileLength
            long skipLen = file.length() / 10;
            if (skipLen > DEFAULT_SKIP_LEN) {
                skipLen = DEFAULT_SKIP_LEN;
            } else if (skipLen <= 0) {
                skipLen = DEFAULT_SKIP_LEN;
            }

            List<LogEvent> logSegment = Lists.newArrayList();
            long boundaryTimeMs = Long.MAX_VALUE;

            long seekPos = file.length();
            while (seekPos > 0) {
                seekPos = seekPos < skipLen ? 0 : seekPos - skipLen;
                dataFileReader.sync(seekPos);

                logSegment = logSegment.isEmpty() ? logSegment : Lists.<LogEvent>newArrayList();
                long segmentStartTimeMs = Long.MAX_VALUE;
                while (dataFileReader.hasNext()) {
                    datum = dataFileReader.next();

                    ILoggingEvent loggingEvent = LoggingEvent.decode(datum);

                    if (segmentStartTimeMs == Long.MAX_VALUE) {
                        segmentStartTimeMs = loggingEvent.getTimeStamp();
                    }

                    // Stop when reached fromTimeMs, or at the end of current segment.
                    if (loggingEvent.getTimeStamp() > fromTimeMs
                            || loggingEvent.getTimeStamp() >= boundaryTimeMs) {
                        break;
                    }

                    if (logFilter.match(loggingEvent)) {
                        ++count;
                        logSegment.add(new LogEvent(loggingEvent, loggingEvent.getTimeStamp()));
                    }
                }

                boundaryTimeMs = segmentStartTimeMs;

                if (!logSegment.isEmpty()) {
                    logSegments.add(logSegment);
                }

                if (count > maxEvents) {
                    break;
                }
            }

            int skip = count >= maxEvents ? count - maxEvents : 0;
            return Lists.newArrayList(Iterables.skip(Iterables.concat(Lists.reverse(logSegments)), skip));
        } finally {
            try {
                dataFileReader.close();
            } catch (IOException e) {
                LOG.error(String.format("Got exception while closing log file %s", file.toURI()), e);
            }
        }
    } catch (Exception e) {
        LOG.error(String.format("Got exception while reading log file %s", file.toURI()), e);
        throw Throwables.propagate(e);
    }
}

From source file:com.blackducksoftware.bdio.model.ExternalIdentifier.java

@Nullable
public String getSuiteReleaseTag() {
    return Iterables.getFirst(Iterables.skip(getBdSuiteId(), 1), null);
}
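The getFirst/skip combination above is a common Guava idiom for "second element, or a default if there is none"; a small stand-alone illustration with made-up data:

List<String> parts = Arrays.asList("suite-id", "release-tag");
// Skip the first element, then take the first of what remains.
String tag = Iterables.getFirst(Iterables.skip(parts, 1), null);
// tag == "release-tag"; with zero or one element, tag would be null.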

From source file:co.cask.cdap.logging.write.LogLocation.java

/**
 * Return closeable iterator of {@link LogEvent}
 * @param logFilter filter for filtering log events
 * @param fromTimeMs start timestamp in millis
 * @param maxEvents max events to return
 * @return closeable iterator of previous log events
 */
@SuppressWarnings("WeakerAccess")
public Collection<LogEvent> readLogPrev(Filter logFilter, long fromTimeMs, final int maxEvents)
        throws IOException {
    DataFileReader<GenericRecord> dataFileReader = createReader();

    try {
        if (!dataFileReader.hasNext()) {
            return ImmutableList.of();
        }

        List<List<LogEvent>> logSegments = Lists.newArrayList();
        List<LogEvent> logSegment;
        int count = 0;

        // Calculate skipLen based on fileLength
        long length = location.length();
        LOG.trace("Got file length {}", length);
        long skipLen = length / 10;
        if (skipLen > DEFAULT_SKIP_LEN || skipLen <= 0) {
            skipLen = DEFAULT_SKIP_LEN;
        }

        // For open file, endPosition sync marker is unknown so start from file length and read up to the actual EOF
        dataFileReader.sync(length);
        long finalSync = dataFileReader.previousSync();
        logSegment = readToEndSyncPosition(dataFileReader, logFilter, fromTimeMs, -1);

        if (!logSegment.isEmpty()) {
            logSegments.add(logSegment);
            count = count + logSegment.size();
        }

        LOG.trace("Read logevents {} from position {}", count, finalSync);

        long startPosition = finalSync;
        long endPosition = startPosition;
        long currentSync;

        while (startPosition > 0 && count < maxEvents) {
            // Skip to sync position less than current sync position
            startPosition = skipToPosition(dataFileReader, startPosition, endPosition, skipLen);
            currentSync = dataFileReader.previousSync();
            logSegment = readToEndSyncPosition(dataFileReader, logFilter, fromTimeMs, endPosition);

            if (!logSegment.isEmpty()) {
                logSegments.add(logSegment);
                count = count + logSegment.size();
            }
            LOG.trace("Read logevents {} from position {} to endPosition {}", count, currentSync, endPosition);

            endPosition = currentSync;
        }

        int skip = count >= maxEvents ? count - maxEvents : 0;
        return Lists.newArrayList(Iterables.skip(Iterables.concat(Lists.reverse(logSegments)), skip));
    } finally {
        try {
            dataFileReader.close();
        } catch (IOException e) {
            LOG.error("Got exception while closing log file {}", location, e);
        }
    }
}

From source file:org.apache.mahout.knn.cluster.StreamingKMeans.java

private UpdatableSearcher clusterInternal(Iterable<Centroid> datapoints, boolean collapseClusters) {
    int oldNumProcessedDataPoints = numProcessedDatapoints;
    // We clear the centroids we have in case of cluster collapse, the old clusters are the
    // datapoints but we need to re-cluster them.
    if (collapseClusters) {
        centroids.clear();
        numProcessedDatapoints = 0;
    }

    int numCentroidsToSkip = 0;
    if (centroids.size() == 0) {
        // Assign the first datapoint to the first cluster.
        // Adding a vector to a searcher would normally just reference the copy,
        // but we could potentially mutate it and so we need to make a clone.
        centroids.add(Iterables.get(datapoints, 0).clone());
        numCentroidsToSkip = 1;
        ++numProcessedDatapoints;
    }

    Random rand = RandomUtils.getRandom();
    // To cluster, we scan the data and either add each point to the nearest group or create a new group.
    // when we get too many groups, we need to increase the threshold and rescan our current groups
    for (WeightedVector row : Iterables.skip(datapoints, numCentroidsToSkip)) {
        // Get the closest vector and its weight as a WeightedThing<Vector>.
        // The weight of the WeightedThing is the distance to the query and the value is a
        // reference to one of the vectors we added to the searcher previously.
        WeightedThing<Vector> closestPair = centroids.search(row, 1).get(0);

        // We get a uniformly distributed random number between 0 and 1 and compare it with the
        // distance to the closest cluster divided by the distanceCutoff.
        // This is so that if the closest cluster is further than distanceCutoff,
        // closestPair.getWeight() / distanceCutoff > 1 which will trigger the creation of a new
        // cluster anyway.
        // However, if the ratio is less than 1, we want to create a new cluster with probability
        // proportional to the distance to the closest cluster.
        if (rand.nextDouble() < closestPair.getWeight() / distanceCutoff) {
            // Add new centroid, note that the vector is copied because we may mutate it later.
            centroids.add(row.clone());
        } else {
            // Merge the new point with the existing centroid. This will update the centroid's actual
            // position.
            // We know that all the points we inserted in the centroids searcher are (or extend)
            // WeightedVector, so the cast will always succeed.
            Centroid centroid = (Centroid) closestPair.getValue();
            // We will update the centroid by removing it from the searcher and reinserting it to
            // ensure consistency.
            if (!centroids.remove(centroid, 1e-7)) {
                throw new RuntimeException("Unable to remove centroid");
            }
            centroid.update(row);
            centroids.add(centroid);
        }

        progressLogger.debug(
                "numProcessedDataPoints: {}, estimatedNumClusters: {}, "
                        + "distanceCutoff: {}, numCentroids: {}",
                numProcessedDatapoints, estimatedNumClusters, distanceCutoff, centroids.size());

        if (!collapseClusters && centroids.size() > estimatedNumClusters) {
            estimatedNumClusters = (int) Math.max(estimatedNumClusters,
                    clusterLogFactor * Math.log(numProcessedDatapoints));

            // TODO does shuffling help?
            List<Centroid> shuffled = Lists.newArrayList();
            for (Vector v : centroids) {
                shuffled.add((Centroid) v);
            }
            Collections.shuffle(shuffled);
            // Re-cluster using the shuffled centroids as data points. The centroids member variable
            // is modified directly.
            clusterInternal(shuffled, true);

            // In the original algorithm, with distributions with sharp scale effects, the
            // distanceCutoff can grow to excessive size leading sub-clustering to collapse
            // the centroids set too much. This test prevents increase in distanceCutoff if
            // the current value is doing well at collapsing the clusters.
            if (centroids.size() > clusterOvershoot * estimatedNumClusters) {
                distanceCutoff *= beta;
            }
        }
        ++numProcessedDatapoints;
    }

    if (collapseClusters) {
        numProcessedDatapoints = oldNumProcessedDataPoints;
    }

    // Normally, iterating through the searcher produces Vectors,
    // but since we always used Centroids, we adapt the return type.
    return centroids;
}