Example usage for org.apache.commons.math3.util ArithmeticUtils binomialCoefficientDouble

List of usage examples for org.apache.commons.math3.util ArithmeticUtils binomialCoefficientDouble

Introduction

On this page you can find example usages of org.apache.commons.math3.util.ArithmeticUtils.binomialCoefficientDouble.

Prototype

public static double binomialCoefficientDouble(final int n, final int k)
        throws NotPositiveException, NumberIsTooLargeException, MathArithmeticException 

Source Link

Document

Returns a double representation of the <a href="http://mathworld.wolfram.com/BinomialCoefficient.html">Binomial Coefficient</a>, "n choose k", the number of k-element subsets that can be selected from an n-element set.

Usage

From source file:com.cloudera.oryx.als.common.lsh.LocationSensitiveHash.java

/**
 * Builds a locality-sensitive-hash index over the given item vectors. The constructor
 * derives {@code maxBitsDiffering} from the requested sample ratio, generates
 * {@code numHashes} random boolean vectors, computes the mean vector, and then groups
 * every item ID into a bucket keyed by the item's bit signature.
 *
 * @param Y item vectors to hash; must be non-null and non-empty
 * @param lshSampleRatio target fraction of items to sample, in the range (0.0, 1.0]
 * @param numHashes number of hash bits to use, in the range [1, 64]
 */
public LocationSensitiveHash(LongObjectMap<float[]> Y, double lshSampleRatio, int numHashes) {
    Preconditions.checkNotNull(Y);
    Preconditions.checkArgument(!Y.isEmpty(), "Y is empty");

    Preconditions.checkArgument(lshSampleRatio > 0.0 && lshSampleRatio <= 1.0, "Bad LSH ratio: %s",
            lshSampleRatio);
    // NOTE(review): the upper bound of 64 presumably reflects that signatures are stored in a long -- confirm.
    Preconditions.checkArgument(numHashes >= 1 && numHashes <= 64, "Bad # hashes: %s", numHashes);

    this.Y = Y;

    log.info("Using LSH sampling to sample about {}% of items", lshSampleRatio * 100.0);

    // This follows from the binomial distribution: accumulate C(numHashes, k) / 2^numHashes
    // for k = 0, 1, ... until the running total reaches the requested sample ratio
    // (or k reaches numHashes).
    double cumulativeProbability = 0.0;
    double denominator = FastMath.pow(2.0, numHashes);
    int bitsDiffering = -1;
    while (bitsDiffering < numHashes && cumulativeProbability < lshSampleRatio) {
        bitsDiffering++;
        cumulativeProbability += ArithmeticUtils.binomialCoefficientDouble(numHashes, bitsDiffering)
                / denominator;
    }

    // The loop stops on the k that reached the ratio; the maximum kept is one below that.
    maxBitsDiffering = bitsDiffering - 1;
    log.info("Max bits differing: {}", maxBitsDiffering);

    // The feature count is taken from the first vector; Y is known to be non-empty here.
    int features = Y.entrySet().iterator().next().getValue().length;

    RandomGenerator random = RandomManager.getRandom();
    randomVectors = new boolean[numHashes][features];
    for (boolean[] randomVector : randomVectors) {
        for (int j = 0; j < features; j++) {
            randomVector[j] = random.nextBoolean();
        }
    }

    meanVector = findMean(Y, features);

    buckets = new LongObjectMap<long[]>();
    int count = 0;
    int maxBucketSize = 0;
    for (LongObjectMap.MapEntry<float[]> entry : Y.entrySet()) {
        long signature = toBitSignature(entry.getValue());
        long[] ids = buckets.get(signature);
        long[] newIDs;
        if (ids == null) {
            newIDs = new long[] { entry.getKey() };
        } else {
            int length = ids.length;
            // Large majority of arrays will be length 1; all are short.
            // This is a reasonable way to store 'sets' of longs
            newIDs = new long[length + 1];
            System.arraycopy(ids, 0, newIDs, 0, length);
            newIDs[length] = entry.getKey();
        }
        // Track the maximum for new buckets too, so that it is never reported as 0
        // when every bucket holds exactly one item.
        maxBucketSize = FastMath.max(maxBucketSize, newIDs.length);
        buckets.put(signature, newIDs);
        if (++count % 1000000 == 0) {
            log.info("Bucketed {} items", count);
        }
    }

    log.info("Max bucket size {}", maxBucketSize);
    log.info("Put {} items into {} buckets", Y.size(), buckets.size());
    // A separate bucket for new items, which will always be considered
    newItems = new LongSet();
}

From source file:net.myrrix.online.candidate.LocationSensitiveHash.java

/**
 * Builds a locality-sensitive-hash index over the given item vectors. The constructor
 * derives {@code maxBitsDiffering} from {@code LSH_SAMPLE_RATIO}, generates
 * {@code NUM_HASHES} random boolean vectors, computes the mean vector, and then groups
 * every item ID into a bucket keyed by the item's bit signature.
 *
 * @param Y item vectors to hash; must be non-null and non-empty
 */
public LocationSensitiveHash(FastByIDMap<float[]> Y) {
    Preconditions.checkNotNull(Y);
    Preconditions.checkArgument(!Y.isEmpty(), "Y is empty");
    Preconditions.checkState(LSH_SAMPLE_RATIO < 1.0);

    this.Y = Y;

    log.info("Using LSH sampling to sample about {}% of items", LSH_SAMPLE_RATIO * 100.0);

    // This follows from the binomial distribution: accumulate C(NUM_HASHES, k) / 2^NUM_HASHES
    // for k = 0, 1, ... until the running total reaches the configured sample ratio
    // (or k reaches NUM_HASHES).
    double cumulativeProbability = 0.0;
    double denominator = FastMath.pow(2.0, NUM_HASHES);
    int bitsDiffering = -1;
    while (bitsDiffering < NUM_HASHES && cumulativeProbability < LSH_SAMPLE_RATIO) {
        bitsDiffering++;
        cumulativeProbability += ArithmeticUtils.binomialCoefficientDouble(NUM_HASHES, bitsDiffering)
                / denominator;
    }

    // The loop stops on the k that reached the ratio; the maximum kept is one below that.
    maxBitsDiffering = bitsDiffering - 1;
    log.info("Max bits differing: {}", maxBitsDiffering);

    // The feature count is taken from the first vector; Y is known to be non-empty here.
    int features = Y.entrySet().iterator().next().getValue().length;

    RandomGenerator random = RandomManager.getRandom();
    randomVectors = new boolean[NUM_HASHES][features];
    for (boolean[] randomVector : randomVectors) {
        for (int j = 0; j < features; j++) {
            randomVector[j] = random.nextBoolean();
        }
    }

    meanVector = findMean(Y, features);

    buckets = new FastByIDMap<long[]>(1000);
    int count = 0;
    int maxBucketSize = 0;
    for (FastByIDMap.MapEntry<float[]> entry : Y.entrySet()) {
        long signature = toBitSignature(entry.getValue());
        long[] ids = buckets.get(signature);
        long[] newIDs;
        if (ids == null) {
            newIDs = new long[] { entry.getKey() };
        } else {
            int length = ids.length;
            // Large majority of arrays will be length 1; all are short.
            // This is a reasonable way to store 'sets' of longs
            newIDs = new long[length + 1];
            System.arraycopy(ids, 0, newIDs, 0, length);
            newIDs[length] = entry.getKey();
        }
        // Track the maximum for new buckets too, so that it is never reported as 0
        // when every bucket holds exactly one item.
        maxBucketSize = FastMath.max(maxBucketSize, newIDs.length);
        buckets.put(signature, newIDs);
        if (++count % 1000000 == 0) {
            log.info("Bucketed {} items", count);
        }
    }

    log.info("Max bucket size {}", maxBucketSize);
    log.info("Put {} items into {} buckets", Y.size(), buckets.size());
    // A separate bucket for new items, which will always be considered
    newItems = new FastIDSet();
}

From source file:experiment.PascalDistribution_bug.java

/** {@inheritDoc} */
public double probability(int x) {
    // Negative arguments are assigned zero probability.
    if (x < 0) {
        return 0.0;
    }

    // Three factors, evaluated and multiplied left to right:
    // C(x + r - 1, r - 1) * p^r * (1 - p)^x, with r = numberOfSuccesses, p = probabilityOfSuccess.
    final double combinations = ArithmeticUtils.binomialCoefficientDouble(x + numberOfSuccesses - 1,
            numberOfSuccesses - 1);
    final double successFactor = FastMath.pow(probabilityOfSuccess, numberOfSuccesses);
    final double failureFactor = FastMath.pow(1.0 - probabilityOfSuccess, x);
    return combinations * successFactor * failureFactor;
}