List of usage examples for org.apache.commons.math3.util.ArithmeticUtils.binomialCoefficientDouble
public static double binomialCoefficientDouble(final int n, final int k) throws NotPositiveException, NumberIsTooLargeException, MathArithmeticException
From source file:com.cloudera.oryx.als.common.lsh.LocationSensitiveHash.java
/** * @param Y item vectors to hash/* w ww . ja va 2s .c om*/ */ public LocationSensitiveHash(LongObjectMap<float[]> Y, double lshSampleRatio, int numHashes) { Preconditions.checkNotNull(Y); Preconditions.checkArgument(!Y.isEmpty(), "Y is empty"); Preconditions.checkArgument(lshSampleRatio > 0.0 && lshSampleRatio <= 1.0, "Bad LSH ratio: %s", lshSampleRatio); Preconditions.checkArgument(numHashes >= 1 && numHashes <= 64, "Bad # hashes: %s", numHashes); this.Y = Y; log.info("Using LSH sampling to sample about {}% of items", lshSampleRatio * 100.0); // This follows from the binomial distribution: double cumulativeProbability = 0.0; double denominator = FastMath.pow(2.0, numHashes); int bitsDiffering = -1; while (bitsDiffering < numHashes && cumulativeProbability < lshSampleRatio) { bitsDiffering++; cumulativeProbability += ArithmeticUtils.binomialCoefficientDouble(numHashes, bitsDiffering) / denominator; } maxBitsDiffering = bitsDiffering - 1; log.info("Max bits differing: {}", maxBitsDiffering); int features = Y.entrySet().iterator().next().getValue().length; RandomGenerator random = RandomManager.getRandom(); randomVectors = new boolean[numHashes][features]; for (boolean[] randomVector : randomVectors) { for (int j = 0; j < features; j++) { randomVector[j] = random.nextBoolean(); } } meanVector = findMean(Y, features); buckets = new LongObjectMap<long[]>(); int count = 0; int maxBucketSize = 0; for (LongObjectMap.MapEntry<float[]> entry : Y.entrySet()) { long signature = toBitSignature(entry.getValue()); long[] ids = buckets.get(signature); if (ids == null) { buckets.put(signature, new long[] { entry.getKey() }); } else { int length = ids.length; // Large majority of arrays will be length 1; all are short. 
// This is a reasonable way to store 'sets' of longs long[] newIDs = new long[length + 1]; for (int i = 0; i < length; i++) { newIDs[i] = ids[i]; } newIDs[length] = entry.getKey(); maxBucketSize = FastMath.max(maxBucketSize, newIDs.length); buckets.put(signature, newIDs); } if (++count % 1000000 == 0) { log.info("Bucketed {} items", count); } } log.info("Max bucket size {}", maxBucketSize); log.info("Put {} items into {} buckets", Y.size(), buckets.size()); // A separate bucket for new items, which will always be considered newItems = new LongSet(); }
From source file:net.myrrix.online.candidate.LocationSensitiveHash.java
/** * @param Y item vectors to hash/*from w w w. j av a 2 s .c o m*/ */ public LocationSensitiveHash(FastByIDMap<float[]> Y) { Preconditions.checkNotNull(Y); Preconditions.checkArgument(!Y.isEmpty(), "Y is empty"); Preconditions.checkState(LSH_SAMPLE_RATIO < 1.0); this.Y = Y; log.info("Using LSH sampling to sample about {}% of items", LSH_SAMPLE_RATIO * 100.0); // This follows from the binomial distribution: double cumulativeProbability = 0.0; double denominator = FastMath.pow(2.0, NUM_HASHES); int bitsDiffering = -1; while (bitsDiffering < NUM_HASHES && cumulativeProbability < LSH_SAMPLE_RATIO) { bitsDiffering++; cumulativeProbability += ArithmeticUtils.binomialCoefficientDouble(NUM_HASHES, bitsDiffering) / denominator; } maxBitsDiffering = bitsDiffering - 1; log.info("Max bits differing: {}", maxBitsDiffering); int features = Y.entrySet().iterator().next().getValue().length; RandomGenerator random = RandomManager.getRandom(); randomVectors = new boolean[NUM_HASHES][features]; for (boolean[] randomVector : randomVectors) { for (int j = 0; j < features; j++) { randomVector[j] = random.nextBoolean(); } } meanVector = findMean(Y, features); buckets = new FastByIDMap<long[]>(1000); int count = 0; int maxBucketSize = 0; for (FastByIDMap.MapEntry<float[]> entry : Y.entrySet()) { long signature = toBitSignature(entry.getValue()); long[] ids = buckets.get(signature); if (ids == null) { buckets.put(signature, new long[] { entry.getKey() }); } else { int length = ids.length; // Large majority of arrays will be length 1; all are short. 
// This is a reasonable way to store 'sets' of longs long[] newIDs = new long[length + 1]; for (int i = 0; i < length; i++) { newIDs[i] = ids[i]; } newIDs[length] = entry.getKey(); maxBucketSize = FastMath.max(maxBucketSize, newIDs.length); buckets.put(signature, newIDs); } if (++count % 1000000 == 0) { log.info("Bucketed {} items", count); } } log.info("Max bucket size {}", maxBucketSize); log.info("Put {} items into {} buckets", Y.size(), buckets.size()); // A separate bucket for new items, which will always be considered newItems = new FastIDSet(); }
From source file:experiment.PascalDistribution_bug.java
/** {@inheritDoc} */ public double probability(int x) { double ret;// w w w. jav a 2 s . c o m if (x < 0) { ret = 0.0; } else { ret = ArithmeticUtils.binomialCoefficientDouble(x + numberOfSuccesses - 1, numberOfSuccesses - 1) * FastMath.pow(probabilityOfSuccess, numberOfSuccesses) * FastMath.pow(1.0 - probabilityOfSuccess, x); } return ret; }