List of usage examples for org.apache.commons.math3.stat.descriptive.moment.Mean
public Mean()
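Before the collected examples, here is a minimal sketch of how the no-argument constructor is typically used. The values and class name below are illustrative only (not taken from the examples that follow); Mean is a storeless statistic, so values can be added one at a time with increment(double), or computed in one call over an array with evaluate(double[]).

import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class MeanUsageSketch {
    public static void main(String[] args) {
        // Incremental (storeless) usage: add one value at a time
        Mean runningMean = new Mean();
        runningMean.increment(2.0);
        runningMean.increment(4.0);
        runningMean.increment(9.0);
        System.out.println(runningMean.getResult()); // 5.0
        System.out.println(runningMean.getN());      // 3

        // One-shot usage over an existing array
        double[] data = {2.0, 4.0, 9.0};
        System.out.println(new Mean().evaluate(data)); // 5.0
    }
}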
From source file:com.cloudera.oryx.als.common.lsh.LocationSensitiveHashIT.java
@Test
public void testLSH() {
    RandomGenerator random = RandomManager.getRandom();
    Mean avgPercentTopRecsConsidered = new Mean();
    Mean avgNDCG = new Mean();
    Mean avgPercentAllItemsConsidered = new Mean();
    for (int iteration = 0; iteration < ITERATIONS; iteration++) {
        LongObjectMap<float[]> Y = new LongObjectMap<float[]>();
        for (int i = 0; i < NUM_ITEMS; i++) {
            Y.put(i, RandomUtils.randomUnitVector(NUM_FEATURES, random));
        }
        float[] userVec = RandomUtils.randomUnitVector(NUM_FEATURES, random);
        double[] results = doTestRandomVecs(Y, userVec);
        double percentTopRecsConsidered = results[0];
        double ndcg = results[1];
        double percentAllItemsConsidered = results[2];
        log.info("Considered {}% of all candidates, {} nDCG, got {}% recommendations correct",
                 100 * percentAllItemsConsidered, ndcg, 100 * percentTopRecsConsidered);
        avgPercentTopRecsConsidered.increment(percentTopRecsConsidered);
        avgNDCG.increment(ndcg);
        avgPercentAllItemsConsidered.increment(percentAllItemsConsidered);
    }
    log.info("{}", avgPercentTopRecsConsidered.getResult());
    log.info("{}", avgNDCG.getResult());
    log.info("{}", avgPercentAllItemsConsidered.getResult());
    assertTrue(avgPercentTopRecsConsidered.getResult() > 0.8);
    assertTrue(avgNDCG.getResult() > 0.8);
    assertTrue(avgPercentAllItemsConsidered.getResult() < 0.09);
}
From source file:com.cloudera.oryx.als.common.candidate.LocationSensitiveHashIT.java
@Test
public void testLSH() {
    RandomGenerator random = RandomManager.getRandom();
    Mean avgPercentTopRecsConsidered = new Mean();
    Mean avgNDCG = new Mean();
    Mean avgPercentAllItemsConsidered = new Mean();
    for (int iteration = 0; iteration < ITERATIONS; iteration++) {
        LongObjectMap<float[]> Y = new LongObjectMap<>();
        for (int i = 0; i < NUM_ITEMS; i++) {
            Y.put(i, RandomUtils.randomUnitVector(NUM_FEATURES, random));
        }
        float[] userVec = RandomUtils.randomUnitVector(NUM_FEATURES, random);
        double[] results = doTestRandomVecs(Y, userVec);
        double percentTopRecsConsidered = results[0];
        double ndcg = results[1];
        double percentAllItemsConsidered = results[2];
        log.info("Considered {}% of all candidates, {} nDCG, got {}% recommendations correct",
                 100 * percentAllItemsConsidered, ndcg, 100 * percentTopRecsConsidered);
        avgPercentTopRecsConsidered.increment(percentTopRecsConsidered);
        avgNDCG.increment(ndcg);
        avgPercentAllItemsConsidered.increment(percentAllItemsConsidered);
    }
    log.info("{}", avgPercentTopRecsConsidered.getResult());
    log.info("{}", avgNDCG.getResult());
    log.info("{}", avgPercentAllItemsConsidered.getResult());
    assertTrue(avgPercentTopRecsConsidered.getResult() > 0.85);
    assertTrue(avgNDCG.getResult() > 0.85);
    assertTrue(avgPercentAllItemsConsidered.getResult() < 0.1);
}
From source file:cz.cuni.mff.d3s.spl.data.BenchmarkRunSummary.java
/**
 * Compute arithmetic mean of the samples.
 *
 * @return Arithmetic mean of the data in the original benchmark run.
 */
public synchronized double getMean() {
    if (cacheMean == null) {
        Mean mean = new Mean();
        cacheMean = mean.evaluate(data);
    }
    return cacheMean;
}
From source file:br.unicamp.ic.recod.gpsi.applications.gpsiOVOClassifierFromFiles.java
public gpsiOVOClassifierFromFiles(String datasetPath, gpsiDatasetReader datasetReader, Byte[] classLabels,
        String outputPath, String programsPath, double errorScore) throws Exception {
    super(datasetPath, datasetReader, classLabels, outputPath, errorScore);
    int nClasses, i, j;
    gpsiClassifier[][] classifiers;
    File dir = new File(programsPath + "5/");
    BufferedReader reader;
    File[] files = dir.listFiles((File dir1, String name) -> name.toLowerCase().endsWith(".program"));
    nClasses = (int) Math.ceil(Math.sqrt(2 * files.length));
    classifiers = new gpsiClassifier[nClasses - 1][];
    String[] labels;
    for (i = 0; i < classifiers.length; i++)
        classifiers[i] = new gpsiClassifier[classifiers.length - i];
    for (File program : files) {
        reader = new BufferedReader(new FileReader(program));
        labels = program.getName().split("[_.]");
        i = Integer.parseInt(labels[0]) - 1;
        j = Integer.parseInt(labels[1]) - i - 2;
        classifiers[i][j] = new gpsiClassifier(
                new gpsiScalarSpectralIndexDescriptor(
                        new gpsiStringParserVoxelCombiner(null, reader.readLine())),
                new gpsi1NNToMomentScalarClassificationAlgorithm(new Mean()));
        reader.close();
    }
    ensemble = new gpsiOVOEnsembleMethod(classifiers);
}
From source file:com.cloudera.oryx.rdf.computation.CovtypeIT.java
@Test
public void testCovtype() throws Exception {
    List<Example> allExamples = readCovtypeExamples();
    DecisionForest forest = DecisionForest.fromExamplesWithDefault(allExamples);
    log.info("Evals: {}", forest.getEvaluations());
    assertTrue(new Mean().evaluate(forest.getEvaluations()) >= 0.8);
    double[] importances = forest.getFeatureImportances();
    log.info("Importances: {}", importances);
    assertNotNull(importances);
    for (double d : importances) {
        assertTrue(d >= 0.0);
        assertTrue(d <= 1.0);
    }
    assertEquals(importances[0], Doubles.max(importances));
    // Assert something about important features
    assertTrue(importances[0] > 0.9);
    assertTrue(importances[5] > 0.4);
    assertTrue(importances[9] > 0.4);
    assertTrue(importances[13] > 0.4);
}
From source file:net.myrrix.online.eval.PrecisionRecallEvaluator.java
@Override
public EvaluationResult evaluate(final MyrrixRecommender recommender, final RescorerProvider provider,
        final Multimap<Long, RecommendedItem> testData) throws TasteException {
    final Mean precision = new Mean();
    final Mean recall = new Mean();
    final Mean ndcg = new Mean();
    final Mean meanAveragePrecision = new Mean();
    Processor<Long> processor = new Processor<Long>() {
        @Override
        public void process(Long userID, long count) {
            Collection<RecommendedItem> values = testData.get(userID);
            int numValues = values.size();
            if (numValues == 0) {
                return;
            }
            IDRescorer rescorer = provider == null ? null
                    : provider.getRecommendRescorer(new long[] { userID }, recommender);
            List<RecommendedItem> recs;
            try {
                recs = recommender.recommend(userID, numValues, rescorer);
            } catch (NoSuchUserException nsue) {
                // Probably OK, just removed all data for this user from training
                log.warn("User only in test data: {}", userID);
                return;
            } catch (TasteException te) {
                log.warn("Unexpected exception", te);
                return;
            }
            int numRecs = recs.size();
            Collection<Long> valueIDs = Sets.newHashSet();
            for (RecommendedItem rec : values) {
                valueIDs.add(rec.getItemID());
            }
            int intersectionSize = 0;
            double score = 0.0;
            double maxScore = 0.0;
            Mean precisionAtI = new Mean();
            double averagePrecision = 0.0;
            for (int i = 0; i < numRecs; i++) {
                RecommendedItem rec = recs.get(i);
                double value = LN2 / Math.log(2.0 + i); // 1 / log_2(1 + (i+1))
                if (valueIDs.contains(rec.getItemID())) {
                    intersectionSize++;
                    score += value;
                    precisionAtI.increment(1.0);
                    averagePrecision += precisionAtI.getResult();
                } else {
                    precisionAtI.increment(0.0);
                }
                maxScore += value;
            }
            averagePrecision /= numValues;
            synchronized (precision) {
                precision.increment(numRecs == 0 ? 0.0 : (double) intersectionSize / numRecs);
                recall.increment((double) intersectionSize / numValues);
                ndcg.increment(maxScore == 0.0 ? 0.0 : score / maxScore);
                meanAveragePrecision.increment(averagePrecision);
                if (count % 10000 == 0) {
                    log.info(new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(),
                            meanAveragePrecision.getResult()).toString());
                }
            }
        }
    };
    Paralleler<Long> paralleler = new Paralleler<Long>(testData.keySet().iterator(), processor, "PREval");
    try {
        if (Boolean.parseBoolean(System.getProperty("eval.parallel", "true"))) {
            paralleler.runInParallel();
        } else {
            paralleler.runInSerial();
        }
    } catch (InterruptedException ie) {
        throw new TasteException(ie);
    } catch (ExecutionException e) {
        throw new TasteException(e.getCause());
    }
    EvaluationResult result;
    if (precision.getN() > 0) {
        result = new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(),
                meanAveragePrecision.getResult());
    } else {
        result = null;
    }
    log.info(String.valueOf(result));
    return result;
}
From source file:com.cloudera.oryx.rdf.computation.WineQualityIT.java
@Test
public void testWineQuality() throws Exception {
    List<Example> allExamples = readWineQualityExamples();
    DecisionForest forest = DecisionForest.fromExamplesWithDefault(allExamples);
    log.info("Evals: {}", forest.getEvaluations());
    assertTrue(new Mean().evaluate(forest.getEvaluations()) < 1.2);
    double[] importances = forest.getFeatureImportances();
    log.info("Importances: {}", importances);
    for (double d : importances) {
        assertTrue(d >= 0.0);
        assertTrue(d <= 1.0);
    }
    assertEquals(importances[8], Doubles.max(importances));
    assertTrue(importances[1] > 0.6);
    assertTrue(importances[5] > 0.6);
    assertTrue(importances[8] > 0.8);
    assertTrue(importances[9] > 0.6);
    assertTrue(importances[10] > 0.7);
}
From source file:com.cloudera.oryx.rdf.common.rule.NumericDecision.java
static List<Decision> numericDecisionsFromExamples(int featureNumber, Iterable<Example> examples,
        int suggestedMaxSplitCandidates) {
    Multiset<Float> sortedFeatureValueCounts = TreeMultiset.create();
    StorelessUnivariateStatistic mean = new Mean();
    int numExamples = 0;
    for (Example example : examples) {
        NumericFeature feature = (NumericFeature) example.getFeature(featureNumber);
        if (feature == null) {
            continue;
        }
        numExamples++;
        float value = feature.getValue();
        sortedFeatureValueCounts.add(value, 1);
        mean.increment(value);
    }

    // Make decisions from split points that divide up input into roughly equal amounts of examples
    List<Decision> decisions = Lists.newArrayListWithExpectedSize(suggestedMaxSplitCandidates);
    int approxExamplesPerSplit = FastMath.max(1, numExamples / suggestedMaxSplitCandidates);
    int examplesInSplit = 0;
    float lastValue = Float.NaN;
    // This will iterate in order of value by nature of TreeMultiset
    for (Multiset.Entry<Float> entry : sortedFeatureValueCounts.entrySet()) {
        float value = entry.getElement();
        if (examplesInSplit >= approxExamplesPerSplit) {
            decisions.add(
                    new NumericDecision(featureNumber, (value + lastValue) / 2.0f, (float) mean.getResult()));
            examplesInSplit = 0;
        }
        examplesInSplit += entry.getCount();
        lastValue = value;
    }

    // The vital condition here is that if decision n decides an example is positive, then all subsequent
    // decisions in the list will also find it positive. So we need to order from highest threshold to lowest
    Collections.reverse(decisions);
    return decisions;
}
From source file:com.itemanalysis.psychometrics.measurement.ClassicalItemStatistics.java
public ClassicalItemStatistics(Object id, boolean biasCorrection, boolean pearson, boolean dIndex) {
    this.biasCorrection = biasCorrection;
    this.pearson = pearson;
    this.dIndex = dIndex;
    mean = new Mean();
    sd = new StandardDeviation();
    if (dIndex) {
        upper = new Mean();
        lower = new Mean();
    }
    if (this.pearson) {
        pointBiserial = new PearsonCorrelation();
    } else {
        polyserial = new PolyserialPlugin();
    }
}
From source file:com.cloudera.oryx.app.serving.als.LoadBenchmark.java
@Test
public void testRecommendLoad() throws Exception {
    AtomicLong count = new AtomicLong();
    Mean meanReqTimeMS = new Mean();
    long start = System.currentTimeMillis();
    int workers = LoadTestALSModelFactory.WORKERS;
    ExecUtils.doInParallel(workers, workers, true, i -> {
        RandomGenerator random = RandomManager.getRandom(Integer.toString(i).hashCode() ^ System.nanoTime());
        for (int j = 0; j < LoadTestALSModelFactory.REQS_PER_WORKER; j++) {
            String userID = "U" + random.nextInt(LoadTestALSModelFactory.USERS);
            long callStart = System.currentTimeMillis();
            target("/recommend/" + userID).request().accept(MediaType.APPLICATION_JSON_TYPE)
                    .get(LIST_ID_VALUE_TYPE);
            long timeMS = System.currentTimeMillis() - callStart;
            synchronized (meanReqTimeMS) {
                meanReqTimeMS.increment(timeMS);
            }
            long currentCount = count.incrementAndGet();
            if (currentCount % 100 == 0) {
                log(currentCount, meanReqTimeMS, start);
            }
        }
    });
    int totalRequests = workers * LoadTestALSModelFactory.REQS_PER_WORKER;
    log(totalRequests, meanReqTimeMS, start);
}