List of usage examples for org.apache.commons.math3.stat.descriptive.moment.Mean
public Mean()
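Before the project examples that follow, a minimal self-contained sketch (assuming only commons-math3 on the classpath; the class and variable names are illustrative) of the two ways Mean appears throughout this page: storeless accumulation via increment()/getResult(), and one-shot evaluation of an array via evaluate().

import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class MeanBasics {
    public static void main(String[] args) {
        double[] data = { 2.0, 4.0, 6.0, 8.0 };

        // Storeless (incremental) usage: feed values one at a time.
        Mean incremental = new Mean();
        for (double d : data) {
            incremental.increment(d);
        }
        System.out.println(incremental.getResult()); // 5.0
        System.out.println(incremental.getN());      // 4

        // One-shot usage: evaluate a whole array or a slice of it.
        Mean batch = new Mean();
        System.out.println(batch.evaluate(data));       // 5.0
        System.out.println(batch.evaluate(data, 0, 2)); // 3.0 (first two values)
    }
}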
From source file: br.unicamp.ic.recod.gpsi.applications.gpsiJGAPSelectorEvolver.java
@Override
public void run() throws InvalidConfigurationException, InterruptedException, Exception {
    int i, j, k;
    byte nFolds = 5;
    gpsiDescriptor descriptor;
    gpsiMLDataset mlDataset;
    gpsiVoxelRawDataset dataset;
    GPGenotype gp;
    double[][] fitnessCurves;
    String[] curveLabels = new String[] { "train", "train_val", "val" };
    double bestScore, currentScore;
    IGPProgram current;
    IGPProgram[] elite = null;
    Mean mean = new Mean();
    StandardDeviation sd = new StandardDeviation();
    double validationScore, trainScore;
    double[][][] samples;

    for (byte f = 0; f < nFolds; f++) {
        System.out.println("\nRun " + (f + 1) + "\n");
        rawDataset.assignFolds(
                new byte[] { f, (byte) ((f + 1) % nFolds), (byte) ((f + 2) % nFolds) },
                new byte[] { (byte) ((f + 3) % nFolds) },
                new byte[] { (byte) ((f + 4) % nFolds) });
        dataset = (gpsiVoxelRawDataset) rawDataset;
        gp = create(config, dataset.getnBands(), fitness, null);

        // 0: train, 1: train_val, 2: val
        fitnessCurves = new double[super.numGenerations + numGenerationsSel][];
        bestScore = -Double.MAX_VALUE;

        if (validation > 0)
            elite = new IGPProgram[validation];

        // Selection phase: evolve on all bands for numGenerationsSel generations.
        for (int generation = 0; generation < numGenerationsSel; generation++) {
            gp.evolve(1);
            gp.getGPPopulation().sortByFitness();
            if (validation > 0)
                elite = mergeElite(elite, gp.getGPPopulation().getGPPrograms(), generation);
            if (this.dumpGens) {
                double[][][] dists;
                descriptor = new gpsiScalarSpectralIndexDescriptor(
                        new gpsiJGAPVoxelCombiner(fitness.getB(), gp.getGPPopulation().getGPPrograms()[0]));
                mlDataset = new gpsiMLDataset(descriptor);
                mlDataset.loadWholeDataset(rawDataset, true);
                dists = (new gpsiWholeSampler()).sample(mlDataset.getTrainingEntities(), this.classLabels);
                for (i = 0; i < this.classLabels.length; i++) {
                    stream.register(new gpsiDoubleCsvIOElement(dists[i], null,
                            "gens/f" + (f + 1) + "/" + classLabels[i] + "/" + (generation + 1) + ".csv"));
                }
            }
            fitnessCurves[generation] = new double[] { gp.getAllTimeBest().getFitnessValue() - 1.0 };
            System.out.printf("%3dg: %.4f\n", generation + 1, fitnessCurves[generation][0]);
        }

        // Collect the band variables actually used by the elite programs.
        HashSet<Integer> variables = new HashSet<>();
        for (IGPProgram ind : elite) {
            for (CommandGene node : ind.getChromosome(0).getFunctions()) {
                if (node instanceof Variable) {
                    variables.add(Integer.parseInt(node.getName().replace('b', '0')));
                }
            }
        }

        int[] vars = variables.stream().mapToInt(p -> p).toArray();
        Arrays.sort(vars);
        stream.register(new gpsiStringIOElement(Arrays.toString(vars), "selected_bands/f" + (f + 1) + ".out"));

        // Main phase: evolve again, restricted to the selected bands.
        gp = create(config, dataset.getnBands(), fitness, vars);
        gp.addFittestProgram(elite[0]);

        for (int generation = numGenerationsSel; generation < numGenerationsSel
                + super.numGenerations; generation++) {
            gp.evolve(1);
            gp.getGPPopulation().sortByFitness();
            if (validation > 0)
                elite = mergeElite(elite, gp.getGPPopulation().getGPPrograms(), generation);
            if (this.dumpGens) {
                double[][][] dists;
                descriptor = new gpsiScalarSpectralIndexDescriptor(
                        new gpsiJGAPVoxelCombiner(fitness.getB(), gp.getGPPopulation().getGPPrograms()[0]));
                mlDataset = new gpsiMLDataset(descriptor);
                mlDataset.loadWholeDataset(rawDataset, true);
                dists = (new gpsiWholeSampler()).sample(mlDataset.getTrainingEntities(), this.classLabels);
                for (i = 0; i < this.classLabels.length; i++) {
                    stream.register(new gpsiDoubleCsvIOElement(dists[i], null,
                            "gens/f" + (f + 1) + "/" + classLabels[i] + "/" + (generation + 1) + ".csv"));
                }
            }
            fitnessCurves[generation] = new double[] { gp.getAllTimeBest().getFitnessValue() - 1.0 };
            System.out.printf("%3dg: %.4f\n", generation + 1, fitnessCurves[generation][0]);
        }

        best = new IGPProgram[2];
        best[0] = gp.getAllTimeBest();

        // Model selection: score each elite candidate by the mean minus the standard
        // deviation of its train and validation scores, favoring consistent programs.
        for (i = 0; i < super.validation; i++) {
            current = elite[i];
            descriptor = new gpsiScalarSpectralIndexDescriptor(
                    new gpsiJGAPVoxelCombiner(fitness.getB(), current));
            mlDataset = new gpsiMLDataset(descriptor);
            mlDataset.loadWholeDataset(rawDataset, true);
            samples = this.fitness.getSampler().sample(mlDataset.getValidationEntities(), classLabels);
            validationScore = fitness.getScore().score(samples);
            trainScore = current.getFitnessValue() - 1.0;
            currentScore = mean.evaluate(new double[] { trainScore, validationScore })
                    - sd.evaluate(new double[] { trainScore, validationScore });
            if (currentScore > bestScore) {
                best[1] = current;
                bestScore = currentScore;
            }
        }

        stream.register(new gpsiDoubleCsvIOElement(fitnessCurves, curveLabels, "curves/f" + (f + 1) + ".csv"));

        System.out.println("Best solution for trainning: " + gp.getAllTimeBest().toStringNorm(0));
        stream.register(new gpsiStringIOElement(gp.getAllTimeBest().toStringNorm(0),
                "programs/f" + (f + 1) + "train.program"));

        if (validation > 0) {
            System.out.println("Best solution for trainning and validation: " + best[1].toStringNorm(0));
            stream.register(new gpsiStringIOElement(best[1].toStringNorm(0),
                    "programs/f" + (f + 1) + "train_val.program"));
        }

        descriptor = new gpsiScalarSpectralIndexDescriptor(new gpsiJGAPVoxelCombiner(fitness.getB(), best[0]));
        gpsi1NNToMomentScalarClassificationAlgorithm classificationAlgorithm =
                new gpsi1NNToMomentScalarClassificationAlgorithm(new Mean());
        gpsiClassifier classifier = new gpsiClassifier(descriptor, classificationAlgorithm);
        classifier.fit(this.rawDataset.getTrainingEntities());
        classifier.predict(this.rawDataset.getTestEntities());

        int[][] confusionMatrix = classifier.getConfusionMatrix();
        stream.register(new gpsiIntegerCsvIOElement(confusionMatrix, null,
                "confusion_matrices/f" + (f + 1) + "_train.csv"));

        if (validation > 0) {
            descriptor = new gpsiScalarSpectralIndexDescriptor(
                    new gpsiJGAPVoxelCombiner(fitness.getB(), best[1]));
            classificationAlgorithm = new gpsi1NNToMomentScalarClassificationAlgorithm(new Mean());
            classifier = new gpsiClassifier(descriptor, classificationAlgorithm);
            classifier.fit(this.rawDataset.getTrainingEntities());
            classifier.predict(this.rawDataset.getTestEntities());
            confusionMatrix = classifier.getConfusionMatrix();
            stream.register(new gpsiIntegerCsvIOElement(confusionMatrix, null,
                    "confusion_matrices/f" + (f + 1) + "_train_val.csv"));
        }
    }
}
From source file: com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java
/**
 * Tests the calculations needed for mean/mean and mean/sigma scale linking.
 * Item parameters and true values obtained from example 2 from the STUIRT
 * program by Michael Kolen and colleagues. Note that the original example
 * used the PARSCALE version of item parameters. These were converted to
 * ICL type parameters by subtracting a step from the item difficulty.
 */
@Test
public void mixedFormatDescriptiveStatisticsTestFormX() {
    System.out.println("Mixed format descriptive statistics test Form X");

    ItemResponseModel[] irm = new ItemResponseModel[17];
    irm[0] = new Irm3PL(0.751335, -0.897391, 0.244001, 1.7);
    irm[1] = new Irm3PL(0.955947, -0.811477, 0.242883, 1.7);
    irm[2] = new Irm3PL(0.497206, -0.858681, 0.260893, 1.7);
    irm[3] = new Irm3PL(0.724000, -0.123911, 0.243497, 1.7);
    irm[4] = new Irm3PL(0.865200, 0.205889, 0.319135, 1.7);
    irm[5] = new Irm3PL(0.658129, 0.555228, 0.277826, 1.7);
    irm[6] = new Irm3PL(1.082118, 0.950549, 0.157979, 1.7);
    irm[7] = new Irm3PL(0.988294, 1.377501, 0.084828, 1.7);
    irm[8] = new Irm3PL(1.248923, 1.614355, 0.181874, 1.7);
    irm[9] = new Irm3PL(1.116682, 2.353932, 0.246856, 1.7);
    irm[10] = new Irm3PL(0.438171, 3.217965, 0.309243, 1.7);
    irm[11] = new Irm3PL(1.082206, 4.441864, 0.192339, 1.7);

    double[] step1 = { 0, -1.09327, 1.101266 };
    irm[12] = new IrmGPCM(0.269994, step1, 1.7);
    double[] step2 = { 0, 1.526148, 1.739176 };
    irm[13] = new IrmGPCM(0.972506, step2, 1.7);
    double[] step3 = { 0, 1.362356, 5.566958 };
    irm[14] = new IrmGPCM(0.378812, step3, 1.7);
    double[] step4 = { 0, 1.486566, -0.071229, 1.614823 };
    irm[15] = new IrmGPCM(0.537706, step4, 1.7);
    double[] step5 = { 0, 1.425413, 2.630705, 3.242696 };
    irm[16] = new IrmGPCM(0.554506, step5, 1.7);

    Mean discriminationX = new Mean();
    Mean difficultyX = new Mean();
    Mean difficultyMeanX = new Mean();
    StandardDeviation difficultySdX = new StandardDeviation(false); // Do not correct for bias. Use N in the denominator, not N-1.

    for (int j = 0; j < 17; j++) {
        irm[j].incrementMeanMean(discriminationX, difficultyX);
        irm[j].incrementMeanSigma(difficultyMeanX, difficultySdX);
    }

    // System.out.println("Mean/mean descriptive statistics for Form X");
    // System.out.println("a-mean: " + discriminationX.getResult());
    // System.out.println("b-mean: " + difficultyX.getResult());

    assertEquals("Mean/mean check: discrimination mean", 0.7719,
            Precision.round(discriminationX.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: difficulty mean", 1.3566,
            Precision.round(difficultyX.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24, difficultyX.getN(), 1e-3);

    // System.out.println();
    // System.out.println("Mean/sigma descriptive statistics for Form X");
    // System.out.println("b-mean: " + difficultyMeanX.getResult());
    // System.out.println("b-sd: " + difficultySdX.getResult());
    // System.out.println("b-N: " + difficultyMeanX.getN() + ", " + difficultySdX.getN());

    assertEquals("Mean/sigma check: difficulty mean", 1.3566,
            Precision.round(difficultyMeanX.getResult(), 4), 1e-5);
    assertEquals("Mean/sigma check: difficulty sd", 1.6372,
            Precision.round(difficultySdX.getResult(), 4), 1e-5);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultyMeanX.getN(), 1e-3);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultySdX.getN(), 1e-3);
}
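The new StandardDeviation(false) detail above is easy to miss: the boolean controls whether the variance denominator is N - 1 (sample form, the default) or N (population form, which mean/sigma linking expects). A standalone sketch of the difference, independent of the psychometrics classes (the class name is illustrative):

import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;

public class BiasFlagDemo {
    public static void main(String[] args) {
        double[] b = { -1.0, 0.0, 1.0 };

        // Default constructor corrects for bias: denominator N - 1 (sample standard deviation).
        double sample = new StandardDeviation().evaluate(b);          // 1.0
        // StandardDeviation(false) skips the correction: denominator N (population form).
        double population = new StandardDeviation(false).evaluate(b); // ~0.8165

        System.out.println(sample + " vs " + population);
    }
}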
From source file: com.cloudera.oryx.rdf.computation.RDFDistributedGenerationRunner.java
private static void updateMeanImportances(Map<String, Mean> columnNameToMeanImportance, Model model) {
    for (MiningField field : model.getMiningSchema().getMiningFields()) {
        Double importance = field.getImportance();
        if (importance != null) {
            String fieldName = field.getName().getValue();
            Mean mean = columnNameToMeanImportance.get(fieldName);
            if (mean == null) {
                mean = new Mean();
                columnNameToMeanImportance.put(fieldName, mean);
            }
            mean.increment(importance);
        }
    }
}
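The null-check-then-put sequence above is the pre-Java-8 idiom for per-key accumulators. A sketch of the same idea using computeIfAbsent; the field names and importance values are hypothetical, standing in for the PMML mining fields:

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class PerKeyMeans {
    public static void main(String[] args) {
        // Hypothetical (field name, importance) observations.
        Object[][] observations = { { "age", 0.30 }, { "income", 0.55 }, { "age", 0.40 } };

        Map<String, Mean> meanByName = new HashMap<>();
        for (Object[] obs : observations) {
            // computeIfAbsent replaces the explicit null check in the example above.
            meanByName.computeIfAbsent((String) obs[0], k -> new Mean()).increment((Double) obs[1]);
        }

        // Read the accumulated means back out.
        meanByName.forEach((name, mean) -> System.out.println(name + ": " + mean.getResult()));
        // Prints age: 0.35 and income: 0.55 (map order may vary).
    }
}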
From source file: gedi.util.math.stat.distributions.NormalMixtureDistribution.java
public static NormalMixtureDistribution fit(NormalMixtureDistribution initialMixture, double[] data,
        final int maxIterations, final double threshold) {
    if (maxIterations < 1) {
        throw new NotStrictlyPositiveException(maxIterations);
    }
    if (threshold < Double.MIN_VALUE) {
        throw new NotStrictlyPositiveException(threshold);
    }

    final int n = data.length;
    final int k = initialMixture.getNumComponents();

    if (k == 1)
        return new NormalMixtureDistribution(
                new NormalDistribution[] { new NormalDistribution(new Mean().evaluate(data),
                        new StandardDeviation().evaluate(data)) },
                new double[] { 1 });

    int numIterations = 0;
    double previousLogLikelihood = 0d;
    double logLikelihood = Double.NEGATIVE_INFINITY;

    // Initialize model to fit to initial mixture.
    NormalMixtureDistribution fittedModel = new NormalMixtureDistribution(initialMixture.components,
            initialMixture.mixing);

    while (numIterations++ <= maxIterations
            && FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        previousLogLikelihood = logLikelihood;
        logLikelihood = 0d;

        // E-step: compute the data dependent parameters of the expectation function.
        // The percentage of row's total density between a row and a component
        final double[][] gamma = new double[n][k];
        // Sum of gamma for each component
        final double[] gammaSums = new double[k];

        for (int i = 0; i < n; i++) {
            final double rowDensity = fittedModel.density(data[i]);
            logLikelihood += FastMath.log(rowDensity);
            for (int j = 0; j < k; j++) {
                gamma[i][j] = fittedModel.mixing[j] * fittedModel.components[j].density(data[i]) / rowDensity;
                gammaSums[j] += gamma[i][j];
            }
        }
        logLikelihood /= n;
        // System.out.println(logLikelihood);

        // M-step: compute the new parameters based on the expectation function.
        final double[] newWeights = gammaSums.clone();
        ArrayUtils.mult(newWeights, 1.0 / n);

        NormalDistribution[] comp = new NormalDistribution[k];
        for (int j = 0; j < k; j++) {
            double m = 0;
            for (int i = 0; i < n; i++) {
                m += gamma[i][j] * data[i];
            }
            m /= gammaSums[j];

            double var = 0;
            for (int i = 0; i < n; i++) {
                double d = m - data[i];
                var += gamma[i][j] * d * d;
            }
            var /= gammaSums[j];

            comp[j] = new NormalDistribution(m, Math.sqrt(var));
        }

        // Update current model
        fittedModel = new NormalMixtureDistribution(comp, newWeights);
    }

    if (FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        // Did not converge before the maximum number of iterations
        throw new ConvergenceException();
    }
    return fittedModel;
}
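A sketch of how this fit method might be invoked, inferred only from the signature and constructor usage visible above. It assumes the gedi NormalMixtureDistribution class is on the classpath and that its (NormalDistribution[], double[]) constructor is public; the toy data, two-component starting mixture, and tolerances are arbitrary illustrations.

import org.apache.commons.math3.distribution.NormalDistribution;

import gedi.util.math.stat.distributions.NormalMixtureDistribution;

public class MixtureFitDemo {
    public static void main(String[] args) {
        // Toy data clustered around two modes; real input would be the observations to model.
        double[] data = { -2.1, -1.9, -2.0, -1.8, 1.9, 2.1, 2.0, 2.2 };

        // Arbitrary two-component starting point: means -1 and 1, unit variance, equal weights.
        NormalMixtureDistribution start = new NormalMixtureDistribution(
                new NormalDistribution[] { new NormalDistribution(-1, 1), new NormalDistribution(1, 1) },
                new double[] { 0.5, 0.5 });

        // Up to 1000 EM iterations, stopping when the mean log-likelihood moves by less than 1e-5.
        NormalMixtureDistribution fitted = NormalMixtureDistribution.fit(start, data, 1000, 1e-5);
        System.out.println(fitted);
    }
}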
From source file: com.itemanalysis.psychometrics.irt.equating.RobustZEquatingTest.java
private void testB() {
    double[] bDiff = new double[nB];
    zb = new RobustZ[nB];
    for (int i = 0; i < nB; i++) {
        bDiff[i] = bY[i] - slope * bX[i];
    }

    double median = percentile.evaluate(bDiff, 50);
    double q3 = percentile.evaluate(bDiff, 75);
    double q1 = percentile.evaluate(bDiff, 25);
    double iqr = q3 - q1;

    Mean mean = new Mean();
    for (int i = 0; i < nB; i++) {
        zb[i] = new RobustZ(bDiff[i], median, iqr);
        if (!zb[i].significant(significanceLevel)) {
            mean.increment(bDiff[i]);
        }
    }
    intercept = mean.getResult();
}
From source file: com.cloudera.oryx.als.computation.iterate.row.RowStep.java
@Override
protected MRPipeline createPipeline() throws IOException {
    IterationState iterationState = getIterationState();
    String iterationKey = iterationState.getIterationKey();
    boolean x = iterationState.isComputingX();
    int lastIteration = iterationState.getIteration() - 1;
    Store store = Store.get();

    JobStepConfig config = getConfig();
    String instanceDir = config.getInstanceDir();
    int generationID = config.getGenerationID();

    if (store.exists(Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "X/", false)) {
        // Actually, looks like whole computation of X/Y finished -- just proceed
        return null;
    }

    // Take the opportunity to clean out iteration before last, if computing X
    if (x) {
        String lastLastIterationKey = Namespaces.getIterationsPrefix(instanceDir, generationID)
                + (lastIteration - 1) + '/';
        if (store.exists(lastLastIterationKey, false)) {
            log.info("Deleting old iteration data from {}", lastLastIterationKey);
            store.recursiveDelete(lastLastIterationKey);
        }
    }

    String yKey;
    if (x) {
        yKey = Namespaces.getIterationsPrefix(instanceDir, generationID) + lastIteration + "/Y/";
    } else {
        yKey = iterationKey + "X/";
    }
    String xKey = iterationKey + (x ? "X/" : "Y/");
    String tempKey = Namespaces.getTempPrefix(instanceDir, generationID);
    String rKey = tempKey + (x ? "userVectors/" : "itemVectors/");

    if (!validOutputPath(xKey)) {
        return null;
    }

    MRPipeline p = createBasicPipeline(RowReduceFn.class);
    Configuration conf = p.getConfiguration();
    conf.set(Y_KEY_KEY, yKey);

    String popularKey = tempKey + (x ? "popularItemsByUserPartition/" : "popularUsersByItemPartition/");
    conf.set(POPULAR_KEY, popularKey);

    String testPrefix = Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "test/";
    conf.set(MAP_KEY, testPrefix);

    YState yState = new YState(ALSTypes.DENSE_ROW_MATRIX); // Shared Y-Matrix state

    GroupingOptions opts = groupingOptions();
    PCollection<MatrixRow> matrix = PTables.asPTable(p.read(input(rKey, ALSTypes.SPARSE_ROW_MATRIX)))
            .groupByKey(opts)
            .parallelDo("rowReduce", new RowReduceFn(yState), ALSTypes.DENSE_ROW_MATRIX)
            .write(output(xKey));

    if (!x) {
        matrix.parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
                .parallelDo("convergenceSample", new ConvergenceSampleFn(yState), Avros.strings())
                .write(compressedTextOutput(p.getConfiguration(), iterationKey + "Yconvergence"));
    }

    if (x && ConfigUtils.getDefaultConfig().getDouble("model.test-set-fraction") > 0.0
            && store.exists(testPrefix, false)) {
        PCollection<Double> aps = matrix
                .parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
                .parallelDo("computeAP", new ComputeUserAPFn(yState), Avros.doubles());
        Mean meanAveragePrecision = new Mean();
        for (double ap : aps.materialize()) {
            meanAveragePrecision.increment(ap);
        }
        log.info("Mean average precision: {}", meanAveragePrecision.getResult());

        File tempMAPFile = File.createTempFile("MAP", ".txt");
        tempMAPFile.deleteOnExit();
        Files.write(Double.toString(meanAveragePrecision.getResult()), tempMAPFile, StandardCharsets.UTF_8);
        store.upload(iterationKey + "MAP", tempMAPFile, false);
        IOUtils.delete(tempMAPFile);
    }

    return p;
}
From source file: com.itemanalysis.psychometrics.histogram.Histogram.java
private void createHistogram(double[] x) {
    n = x.length;
    Min min = new Min();
    Max max = new Max();
    Mean mean = new Mean();
    StandardDeviation sd = new StandardDeviation();

    for (int i = 0; i < x.length; i++) {
        min.increment(x[i]);
        max.increment(x[i]);
        mean.increment(x[i]);
        sd.increment(x[i]);
    }

    double range = max.getResult() - min.getResult();
    double lowestBoundary = min.getResult() - range / 1000;
    double largestBoundary = max.getResult() + range / 1000;

    if (binCalculationType == BinCalculationType.SCOTT) {
        binCalc = new ScottBinCalculation(n, min.getResult(), max.getResult(), sd.getResult());
    } else if (binCalculationType == BinCalculationType.FREEDMAN_DIACONIS) {
        Percentile percentile = new Percentile();
        double q1 = percentile.evaluate(x, 25);
        double q3 = percentile.evaluate(x, 75);
        binCalc = new FreedmanDiaconisBinCalculation(n, min.getResult(), max.getResult(), q1, q3);
    } else if (binCalculationType == BinCalculationType.STURGES) {
        binCalc = new SturgesBinCalculation(n, min.getResult(), max.getResult());
    }

    numberOfBins = binCalc.numberOfBins();
    binWidth = binCalc.binWidth();

    // Create bins
    createBins(lowestBoundary, largestBoundary);

    // Count observations in each bin
    for (int i = 0; i < n; i++) {
        for (Bin b : bins) {
            b.increment(x[i]);
        }
    }
}
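ScottBinCalculation above receives n, min, max, and the standard deviation because Scott's normal reference rule needs exactly those inputs. For reference, a sketch of the textbook formula, h = 3.49 * s * n^(-1/3); this is not a claim about that class's internals, and the helper names are illustrative:

import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math3.stat.descriptive.rank.Max;
import org.apache.commons.math3.stat.descriptive.rank.Min;

public class ScottRule {
    // Scott's normal reference rule: bin width h = 3.49 * s * n^(-1/3).
    public static double binWidth(double[] x) {
        double s = new StandardDeviation().evaluate(x);
        return 3.49 * s * Math.pow(x.length, -1.0 / 3.0);
    }

    // The number of bins follows from the data range divided by the width.
    public static int numberOfBins(double[] x) {
        double min = new Min().evaluate(x);
        double max = new Max().evaluate(x);
        return (int) Math.ceil((max - min) / binWidth(x));
    }
}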
From source file: gamlss.distributions.PE.java
/**
 * Calculate and set initial value of sigma.
 * @param y - vector of values of response variable
 * @return vector of initial values of sigma
 */
private ArrayRealVector setSigmaInitial(final ArrayRealVector y) {
    // sigma.initial = expression( sigma <- (abs(y-mean(y))+sd(y))/2 )
    final double mean = new Mean().evaluate(y.getDataRef());
    final double sd = new StandardDeviation().evaluate(y.getDataRef());
    size = y.getDimension();
    double[] out = new double[size];
    for (int i = 0; i < size; i++) {
        out[i] = (FastMath.abs(y.getEntry(i) - mean) + sd) / 2;
    }
    return new ArrayRealVector(out, false);
}
From source file: gamlss.distributions.BCPE.java
/**
 * Calculates initial values of mu; by assumption these
 * values lie between the observed data and the trend line.
 * @param y - vector of values of response variable
 * @return a vector of initial values of mu
 */
private ArrayRealVector setMuInitial(final ArrayRealVector y) {
    // mu.initial = expression(mu <- (y+mean(y))/2)
    size = y.getDimension();
    double[] out = new double[size];
    Mean mean = new Mean();
    double yMean = mean.evaluate(y.getDataRef());
    for (int i = 0; i < size; i++) {
        out[i] = (y.getEntry(i) + yMean) / 2;
    }
    return new ArrayRealVector(out, false);
}
From source file: edu.umd.umiacs.clip.tools.classifier.LibSVMUtils.java
public static Map<Integer, Pair<Double, Double>> learnZscoringModel(List<String> training) {
    return training.stream().map(LibSVMUtils::split).map(Triple::getMiddle).flatMap(List::stream)
            .collect(groupingBy(Pair::getKey, ConcurrentHashMap::new,
                    reducing(new ArrayList<Float>(), pair -> asList(pair.getRight()),
                            (p1, p2) -> Stream.of(p1, p2).flatMap(List::stream).collect(toList()))))
            .entrySet().stream()
            .map(entry -> Pair.of(entry.getKey(), entry.getValue().stream().mapToDouble(f -> f).toArray()))
            .collect(toMap(Entry::getKey, entry -> Pair.of(new Mean().evaluate(entry.getValue()),
                    new StandardDeviation().evaluate(entry.getValue()))));
}
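A follow-up sketch showing what the learned (mean, sd) pairs are for: standardizing a feature value as z = (x - mean) / sd. The helper below is hypothetical, assuming the commons-lang3 Pair used above:

import java.util.Map;

import org.apache.commons.lang3.tuple.Pair;

public class ZScoreApply {
    // z = (x - mean) / sd for the feature at `index`, using a model from learnZscoringModel.
    public static double zscore(Map<Integer, Pair<Double, Double>> model, int index, double x) {
        Pair<Double, Double> stats = model.get(index);
        double mean = stats.getLeft();
        double sd = stats.getRight();
        // Guard against constant features, whose standard deviation is zero.
        return sd == 0.0 ? 0.0 : (x - mean) / sd;
    }
}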