Example usage for org.apache.commons.math3.stat.descriptive.moment.Mean.Mean()

List of usage examples for the org.apache.commons.math3.stat.descriptive.moment.Mean constructor, Mean()

Introduction

On this page you can find example usage for the no-argument constructor of org.apache.commons.math3.stat.descriptive.moment.Mean.

Prototype

public Mean() 

Document

Constructs a Mean.
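
Before the usage examples, here is a minimal, self-contained sketch (class name and data invented for illustration) of the two ways a Mean created with this constructor is typically fed: batch evaluation over a whole array, or streaming increments one value at a time. Both patterns recur throughout the examples below.

import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class MeanConstructorSketch {
    public static void main(String[] args) {
        double[] data = { 1.0, 2.0, 3.0, 4.0 };

        // Batch form: evaluate() computes the mean of the whole array at once.
        double batch = new Mean().evaluate(data);

        // Streaming form: feed values one at a time, then read the result.
        Mean running = new Mean();
        for (double d : data) {
            running.increment(d);
        }

        System.out.println(batch + " == " + running.getResult()); // both are 2.5
    }
}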

Usage

From source file:br.unicamp.ic.recod.gpsi.applications.gpsiJGAPSelectorEvolver.java

@Override
public void run() throws InvalidConfigurationException, InterruptedException, Exception {

    int i, j, k;
    byte nFolds = 5;
    gpsiDescriptor descriptor;
    gpsiMLDataset mlDataset;
    gpsiVoxelRawDataset dataset;
    GPGenotype gp;
    double[][] fitnessCurves;
    String[] curveLabels = new String[] { "train", "train_val", "val" };
    double bestScore, currentScore;
    IGPProgram current;
    IGPProgram[] elite = null;

    Mean mean = new Mean();
    StandardDeviation sd = new StandardDeviation();

    double validationScore, trainScore;
    double[][][] samples;

    for (byte f = 0; f < nFolds; f++) {

        System.out.println("\nRun " + (f + 1) + "\n");

        rawDataset.assignFolds(new byte[] { f, (byte) ((f + 1) % nFolds), (byte) ((f + 2) % nFolds) },
                new byte[] { (byte) ((f + 3) % nFolds) }, new byte[] { (byte) ((f + 4) % nFolds) });
        dataset = (gpsiVoxelRawDataset) rawDataset;
        gp = create(config, dataset.getnBands(), fitness, null);

        // 0: train, 1: train_val, 2: val
        fitnessCurves = new double[super.numGenerations + numGenerationsSel][];
        bestScore = -Double.MAX_VALUE;

        if (validation > 0)
            elite = new IGPProgram[validation];

        for (int generation = 0; generation < numGenerationsSel; generation++) {

            gp.evolve(1);
            gp.getGPPopulation().sortByFitness();

            if (validation > 0)
                elite = mergeElite(elite, gp.getGPPopulation().getGPPrograms(), generation);

            if (this.dumpGens) {

                double[][][] dists;
                descriptor = new gpsiScalarSpectralIndexDescriptor(
                        new gpsiJGAPVoxelCombiner(fitness.getB(), gp.getGPPopulation().getGPPrograms()[0]));
                mlDataset = new gpsiMLDataset(descriptor);
                mlDataset.loadWholeDataset(rawDataset, true);

                dists = (new gpsiWholeSampler()).sample(mlDataset.getTrainingEntities(), this.classLabels);
                for (i = 0; i < this.classLabels.length; i++) {
                    stream.register(new gpsiDoubleCsvIOElement(dists[i], null,
                            "gens/f" + (f + 1) + "/" + classLabels[i] + "/" + (generation + 1) + ".csv"));
                }

            }

            fitnessCurves[generation] = new double[] { gp.getAllTimeBest().getFitnessValue() - 1.0 };
            System.out.printf("%3dg: %.4f\n", generation + 1, fitnessCurves[generation][0]);

        }

        HashSet<Integer> variables = new HashSet<>();
        for (IGPProgram ind : elite) {
            for (CommandGene node : ind.getChromosome(0).getFunctions()) {
                if (node instanceof Variable) {
                    variables.add(Integer.parseInt(node.getName().replace('b', '0')));
                }
            }
        }

        int[] vars = variables.stream().mapToInt(p -> p).toArray();
        Arrays.sort(vars);
        stream.register(new gpsiStringIOElement(Arrays.toString(vars), "selected_bands/f" + (f + 1) + ".out"));

        gp = create(config, dataset.getnBands(), fitness, vars);
        gp.addFittestProgram(elite[0]);

        for (int generation = numGenerationsSel; generation < numGenerationsSel
                + super.numGenerations; generation++) {

            gp.evolve(1);
            gp.getGPPopulation().sortByFitness();

            if (validation > 0)
                elite = mergeElite(elite, gp.getGPPopulation().getGPPrograms(), generation);

            if (this.dumpGens) {

                double[][][] dists;
                descriptor = new gpsiScalarSpectralIndexDescriptor(
                        new gpsiJGAPVoxelCombiner(fitness.getB(), gp.getGPPopulation().getGPPrograms()[0]));
                mlDataset = new gpsiMLDataset(descriptor);
                mlDataset.loadWholeDataset(rawDataset, true);

                dists = (new gpsiWholeSampler()).sample(mlDataset.getTrainingEntities(), this.classLabels);
                for (i = 0; i < this.classLabels.length; i++) {
                    stream.register(new gpsiDoubleCsvIOElement(dists[i], null,
                            "gens/f" + (f + 1) + "/" + classLabels[i] + "/" + (generation + 1) + ".csv"));
                }

            }

            fitnessCurves[generation] = new double[] { gp.getAllTimeBest().getFitnessValue() - 1.0 };
            System.out.printf("%3dg: %.4f\n", generation + 1, fitnessCurves[generation][0]);

        }

        best = new IGPProgram[2];
        best[0] = gp.getAllTimeBest();
        for (i = 0; i < super.validation; i++) {

            current = elite[i];

            descriptor = new gpsiScalarSpectralIndexDescriptor(
                    new gpsiJGAPVoxelCombiner(fitness.getB(), current));
            mlDataset = new gpsiMLDataset(descriptor);
            mlDataset.loadWholeDataset(rawDataset, true);

            samples = this.fitness.getSampler().sample(mlDataset.getValidationEntities(), classLabels);

            validationScore = fitness.getScore().score(samples);
            trainScore = current.getFitnessValue() - 1.0;

            currentScore = mean.evaluate(new double[] { trainScore, validationScore })
                    - sd.evaluate(new double[] { trainScore, validationScore });

            if (currentScore > bestScore) {
                best[1] = current;
                bestScore = currentScore;
            }

        }

        stream.register(new gpsiDoubleCsvIOElement(fitnessCurves, curveLabels, "curves/f" + (f + 1) + ".csv"));

        System.out.println("Best solution for trainning: " + gp.getAllTimeBest().toStringNorm(0));
        stream.register(new gpsiStringIOElement(gp.getAllTimeBest().toStringNorm(0),
                "programs/f" + (f + 1) + "train.program"));

        if (validation > 0) {
            System.out.println("Best solution for trainning and validation: " + best[1].toStringNorm(0));
            stream.register(new gpsiStringIOElement(best[1].toStringNorm(0),
                    "programs/f" + (f + 1) + "train_val.program"));
        }

        descriptor = new gpsiScalarSpectralIndexDescriptor(new gpsiJGAPVoxelCombiner(fitness.getB(), best[0]));
        gpsi1NNToMomentScalarClassificationAlgorithm classificationAlgorithm = new gpsi1NNToMomentScalarClassificationAlgorithm(
                new Mean());
        gpsiClassifier classifier = new gpsiClassifier(descriptor, classificationAlgorithm);

        classifier.fit(this.rawDataset.getTrainingEntities());
        classifier.predict(this.rawDataset.getTestEntities());

        int[][] confusionMatrix = classifier.getConfusionMatrix();

        stream.register(new gpsiIntegerCsvIOElement(confusionMatrix, null,
                "confusion_matrices/f" + (f + 1) + "_train.csv"));

        if (validation > 0) {
            descriptor = new gpsiScalarSpectralIndexDescriptor(
                    new gpsiJGAPVoxelCombiner(fitness.getB(), best[1]));
            classificationAlgorithm = new gpsi1NNToMomentScalarClassificationAlgorithm(new Mean());
            classifier = new gpsiClassifier(descriptor, classificationAlgorithm);

            classifier.fit(this.rawDataset.getTrainingEntities());
            classifier.predict(this.rawDataset.getTestEntities());

            confusionMatrix = classifier.getConfusionMatrix();

            stream.register(new gpsiIntegerCsvIOElement(confusionMatrix, null,
                    "confusion_matrices/f" + (f + 1) + "_train_val.csv"));

        }

    }

}
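
The notable Mean usage above is the selection criterion: each elite program is scored by the mean of its train and validation scores minus their standard deviation, so programs whose two scores disagree are penalized. A self-contained sketch of just that criterion (the score values are invented, not taken from the original run):

import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;

public class MeanMinusSdScore {
    /** Mean of the two scores minus their sample standard deviation. */
    static double score(double trainScore, double validationScore) {
        double[] scores = { trainScore, validationScore };
        return new Mean().evaluate(scores) - new StandardDeviation().evaluate(scores);
    }

    public static void main(String[] args) {
        // A balanced program outranks a lopsided one with the same mean.
        System.out.println(score(0.80, 0.80)); // 0.80
        System.out.println(score(0.95, 0.65)); // about 0.59
    }
}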

From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java

/**
 * Tests the calculations needed for mean/mean and mean/sigma scale linking.
 * Item parameters and true values obtained from example 2 from the STUIRT
 * program by Michael Kolen and colleagues. Note that the original example
 * used the PARSCALE version of item parameters. These were converted to
 * ICL type parameters by subtracting a step from the item difficulty.
 *
 */
@Test
public void mixedFormatDescriptiveStatisticsTestFormX() {
    System.out.println("Mixed format descriptive statistics test Form X");

    ItemResponseModel[] irm = new ItemResponseModel[17];

    irm[0] = new Irm3PL(0.751335, -0.897391, 0.244001, 1.7);
    irm[1] = new Irm3PL(0.955947, -0.811477, 0.242883, 1.7);
    irm[2] = new Irm3PL(0.497206, -0.858681, 0.260893, 1.7);
    irm[3] = new Irm3PL(0.724000, -0.123911, 0.243497, 1.7);
    irm[4] = new Irm3PL(0.865200, 0.205889, 0.319135, 1.7);
    irm[5] = new Irm3PL(0.658129, 0.555228, 0.277826, 1.7);
    irm[6] = new Irm3PL(1.082118, 0.950549, 0.157979, 1.7);
    irm[7] = new Irm3PL(0.988294, 1.377501, 0.084828, 1.7);
    irm[8] = new Irm3PL(1.248923, 1.614355, 0.181874, 1.7);
    irm[9] = new Irm3PL(1.116682, 2.353932, 0.246856, 1.7);
    irm[10] = new Irm3PL(0.438171, 3.217965, 0.309243, 1.7);
    irm[11] = new Irm3PL(1.082206, 4.441864, 0.192339, 1.7);

    double[] step1 = { 0, -1.09327, 1.101266 };
    irm[12] = new IrmGPCM(0.269994, step1, 1.7);

    double[] step2 = { 0, 1.526148, 1.739176 };
    irm[13] = new IrmGPCM(0.972506, step2, 1.7);

    double[] step3 = { 0, 1.362356, 5.566958 };
    irm[14] = new IrmGPCM(0.378812, step3, 1.7);

    double[] step4 = { 0, 1.486566, -0.071229, 1.614823 };
    irm[15] = new IrmGPCM(0.537706, step4, 1.7);

    double[] step5 = { 0, 1.425413, 2.630705, 3.242696 };
    irm[16] = new IrmGPCM(0.554506, step5, 1.7);

    Mean discriminationX = new Mean();
    Mean difficultyX = new Mean();

    Mean difficultyMeanX = new Mean();
    StandardDeviation difficultySdX = new StandardDeviation(false); // Do not correct for bias: use N in the denominator, not N-1.

    for (int j = 0; j < 17; j++) {
        irm[j].incrementMeanMean(discriminationX, difficultyX);
        irm[j].incrementMeanSigma(difficultyMeanX, difficultySdX);
    }

    //        System.out.println("Mean/mean descriptive statistics for Form X");
    //        System.out.println("a-mean: " + discriminationX.getResult());
    //        System.out.println("b-mean: " + difficultyX.getResult());

    assertEquals("Mean/mean check: discrimination mean", 0.7719,
            Precision.round(discriminationX.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: difficulty mean", 1.3566, Precision.round(difficultyX.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24, difficultyX.getN(), 1e-3);

    //        System.out.println();
    //        System.out.println("Mean/sigma descriptive statistics for Form X");
    //        System.out.println("b-mean: " + difficultyMeanX.getResult());
    //        System.out.println("b-sd: " + difficultySdX.getResult());
    //        System.out.println("b-N: " + difficultyMeanX.getN() + ",   " + difficultySdX.getN());

    assertEquals("Mean/sigma check: difficulty mean", 1.3566, Precision.round(difficultyMeanX.getResult(), 4),
            1e-5);
    assertEquals("Mean/sigma check: difficulty sd", 1.6372, Precision.round(difficultySdX.getResult(), 4),
            1e-5);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultyMeanX.getN(),
            1e-3);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultySdX.getN(), 1e-3);

}
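
Note the new StandardDeviation(false) in this test: mean/sigma linking uses the population standard deviation (N in the denominator), while the no-argument constructor is bias-corrected (N - 1). A short sketch of the difference, with invented values; getN() reports how many values each accumulator has seen:

import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;

public class PopulationVsSampleSd {
    public static void main(String[] args) {
        double[] b = { -0.9, -0.1, 1.4, 2.4 };

        Mean mean = new Mean();
        StandardDeviation population = new StandardDeviation(false); // divide by N
        StandardDeviation sample = new StandardDeviation();          // divide by N - 1

        for (double v : b) {
            mean.increment(v);
            population.increment(v);
            sample.increment(v);
        }

        System.out.printf("n=%d mean=%.4f popSd=%.4f sampleSd=%.4f%n",
                mean.getN(), mean.getResult(), population.getResult(), sample.getResult());
    }
}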

From source file:com.cloudera.oryx.rdf.computation.RDFDistributedGenerationRunner.java

private static void updateMeanImportances(Map<String, Mean> columnNameToMeanImportance, Model model) {
    for (MiningField field : model.getMiningSchema().getMiningFields()) {
        Double importance = field.getImportance();
        if (importance != null) {
            String fieldName = field.getName().getValue();
            Mean mean = columnNameToMeanImportance.get(fieldName);
            if (mean == null) {
                mean = new Mean();
                columnNameToMeanImportance.put(fieldName, mean);
            }
            mean.increment(importance);
        }
    }
}
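
The null-check-then-put sequence above is the classic way to keep one running Mean per key. On Java 8 and later the same bookkeeping can be written with computeIfAbsent; a sketch of that idiom with invented column names (the Oryx plumbing is omitted):

import java.util.HashMap;
import java.util.Map;
import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class MeanPerKey {
    public static void main(String[] args) {
        Map<String, Mean> importanceByColumn = new HashMap<>();

        String[] columns = { "age", "income", "age" };
        double[] importances = { 0.3, 0.5, 0.7 };

        // Each observation updates the running mean for its own column.
        for (int i = 0; i < columns.length; i++) {
            importanceByColumn.computeIfAbsent(columns[i], k -> new Mean())
                    .increment(importances[i]);
        }

        importanceByColumn.forEach((col, m) -> System.out.println(col + ": " + m.getResult()));
        // age: 0.5, income: 0.5
    }
}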

From source file:gedi.util.math.stat.distributions.NormalMixtureDistribution.java

public static NormalMixtureDistribution fit(NormalMixtureDistribution initialMixture, double[] data,
        final int maxIterations, final double threshold) {

    if (maxIterations < 1) {
        throw new NotStrictlyPositiveException(maxIterations);
    }

    if (threshold < Double.MIN_VALUE) {
        throw new NotStrictlyPositiveException(threshold);
    }

    final int n = data.length;

    final int k = initialMixture.getNumComponents();

    if (k == 1)
        return new NormalMixtureDistribution(new NormalDistribution[] {
                new NormalDistribution(new Mean().evaluate(data), new StandardDeviation().evaluate(data)) },
                new double[] { 1 });

    int numIterations = 0;
    double previousLogLikelihood = 0d;

    double logLikelihood = Double.NEGATIVE_INFINITY;

    // Initialize model to fit to initial mixture.
    NormalMixtureDistribution fittedModel = new NormalMixtureDistribution(initialMixture.components,
            initialMixture.mixing);

    while (numIterations++ <= maxIterations
            && FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        previousLogLikelihood = logLikelihood;
        logLikelihood = 0d;

        // E-step: compute the data dependent parameters of the expectation
        // function.
        // The percentage of row's total density between a row and a
        // component
        final double[][] gamma = new double[n][k];
        // Sum of gamma for each component
        final double[] gammaSums = new double[k];

        for (int i = 0; i < n; i++) {
            final double rowDensity = fittedModel.density(data[i]);
            logLikelihood += FastMath.log(rowDensity);

            for (int j = 0; j < k; j++) {
                gamma[i][j] = fittedModel.mixing[j] * fittedModel.components[j].density(data[i]) / rowDensity;
                gammaSums[j] += gamma[i][j];
            }
        }
        logLikelihood /= n;
        //         System.out.println(logLikelihood);

        // M-step: compute the new parameters based on the expectation
        // function.
        final double[] newWeights = gammaSums.clone();
        ArrayUtils.mult(newWeights, 1.0 / n);

        NormalDistribution[] comp = new NormalDistribution[k];
        for (int j = 0; j < k; j++) {
            double m = 0;
            for (int i = 0; i < n; i++) {
                m += gamma[i][j] * data[i];
            }
            m /= gammaSums[j];

            double var = 0;
            for (int i = 0; i < n; i++) {
                double d = m - data[i];
                var += gamma[i][j] * d * d;
            }
            var /= gammaSums[j];

            comp[j] = new NormalDistribution(m, Math.sqrt(var));
        }

        // Update current model
        fittedModel = new NormalMixtureDistribution(comp, newWeights);
    }

    if (FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        // Did not converge before the maximum number of iterations
        throw new ConvergenceException();
    }

    return fittedModel;
}
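
The M-step above computes each component mean as a responsibility-weighted average by hand. Mean also implements WeightedEvaluation, so evaluate(values, weights) yields the same sum(w[i] * x[i]) / sum(w[i]) directly; a sketch for one component with invented responsibilities:

import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class WeightedMeanSketch {
    public static void main(String[] args) {
        double[] data  = { 1.0, 2.0, 10.0 };
        double[] gamma = { 0.9, 0.8, 0.1 }; // responsibilities for one component

        // Same as the hand-rolled loop: sum(gamma[i] * data[i]) / sum(gamma[i]).
        double weighted = new Mean().evaluate(data, gamma);
        System.out.println(weighted); // (0.9 + 1.6 + 1.0) / 1.8, about 1.944
    }
}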

From source file:com.itemanalysis.psychometrics.irt.equating.RobustZEquatingTest.java

private void testB() {
    double[] bDiff = new double[nB];
    zb = new RobustZ[nB];

    for (int i = 0; i < nB; i++) {
        bDiff[i] = bY[i] - slope * bX[i];
    }

    double median = percentile.evaluate(bDiff, 50);
    double q3 = percentile.evaluate(bDiff, 75);
    double q1 = percentile.evaluate(bDiff, 25);
    double iqr = q3 - q1;
    Mean mean = new Mean();

    for (int i = 0; i < nB; i++) {
        zb[i] = new RobustZ(bDiff[i], median, iqr);
        if (!zb[i].significant(significanceLevel)) {
            mean.increment(bDiff[i]);
        }
    }
    intercept = mean.getResult();
}
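
Here Mean acts as a trimmed estimator: increment is called only for differences the robust-z test does not flag, so outliers never enter the intercept. A minimal sketch of that gating pattern (the fixed 2.0 cutoff is an invented stand-in for the RobustZ significance test):

import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class GatedMean {
    public static void main(String[] args) {
        double[] diffs = { 0.1, -0.2, 0.05, 4.9 }; // 4.9 is an obvious outlier

        Mean mean = new Mean();
        for (double d : diffs) {
            if (Math.abs(d) < 2.0) { // stand-in for !zb[i].significant(alpha)
                mean.increment(d);
            }
        }

        System.out.println(mean.getResult()); // mean of the three retained values
    }
}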

From source file:com.cloudera.oryx.als.computation.iterate.row.RowStep.java

@Override
protected MRPipeline createPipeline() throws IOException {

    IterationState iterationState = getIterationState();
    String iterationKey = iterationState.getIterationKey();
    boolean x = iterationState.isComputingX();
    int lastIteration = iterationState.getIteration() - 1;
    Store store = Store.get();

    JobStepConfig config = getConfig();
    String instanceDir = config.getInstanceDir();
    int generationID = config.getGenerationID();

    if (store.exists(Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "X/", false)) {
        // Actually, looks like whole computation of X/Y finished -- just proceed
        return null;
    }

    // Take the opportunity to clean out iteration before last, if computing X
    if (x) {
        String lastLastIterationKey = Namespaces.getIterationsPrefix(instanceDir, generationID)
                + (lastIteration - 1) + '/';
        if (store.exists(lastLastIterationKey, false)) {
            log.info("Deleting old iteration data from {}", lastLastIterationKey);
            store.recursiveDelete(lastLastIterationKey);
        }
    }

    String yKey;
    if (x) {
        yKey = Namespaces.getIterationsPrefix(instanceDir, generationID) + lastIteration + "/Y/";
    } else {
        yKey = iterationKey + "X/";
    }

    String xKey = iterationKey + (x ? "X/" : "Y/");
    String tempKey = Namespaces.getTempPrefix(instanceDir, generationID);
    String rKey = tempKey + (x ? "userVectors/" : "itemVectors/");

    if (!validOutputPath(xKey)) {
        return null;
    }

    MRPipeline p = createBasicPipeline(RowReduceFn.class);
    Configuration conf = p.getConfiguration();
    conf.set(Y_KEY_KEY, yKey);

    String popularKey = tempKey + (x ? "popularItemsByUserPartition/" : "popularUsersByItemPartition/");
    conf.set(POPULAR_KEY, popularKey);

    String testPrefix = Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "test/";
    conf.set(MAP_KEY, testPrefix);

    YState yState = new YState(ALSTypes.DENSE_ROW_MATRIX); // Shared Y-Matrix state

    GroupingOptions opts = groupingOptions();
    PCollection<MatrixRow> matrix = PTables.asPTable(p.read(input(rKey, ALSTypes.SPARSE_ROW_MATRIX)))
            .groupByKey(opts).parallelDo("rowReduce", new RowReduceFn(yState), ALSTypes.DENSE_ROW_MATRIX)
            .write(output(xKey));

    if (!x) {
        matrix.parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
                .parallelDo("convergenceSample", new ConvergenceSampleFn(yState), Avros.strings())
                .write(compressedTextOutput(p.getConfiguration(), iterationKey + "Yconvergence"));
    }

    if (x && ConfigUtils.getDefaultConfig().getDouble("model.test-set-fraction") > 0.0
            && store.exists(testPrefix, false)) {
        PCollection<Double> aps = matrix
                .parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
                .parallelDo("computeAP", new ComputeUserAPFn(yState), Avros.doubles());
        Mean meanAveragePrecision = new Mean();
        for (double ap : aps.materialize()) {
            meanAveragePrecision.increment(ap);
        }
        log.info("Mean average precision: {}", meanAveragePrecision.getResult());

        File tempMAPFile = File.createTempFile("MAP", ".txt");
        tempMAPFile.deleteOnExit();
        Files.write(Double.toString(meanAveragePrecision.getResult()), tempMAPFile, StandardCharsets.UTF_8);
        store.upload(iterationKey + "MAP", tempMAPFile, false);
        IOUtils.delete(tempMAPFile);
    }

    return p;
}

From source file:com.itemanalysis.psychometrics.histogram.Histogram.java

private void createHistogram(double[] x) {
    n = x.length;
    Min min = new Min();
    Max max = new Max();
    Mean mean = new Mean();
    StandardDeviation sd = new StandardDeviation();

    for (int i = 0; i < x.length; i++) {
        min.increment(x[i]);
        max.increment(x[i]);
        mean.increment(x[i]);
        sd.increment(x[i]);
    }

    double range = max.getResult() - min.getResult();
    double lowestBoundary = min.getResult() - range / 1000;
    double largestBoundary = max.getResult() + range / 1000;

    if (binCalculationType == BinCalculationType.SCOTT) {
        binCalc = new ScottBinCalculation(n, min.getResult(), max.getResult(), sd.getResult());
    } else if (binCalculationType == BinCalculationType.FREEDMAN_DIACONIS) {
        Percentile percentile = new Percentile();
        double q1 = percentile.evaluate(x, 25);
        double q3 = percentile.evaluate(x, 75);
        binCalc = new FreedmanDiaconisBinCalculation(n, min.getResult(), max.getResult(), q1, q3);
    } else if (binCalculationType == BinCalculationType.STURGES) {
        binCalc = new SturgesBinCalculation(n, min.getResult(), max.getResult());
    }

    numberOfBins = binCalc.numberOfBins();
    binWidth = binCalc.binWidth();

    //create bins
    createBins(lowestBoundary, largestBoundary);

    //count observations in each bin
    for (int i = 0; i < n; i++) {
        for (Bin b : bins) {
            b.increment(x[i]);
        }
    }
}
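
createHistogram makes a single pass with four separate accumulators. When the individual statistic objects are not needed afterwards, Commons Math's SummaryStatistics collects min, max, mean, and (sample) standard deviation in one accumulator; a sketch of that alternative with invented data:

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class OnePassStats {
    public static void main(String[] args) {
        double[] x = { 3.1, 4.1, 5.9, 2.6 };

        SummaryStatistics stats = new SummaryStatistics();
        for (double v : x) {
            stats.addValue(v);
        }

        System.out.printf("min=%.2f max=%.2f mean=%.2f sd=%.2f n=%d%n",
                stats.getMin(), stats.getMax(), stats.getMean(),
                stats.getStandardDeviation(), stats.getN());
    }
}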

From source file:gamlss.distributions.PE.java

/** Calculate and set initial value of sigma.
 * @param y - vector of values of response variable
 * @return vector of initial values of sigma
 */
private ArrayRealVector setSigmaInitial(final ArrayRealVector y) {
    //sigma.initial = expression( sigma <- (abs(y-mean(y))+sd(y))/2 )   
    final double mean = new Mean().evaluate(y.getDataRef());
    final double sd = new StandardDeviation().evaluate(y.getDataRef());
    size = y.getDimension();
    double[] out = new double[size];
    for (int i = 0; i < size; i++) {
        out[i] = (FastMath.abs(y.getEntry(i) - mean) + sd) / 2;
    }
    return new ArrayRealVector(out, false);
}

From source file:gamlss.distributions.BCPE.java

/**  Calculates initial value of mu, by assumption these 
 * values lie between observed data and the trend line.
 * @param y - vector of values of response variable
 * @return  a vector of initial values of mu
 */
private ArrayRealVector setMuInitial(final ArrayRealVector y) {
    //mu.initial =  expression(mu <- (y+mean(y))/2)
    size = y.getDimension();
    double[] out = new double[size];
    Mean mean = new Mean();
    double yMean = mean.evaluate(y.getDataRef());
    for (int i = 0; i < size; i++) {
        out[i] = (y.getEntry(i) + yMean) / 2;
    }
    return new ArrayRealVector(out, false);
}

From source file:edu.umd.umiacs.clip.tools.classifier.LibSVMUtils.java

public static Map<Integer, Pair<Double, Double>> learnZscoringModel(List<String> training) {
    return training.stream().map(LibSVMUtils::split).map(Triple::getMiddle).flatMap(List::stream)
            .collect(groupingBy(Pair::getKey, ConcurrentHashMap::new,
                    reducing(new ArrayList<Float>(), pair -> asList(pair.getRight()),
                            (p1, p2) -> Stream.of(p1, p2).flatMap(List::stream).collect(toList()))))
            .entrySet().stream()
            .map(entry -> Pair.of(entry.getKey(), entry.getValue().stream().mapToDouble(f -> f).toArray()))
            .collect(toMap(Entry::getKey, entry -> Pair.of(new Mean().evaluate(entry.getValue()),
                    new StandardDeviation().evaluate(entry.getValue()))));
}