From source file:br.unicamp.ic.recod.gpsi.applications.gpsiJGAPSelectorEvolver.java

/**
 * Runs the two-stage evolution once per fold ({@code nFolds} = 5 rounds).
 *
 * <p>For each round {@code f} the visible code:
 * <ol>
 *   <li>calls {@code rawDataset.assignFolds} with folds {f, f+1, f+2}, {f+3} and {f+4}
 *       (all modulo {@code nFolds});</li>
 *   <li>creates a genotype without a variable restriction (last argument {@code null}) and
 *       loops over {@code numGenerationsSel} generations, recording the all-time best
 *       fitness minus 1.0 for each generation;</li>
 *   <li>collects the {@code Variable} nodes used by the elite programs, registers them as
 *       "selected_bands/f&lt;fold&gt;.out" and creates a second genotype restricted to them;</li>
 *   <li>loops over {@code super.numGenerations} further generations;</li>
 *   <li>picks {@code best[1]} among the elite by mean minus standard deviation of the
 *       train and validation scores;</li>
 *   <li>registers the fitness curves, the best programs and the confusion matrices on
 *       {@code stream}.</li>
 * </ol>
 *
 * <p>NOTE(review): this excerpt looks truncated. Several blocks opened below are never
 * closed and no call that advances the evolution or feeds the classifier is visible.
 * Restore the missing statements from the original source before relying on it.
 *
 * @throws InvalidConfigurationException declared; presumably raised while creating the
 *         genotype - confirm against {@code create}
 * @throws InterruptedException declared; the throwing call is not identifiable here
 * @throws Exception declared; the throwing call is not identifiable here
 */
public void run() throws InvalidConfigurationException, InterruptedException, Exception {

    int i, j, k;
    byte nFolds = 5;
    gpsiDescriptor descriptor;
    gpsiMLDataset mlDataset;
    gpsiVoxelRawDataset dataset;
    GPGenotype gp;
    double[][] fitnessCurves;
    String[] curveLabels = new String[] { "train", "train_val", "val" };
    double bestScore, currentScore;
    IGPProgram current;
    // Stays null unless validation > 0 (see the assignment inside the fold loop).
    IGPProgram[] elite = null;

    Mean mean = new Mean();
    StandardDeviation sd = new StandardDeviation();

    double validationScore, trainScore;
    double[][][] samples;

    for (byte f = 0; f < nFolds; f++) {

        System.out.println("\nRun " + (f + 1) + "\n");

        // Rotate the fold assignment with f: three folds in the first group, one fold in
        // each of the other two (presumably train / train_val / val - confirm against
        // assignFolds).
        rawDataset.assignFolds(new byte[] { f, (byte) ((f + 1) % nFolds), (byte) ((f + 2) % nFolds) },
                new byte[] { (byte) ((f + 3) % nFolds) }, new byte[] { (byte) ((f + 4) % nFolds) });
        dataset = (gpsiVoxelRawDataset) rawDataset;
        // First stage: no band restriction (null as the last argument).
        gp = create(config, dataset.getnBands(), fitness, null);

        // 0: train, 1: train_val, 2: val
        // NOTE(review): the rows assigned below only ever hold one value (index 0),
        // while curveLabels declares three columns.
        fitnessCurves = new double[super.numGenerations + numGenerationsSel][];
        // Lowest finite double, so the first candidate compared below replaces it.
        bestScore = -Double.MAX_VALUE;

        if (validation > 0)
            elite = new IGPProgram[validation];

        // Stage 1: generations [0, numGenerationsSel).
        for (int generation = 0; generation < numGenerationsSel; generation++) {


            if (validation > 0)
                elite = mergeElite(elite, gp.getGPPopulation().getGPPrograms(), generation);

            // Optional dump: one CSV per class label under gens/f<fold>/<label>/<generation>.csv,
            // sampled from the training entities described by the first program of the
            // current population.
            if (this.dumpGens) {

                double[][][] dists;
                descriptor = new gpsiScalarSpectralIndexDescriptor(
                        new gpsiJGAPVoxelCombiner(fitness.getB(), gp.getGPPopulation().getGPPrograms()[0]));
                mlDataset = new gpsiMLDataset(descriptor);
                mlDataset.loadWholeDataset(rawDataset, true);

                dists = (new gpsiWholeSampler()).sample(mlDataset.getTrainingEntities(), this.classLabels);
                for (i = 0; i < this.classLabels.length; i++) {
                    stream.register(new gpsiDoubleCsvIOElement(dists[i], null,
                            "gens/f" + (f + 1) + "/" + classLabels[i] + "/" + (generation + 1) + ".csv"));


            // Stored value is the fitness minus 1.0 (presumably undoing an offset added by
            // the fitness function - confirm).
            fitnessCurves[generation] = new double[] { gp.getAllTimeBest().getFitnessValue() - 1.0 };
            System.out.printf("%3dg: %.4f\n", generation + 1, fitnessCurves[generation][0]);


        // Band selection: gather the indices of every Variable node used by an elite program.
        // NOTE(review): elite is still null here when validation <= 0, in which case this
        // loop would throw a NullPointerException.
        HashSet<Integer> variables = new HashSet<>();
        for (IGPProgram ind : elite) {
            for (CommandGene node : ind.getChromosome(0).getFunctions()) {
                if (node instanceof Variable) {
                    // Every 'b' in the node name becomes '0' before parsing, e.g. "b3" -> "03" -> 3
                    // (assumes names of the form b<index> - confirm).
                    variables.add(Integer.parseInt(node.getName().replace('b', '0')));

        int[] vars = variables.stream().mapToInt(p -> p).toArray();
        stream.register(new gpsiStringIOElement(Arrays.toString(vars), "selected_bands/f" + (f + 1) + ".out"));

        // Second stage: new genotype restricted to the selected bands.
        gp = create(config, dataset.getnBands(), fitness, vars);

        // Stage 2: generation indices continue after stage 1 so that both stages share
        // fitnessCurves.
        for (int generation = numGenerationsSel; generation < numGenerationsSel
                + super.numGenerations; generation++) {


            if (validation > 0)
                elite = mergeElite(elite, gp.getGPPopulation().getGPPrograms(), generation);

            // Same optional per-generation dump as in stage 1.
            if (this.dumpGens) {

                double[][][] dists;
                descriptor = new gpsiScalarSpectralIndexDescriptor(
                        new gpsiJGAPVoxelCombiner(fitness.getB(), gp.getGPPopulation().getGPPrograms()[0]));
                mlDataset = new gpsiMLDataset(descriptor);
                mlDataset.loadWholeDataset(rawDataset, true);

                dists = (new gpsiWholeSampler()).sample(mlDataset.getTrainingEntities(), this.classLabels);
                for (i = 0; i < this.classLabels.length; i++) {
                    stream.register(new gpsiDoubleCsvIOElement(dists[i], null,
                            "gens/f" + (f + 1) + "/" + classLabels[i] + "/" + (generation + 1) + ".csv"));


            fitnessCurves[generation] = new double[] { gp.getAllTimeBest().getFitnessValue() - 1.0 };
            System.out.printf("%3dg: %.4f\n", generation + 1, fitnessCurves[generation][0]);


        // best[0]: all-time best of the second-stage genotype.
        // best[1]: elite program with the highest combined train/validation score.
        best = new IGPProgram[2];
        best[0] = gp.getAllTimeBest();
        for (i = 0; i < super.validation; i++) {

            current = elite[i];

            descriptor = new gpsiScalarSpectralIndexDescriptor(
                    new gpsiJGAPVoxelCombiner(fitness.getB(), current));
            mlDataset = new gpsiMLDataset(descriptor);
            mlDataset.loadWholeDataset(rawDataset, true);

            samples = this.fitness.getSampler().sample(mlDataset.getValidationEntities(), classLabels);

            validationScore = fitness.getScore().score(samples);
            trainScore = current.getFitnessValue() - 1.0;

            // Mean minus standard deviation of the two scores: a gap between train and
            // validation lowers the combined score.
            currentScore = mean.evaluate(new double[] { trainScore, validationScore })
                    - sd.evaluate(new double[] { trainScore, validationScore });

            if (currentScore > bestScore) {
                best[1] = current;
                bestScore = currentScore;


        stream.register(new gpsiDoubleCsvIOElement(fitnessCurves, curveLabels, "curves/f" + (f + 1) + ".csv"));

        System.out.println("Best solution for trainning: " + gp.getAllTimeBest().toStringNorm(0));
        stream.register(new gpsiStringIOElement(gp.getAllTimeBest().toStringNorm(0),
                "programs/f" + (f + 1) + "train.program"));

        if (validation > 0) {
            System.out.println("Best solution for trainning and validation: " + best[1].toStringNorm(0));
            stream.register(new gpsiStringIOElement(best[1].toStringNorm(0),
                    "programs/f" + (f + 1) + "train_val.program"));

        // Confusion matrix for the training-best program.
        // NOTE(review): no call that fits or runs the classifier is visible before
        // getConfusionMatrix() in this excerpt.
        descriptor = new gpsiScalarSpectralIndexDescriptor(new gpsiJGAPVoxelCombiner(fitness.getB(), best[0]));
        gpsi1NNToMomentScalarClassificationAlgorithm classificationAlgorithm = new gpsi1NNToMomentScalarClassificationAlgorithm(
                new Mean());
        gpsiClassifier classifier = new gpsiClassifier(descriptor, classificationAlgorithm);


        int[][] confusionMatrix = classifier.getConfusionMatrix();

        stream.register(new gpsiIntegerCsvIOElement(confusionMatrix, null,
                "confusion_matrices/f" + (f + 1) + "_train.csv"));

        // Same report for the program selected with the validation data.
        if (validation > 0) {
            descriptor = new gpsiScalarSpectralIndexDescriptor(
                    new gpsiJGAPVoxelCombiner(fitness.getB(), best[1]));
            classificationAlgorithm = new gpsi1NNToMomentScalarClassificationAlgorithm(new Mean());
            classifier = new gpsiClassifier(descriptor, classificationAlgorithm);


            confusionMatrix = classifier.getConfusionMatrix();

            stream.register(new gpsiIntegerCsvIOElement(confusionMatrix, null,
                    "confusion_matrices/f" + (f + 1) + "_train_val.csv"));



From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java

/**
 * Tests the calculations needed for mean/mean and mean/sigma scale linking.
 * Item parameters and true values were obtained from example 2 of the STUIRT
 * program by Michael Kolen and colleagues. Note that the original example
 * used the PARSCALE version of item parameters. These were converted to
 * ICL type parameters by subtracting a step from the item difficulty.
 */
public void mixedFormatDescriptiveStatisticsTestFormX() {
    System.out.println("Mixed format descriptive statistics test Form X");

    // Form X: 17 items in total, 12 Irm3PL items (indices 0-11) followed by 5 IrmGPCM
    // items (indices 12-16). Every constructor receives 1.7 as its last argument
    // (presumably the scaling constant D - confirm against the model classes).
    ItemResponseModel[] irm = new ItemResponseModel[17];

    irm[0] = new Irm3PL(0.751335, -0.897391, 0.244001, 1.7);
    irm[1] = new Irm3PL(0.955947, -0.811477, 0.242883, 1.7);
    irm[2] = new Irm3PL(0.497206, -0.858681, 0.260893, 1.7);
    irm[3] = new Irm3PL(0.724000, -0.123911, 0.243497, 1.7);
    irm[4] = new Irm3PL(0.865200, 0.205889, 0.319135, 1.7);
    irm[5] = new Irm3PL(0.658129, 0.555228, 0.277826, 1.7);
    irm[6] = new Irm3PL(1.082118, 0.950549, 0.157979, 1.7);
    irm[7] = new Irm3PL(0.988294, 1.377501, 0.084828, 1.7);
    irm[8] = new Irm3PL(1.248923, 1.614355, 0.181874, 1.7);
    irm[9] = new Irm3PL(1.116682, 2.353932, 0.246856, 1.7);
    irm[10] = new Irm3PL(0.438171, 3.217965, 0.309243, 1.7);
    irm[11] = new Irm3PL(1.082206, 4.441864, 0.192339, 1.7);

    // Step arrays for the polytomous items; each one starts with a leading 0.
    double[] step1 = { 0, -1.09327, 1.101266 };
    irm[12] = new IrmGPCM(0.269994, step1, 1.7);

    double[] step2 = { 0, 1.526148, 1.739176 };
    irm[13] = new IrmGPCM(0.972506, step2, 1.7);

    double[] step3 = { 0, 1.362356, 5.566958 };
    irm[14] = new IrmGPCM(0.378812, step3, 1.7);

    double[] step4 = { 0, 1.486566, -0.071229, 1.614823 };
    irm[15] = new IrmGPCM(0.537706, step4, 1.7);

    double[] step5 = { 0, 1.425413, 2.630705, 3.242696 };
    irm[16] = new IrmGPCM(0.554506, step5, 1.7);

    // Accumulators for the mean/mean method.
    Mean discriminationX = new Mean();
    Mean difficultyX = new Mean();

    // Accumulators for the mean/sigma method.
    Mean difficultyMeanX = new Mean();
    StandardDeviation difficultySdX = new StandardDeviation(false);//Do not correct for bias. Use N in the denominator, not N-1.

    // Each item adds its own parameters to the shared accumulators.
    for (int j = 0; j < 17; j++) {
        irm[j].incrementMeanMean(discriminationX, difficultyX);
        irm[j].incrementMeanSigma(difficultyMeanX, difficultySdX);

    //        System.out.println("Mean/mean descriptive statistics for Form X");
    //        System.out.println("a-mean: " + discriminationX.getResult());
    //        System.out.println("b-mean: " + difficultyX.getResult());

    // Results are rounded to 4 decimals before being compared with the reference values.
    // The expected count of 24 presumably equals 12 binary-item difficulties plus the
    // 12 non-zero step parameters above - verify against incrementMeanMean.
    assertEquals("Mean/mean check: discrimination mean", 0.7719,
            Precision.round(discriminationX.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: difficulty mean", 1.3566, Precision.round(difficultyX.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24, difficultyX.getN(), 1e-3);

    //        System.out.println();
    //        System.out.println("Mean/sigma descriptive statistics for Form X");
    //        System.out.println("b-mean: " + difficultyMeanX.getResult());
    //        System.out.println("b-sd: " + difficultySdX.getResult());
    //        System.out.println("b-N: " + difficultyMeanX.getN() + ",   " + difficultySdX.getN());

    // NOTE(review): the tolerance argument and closing parenthesis of the next three
    // assertions are missing from this excerpt.
    assertEquals("Mean/sigma check: difficulty mean", 1.3566, Precision.round(difficultyMeanX.getResult(), 4),
    assertEquals("Mean/sigma check: difficulty sd", 1.6372, Precision.round(difficultySdX.getResult(), 4),
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultyMeanX.getN(),
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultySdX.getN(), 1e-3);


From source file:com.cloudera.oryx.rdf.computation.RDFDistributedGenerationRunner.java

/**
 * Walks the mining fields of {@code model} and, for every field whose importance is
 * non-null, looks up the {@code Mean} accumulator stored under the field's name,
 * creating and registering a new one when the map has none yet.
 *
 * <p>NOTE(review): the excerpt ends before the accumulator is updated with the
 * importance and before the enclosing blocks are closed.
 *
 * @param columnNameToMeanImportance accumulators keyed by field name; new entries are
 *        added to it in place
 * @param model model whose mining schema supplies the fields
 */
private static void updateMeanImportances(Map<String, Mean> columnNameToMeanImportance, Model model) {
    for (MiningField field : model.getMiningSchema().getMiningFields()) {
        Double importance = field.getImportance();
        // Fields without an importance value are skipped.
        if (importance != null) {
            String fieldName = field.getName().getValue();
            Mean mean = columnNameToMeanImportance.get(fieldName);
            // First time this field is seen: register a fresh accumulator.
            if (mean == null) {
                mean = new Mean();
                columnNameToMeanImportance.put(fieldName, mean);
            }

From source file:gedi.util.math.stat.distributions.NormalMixtureDistribution.java

/**
 * Fits a mixture of normal distributions to {@code data} by alternating an E-step and
 * an M-step, starting from {@code initialMixture}.
 *
 * <p>When the initial mixture has a single component, no iteration is performed: the
 * result is one normal distribution built from the sample mean and standard deviation
 * of {@code data}, with weight 1.
 *
 * <p>NOTE(review): this excerpt looks truncated - the statement that initialises
 * {@code fittedModel} is cut off and several closing braces are missing.
 *
 * @param initialMixture starting point; its component count fixes the number of
 *        components of the result
 * @param data observations to fit
 * @param maxIterations iteration limit, must be at least 1
 * @param threshold convergence threshold on the change of the mean log-likelihood
 *        between two consecutive iterations, must not be below {@code Double.MIN_VALUE}
 * @return the fitted mixture
 * @throws NotStrictlyPositiveException if {@code maxIterations < 1} or
 *         {@code threshold < Double.MIN_VALUE}
 * @throws ConvergenceException if the change in log-likelihood still exceeds
 *         {@code threshold} when the loop ends
 */
public static NormalMixtureDistribution fit(NormalMixtureDistribution initialMixture, double[] data,
        final int maxIterations, final double threshold) {

    if (maxIterations < 1) {
        throw new NotStrictlyPositiveException(maxIterations);
    }

    if (threshold < Double.MIN_VALUE) {
        throw new NotStrictlyPositiveException(threshold);

    final int n = data.length;

    final int k = initialMixture.getNumComponents();

    // Single component: closed-form answer, no iteration needed.
    if (k == 1)
        return new NormalMixtureDistribution(new NormalDistribution[] {
                new NormalDistribution(new Mean().evaluate(data), new StandardDeviation().evaluate(data)) },
                new double[] { 1 });

    int numIterations = 0;
    double previousLogLikelihood = 0d;

    // Starting at negative infinity makes the first difference infinite, so the loop
    // below is always entered at least once.
    double logLikelihood = Double.NEGATIVE_INFINITY;

    // Initialize model to fit to initial mixture.
    NormalMixtureDistribution fittedModel = new NormalMixtureDistribution(initialMixture.components,

    // NOTE(review): the post-increment combined with "<=" allows maxIterations + 1 passes.
    while (numIterations++ <= maxIterations
            && FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        previousLogLikelihood = logLikelihood;
        logLikelihood = 0d;

        // E-step: compute the data dependent parameters of the expectation
        // function.
        // The percentage of row's total density between a row and a
        // component
        final double[][] gamma = new double[n][k];
        // Sum of gamma for each component
        final double[] gammaSums = new double[k];

        for (int i = 0; i < n; i++) {
            final double rowDensity = fittedModel.density(data[i]);
            logLikelihood += FastMath.log(rowDensity);

            // gamma[i][j]: share of the density at data[i] contributed by component j.
            for (int j = 0; j < k; j++) {
                gamma[i][j] = fittedModel.mixing[j] * fittedModel.components[j].density(data[i]) / rowDensity;
                gammaSums[j] += gamma[i][j];
        // Mean log-likelihood per observation.
        logLikelihood /= n;
        //         System.out.println(logLikelihood);

        // M-step: compute the new parameters based on the expectation
        // function.
        // New weight of component j = gammaSums[j] / n.
        final double[] newWeights = gammaSums.clone();
        ArrayUtils.mult(newWeights, 1.0 / n);

        NormalDistribution[] comp = new NormalDistribution[k];
        for (int j = 0; j < k; j++) {
            // Gamma-weighted mean of the data for component j.
            double m = 0;
            for (int i = 0; i < n; i++) {
                m += gamma[i][j] * data[i];
            m /= gammaSums[j];

            // Gamma-weighted variance around that mean.
            double var = 0;
            for (int i = 0; i < n; i++) {
                double d = m - data[i];
                var += gamma[i][j] * d * d;
            var /= gammaSums[j];

            comp[j] = new NormalDistribution(m, Math.sqrt(var));

        // Update current model
        fittedModel = new NormalMixtureDistribution(comp, newWeights);

    if (FastMath.abs(previousLogLikelihood - logLikelihood) > threshold) {
        // Did not converge before the maximum number of iterations
        throw new ConvergenceException();

    return fittedModel;

From source file:com.itemanalysis.psychometrics.irt.equating.RobustZEquatingTest.java

/**
 * Computes one {@code RobustZ} per item from the difference
 * {@code bY[i] - slope * bX[i]}, using the median and the interquartile range of those
 * differences, and finally stores {@code mean.getResult()} in {@code intercept}.
 *
 * <p>NOTE(review): the body of the significance check and the closing braces are
 * missing from this excerpt; presumably the differences of non-significant items are
 * added to {@code mean} there - confirm against the original source.
 */
private void testB() {
    double[] bDiff = new double[nB];
    zb = new RobustZ[nB];

    for (int i = 0; i < nB; i++) {
        bDiff[i] = bY[i] - slope * bX[i];
    }

    // Median and quartiles of the differences feed the robust z statistic.
    double median = percentile.evaluate(bDiff, 50);
    double q3 = percentile.evaluate(bDiff, 75);
    double q1 = percentile.evaluate(bDiff, 25);
    double iqr = q3 - q1;
    Mean mean = new Mean();

    for (int i = 0; i < nB; i++) {
        zb[i] = new RobustZ(bDiff[i], median, iqr);
        // Only items that are NOT flagged as significant enter this branch.
        if (!zb[i].significant(significanceLevel)) {
    intercept = mean.getResult();

From source file:com.cloudera.oryx.als.computation.iterate.row.RowStep.java

/**
 * Builds the pipeline for one row-computation step of the current iteration.
 *
 * <p>Whether X or Y is being computed comes from the iteration state; that flag selects
 * the input and output keys. Returns {@code null} when the generation's {@code X/}
 * output already exists or when {@code validOutputPath} rejects the output key.
 *
 * <p>NOTE(review): this excerpt looks truncated - several closing braces, the action
 * taken on the old iteration data, the tail of the {@code matrix} statement and the
 * body of the loop over the average precisions are not visible.
 *
 * @return the configured pipeline, or {@code null} when there is nothing to run
 * @throws IOException declared; the visible code creates and writes a temporary file
 */
protected MRPipeline createPipeline() throws IOException {

    IterationState iterationState = getIterationState();
    String iterationKey = iterationState.getIterationKey();
    boolean x = iterationState.isComputingX();
    int lastIteration = iterationState.getIteration() - 1;
    Store store = Store.get();

    JobStepConfig config = getConfig();
    String instanceDir = config.getInstanceDir();
    int generationID = config.getGenerationID();

    if (store.exists(Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "X/", false)) {
        // Actually, looks like whole computation of X/Y finished -- just proceed
        return null;

    // Take the opportunity to clean out iteration before last, if computing X
    if (x) {
        String lastLastIterationKey = Namespaces.getIterationsPrefix(instanceDir, generationID)
                + (lastIteration - 1) + '/';
        if (store.exists(lastLastIterationKey, false)) {
            log.info("Deleting old iteration data from {}", lastLastIterationKey);

    // Location of the other factor matrix: Y of the previous iteration when computing X,
    // X of the current iteration when computing Y.
    String yKey;
    if (x) {
        yKey = Namespaces.getIterationsPrefix(instanceDir, generationID) + lastIteration + "/Y/";
    } else {
        yKey = iterationKey + "X/";

    // xKey: output location of this step; rKey: input vectors read from the temp area.
    String xKey = iterationKey + (x ? "X/" : "Y/");
    String tempKey = Namespaces.getTempPrefix(instanceDir, generationID);
    String rKey = tempKey + (x ? "userVectors/" : "itemVectors/");

    if (!validOutputPath(xKey)) {
        return null;

    // Hand the key locations to the job through its configuration.
    MRPipeline p = createBasicPipeline(RowReduceFn.class);
    Configuration conf = p.getConfiguration();
    conf.set(Y_KEY_KEY, yKey);

    String popularKey = tempKey + (x ? "popularItemsByUserPartition/" : "popularUsersByItemPartition/");
    conf.set(POPULAR_KEY, popularKey);

    String testPrefix = Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "test/";
    conf.set(MAP_KEY, testPrefix);

    YState yState = new YState(ALSTypes.DENSE_ROW_MATRIX); // Shared Y-Matrix state

    // NOTE(review): the statement below has no terminator in this excerpt; its tail
    // (presumably writing the result to xKey) is missing.
    GroupingOptions opts = groupingOptions();
    PCollection<MatrixRow> matrix = PTables.asPTable(p.read(input(rKey, ALSTypes.SPARSE_ROW_MATRIX)))
            .groupByKey(opts).parallelDo("rowReduce", new RowReduceFn(yState), ALSTypes.DENSE_ROW_MATRIX)

    // When computing Y, also emit convergence samples next to the iteration output.
    if (!x) {
        matrix.parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
                .parallelDo("convergenceSample", new ConvergenceSampleFn(yState), Avros.strings())
                .write(compressedTextOutput(p.getConfiguration(), iterationKey + "Yconvergence"));

    // When computing X with a non-empty test-set fraction and existing test data:
    // compute the per-user average precision, log its mean and upload it as
    // <iterationKey>MAP.
    if (x && ConfigUtils.getDefaultConfig().getDouble("model.test-set-fraction") > 0.0
            && store.exists(testPrefix, false)) {
        PCollection<Double> aps = matrix
                .parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
                .parallelDo("computeAP", new ComputeUserAPFn(yState), Avros.doubles());
        Mean meanAveragePrecision = new Mean();
        for (double ap : aps.materialize()) {
        log.info("Mean average precision: {}", meanAveragePrecision.getResult());

        File tempMAPFile = File.createTempFile("MAP", ".txt");
        Files.write(Double.toString(meanAveragePrecision.getResult()), tempMAPFile, StandardCharsets.UTF_8);
        store.upload(iterationKey + "MAP", tempMAPFile, false);

    return p;

From source file:com.itemanalysis.psychometrics.histogram.Histogram.java

/**
 * Builds the histogram for {@code x}: records the sample size, derives the outer bin
 * boundaries from the minimum and maximum, chooses the bin calculation that matches
 * {@code binCalculationType}, creates the bins and then visits every bin for every
 * observation.
 *
 * <p>NOTE(review): this excerpt looks truncated - the bodies of the first loop and of
 * the counting loop, as well as several closing braces, are missing. The first loop
 * presumably feeds {@code x[i]} into the min/max/mean/sd accumulators - confirm.
 *
 * @param x observations to bin
 */
private void createHistogram(double[] x) {
    n = x.length;
    Min min = new Min();
    Max max = new Max();
    Mean mean = new Mean();
    StandardDeviation sd = new StandardDeviation();

    for (int i = 0; i < x.length; i++) {

    // Widen the outer boundaries by 1/1000 of the range on each side.
    double range = max.getResult() - min.getResult();
    double lowestBoundary = min.getResult() - range / 1000;
    double largestBoundary = max.getResult() + range / 1000;

    // Pick the bin-count rule. Only the Freedman-Diaconis branch needs the quartiles,
    // and only the Scott branch uses the standard deviation.
    if (binCalculationType == BinCalculationType.SCOTT) {
        binCalc = new ScottBinCalculation(n, min.getResult(), max.getResult(), sd.getResult());
    } else if (binCalculationType == BinCalculationType.FREEDMAN_DIACONIS) {
        Percentile percentile = new Percentile();
        double q1 = percentile.evaluate(x, 25);
        double q3 = percentile.evaluate(x, 75);
        binCalc = new FreedmanDiaconisBinCalculation(n, min.getResult(), max.getResult(), q1, q3);
    } else if (binCalculationType == BinCalculationType.STURGES) {
        binCalc = new SturgesBinCalculation(n, min.getResult(), max.getResult());

    numberOfBins = binCalc.numberOfBins();
    binWidth = binCalc.binWidth();

    //create bins
    createBins(lowestBoundary, largestBoundary);

    //count observations in each bin
    for (int i = 0; i < n; i++) {
        for (Bin b : bins) {

From source file:gamlss.distributions.PE.java

/** Calculates the initial values of sigma as
 * {@code (|y[i] - mean(y)| + sd(y)) / 2} for every entry of {@code y}.
 * Also stores the dimension of {@code y} in {@code size}.
 * NOTE(review): the closing braces of the loop and of the method are missing
 * from this excerpt.
 * @param y - vector of values of response variable
 * @return vector of initial values of sigma
 */
private ArrayRealVector setSigmaInitial(final ArrayRealVector y) {
    //sigma.initial = expression( sigma <- (abs(y-mean(y))+sd(y))/2 )   
    final double mean = new Mean().evaluate(y.getDataRef());
    final double sd = new StandardDeviation().evaluate(y.getDataRef());
    size = y.getDimension();
    double[] out = new double[size];
    for (int i = 0; i < size; i++) {
        out[i] = (FastMath.abs(y.getEntry(i) - mean) + sd) / 2;
    // The flag false presumably wraps out without copying it - see the ArrayRealVector docs.
    return new ArrayRealVector(out, false);

From source file:gamlss.distributions.BCPE.java

/**  Calculates the initial values of mu as {@code (y[i] + mean(y)) / 2},
 * i.e. halfway between each observation and the overall mean.
 * Also stores the dimension of {@code y} in {@code size}.
 * NOTE(review): the closing braces of the loop and of the method are missing
 * from this excerpt.
 * @param y - vector of values of response variable
 * @return  a vector of initial values of mu
 */
private ArrayRealVector setMuInitial(final ArrayRealVector y) {
    //mu.initial =  expression(mu <- (y+mean(y))/2)
    size = y.getDimension();
    double[] out = new double[size];
    Mean mean = new Mean();
    double yMean = mean.evaluate(y.getDataRef());
    for (int i = 0; i < size; i++) {
        out[i] = (y.getEntry(i) + yMean) / 2;
    // The flag false presumably wraps out without copying it - see the ArrayRealVector docs.
    return new ArrayRealVector(out, false);

From source file:edu.umd.umiacs.clip.tools.classifier.LibSVMUtils.java

/**
 * Learns per-key z-scoring statistics from the training lines.
 *
 * <p>Each line is passed to {@code LibSVMUtils::split}; the middle element of the
 * resulting triple (a list of pairs) is flattened, the pair values are grouped by pair
 * key, and every key is mapped to the mean and the standard deviation of its values.
 *
 * <p>NOTE(review): the {@code reducing} combiner builds a new list from both operands on
 * every merge, so collecting the values of a key copies them repeatedly; a
 * {@code mapping(..., toList())} collector would avoid that. The method's closing brace
 * is not visible in this excerpt.
 *
 * @param training training lines (presumably in LibSVM format - confirm against
 *        {@code split})
 * @return map from key to a pair (mean, standard deviation) of the values seen for it
 */
public static Map<Integer, Pair<Double, Double>> learnZscoringModel(List<String> training) {
    return training.stream().map(LibSVMUtils::split).map(Triple::getMiddle).flatMap(List::stream)
            // Group the values by key, concatenating them into one list per key.
            .collect(groupingBy(Pair::getKey, ConcurrentHashMap::new,
                    reducing(new ArrayList<Float>(), pair -> asList(pair.getRight()),
                            (p1, p2) -> Stream.of(p1, p2).flatMap(List::stream).collect(toList()))))
            .entrySet().stream()
            // Unbox each list into a double[] so the statistics classes can evaluate it.
            .map(entry -> Pair.of(entry.getKey(), entry.getValue().stream().mapToDouble(f -> f).toArray()))
            .collect(toMap(Entry::getKey, entry -> Pair.of(new Mean().evaluate(entry.getValue()),
                    new StandardDeviation().evaluate(entry.getValue()))));