distributed.core.DistributedUtils.java Source code

Java tutorial

Introduction

Here is the source code for distributed.core.DistributedUtils.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    DistributedUtils.java
 *    Copyright (C) 2014 University of Waikato, Hamilton, New Zealand
 *
 */

package distributed.core;

import weka.core.Attribute;
import weka.core.AttributeStats;
import weka.core.Instances;
import weka.core.Utils;
import weka.core.stats.ArffSummaryNumericMetric;
import weka.core.stats.NominalStats;
import weka.core.stats.NumericStats;

/**
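 * Utility routines for computing per-attribute summary statistics (numeric and
 * nominal) and adding them to a dataset header as summary attributes.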
 * 
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision$
 */
public class DistributedUtils {

    /**
     * Computes summary statistics for one attribute of a dataset, counting
     * zero values as missing.
     * <p>
     * Every instance is visited once. A value that is missing or equal to zero
     * increments the MISSING metric; any other value increments COUNT by one and
     * is accumulated into SUM, SUMSQ, MIN and MAX. Each instance contributes a
     * count of exactly one; instance weights are not consulted here.
     *
     * @param denormalized the data to scan
     * @param attIndex the index of the attribute to summarize
     * @return a NumericStats, named after the attribute, holding the accumulated
     *         metrics with {@code computeDerived()} already applied
     */
    public static NumericStats getNumericAttributeStatsSparse(Instances denormalized, int attIndex) {
        NumericStats ns = new NumericStats(denormalized.attribute(attIndex).name());

        // Updates are made in place on the array returned by getStats(); it has
        // to be the live backing array for them to be recorded, so fetching it
        // once is equivalent to fetching it before every update.
        double[] metrics = ns.getStats();
        final int missingIdx = ArffSummaryNumericMetric.MISSING.ordinal();
        final int countIdx = ArffSummaryNumericMetric.COUNT.ordinal();
        final int sumIdx = ArffSummaryNumericMetric.SUM.ordinal();
        final int sumSqIdx = ArffSummaryNumericMetric.SUMSQ.ordinal();
        final int minIdx = ArffSummaryNumericMetric.MIN.ordinal();
        final int maxIdx = ArffSummaryNumericMetric.MAX.ordinal();

        final int numInstances = denormalized.numInstances();
        for (int j = 0; j < numInstances; j++) {
            double value = denormalized.instance(j).value(attIndex);

            if (Utils.isMissingValue(value) || value == 0) {
                metrics[missingIdx]++;
                continue;
            }

            metrics[countIdx]++;
            metrics[sumIdx] += value;
            metrics[sumSqIdx] += value * value;

            if (Double.isNaN(metrics[minIdx])) {
                // A NaN minimum is taken to mean "no value seen yet": the first
                // non-missing, non-zero value seeds both extremes.
                metrics[minIdx] = value;
                metrics[maxIdx] = value;
            } else if (value < metrics[minIdx]) {
                metrics[minIdx] = value;
            } else if (value > metrics[maxIdx]) {
                metrics[maxIdx] = value;
            }
        }

        ns.computeDerived();

        return ns;
    }

    /**
     * Builds a copy of the dataset's header with one extra summary attribute
     * for every numeric and every nominal attribute.
     * <p>
     * The returned header is created with {@code new Instances(denormalized, 0)}
     * and each summary attribute is inserted at the end position
     * ({@code header.numAttributes()}). Attributes that are neither numeric nor
     * nominal get no summary attribute.
     * <p>
     * {@code denormalized.attributeStats(i)} is only invoked for the attributes
     * whose statistics are actually read from it: numeric attributes when
     * {@code treatZerosAsMissing} is false, and nominal attributes.
     *
     * @param denormalized the data to summarize
     * @param treatZerosAsMissing if true, numeric attributes are summarized with
     *          {@link #getNumericAttributeStatsSparse(Instances, int)} (zeros
     *          counted as missing); if false, the values reported by
     *          {@code attributeStats} are used
     * @return the header with the summary attributes added
     */
    public static Instances makeHeaderWithSummaryAtts(Instances denormalized, boolean treatZerosAsMissing) {
        Instances header = new Instances(denormalized, 0);

        for (int i = 0; i < denormalized.numAttributes(); i++) {
            Attribute att = denormalized.attribute(i);
            Attribute summaryAtt;

            if (att.isNumeric()) {
                NumericStats ns;
                if (treatZerosAsMissing) {
                    // The sparse routine scans the data itself, so the
                    // attributeStats() pass is not needed on this path.
                    ns = getNumericAttributeStatsSparse(denormalized, i);
                } else {
                    AttributeStats stats = denormalized.attributeStats(i);
                    ns = new NumericStats(att.name());
                    double[] metrics = ns.getStats();
                    metrics[ArffSummaryNumericMetric.MIN.ordinal()] = stats.numericStats.min;
                    metrics[ArffSummaryNumericMetric.MAX.ordinal()] = stats.numericStats.max;
                    metrics[ArffSummaryNumericMetric.COUNT.ordinal()] = stats.numericStats.count;
                    metrics[ArffSummaryNumericMetric.SUM.ordinal()] = stats.numericStats.sum;
                    metrics[ArffSummaryNumericMetric.SUMSQ.ordinal()] = stats.numericStats.sumSq;
                    metrics[ArffSummaryNumericMetric.MISSING.ordinal()] = stats.missingCount;

                    ns.computeDerived();
                }
                summaryAtt = ns.makeAttribute();
            } else if (att.isNominal()) {
                AttributeStats stats = denormalized.attributeStats(i);
                NominalStats nom = new NominalStats(att.name());
                nom.setNumMissing(stats.missingCount);

                // One frequency entry per declared label, in label order.
                double[] labelFreqs = stats.nominalWeights;
                for (int j = 0; j < att.numValues(); j++) {
                    nom.add(att.value(j), labelFreqs[j]);
                }
                summaryAtt = nom.makeAttribute();
            } else {
                // No summary attribute is produced for other attribute types.
                continue;
            }

            header.insertAttributeAt(summaryAtt, header.numAttributes());
        }

        return header;
    }
}