Example usage for weka.core Instances meanOrMode

List of usage examples for weka.core Instances meanOrMode

Introduction

On this page you can find example usages of weka.core Instances meanOrMode.

Prototype

public double meanOrMode(Attribute att)

Source Link

Document

Returns the mean (mode) for a numeric (nominal) attribute as a floating-point value.

Usage

From source file:milk.classifiers.SimpleMI.java

License:Open Source License

/**
 * Collapses every exemplar of {@code train} into one instance and appends
 * it to a copy of {@code m_Attributes} from which the ID attribute has
 * been removed.
 *
 * Each attribute other than the ID and class attributes is summarised
 * either by {@code meanOrMode} (when {@code m_TransformMethod == 1}) or by
 * the midpoint of the two values returned by {@code minimax}.
 *
 * @param train the exemplars to transform
 * @return the dataset holding one summarising instance per exemplar
 * @throws Exception if any of the underlying dataset operations fails
 */
public Instances transform(Exemplars train) throws Exception {
    // Data to learn a model; the ID attribute is useless for that purpose.
    Instances data = new Instances(m_Attributes);
    data.deleteAttributeAt(m_IdIndex);

    // Empty dataset with the same header, used as the reference for new instances.
    Instances header = new Instances(data, 0);
    Instance prototype = new Instance(header.numAttributes());
    prototype.setDataset(header);

    double N = train.numExemplars(); // Number of exemplars
    int exemplarIdx = 0;
    while (exemplarIdx < N) {
        Exemplar exemplar = train.exemplar(exemplarIdx);
        Instances bag = exemplar.getInstances();

        Instance summary = new Instance(prototype);
        summary.setDataset(header);

        // Target position in the summary instance; advances only for kept attributes.
        int target = 0;
        for (int source = 0; source < bag.numAttributes(); source++) {
            boolean keep = (source != m_IdIndex) && (source != m_ClassIndex);
            if (keep) {
                double summarised;
                if (m_TransformMethod != 1) {
                    // presumably minimax returns {min, max} of the attribute -- confirm against the helper
                    double[] bounds = minimax(bag, source);
                    summarised = (bounds[0] + bounds[1]) / 2.0;
                } else {
                    summarised = bag.meanOrMode(source);
                }
                summary.setValue(target, summarised);
                target++;
            }
        }

        summary.setClassValue(exemplar.classValue());
        data.add(summary);
        exemplarIdx++;
    }

    return data;
}

From source file:mlda.attributes.MeanKurtosis.java

License:Open Source License

/**
 * Calculate metric value: the mean, over all numeric feature attributes,
 * of the per-attribute sample kurtosis computed below.
 *
 * @param mlData Multi-label dataset to which calculate the metric
 * @return Value of the metric; NaN if there is no numeric feature attribute
 */
public double calculate(MultiLabelInstances mlData) {
    Instances instances = mlData.getDataSet();
    // Held as a double on purpose: with an int, (n - 2) * (n - 3) is an
    // int product that silently overflows for datasets larger than ~46k
    // instances and corrupts the result.
    final double n = mlData.getNumInstances();

    int nNumeric = 0;
    double mean = 0;

    Set<Attribute> attributesSet = mlData.getFeatureAttributes();

    for (Attribute att : attributesSet) {
        if (!att.isNumeric()) {
            continue;
        }
        nNumeric++;

        final double avg = instances.meanOrMode(att);
        double var2 = 0; // sum of squared deviations from the mean
        double var4 = 0; // sum of fourth-power deviations from the mean

        for (Instance inst : instances) {
            final double val = inst.value(att);
            var2 += Math.pow(val - avg, 2);
            var4 += Math.pow(val - avg, 4);
        }

        // Excess kurtosis from the raw moment sums, then the small-sample correction.
        final double kurtosis = (n * var4 / Math.pow(var2, 2)) - 3;
        final double sampleKurtosis = (kurtosis * (n + 1) + 6) * (n - 1)
                / ((n - 2) * (n - 3));
        mean += sampleKurtosis;
    }

    if (nNumeric > 0) {
        mean = mean / nNumeric;
    } else {
        mean = Double.NaN;
    }

    this.value = mean;
    return value;
}

From source file:mlda.attributes.MeanOfMeanOfNumericAttributes.java

License:Open Source License

/**
 * Calculate metric value: the average of {@code meanOrMode} over the
 * numeric feature attributes.
 *
 * @param mlData Multi-label dataset to which calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    final Instances data = mlData.getDataSet();
    final Set<Attribute> features = mlData.getFeatureAttributes();

    double total = 0.0;
    int numericCount = 0;
    for (Attribute feature : features) {
        if (!feature.isNumeric()) {
            continue;
        }
        numericCount++;
        total += data.meanOrMode(feature);
    }

    // With no numeric attribute this is 0.0 / 0, i.e. NaN.
    this.value = total / numericCount;
    return value;
}

From source file:mlda.attributes.MeanSkewnessNumericAttributes.java

License:Open Source License

/**
 * Calculate metric value/*from   w ww.  j a  v a2s  .  co  m*/
 * 
 * @param mlData Multi-label dataset to which calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    Instances instances = mlData.getDataSet();
    int nInstances = mlData.getNumInstances();

    Set<Attribute> attributesSet = mlData.getFeatureAttributes();

    int nNumeric = 0;
    double mean = 0;
    double avg;
    double var;
    double stdev;

    for (Attribute att : attributesSet) {
        if (att.isNumeric()) {
            nNumeric++;
            avg = instances.meanOrMode(att);
            var = 0;
            for (Instance inst : instances) {
                var += Math.pow(inst.value(att) - avg, 3);
            }
            stdev = Math.sqrt(instances.variance(att));
            mean += nInstances * var / ((nInstances - 1) * (nInstances - 2) * Math.pow(stdev, 3));
        }
    }

    if (nNumeric > 0) {
        this.value = mean / nNumeric;
    } else {
        this.value = Double.NaN;
    }

    //this.value = mean;
    return value;
}

From source file:org.isep.simizer.example.policy.utils.IterativeSimpleKMeans.java

License:Open Source License

/**
 * Move the centroid to its new coordinates. Generate the centroid
 * coordinates based on its members (objects assigned to the cluster of the
 * centroid) and the distance function being used.
 *
 * @param centroidIndex index of the centroid which the coordinates will be
 * computed
 * @param members the objects that are assigned to the cluster of this
 * centroid
 * @param updateClusterInfo if the method is supposed to update the
 * m_Cluster arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    final int attributeCount = members.numAttributes();
    final int memberCount = members.numInstances();
    final boolean manhattan = m_DistanceFunction instanceof ManhattanDistance;

    double[] vals = new double[attributeCount];

    // Median bookkeeping; only populated for Manhattan distance.
    Instances medianSource = null;
    int lowerMiddle = 0;
    boolean evenCount = false;
    if (manhattan) {
        lowerMiddle = (memberCount - 1) / 2;
        evenCount = (memberCount % 2) == 0;
        medianSource = m_PreserveOrder ? members : new Instances(members);
    }

    for (int j = 0; j < attributeCount; j++) {
        final boolean nominal = members.attribute(j).isNominal();

        // Euclidean distance -> mean point, Manhattan distance -> median point;
        // a nominal attribute always uses the mode.
        if (m_DistanceFunction instanceof EuclideanDistance || nominal) {
            vals[j] = members.meanOrMode(j);
        } else if (manhattan) {
            if (memberCount == 1) {
                // singleton special case
                vals[j] = members.instance(0).value(j);
            } else {
                double median = medianSource.kthSmallestValue(j, lowerMiddle + 1);
                if (evenCount) {
                    // even number of members: average the two middle values
                    median = (median + medianSource.kthSmallestValue(j, lowerMiddle + 2)) / 2;
                }
                vals[j] = median;
            }
        }

        if (!updateClusterInfo) {
            continue;
        }

        m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
        m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
        if (nominal) {
            // More missing values than occurrences of the most frequent label.
            if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                    .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                vals[j] = Instance.missingValue(); // mark mode as missing
            }
        } else if (m_ClusterMissingCounts[centroidIndex][j] == memberCount) {
            vals[j] = Instance.missingValue(); // mark mean as missing
        }
    }

    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, vals));
    }
    return vals;
}

From source file:org.openml.webapplication.fantail.dc.statistical.DefaultAccuracy.java

License:Open Source License

/**
 * Computes the fraction of instances whose class value equals the value
 * returned by {@code meanOrMode} for the class attribute, and stores it
 * under {@code ids[0]}.
 *
 * @param instances the dataset to characterize
 * @return a map with the single entry {@code ids[0]}
 */
@Override
public Map<String, Double> characterize(Instances instances) {
    final Attribute classAttribute = instances.classAttribute();
    final double reference = instances.meanOrMode(classAttribute);
    final int total = instances.numInstances();

    // Count the instances that always predicting the reference value gets right.
    int hits = 0;
    int row = 0;
    while (row < total) {
        if (instances.instance(row).value(classAttribute) == reference) {
            hits++;
        }
        row++;
    }

    Map<String, Double> qualities = new HashMap<String, Double>();
    qualities.put(ids[0], ((double) hits / total));
    return qualities;
}

From source file:org.openml.webapplication.fantail.dc.statistical.Statistical.java

License:Open Source License

/**
 * Collects mean, standard deviation, kurtosis and skewness for each numeric
 * attribute (all attributes except the last one are examined) and reports,
 * under {@code ids[0..23]}, the mean, min, max and the 25th/50th/75th
 * percentiles of those four series. Every quality is 0.0 when no numeric
 * attribute is found.
 *
 * NOTE(review): meanList, stdDevList, kurtosisList and skewnessList are
 * declared outside this method and are not cleared here -- confirm they are
 * empty on entry.
 *
 * @param instances the dataset to characterize
 * @return the map of quality id to value
 */
@Override
public Map<String, Double> characterize(Instances instances) {
    // The last attribute is skipped (presumably the class attribute -- confirm).
    final int candidateCount = instances.numAttributes() - 1;
    int numericCount = 0;

    for (int attr = 0; attr < candidateCount; attr++) {
        if (!instances.attribute(attr).isNumeric()) {
            continue;
        }
        numericCount++;
        final double mean = instances.meanOrMode(attr);
        final double stddev = Math.sqrt(instances.variance(attr));
        final double kurtosis = findKurtosis(instances, mean, stddev, attr);
        final double skewness = findSkewness(instances, mean, stddev, attr);

        meanList.add(mean);
        stdDevList.add(stddev);
        kurtosisList.add(kurtosis);
        skewnessList.add(skewness);
    }

    if (numericCount == 0) {
        // Nothing to aggregate: every one of the 24 qualities is reported as zero.
        Map<String, Double> zeroed = new HashMap<String, Double>();
        for (int slot = 0; slot < 24; slot++) {
            zeroed.put(ids[slot], 0.0);
        }
        return zeroed;
    }

    double[] meansArray = ArrayUtils.toPrimitive(meanList.toArray(new Double[numericCount]));
    double[] stdDevsArray = ArrayUtils.toPrimitive(stdDevList.toArray(new Double[numericCount]));
    double[] kurtosisArray = ArrayUtils.toPrimitive(kurtosisList.toArray(new Double[numericCount]));
    double[] skewnessArray = ArrayUtils.toPrimitive(skewnessList.toArray(new Double[numericCount]));

    // Ids are laid out in groups of four (means, std devs, kurtosis, skewness),
    // one group per aggregate: mean, min, max, then the three percentiles.
    final double[][] series = { meansArray, stdDevsArray, kurtosisArray, skewnessArray };
    final double[] percentiles = { 25, 50, 75 };

    Map<String, Double> qualities = new HashMap<String, Double>();
    int slot = 0;
    for (double[] values : series) {
        qualities.put(ids[slot++], StatUtils.mean(values));
    }
    for (double[] values : series) {
        qualities.put(ids[slot++], StatUtils.min(values));
    }
    for (double[] values : series) {
        qualities.put(ids[slot++], StatUtils.max(values));
    }
    for (double percentile : percentiles) {
        for (double[] values : series) {
            qualities.put(ids[slot++], StatUtils.percentile(values, percentile));
        }
    }
    return qualities;
}

From source file:org.vimarsha.utils.impl.ArffAttributeInfoExtractor.java

License:Open Source License

/**
 * Returns a table model with the attribute related info when the selected attribute index is passed.
 * The statistics are computed on a copy of the loaded data, so the data held
 * by this extractor is left untouched.
 *
 * @param index index of the attribute
 * @return DefaultTableModel with a "Statistics" column (Name, Variance, Min, Max, Mean)
 *         and a "Value" column holding the corresponding values as strings
 */
public DefaultTableModel getArffAttributeInfo(int index) {
    DefaultTableModel defaultTableModel = new DefaultTableModel();
    // Work on a copy: deleteWithMissing removes instances from the dataset it
    // is called on, and operating directly on this.arffData would drop rows
    // from the extractor's dataset every time attribute info is requested.
    Instances temp = new Instances(this.arffData);
    //since kthSmallestValue cannot handle missing values, they need to be removed.
    temp.deleteWithMissing(index);
    ArrayList<String> tmp = new ArrayList<String>();
    defaultTableModel.addColumn("Statistics", new String[] { "Name", "Variance", "Min", "Max", "Mean" });
    tmp.add(temp.attribute(index).name());
    tmp.add(String.valueOf(temp.variance(index)));
    tmp.add(String.valueOf(temp.kthSmallestValue(index, 1))); //min value is the 1st smallest value
    tmp.add(String.valueOf(temp.kthSmallestValue(index, temp.numInstances()))); //max value is the last smallest value
    tmp.add(String.valueOf(temp.meanOrMode(index)));
    defaultTableModel.addColumn("Value", tmp.toArray());
    return defaultTableModel;
}

From source file:testtubesclassifier.MyKmeans.java

/**
 * Builds a new centroid instance for the given cluster.
 *
 * {@code meanOrMode} is computed for every attribute except the last two,
 * whose values stay at 0.0; all values are then written into a fresh
 * {@code DenseInstance}, which is printed and returned.
 *
 * @param centroidCluster the instances assigned to the cluster
 * @return the new centroid instance
 */
public Instance updateCentroid(Instances centroidCluster) {
    final int attributeCount = centroidCluster.numAttributes();
    // The last two attributes are deliberately not averaged.
    final int averagedCount = attributeCount - 2;

    double[] centroidValues = new double[attributeCount];
    int idx = 0;
    while (idx < averagedCount) {
        centroidValues[idx] = centroidCluster.meanOrMode(idx);
        idx++;
    }

    Instance centroid = new DenseInstance(attributeCount);
    for (idx = 0; idx < centroidValues.length; idx++) {
        centroid.setValue(centroidCluster.attribute(idx), centroidValues[idx]);
    }

    System.out.println("New centroid " + centroid);
    return centroid;
}

From source file:transformation.mimlTOml.ArithmeticTransformation.java

License:Open Source License

/**
 * Transforms the multi-instance dataset into a single-instance one: every
 * bag becomes one instance whose attributes are the bag label, the
 * {@code meanOrMode} of each attribute of the bag's instances, and the
 * bag's label values.
 *
 * @return the transformed dataset together with the original labels metadata
 * @throws Exception if any of the underlying dataset operations fails
 */
@Override
public MultiLabelInstances transformDataset() throws Exception {
    Instances newData = new Instances(template);
    int labelIndices[] = dataset.getLabelIndices();

    // One scratch instance is refilled for every bag.
    // NOTE(review): this relies on newData.add(...) storing a copy -- confirm.
    Instance newInst = new DenseInstance(newData.numAttributes());
    newInst.setDataset(newData); // Sets the reference to the dataset

    double nBags = dataset.getNumBags();
    int i = 0;
    while (i < nBags) {
        Bag bag = dataset.getBag(i);

        // Attribute 0 carries the bag label.
        newInst.setValue(0, bag.value(0));

        // Instances (relational value) of the bag; attribute j is summarised at position j + 1.
        Instances instances = bag.getBagAsInstances();
        for (int j = 0; j < instances.numAttributes(); j++) {
            newInst.setValue(j + 1, instances.meanOrMode(j));
        }

        // Copy the label information into the instance.
        int label = 0;
        while (label < labelIndices.length) {
            newInst.setValue(updatedLabelIndices[label], dataset.getBag(i).value(labelIndices[label]));
            label++;
        }

        newData.add(newInst);
        i++;
    }

    return new MultiLabelInstances(newData, dataset.getLabelsMetaData());
}