Example usage for weka.core Instances attributeStats

List of usage examples for weka.core Instances attributeStats

Introduction

On this page you can find example usages of the method weka.core.Instances.attributeStats.

Prototype


public AttributeStats attributeStats(int index) 

Source Link

Document

Calculates summary statistics on the values that appear in this set of instances for a specified attribute.

Usage

From source file:sirius.misc.zscore.ZscoreTableModel.java

License:Open Source License

/**
 * Removes numeric attributes whose instance-to-instance changes, expressed in units of their own
 * standard deviation, follow those of an earlier attribute too closely.
 *
 * <p>The work runs on a minimum-priority background thread. Attributes are visited in
 * {@code scoreList} order. For every numeric pair (A, B) and every pair of consecutive instances,
 * the absolute change of each attribute is divided by that attribute's standard deviation; the
 * position counts as an overlap when the two normalised changes differ by less than
 * {@code stdDevDist}. When the share of overlapping positions exceeds {@code maxOverlapPercent},
 * attribute B is deleted from {@code posInstances}, {@code negInstances} and {@code scoreList}.
 * The sign of the change is ignored on purpose.
 *
 * @param stdDevDist        largest gap between the two normalised changes that still counts as an
 *                          overlap
 * @param maxOverlapPercent overlap percentage above which attribute B is removed
 * @param includeNegatives  when {@code true}, the negative instances are appended before filtering
 */
public void siriusCorrelationFiltering(final double stdDevDist, final double maxOverlapPercent,
        final boolean includeNegatives) {
    Thread thread = new Thread() {
        @Override
        public void run() {
            // NOTE(review): this is the very same object as posInstances, so the negatives
            // appended below remain in posInstances after filtering - confirm this is intended.
            Instances instances = ZscoreTableModel.this.posInstances;
            if (includeNegatives) {
                for (int x = 0; x < ZscoreTableModel.this.negInstances.numInstances(); x++) {
                    instances.add(ZscoreTableModel.this.negInstances.instance(x));
                }
            }
            // O(a*a*n) pairwise scan, where n = number of instances and a = number of attributes.
            MessageDialog m = new MessageDialog(null, "Progress", "0%");
            try {
                // Only attributes are deleted below, so the number of consecutive pairs is fixed.
                final int numPairs = instances.numInstances() - 1;
                for (int a = 0; a < instances.numAttributes(); a++) {
                    int indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName()).index();
                    if (!instances.attribute(indexA).isNumeric()) {
                        continue;
                    }
                    double attributeAStddev = instances.attributeStats(indexA).numericStats.stdDev;
                    // b only advances when attribute B is kept; a deletion shifts the next
                    // candidate into position b.
                    for (int b = a + 1; b < instances.numAttributes();) {
                        m.update(a + "/" + instances.numAttributes());
                        int indexB = instances.attribute(ZscoreTableModel.this.scoreList.get(b).getName())
                                .index();
                        if (!instances.attribute(indexB).isNumeric()) {
                            b++;
                            continue;
                        }
                        int numOfOverlap = 0;
                        double attributeBStddev = instances.attributeStats(indexB).numericStats.stdDev;
                        for (int x = 0; x < numPairs; x++) {
                            // An overlap means both attributes moved by (almost) the same number
                            // of standard deviations between instance x and instance x + 1.
                            double attributeADifference = Math.abs(
                                    (instances.instance(x).value(indexA) - instances.instance(x + 1).value(indexA))
                                            / attributeAStddev);
                            double attributeBDifference = Math.abs(
                                    (instances.instance(x).value(indexB) - instances.instance(x + 1).value(indexB))
                                            / attributeBStddev);
                            if (Math.abs(attributeADifference - attributeBDifference) < stdDevDist) {
                                numOfOverlap++;
                            }
                        }
                        // Floating-point division: integer division would truncate the percentage
                        // and throw ArithmeticException when there is exactly one instance.
                        double overlapPercent = (numOfOverlap * 100.0) / numPairs;
                        if (overlapPercent > maxOverlapPercent) {
                            ZscoreTableModel.this.posInstances.deleteAttributeAt(indexB);
                            ZscoreTableModel.this.negInstances.deleteAttributeAt(indexB);
                            ZscoreTableModel.this.scoreList.remove(b);
                            // Deleting B may have shifted A's position, so look it up again.
                            indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName())
                                    .index();
                        } else {
                            b++;
                        }
                    }
                }
            } finally {
                // Close the progress dialog even if the scan fails part-way through.
                m.dispose();
            }
            ZscoreTableModel.this.label.setText("" + instances.numAttributes());
            //compute(ZscoreTableModel.this.posInstances,ZscoreTableModel.this.negInstances);
            ZscoreTableModel.this.fireTableDataChanged();
        }
    };
    thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority
    thread.start();
}

From source file:sirius.misc.zscore.ZscoreTableModel.java

License:Open Source License

/**
 * Removes numeric attributes that are strongly linearly correlated with an earlier attribute.
 *
 * <p>The work runs on a minimum-priority background thread. Attributes are visited in
 * {@code scoreList} order. For every numeric pair (A, B) the absolute value of
 * {@code sum((a_i - meanA) * (b_i - meanB)) / ((n - 1) * stdDevA * stdDevB)} is computed; when it
 * exceeds {@code score}, attribute B is deleted from {@code posInstances}, {@code negInstances}
 * and {@code scoreList}.
 *
 * @param score            absolute correlation above which attribute B is removed
 * @param includeNegatives when {@code true}, the negative instances are appended before filtering
 */
public void pearsonCorrelationFiltering(final double score, final boolean includeNegatives) {
    Thread thread = new Thread() {
        @Override
        public void run() {
            // NOTE(review): this is the very same object as posInstances, so the negatives
            // appended below remain in posInstances after filtering - confirm this is intended.
            Instances instances = ZscoreTableModel.this.posInstances;
            if (includeNegatives) {
                for (int x = 0; x < ZscoreTableModel.this.negInstances.numInstances(); x++) {
                    instances.add(ZscoreTableModel.this.negInstances.instance(x));
                }
            }
            // Only the magnitude of the correlation matters here; its sign is ignored.
            // O(a*a*n) pairwise scan, where n = number of instances and a = number of attributes.
            MessageDialog m = new MessageDialog(null, "Progress", "0%");
            try {
                for (int a = 0; a < instances.numAttributes(); a++) {
                    int indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName()).index();
                    if (!instances.attribute(indexA).isNumeric()) {
                        continue;
                    }
                    // Compute the statistics once and read both values from the same result
                    // (type is fully qualified so that no additional import is required).
                    weka.core.AttributeStats statsA = instances.attributeStats(indexA);
                    double attributeAStddev = statsA.numericStats.stdDev;
                    double attributeAMean = statsA.numericStats.mean;
                    // b only advances when attribute B is kept; a deletion shifts the next
                    // candidate into position b.
                    for (int b = a + 1; b < instances.numAttributes();) {
                        m.update(a + "/" + instances.numAttributes());
                        int indexB = instances.attribute(ZscoreTableModel.this.scoreList.get(b).getName())
                                .index();
                        if (!instances.attribute(indexB).isNumeric()) {
                            b++;
                            continue;
                        }
                        weka.core.AttributeStats statsB = instances.attributeStats(indexB);
                        double attributeBStddev = statsB.numericStats.stdDev;
                        double attributeBMean = statsB.numericStats.mean;
                        // Sum of the products of the deviations from the respective means.
                        double numerator = 0.0;
                        for (int x = 0; x < instances.numInstances(); x++) {
                            numerator += (instances.instance(x).value(indexA) - attributeAMean)
                                    * (instances.instance(x).value(indexB) - attributeBMean);
                        }
                        double pScore = Math.abs(
                                numerator / ((instances.numInstances() - 1) * attributeAStddev * attributeBStddev));
                        if (pScore > score) {
                            ZscoreTableModel.this.posInstances.deleteAttributeAt(indexB);
                            ZscoreTableModel.this.negInstances.deleteAttributeAt(indexB);
                            ZscoreTableModel.this.scoreList.remove(b);
                            // Deleting B may have shifted A's position, so look it up again.
                            indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName())
                                    .index();
                        } else {
                            b++;
                        }
                    }
                }
            } finally {
                // Close the progress dialog even if the scan fails part-way through.
                m.dispose();
            }
            ZscoreTableModel.this.label.setText("" + instances.numAttributes());
            //compute(ZscoreTableModel.this.posInstances,ZscoreTableModel.this.negInstances);
            ZscoreTableModel.this.fireTableDataChanged();
        }
    };
    thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority
    thread.start();
}

From source file:sirius.misc.zscore.ZscoreTableModel.java

License:Open Source License

/**
 * Builds the score list for every attribute of the positive data set, measured against the
 * negative data set, and refreshes the table.
 *
 * <p>Shows an error dialog and returns without doing anything when either data set is
 * {@code null} or when the two sets have a different number of attributes. Otherwise the two sets
 * are stored on this model and the scoring runs on a minimum-priority background thread:
 * <ol>
 *   <li>for each numeric attribute, every positive value is turned into an absolute z-score
 *       against the negative mean and standard deviation; the mean z-score and the percentage of
 *       positive instances above 0.5, 1, 2 and 3 are recorded; non-numeric attributes get a
 *       name-only entry;</li>
 *   <li>a gain ratio is evaluated per attribute on the combined data, with the last attribute
 *       used as the class;</li>
 *   <li>the list is sorted with {@code SortByMeanZScore} and the table and label are updated.</li>
 * </ol>
 *
 * @param posInstances positive data set
 * @param negInstances negative data set; must have the same number of attributes
 */
public void compute(final Instances posInstances, final Instances negInstances) {
    if (posInstances == null || negInstances == null) {
        JOptionPane.showMessageDialog(null, "Please load file before computing.", "Error",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (posInstances.numAttributes() != negInstances.numAttributes()) {
        JOptionPane.showMessageDialog(null, "Number of attributes between the two files does not tally.",
                "Error", JOptionPane.ERROR_MESSAGE);
        return;
    }
    this.scoreList = new ArrayList<Scores>();
    this.posInstances = posInstances;
    this.negInstances = negInstances;
    Thread thread = new Thread() {
        @Override
        public void run() {
            MessageDialog m = new MessageDialog(null, "Progress", "0%");
            try {
                final int numAttributes = posInstances.numAttributes();
                final int numPositives = posInstances.numInstances();
                int shownPercent = 0; // the dialog starts out displaying "0%"
                for (int x = 0; x < numAttributes; x++) {
                    // Real percentage of attributes processed; refresh only when it changes.
                    int percent = (int) ((long) x * 100 / numAttributes);
                    if (percent != shownPercent) {
                        m.update(percent + "%");
                        shownPercent = percent;
                    }
                    String name = posInstances.attribute(x).name();
                    if (!posInstances.attribute(x).isNumeric()) {
                        ZscoreTableModel.this.scoreList.add(new Scores(name));
                        continue;
                    }
                    // One statistics pass per data set (type is fully qualified so that no
                    // additional import is required).
                    weka.core.AttributeStats posStats = posInstances.attributeStats(x);
                    weka.core.AttributeStats negStats = negInstances.attributeStats(x);
                    double posMean = posStats.numericStats.mean;
                    double posStdDev = posStats.numericStats.stdDev;
                    double negMean = negStats.numericStats.mean;
                    double negStdDev = negStats.numericStats.stdDev;
                    if (negStdDev == 0) {
                        // Avoid dividing by zero when the negative values are all identical.
                        negStdDev = 0.01;
                    }
                    double totalZScore = 0.0;
                    int numGTZScore0_5 = 0;
                    int numGTZScore1 = 0;
                    int numGTZScore2 = 0;
                    int numGTZScore3 = 0;
                    for (int y = 0; y < numPositives; y++) {
                        double zScore = Math.abs((posInstances.instance(y).value(x) - negMean) / negStdDev);
                        totalZScore += zScore;
                        if (zScore > 0.5) {
                            numGTZScore0_5++;
                        }
                        if (zScore > 1) {
                            numGTZScore1++;
                        }
                        if (zScore > 2) {
                            numGTZScore2++;
                        }
                        if (zScore > 3) {
                            numGTZScore3++;
                        }
                    }
                    double meanZScore = totalZScore / numPositives;
                    // Floating-point division: integer division would truncate the percentages
                    // and throw ArithmeticException for an empty positive set.
                    double percentGTZScore0_5 = (numGTZScore0_5 * 100.0) / numPositives;
                    double percentGTZScore1 = (numGTZScore1 * 100.0) / numPositives;
                    double percentGTZScore2 = (numGTZScore2 * 100.0) / numPositives;
                    double percentGTZScore3 = (numGTZScore3 * 100.0) / numPositives;
                    // NOTE(review): the trailing -1 is presumably a placeholder gain ratio that
                    // setGainRatio overwrites below - confirm against the Scores constructor.
                    ZscoreTableModel.this.scoreList
                            .add(new Scores(name, posMean, posStdDev, negMean, negStdDev, meanZScore,
                                    percentGTZScore0_5, percentGTZScore1, percentGTZScore2, percentGTZScore3,
                                    -1));
                }
                try {
                    // Combine both sets; the last attribute is used as the class.
                    Instances instances = new Instances(posInstances);
                    for (int x = 0; x < negInstances.numInstances(); x++) {
                        instances.add(negInstances.instance(x));
                    }
                    instances.setClassIndex(instances.numAttributes() - 1);
                    // Evaluate the attributes individually and obtain the gain ratio.
                    GainRatioAttributeEval gainRatio = new GainRatioAttributeEval();
                    if (instances.numAttributes() > 0) {
                        gainRatio.buildEvaluator(instances);
                    }
                    // scoreList is still in attribute order here (it is sorted afterwards), so
                    // entry x belongs to attribute x.
                    for (int x = 0; x < (instances.numAttributes() - 1); x++) {
                        ZscoreTableModel.this.scoreList.get(x).setGainRatio(gainRatio.evaluateAttribute(x));
                    }
                } catch (Exception e) {
                    // Best effort: a failed gain-ratio evaluation must not discard the z-scores.
                    e.printStackTrace();
                }
                Collections.sort(ZscoreTableModel.this.scoreList, new SortByMeanZScore());
                fireTableDataChanged();
            } finally {
                // Close the progress dialog even if scoring fails part-way through.
                m.dispose();
            }
            ZscoreTableModel.this.label.setText("" + ZscoreTableModel.this.scoreList.size());
        }
    };
    thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority
    thread.start();
}