List of usage examples for weka.core Instances attributeStats
public AttributeStats attributeStats(int index)
From source file:sirius.misc.zscore.ZscoreTableModel.java
License:Open Source License
public void siriusCorrelationFiltering(final double stdDevDist, final double maxOverlapPercent, final boolean includeNegatives) { Thread thread = new Thread() { public void run() { Instances instances = ZscoreTableModel.this.posInstances; if (includeNegatives) for (int x = 0; x < ZscoreTableModel.this.negInstances.numInstances(); x++) instances.add(ZscoreTableModel.this.negInstances.instance(x)); //for now, i will ignore the sign: as in, i would care only about the absolute change of stddev (ie. |stddev|) //use an O(a*a*n) algorithm where n = num of instances and a = num of attributes MessageDialog m = new MessageDialog(null, "Progress", "0%"); for (int a = 0; a < instances.numAttributes(); a++) { int indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName()).index(); if (instances.attribute(indexA).isNumeric() == false) continue; //for each attribute pair, check for the num of overlap percent double attibuteAStddev = instances.attributeStats(indexA).numericStats.stdDev; for (int b = a + 1; b < instances.numAttributes();) { m.update(a + "/" + instances.numAttributes()); int indexB = instances.attribute(ZscoreTableModel.this.scoreList.get(b).getName()).index(); if (instances.attribute(indexB).isNumeric() == false) { b++;// w w w . j a va 2 s. c om continue; } int numOfOverlap = 0; double attibuteBStddev = instances.attributeStats(indexB).numericStats.stdDev; for (int x = 0; x < instances.numInstances() - 1; x++) { //how do i consider an overlap? //absolute difference from the previous instance is same in stddev double attributeADifference = Math.abs( ((instances.instance(x).value(indexA) - instances.instance(x + 1).value(indexA)) / attibuteAStddev)); double attributeBDifference = Math.abs( ((instances.instance(x).value(indexB) - instances.instance(x + 1).value(indexB)) / attibuteBStddev)); if (Math.abs(attributeADifference - attributeBDifference) < stdDevDist) numOfOverlap++; } double overlapPercent = (numOfOverlap * 100) / (instances.numInstances() - 1); if (overlapPercent > maxOverlapPercent) { ZscoreTableModel.this.posInstances.deleteAttributeAt(indexB); ZscoreTableModel.this.negInstances.deleteAttributeAt(indexB); ZscoreTableModel.this.scoreList.remove(b); indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName()).index(); } else b++; } } m.dispose(); ZscoreTableModel.this.label.setText("" + instances.numAttributes()); //compute(ZscoreTableModel.this.posInstances,ZscoreTableModel.this.negInstances); ZscoreTableModel.this.fireTableDataChanged(); } }; thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority thread.start(); }
From source file:sirius.misc.zscore.ZscoreTableModel.java
License:Open Source License
public void pearsonCorrelationFiltering(final double score, final boolean includeNegatives) { Thread thread = new Thread() { public void run() { Instances instances = ZscoreTableModel.this.posInstances; if (includeNegatives) for (int x = 0; x < ZscoreTableModel.this.negInstances.numInstances(); x++) instances.add(ZscoreTableModel.this.negInstances.instance(x)); //for now, i will ignore the sign: as in, i would care only about the absolute change of stddev (ie. |stddev|) //use an O(a*a*n) algorithm where n = num of instances and a = num of attributes MessageDialog m = new MessageDialog(null, "Progress", "0%"); for (int a = 0; a < instances.numAttributes(); a++) { int indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName()).index(); if (instances.attribute(indexA).isNumeric() == false) continue; //for each attribute pair, check for the num of overlap percent double attributeAStddev = instances.attributeStats(indexA).numericStats.stdDev; double attributeAMean = instances.attributeStats(indexA).numericStats.mean; for (int b = a + 1; b < instances.numAttributes();) { m.update(a + "/" + instances.numAttributes()); int indexB = instances.attribute(ZscoreTableModel.this.scoreList.get(b).getName()).index(); if (instances.attribute(indexB).isNumeric() == false) { b++;//from w ww.j a va 2 s .c o m continue; } double attributeBStddev = instances.attributeStats(indexB).numericStats.stdDev; double attributeBMean = instances.attributeStats(indexB).numericStats.mean; double nominator = 0.0; for (int x = 0; x < instances.numInstances(); x++) { nominator += ((instances.instance(x).value(indexA) - attributeAMean) * (instances.instance(x).value(indexB) - attributeBMean)); } double pScore = Math.abs( nominator / ((instances.numInstances() - 1) * attributeAStddev * attributeBStddev)); if (pScore > score) { ZscoreTableModel.this.posInstances.deleteAttributeAt(indexB); ZscoreTableModel.this.negInstances.deleteAttributeAt(indexB); ZscoreTableModel.this.scoreList.remove(b); indexA = instances.attribute(ZscoreTableModel.this.scoreList.get(a).getName()).index(); } else b++; } } m.dispose(); ZscoreTableModel.this.label.setText("" + instances.numAttributes()); //compute(ZscoreTableModel.this.posInstances,ZscoreTableModel.this.negInstances); ZscoreTableModel.this.fireTableDataChanged(); } }; thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority thread.start(); }
From source file:sirius.misc.zscore.ZscoreTableModel.java
License:Open Source License
public void compute(final Instances posInstances, final Instances negInstances) { if (posInstances == null || negInstances == null) { JOptionPane.showMessageDialog(null, "Please load file before computing.", "Error", JOptionPane.ERROR_MESSAGE); return;/*from w w w . j a v a 2 s. c o m*/ } if (posInstances.numAttributes() != negInstances.numAttributes()) { JOptionPane.showMessageDialog(null, "Number of attributes between the two files does not tally.", "Error", JOptionPane.ERROR_MESSAGE); return; } this.scoreList = new ArrayList<Scores>(); this.posInstances = posInstances; this.negInstances = negInstances; Thread thread = new Thread() { public void run() { MessageDialog m = new MessageDialog(null, "Progress", "0%"); int percentCount = posInstances.numAttributes() / 100; if (percentCount == 0) percentCount = 1; for (int x = 0; x < posInstances.numAttributes(); x++) { if (x % percentCount == 0) m.update(x / percentCount + "%"); if (posInstances.attribute(x).isNumeric() == false) { ZscoreTableModel.this.scoreList.add(new Scores(posInstances.attribute(x).name())); continue; } String name = posInstances.attribute(x).name(); double posMean = posInstances.attributeStats(x).numericStats.mean; double posStdDev = posInstances.attributeStats(x).numericStats.stdDev; double negMean = negInstances.attributeStats(x).numericStats.mean; double negStdDev = negInstances.attributeStats(x).numericStats.stdDev; if (negStdDev == 0) negStdDev = 0.01; double totalZScore = 0.0; int numGTZScore0_5 = 0; int numGTZScore1 = 0; int numGTZScore2 = 0; int numGTZScore3 = 0; for (int y = 0; y < posInstances.numInstances(); y++) { double zScore = Math.abs(((posInstances.instance(y).value(x) - negMean) / negStdDev)); totalZScore += zScore; if (zScore > 0.5) numGTZScore0_5++; if (zScore > 1) numGTZScore1++; if (zScore > 2) numGTZScore2++; if (zScore > 3) numGTZScore3++; } double meanZScore = totalZScore / posInstances.numInstances(); double percentGTZScore0_5 = (numGTZScore0_5 * 100) / posInstances.numInstances(); double percentGTZScore1 = (numGTZScore1 * 100) / posInstances.numInstances(); double percentGTZScore2 = (numGTZScore2 * 100) / posInstances.numInstances(); double percentGTZScore3 = (numGTZScore3 * 100) / posInstances.numInstances(); ZscoreTableModel.this.scoreList .add(new Scores(name, posMean, posStdDev, negMean, negStdDev, meanZScore, percentGTZScore0_5, percentGTZScore1, percentGTZScore2, percentGTZScore3, -1)); } try { Instances instances = new Instances(posInstances); for (int x = 0; x < negInstances.numInstances(); x++) instances.add(negInstances.instance(x)); instances.setClassIndex(instances.numAttributes() - 1); //Evaluate the attributes individually and obtain the gainRatio GainRatioAttributeEval gainRatio = new GainRatioAttributeEval(); if (instances.numAttributes() > 0) { gainRatio.buildEvaluator(instances); } for (int x = 0; x < (instances.numAttributes() - 1); x++) { ZscoreTableModel.this.scoreList.get(x).setGainRatio(gainRatio.evaluateAttribute(x)); } } catch (Exception e) { e.printStackTrace(); } Collections.sort(ZscoreTableModel.this.scoreList, new SortByMeanZScore()); fireTableDataChanged(); m.dispose(); ZscoreTableModel.this.label.setText("" + ZscoreTableModel.this.scoreList.size()); } }; thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority thread.start(); }