List of usage examples for weka.attributeSelection GainRatioAttributeEval evaluateAttribute
@Override public double evaluateAttribute(int attribute) throws Exception
From source file:org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java
License:Apache License
/** * Constructor to instantiate a new FeatureEvaluator object. * * @param json the data string// ww w.j a v a2 s . co m * @param data the data set * @throws Exception the exception */ public FeatureEvaluator(String json, Instances data) throws Exception { // public FeatureEvaluator(String json, Instances data, String filePath) throws Exception { this.featureTitles = new ArrayList<String>(); this.featureScores = new ArrayList<Double>(); this.featureWeights = new ArrayList<Double>(); this.featurePriorities = new ArrayList<Double>(); OrderedJSONObject jsonObject = new OrderedJSONObject(json.toString()); JSONArray jsontokenArray = jsonObject.getJSONArray("unprocessed_data"); String csvString = ""; String str; for (int i = 0; i < jsontokenArray.length(); i++) { str = jsontokenArray.get(i).toString(); str = str.substring(1, str.length() - 1); csvString += str + "\n"; } String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv"; File file = new File(filePath); // if file does not exists, then create it if (!file.exists()) file.createNewFile(); FileUtils.writeStringToFile(file, csvString); CSVLoader loader = new CSVLoader(); loader.setSource(new File(filePath)); data = loader.getDataSet(); if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); int numUnlabeledAttributes = data.numAttributes() - 1; double[] minmaxValues = new double[2]; double min, max; String[] options = new String[1]; options[0] = "-T -1.7976931348623157E308 -N -1"; // confidenceFactor = 0.25, minNumObject = 2 Ranker atrank = new Ranker(); atrank.setOptions(options); weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection(); // Information Gain Attribute Evaluator InfoGainAttributeEval infoGainAttrEval = new InfoGainAttributeEval(); atsel.setEvaluator(infoGainAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] infoGainRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { infoGainRanks[i] = Math.round(10000 * infoGainAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(infoGainRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledInfoGainRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledInfoGainRanks[i] = Math.round(10000 * ((infoGainRanks[i] - min) / (max - min))) / 10000d; } // Gain Ratio Attribute Evaluator GainRatioAttributeEval gainRatioAttrEval = new GainRatioAttributeEval(); atsel.setEvaluator(gainRatioAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] gainRatioRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { gainRatioRanks[i] = Math.round(10000 * gainRatioAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(gainRatioRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledGainRatioRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledGainRatioRanks[i] = Math.round(10000 * ((gainRatioRanks[i] - min) / (max - min))) / 10000d; } // Chi Squared Attribute Evaluator ChiSquaredAttributeEval chiSquaredAttrEval = new ChiSquaredAttributeEval(); atsel.setEvaluator(chiSquaredAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] chiSquaredRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { chiSquaredRanks[i] = Math.round(10000 * chiSquaredAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(chiSquaredRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledChiSquaredRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledChiSquaredRanks[i] = Math.round(10000 * ((chiSquaredRanks[i] - min) / (max - min))) / 10000d; } // Symmetrical Uncert Attribute Evaluator SymmetricalUncertAttributeEval symmetricalUncertAttrEval = new SymmetricalUncertAttributeEval(); atsel.setEvaluator(symmetricalUncertAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] symmetricalUncertRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { symmetricalUncertRanks[i] = Math.round(10000 * symmetricalUncertAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(symmetricalUncertRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledSymmetricalUncertRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledSymmetricalUncertRanks[i] = Math.round(10000 * ((symmetricalUncertRanks[i] - min) / (max - min))) / 10000d; } // Significance Attribute Evaluator SignificanceAttributeEval significanceAttrEval = new SignificanceAttributeEval(); atsel.setEvaluator(significanceAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] significanceRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { significanceRanks[i] = Math.round(10000 * significanceAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(significanceRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledSignificanceRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledSignificanceRanks[i] = Math.round(10000 * ((significanceRanks[i] - min) / (max - min))) / 10000d; } double attributeSum; double[] combinedRanks = new double[numUnlabeledAttributes]; double combinedranksSum = 0; for (int i = 0; i < numUnlabeledAttributes; i++) { attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i] + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i]; combinedRanks[i] = Math.round(10000 * attributeSum) / 10000d; combinedranksSum = combinedranksSum + combinedRanks[i]; } double[][] tempArray = new double[numUnlabeledAttributes][2]; String[] attributesTitles = new String[numUnlabeledAttributes]; double[] attributesScores = new double[numUnlabeledAttributes]; double[] attributesWeights = new double[numUnlabeledAttributes]; double[] attributesPriorities = new double[numUnlabeledAttributes]; for (int j = 0; j < numUnlabeledAttributes; j++) { tempArray[j][0] = j; tempArray[j][1] = combinedRanks[j]; } double temp; for (int i = 0; i < numUnlabeledAttributes; i++) { for (int j = 1; j < (numUnlabeledAttributes - i); j++) { if (combinedRanks[j - 1] < combinedRanks[j]) { //swap the elements! temp = combinedRanks[j - 1]; combinedRanks[j - 1] = combinedRanks[j]; combinedRanks[j] = temp; } } } for (int j = 0; j < numUnlabeledAttributes; j++) { for (int k = 0; k < numUnlabeledAttributes; k++) { if (combinedRanks[j] == tempArray[k][1]) { attributesTitles[j] = data.attribute((int) tempArray[k][0]).toString(); String res[] = attributesTitles[j].split("\\s+"); attributesTitles[j] = res[1]; this.featureTitles.add(attributesTitles[j]); break; } } attributesScores[j] = Math.round(10000 * (combinedRanks[j] / 9)) / 100d; attributesWeights[j] = Math.round(10000 * (combinedRanks[j] / combinedranksSum)) / 100d; attributesPriorities[j] = Math.round(attributesScores[j] * attributesWeights[j]) / 100d; this.featureScores.add(attributesScores[j]); this.featureWeights.add(attributesWeights[j]); this.featurePriorities.add(attributesPriorities[j]); System.out.println(attributesTitles[j] + " is " + attributesScores[j] + " % Important"); } }
From source file:sirius.misc.zscore.ZscoreTableModel.java
License:Open Source License
public void compute(final Instances posInstances, final Instances negInstances) { if (posInstances == null || negInstances == null) { JOptionPane.showMessageDialog(null, "Please load file before computing.", "Error", JOptionPane.ERROR_MESSAGE); return;/* w w w .ja v a 2s. c o m*/ } if (posInstances.numAttributes() != negInstances.numAttributes()) { JOptionPane.showMessageDialog(null, "Number of attributes between the two files does not tally.", "Error", JOptionPane.ERROR_MESSAGE); return; } this.scoreList = new ArrayList<Scores>(); this.posInstances = posInstances; this.negInstances = negInstances; Thread thread = new Thread() { public void run() { MessageDialog m = new MessageDialog(null, "Progress", "0%"); int percentCount = posInstances.numAttributes() / 100; if (percentCount == 0) percentCount = 1; for (int x = 0; x < posInstances.numAttributes(); x++) { if (x % percentCount == 0) m.update(x / percentCount + "%"); if (posInstances.attribute(x).isNumeric() == false) { ZscoreTableModel.this.scoreList.add(new Scores(posInstances.attribute(x).name())); continue; } String name = posInstances.attribute(x).name(); double posMean = posInstances.attributeStats(x).numericStats.mean; double posStdDev = posInstances.attributeStats(x).numericStats.stdDev; double negMean = negInstances.attributeStats(x).numericStats.mean; double negStdDev = negInstances.attributeStats(x).numericStats.stdDev; if (negStdDev == 0) negStdDev = 0.01; double totalZScore = 0.0; int numGTZScore0_5 = 0; int numGTZScore1 = 0; int numGTZScore2 = 0; int numGTZScore3 = 0; for (int y = 0; y < posInstances.numInstances(); y++) { double zScore = Math.abs(((posInstances.instance(y).value(x) - negMean) / negStdDev)); totalZScore += zScore; if (zScore > 0.5) numGTZScore0_5++; if (zScore > 1) numGTZScore1++; if (zScore > 2) numGTZScore2++; if (zScore > 3) numGTZScore3++; } double meanZScore = totalZScore / posInstances.numInstances(); double percentGTZScore0_5 = (numGTZScore0_5 * 100) / posInstances.numInstances(); double percentGTZScore1 = (numGTZScore1 * 100) / posInstances.numInstances(); double percentGTZScore2 = (numGTZScore2 * 100) / posInstances.numInstances(); double percentGTZScore3 = (numGTZScore3 * 100) / posInstances.numInstances(); ZscoreTableModel.this.scoreList .add(new Scores(name, posMean, posStdDev, negMean, negStdDev, meanZScore, percentGTZScore0_5, percentGTZScore1, percentGTZScore2, percentGTZScore3, -1)); } try { Instances instances = new Instances(posInstances); for (int x = 0; x < negInstances.numInstances(); x++) instances.add(negInstances.instance(x)); instances.setClassIndex(instances.numAttributes() - 1); //Evaluate the attributes individually and obtain the gainRatio GainRatioAttributeEval gainRatio = new GainRatioAttributeEval(); if (instances.numAttributes() > 0) { gainRatio.buildEvaluator(instances); } for (int x = 0; x < (instances.numAttributes() - 1); x++) { ZscoreTableModel.this.scoreList.get(x).setGainRatio(gainRatio.evaluateAttribute(x)); } } catch (Exception e) { e.printStackTrace(); } Collections.sort(ZscoreTableModel.this.scoreList, new SortByMeanZScore()); fireTableDataChanged(); m.dispose(); ZscoreTableModel.this.label.setText("" + ZscoreTableModel.this.scoreList.size()); } }; thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority thread.start(); }