List of usage examples for the weka.attributeSelection.GainRatioAttributeEval constructor GainRatioAttributeEval()
public GainRatioAttributeEval()
From source file:FeatureSelectionClass.java
public AttributeSelection withInfoGain(String path) throws Exception { int N;//from w w w .ja va 2 s. com PreparingSteps pr = new PreparingSteps(); N = pr.getReadFileData(path).numAttributes(); Instances data = pr.getReadFileData(path); AttributeSelection selector = new AttributeSelection(); GainRatioAttributeEval evaluator = new GainRatioAttributeEval(); Ranker ranker = new Ranker(); ranker.setNumToSelect(Math.min(500, N - 1)); selector.setEvaluator(evaluator); selector.setSearch(ranker); selector.SelectAttributes(data); return selector; }
From source file:mulan.examples.InformationGainDimensionalityReduction.java
License:Open Source License
public static void main(String[] args) throws Exception { String path = Utils.getOption("path", args); String filestem = Utils.getOption("filestem", args); MultiLabelInstances mlData = new MultiLabelInstances(path + filestem + ".arff", path + filestem + ".xml"); String attributesToKeep = Utils.getOption("numattribs", args); final int NUM_TO_KEEP = Integer.parseInt(attributesToKeep); ASEvaluation ase = new GainRatioAttributeEval(); BinaryRelevanceAttributeEvaluator ae = new BinaryRelevanceAttributeEvaluator(ase, mlData, "max", "dl", "eval"); System.out.println(mlData.getDataSet().numAttributes()); if (NUM_TO_KEEP == 0) { for (int i = 0; i < mlData.getFeatureIndices().length; i++) { System.out//from ww w . j av a 2s. c om .println("Attribute " + mlData.getDataSet().attribute(mlData.getFeatureIndices()[i]).name() + " : " + ae.evaluateAttribute( mlData.getDataSet().attribute(mlData.getFeatureIndices()[i]).index())); } } else { Ranker r = new Ranker(); int[] result = r.search(ae, mlData); System.out.println(Arrays.toString(result)); int[] toKeep = new int[NUM_TO_KEEP + mlData.getNumLabels()]; System.arraycopy(result, 0, toKeep, 0, NUM_TO_KEEP); int[] labelIndices = mlData.getLabelIndices(); System.arraycopy(labelIndices, 0, toKeep, NUM_TO_KEEP, mlData.getNumLabels()); Remove filterRemove = new Remove(); filterRemove.setAttributeIndicesArray(toKeep); filterRemove.setInvertSelection(true); filterRemove.setInputFormat(mlData.getDataSet()); Instances filtered = Filter.useFilter(mlData.getDataSet(), filterRemove); MultiLabelInstances mlFiltered = new MultiLabelInstances(filtered, mlData.getLabelsMetaData()); System.out.println("\n\n\n\n" + mlFiltered.getDataSet()); } // You can now work on the reduced multi-label dataset mlFiltered }
From source file:org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java
License:Apache License
/** * Constructor to instantiate a new FeatureEvaluator object. * * @param json the data string//from www . java2 s .c o m * @param data the data set * @throws Exception the exception */ public FeatureEvaluator(String json, Instances data) throws Exception { // public FeatureEvaluator(String json, Instances data, String filePath) throws Exception { this.featureTitles = new ArrayList<String>(); this.featureScores = new ArrayList<Double>(); this.featureWeights = new ArrayList<Double>(); this.featurePriorities = new ArrayList<Double>(); OrderedJSONObject jsonObject = new OrderedJSONObject(json.toString()); JSONArray jsontokenArray = jsonObject.getJSONArray("unprocessed_data"); String csvString = ""; String str; for (int i = 0; i < jsontokenArray.length(); i++) { str = jsontokenArray.get(i).toString(); str = str.substring(1, str.length() - 1); csvString += str + "\n"; } String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv"; File file = new File(filePath); // if file does not exists, then create it if (!file.exists()) file.createNewFile(); FileUtils.writeStringToFile(file, csvString); CSVLoader loader = new CSVLoader(); loader.setSource(new File(filePath)); data = loader.getDataSet(); if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); int numUnlabeledAttributes = data.numAttributes() - 1; double[] minmaxValues = new double[2]; double min, max; String[] options = new String[1]; options[0] = "-T -1.7976931348623157E308 -N -1"; // confidenceFactor = 0.25, minNumObject = 2 Ranker atrank = new Ranker(); atrank.setOptions(options); weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection(); // Information Gain Attribute Evaluator InfoGainAttributeEval infoGainAttrEval = new InfoGainAttributeEval(); atsel.setEvaluator(infoGainAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] infoGainRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { 
infoGainRanks[i] = Math.round(10000 * infoGainAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(infoGainRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledInfoGainRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledInfoGainRanks[i] = Math.round(10000 * ((infoGainRanks[i] - min) / (max - min))) / 10000d; } // Gain Ratio Attribute Evaluator GainRatioAttributeEval gainRatioAttrEval = new GainRatioAttributeEval(); atsel.setEvaluator(gainRatioAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] gainRatioRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { gainRatioRanks[i] = Math.round(10000 * gainRatioAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(gainRatioRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledGainRatioRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledGainRatioRanks[i] = Math.round(10000 * ((gainRatioRanks[i] - min) / (max - min))) / 10000d; } // Chi Squared Attribute Evaluator ChiSquaredAttributeEval chiSquaredAttrEval = new ChiSquaredAttributeEval(); atsel.setEvaluator(chiSquaredAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] chiSquaredRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { chiSquaredRanks[i] = Math.round(10000 * chiSquaredAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(chiSquaredRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledChiSquaredRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledChiSquaredRanks[i] = Math.round(10000 * ((chiSquaredRanks[i] - min) / (max - min))) / 10000d; } // Symmetrical Uncert Attribute Evaluator SymmetricalUncertAttributeEval symmetricalUncertAttrEval = new SymmetricalUncertAttributeEval(); 
atsel.setEvaluator(symmetricalUncertAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] symmetricalUncertRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { symmetricalUncertRanks[i] = Math.round(10000 * symmetricalUncertAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(symmetricalUncertRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledSymmetricalUncertRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledSymmetricalUncertRanks[i] = Math.round(10000 * ((symmetricalUncertRanks[i] - min) / (max - min))) / 10000d; } // Significance Attribute Evaluator SignificanceAttributeEval significanceAttrEval = new SignificanceAttributeEval(); atsel.setEvaluator(significanceAttrEval); atsel.setSearch(atrank); atsel.SelectAttributes(data); double[] significanceRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { significanceRanks[i] = Math.round(10000 * significanceAttrEval.evaluateAttribute(i)) / 10000d; } minmaxValues = computerMinMaxValues(significanceRanks); min = minmaxValues[0]; max = minmaxValues[1]; double[] scaledSignificanceRanks = new double[numUnlabeledAttributes]; for (int i = 0; i < numUnlabeledAttributes; i++) { scaledSignificanceRanks[i] = Math.round(10000 * ((significanceRanks[i] - min) / (max - min))) / 10000d; } double attributeSum; double[] combinedRanks = new double[numUnlabeledAttributes]; double combinedranksSum = 0; for (int i = 0; i < numUnlabeledAttributes; i++) { attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i] + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i]; combinedRanks[i] = Math.round(10000 * attributeSum) / 10000d; combinedranksSum = combinedranksSum + combinedRanks[i]; } double[][] tempArray = new double[numUnlabeledAttributes][2]; String[] attributesTitles = new 
String[numUnlabeledAttributes]; double[] attributesScores = new double[numUnlabeledAttributes]; double[] attributesWeights = new double[numUnlabeledAttributes]; double[] attributesPriorities = new double[numUnlabeledAttributes]; for (int j = 0; j < numUnlabeledAttributes; j++) { tempArray[j][0] = j; tempArray[j][1] = combinedRanks[j]; } double temp; for (int i = 0; i < numUnlabeledAttributes; i++) { for (int j = 1; j < (numUnlabeledAttributes - i); j++) { if (combinedRanks[j - 1] < combinedRanks[j]) { //swap the elements! temp = combinedRanks[j - 1]; combinedRanks[j - 1] = combinedRanks[j]; combinedRanks[j] = temp; } } } for (int j = 0; j < numUnlabeledAttributes; j++) { for (int k = 0; k < numUnlabeledAttributes; k++) { if (combinedRanks[j] == tempArray[k][1]) { attributesTitles[j] = data.attribute((int) tempArray[k][0]).toString(); String res[] = attributesTitles[j].split("\\s+"); attributesTitles[j] = res[1]; this.featureTitles.add(attributesTitles[j]); break; } } attributesScores[j] = Math.round(10000 * (combinedRanks[j] / 9)) / 100d; attributesWeights[j] = Math.round(10000 * (combinedRanks[j] / combinedranksSum)) / 100d; attributesPriorities[j] = Math.round(attributesScores[j] * attributesWeights[j]) / 100d; this.featureScores.add(attributesScores[j]); this.featureWeights.add(attributesWeights[j]); this.featurePriorities.add(attributesPriorities[j]); System.out.println(attributesTitles[j] + " is " + attributesScores[j] + " % Important"); } }
From source file:sirius.misc.zscore.ZscoreTableModel.java
License:Open Source License
public void compute(final Instances posInstances, final Instances negInstances) { if (posInstances == null || negInstances == null) { JOptionPane.showMessageDialog(null, "Please load file before computing.", "Error", JOptionPane.ERROR_MESSAGE); return;/*w w w . java 2 s . com*/ } if (posInstances.numAttributes() != negInstances.numAttributes()) { JOptionPane.showMessageDialog(null, "Number of attributes between the two files does not tally.", "Error", JOptionPane.ERROR_MESSAGE); return; } this.scoreList = new ArrayList<Scores>(); this.posInstances = posInstances; this.negInstances = negInstances; Thread thread = new Thread() { public void run() { MessageDialog m = new MessageDialog(null, "Progress", "0%"); int percentCount = posInstances.numAttributes() / 100; if (percentCount == 0) percentCount = 1; for (int x = 0; x < posInstances.numAttributes(); x++) { if (x % percentCount == 0) m.update(x / percentCount + "%"); if (posInstances.attribute(x).isNumeric() == false) { ZscoreTableModel.this.scoreList.add(new Scores(posInstances.attribute(x).name())); continue; } String name = posInstances.attribute(x).name(); double posMean = posInstances.attributeStats(x).numericStats.mean; double posStdDev = posInstances.attributeStats(x).numericStats.stdDev; double negMean = negInstances.attributeStats(x).numericStats.mean; double negStdDev = negInstances.attributeStats(x).numericStats.stdDev; if (negStdDev == 0) negStdDev = 0.01; double totalZScore = 0.0; int numGTZScore0_5 = 0; int numGTZScore1 = 0; int numGTZScore2 = 0; int numGTZScore3 = 0; for (int y = 0; y < posInstances.numInstances(); y++) { double zScore = Math.abs(((posInstances.instance(y).value(x) - negMean) / negStdDev)); totalZScore += zScore; if (zScore > 0.5) numGTZScore0_5++; if (zScore > 1) numGTZScore1++; if (zScore > 2) numGTZScore2++; if (zScore > 3) numGTZScore3++; } double meanZScore = totalZScore / posInstances.numInstances(); double percentGTZScore0_5 = (numGTZScore0_5 * 100) / 
posInstances.numInstances(); double percentGTZScore1 = (numGTZScore1 * 100) / posInstances.numInstances(); double percentGTZScore2 = (numGTZScore2 * 100) / posInstances.numInstances(); double percentGTZScore3 = (numGTZScore3 * 100) / posInstances.numInstances(); ZscoreTableModel.this.scoreList .add(new Scores(name, posMean, posStdDev, negMean, negStdDev, meanZScore, percentGTZScore0_5, percentGTZScore1, percentGTZScore2, percentGTZScore3, -1)); } try { Instances instances = new Instances(posInstances); for (int x = 0; x < negInstances.numInstances(); x++) instances.add(negInstances.instance(x)); instances.setClassIndex(instances.numAttributes() - 1); //Evaluate the attributes individually and obtain the gainRatio GainRatioAttributeEval gainRatio = new GainRatioAttributeEval(); if (instances.numAttributes() > 0) { gainRatio.buildEvaluator(instances); } for (int x = 0; x < (instances.numAttributes() - 1); x++) { ZscoreTableModel.this.scoreList.get(x).setGainRatio(gainRatio.evaluateAttribute(x)); } } catch (Exception e) { e.printStackTrace(); } Collections.sort(ZscoreTableModel.this.scoreList, new SortByMeanZScore()); fireTableDataChanged(); m.dispose(); ZscoreTableModel.this.label.setText("" + ZscoreTableModel.this.scoreList.size()); } }; thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority thread.start(); }
From source file:tutorials.featureselection.TutorialWekaAttributeSelection.java
License:Open Source License
/**
 * Tutorial entry point: loads the iris data, runs Weka's gain-ratio evaluator
 * with a ranker search through the {@code WekaAttributeSelection} bridge and
 * prints the rank and score reported for each attribute.
 *
 * @param args unused
 * @throws IOException if the data file cannot be read
 */
public static void main(String[] args) throws IOException {
    // Load the comma-separated data file; the second argument (4) is
    // presumably the index of the class column - confirm against FileHandler.
    File irisFile = new File("devtools/data/iris.data");
    Dataset irisData = FileHandler.loadDataset(irisFile, 4, ",");

    // Weka evaluator and search strategy, wrapped in the bridge class.
    ASEvaluation evaluator = new GainRatioAttributeEval();
    ASSearch ranker = new Ranker();
    WekaAttributeSelection selection = new WekaAttributeSelection(evaluator, ranker);

    // Run the selection on the loaded data.
    selection.build(irisData);

    System.out.println("Total number of attributes: " + selection.noAttributes());

    // Print rank and score for every index below noAttributes() - 1.
    int index = 0;
    while (index < selection.noAttributes() - 1) {
        String report = "Attribute " + index + " Ranks " + selection.rank(index) + " and Scores "
                + selection.score(index);
        System.out.println(report);
        index++;
    }
}