List of usage examples for weka.attributeSelection.InfoGainAttributeEval
public InfoGainAttributeEval()
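Before the project-specific examples below, here is a minimal, self-contained sketch of the pattern they all share: rank the attributes of a nominal-class dataset with InfoGainAttributeEval and a Ranker search. The class name and the "dataset.arff" path are placeholders, not taken from any of the source files.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.InfoGainAttributeEval;
import weka.attributeSelection.Ranker;
import weka.core.Instances;

public class InfoGainExample {

    public static void main(String[] args) throws Exception {
        // load any ARFF file with a nominal class attribute (path is a placeholder)
        Instances data = new Instances(new BufferedReader(new FileReader("dataset.arff")));
        data.setClassIndex(data.numAttributes() - 1);

        // rank all attributes by their information gain with respect to the class
        AttributeSelection attsel = new AttributeSelection();
        attsel.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setNumToSelect(-1); // keep every attribute in the ranking
        attsel.setSearch(ranker);
        attsel.SelectAttributes(data);

        // rankedAttributes() returns rows of {attribute index, information gain}
        for (double[] row : attsel.rankedAttributes()) {
            System.out.println(data.attribute((int) row[0]).name() + " -> " + row[1]);
        }
    }
}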
From source file: mlda.attributes.AvgGainRatio.java
License: Open Source License
/**
 * Calculate the metric value.
 *
 * @param mlData Multi-label dataset for which to calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    double res = 0.0;
    try {
        ASEvaluation ase = new InfoGainAttributeEval();
        BinaryRelevanceAttributeEvaluator eval = new BinaryRelevanceAttributeEvaluator(ase, mlData, "avg",
                "none", "eval");
        int[] featureIndices = mlData.getFeatureIndices();
        for (int i : featureIndices) {
            res += eval.evaluateAttribute(i);
        }
        res = res / featureIndices.length;
    } catch (Exception e) {
        e.printStackTrace();
        res = Double.NaN;
    }
    this.value = res;
    return value;
}
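A minimal sketch of how this metric might be invoked, assuming a Mulan dataset loaded from an ARFF file plus the usual XML label definition, and assuming AvgGainRatio exposes a public no-argument constructor; the file names are placeholders.

import mlda.attributes.AvgGainRatio;
import mulan.data.MultiLabelInstances;

public class AvgGainRatioExample {

    public static void main(String[] args) throws Exception {
        // file names are placeholders for a Mulan-formatted dataset
        MultiLabelInstances mlData = new MultiLabelInstances("emotions.arff", "emotions.xml");

        // calculate() averages the per-feature scores produced by the
        // BinaryRelevanceAttributeEvaluator wrapper shown above
        AvgGainRatio metric = new AvgGainRatio();
        System.out.println("Average score over all features: " + metric.calculate(mlData));
    }
}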
From source file: mulan.experiments.ENTCS13FeatureSelection.java
License: Open Source License
/**
 * Initiates a {@link weka.attributeSelection.ASEvaluation} given by a Weka
 * feature importance measure and a Mulan approach to deal with
 * {@link MultiLabelInstances}.
 *
 * @param multiLabelFeatureSelectionMethod name of the multi-label feature
 *            selection method ("RF-BR", "RF-LP", "IG-BR", "IG-LP")
 * @param dataSet original dataset with all features. This dataset should
 *            not have any feature/label named "class"
 * @return an initialized {@link weka.attributeSelection.ASEvaluation} to
 *         perform multi-label feature selection
 */
public static ASEvaluation buildMultiLabelFeatureSelection(String multiLabelFeatureSelectionMethod,
        MultiLabelInstances dataSet) {
    if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("RFBR")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("RF-BR")) {
        return new BinaryRelevanceAttributeEvaluator(new ReliefFAttributeEval(), dataSet, "avg", "none", "eval");
    } else if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("RFLP")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("RF-LP")) {
        return new LabelPowersetAttributeEvaluator(new ReliefFAttributeEval(), dataSet);
    } else if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("IGBR")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("IG-BR")) {
        return new BinaryRelevanceAttributeEvaluator(new InfoGainAttributeEval(), dataSet, "avg", "none", "eval");
    } else if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("IGLP")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("IG-LP")) {
        return new LabelPowersetAttributeEvaluator(new InfoGainAttributeEval(), dataSet);
    }
    System.out.println("multiLabelFeatureSelectionMethod should be set on one of the allowed values");
    System.exit(1);
    return null;
}
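The returned evaluator can then be queried feature by feature, just as the first example on this page does. A hedged sketch of such a call, written as a hypothetical helper added to the same class (the helper name, the "IG-BR" choice, and the cast to BinaryRelevanceAttributeEvaluator are illustrative and only apply to the binary-relevance variants):

// illustrative helper (not part of ENTCS13FeatureSelection): print per-feature scores
// for one of the binary-relevance variants ("IG-BR" or "RF-BR")
static void printFeatureScores(MultiLabelInstances dataSet) throws Exception {
    BinaryRelevanceAttributeEvaluator evaluator =
            (BinaryRelevanceAttributeEvaluator) buildMultiLabelFeatureSelection("IG-BR", dataSet);
    for (int index : dataSet.getFeatureIndices()) {
        System.out.println("feature " + index + " -> " + evaluator.evaluateAttribute(index));
    }
}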
From source file: net.semanticmetadata.lire.classifiers.HashingSearchBasedClassifierMod.java
License: Open Source License
private static HashMap<String, Double> calculateInformationGain(String wekaFileLocation,
        double[] featureInformationGain, int featureSpace[], HashMap<String, Integer> featureSpaceHashMap,
        ArrayList<String> featureOrder, HashMap<String, Double> featureInformationGainHashMap) {
    Instances data = null;
    try {
        data = new Instances(new BufferedReader(new FileReader(wekaFileLocation)));
    } catch (IOException e) {
        e.printStackTrace();
    }

    AttributeSelection attsel = new AttributeSelection(); // package weka.attributeSelection!
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308);
    search.setNumToSelect(-1);
    search.setGenerateRanking(true);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    try {
        attsel.SelectAttributes(data);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // obtain the attribute indices that were selected
    int[] indices = new int[0];
    double[][] rankedAttribuesArray = new double[0][0];
    try {
        rankedAttribuesArray = attsel.rankedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }
    try {
        indices = attsel.selectedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // attribute names are expected to look like "<featureIndex>_<dimension>";
    // accumulate the information gain of every dimension onto its feature
    for (int i = 0; i < rankedAttribuesArray.length; i++) {
        int currentFeature = Integer.parseInt(data.attribute((int) rankedAttribuesArray[i][0]).name()
                .substring(0, data.attribute((int) rankedAttribuesArray[i][0]).name().indexOf("_")));
        featureInformationGainHashMap.put(featureOrder.get(currentFeature),
                featureInformationGainHashMap.get(featureOrder.get(currentFeature)) + rankedAttribuesArray[i][1]);
    }

    // Calculate the mean of the information gain (better comparable)
    for (int i = 0; i < featureOrder.size(); i++) {
        // featureInformationGainHashMap.put(featureOrder.get(i),
        //         (featureInformationGainHashMap.get(featureOrder.get(i)) / featureSpaceHashMap.get(featureOrder.get(i))) * 100);
        featureInformationGainHashMap.put(featureOrder.get(i),
                featureInformationGainHashMap.get(featureOrder.get(i)));
    }

    System.out.println("Scoring finished, starting with classification! \nScores: ");
    for (int i = 0; i < featureOrder.size(); i++) {
        System.out.println(featureOrder.get(i) + " " + featureInformationGainHashMap.get(featureOrder.get(i)));
    }

    File deleteFile = new File(wekaFileLocation);
    deleteFile.delete();
    return featureInformationGainHashMap;
}
From source file: org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java
License: Apache License
/**
 * Constructor to instantiate a new FeatureEvaluator object.
 *
 * @param json the data string
 * @param data the data set
 * @throws Exception the exception
 */
public FeatureEvaluator(String json, Instances data) throws Exception {
    this.featureTitles = new ArrayList<String>();
    this.featureScores = new ArrayList<Double>();
    this.featureWeights = new ArrayList<Double>();
    this.featurePriorities = new ArrayList<Double>();

    // rebuild a CSV file from the "unprocessed_data" array of the JSON input
    OrderedJSONObject jsonObject = new OrderedJSONObject(json.toString());
    JSONArray jsontokenArray = jsonObject.getJSONArray("unprocessed_data");
    String csvString = "";
    String str;
    for (int i = 0; i < jsontokenArray.length(); i++) {
        str = jsontokenArray.get(i).toString();
        str = str.substring(1, str.length() - 1);
        csvString += str + "\n";
    }
    String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv";
    File file = new File(filePath);
    // if the file does not exist, create it
    if (!file.exists())
        file.createNewFile();
    FileUtils.writeStringToFile(file, csvString);

    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(filePath));
    data = loader.getDataSet();
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);

    int numUnlabeledAttributes = data.numAttributes() - 1;
    double[] minmaxValues = new double[2];
    double min, max;

    // rank all attributes (threshold -Double.MAX_VALUE, numToSelect -1)
    String[] options = new String[1];
    options[0] = "-T -1.7976931348623157E308 -N -1";
    Ranker atrank = new Ranker();
    atrank.setOptions(options);
    weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection();

    // Information Gain Attribute Evaluator
    InfoGainAttributeEval infoGainAttrEval = new InfoGainAttributeEval();
    atsel.setEvaluator(infoGainAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] infoGainRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        infoGainRanks[i] = Math.round(10000 * infoGainAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(infoGainRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledInfoGainRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledInfoGainRanks[i] = Math.round(10000 * ((infoGainRanks[i] - min) / (max - min))) / 10000d;
    }

    // Gain Ratio Attribute Evaluator
    GainRatioAttributeEval gainRatioAttrEval = new GainRatioAttributeEval();
    atsel.setEvaluator(gainRatioAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] gainRatioRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        gainRatioRanks[i] = Math.round(10000 * gainRatioAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(gainRatioRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledGainRatioRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledGainRatioRanks[i] = Math.round(10000 * ((gainRatioRanks[i] - min) / (max - min))) / 10000d;
    }

    // Chi Squared Attribute Evaluator
    ChiSquaredAttributeEval chiSquaredAttrEval = new ChiSquaredAttributeEval();
    atsel.setEvaluator(chiSquaredAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] chiSquaredRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        chiSquaredRanks[i] = Math.round(10000 * chiSquaredAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(chiSquaredRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledChiSquaredRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledChiSquaredRanks[i] = Math.round(10000 * ((chiSquaredRanks[i] - min) / (max - min))) / 10000d;
    }

    // Symmetrical Uncertainty Attribute Evaluator
    SymmetricalUncertAttributeEval symmetricalUncertAttrEval = new SymmetricalUncertAttributeEval();
    atsel.setEvaluator(symmetricalUncertAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] symmetricalUncertRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        symmetricalUncertRanks[i] = Math.round(10000 * symmetricalUncertAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(symmetricalUncertRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledSymmetricalUncertRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledSymmetricalUncertRanks[i] = Math.round(10000 * ((symmetricalUncertRanks[i] - min) / (max - min))) / 10000d;
    }

    // Significance Attribute Evaluator
    SignificanceAttributeEval significanceAttrEval = new SignificanceAttributeEval();
    atsel.setEvaluator(significanceAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] significanceRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        significanceRanks[i] = Math.round(10000 * significanceAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(significanceRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledSignificanceRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledSignificanceRanks[i] = Math.round(10000 * ((significanceRanks[i] - min) / (max - min))) / 10000d;
    }

    // combine the five scaled rankings per attribute
    double attributeSum;
    double[] combinedRanks = new double[numUnlabeledAttributes];
    double combinedranksSum = 0;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i]
                + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i];
        combinedRanks[i] = Math.round(10000 * attributeSum) / 10000d;
        combinedranksSum = combinedranksSum + combinedRanks[i];
    }

    double[][] tempArray = new double[numUnlabeledAttributes][2];
    String[] attributesTitles = new String[numUnlabeledAttributes];
    double[] attributesScores = new double[numUnlabeledAttributes];
    double[] attributesWeights = new double[numUnlabeledAttributes];
    double[] attributesPriorities = new double[numUnlabeledAttributes];
    for (int j = 0; j < numUnlabeledAttributes; j++) {
        tempArray[j][0] = j;
        tempArray[j][1] = combinedRanks[j];
    }

    // sort combinedRanks in descending order (bubble sort)
    double temp;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        for (int j = 1; j < (numUnlabeledAttributes - i); j++) {
            if (combinedRanks[j - 1] < combinedRanks[j]) {
                // swap the elements
                temp = combinedRanks[j - 1];
                combinedRanks[j - 1] = combinedRanks[j];
                combinedRanks[j] = temp;
            }
        }
    }

    for (int j = 0; j < numUnlabeledAttributes; j++) {
        for (int k = 0; k < numUnlabeledAttributes; k++) {
            if (combinedRanks[j] == tempArray[k][1]) {
                attributesTitles[j] = data.attribute((int) tempArray[k][0]).toString();
                String res[] = attributesTitles[j].split("\\s+");
                attributesTitles[j] = res[1];
                this.featureTitles.add(attributesTitles[j]);
                break;
            }
        }
        attributesScores[j] = Math.round(10000 * (combinedRanks[j] / 9)) / 100d;
        attributesWeights[j] = Math.round(10000 * (combinedRanks[j] / combinedranksSum)) / 100d;
        attributesPriorities[j] = Math.round(attributesScores[j] * attributesWeights[j]) / 100d;
        this.featureScores.add(attributesScores[j]);
        this.featureWeights.add(attributesWeights[j]);
        this.featurePriorities.add(attributesPriorities[j]);
        System.out.println(attributesTitles[j] + " is " + attributesScores[j] + " % Important");
    }
}
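The helper computerMinMaxValues is not shown in this listing; from the way its result is used (index 0 as the minimum, index 1 as the maximum, followed by min-max scaling), it presumably looks something like the following sketch.

// illustrative reconstruction of the helper used above: returns {min, max} of the ranks
private double[] computerMinMaxValues(double[] ranks) {
    double min = Double.POSITIVE_INFINITY;
    double max = Double.NEGATIVE_INFINITY;
    for (double rank : ranks) {
        if (rank < min) min = rank;
        if (rank > max) max = rank;
    }
    return new double[] { min, max };
}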
From source file: trabfs.machineLeaningFrameWork.core.Problema.java
public double[] getAttributeQuality() {
    try {
        ASEvaluation[] filters = { new InfoGainAttributeEval(), new ChiSquaredAttributeEval(),
                new ReliefFAttributeEval() };
        R = new double[data.numAttributes() - 1][filters.length];
        Ranker rk = new Ranker();
        AttributeSelection selec = new AttributeSelection();
        selec.setSearch(rk);
        for (int j = 0; j < filters.length; j++) {
            selec.setEvaluator(filters[j]);
            selec.SelectAttributes(data);
            double[][] full = selec.rankedAttributes();
            // sort by attribute index so that row i of R corresponds to attribute i
            Arrays.sort(full, new Comparator() {
                @Override
                public int compare(Object t, Object t1) {
                    double[] a1 = (double[]) t;
                    double[] a2 = (double[]) t1;
                    if (a1[0] > a2[0])
                        return 1;
                    else if (a1[0] < a2[0])
                        return -1;
                    else
                        return 0;
                }
            });
            double max = Double.NEGATIVE_INFINITY, min = Double.POSITIVE_INFINITY;
            for (int i = 0; i < full.length; i++) {
                if (full[i][1] < min)
                    min = full[i][1];
                if (full[i][1] > max)
                    max = full[i][1];
            }
            // store the min-max normalised score of each attribute for this filter
            for (int i = 0; i < full.length; i++) {
                R[i][j] = (full[i][1] - min) / (max - min);
            }
        }
        // average the normalised scores over the three filters
        double[] Rfinal = new double[data.numAttributes() - 1];
        for (int i = 0; i < Rfinal.length; i++) {
            Rfinal[i] = somaWK(i) / 3.0f;
        }
        return Rfinal;
    } catch (Exception ex) {
        Logger.getLogger(Problema.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
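somaWK is not part of this listing either; given that Rfinal[i] is somaWK(i) divided by the number of filters (3), it plausibly sums row i of the normalised score matrix R, roughly as in this sketch.

// illustrative reconstruction: sum the normalised scores of attribute i across all filters
private double somaWK(int i) {
    double sum = 0.0;
    for (int j = 0; j < R[i].length; j++) {
        sum += R[i][j];
    }
    return sum;
}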