Example usage for weka.attributeSelection InfoGainAttributeEval InfoGainAttributeEval

Introduction

On this page you can find example usage for the weka.attributeSelection.InfoGainAttributeEval constructor, InfoGainAttributeEval().

Prototype

public InfoGainAttributeEval() 

Document

Constructor (no arguments).
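
Before the per-project examples below, here is a minimal, self-contained sketch of the typical pattern around this constructor: build an InfoGainAttributeEval, pair it with a Ranker search, and rank the attributes of a loaded dataset. The ARFF path is a placeholder; this sketch is illustrative and is not taken from the source files listed under Usage.

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.InfoGainAttributeEval;
import weka.attributeSelection.Ranker;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InfoGainExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset and use the last attribute as the class (placeholder path).
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Rank all attributes by their information gain with respect to the class.
        AttributeSelection selector = new AttributeSelection();
        selector.setEvaluator(new InfoGainAttributeEval());
        Ranker ranker = new Ranker();
        ranker.setNumToSelect(-1); // keep every attribute in the ranking
        selector.setSearch(ranker);
        selector.SelectAttributes(data);

        // Each row of rankedAttributes() holds {attribute index, information gain}.
        for (double[] row : selector.rankedAttributes()) {
            System.out.println(data.attribute((int) row[0]).name() + ": " + row[1]);
        }
    }
}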

Usage

From source file:mlda.attributes.AvgGainRatio.java

License:Open Source License

/**
 * Calculate metric value
 * 
 * @param mlData Multi-label dataset for which to calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    double res = 0.0;

    try {
        ASEvaluation ase = new InfoGainAttributeEval();

        BinaryRelevanceAttributeEvaluator eval = new BinaryRelevanceAttributeEvaluator(ase, mlData, "avg",
                "none", "eval");

        int[] featureIndices = mlData.getFeatureIndices();

        for (int i : featureIndices) {
            res += eval.evaluateAttribute(i);
        }

        res = res / featureIndices.length;
    } catch (Exception e) {
        e.printStackTrace();
        res = Double.NaN;
    }

    this.value = res;
    return value;
}
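
A hedged sketch of how this metric class might be invoked. The dataset paths are placeholders, and both the no-argument AvgGainRatio constructor and loading through Mulan's MultiLabelInstances(arffFile, xmlFile) constructor are assumptions; only calculate(MultiLabelInstances) comes from the source above.

import mlda.attributes.AvgGainRatio;
import mulan.data.MultiLabelInstances;

public class AvgGainRatioDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder paths: an ARFF file plus Mulan's XML label definition.
        MultiLabelInstances mlData = new MultiLabelInstances("dataset.arff", "dataset.xml");

        // Average per-feature information gain, scored via binary relevance.
        AvgGainRatio metric = new AvgGainRatio();
        System.out.println("Metric value: " + metric.calculate(mlData));
    }
}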

From source file:mulan.experiments.ENTCS13FeatureSelection.java

License:Open Source License

/**
 * Initializes a {@link weka.attributeSelection.ASEvaluation} given by a Weka
 * feature importance measure and a Mulan approach for dealing with
 * {@link MultiLabelInstances}
 *
 * @param multiLabelFeatureSelectionMethod name of the multi-label feature
 * selection method ("RF-BR", "RF-LP", "IG-BR", "IG-LP")
 * @param dataSet original dataset with all features. This dataset should
 * not have any feature/label named "class"
 * @return an initialized {@link weka.attributeSelection.ASEvaluation} to
 * perform multi-label feature selection
 */
public static ASEvaluation buildMultiLabelFeatureSelection(String multiLabelFeatureSelectionMethod,
        MultiLabelInstances dataSet) {
    if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("RFBR")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("RF-BR")) {
        return new BinaryRelevanceAttributeEvaluator(new ReliefFAttributeEval(), dataSet, "avg", "none",
                "eval");
    } else if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("RFLP")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("RF-LP")) {
        return new LabelPowersetAttributeEvaluator(new ReliefFAttributeEval(), dataSet);
    } else if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("IGBR")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("IG-BR")) {
        return new BinaryRelevanceAttributeEvaluator(new InfoGainAttributeEval(), dataSet, "avg", "none",
                "eval");
    } else if (multiLabelFeatureSelectionMethod.equalsIgnoreCase("IGLP")
            || multiLabelFeatureSelectionMethod.equalsIgnoreCase("IG-LP")) {
        return new LabelPowersetAttributeEvaluator(new InfoGainAttributeEval(), dataSet);
    }
    System.out.println("multiLabelFeatureSelectionMethod should be set on one of the allowed values");
    System.exit(1);
    return null;
}
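
A hedged sketch of calling this helper with the "IG-BR" setting and then scoring each feature. The dataset paths are placeholders, and treating the returned evaluator as a weka.attributeSelection.AttributeEvaluator mirrors the first example on this page rather than anything shown in this source file.

import mulan.data.MultiLabelInstances;
import mulan.experiments.ENTCS13FeatureSelection;
import weka.attributeSelection.ASEvaluation;
import weka.attributeSelection.AttributeEvaluator;

public class MultiLabelFeatureSelectionDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder paths for a Mulan multi-label dataset.
        MultiLabelInstances dataSet = new MultiLabelInstances("dataset.arff", "dataset.xml");

        // "IG-BR": information gain per label via binary relevance, averaged across labels.
        ASEvaluation evaluation = ENTCS13FeatureSelection.buildMultiLabelFeatureSelection("IG-BR", dataSet);

        // The BR and LP wrappers returned above expose per-attribute scores.
        AttributeEvaluator scorer = (AttributeEvaluator) evaluation;
        for (int i : dataSet.getFeatureIndices()) {
            System.out.println("feature " + i + " -> " + scorer.evaluateAttribute(i));
        }
    }
}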

From source file:net.semanticmetadata.lire.classifiers.HashingSearchBasedClassifierMod.java

License:Open Source License

private static HashMap<String, Double> calculateInformationGain(String wekaFileLocation,
        double[] featureInformationGain, int featureSpace[], HashMap<String, Integer> featureSpaceHashMap,
        ArrayList<String> featureOrder, HashMap<String, Double> featureInformationGainHashMap) {

    Instances data = null;
    try {
        data = new Instances(new BufferedReader(new FileReader(wekaFileLocation)));
    } catch (IOException e) {
        e.printStackTrace();
    }
    AttributeSelection attsel = new AttributeSelection(); // package weka.attributeSelection!
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    search.setThreshold(-1.7976931348623157E308); // -Double.MAX_VALUE: no score cut-off
    search.setNumToSelect(-1); // rank every attribute
    search.setGenerateRanking(true);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    try {

        attsel.SelectAttributes(data);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // obtain the attribute indices that were selected
    int[] indices = new int[0];
    double[][] rankedAttribuesArray = new double[0][0];
    try {
        rankedAttribuesArray = attsel.rankedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }
    try {
        indices = attsel.selectedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }

    for (int i = 0; i < rankedAttribuesArray.length; i++) {

        int currentFeature = Integer.parseInt(data.attribute((int) rankedAttribuesArray[i][0]).name()
                .substring(0, data.attribute((int) rankedAttribuesArray[i][0]).name().indexOf("_")));
        //System.out.println("DDDDDDDDDDDDDD"+currentFeature);
        // System.out.print(data.attribute((int) rankedAttribuesArray[i][0]).name() + "/" + rankedAttribuesArray[i][0] + "/");
        //     System.out.println(rankedAttribuesArray[i][1]);
        // data.attribute((int) rankedAttribuesArray[i][0]).name().substring(0,data.attribute((int) rankedAttribuesArray[i][0]).name().indexOf("_"));
        // featureInformationGain[currentFeature] = featureInformationGain[currentFeature] + rankedAttribuesArray[i][1];
        featureInformationGainHashMap.put(featureOrder.get(currentFeature),
                featureInformationGainHashMap.get(featureOrder.get(currentFeature))
                        + rankedAttribuesArray[i][1]);
    }

    //Calculate the mean of the information gain (better comparable)
    // for (int i = 0; i < featureInformationGain.length; i++) {
    //     featureInformationGain[i] = (featureInformationGain[i] / featureSpace[i]) * 100;
    // }

    //Calculate the mean of the information gain (better comparable)
    for (int i = 0; i < featureOrder.size(); i++) {
        //  featureInformationGainHashMap.put(featureOrder.get(i), (featureInformationGainHashMap.get(featureOrder.get(i)) / featureSpaceHashMap.get(featureOrder.get(i))) * 100);
        featureInformationGainHashMap.put(featureOrder.get(i),
                (featureInformationGainHashMap.get(featureOrder.get(i))));
    }

    // for(int i=0;i<0;i++){
    //     System.out.println(data.attribute(indices[i]).toString());
    // }
    System.out.println("Scoring finished, starting with classification! Scores: ");
    for (int i = 0; i < featureOrder.size(); i++) {
        System.out.println(featureOrder.get(i) + " " + featureInformationGainHashMap.get(featureOrder.get(i)));
        // featureInformationGainHashMap.put(featureOrder.get(i),(featureInformationGainHashMap.get(featureOrder.get(i))/featureSpaceHashMap.get(featureOrder.get(i)))*100);
    }
    // return featureInformationGain;
    File deleteFile = new File(wekaFileLocation);
    deleteFile.delete();
    return featureInformationGainHashMap;
}
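
For reference, attsel.rankedAttributes() returns one row per attribute, with the attribute index in column 0 and its information-gain score in column 1; the method above additionally assumes attribute names of the form <featureIndex>_<suffix>, so scores can be accumulated per feature. A minimal continuation sketch of reading such a ranking (same Weka calls, reusing the attsel and data variables from the method above):

// Assuming `attsel` has already run SelectAttributes(data), as in the method above.
double[][] ranked = attsel.rankedAttributes();
for (double[] row : ranked) {
    int attrIndex = (int) row[0];   // position of the attribute in `data`
    double infoGain = row[1];       // its information-gain score
    System.out.println(data.attribute(attrIndex).name() + " = " + infoGain);
}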

From source file:org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java

License:Apache License

/**
 * Constructor to instantiate a new FeatureEvaluator object.
 *
 * @param json the data string
 * @param data the data set
 * @throws Exception the exception
 */

public FeatureEvaluator(String json, Instances data) throws Exception {
    //   public FeatureEvaluator(String json, Instances data, String filePath) throws Exception {

    this.featureTitles = new ArrayList<String>();
    this.featureScores = new ArrayList<Double>();
    this.featureWeights = new ArrayList<Double>();
    this.featurePriorities = new ArrayList<Double>();

    OrderedJSONObject jsonObject = new OrderedJSONObject(json.toString());
    JSONArray jsontokenArray = jsonObject.getJSONArray("unprocessed_data");
    String csvString = "";
    String str;
    for (int i = 0; i < jsontokenArray.length(); i++) {
        str = jsontokenArray.get(i).toString();
        str = str.substring(1, str.length() - 1);
        csvString += str + "\n";
    }

    String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv";
    File file = new File(filePath);
    // if the file does not exist, create it
    if (!file.exists())
        file.createNewFile();

    FileUtils.writeStringToFile(file, csvString);

    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(filePath));
    data = loader.getDataSet();

    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);

    int numUnlabeledAttributes = data.numAttributes() - 1;
    double[] minmaxValues = new double[2];
    double min, max;

    // Each option token must be its own array element for Weka's option parser.
    String[] options = { "-T", "-1.7976931348623157E308", "-N", "-1" }; // threshold = -Double.MAX_VALUE, rank all attributes
    Ranker atrank = new Ranker();
    atrank.setOptions(options);

    weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection();

    //  Information Gain Attribute Evaluator
    InfoGainAttributeEval infoGainAttrEval = new InfoGainAttributeEval();
    atsel.setEvaluator(infoGainAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] infoGainRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        infoGainRanks[i] = Math.round(10000 * infoGainAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(infoGainRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledInfoGainRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledInfoGainRanks[i] = Math.round(10000 * ((infoGainRanks[i] - min) / (max - min))) / 10000d;
    }

    //  Gain Ratio Attribute Evaluator
    GainRatioAttributeEval gainRatioAttrEval = new GainRatioAttributeEval();
    atsel.setEvaluator(gainRatioAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] gainRatioRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        gainRatioRanks[i] = Math.round(10000 * gainRatioAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(gainRatioRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledGainRatioRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledGainRatioRanks[i] = Math.round(10000 * ((gainRatioRanks[i] - min) / (max - min))) / 10000d;
    }

    //  Chi Squared Attribute Evaluator
    ChiSquaredAttributeEval chiSquaredAttrEval = new ChiSquaredAttributeEval();
    atsel.setEvaluator(chiSquaredAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] chiSquaredRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        chiSquaredRanks[i] = Math.round(10000 * chiSquaredAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(chiSquaredRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledChiSquaredRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledChiSquaredRanks[i] = Math.round(10000 * ((chiSquaredRanks[i] - min) / (max - min))) / 10000d;
    }

    //  Symmetrical Uncert Attribute Evaluator
    SymmetricalUncertAttributeEval symmetricalUncertAttrEval = new SymmetricalUncertAttributeEval();
    atsel.setEvaluator(symmetricalUncertAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] symmetricalUncertRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        symmetricalUncertRanks[i] = Math.round(10000 * symmetricalUncertAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(symmetricalUncertRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledSymmetricalUncertRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledSymmetricalUncertRanks[i] = Math.round(10000 * ((symmetricalUncertRanks[i] - min) / (max - min)))
                / 10000d;
    }

    //  Significance Attribute Evaluator
    SignificanceAttributeEval significanceAttrEval = new SignificanceAttributeEval();
    atsel.setEvaluator(significanceAttrEval);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);
    double[] significanceRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        significanceRanks[i] = Math.round(10000 * significanceAttrEval.evaluateAttribute(i)) / 10000d;
    }
    minmaxValues = computerMinMaxValues(significanceRanks);
    min = minmaxValues[0];
    max = minmaxValues[1];
    double[] scaledSignificanceRanks = new double[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        scaledSignificanceRanks[i] = Math.round(10000 * ((significanceRanks[i] - min) / (max - min))) / 10000d;
    }

    double attributeSum;

    double[] combinedRanks = new double[numUnlabeledAttributes];
    double combinedranksSum = 0;

    for (int i = 0; i < numUnlabeledAttributes; i++) {
        attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i]
                + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i];
        combinedRanks[i] = Math.round(10000 * attributeSum) / 10000d;
        combinedranksSum = combinedranksSum + combinedRanks[i];
    }

    double[][] tempArray = new double[numUnlabeledAttributes][2];
    String[] attributesTitles = new String[numUnlabeledAttributes];
    double[] attributesScores = new double[numUnlabeledAttributes];
    double[] attributesWeights = new double[numUnlabeledAttributes];
    double[] attributesPriorities = new double[numUnlabeledAttributes];

    for (int j = 0; j < numUnlabeledAttributes; j++) {
        tempArray[j][0] = j;
        tempArray[j][1] = combinedRanks[j];
    }

    double temp;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        for (int j = 1; j < (numUnlabeledAttributes - i); j++) {
            if (combinedRanks[j - 1] < combinedRanks[j]) {
                //swap the elements!
                temp = combinedRanks[j - 1];
                combinedRanks[j - 1] = combinedRanks[j];
                combinedRanks[j] = temp;
            }
        }
    }

    for (int j = 0; j < numUnlabeledAttributes; j++) {
        for (int k = 0; k < numUnlabeledAttributes; k++) {
            if (combinedRanks[j] == tempArray[k][1]) {
                attributesTitles[j] = data.attribute((int) tempArray[k][0]).toString();
                String res[] = attributesTitles[j].split("\\s+");
                attributesTitles[j] = res[1];

                this.featureTitles.add(attributesTitles[j]);
                break;
            }
        }
        attributesScores[j] = Math.round(10000 * (combinedRanks[j] / 9)) / 100d;
        attributesWeights[j] = Math.round(10000 * (combinedRanks[j] / combinedranksSum)) / 100d;
        attributesPriorities[j] = Math.round(attributesScores[j] * attributesWeights[j]) / 100d;
        this.featureScores.add(attributesScores[j]);
        this.featureWeights.add(attributesWeights[j]);
        this.featurePriorities.add(attributesPriorities[j]);

        System.out.println(attributesTitles[j] + " is " + attributesScores[j] + " % Important");
    }

}
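
A hedged sketch of constructing this class. The JSON layout (an "unprocessed_data" array of quoted CSV rows, header row first) is inferred from the parsing code above, BASE_DIR must point to a writable directory, and passing null for the Instances argument relies on the constructor reassigning it before use; treat all of this as assumptions.

import org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator;

public class FeatureEvaluatorDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical payload: quoted CSV rows, header first, matching the
        // substring(1, length - 1) trimming in the constructor above.
        String json = "{\"unprocessed_data\":["
                + "\"\\\"outlook,temperature,humidity,windy,play\\\"\","
                + "\"\\\"sunny,hot,high,FALSE,no\\\"\","
                + "\"\\\"sunny,hot,high,TRUE,no\\\"\","
                + "\"\\\"overcast,hot,high,FALSE,yes\\\"\","
                + "\"\\\"rainy,mild,high,FALSE,yes\\\"\"]}";

        // The Instances argument is overwritten inside the constructor, so null is passed here.
        FeatureEvaluator evaluator = new FeatureEvaluator(json, null);
    }
}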

From source file:trabfs.machineLeaningFrameWork.core.Problema.java

public double[] getAttributeQuality() {
    try {

        ASEvaluation[] filters = { new InfoGainAttributeEval(), new ChiSquaredAttributeEval(),
                new ReliefFAttributeEval() };
        R = new double[data.numAttributes() - 1][filters.length];
        Ranker rk = new Ranker();
        AttributeSelection selec = new AttributeSelection();
        selec.setSearch(rk);

        for (int j = 0; j < filters.length; j++) {
            selec.setEvaluator(filters[j]);
            selec.SelectAttributes(data);
            double[][] full = selec.rankedAttributes();
            //double[] r = new double[full.length];

            // Sort rows back into attribute-index order (column 0 holds the attribute index).
            Arrays.sort(full, new Comparator<double[]>() {
                @Override
                public int compare(double[] a1, double[] a2) {
                    return Double.compare(a1[0], a2[0]);
                }
            });

            double max = Double.NEGATIVE_INFINITY, min = Double.POSITIVE_INFINITY;
            for (int i = 0; i < full.length; i++) {
                if (full[i][1] < min)
                    min = full[i][1];
                if (full[i][1] > max)
                    max = full[i][1];
            }

            // store the min-max normalized score for filter j
            for (int i = 0; i < full.length; i++) {
                R[i][j] = (full[i][1] - min) / (max - min);
            }
        }

        double[] Rfinal = new double[data.numAttributes() - 1];
        double SW = 1.0f;
        for (int i = 0; i < Rfinal.length; i++) {
            Rfinal[i] = somaWK(i) / 3.0f;
        }

        return Rfinal;
    } catch (Exception ex) {
        Logger.getLogger(Problema.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}