Example usage for weka.attributeSelection GainRatioAttributeEval GainRatioAttributeEval

List of usage examples for weka.attributeSelection GainRatioAttributeEval GainRatioAttributeEval

Introduction

On this page you can find example usages of the weka.attributeSelection.GainRatioAttributeEval constructor, GainRatioAttributeEval().

Prototype

public GainRatioAttributeEval() 

Source Link

Document

Constructor

Usage

From source file:FeatureSelectionClass.java

/**
 * Ranks the attributes of the data set obtained from {@code path} and returns
 * the configured, already-run attribute selection.
 *
 * <p>Despite the method name, the evaluator used here is
 * {@link GainRatioAttributeEval} (gain ratio), not information gain. The
 * ranker keeps at most 500 attributes, or {@code numAttributes - 1} when the
 * data set has fewer.
 *
 * @param path location handed to {@code PreparingSteps.getReadFileData}
 * @return the {@link AttributeSelection} after {@code SelectAttributes} has run
 * @throws Exception propagated from loading the data or running the selection
 */
public AttributeSelection withInfoGain(String path) throws Exception {
    PreparingSteps pr = new PreparingSteps();
    // Load the data once and derive the attribute count from it, instead of
    // asking the helper to read the same source a second time.
    Instances data = pr.getReadFileData(path);
    int attributeCount = data.numAttributes();

    GainRatioAttributeEval evaluator = new GainRatioAttributeEval();
    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, attributeCount - 1));

    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}

From source file:mulan.examples.InformationGainDimensionalityReduction.java

License:Open Source License

/**
 * Command-line example: scores the features of a multi-label data set with a
 * binary-relevance wrapper around {@link GainRatioAttributeEval}.
 *
 * <p>Options read from {@code args}: {@code path}, {@code filestem} (the
 * {@code .arff} and {@code .xml} files are {@code path + filestem + extension})
 * and {@code numattribs}. When {@code numattribs} is 0 every feature is printed
 * with its score; otherwise the ranking is printed, the first
 * {@code numattribs} ranked indices plus all label indices are kept, and the
 * reduced data set is printed.
 *
 * @param args command-line options as described above
 * @throws Exception propagated from option parsing, loading, ranking or filtering
 */
public static void main(String[] args) throws Exception {
    String path = Utils.getOption("path", args);
    String filestem = Utils.getOption("filestem", args);
    String arffFile = path + filestem + ".arff";
    String xmlFile = path + filestem + ".xml";
    MultiLabelInstances mlData = new MultiLabelInstances(arffFile, xmlFile);
    int keepCount = Integer.parseInt(Utils.getOption("numattribs", args));

    ASEvaluation baseEvaluator = new GainRatioAttributeEval();
    BinaryRelevanceAttributeEvaluator evaluator = new BinaryRelevanceAttributeEvaluator(baseEvaluator, mlData,
            "max", "dl", "eval");

    System.out.println(mlData.getDataSet().numAttributes());

    if (keepCount != 0) {
        // Rank all features, then keep the top keepCount together with every label.
        Ranker ranker = new Ranker();
        int[] ranking = ranker.search(evaluator, mlData);
        System.out.println(Arrays.toString(ranking));

        int labelCount = mlData.getNumLabels();
        int[] retained = new int[keepCount + labelCount];
        System.arraycopy(ranking, 0, retained, 0, keepCount);
        int[] labelIndices = mlData.getLabelIndices();
        System.arraycopy(labelIndices, 0, retained, keepCount, labelCount);

        // Inverted selection: the listed indices are the ones that survive.
        Remove keepFilter = new Remove();
        keepFilter.setAttributeIndicesArray(retained);
        keepFilter.setInvertSelection(true);
        keepFilter.setInputFormat(mlData.getDataSet());
        Instances filtered = Filter.useFilter(mlData.getDataSet(), keepFilter);
        MultiLabelInstances mlFiltered = new MultiLabelInstances(filtered, mlData.getLabelsMetaData());

        System.out.println("\n\n\n\n" + mlFiltered.getDataSet());
    } else {
        // No reduction requested: just list every feature with its score.
        int[] featureIndices = mlData.getFeatureIndices();
        for (int featureIndex : featureIndices) {
            String name = mlData.getDataSet().attribute(featureIndex).name();
            double score = evaluator.evaluateAttribute(mlData.getDataSet().attribute(featureIndex).index());
            System.out.println("Attribute " + name + " : " + score);
        }
    }
    // You can now work on the reduced multi-label dataset mlFiltered
}

From source file:org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java

License:Apache License

/**
 * Builds a FeatureEvaluator by scoring every non-class attribute of the data
 * set encoded in {@code json} with five Weka attribute evaluators (information
 * gain, gain ratio, chi-squared, symmetrical uncertainty and significance) and
 * combining their min-max scaled scores.
 *
 * <p>The rows found under the {@code "unprocessed_data"} key are written to
 * {@code BASE_DIR + "FeaturesEvaluationDataSet.csv"} and re-loaded through
 * {@link CSVLoader}; the last attribute becomes the class when no class index
 * is set. The {@code featureTitles}, {@code featureScores},
 * {@code featureWeights} and {@code featurePriorities} lists are filled in
 * descending order of combined score, and one summary line per attribute is
 * printed to standard output.
 *
 * @param json the JSON document holding the {@code "unprocessed_data"} array
 * @param data not read; the data set is always rebuilt from {@code json}
 * @throws Exception propagated from JSON parsing, file I/O or the Weka evaluators
 */
public FeatureEvaluator(String json, Instances data) throws Exception {
    this.featureTitles = new ArrayList<String>();
    this.featureScores = new ArrayList<Double>();
    this.featureWeights = new ArrayList<Double>();
    this.featurePriorities = new ArrayList<Double>();

    OrderedJSONObject jsonObject = new OrderedJSONObject(json);
    JSONArray rows = jsonObject.getJSONArray("unprocessed_data");
    StringBuilder csv = new StringBuilder();
    for (int i = 0; i < rows.length(); i++) {
        String row = rows.get(i).toString();
        // Drop the first and last character of the serialised row
        // (presumably the enclosing JSON array brackets - confirm against the producer).
        csv.append(row.substring(1, row.length() - 1)).append("\n");
    }

    String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv";
    File file = new File(filePath);
    // if file does not exist, then create it
    if (!file.exists()) {
        file.createNewFile();
    }
    FileUtils.writeStringToFile(file, csv.toString());

    CSVLoader loader = new CSVLoader();
    loader.setSource(file);
    Instances dataset = loader.getDataSet();

    if (dataset.classIndex() == -1) {
        dataset.setClassIndex(dataset.numAttributes() - 1);
    }

    int numUnlabeledAttributes = dataset.numAttributes() - 1;

    // Ranker options: threshold = -Double.MAX_VALUE (discard nothing),
    // numToSelect = -1 (keep all). Every token needs its own array element,
    // otherwise the option parser never recognises the flags.
    String[] options = { "-T", "-1.7976931348623157E308", "-N", "-1" };
    Ranker atrank = new Ranker();
    atrank.setOptions(options);

    weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection();

    double[] scaledInfoGainRanks = scaledAttributeRanks(new InfoGainAttributeEval(), atsel, atrank, dataset,
            numUnlabeledAttributes);
    double[] scaledGainRatioRanks = scaledAttributeRanks(new GainRatioAttributeEval(), atsel, atrank, dataset,
            numUnlabeledAttributes);
    double[] scaledChiSquaredRanks = scaledAttributeRanks(new ChiSquaredAttributeEval(), atsel, atrank, dataset,
            numUnlabeledAttributes);
    double[] scaledSymmetricalUncertRanks = scaledAttributeRanks(new SymmetricalUncertAttributeEval(), atsel,
            atrank, dataset, numUnlabeledAttributes);
    double[] scaledSignificanceRanks = scaledAttributeRanks(new SignificanceAttributeEval(), atsel, atrank,
            dataset, numUnlabeledAttributes);

    // Combined score per attribute = sum of the five scaled scores, rounded to 4 decimals.
    double[] combinedRanks = new double[numUnlabeledAttributes];
    double combinedRanksSum = 0;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        double attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i]
                + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i];
        combinedRanks[i] = Math.round(10000 * attributeSum) / 10000d;
        combinedRanksSum = combinedRanksSum + combinedRanks[i];
    }

    // Sort attribute indices (not the scores themselves) by descending combined
    // score. Carrying the index along keeps attributes with equal scores
    // distinct; looking titles up by score value would report the first tied
    // attribute repeatedly. The insertion sort is stable, so ties keep their
    // original attribute order.
    int[] order = new int[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        order[i] = i;
    }
    for (int i = 1; i < numUnlabeledAttributes; i++) {
        int current = order[i];
        int j = i - 1;
        while (j >= 0 && combinedRanks[order[j]] < combinedRanks[current]) {
            order[j + 1] = order[j];
            j--;
        }
        order[j + 1] = current;
    }

    for (int position = 0; position < numUnlabeledAttributes; position++) {
        int attributeIndex = order[position];
        double combined = combinedRanks[attributeIndex];

        // Use the attribute name directly; parsing it out of toString() breaks
        // for names that contain whitespace or need quoting.
        String title = dataset.attribute(attributeIndex).name();

        // NOTE(review): the divisor 9 does not match the five evaluators summed
        // above (maximum combined score is 5) - confirm the intended scale.
        double score = Math.round(10000 * (combined / 9)) / 100d;
        double weight = Math.round(10000 * (combined / combinedRanksSum)) / 100d;
        double priority = Math.round(score * weight) / 100d;

        this.featureTitles.add(title);
        this.featureScores.add(score);
        this.featureWeights.add(weight);
        this.featurePriorities.add(priority);

        System.out.println(title + " is " + score + " % Important");
    }

}

/**
 * Runs one attribute evaluator over {@code data} and returns its per-attribute
 * scores, min-max scaled and rounded to four decimals.
 *
 * @param evaluator      evaluator to run; must also implement
 *                       {@code weka.attributeSelection.AttributeEvaluator}
 * @param selection      attribute-selection driver reused across evaluators
 * @param search         search method handed to {@code selection}
 * @param data           data set with its class index already set
 * @param attributeCount number of leading attributes to score (indices 0..count-1)
 * @return scaled scores indexed by attribute; an entry is 0 when the scaling
 *         yields NaN (all scores equal), because {@code Math.round(NaN)} is 0
 * @throws Exception propagated from the Weka selection or evaluation calls
 */
private double[] scaledAttributeRanks(weka.attributeSelection.ASEvaluation evaluator,
        weka.attributeSelection.AttributeSelection selection, Ranker search, Instances data,
        int attributeCount) throws Exception {
    selection.setEvaluator(evaluator);
    selection.setSearch(search);
    selection.SelectAttributes(data);

    weka.attributeSelection.AttributeEvaluator scorer = (weka.attributeSelection.AttributeEvaluator) evaluator;
    double[] ranks = new double[attributeCount];
    for (int i = 0; i < attributeCount; i++) {
        ranks[i] = Math.round(10000 * scorer.evaluateAttribute(i)) / 10000d;
    }

    // computerMinMaxValues returns {min, max}.
    double[] minMax = computerMinMaxValues(ranks);
    double min = minMax[0];
    double max = minMax[1];

    double[] scaled = new double[attributeCount];
    for (int i = 0; i < attributeCount; i++) {
        scaled[i] = Math.round(10000 * ((ranks[i] - min) / (max - min))) / 10000d;
    }
    return scaled;
}

From source file:sirius.misc.zscore.ZscoreTableModel.java

License:Open Source License

/**
 * Computes per-attribute z-score statistics of the positive instances
 * relative to the negative instances, plus the gain ratio of each attribute
 * on the merged data, and publishes the result to the table.
 *
 * <p>Shows an error dialog and returns without computing when either data set
 * is {@code null} or the attribute counts differ. Otherwise the work runs on a
 * new minimum-priority thread while a progress dialog is updated; when it
 * finishes the score list is sorted with {@code SortByMeanZScore}, the table
 * is notified and the label shows the number of scores.
 *
 * @param posInstances positive data set; its values are scored
 * @param negInstances negative data set; supplies the reference mean and
 *                     standard deviation
 */
public void compute(final Instances posInstances, final Instances negInstances) {
    if (posInstances == null || negInstances == null) {
        JOptionPane.showMessageDialog(null, "Please load file before computing.", "Error",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (posInstances.numAttributes() != negInstances.numAttributes()) {
        JOptionPane.showMessageDialog(null, "Number of attributes between the two files does not tally.",
                "Error", JOptionPane.ERROR_MESSAGE);
        return;
    }
    this.scoreList = new ArrayList<Scores>();
    this.posInstances = posInstances;
    this.negInstances = negInstances;
    Thread thread = new Thread() {
        @Override
        public void run() {
            MessageDialog m = new MessageDialog(null, "Progress", "0%");
            final int numAttributes = posInstances.numAttributes();
            final int numPositives = posInstances.numInstances();
            // Refresh the dialog roughly once per percent of attributes processed.
            int percentCount = numAttributes / 100;
            if (percentCount == 0) {
                percentCount = 1;
            }
            for (int x = 0; x < numAttributes; x++) {
                if (x % percentCount == 0) {
                    // Real percentage of attributes done; stays within 0..99
                    // whatever the attribute count is.
                    m.update((100L * x) / numAttributes + "%");
                }
                if (!posInstances.attribute(x).isNumeric()) {
                    // Non-numeric attributes get a name-only entry so list
                    // positions still line up with attribute indices.
                    ZscoreTableModel.this.scoreList.add(new Scores(posInstances.attribute(x).name()));
                    continue;
                }
                String name = posInstances.attribute(x).name();
                double posMean = posInstances.attributeStats(x).numericStats.mean;
                double posStdDev = posInstances.attributeStats(x).numericStats.stdDev;
                double negMean = negInstances.attributeStats(x).numericStats.mean;
                double negStdDev = negInstances.attributeStats(x).numericStats.stdDev;
                // Avoid dividing by zero when the negative values are constant.
                if (negStdDev == 0) {
                    negStdDev = 0.01;
                }
                double totalZScore = 0.0;
                int numGTZScore0_5 = 0;
                int numGTZScore1 = 0;
                int numGTZScore2 = 0;
                int numGTZScore3 = 0;
                for (int y = 0; y < numPositives; y++) {
                    double zScore = Math.abs(((posInstances.instance(y).value(x) - negMean) / negStdDev));
                    totalZScore += zScore;
                    if (zScore > 0.5) {
                        numGTZScore0_5++;
                    }
                    if (zScore > 1) {
                        numGTZScore1++;
                    }
                    if (zScore > 2) {
                        numGTZScore2++;
                    }
                    if (zScore > 3) {
                        numGTZScore3++;
                    }
                }
                double meanZScore = totalZScore / numPositives;
                // Divide in floating point: integer division would truncate the
                // percentage and throw ArithmeticException for an empty positive
                // set (these now evaluate to NaN, like meanZScore).
                double percentGTZScore0_5 = (numGTZScore0_5 * 100.0) / numPositives;
                double percentGTZScore1 = (numGTZScore1 * 100.0) / numPositives;
                double percentGTZScore2 = (numGTZScore2 * 100.0) / numPositives;
                double percentGTZScore3 = (numGTZScore3 * 100.0) / numPositives;
                // The trailing -1 is presumably a gain-ratio placeholder that is
                // overwritten below - confirm against the Scores constructor.
                ZscoreTableModel.this.scoreList
                        .add(new Scores(name, posMean, posStdDev, negMean, negStdDev, meanZScore,
                                percentGTZScore0_5, percentGTZScore1, percentGTZScore2, percentGTZScore3, -1));
            }
            try {
                // Merge both sets; the last attribute is used as the class.
                Instances instances = new Instances(posInstances);
                for (int x = 0; x < negInstances.numInstances(); x++) {
                    instances.add(negInstances.instance(x));
                }
                instances.setClassIndex(instances.numAttributes() - 1);
                // Evaluate the attributes individually and obtain the gainRatio
                GainRatioAttributeEval gainRatio = new GainRatioAttributeEval();
                if (instances.numAttributes() > 0) {
                    gainRatio.buildEvaluator(instances);
                }
                for (int x = 0; x < (instances.numAttributes() - 1); x++) {
                    ZscoreTableModel.this.scoreList.get(x).setGainRatio(gainRatio.evaluateAttribute(x));
                }
            } catch (Exception e) {
                // Best effort: a gain-ratio failure is reported but must not stop
                // the z-score results from being shown.
                e.printStackTrace();
            }
            Collections.sort(ZscoreTableModel.this.scoreList, new SortByMeanZScore());
            fireTableDataChanged();
            m.dispose();
            ZscoreTableModel.this.label.setText("" + ZscoreTableModel.this.scoreList.size());
        }
    };
    thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority
    thread.start();
}

From source file:tutorials.featureselection.TutorialWekaAttributeSelection.java

License:Open Source License

/**
 * Tutorial entry point: wraps Weka's {@link GainRatioAttributeEval} and
 * {@link Ranker} in a {@code WekaAttributeSelection} bridge, builds it on the
 * data loaded from {@code devtools/data/iris.data}, and prints the attribute
 * count followed by the rank and score of the attributes.
 *
 * @param args unused
 * @throws IOException declared for the data-loading step
 */
public static void main(String[] args) throws IOException {
    /* Load data (arguments 4 and "," are presumably the class column and the field separator) */
    File irisFile = new File("devtools/data/iris.data");
    Dataset data = FileHandler.loadDataset(irisFile, 4, ",");

    /* Weka evaluation and search algorithms, wrapped in the bridge */
    ASEvaluation evaluator = new GainRatioAttributeEval();
    ASSearch ranker = new Ranker();
    WekaAttributeSelection selection = new WekaAttributeSelection(evaluator, ranker);

    /* Apply the algorithm to the data set */
    selection.build(data);

    /* Report the number of attributes */
    System.out.println("Total number of attributes:  " + selection.noAttributes());

    /*
     * Report rank and score per attribute.
     * NOTE(review): the bound noAttributes() - 1 leaves out the last index -
     * confirm whether that is intended.
     */
    int i = 0;
    while (i < selection.noAttributes() - 1) {
        String line = "Attribute  " + i + "  Ranks  " + selection.rank(i) + " and Scores " + selection.score(i);
        System.out.println(line);
        i++;
    }

}