Example usage for weka.attributeSelection GainRatioAttributeEval evaluateAttribute

List of usage examples for weka.attributeSelection GainRatioAttributeEval evaluateAttribute

Introduction

On this page you can find example usages of weka.attributeSelection.GainRatioAttributeEval.evaluateAttribute.

Prototype

@Override
public double evaluateAttribute(int attribute) throws Exception 

Source Link

Document

Evaluates an individual attribute by measuring the gain ratio of the class given the attribute.

Usage

From source file:org.uclab.mm.kcl.ddkat.dataselector.FeatureEvaluator.java

License:Apache License

/**
 * Constructor to instantiate a new FeatureEvaluator object.
 *
 * <p>Builds CSV text from the {@code "unprocessed_data"} array of the given JSON string,
 * writes it to {@code BASE_DIR + "FeaturesEvaluationDataSet.csv"}, reloads that file with a
 * {@code CSVLoader} and scores every attribute except the last one (used as the class when no
 * class index is set) with five attribute evaluators: information gain, gain ratio,
 * chi-squared, symmetrical uncertainty and significance. Each evaluator's scores are min-max
 * scaled, the scaled scores are summed per attribute, and the attributes are appended to
 * {@code featureTitles}, {@code featureScores}, {@code featureWeights} and
 * {@code featurePriorities} in descending order of that combined score.
 *
 * @param json the data string; must contain an {@code "unprocessed_data"} JSON array
 * @param data the data set; its incoming value is not read, it is replaced by the data set
 *             loaded from the generated CSV file
 * @throws Exception the exception
 */

public FeatureEvaluator(String json, Instances data) throws Exception {

    this.featureTitles = new ArrayList<String>();
    this.featureScores = new ArrayList<Double>();
    this.featureWeights = new ArrayList<Double>();
    this.featurePriorities = new ArrayList<Double>();

    OrderedJSONObject jsonObject = new OrderedJSONObject(json);
    JSONArray jsontokenArray = jsonObject.getJSONArray("unprocessed_data");
    StringBuilder csvBuilder = new StringBuilder();
    for (int i = 0; i < jsontokenArray.length(); i++) {
        String row = jsontokenArray.get(i).toString();
        // Drop the first and last character of each element's string form
        // (presumably the enclosing brackets of a nested array -- confirm against the producer).
        csvBuilder.append(row.substring(1, row.length() - 1)).append("\n");
    }

    String filePath = BASE_DIR + "FeaturesEvaluationDataSet.csv";
    File file = new File(filePath);
    // if file does not exists, then create it
    if (!file.exists()) {
        file.createNewFile();
    }

    FileUtils.writeStringToFile(file, csvBuilder.toString());

    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(filePath));
    data = loader.getDataSet();

    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    int numUnlabeledAttributes = data.numAttributes() - 1;

    // Each option token must be its own array element; a single space-joined string is not
    // recognised by the option parser. These values mean "no threshold, keep all attributes".
    String[] options = { "-T", "-1.7976931348623157E308", "-N", "-1" };
    Ranker atrank = new Ranker();
    atrank.setOptions(options);

    weka.attributeSelection.AttributeSelection atsel = new weka.attributeSelection.AttributeSelection();

    double[] scaledInfoGainRanks = computeScaledRanks(atsel, new InfoGainAttributeEval(), atrank, data,
            numUnlabeledAttributes);
    double[] scaledGainRatioRanks = computeScaledRanks(atsel, new GainRatioAttributeEval(), atrank, data,
            numUnlabeledAttributes);
    double[] scaledChiSquaredRanks = computeScaledRanks(atsel, new ChiSquaredAttributeEval(), atrank, data,
            numUnlabeledAttributes);
    double[] scaledSymmetricalUncertRanks = computeScaledRanks(atsel, new SymmetricalUncertAttributeEval(),
            atrank, data, numUnlabeledAttributes);
    double[] scaledSignificanceRanks = computeScaledRanks(atsel, new SignificanceAttributeEval(), atrank, data,
            numUnlabeledAttributes);

    double[] combinedRanks = new double[numUnlabeledAttributes];
    double combinedRanksSum = 0;
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        double attributeSum = scaledInfoGainRanks[i] + scaledGainRatioRanks[i] + scaledChiSquaredRanks[i]
                + scaledSymmetricalUncertRanks[i] + scaledSignificanceRanks[i];
        combinedRanks[i] = Math.round(10000 * attributeSum) / 10000d;
        combinedRanksSum = combinedRanksSum + combinedRanks[i];
    }

    // Sort attribute indices (not the scores themselves) by descending combined score.
    // Keeping the index attached to its score means attributes with equal scores are each
    // reported once, instead of all resolving to the first attribute with that value.
    // The insertion sort is stable, so ties keep their original attribute order.
    int[] order = new int[numUnlabeledAttributes];
    for (int i = 0; i < numUnlabeledAttributes; i++) {
        order[i] = i;
    }
    for (int i = 1; i < numUnlabeledAttributes; i++) {
        int current = order[i];
        int j = i - 1;
        while (j >= 0 && combinedRanks[order[j]] < combinedRanks[current]) {
            order[j + 1] = order[j];
            j--;
        }
        order[j + 1] = current;
    }

    for (int rank = 0; rank < numUnlabeledAttributes; rank++) {
        int attributeIndex = order[rank];
        double combined = combinedRanks[attributeIndex];

        // Use the attribute's name directly rather than parsing its ARFF header line,
        // which breaks for names that are quoted or contain whitespace.
        String title = data.attribute(attributeIndex).name();

        // NOTE(review): five scaled scores (each at most 1) are summed, so the combined value
        // cannot exceed 5; the divisor 9 is kept unchanged -- confirm it is intended.
        double score = Math.round(10000 * (combined / 9)) / 100d;
        // If every combined score is 0 the division yields NaN, which Math.round maps to 0.
        double weight = Math.round(10000 * (combined / combinedRanksSum)) / 100d;
        double priority = Math.round(score * weight) / 100d;

        this.featureTitles.add(title);
        this.featureScores.add(score);
        this.featureWeights.add(weight);
        this.featurePriorities.add(priority);

        System.out.println(title + " is " + score + " % Important");
    }

}

/**
 * Runs attribute selection on {@code data} with the given evaluator and returns the
 * evaluator's score for attributes {@code 0 .. numAttributes - 1}, min-max scaled and
 * rounded to four decimals.
 *
 * @param atsel         the attribute selection driver to run
 * @param evaluator     the evaluator to build; must also implement
 *                      {@code weka.attributeSelection.AttributeEvaluator}
 * @param atrank        the search method to use
 * @param data          the data set to evaluate
 * @param numAttributes the number of leading attributes to score
 * @return the scaled scores, indexed by attribute
 * @throws Exception if attribute selection or evaluation fails
 */
private double[] computeScaledRanks(weka.attributeSelection.AttributeSelection atsel,
        weka.attributeSelection.ASEvaluation evaluator, Ranker atrank, Instances data, int numAttributes)
        throws Exception {
    atsel.setEvaluator(evaluator);
    atsel.setSearch(atrank);
    atsel.SelectAttributes(data);

    weka.attributeSelection.AttributeEvaluator scorer = (weka.attributeSelection.AttributeEvaluator) evaluator;
    double[] ranks = new double[numAttributes];
    for (int i = 0; i < numAttributes; i++) {
        ranks[i] = Math.round(10000 * scorer.evaluateAttribute(i)) / 10000d;
    }

    double[] minmaxValues = computerMinMaxValues(ranks);
    double min = minmaxValues[0];
    double max = minmaxValues[1];

    double[] scaled = new double[numAttributes];
    for (int i = 0; i < numAttributes; i++) {
        // When all scores are equal, 0 / 0 is NaN and Math.round(NaN) is 0, so the scaled
        // score is 0 rather than an exception or NaN.
        scaled[i] = Math.round(10000 * ((ranks[i] - min) / (max - min))) / 10000d;
    }
    return scaled;
}

From source file:sirius.misc.zscore.ZscoreTableModel.java

License:Open Source License

/**
 * Computes, on a background thread, per-attribute z-score statistics of the positive
 * instances relative to the negative instances' mean and standard deviation, plus the gain
 * ratio of each attribute on the combined data set, and stores the results in
 * {@code scoreList} sorted with {@code SortByMeanZScore}.
 *
 * <p>Shows an error dialog and returns without computing when either data set is
 * {@code null} or when the two data sets have a different number of attributes.
 *
 * @param posInstances the positive instances
 * @param negInstances the negative instances
 */
public void compute(final Instances posInstances, final Instances negInstances) {
    if (posInstances == null || negInstances == null) {
        JOptionPane.showMessageDialog(null, "Please load file before computing.", "Error",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (posInstances.numAttributes() != negInstances.numAttributes()) {
        JOptionPane.showMessageDialog(null, "Number of attributes between the two files does not tally.",
                "Error", JOptionPane.ERROR_MESSAGE);
        return;
    }
    this.scoreList = new ArrayList<Scores>();
    this.posInstances = posInstances;
    this.negInstances = negInstances;
    Thread thread = new Thread() {
        @Override
        public void run() {
            MessageDialog m = new MessageDialog(null, "Progress", "0%");
            try {
                final int numAttributes = posInstances.numAttributes();
                final int numPos = posInstances.numInstances();
                // Refresh the progress text roughly once per percent of the attributes.
                int percentCount = numAttributes / 100;
                if (percentCount == 0) {
                    percentCount = 1;
                }
                for (int x = 0; x < numAttributes; x++) {
                    if (x % percentCount == 0) {
                        // Report the real fraction done; x / percentCount is not a percentage.
                        m.update((x * 100) / numAttributes + "%");
                    }
                    if (!posInstances.attribute(x).isNumeric()) {
                        ZscoreTableModel.this.scoreList.add(new Scores(posInstances.attribute(x).name()));
                        continue;
                    }
                    String name = posInstances.attribute(x).name();
                    // attributeStats() recomputes the statistics on every call, so fetch once.
                    weka.experiment.Stats posStats = posInstances.attributeStats(x).numericStats;
                    weka.experiment.Stats negStats = negInstances.attributeStats(x).numericStats;
                    double posMean = posStats.mean;
                    double posStdDev = posStats.stdDev;
                    double negMean = negStats.mean;
                    double negStdDev = negStats.stdDev;
                    // Avoid dividing by zero when the negative set has no spread.
                    if (negStdDev == 0) {
                        negStdDev = 0.01;
                    }
                    double totalZScore = 0.0;
                    int numGTZScore0_5 = 0;
                    int numGTZScore1 = 0;
                    int numGTZScore2 = 0;
                    int numGTZScore3 = 0;
                    for (int y = 0; y < numPos; y++) {
                        double zScore = Math.abs((posInstances.instance(y).value(x) - negMean) / negStdDev);
                        totalZScore += zScore;
                        if (zScore > 0.5) {
                            numGTZScore0_5++;
                        }
                        if (zScore > 1) {
                            numGTZScore1++;
                        }
                        if (zScore > 2) {
                            numGTZScore2++;
                        }
                        if (zScore > 3) {
                            numGTZScore3++;
                        }
                    }
                    // With no positive instances there is nothing to average; report 0.
                    double meanZScore = numPos == 0 ? 0.0 : totalZScore / numPos;
                    double percentGTZScore0_5 = percentOf(numGTZScore0_5, numPos);
                    double percentGTZScore1 = percentOf(numGTZScore1, numPos);
                    double percentGTZScore2 = percentOf(numGTZScore2, numPos);
                    double percentGTZScore3 = percentOf(numGTZScore3, numPos);
                    ZscoreTableModel.this.scoreList
                            .add(new Scores(name, posMean, posStdDev, negMean, negStdDev, meanZScore,
                                    percentGTZScore0_5, percentGTZScore1, percentGTZScore2, percentGTZScore3, -1));
                }
                try {
                    // Merge both sets so the gain ratio is measured against the last attribute
                    // (used as the class) over all instances.
                    Instances instances = new Instances(posInstances);
                    for (int x = 0; x < negInstances.numInstances(); x++) {
                        instances.add(negInstances.instance(x));
                    }
                    instances.setClassIndex(instances.numAttributes() - 1);
                    //Evaluate the attributes individually and obtain the gainRatio
                    GainRatioAttributeEval gainRatio = new GainRatioAttributeEval();
                    if (instances.numAttributes() > 0) {
                        gainRatio.buildEvaluator(instances);
                    }
                    // scoreList holds one entry per attribute in attribute order at this point,
                    // so index x addresses attribute x; the class attribute is skipped.
                    for (int x = 0; x < (instances.numAttributes() - 1); x++) {
                        ZscoreTableModel.this.scoreList.get(x).setGainRatio(gainRatio.evaluateAttribute(x));
                    }
                } catch (Exception e) {
                    // Best effort: the z-score results are still shown if gain ratio fails.
                    e.printStackTrace();
                }
                Collections.sort(ZscoreTableModel.this.scoreList, new SortByMeanZScore());
                // NOTE(review): these Swing calls run on the worker thread, not the event
                // dispatch thread -- consider SwingUtilities.invokeLater.
                fireTableDataChanged();
                ZscoreTableModel.this.label.setText("" + ZscoreTableModel.this.scoreList.size());
            } finally {
                // Always close the progress dialog, even if the computation fails.
                m.dispose();
            }
        }
    };
    thread.setPriority(Thread.MIN_PRIORITY); // UI has most priority
    thread.start();
}

/**
 * Returns {@code count} as a percentage of {@code total} using floating-point division,
 * or 0 when {@code total} is 0.
 */
private static double percentOf(int count, int total) {
    if (total == 0) {
        return 0.0;
    }
    return (count * 100.0) / total;
}