cyber009.udal.functions.StatisticalAnalysis.java Source code


Introduction

Here is the source code for cyber009.udal.functions.StatisticalAnalysis.java. The class collects a few Weka-based helpers: an attribute-wise instance comparison, an empirical class-prior estimate, a posterior-style score built from a single Evaluation run, and a conditional-entropy style score over a pool of unlabeled instances.

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package cyber009.udal.functions;

import java.util.logging.Level;
import java.util.logging.Logger;
import weka.classifiers.Classifier;
import weka.classifiers.evaluation.Evaluation;
import weka.core.AttributeStats;
import weka.core.Instance;
import weka.core.Instances;

/**
 * Weka-based statistical helpers: instance comparison, empirical class priors,
 * posterior-weighted scores, and a conditional-entropy style score over an
 * unlabeled pool.
 *
 * @author pavel
 */
public class StatisticalAnalysis {

    public StatisticalAnalysis() {

    }

    /**
     * Compares two instances attribute by attribute, ignoring the class attribute.
     *
     * @param a first instance
     * @param b second instance
     * @return true if every non-class attribute value matches
     */
    public static boolean instanceCMPWithoutClass(Instance a, Instance b) {
        if (a.numAttributes() != b.numAttributes()) {
            return false;
        }
        for (int i = 0; i < a.numAttributes(); i++) {
            if (i == a.classIndex()) {
                continue; // skip the class attribute, as the method name implies
            }
            if (a.value(i) != b.value(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Estimates the empirical prior probability of the target class in the data set.
     *
     * @param dataSet     labeled data set with a nominal class attribute
     * @param classTarget numeric encoding of the target class value
     * @return the relative frequency of classTarget in dataSet, or 0.0 if not found
     */
    public double probabilityOfTargerClass(Instances dataSet, double classTarget) {
        AttributeStats classStats = dataSet.attributeStats(dataSet.classIndex());
        double ptc = 0.0D;
        if (classStats.nominalCounts != null) {
            for (int i = 0; i < classStats.nominalCounts.length; i++) {
                // the nominal class labels are expected to be numeric strings (e.g. "0", "1")
                if (Double.parseDouble(dataSet.attribute(dataSet.classIndex()).value(i)) == classTarget) {
                    ptc = (double) classStats.nominalCounts[i] / (double) classStats.totalCount;
                }
            }
        }
        return ptc;
    }

    /**
     * Scores a single unlabeled instance by evaluating the classifier on it once and
     * weighting the resulting error by the empirical prior of the target class.
     *
     * @param classifier      a trained Weka classifier
     * @param trainingDataSet the labeled training data
     * @param unLabelSet      the instance to score
     * @param classTarget     numeric encoding of the target class value
     * @return mean absolute error of the single prediction times the class prior
     */
    public double posteriorDistribution(Classifier classifier, Instances trainingDataSet, Instance unLabelSet,
            double classTarget) {
        double prDistribution = 0.0D;
        double classPredict = 0.0D;
        Evaluation evaluation = null;
        try {
            evaluation = new Evaluation(trainingDataSet);
            evaluation.evaluateModelOnceAndRecordPrediction(classifier, unLabelSet);
            // evaluation.pctCorrect(); // could be printed to check correctness
            // the mean absolute error of this single recorded prediction serves as the prediction score
            classPredict = evaluation.meanAbsoluteError();
            prDistribution = classPredict * probabilityOfTargerClass(trainingDataSet, classTarget);
        } catch (Exception ex) {
            Logger.getLogger(StatisticalAnalysis.class.getName()).log(Level.SEVERE, null, ex);
        }
        return prDistribution;
    }

    /**
     * Accumulates a conditional-entropy style score over the unlabeled pool, after
     * temporarily adding the candidate instance (labeled with classTarget) to the
     * training data.
     *
     * @param classifier      a trained Weka classifier
     * @param trainingDataSet the labeled training data
     * @param unLabelDataSets the pool of unlabeled instances
     * @param unLabelSet      the candidate instance being evaluated
     * @param classTarget     numeric encoding of the candidate's hypothesized class value
     * @return the accumulated entropy term
     */
    public double conditionalEntropy(Classifier classifier, Instances trainingDataSet, Instances unLabelDataSets,
            Instance unLabelSet, double classTarget) {
        double cEnt = 0.0D;
        double entropy = 0.0D;
        // temporarily add the candidate, labeled with the hypothesized class, to the training data
        unLabelSet.setClassValue(classTarget);
        trainingDataSet.add(trainingDataSet.numInstances(), unLabelSet);
        AttributeStats classStats = trainingDataSet.attributeStats(trainingDataSet.classIndex());
        for (Instance set : unLabelDataSets) {
            // skip the candidate itself when it appears in the unlabeled pool
            if (instanceCMPWithoutClass(set, unLabelSet)) {
                continue;
            }
            for (int i = 0; i < classStats.nominalCounts.length; i++) {
                double target = Double.parseDouble(trainingDataSet.attribute(trainingDataSet.classIndex()).value(i));
                set.setClassValue(target);
                entropy = posteriorDistribution(classifier, trainingDataSet, set, classTarget);
                if (entropy > 0.0D) { // treat 0 * log(0) as 0 to avoid NaN
                    cEnt += -(entropy) * Math.log10(entropy);
                }
                set.setClassMissing();
            }
        }
        // undo the temporary insertion
        trainingDataSet.remove(trainingDataSet.numInstances() - 1);
        return cEnt;
    }
}
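
Usage example

Below is a minimal usage sketch, not part of the original listing. It assumes an ARFF file at data/train.arff whose nominal class labels are numeric strings (e.g. "0", "1"), since probabilityOfTargerClass parses the labels as numbers, and it uses NaiveBayes purely for illustration; the class name StatisticalAnalysisDemo and the file path are invented for this example.

// Minimal usage sketch (not part of the original listing). The ARFF path, the
// NaiveBayes classifier, and the class name StatisticalAnalysisDemo are assumptions
// made for illustration only.
package cyber009.udal.functions;

import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class StatisticalAnalysisDemo {

    public static void main(String[] args) throws Exception {
        // Load a labeled data set; the last attribute is treated as the class.
        // The nominal class labels are assumed to be numeric strings such as "0" and "1".
        Instances train = DataSource.read("data/train.arff");
        train.setClassIndex(train.numAttributes() - 1);

        // Treat the last instance as a query point.
        Instance query = train.remove(train.numInstances() - 1);
        query.setDataset(train);

        Classifier classifier = new NaiveBayes();
        classifier.buildClassifier(train);

        StatisticalAnalysis sa = new StatisticalAnalysis();
        double classTarget = 0.0; // numeric encoding of the first class label

        // Empirical prior of the target class in the training data.
        System.out.println("prior     = " + sa.probabilityOfTargerClass(train, classTarget));

        // Posterior-style score for the query instance.
        System.out.println("posterior = "
                + sa.posteriorDistribution(classifier, train, query, classTarget));

        // Build a small unlabeled pool (copies with missing class) for conditionalEntropy.
        Instances pool = new Instances(train, 0);
        for (int i = 0; i < Math.min(5, train.numInstances()); i++) {
            pool.add((Instance) train.instance(i).copy());
            pool.lastInstance().setClassMissing();
        }
        System.out.println("cond. entropy = "
                + sa.conditionalEntropy(classifier, train, pool, query, classTarget));
    }
}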