Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Calculates the information gain obtained by splitting on an attribute.
 *
 * <p>Starts from the entropy of the whole dataset and subtracts the entropy
 * of every non-empty subset produced by the split, each weighted by the
 * fraction of {@code data}'s instances that fall into that subset.
 *
 * @param data the data for which info gain is to be computed
 * @param att the attribute to split on
 * @return the information gain for the given attribute and data
 * @throws Exception if computation fails
 */
private double computeInfoGain(Instances data, Attribute att) throws Exception {
    double gain = computeEntropy(data);
    Instances[] subsets = splitData(data, att);

    for (int value = 0; value < att.numValues(); value++) {
        Instances subset = subsets[value];
        int subsetSize = subset.numInstances();
        // Empty subsets contribute nothing to the weighted entropy.
        if (subsetSize <= 0) {
            continue;
        }
        double weight = (double) subsetSize / (double) data.numInstances();
        gain -= weight * computeEntropy(subset);
    }
    return gain;
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Computes the entropy of a dataset./*from  w  ww  . java 2 s . co  m*/
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Partitions a dataset by the values of a nominal attribute.
 *
 * <p>One subset is created per attribute value; each instance is added to
 * the subset whose index equals the instance's value for {@code att}. Every
 * subset is compactified before being returned.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split, indexed by attribute value
 */
private Instances[] splitData(Instances data, Attribute att) {
    Instances[] subsets = new Instances[att.numValues()];
    for (int value = 0; value < att.numValues(); value++) {
        // Each subset is built from data with room for all of data's instances.
        subsets[value] = new Instances(data, data.numInstances());
    }

    for (Enumeration instances = data.enumerateInstances(); instances.hasMoreElements();) {
        Instance current = (Instance) instances.nextElement();
        subsets[(int) current.value(att)].add(current);
    }

    for (Instances subset : subsets) {
        subset.compactify();
    }
    return subsets;
}

From source file:cezeri.evaluater.FactoryEvaluation.java

/**
 * Cross-validates a classifier on a shuffled copy of {@code datax}, using as
 * many folds as the copy has instances, and optionally plots and/or prints
 * the results.
 *
 * <p>For every fold a copy of {@code model} is trained on the fold's training
 * split and evaluated on its test split. The values returned by
 * {@code eval.evaluateModel} are appended to {@code simulated}, and the
 * validation split's class-attribute values are appended to {@code observed}.
 * Neither variable is declared in this method.
 * NOTE(review): presumably both are static fields of the enclosing class;
 * confirm where they are declared and whether they are reset between calls.
 *
 * <p>Any exception raised during evaluation, plotting or printing is logged at
 * SEVERE level and not rethrown.
 *
 * @param model the classifier to evaluate; it is copied per fold and not trained itself
 * @param datax the dataset to cross-validate on; a copy is shuffled with a fixed seed of 1
 * @param test not used by the active code in this method
 * @param show_text if true, prints the setup and an evaluation summary to standard output
 * @param show_plot if true, plots observed against simulated values
 * @return the evaluation object; {@code null} if constructing it failed, and possibly
 *         only partially populated if an exception interrupted the fold loop
 */
public static Evaluation performCrossValidateTestAlso(Classifier model, Instances datax, Instances test,
        boolean show_text, boolean show_plot) {
    TFigureAttribute attr = new TFigureAttribute();
    // Fixed seed: the shuffle order is the same on every call.
    Random rand = new Random(1);
    // Work on a copy so the caller's dataset is not reordered.
    Instances randData = new Instances(datax);
    randData.randomize(rand);

    Evaluation eval = null;
    // One fold per instance.
    int folds = randData.numInstances();
    try {
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            // Disabled alternatives for building the training split:
            //                randData.randomize(rand);
            //                Instances train = randData;                
            Instances train = randData.trainCV(folds, n);
            //                Instances train = randData.trainCV(folds, n, rand);
            // Train a fresh copy so the caller's model is left untouched.
            Classifier clsCopy = Classifier.makeCopy(model);
            clsCopy.buildClassifier(train);
            Instances validation = randData.testCV(folds, n);
            // Disabled alternative that validated on the separate test set, plus debug views:
            //                Instances validation = test.testCV(test.numInstances(), n%test.numInstances());
            //                CMatrix.fromInstances(train).showDataGrid();
            //                CMatrix.fromInstances(validation).showDataGrid();

            // Accumulate this fold's predictions and the matching actual class values.
            simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation));
            observed = FactoryUtils.concatenate(observed,
                    validation.attributeToDoubleArray(validation.classIndex()));
        }

        if (show_plot) {
            // Row 0 holds the observed values, row 1 the simulated ones; transposed before plotting.
            double[][] d = new double[2][simulated.length];
            d[0] = observed;
            d[1] = simulated;
            CMatrix f1 = CMatrix.getInstance(d);
            attr.figureCaption = "overall performance";
            f1.transpose().plot(attr);
        }
        if (show_text) {
            // output evaluation
            System.out.println();
            System.out.println("=== Setup for Overall Cross Validation===");
            System.out.println(
                    "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions()));
            System.out.println("Dataset: " + randData.relationName());
            System.out.println("Folds: " + folds);
            // Mirrors the hard-coded seed passed to Random above.
            System.out.println("Seed: " + 1);
            System.out.println();
            System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false));
        }
    } catch (Exception ex) {
        // Failures are logged only; the caller receives whatever eval holds at this point.
        Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex);
    }
    return eval;
}

From source file:cezeri.feature.selection.FeatureSelectionInfluence.java

/**
 * Collects the class value of every instance in a dataset.
 *
 * @param test the dataset to read from
 * @return an array holding, at index {@code i}, the class value of instance {@code i}
 */
private static double[] getAttributeValues(Instances test) {
    double[] classValues = new double[test.numInstances()];
    for (int row = 0; row < classValues.length; row++) {
        classValues[row] = test.instance(row).classValue();
    }
    return classValues;
}

From source file:cezeri.utils.FactoryInstance.java

/**
 * Converts a dataset into a two-dimensional array of doubles.
 *
 * <p>Row {@code i} of the result is the array returned by
 * {@code toDoubleArray()} for instance {@code i}.
 *
 * @param m the dataset to convert
 * @return one row per instance, in dataset order
 */
public static double[][] getData(Instances m) {
    int rows = m.numInstances();
    // Only the outer array is allocated here: every row is supplied by
    // toDoubleArray(), so pre-allocating inner rows would be thrown away.
    double[][] ret = new double[rows][];
    for (int i = 0; i < rows; i++) {
        ret[i] = m.instance(i).toDoubleArray();
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

/**
 * Builds a {@code CMatrix} from the values of a dataset.
 *
 * <p>Row {@code i} of the array handed to {@code CMatrix.getInstance} is the
 * array returned by {@code toDoubleArray()} for instance {@code i}.
 *
 * @param m the dataset to convert
 * @return the matrix created from the dataset's rows
 */
public static CMatrix toMatrix(Instances m) {
    int rows = m.numInstances();
    // Only the outer array is allocated here: every row is supplied by
    // toDoubleArray(), so pre-allocating inner rows would be thrown away.
    double[][] ret = new double[rows][];
    for (int i = 0; i < rows; i++) {
        ret[i] = m.instance(i).toDoubleArray();
    }
    return CMatrix.getInstance(ret);
}

From source file:cezeri.utils.FactoryInstance.java

/**
 * Builds a {@code CMatrix} from the values of a dataset.
 *
 * <p>Row {@code i} of the array handed to {@code CMatrix.getInstance} is the
 * array returned by {@code toDoubleArray()} for instance {@code i}.
 *
 * @param m the dataset to convert
 * @return the matrix created from the dataset's rows
 */
public static CMatrix fromInstances(Instances m) {
    int rows = m.numInstances();
    // Only the outer array is allocated here: every row is supplied by
    // toDoubleArray(), so pre-allocating inner rows would be thrown away.
    double[][] ret = new double[rows][];
    for (int i = 0; i < rows; i++) {
        ret[i] = m.instance(i).toDoubleArray();
    }
    return CMatrix.getInstance(ret);
}

From source file:cezeri.utils.FactoryInstance.java

/**
 *
 * @param m tm dataset/*from ww  w  .j  a  v  a2 s  . c  om*/
 * @param val class value deeri val olanlar filtrele
 * @return
 */
public static Instances[] getSpecificInstancesBasedOnClassValue(Instances m, String[] cl) {
    Instances[] ret = new Instances[cl.length];
    for (int i = 0; i < ret.length; i++) {
        ret[i] = FactoryInstance.generateInstances(m.relationName() + "_class=" + cl[i], m.numAttributes());
        //            ret[i] = m.resampleWithWeights(new Random());
        ret[i].delete();
    }
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        for (int j = 0; j < cl.length; j++) {
            if (("" + (int) ins.classValue()).equals(cl[j])) {
                ret[j].add(ins);
            }
        }
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

/**
 * Extracts the class value of each instance in a dataset.
 *
 * @param m the dataset to read from
 * @return the class values, in dataset order
 */
public static double[] getClassData(Instances m) {
    double[] classValues = new double[m.numInstances()];
    int row = 0;
    while (row < classValues.length) {
        Instance current = m.instance(row);
        classValues[row] = current.classValue();
        row++;
    }
    return classValues;
}