Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

On this page you can find example usages of weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:dewaweebtreeclassifier.Sujeong.java

/**
 * Partitions a dataset into one subset per value of the given attribute.
 *
 * @param data the instances to partition
 * @param attr the attribute whose value decides which subset receives an instance
 * @return an array of {@code attr.numValues()} datasets; slot {@code v} receives every
 *         instance whose value for {@code attr}, cast to {@code int}, is {@code v}
 */
public Instances[] splitInstancesOnAttribute(Instances data, Attribute attr) {
    final int bucketCount = attr.numValues();
    final int rowCount = data.numInstances();

    // Each bucket is built from data with room for every row (worst case: no real split).
    Instances[] buckets = new Instances[bucketCount];
    for (int b = 0; b < bucketCount; b++) {
        buckets[b] = new Instances(data, rowCount);
    }

    // The attribute value, cast to int, is used directly as the bucket index.
    for (int row = 0; row < rowCount; row++) {
        Instance current = data.instance(row);
        buckets[(int) current.value(attr)].add(current);
    }

    for (Instances bucket : buckets) {
        bucket.compactify();
    }

    return buckets;
}

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * Recursively builds this tree node from the given dataset.
 *
 * An empty dataset clears {@code mChild} and stops. Otherwise the gain of every
 * enumerated attribute is computed; if the best gain equals zero the node records the
 * class distribution and its most frequent class, else it splits on the best attribute
 * and builds one child per attribute value.
 *
 * @param data the instances that reach this node
 */
public void buildTree(Instances data) {
    // Nothing to learn from an empty dataset.
    if (data.numInstances() == 0) {
        mChild = null;
        return;
    }

    // Gains are stored by attribute index; slots never enumerated keep the default 0.
    double[] gains = new double[data.numAttributes()];
    for (Enumeration attrs = data.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute candidate = (Attribute) attrs.nextElement();
        gains[candidate.index()] = computeGain(data, candidate);
    }
    int bestIdx = Utils.maxIndex(gains);

    if (!Utils.eq(gains[bestIdx], 0)) {
        // Split on the best attribute and recurse into one child per value.
        mSplitAttribute = data.attribute(bestIdx);
        Instances[] subsets = splitInstancesOnAttribute(data, mSplitAttribute);
        mChild = new VerandaTree[mSplitAttribute.numValues()];
        for (int c = 0; c < mChild.length; c++) {
            VerandaTree subtree = new VerandaTree();
            mChild[c] = subtree;
            subtree.buildTree(subsets[c]);
        }
        return;
    }

    // No attribute yields any gain: count the classes and keep the largest one.
    mClassDistribution = new int[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        mClassDistribution[(int) row.classValue()]++;
    }
    mClassValue = Utils.maxIndex(mClassDistribution);
}

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * /*from w  ww  . ja va 2  s.  co  m*/
 * @param data
 * @param attr
 * @return 
 */
public double computeGain(Instances data, Attribute attr) {
    double informationGain = computeEntropy(data);
    Instances[] splitInstances = splitInstancesOnAttribute(data, attr);
    for (Instances instances : splitInstances) {
        informationGain -= ((double) instances.numInstances() / (double) data.numInstances())
                * computeEntropy(instances);
    }

    return informationGain;
}

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * /*  ww w.  j ava2 s  .  c  o  m*/
 * @param data 
 * @return  
 */
public double computeEntropy(Instances data) {
    double[] nClass = new double[data.numClasses()];
    Enumeration enumInstance = data.enumerateInstances();
    while (enumInstance.hasMoreElements()) {
        Instance instance = (Instance) enumInstance.nextElement();
        nClass[(int) instance.classValue()]++;
    }

    double entropy = 0.0;
    for (int i = 0; i < data.numClasses(); i++) {
        if (nClass[i] > 0) {
            double ratio = nClass[i] / data.numInstances();
            entropy -= (ratio * Utils.log2(ratio));
        }
    }

    return entropy;
}

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * Splits a dataset into one subset per value of the given attribute.
 *
 * @param data the instances to distribute
 * @param attr the attribute whose value, cast to {@code int}, selects the target subset
 * @return an array holding {@code attr.numValues()} datasets, indexed by attribute value
 */
public Instances[] splitInstancesOnAttribute(Instances data, Attribute attr) {
    final int valueCount = attr.numValues();
    final int instanceCount = data.numInstances();
    Instances[] partitions = new Instances[valueCount];

    // Allocate every partition up front, each with room for the whole dataset.
    for (int v = 0; v < valueCount; v++) {
        partitions[v] = new Instances(data, instanceCount);
    }

    // Route each instance to the partition matching its attribute value.
    for (int idx = 0; idx < instanceCount; idx++) {
        Instance member = data.instance(idx);
        int target = (int) member.value(attr);
        partitions[target].add(member);
    }

    for (Instances partition : partitions) {
        partition.compactify();
    }

    return partitions;
}

From source file:distributed.core.DistributedUtils.java

License:Open Source License

/**
 * Accumulates summary statistics for one numeric attribute, treating zeros as missing.
 *
 * Values that are missing or equal to zero increment the MISSING slot; all other
 * values update COUNT, SUM, SUMSQ, MIN and MAX. {@code computeDerived()} is called on
 * the result before it is returned.
 *
 * @param denormalized the dataset to scan
 * @param attIndex index of the attribute to summarise
 * @return a {@code NumericStats} named after the attribute, with the statistics filled in
 */
public static NumericStats getNumericAttributeStatsSparse(Instances denormalized, int attIndex) {
    NumericStats ns = new NumericStats(denormalized.attribute(attIndex).name());

    // Assumes getStats() exposes the live backing array; the in-place updates depend on it.
    final double[] stats = ns.getStats();
    final int missingIdx = ArffSummaryNumericMetric.MISSING.ordinal();
    final int countIdx = ArffSummaryNumericMetric.COUNT.ordinal();
    final int sumIdx = ArffSummaryNumericMetric.SUM.ordinal();
    final int sumSqIdx = ArffSummaryNumericMetric.SUMSQ.ordinal();
    final int minIdx = ArffSummaryNumericMetric.MIN.ordinal();
    final int maxIdx = ArffSummaryNumericMetric.MAX.ordinal();

    final int rowCount = denormalized.numInstances();
    for (int row = 0; row < rowCount; row++) {
        final double value = denormalized.instance(row).value(attIndex);

        if (Utils.isMissingValue(value) || value == 0) {
            stats[missingIdx]++;
            continue;
        }

        stats[countIdx]++;
        stats[sumIdx] += value;
        stats[sumSqIdx] += value * value;

        if (Double.isNaN(stats[minIdx])) {
            // First counted value: a NaN MIN means neither bound has been set yet.
            stats[maxIdx] = value;
            stats[minIdx] = value;
        } else if (value < stats[minIdx]) {
            stats[minIdx] = value;
        } else if (value > stats[maxIdx]) {
            stats[maxIdx] = value;
        }
    }

    ns.computeDerived();

    return ns;
}

From source file:distributions.ClassdistributionNumeric.java

/**
 *
 * @param inst/*from www.j  a  va 2s . c o m*/
 * @param classID
 * @param kID
 */
public ClassdistributionNumeric(Instances inst, int classID, int kID) {

    this.inst = new Instances(inst);

    this.classID = classID;
    this.kID = kID;

    attwerten = new double[inst.numInstances()];
    for (int k = 0; k < inst.numInstances(); k++) {
        attwerten[k] = inst.instance(k).value(kID);
    }

    attwerten = super.EliminiereDopelt(attwerten);
    /*for(int j=0;j<attwerten.length;j++){
     for (int i = 0; i < inst.numInstances(); i++) {
               
            
     attwerten[(int) inst.instance(i).value(kID)] = inst.instance(i).value(kID);
     }
            
     }*/

    probs = new double[attwerten.length];

}

From source file:distributions.NumericDistribution.java

public NumericDistribution(Instances inst, int kID) {

    this.inst = new Instances(inst);
    this.kID = kID;
    attwerten = new double[inst.numAttributes()];
    for (int i = 0, j = 0; i < attwerten.length && j < inst.numInstances(); i++, j++) {
        attwerten[i] = inst.instance(j).value(kID);
    }/* ww w.  j  ava 2 s. c  o m*/

    attwerten = super.EliminiereDopelt(attwerten);
    /* for (int k = 0; k < inst.numInstances(); k++) {
    attwerten[(int) inst.instance(k).value(kID)] = inst.instance(k).value(kID);
     }*/

    probs = new double[attwerten.length];

}

From source file:distributions.NumericNumericDistribution.java

public NumericNumericDistribution(Instances inst, int kID, int pID) {

    this.inst = new Instances(inst);
    attwerten1 = new double[inst.numInstances()];
    attwerten2 = new double[inst.numInstances()];
    for (int k = 0, j = 0; k < attwerten1.length && j < attwerten2.length; k++, j++) {
        attwerten2[k] = inst.instance(k).value(pID);
        attwerten1[j] = inst.instance(j).value(kID);
    }/*  w ww  .j av a2 s .c  om*/
    /* for (int i = 0; i < inst.numInstances(); i++) {
            
    attwerten1[(int) inst.instance(i).value(kID)] = inst.instance(i).value(kID);
            
            
    attwerten2[(int) inst.instance(i).value(pID)] = inst.instance(i).value(pID);
            
     }*/

    this.attwerten1 = super.EliminiereDopelt(this.attwerten1);
    this.attwerten2 = super.EliminiereDopelt(this.attwerten2);
    this.probs = new double[attwerten1.length][attwerten2.length];
    zaelt = new double[attwerten1.length][attwerten2.length];

}

From source file:DiversifyQuery.DivTopK.java

/**
 * Transforms a dataset into a representation of distances to the selected shapelets.
 *
 * Shapelets are gathered from {@code DResultSet} for k = 5 down to 1, taking each
 * result list whose size equals k. Each input instance then becomes an output
 * instance holding its distance to every shapelet followed by its class value.
 *
 * @param data the dataset to transform; its class index must be set
 * @return a dataset in the format given by {@code determineOutputFormat}, with one
 *         instance per input instance
 * @throws Exception if no shapelets were collected or the class index of
 *         {@code data} is not set
 */
public Instances transformData(Instances data) throws Exception {
    ArrayList<LegacyShapelet> shapelets = new ArrayList<>();
    // Count down from 5 to 1. The previous condition (i <= 1) was false on entry, so
    // the loop never ran and the method always failed with "no shapelets".
    for (int i = 5; i >= 1; i--) {
        if (DResultSet.get(i).result.size() == i) {
            shapelets.addAll(DResultSet.get(i).result);
        }
    }
    if (shapelets.size() < 1) {
        throw new Exception(
                "Number of shapelets initialised incorrectly - please select value of k greater than or equal to 1 (Usage: setNumberOfShapelets");
    }

    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }

    Instances output = determineOutputFormat(data, shapelets);

    // One output instance per input instance: shapelet distances first, class value last.
    final int shapeletCount = shapelets.size();
    for (int i = 0; i < data.numInstances(); i++) {
        Instance source = data.instance(i);
        Instance toAdd = new Instance(shapeletCount + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : shapelets) {
            double dist = subsequenceDistance(s.content, source);
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(shapeletCount, source.classValue());
        output.add(toAdd);
    }
    return output;
}