Example usage for weka.core Instances sumOfWeights

Introduction

This page collects usage examples for the weka.core.Instances method sumOfWeights().

Prototype

public double sumOfWeights()

Document

Computes the sum of all the instances' weights.
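
As a quick illustration, the following is a minimal, self-contained sketch (assuming the Weka 3.7+ API, where DenseInstance is the concrete instance class; the class name SumOfWeightsDemo is ours): every instance carries a weight, 1.0 by default, and sumOfWeights() returns their total.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class SumOfWeightsDemo {
    public static void main(String[] args) {
        // Two numeric attributes, built entirely in memory.
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        Instances data = new Instances("demo", attrs, 2);

        // DenseInstance(weight, values): the first instance keeps the
        // default weight of 1.0, the second is down-weighted to 0.5.
        data.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));
        data.add(new DenseInstance(0.5, new double[] { 3.0, 4.0 }));

        System.out.println(data.sumOfWeights()); // prints 1.5
    }
}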

Usage

From source file: j48.NBTreeModelSelection.java

License: Open Source License

/**
 * Selects NBTree-type split for the given dataset.
 */
public final ClassifierSplitModel selectModel(Instances data) {

    double globalErrors = 0;

    double minResult;
    double currentResult;
    NBTreeSplit[] currentModel;
    NBTreeSplit bestModel = null;
    NBTreeNoSplit noSplitModel = null;
    int validModels = 0;
    boolean multiVal = true;
    Distribution checkDistribution;
    Attribute attribute;
    double sumOfWeights;
    int i;

    try {
        // build the global model at this node
        noSplitModel = new NBTreeNoSplit();
        noSplitModel.buildClassifier(data);
        if (data.numInstances() < 5) {
            return noSplitModel;
        }

        // evaluate it
        globalErrors = noSplitModel.getErrors();
        if (globalErrors == 0) {
            return noSplitModel;
        }

        // Check if all Instances belong to one class or if not
        // enough Instances to split.
        checkDistribution = new Distribution(data);
        if (Utils.sm(checkDistribution.total(), m_minNoObj) || Utils.eq(checkDistribution.total(),
                checkDistribution.perClass(checkDistribution.maxClass()))) {
            return noSplitModel;
        }

        // Check if all attributes are nominal and have a 
        // lot of values.
        if (m_allData != null) {
            Enumeration enu = data.enumerateAttributes();
            while (enu.hasMoreElements()) {
                attribute = (Attribute) enu.nextElement();
                if ((attribute.isNumeric()) || (Utils.sm((double) attribute.numValues(),
                        (0.3 * (double) m_allData.numInstances())))) {
                    multiVal = false;
                    break;
                }
            }
        }

        currentModel = new NBTreeSplit[data.numAttributes()];
        sumOfWeights = data.sumOfWeights();

        // For each attribute.
        for (i = 0; i < data.numAttributes(); i++) {

            // Apart from class attribute.
            if (i != data.classIndex()) {

                // Get models for current attribute.
                currentModel[i] = new NBTreeSplit(i, m_minNoObj, sumOfWeights);
                currentModel[i].setGlobalModel(noSplitModel);
                currentModel[i].buildClassifier(data);

                // Check if useful split for current attribute
                // exists and check for enumerated attributes with 
                // a lot of values.
                if (currentModel[i].checkModel()) {
                    validModels++;
                }
            } else {
                currentModel[i] = null;
            }
        }

        // Check if any useful split was found.
        if (validModels == 0) {
            return noSplitModel;
        }

        // Find "best" attribute to split on.
        minResult = globalErrors;
        for (i = 0; i < data.numAttributes(); i++) {
            if ((i != data.classIndex()) && (currentModel[i].checkModel())) {
                if (currentModel[i].getErrors() < minResult) {
                    bestModel = currentModel[i];
                    minResult = currentModel[i].getErrors();
                }
            }
        }
        // Check whether the best split is useful enough
        // (at least a 5% relative error reduction).
        if (((globalErrors - minResult) / globalErrors) < 0.05) {
            return noSplitModel;
        }

        // Otherwise return the best split found (its global model was
        // already set inside the loop above).
        return bestModel;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
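
Note that this example calls data.sumOfWeights() once, before the per-attribute loop, and hands the cached total to every NBTreeSplit; since sumOfWeights() scans the whole dataset, recomputing it for each attribute would be wasted work.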

From source file: milk.classifiers.MIRBFNetwork.java

License: Open Source License

public Exemplars transform(Exemplars ex) throws Exception {

    // Throw all the instances together
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }
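    // Rescale all weights so the total weight equals the instance count.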
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Use filter and discard result
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Go through exemplars and add them to new dataset
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    //System.err.println("Finished transforming");
    //System.err.println(newExs);
    return newExs;
}
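
The two loops above implement a common renormalization pattern: each instance in a bag first gets weight 1/bagSize, then all weights are rescaled by a common factor so that sumOfWeights() equals numInstances() again. Here is that second step in isolation as a sketch (normalizeWeights is a hypothetical helper; a guard against a zero total is added):

import weka.core.Instances;

public class WeightNormalizer {
    // Rescales all instance weights by a common factor so that
    // sumOfWeights() afterwards equals numInstances().
    public static void normalizeWeights(Instances data) {
        double total = data.sumOfWeights();
        if (total <= 0) {
            return; // nothing to rescale
        }
        double factor = data.numInstances() / total;
        for (int i = 0; i < data.numInstances(); i++) {
            data.instance(i).setWeight(data.instance(i).weight() * factor);
        }
    }
}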

From source file: myclassifier.myC45Pack.MyClassifierTree.java

/**
 * Builds the tree structure.
 *
 * @param data the data for which the tree structure is to be
 * generated.
 * @param keepData is training data to be kept?
 * @throws Exception if something goes wrong
 */
public void buildTree(Instances data, boolean keepData) throws Exception {
    Instances[] localInstances;
    if (keepData) {
        train = data;
    }
    test = null;
    isLeaf = false;
    isEmpty = false;
    childTree = null;
    localModel = toSelectModel.selectModel(data);
    if (localModel.numSubsets() > 1) {
        localInstances = localModel.split(data);
        data = null;
        childTree = new MyClassifierTree[localModel.numSubsets()];
        for (int i = 0; i < childTree.length; i++) {
            childTree[i] = getNewTree(localInstances[i]);
            localInstances[i] = null;
        }
    } else {
        isLeaf = true;
        if (Utils.eq(data.sumOfWeights(), 0)) {
            isEmpty = true;
        }
        data = null;
    }
}

From source file: myclassifier.myC45Pack.MyClassifierTree.java

/**
 * Builds the tree structure with hold out set
 *
 * @param train the data for which the tree structure is to be
 * generated.
 * @param test the test data for potential pruning
 * @param keepData is training Data to be kept?
 * @throws Exception if something goes wrong
 */
public void buildTree(Instances train, Instances test, boolean keepData) throws Exception {
    //local variable
    Instances[] localTrain, localTest;
    int i;

    if (keepData) {
        this.train = train;
    }
    isLeaf = false;
    isEmpty = false;
    childTree = null;
    localModel = toSelectModel.selectModel(train, test);
    this.test = new Distribution(test, localModel);
    if (localModel.numSubsets() > 1) {
        localTrain = localModel.split(train);
        localTest = localModel.split(test);
        train = test = null;
        childTree = new MyClassifierTree[localModel.numSubsets()];
        for (i = 0; i < childTree.length; i++) {
            childTree[i] = getNewTree(localTrain[i], localTest[i]);
            localTrain[i] = null;
            localTest[i] = null;
        }
    } else {
        // No useful split is available, so this node becomes a leaf.
        isLeaf = true;
        if (Utils.eq(train.sumOfWeights(), 0)) {
            isEmpty = true;
        }
        train = test = null;
    }
}
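
Both buildTree variants end with the same idiom: a branch whose total instance weight is zero is marked empty, and the comparison uses Utils.eq rather than == so that floating-point round-off from repeated fractional splits does not break the test. As a standalone sketch (isEffectivelyEmpty is a hypothetical name):

import weka.core.Instances;
import weka.core.Utils;

public class EmptyNodeCheck {
    // A dataset is "empty" for tree-building purposes when its total
    // weight is zero, even if zero-weight instances are still present.
    public static boolean isEffectivelyEmpty(Instances data) {
        return Utils.eq(data.sumOfWeights(), 0);
    }
}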