Example usage for weka.core Instances enumerateInstances

Introduction

In this page you can find the example usage for weka.core Instances enumerateInstances.

Prototype

publicEnumeration<Instance> enumerateInstances()

Source Link

Document

Returns an enumeration of all instances in the dataset.

Usage

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * //w w w  .  jav  a  2 s. c  om
 * @param data 
 */
public void buildTree(Instances data) {
    // exit if there is no data left in the dataset
    if (data.numInstances() == 0) {
        mChild = null;
        return;
    }

    double[] informationGains = new double[data.numAttributes()];
    Enumeration enumAttrs = data.enumerateAttributes();
    while (enumAttrs.hasMoreElements()) {
        Attribute attr = (Attribute) enumAttrs.nextElement();
        informationGains[attr.index()] = computeGain(data, attr);
    }
    int maxIdx = Utils.maxIndex(informationGains);

    if (Utils.eq(informationGains[maxIdx], 0)) {
        mClassDistribution = new int[data.numClasses()];
        Enumeration enumInst = data.enumerateInstances();
        while (enumInst.hasMoreElements()) {
            Instance instance = (Instance) enumInst.nextElement();
            mClassDistribution[(int) instance.classValue()]++;
        }
        mClassValue = Utils.maxIndex(mClassDistribution);
    } else {
        mSplitAttribute = data.attribute(maxIdx);
        Instances[] splitInstances = splitInstancesOnAttribute(data, mSplitAttribute);
        mChild = new VerandaTree[mSplitAttribute.numValues()];
        for (int i = 0; i < mChild.length; i++) {
            mChild[i] = new VerandaTree();
            mChild[i].buildTree(splitInstances[i]);
        }
    }
}

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * /*from   www  .ja  v  a 2  s. c o  m*/
 * @param data 
 * @return  
 */
public double computeEntropy(Instances data) {
    double[] nClass = new double[data.numClasses()];
    Enumeration enumInstance = data.enumerateInstances();
    while (enumInstance.hasMoreElements()) {
        Instance instance = (Instance) enumInstance.nextElement();
        nClass[(int) instance.classValue()]++;
    }

    double entropy = 0.0;
    for (int i = 0; i < data.numClasses(); i++) {
        if (nClass[i] > 0) {
            double ratio = nClass[i] / data.numInstances();
            entropy -= (ratio * Utils.log2(ratio));
        }
    }

    return entropy;
}

From source file:dewaweebtreeclassifier.veranda.VerandaTree.java

/**
 * /*from www.  j  av  a2  s.  c o  m*/
 * @param data
 * @param attr
 * @return 
 */
public Instances[] splitInstancesOnAttribute(Instances data, Attribute attr) {
    Instances[] splitInstances = new Instances[attr.numValues()];

    for (int i = 0; i < attr.numValues(); i++) {
        splitInstances[i] = new Instances(data, data.numInstances());
    }

    Enumeration enumInstance = data.enumerateInstances();
    while (enumInstance.hasMoreElements()) {
        Instance instance = (Instance) enumInstance.nextElement();
        splitInstances[(int) instance.value(attr)].add(instance);
    }

    for (int i = 0; i < attr.numValues(); i++) {
        splitInstances[i].compactify();
    }

    return splitInstances;
}

From source file:dewaweebtreeclassifier.Veranda.java

/**
 * /*from  ww w  . ja  v a 2  s . c  o  m*/
 * @param data
 * @return
 */
public boolean isHaveMissingAttributes(Instances data) {
    Enumeration enumInst = data.enumerateInstances();
    while (enumInst.hasMoreElements()) {
        Instance instance = (Instance) enumInst.nextElement();
        if (instance.hasMissingValue()) {
            return true;
        }
    }

    return false;
}

From source file:fiit.gpminerstatic.Main.java

public static void main(String args[]) {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = 0; i < 1000; i++) {
        attributes.add(new Attribute(String.valueOf(i)));
    }//from   w  w w.  j a  v a  2 s . c om
    // load data from file into instances 
    SessionsFileStream stream = new SessionsFileStream(
            "g:\\workspace_GPMiner\\data\\alef_sessions_aggregated.csv");
    Instances instances = new Instances("Instances", attributes, 1000);
    Enumeration<Instance> enumer = instances.enumerateInstances();
    while (enumer.hasMoreElements()) {
        instances.add(enumer.nextElement());
    }
    try {
        // make global patterns with fpgrowth alghoritm 
        FPGrowth fp = new FPGrowth();
        fp.buildAssociations(instances);
        AssociationRules assocRules = fp.getAssociationRules();
    } catch (Exception ex) {
        Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:guineu.modules.dataanalysis.clustering.em.EMClusterer.java

License:Open Source License

public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    Clusterer clusterer = new EM();

    int numberOfIterations = parameters.getParameter(EMClustererParameters.numberOfIterations).getValue();
    options[0] = "-I";
    options[1] = String.valueOf(numberOfIterations);

    try {/* w ww. j av  a2s  . c  o  m*/
        ((EM) clusterer).setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        this.numberOfGroups = clusterer.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(EMClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return clusters;
}

From source file:guineu.modules.dataanalysis.clustering.farthestfirst.FarthestFirstClusterer.java

License:Open Source License

public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    Clusterer clusterer = new FarthestFirst();

    int numberOfGroups = parameters.getParameter(FarthestFirstClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);

    try {/* www .j a v a2 s  .  c o  m*/
        ((FarthestFirst) clusterer).setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        this.numberOfGroups = clusterer.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(FarthestFirstClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return clusters;
}

From source file:guineu.modules.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java

License:Open Source License

public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    Clusterer clusterer = new SimpleKMeans();

    int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);

    try {/*from   ww w  .j a  v a2s. c  o  m*/
        ((SimpleKMeans) clusterer).setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        this.numberOfGroups = clusterer.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(SimpleKMeansClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return clusters;
}

From source file:gyc.OverBoostM1.java

License:Open Source License

/**
 * Sets the weights for the next iteration.
 * /*from  w ww .  j a  v a  2s .c  o m*/
 * @param training the training instances
 * @param reweight the reweighting factor
 * @throws Exception if something goes wrong
 */
protected void setWeights(Instances training, double reweight) throws Exception {

    double oldSumOfWeights, newSumOfWeights;

    oldSumOfWeights = training.sumOfWeights();
    Enumeration enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        if (!Utils.eq(m_Classifiers[m_NumIterationsPerformed].classifyInstance(instance),
                instance.classValue()))
            instance.setWeight(instance.weight() * reweight);
    }

    // Renormalize weights
    newSumOfWeights = training.sumOfWeights();
    enu = training.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        instance.setWeight(instance.weight() * oldSumOfWeights / newSumOfWeights);
    }
}

From source file:iris.ID3.java

public void makeLikeAWhat(Instances instances) {
    // Create storage for different info gains
    double[] infoGains = new double[instances.numAttributes()];
    // Enumerate through attributes to find the best gain
    Enumeration attributeEnum = instances.enumerateAttributes();
    while (attributeEnum.hasMoreElements()) {
        // Loop through attributes, adding gain to infoGains array
        Attribute att = (Attribute) attributeEnum.nextElement();
        infoGains[att.index()] = infoGain(instances, att);
    }//w  ww  .  ja va2s .  c o m
    // Use maxIndex to find the highest info gain in the array
    highestInfoGain = instances.attribute(Utils.maxIndex(infoGains));

    // Make a leaf if there is no more info to gain
    // Otherwise, create children
    // Check if there is no more info to gain
    if (Utils.eq(infoGains[highestInfoGain.index()], 0)) {
        highestInfoGain = null;
        // Instantiate maxDistribution
        maxDistribution = new double[instances.numClasses()];
        // Set up enumerator for instances
        Enumeration instanceEnum = instances.enumerateInstances();
        // Tally classes
        while (instanceEnum.hasMoreElements()) {
            Instance instance = (Instance) instanceEnum.nextElement();
            maxDistribution[(int) instance.classValue()]++;
        }
        // Normalize data for easier manipulation
        Utils.normalize(maxDistribution);
        // Get the max index of the distrubtion
        classValue = Utils.maxIndex(maxDistribution);
        // Save class attribute
        classAttribute = instances.classAttribute();
    }
    // Create children
    else {
        // Split best attribute into bins
        Instances[] bins = makeBins(instances, highestInfoGain);
        // Create nodes
        children = new ID3[highestInfoGain.numValues()];
        for (int i = 0; i < highestInfoGain.numValues(); i++) {
            children[i] = new ID3();
            children[i].makeLikeAWhat(bins[i]);
        }
    }
}