List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/** * //w w w . jav a 2 s. c om * @param data */ public void buildTree(Instances data) { // exit if there is no data left in the dataset if (data.numInstances() == 0) { mChild = null; return; } double[] informationGains = new double[data.numAttributes()]; Enumeration enumAttrs = data.enumerateAttributes(); while (enumAttrs.hasMoreElements()) { Attribute attr = (Attribute) enumAttrs.nextElement(); informationGains[attr.index()] = computeGain(data, attr); } int maxIdx = Utils.maxIndex(informationGains); if (Utils.eq(informationGains[maxIdx], 0)) { mClassDistribution = new int[data.numClasses()]; Enumeration enumInst = data.enumerateInstances(); while (enumInst.hasMoreElements()) { Instance instance = (Instance) enumInst.nextElement(); mClassDistribution[(int) instance.classValue()]++; } mClassValue = Utils.maxIndex(mClassDistribution); } else { mSplitAttribute = data.attribute(maxIdx); Instances[] splitInstances = splitInstancesOnAttribute(data, mSplitAttribute); mChild = new VerandaTree[mSplitAttribute.numValues()]; for (int i = 0; i < mChild.length; i++) { mChild[i] = new VerandaTree(); mChild[i].buildTree(splitInstances[i]); } } }
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/** * /*from www .ja v a 2 s. c o m*/ * @param data * @return */ public double computeEntropy(Instances data) { double[] nClass = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance instance = (Instance) enumInstance.nextElement(); nClass[(int) instance.classValue()]++; } double entropy = 0.0; for (int i = 0; i < data.numClasses(); i++) { if (nClass[i] > 0) { double ratio = nClass[i] / data.numInstances(); entropy -= (ratio * Utils.log2(ratio)); } } return entropy; }
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/** * /*from www. j av a2 s. c o m*/ * @param data * @param attr * @return */ public Instances[] splitInstancesOnAttribute(Instances data, Attribute attr) { Instances[] splitInstances = new Instances[attr.numValues()]; for (int i = 0; i < attr.numValues(); i++) { splitInstances[i] = new Instances(data, data.numInstances()); } Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance instance = (Instance) enumInstance.nextElement(); splitInstances[(int) instance.value(attr)].add(instance); } for (int i = 0; i < attr.numValues(); i++) { splitInstances[i].compactify(); } return splitInstances; }
From source file:dewaweebtreeclassifier.Veranda.java
/** * /*from ww w . ja v a 2 s . c o m*/ * @param data * @return */ public boolean isHaveMissingAttributes(Instances data) { Enumeration enumInst = data.enumerateInstances(); while (enumInst.hasMoreElements()) { Instance instance = (Instance) enumInst.nextElement(); if (instance.hasMissingValue()) { return true; } } return false; }
From source file:fiit.gpminerstatic.Main.java
public static void main(String args[]) { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); for (int i = 0; i < 1000; i++) { attributes.add(new Attribute(String.valueOf(i))); }//from w w w. j a v a 2 s . c om // load data from file into instances SessionsFileStream stream = new SessionsFileStream( "g:\\workspace_GPMiner\\data\\alef_sessions_aggregated.csv"); Instances instances = new Instances("Instances", attributes, 1000); Enumeration<Instance> enumer = instances.enumerateInstances(); while (enumer.hasMoreElements()) { instances.add(enumer.nextElement()); } try { // make global patterns with fpgrowth alghoritm FPGrowth fp = new FPGrowth(); fp.buildAssociations(instances); AssociationRules assocRules = fp.getAssociationRules(); } catch (Exception ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:guineu.modules.dataanalysis.clustering.em.EMClusterer.java
License:Open Source License
/**
 * Clusters the dataset with EM and returns the cluster index assigned to each instance.
 *
 * <p>The configured iteration count is handed to the clusterer through its {@code -I} option.
 * After a successful run the number of clusters found is stored in {@code numberOfGroups}.
 * Any exception is logged at SEVERE level; the assignments collected up to that point are
 * still returned.
 *
 * @param dataset the instances to cluster
 * @return the cluster index of each instance, in dataset order
 */
public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> assignments = new ArrayList<Integer>();
    EM em = new EM();
    int numberOfIterations = parameters.getParameter(EMClustererParameters.numberOfIterations).getValue();
    String[] emOptions = { "-I", String.valueOf(numberOfIterations) };

    try {
        em.setOptions(emOptions);
        em.buildClusterer(dataset);
        for (int i = 0; i < dataset.numInstances(); i++) {
            assignments.add(em.clusterInstance(dataset.instance(i)));
        }
        this.numberOfGroups = em.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(EMClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return assignments;
}
From source file:guineu.modules.dataanalysis.clustering.farthestfirst.FarthestFirstClusterer.java
License:Open Source License
/**
 * Clusters the dataset with FarthestFirst and returns the cluster index of each instance.
 *
 * <p>The configured group count is handed to the clusterer through its {@code -N} option.
 * After a successful run the number of clusters found is stored in {@code numberOfGroups}.
 * Any exception is logged at SEVERE level; the assignments collected up to that point are
 * still returned.
 *
 * @param dataset the instances to cluster
 * @return the cluster index of each instance, in dataset order
 */
public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> assignments = new ArrayList<Integer>();
    FarthestFirst farthestFirst = new FarthestFirst();
    // Named differently from the field so the two are not confused.
    int requestedGroups = parameters.getParameter(FarthestFirstClustererParameters.numberOfGroups).getValue();
    String[] clustererOptions = { "-N", String.valueOf(requestedGroups) };

    try {
        farthestFirst.setOptions(clustererOptions);
        farthestFirst.buildClusterer(dataset);
        for (int i = 0; i < dataset.numInstances(); i++) {
            assignments.add(farthestFirst.clusterInstance(dataset.instance(i)));
        }
        this.numberOfGroups = farthestFirst.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(FarthestFirstClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return assignments;
}
From source file:guineu.modules.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java
License:Open Source License
/**
 * Clusters the dataset with SimpleKMeans and returns the cluster index of each instance.
 *
 * <p>The configured group count is handed to the clusterer through its {@code -N} option.
 * After a successful run the number of clusters found is stored in {@code numberOfGroups}.
 * Any exception is logged at SEVERE level; the assignments collected up to that point are
 * still returned.
 *
 * @param dataset the instances to cluster
 * @return the cluster index of each instance, in dataset order
 */
public List<Integer> getClusterGroups(Instances dataset) {
    List<Integer> assignments = new ArrayList<Integer>();
    SimpleKMeans kMeans = new SimpleKMeans();
    // Named differently from the field so the two are not confused.
    int requestedGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    String[] clustererOptions = { "-N", String.valueOf(requestedGroups) };

    try {
        kMeans.setOptions(clustererOptions);
        kMeans.buildClusterer(dataset);
        for (int i = 0; i < dataset.numInstances(); i++) {
            assignments.add(kMeans.clusterInstance(dataset.instance(i)));
        }
        this.numberOfGroups = kMeans.numberOfClusters();
    } catch (Exception ex) {
        Logger.getLogger(SimpleKMeansClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return assignments;
}
From source file:gyc.OverBoostM1.java
License:Open Source License
/** * Sets the weights for the next iteration. * /*from w ww . j a v a 2s .c o m*/ * @param training the training instances * @param reweight the reweighting factor * @throws Exception if something goes wrong */ protected void setWeights(Instances training, double reweight) throws Exception { double oldSumOfWeights, newSumOfWeights; oldSumOfWeights = training.sumOfWeights(); Enumeration enu = training.enumerateInstances(); while (enu.hasMoreElements()) { Instance instance = (Instance) enu.nextElement(); if (!Utils.eq(m_Classifiers[m_NumIterationsPerformed].classifyInstance(instance), instance.classValue())) instance.setWeight(instance.weight() * reweight); } // Renormalize weights newSumOfWeights = training.sumOfWeights(); enu = training.enumerateInstances(); while (enu.hasMoreElements()) { Instance instance = (Instance) enu.nextElement(); instance.setWeight(instance.weight() * oldSumOfWeights / newSumOfWeights); } }
From source file:iris.ID3.java
public void makeLikeAWhat(Instances instances) { // Create storage for different info gains double[] infoGains = new double[instances.numAttributes()]; // Enumerate through attributes to find the best gain Enumeration attributeEnum = instances.enumerateAttributes(); while (attributeEnum.hasMoreElements()) { // Loop through attributes, adding gain to infoGains array Attribute att = (Attribute) attributeEnum.nextElement(); infoGains[att.index()] = infoGain(instances, att); }//w ww . ja va2s . c o m // Use maxIndex to find the highest info gain in the array highestInfoGain = instances.attribute(Utils.maxIndex(infoGains)); // Make a leaf if there is no more info to gain // Otherwise, create children // Check if there is no more info to gain if (Utils.eq(infoGains[highestInfoGain.index()], 0)) { highestInfoGain = null; // Instantiate maxDistribution maxDistribution = new double[instances.numClasses()]; // Set up enumerator for instances Enumeration instanceEnum = instances.enumerateInstances(); // Tally classes while (instanceEnum.hasMoreElements()) { Instance instance = (Instance) instanceEnum.nextElement(); maxDistribution[(int) instance.classValue()]++; } // Normalize data for easier manipulation Utils.normalize(maxDistribution); // Get the max index of the distrubtion classValue = Utils.maxIndex(maxDistribution); // Save class attribute classAttribute = instances.classAttribute(); } // Create children else { // Split best attribute into bins Instances[] bins = makeBins(instances, highestInfoGain); // Create nodes children = new ID3[highestInfoGain.numValues()]; for (int i = 0; i < highestInfoGain.numValues(); i++) { children[i] = new ID3(); children[i].makeLikeAWhat(bins[i]); } } }