List of usage examples for weka.core Instances sumOfWeights
public double sumOfWeights()
From source file: j48.NBTreeModelSelection.java
License:Open Source License
/**
 * Selects an NBTree-type split for the given dataset.
 *
 * <p>First builds a global naive-Bayes model ({@code NBTreeNoSplit}) at this
 * node. That no-split model is returned directly when splitting cannot help:
 * fewer than 5 instances, zero global errors, too few instances per
 * {@code m_minNoObj}, or a single-class distribution. Otherwise one
 * {@code NBTreeSplit} is built per non-class attribute and the one with the
 * lowest cross-validated error is chosen — but only if it reduces the global
 * error by at least 5%; otherwise the no-split model wins.
 *
 * @param data the training instances to select a split for
 * @return the best split model found, the no-split model if no split is
 *         worthwhile, or {@code null} if an exception occurred
 */
public final ClassifierSplitModel selectModel(Instances data) {

    double globalErrors = 0;
    double minResult;
    // NOTE(review): currentResult is declared but never used in this method.
    double currentResult;
    NBTreeSplit[] currentModel;
    NBTreeSplit bestModel = null;
    NBTreeNoSplit noSplitModel = null;
    int validModels = 0;
    // NOTE(review): multiVal is written below but never read afterwards;
    // in the ancestral C45ModelSelection it influenced split selection.
    boolean multiVal = true;
    Distribution checkDistribution;
    Attribute attribute;
    double sumOfWeights;
    int i;

    try {
        // Build the global naive-Bayes model at this node.
        noSplitModel = new NBTreeNoSplit();
        noSplitModel.buildClassifier(data);
        if (data.numInstances() < 5) {
            // Too little data to consider splitting.
            return noSplitModel;
        }

        // Evaluate the global model; a perfect fit needs no split.
        globalErrors = noSplitModel.getErrors();
        if (globalErrors == 0) {
            return noSplitModel;
        }

        // Check if all instances belong to one class or if there are not
        // enough instances to split.
        checkDistribution = new Distribution(data);
        if (Utils.sm(checkDistribution.total(), m_minNoObj) || Utils.eq(checkDistribution.total(),
                checkDistribution.perClass(checkDistribution.maxClass()))) {
            return noSplitModel;
        }

        // Check if all attributes are nominal and have a lot of values
        // (relative to the full training set size).
        if (m_allData != null) {
            Enumeration enu = data.enumerateAttributes();
            while (enu.hasMoreElements()) {
                attribute = (Attribute) enu.nextElement();
                if ((attribute.isNumeric()) || (Utils.sm((double) attribute.numValues(),
                        (0.3 * (double) m_allData.numInstances())))) {
                    multiVal = false;
                    break;
                }
            }
        }

        currentModel = new NBTreeSplit[data.numAttributes()];
        sumOfWeights = data.sumOfWeights();

        // Build one candidate split per attribute (skipping the class).
        for (i = 0; i < data.numAttributes(); i++) {
            if (i != (data).classIndex()) {
                currentModel[i] = new NBTreeSplit(i, m_minNoObj, sumOfWeights);
                currentModel[i].setGlobalModel(noSplitModel);
                currentModel[i].buildClassifier(data);
                // Count the candidates that produced a usable split.
                if (currentModel[i].checkModel()) {
                    validModels++;
                }
            } else {
                currentModel[i] = null;
            }
        }

        // No attribute produced a usable split.
        if (validModels == 0) {
            return noSplitModel;
        }

        // Find the "best" attribute to split on: lowest error among the
        // valid candidates, starting from the global error as the baseline.
        minResult = globalErrors;
        for (i = 0; i < data.numAttributes(); i++) {
            if ((i != (data).classIndex()) && (currentModel[i].checkModel())) {
                if (currentModel[i].getErrors() < minResult) {
                    bestModel = currentModel[i];
                    minResult = currentModel[i].getErrors();
                }
            }
        }

        // Require at least a 5% relative error reduction before splitting.
        if (((globalErrors - minResult) / globalErrors) < 0.05) {
            return noSplitModel;
        }

        return bestModel;
    } catch (Exception e) {
        // NOTE(review): exception is printed and null returned; callers must
        // tolerate a null result.
        e.printStackTrace();
    }
    return null;
}
From source file: milk.classifiers.MIRBFNetwork.java
License:Open Source License
/**
 * Transforms a set of multi-instance exemplars into cluster-membership space.
 *
 * <p>All instances from all exemplars are pooled into one dataset, each
 * instance weighted by {@code 1/numInstances} of its exemplar so every
 * exemplar contributes equal total weight; weights are then rescaled so the
 * pooled weight sum equals the instance count. A density-based clusterer
 * (k-means wrapped in {@code MakeDensityBasedClusterer}) is configured on
 * {@code m_clm} from this pooled data, and each exemplar is filtered through
 * it individually, with the bag-id attribute re-inserted at index 0.
 *
 * @param ex the exemplars to transform
 * @return the transformed exemplars in the clustered feature space
 * @throws Exception if filtering or clusterer configuration fails
 */
public Exemplars transform(Exemplars ex) throws Exception {

    // Throw all the instances together into one pooled dataset.
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        // Equal total weight per exemplar, regardless of bag size.
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }

    // Rescale so that sumOfWeights() == numInstances() for the pooled data.
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    // Configure the clustering filter: k-means wrapped as a density-based
    // clusterer, ignoring the bag-id attribute (1-based index for Weka).
    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Use the filter on the pooled data only to fix its output format;
    // discard the filtered instances and keep an empty header.
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    // Re-insert the bag-id attribute at position 0.
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Go through exemplars, filter each one, and add to the new dataset.
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        // Stamp every filtered instance with its exemplar's bag id.
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    return newExs;
}
From source file: myclassifier.myC45Pack.MyClassifierTree.java
/** * Builds the tree structure./*from w w w . j a v a2 s .co m*/ * * @param data the data for which the tree structure is to be * generated. * @param keepData is training data to be kept? * @throws Exception if something goes wrong */ public void buildTree(Instances data, boolean keepData) throws Exception { Instances[] localInstances; if (keepData) { train = data; } test = null; isLeaf = false; isEmpty = false; childTree = null; localModel = toSelectModel.selectModel(data); if (localModel.numSubsets() > 1) { localInstances = localModel.split(data); data = null; childTree = new MyClassifierTree[localModel.numSubsets()]; for (int i = 0; i < childTree.length; i++) { childTree[i] = getNewTree(localInstances[i]); localInstances[i] = null; } } else { isLeaf = true; if (Utils.eq(data.sumOfWeights(), 0)) { isEmpty = true; } data = null; } }
From source file: myclassifier.myC45Pack.MyClassifierTree.java
/** * Builds the tree structure with hold out set * * @param train the data for which the tree structure is to be * generated.//w w w .j ava 2s .c o m * @param test the test data for potential pruning * @param keepData is training Data to be kept? * @throws Exception if something goes wrong */ public void buildTree(Instances train, Instances test, boolean keepData) throws Exception { //local variable Instances[] localTrain, localTest; int i; if (keepData) { this.train = train; } isLeaf = false; isEmpty = false; childTree = null; localModel = toSelectModel.selectModel(train, test); this.test = new Distribution(test, localModel); if (localModel.numSubsets() > 1) { localTrain = localModel.split(train); localTest = localModel.split(test); train = test = null; childTree = new MyClassifierTree[localModel.numSubsets()]; for (i = 0; i < childTree.length; i++) { childTree[i] = getNewTree(localTrain[i], localTest[i]); localTrain[i] = null; localTest[i] = null; } } else { //tidak ada isLeaf = true; if (Utils.eq(train.sumOfWeights(), 0)) isEmpty = true; train = test = null; } }