List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:cerebro.Id3.java
License:Open Source License
/**
 * Computes the information gain obtained by splitting a dataset on an attribute.
 *
 * The gain is the entropy of the whole dataset minus the entropy of each
 * non-empty subset produced by the split, weighted by that subset's share of
 * the instances.
 *
 * @param data the data for which info gain is to be computed
 * @param att the attribute to split on
 * @return the information gain for the given attribute and data
 * @throws Exception if computation fails
 */
private double computeInfoGain(Instances data, Attribute att) throws Exception {
    double gain = computeEntropy(data);
    Instances[] subsets = splitData(data, att);
    for (int value = 0; value < att.numValues(); value++) {
        Instances subset = subsets[value];
        // Empty subsets contribute nothing and are skipped.
        if (subset.numInstances() <= 0) {
            continue;
        }
        double weight = (double) subset.numInstances() / (double) data.numInstances();
        gain -= weight * computeEntropy(subset);
    }
    return gain;
}
From source file:cerebro.Id3.java
License:Open Source License
/** * Computes the entropy of a dataset./*from w ww . java 2 s . co m*/ * * @param data the data for which entropy is to be computed * @return the entropy of the data's class distribution * @throws Exception if computation fails */ private double computeEntropy(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; } double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) data.numInstances(); return entropy + Utils.log2(data.numInstances()); }
From source file:cerebro.Id3.java
License:Open Source License
/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * One subset is created per attribute value; every instance is added to the
 * subset whose index equals the instance's value for that attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split, indexed by attribute value
 */
private Instances[] splitData(Instances data, Attribute att) {
    int valueCount = att.numValues();
    Instances[] subsets = new Instances[valueCount];
    for (int v = 0; v < valueCount; v++) {
        // Same header as the source data, with capacity for all of its instances.
        subsets[v] = new Instances(data, data.numInstances());
    }

    for (int i = 0; i < data.numInstances(); i++) {
        Instance current = data.instance(i);
        int bucket = (int) current.value(att);
        subsets[bucket].add(current);
    }

    // Release the capacity that was reserved but not used.
    for (Instances subset : subsets) {
        subset.compactify();
    }
    return subsets;
}
From source file:cezeri.evaluater.FactoryEvaluation.java
public static Evaluation performCrossValidateTestAlso(Classifier model, Instances datax, Instances test, boolean show_text, boolean show_plot) { TFigureAttribute attr = new TFigureAttribute(); Random rand = new Random(1); Instances randData = new Instances(datax); randData.randomize(rand);// w ww . j a v a 2s . c om Evaluation eval = null; int folds = randData.numInstances(); try { eval = new Evaluation(randData); for (int n = 0; n < folds; n++) { // randData.randomize(rand); // Instances train = randData; Instances train = randData.trainCV(folds, n); // Instances train = randData.trainCV(folds, n, rand); Classifier clsCopy = Classifier.makeCopy(model); clsCopy.buildClassifier(train); Instances validation = randData.testCV(folds, n); // Instances validation = test.testCV(test.numInstances(), n%test.numInstances()); // CMatrix.fromInstances(train).showDataGrid(); // CMatrix.fromInstances(validation).showDataGrid(); simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation)); observed = FactoryUtils.concatenate(observed, validation.attributeToDoubleArray(validation.classIndex())); } if (show_plot) { double[][] d = new double[2][simulated.length]; d[0] = observed; d[1] = simulated; CMatrix f1 = CMatrix.getInstance(d); attr.figureCaption = "overall performance"; f1.transpose().plot(attr); } if (show_text) { // output evaluation System.out.println(); System.out.println("=== Setup for Overall Cross Validation==="); System.out.println( "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); System.out.println("Dataset: " + randData.relationName()); System.out.println("Folds: " + folds); System.out.println("Seed: " + 1); System.out.println(); System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false)); } } catch (Exception ex) { Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex); } return eval; }
From source file:cezeri.feature.selection.FeatureSelectionInfluence.java
private static double[] getAttributeValues(Instances test) { int n = test.numInstances(); double[] ret = new double[n]; for (int i = 0; i < n; i++) { Instance ins = test.instance(i); ret[i] = ins.classValue();// w w w. j a v a 2 s . c om } return ret; }
From source file:cezeri.utils.FactoryInstance.java
public static double[][] getData(Instances m) { double[][] ret = new double[m.numInstances()][m.numAttributes()]; for (int i = 0; i < m.numInstances(); i++) { Instance ins = m.instance(i);//from w w w . jav a 2s . co m ret[i] = ins.toDoubleArray(); } return ret; }
From source file:cezeri.utils.FactoryInstance.java
public static CMatrix toMatrix(Instances m) { double[][] ret = new double[m.numInstances()][m.numAttributes()]; for (int i = 0; i < m.numInstances(); i++) { Instance ins = m.instance(i);//from www . j a v a2 s. c o m ret[i] = ins.toDoubleArray(); } return CMatrix.getInstance(ret); }
From source file:cezeri.utils.FactoryInstance.java
/**
 * Builds a {@code CMatrix} from a dataset, one row per instance.
 *
 * @param m the dataset to convert
 * @return a matrix built by {@code CMatrix.getInstance} from the per-instance
 *         arrays returned by {@code toDoubleArray()}
 */
public static CMatrix fromInstances(Instances m) {
    int rowCount = m.numInstances();
    // Only the outer array is allocated: every row is supplied by
    // toDoubleArray(), so pre-allocating the rows would be wasted work.
    double[][] ret = new double[rowCount][];
    for (int i = 0; i < rowCount; i++) {
        ret[i] = m.instance(i).toDoubleArray();
    }
    return CMatrix.getInstance(ret);
}
From source file:cezeri.utils.FactoryInstance.java
/** * * @param m tm dataset/*from ww w .j a v a2 s . c om*/ * @param val class value deeri val olanlar filtrele * @return */ public static Instances[] getSpecificInstancesBasedOnClassValue(Instances m, String[] cl) { Instances[] ret = new Instances[cl.length]; for (int i = 0; i < ret.length; i++) { ret[i] = FactoryInstance.generateInstances(m.relationName() + "_class=" + cl[i], m.numAttributes()); // ret[i] = m.resampleWithWeights(new Random()); ret[i].delete(); } for (int i = 0; i < m.numInstances(); i++) { Instance ins = m.instance(i); for (int j = 0; j < cl.length; j++) { if (("" + (int) ins.classValue()).equals(cl[j])) { ret[j].add(ins); } } } return ret; }
From source file:cezeri.utils.FactoryInstance.java
public static double[] getClassData(Instances m) { double[] ret = new double[m.numInstances()]; for (int i = 0; i < m.numInstances(); i++) { Instance ins = m.instance(i);//from w ww . j a v a2s . co m ret[i] = ins.classValue(); } return ret; }