List of usage examples for weka.core Instances numInstances
public int numInstances()
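numInstances() returns the number of instances (rows) in a dataset. Before the collected examples, here is a minimal self-contained sketch of the common pattern; it assumes Weka is on the classpath and uses an illustrative dataset file name (iris.arff), which is not part of the examples below:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumInstancesDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the file name is just an example
        Instances data = new DataSource("iris.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // numInstances() returns the number of rows (instances) in the dataset
        System.out.println("Number of instances: " + data.numInstances());

        // The usual idiom: iterate over the dataset by index
        for (int i = 0; i < data.numInstances(); i++) {
            System.out.println(data.instance(i));
        }
    }
}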
From source file:gate.plugin.learningframework.engines.EngineWeka.java
@Override
public Object evaluateHoldout(InstanceList instances, double portion, int repeats, String parms) {
    // Get the parameters
    // -s/-seed <int>  : seed, default 0
    // -S/-nostratify  : switch off stratification if we evaluate classification
    Parms opts = new Parms(parms, "s:seed:i", "S:nostratify:b");
    int seed = (int) opts.getValueOrElse("seed", 0);
    if (algorithm instanceof AlgorithmRegression) {
        throw new UnsupportedOperationException("Weka holdout eval for regression not supported yet.");
    } else {
        // must be a classification algorithm then!
        weka.core.Instances all = new CorpusRepresentationWeka(corpusRepresentationMallet).getRepresentationWeka();
        boolean noStratify = (boolean) opts.getValueOrElse("nostratify", 0);
        Random rand = new Random(seed);
        all.randomize(rand);
        boolean stratified = !noStratify;
        // TODO: not sure if/how we can do stratification for holdout evaluation
        // TODO: there must be a better way to do the splitting too!
        // TODO: if there is no better way to split, maybe do it outside
        // TODO: how to implement repeats?
        if (repeats != 1) {
            throw new GateRuntimeException("Only repeats == 1 supported yet");
        }
        // both regression and classification?
        int trainSize = (int) Math.round(all.numInstances() * portion);
        int testSize = all.numInstances() - trainSize;
        Instances train = new Instances(all, 0, trainSize);
        Instances test = new Instances(all, trainSize, testSize);
        Classifier classifier = (Classifier) trainer;
        try {
            classifier.buildClassifier(train);
        } catch (Exception ex) {
            throw new GateRuntimeException("Error during training of Weka classifier", ex);
        }
        Evaluation eval = null;
        try {
            eval = new Evaluation(train);
        } catch (Exception ex) {
            throw new GateRuntimeException("Could not create Evaluation object", ex);
        }
        try {
            eval.evaluateModel(classifier, test);
        } catch (Exception ex) {
            throw new GateRuntimeException("Error evaluating the classifier", ex);
        }
        System.out.println("Evaluation result:\n" + eval);
        return eval;
    }
}
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Evaluates the classifier on a given set of instances. Note that
 * the data must have exactly the same format (e.g. order of
 * attributes) as the data used to train the classifier! Otherwise
 * the results will generally be meaningless.
 *
 * @param classifier machine learning classifier
 * @param data set of test instances for evaluation
 * @exception Exception if the model could not be evaluated successfully
 */
public double[] evaluateModel(Classifier classifier, Instances data) throws Exception {
    double[] predictions = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
        predictions[i] = evaluateModelOnce(classifier, data.instance(i));
    }
    return predictions;
}
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Sets the class prior probabilities.
 *
 * @param train the training instances used to determine
 * the prior probabilities
 * @exception Exception if the class attribute of the instances is not set
 */
public void setPriors(Instances train) throws Exception {
    if (!m_ClassIsNominal) {
        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;
        for (int i = 0; i < train.numInstances(); i++) {
            Instance currentInst = train.instance(i);
            if (!currentInst.classIsMissing()) {
                addNumericTrainClass(currentInst.classValue(), currentInst.weight());
            }
        }
    } else {
        for (int i = 0; i < m_NumClasses; i++) {
            m_ClassPriors[i] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;
        for (int i = 0; i < train.numInstances(); i++) {
            if (!train.instance(i).classIsMissing()) {
                m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight();
                m_ClassPriorsSum += train.instance(i).weight();
            }
        }
    }
}
From source file:general.Util.java
/**
 * Show the learning statistics obtained from a percentage split.
 *
 * @param data the training data
 * @param trainPercent percentage of the data used for training
 * @param Classifier the name of the classifier to build
 */
public static void PercentageSplit(Instances data, double trainPercent, String Classifier) {
    try {
        int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100);
        int testSize = data.numInstances() - trainSize;
        data.randomize(new Random(1));
        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, testSize);
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);
        switch (Classifier.toLowerCase()) {
        case "naivebayes":
            classifier = new NaiveBayes();
            break;
        case "j48-prune":
            classifier = new MyJ48(true, 0.25f);
            break;
        case "j48-unprune":
            classifier = new MyJ48(false, 0f);
            break;
        case "id3":
            classifier = new MyID3();
            break;
        default:
            break;
        }
        classifier.buildClassifier(train);
        for (int i = 0; i < test.numInstances(); i++) {
            try {
                double pred = classifier.classifyInstance(test.instance(i));
                System.out.print("ID: " + test.instance(i));
                System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
                System.out.println(", predicted: " + test.classAttribute().value((int) pred));
            } catch (Exception ex) {
                Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        // Evaluate the model on the test instances and print the results
        try {
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(classifier, test);
            System.out.println(eval.toSummaryString("\nResults\n\n", false));
        } catch (Exception e) {
            e.printStackTrace();
        }
    } catch (Exception ex) {
        Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:general.Util.java
/**
 * Classify a test set using a pre-built model.
 *
 * @param model the pre-built classifier
 * @param test the test instances
 */
public static void doClassify(Classifier model, Instances test) {
    test.setClassIndex(test.numAttributes() - 1);
    for (int i = 0; i < test.numInstances(); i++) {
        try {
            double pred = model.classifyInstance(test.instance(i));
            System.out.print("ID: " + test.instance(i));
            System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
            System.out.println(", predicted: " + test.classAttribute().value((int) pred));
        } catch (Exception ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License:Open Source License
/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided into as many partitions as there are distinct label combinations.
 *
 * @param file the .arff file
 * @throws Exception
 */
public ClassifierSet initializePopulation(final String file) throws Exception {
    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";
    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*
     * attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It does not start from 7 because the Remove filter expects 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses.
     */
    String labelsIndicesForDeletion = "";
    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*
     * labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels (again 1-based indices).
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize,
            new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {
        try {
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids at this stage hold only attributes. To continue, we need to provide them with the labels.
             * These are the ones we removed earlier.
             * But first, open up positions for the labels.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);
                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}
From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License:Open Source License
/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided into as many partitions as there are distinct label combinations.
 *
 * @param trainset the train set as an Instances object
 * @throws Exception
 */
public ClassifierSet initializePopulation(final Instances trainset) throws Exception {
    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = trainset;

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, trainset);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";
    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*
     * attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It does not start from 7 because the Remove filter expects 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses.
     */
    String labelsIndicesForDeletion = "";
    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*
     * labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels (again 1-based indices).
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize,
            new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {
        try {
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids at this stage hold only attributes. To continue, we need to provide them with the labels.
             * These are the ones we removed earlier.
             * But first, open up positions for the labels.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomCoveringClassifier(centroidsArray[j]);
                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return initialClassifiers;
}
From source file:gr.auth.ee.lcs.data.representations.complex.ComplexRepresentation.java
License:Open Source License
/**
 * Build the representation for some instances.
 *
 * @param instances the instances
 */
protected void buildRepresentationFromInstance(final Instances instances) {
    for (int i = 0; i < (instances.numAttributes() - numberOfLabels); i++) {
        final String attributeName = instances.attribute(i).name();

        if (instances.attribute(i).isNominal()) {
            String[] attributeNames = new String[instances.attribute(i).numValues()];
            final Enumeration<?> values = instances.attribute(i).enumerateValues();
            for (int j = 0; j < attributeNames.length; j++) {
                attributeNames[j] = (String) values.nextElement();
            }

            // Create boolean or generic nominal attribute
            if (attributeNames.length > 2)
                attributeList[i] = new ComplexRepresentation.NominalAttribute(this.chromosomeSize, attributeName,
                        attributeNames, attributeGeneralizationRate);
            else
                attributeList[i] = new ComplexRepresentation.BooleanAttribute(chromosomeSize, attributeName,
                        attributeGeneralizationRate);
        } else if (instances.attribute(i).isNumeric()) {
            float minValue, maxValue;
            minValue = (float) instances.instance(0).toDoubleArray()[i];
            maxValue = minValue;
            for (int sample = 0; sample < instances.numInstances(); sample++) {
                final float currentVal = (float) instances.instance(sample).toDoubleArray()[i];
                if (currentVal > maxValue)
                    maxValue = currentVal;
                if (currentVal < minValue)
                    minValue = currentVal;
            }
            attributeList[i] = new ComplexRepresentation.IntervalAttribute(this.chromosomeSize, attributeName,
                    minValue, maxValue, precision, attributeGeneralizationRate);
        }
    }

    createClassRepresentation(instances);
}
From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java
License:Open Source License
/**
 * Perform the conversion.
 *
 * @param set the set containing the instances
 * @return a double[][] containing the instances and their respective attribute values
 */
public static double[][] convertIntancesToDouble(final Instances set) {
    if (set == null)
        return null;
    final double[][] result = new double[set.numInstances()][set.numAttributes()];
    for (int i = 0; i < set.numInstances(); i++) {
        for (int j = 0; j < set.numAttributes(); j++) {
            result[i][j] = set.instance(i).value(j);
        }
    }
    return result;
}
From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java
License:Open Source License
/**
 * Returns the label cardinality of the specified set.
 */
public static double getLabelCardinality(final Instances set) {
    if (set == null)
        return -1;
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);
    double sumOfLabels = 0;

    for (int i = 0; i < set.numInstances(); i++) {
        for (int j = set.numAttributes() - numberOfLabels; j < set.numAttributes(); j++) {
            sumOfLabels += set.instance(i).value(j);
        }
    }

    if (set.numInstances() != 0) {
        return (double) (sumOfLabels / set.numInstances());
    }
    return 0;
}