List of usage examples for weka.core.Instances.instance(int)
public Instance instance(int index)
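For orientation, here is a minimal sketch of the typical access pattern before the collected examples: load an ARFF file, set the class index, and walk the data with instance(int). The file name "iris.arff" and the class-is-last-attribute convention are illustrative assumptions, not part of any example below.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instance;
import weka.core.Instances;

public class InstanceAccessSketch {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder path; any ARFF file with a nominal class works.
        Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
        data.setClassIndex(data.numAttributes() - 1); // assume the class is the last attribute

        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i); // random access by position
            System.out.println(i + ": " + inst + " (class = " + inst.stringValue(data.classIndex()) + ")");
        }
    }
}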
From source file: GClass.EvaluationInternal.java
License: Open Source License

/**
 * Sets the class prior probabilities.
 *
 * @param train the training instances used to determine the prior probabilities
 * @exception Exception if the class attribute of the instances is not set
 */
public void setPriors(Instances train) throws Exception {
    if (!m_ClassIsNominal) {
        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;
        for (int i = 0; i < train.numInstances(); i++) {
            Instance currentInst = train.instance(i);
            if (!currentInst.classIsMissing()) {
                addNumericTrainClass(currentInst.classValue(), currentInst.weight());
            }
        }
    } else {
        for (int i = 0; i < m_NumClasses; i++) {
            m_ClassPriors[i] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;
        for (int i = 0; i < train.numInstances(); i++) {
            if (!train.instance(i).classIsMissing()) {
                m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight();
                m_ClassPriorsSum += train.instance(i).weight();
            }
        }
    }
}
From source file: GClass.EvaluationInternal.java
License: Open Source License

/**
 * Prints the predictions for the given dataset into a String variable.
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() != 0) {
        BufferedReader testReader = null;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = ((Classifier) classifier).classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.close();
    }
    return text.toString();
}
From source file: general.Util.java

/**
 * Show learning statistics by percentage split.
 *
 * @param data         the training data
 * @param trainPercent percentage of the data used for training
 * @param Classifier   name of the classifier to build (naivebayes, j48-prune, j48-unprune, id3)
 */
public static void PercentageSplit(Instances data, double trainPercent, String Classifier) {
    try {
        int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100);
        int testSize = data.numInstances() - trainSize;
        data.randomize(new Random(1));

        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, testSize);
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        switch (Classifier.toLowerCase()) {
        case "naivebayes":
            classifier = new NaiveBayes();
            break;
        case "j48-prune":
            classifier = new MyJ48(true, 0.25f);
            break;
        case "j48-unprune":
            classifier = new MyJ48(false, 0f);
            break;
        case "id3":
            classifier = new MyID3();
            break;
        default:
            break;
        }
        classifier.buildClassifier(train);

        for (int i = 0; i < test.numInstances(); i++) {
            try {
                double pred = classifier.classifyInstance(test.instance(i));
                System.out.print("ID: " + test.instance(i));
                System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
                System.out.println(", predicted: " + test.classAttribute().value((int) pred));
            } catch (Exception ex) {
                Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        // Evaluate the model on the test instances and print the results
        try {
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(classifier, test);
            System.out.println(eval.toSummaryString("\nResults\n\n", false));
        } catch (Exception e) {
            e.printStackTrace();
        }
    } catch (Exception ex) {
        Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
    }
}
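For comparison, a minimal sketch of letting Weka's Evaluation class perform a 10-fold cross-validation on the same data instead of a manual percentage split; it assumes the classifier and data variables are already configured as in the example above.

// Sketch only: 10-fold cross-validation with Weka's Evaluation class.
data.setClassIndex(data.numAttributes() - 1);
Evaluation eval = new Evaluation(data);
eval.crossValidateModel(classifier, data, 10, new Random(1));
System.out.println(eval.toSummaryString("\nCross-validation results\n\n", false));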
From source file: general.Util.java

/**
 * Classify a test set using a pre-built model.
 *
 * @param model the pre-built classifier model
 * @param test  the test instances
 */
public static void doClassify(Classifier model, Instances test) {
    test.setClassIndex(test.numAttributes() - 1);
    for (int i = 0; i < test.numInstances(); i++) {
        try {
            double pred = model.classifyInstance(test.instance(i));
            System.out.print("ID: " + test.instance(i));
            System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
            System.out.println(", predicted: " + test.classAttribute().value((int) pred));
        } catch (Exception ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
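A hedged sketch of how doClassify above might be driven: deserialize a previously saved model and load a test ARFF. The paths "model.bin" and "test.arff" are placeholders, and SerializationHelper is only one way to restore a saved Weka classifier.

// Sketch only; file paths are placeholders and the model is assumed to match the test header.
Classifier model = (Classifier) weka.core.SerializationHelper.read("model.bin");
Instances test = new Instances(new java.io.BufferedReader(new java.io.FileReader("test.arff")));
Util.doClassify(model, test); // doClassify sets the class index to the last attribute itself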
From source file: gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License: Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon
 * the clusters. The train set is initially divided into as many partitions as there are
 * distinct label combinations.
 *
 * @param file the .arff file
 * @throws Exception
 */
public ClassifierSet initializePopulation(final String file) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";
    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*
     * attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It does not start from 7 because Remove expects 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses.
     */
    String labelsIndicesForDeletion = "";
    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*
     * labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize,
            new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {
        try {
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide
             * them the labels. These are the ones we removed earlier.
             * But first, open up positions for the labels.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);
                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}
From source file: gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License: Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon
 * the clusters. The train set is initially divided into as many partitions as there are
 * distinct label combinations.
 *
 * @param trainset the train set as Instances
 * @throws Exception
 */
public ClassifierSet initializePopulation(final Instances trainset) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = trainset;

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, trainset);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";
    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*
     * attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It does not start from 7 because Remove expects 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses.
     */
    String labelsIndicesForDeletion = "";
    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*
     * labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize,
            new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {
        try {
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide
             * them the labels. These are the ones we removed earlier.
             * But first, open up positions for the labels.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomCoveringClassifier(centroidsArray[j]);
                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return initialClassifiers;
}
From source file: gr.auth.ee.lcs.data.representations.complex.ComplexRepresentation.java
License: Open Source License

/**
 * Build the representation for some instances.
 *
 * @param instances the instances
 */
protected void buildRepresentationFromInstance(final Instances instances) {
    for (int i = 0; i < (instances.numAttributes() - numberOfLabels); i++) {
        final String attributeName = instances.attribute(i).name();

        if (instances.attribute(i).isNominal()) {
            String[] attributeNames = new String[instances.attribute(i).numValues()];
            final Enumeration<?> values = instances.attribute(i).enumerateValues();
            for (int j = 0; j < attributeNames.length; j++) {
                attributeNames[j] = (String) values.nextElement();
            }

            // Create boolean or generic nominal
            if (attributeNames.length > 2)
                attributeList[i] = new ComplexRepresentation.NominalAttribute(this.chromosomeSize,
                        attributeName, attributeNames, attributeGeneralizationRate);
            else
                attributeList[i] = new ComplexRepresentation.BooleanAttribute(chromosomeSize, attributeName,
                        attributeGeneralizationRate);
        } else if (instances.attribute(i).isNumeric()) {
            float minValue, maxValue;
            minValue = (float) instances.instance(0).toDoubleArray()[i];
            maxValue = minValue;
            for (int sample = 0; sample < instances.numInstances(); sample++) {
                final float currentVal = (float) instances.instance(sample).toDoubleArray()[i];
                if (currentVal > maxValue)
                    maxValue = currentVal;
                if (currentVal < minValue)
                    minValue = currentVal;
            }

            attributeList[i] = new ComplexRepresentation.IntervalAttribute(this.chromosomeSize, attributeName,
                    minValue, maxValue, precision, attributeGeneralizationRate);
        }
    }
    createClassRepresentation(instances);
}
From source file: gr.auth.ee.lcs.utilities.InstancesUtility.java
License: Open Source License

/**
 * Perform the conversion.
 *
 * @param set the set containing the instances
 * @return a double[][] containing the instances and their respective attribute values
 */
public static double[][] convertIntancesToDouble(final Instances set) {
    if (set == null)
        return null;
    final double[][] result = new double[set.numInstances()][set.numAttributes()];
    for (int i = 0; i < set.numInstances(); i++) {
        for (int j = 0; j < set.numAttributes(); j++) {
            result[i][j] = set.instance(i).value(j);
        }
    }
    return result;
}
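As a design note, each row can also be copied in one call with Instance.toDoubleArray(), which returns a fresh double[] per instance. A sketch of that variant follows; the helper name toMatrix is an illustrative assumption, not part of InstancesUtility.

// Equivalent sketch using Instance.toDoubleArray(), which copies a whole row at once.
public static double[][] toMatrix(final Instances set) {
    if (set == null)
        return null;
    final double[][] result = new double[set.numInstances()][];
    for (int i = 0; i < set.numInstances(); i++) {
        result[i] = set.instance(i).toDoubleArray();
    }
    return result;
}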
From source file: gr.auth.ee.lcs.utilities.InstancesUtility.java
License: Open Source License

/**
 * Returns the label cardinality of the specified set.
 */
public static double getLabelCardinality(final Instances set) {
    if (set == null)
        return -1;
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);
    double sumOfLabels = 0;

    for (int i = 0; i < set.numInstances(); i++) {
        for (int j = set.numAttributes() - numberOfLabels; j < set.numAttributes(); j++) {
            sumOfLabels += set.instance(i).value(j);
        }
    }
    if (set.numInstances() != 0) {
        return sumOfLabels / set.numInstances();
    }
    return 0;
}
From source file: gr.auth.ee.lcs.utilities.InstancesUtility.java
License: Open Source License

/**
 * The number of instances must be a multiple of the number of folds.
 * From a set of instances, it returns a chunk whose length is
 * instances.numInstances() / numberOfFolds, at the given index. Index starts at zero.
 *
 * In essence, this is used when splitting a partition of instances into a train and test set.
 *
 * One chunk is the test set and the rest is the train set.
 * We provide the index for the test set and the rest automatically becomes the train set;
 * see splitPartitionIntoFolds.
 *
 *  _____
 * |_6_| index = 0
 * |_6_| 1
 * |_6_| 2
 * |_6_| 3
 * |_6_| 4
 * |_6_| 5
 * |_6_| 6
 * |_6_| 7
 * |_6_| 8
 * |_6_| 9
 *
 */
public static Instances getPartitionSegment(Instances instances, int index, int numberOfFolds) {
    if (instances.numInstances() % numberOfFolds != 0) {
        System.out.println("Number of instances not a multiple of " + numberOfFolds);
        return null;
    }
    int numberOfInstancesToGet = instances.numInstances() / numberOfFolds;
    Instances segment = new Instances(instances, numberOfInstancesToGet);

    for (int i = index * numberOfInstancesToGet; i < (index + 1) * numberOfInstancesToGet; i++) {
        segment.add(instances.instance(i));
    }
    return segment;
}
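A hedged sketch of how getPartitionSegment might be combined into the train/test split its comment describes: the chunk at index f becomes the test set and the remaining chunks are concatenated into the train set. The helper name buildFold is an illustrative assumption, not part of InstancesUtility.

// Sketch: use chunk f as the test set and merge the remaining chunks into the train set.
public static Instances[] buildFold(Instances partition, int f, int numberOfFolds) {
    Instances test = getPartitionSegment(partition, f, numberOfFolds);
    Instances train = new Instances(partition, 0); // empty set sharing the same header
    for (int k = 0; k < numberOfFolds; k++) {
        if (k == f)
            continue;
        Instances chunk = getPartitionSegment(partition, k, numberOfFolds);
        for (int i = 0; i < chunk.numInstances(); i++) {
            train.add(chunk.instance(i));
        }
    }
    return new Instances[] { train, test };
}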