List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/** * Generates the classifier./*from w ww . j a va2 s. c o m*/ * * @param instances set of instances serving as training data * @throws Exception if the classifier has not been generated successfully */ public void buildClassifier(Instances instances) throws Exception { double bestVal = Double.MAX_VALUE, currVal; double bestPoint = -Double.MAX_VALUE; int bestAtt = -1, numClasses; // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); // only class? -> build ZeroR model if (instances.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(instances); return; } else { m_ZeroR = null; } double[][] bestDist = new double[3][instances.numClasses()]; m_Instances = new Instances(instances); if (m_Instances.classAttribute().isNominal()) { numClasses = m_Instances.numClasses(); } else { numClasses = 1; } // For each attribute boolean first = true; for (int i = 0; i < m_Instances.numAttributes(); i++) { if (i != m_Instances.classIndex()) { // Reserve space for distribution. m_Distribution = new double[3][numClasses]; // Compute value of criterion for best split on attribute if (m_Instances.attribute(i).isNominal()) { currVal = findSplitNominal(i); } else { currVal = findSplitNumeric(i); } if ((first) || (currVal < bestVal)) { bestVal = currVal; bestAtt = i; bestPoint = m_SplitPoint; for (int j = 0; j < 3; j++) { System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, numClasses); } } // First attribute has been investigated first = false; } } // Set attribute, split point and distribution. 
m_AttIndex = bestAtt; m_SplitPoint = bestPoint; m_Distribution = bestDist; if (m_Instances.classAttribute().isNominal()) { for (int i = 0; i < m_Distribution.length; i++) { double sumCounts = Utils.sum(m_Distribution[i]); if (sumCounts == 0) { // This means there were only missing attribute values System.arraycopy(m_Distribution[2], 0, m_Distribution[i], 0, m_Distribution[2].length); Utils.normalize(m_Distribution[i]); } else { Utils.normalize(m_Distribution[i], sumCounts); } } } // Save memory m_Instances = new Instances(m_Instances, 0); }
From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java
License:Open Source License
/** * Method used to pre-process the data, perform clustering, and * set the initial parameter vector.// w ww .jav a 2 s .c om */ protected Instances initializeClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); data = new Instances(data); data.deleteWithMissingClass(); // Make sure data is shuffled Random random = new Random(m_Seed); if (data.numInstances() > 2) { random = data.getRandomNumberGenerator(m_Seed); } data.randomize(random); double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case int index = 1; while (index < data.numInstances() && data.instance(index).classValue() == y0) { index++; } if (index == data.numInstances()) { // degenerate case, all class values are equal // we don't want to deal with this, too much hassle throw new Exception("All class values are the same. At least two class values should be different"); } double y1 = data.instance(index).classValue(); // Replace missing values m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(data); data = Filter.useFilter(data, m_ReplaceMissingValues); // Remove useless attributes m_AttFilter = new RemoveUseless(); m_AttFilter.setInputFormat(data); data = Filter.useFilter(data, m_AttFilter); // only class? 
-> build ZeroR model if (data.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data after removing useless attributes!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(data); return data; } else { m_ZeroR = null; } // Transform attributes m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(data); data = Filter.useFilter(data, m_NominalToBinary); m_Filter = new Normalize(); ((Normalize) m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(data); data = Filter.useFilter(data, m_Filter); double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case double z1 = data.instance(index).classValue(); m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ??? m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1 m_classIndex = data.classIndex(); m_numClasses = data.numClasses(); m_numAttributes = data.numAttributes(); // Run k-means SimpleKMeans skm = new SimpleKMeans(); skm.setMaxIterations(10000); skm.setNumClusters(m_numUnits); Remove rm = new Remove(); data.setClassIndex(-1); rm.setAttributeIndices((m_classIndex + 1) + ""); rm.setInputFormat(data); Instances dataRemoved = Filter.useFilter(data, rm); data.setClassIndex(m_classIndex); skm.buildClusterer(dataRemoved); Instances centers = skm.getClusterCentroids(); if (centers.numInstances() < m_numUnits) { m_numUnits = centers.numInstances(); } // Set up arrays OFFSET_WEIGHTS = 0; if (m_useAttributeWeights) { OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses; OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes; } else { OFFSET_ATTRIBUTE_WEIGHTS = -1; OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses; } OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes; switch (m_scaleOptimizationOption) { case USE_GLOBAL_SCALE: m_RBFParameters = new double[OFFSET_SCALES + 1]; break; case USE_SCALE_PER_UNIT_AND_ATTRIBUTE: 
m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes]; break; default: m_RBFParameters = new double[OFFSET_SCALES + m_numUnits]; break; } // Set initial radius based on distance to nearest other basis function double maxMinDist = -1; for (int i = 0; i < centers.numInstances(); i++) { double minDist = Double.MAX_VALUE; for (int j = i + 1; j < centers.numInstances(); j++) { double dist = 0; for (int k = 0; k < centers.numAttributes(); k++) { if (k != centers.classIndex()) { double diff = centers.instance(i).value(k) - centers.instance(j).value(k); dist += diff * diff; } } if (dist < minDist) { minDist = dist; } } if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) { maxMinDist = minDist; } } // Initialize parameters if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) { m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist); } for (int i = 0; i < m_numUnits; i++) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) { m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist); } int k = 0; for (int j = 0; j < m_numAttributes; j++) { if (k == centers.classIndex()) { k++; } if (j != data.classIndex()) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) { m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist); } m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k); k++; } } } if (m_useAttributeWeights) { for (int j = 0; j < m_numAttributes; j++) { if (j != data.classIndex()) { m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0; } } } initializeOutputLayer(random); return data; }
From source file:cerebro.Id3.java
License:Open Source License
/** * Method for building an Id3 tree.//from w w w . j a v a 2 s . c o m * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new Id3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new Id3(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:cerebro.Id3.java
License:Open Source License
/** * Computes the entropy of a dataset./*w w w . j av a2 s. c om*/ * * @param data the data for which entropy is to be computed * @return the entropy of the data's class distribution * @throws Exception if computation fails */ private double computeEntropy(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; } double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) { entropy -= classCounts[j] * Utils.log2(classCounts[j]); } } entropy /= (double) data.numInstances(); return entropy + Utils.log2(data.numInstances()); }
From source file:cezeri.utils.FactoryInstance.java
public static String[] getOriginalClasses(Instances data) { Attribute att = data.attribute(data.classIndex()); String[] ret = new String[data.numClasses()]; Enumeration enu = att.enumerateValues(); int q = 0;// w w w . java 2 s. c o m while (enu.hasMoreElements()) { ret[q++] = (String) enu.nextElement(); } return ret; }
From source file:classif.dropx.DTWKNNClassifierSimpleRank.java
License:Open Source License
@Override protected void buildSortedSequences(Instances data) { ArrayList<ClassedSequence> sortedSequencesTmp = new ArrayList<ClassedSequence>(); int nbObjToRemove = data.numInstances(); int nbClasses = data.numClasses(); double[][] distances = new double[data.numInstances()][data.numInstances()]; for (int i = 0; i < distances.length; i++) { for (int j = i + 1; j < distances[i].length; j++) { distances[i][j] = sequences[i].distance(sequences[j]); distances[j][i] = distances[i][j]; }//w w w . j ava 2s . co m } // create temp structure to remove "bad" examples ArrayList<IndexedSequence> tmpSequences = new ArrayList<IndexedSequence>(); ArrayList<String> tmpClassMap = new ArrayList<String>(); // prune tmpSequences and tmpSclassMap // init temp structure for (int i = 0; i < sequences.length; i++) { tmpSequences.add(new IndexedSequence(sequences[i], i)); tmpClassMap.add(classMap[i]); } for (int p = 0; p < nbObjToRemove - 2; p++) { // score for each point int scores[] = new int[tmpSequences.size()]; // distance to nearest of the point of the same class double[] distToNearestOfSameClass = new double[tmpSequences.size()]; for (int k = 0; k < distToNearestOfSameClass.length; k++) { distToNearestOfSameClass[k] = Double.MAX_VALUE; } ArrayList<Integer>[] nearestNeighbor = new ArrayList[tmpSequences.size()]; // for each object for (int i = 0; i < tmpSequences.size(); i++) { // looking for the nearest double minD = Double.MAX_VALUE; int nn = -1; // we look for the NN for (int j = 0; j < tmpSequences.size(); j++) { // avoid diagonal if (j != i) { // check distance double tmpD = distances[tmpSequences.get(i).index][tmpSequences.get(j).index]; // if we found a new NN if (tmpD < minD) { nn = j; minD = tmpD; } // if object is of same class if (tmpClassMap.get(i).equals(tmpClassMap.get(j))) { // if it is nearest if (minD < distToNearestOfSameClass[i]) { distToNearestOfSameClass[i] = minD; } } } } if (nearestNeighbor[nn] == null) { nearestNeighbor[nn] = new ArrayList<Integer>(); } // we 
tell to the NN that he is the winner nearestNeighbor[nn].add(i); } for (int i = 0; i < nearestNeighbor.length; i++) { if (nearestNeighbor[i] == null) { scores[i] = 0; } else { ArrayList<Integer> nn = nearestNeighbor[i]; int tmpScore = 0; for (Integer k : nn) { // if k is of class i + 1 if (tmpClassMap.get(k).equals(tmpClassMap.get(i))) tmpScore += 1; else tmpScore -= (2 / nbClasses); // else -2/(nbC-1) } scores[i] = tmpScore; } } // find toRemove int toRemove = 0; for (int i = 1; i < scores.length; i++) { if (scores[i] <= scores[toRemove]) { if (distToNearestOfSameClass[i] < distToNearestOfSameClass[toRemove]) toRemove = i; } } sortedSequencesTmp .add(new ClassedSequence(tmpSequences.get(toRemove).sequence, tmpClassMap.get(toRemove))); tmpSequences.remove(toRemove); tmpClassMap.remove(toRemove); } for (int i = 0; i < tmpSequences.size(); i++) { sortedSequencesTmp.add(new ClassedSequence(tmpSequences.get(i).sequence, tmpClassMap.get(i))); } sortedSequences = new ArrayList<ClassedSequence>(); // reorder for (int i = sortedSequencesTmp.size() - 1; i >= 0; i--) { sortedSequences.add(sortedSequencesTmp.get(i)); } }
From source file:classif.dropx.PrototyperSorted.java
License:Open Source License
@Override protected void buildSpecificClassifier(Instances data) { if (sortedSequences == null) { buildSortedSequences(data);//from w w w.ja v a 2 s .c o m } int max = nbPrototypesPerClass; if (isNbPrototypesPerClass) { max = this.nbPrototypesPerClass * data.numClasses(); } // add the number of required prototypes for (int i = 0; i < sortedSequences.size() && i < max; i++) { prototypes.add(sortedSequences.get(i)); } }
From source file:classif.dropx.PrototyperSorted.java
License:Open Source License
/**
 * Selects prototypes class by class so that the classes of the training set
 * are balanced: for every value of the class attribute, the leading entries
 * of that class's list in {@code sortedSequencesByClass} are added to
 * {@code prototypes}.
 *
 * <p>The per-class limit is {@code nbPrototypesPerClass}, multiplied by the
 * number of classes when {@code isNbPrototypesPerClass} is set.
 *
 * @param data the training data
 */
protected void buildSpecificClassifierByClass(Instances data) {
    if (sortedSequences == null) {
        buildSortedSequences(data);
        buildSortedSequencesPerClass(data);
    }

    final int limit = isNbPrototypesPerClass
            ? this.nbPrototypesPerClass * data.numClasses()
            : nbPrototypesPerClass;

    Attribute classAttribute = data.classAttribute();
    for (int c = 0; c < classAttribute.numValues(); c++) {
        ArrayList<ClassedSequence> ranked = sortedSequencesByClass.get(classAttribute.value(c));
        final int take = Math.min(limit, ranked.size());
        for (int pos = 0; pos < take; pos++) {
            prototypes.add(ranked.get(pos));
        }
    }
}
From source file:classif.Prototyper.java
License:Open Source License
@Override public void buildClassifier(Instances data) throws Exception { trainingData = data;/*from w ww.jav a 2 s . co m*/ Attribute classAttribute = data.classAttribute(); prototypes = new ArrayList<>(); classedData = new HashMap<String, ArrayList<Sequence>>(); indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>()); indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>()); } sequences = new Sequence[data.numInstances()]; classMap = new String[sequences.length]; for (int i = 0; i < sequences.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } sequences[i] = new Sequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(sequences[i]); indexClassedDataInFullData.get(clas).add(i); // System.out.println("Element "+i+" of train is classed "+clas+" and went to element "+(indexClassedDataInFullData.get(clas).size()-1)); } buildSpecificClassifier(data); if (fillPrototypes) addMissingPrototypesRandom(); }
From source file:classification.classifiers.LDA.java
License:Open Source License
/** * Modification on Dr. Wolfgang Lenhard's code. * This was necessary because this classifier had to implements * "buildClassifier" and "classifyInstance" to be like a classifier of WEKA(R). * /*w w w. ja v a2s . co m*/ * @param data * @throws Exception */ public void buildClassifier(Instances data) throws Exception { int n = data.numInstances(); int a = data.numAttributes(); int k = data.numClasses(); int[] g = new int[n]; double[][] d = new double[n][a]; for (int i = 0; i < n; i++) { double[] d_i = data.instance(i).toDoubleArray(); d[i] = d_i; /** * To print the attribute with the correspondent double * * System.out.print("\n"); for(int j=0; j<a; j++){ * System.out.print(data.instance(i).stringValue(data.attribute(j)) * + " = "); * System.out.print(data.instance(i).value(data.attribute(j)) + * "; "); } System.out.print("\n"); / **/ } // Gives the number of objects belonging to class i in the trainingSet. int classIndex = a - 1; valueClass = new double[k]; data.setClassIndex(classIndex); for (int i = 0; i < k; i++) { // Reference class String refClass = data.classAttribute().value(i); // // System.out.println("refClass: " + refClass + " "); for (int j = 0; j < n; j++) { // Object class String objectClass = data.instance(j).stringValue(classIndex); // // System.out.println("objectClass: " + objectClass + " - value: // " + data.instance(j).value(data.attribute(classIndex))); // Building two vectors of classes, one in int format and // another in double format. if (objectClass == refClass) { // Object class as a double valueClass[i] = data.instance(j).value(data.attribute(classIndex)); // Object class as an int g[j] = i; // // System.out.println("value of class (int): " + g[j] + " // ___ value (double): " + valueClass[i]); } } } this.BuildLDA(d, g, true); }