Example usage for weka.core Instances numClasses

Introduction

In this page you can find the example usage for weka.core Instances numClasses.

Prototype


publicint numClasses()

Source Link

Document

Returns the number of class labels.

Usage

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Generates the classifier./*from w  ww  . j  a va2 s. c  o  m*/
 *
 * @param instances set of instances serving as training data 
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal;
    double bestPoint = -Double.MAX_VALUE;
    int bestAtt = -1, numClasses;

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(instances);
        return;
    } else {
        m_ZeroR = null;
    }

    double[][] bestDist = new double[3][instances.numClasses()];

    m_Instances = new Instances(instances);

    if (m_Instances.classAttribute().isNominal()) {
        numClasses = m_Instances.numClasses();
    } else {
        numClasses = 1;
    }

    // For each attribute
    boolean first = true;
    for (int i = 0; i < m_Instances.numAttributes(); i++) {
        if (i != m_Instances.classIndex()) {

            // Reserve space for distribution.
            m_Distribution = new double[3][numClasses];

            // Compute value of criterion for best split on attribute
            if (m_Instances.attribute(i).isNominal()) {
                currVal = findSplitNominal(i);
            } else {
                currVal = findSplitNumeric(i);
            }
            if ((first) || (currVal < bestVal)) {
                bestVal = currVal;
                bestAtt = i;
                bestPoint = m_SplitPoint;
                for (int j = 0; j < 3; j++) {
                    System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, numClasses);
                }
            }

            // First attribute has been investigated
            first = false;
        }
    }

    // Set attribute, split point and distribution.
    m_AttIndex = bestAtt;
    m_SplitPoint = bestPoint;
    m_Distribution = bestDist;
    if (m_Instances.classAttribute().isNominal()) {
        for (int i = 0; i < m_Distribution.length; i++) {
            double sumCounts = Utils.sum(m_Distribution[i]);
            if (sumCounts == 0) { // This means there were only missing attribute values
                System.arraycopy(m_Distribution[2], 0, m_Distribution[i], 0, m_Distribution[2].length);
                Utils.normalize(m_Distribution[i]);
            } else {
                Utils.normalize(m_Distribution[i], sumCounts);
            }
        }
    }

    // Save memory
    m_Instances = new Instances(m_Instances, 0);
}

From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java

License:Open Source License

/**
 * Method used to pre-process the data, perform clustering, and
 * set the initial parameter vector.//  w ww .jav a 2 s .c om
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values   
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);
    double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    double z1 = data.instance(index).classValue();
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    Remove rm = new Remove();
    data.setClassIndex(-1);
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up arrays
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize parameters
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }

    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    initializeOutputLayer(random);

    return data;
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Method for building an Id3 tree.//from  w  w w . j a  v a 2 s  . c  o m
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new Id3();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Computes the entropy of a dataset./*w w  w .  j av  a2  s. c  om*/
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {

    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}

From source file:cezeri.utils.FactoryInstance.java

public static String[] getOriginalClasses(Instances data) {
    Attribute att = data.attribute(data.classIndex());
    String[] ret = new String[data.numClasses()];
    Enumeration enu = att.enumerateValues();
    int q = 0;//  w  w  w .  java 2 s.  c  o m
    while (enu.hasMoreElements()) {
        ret[q++] = (String) enu.nextElement();
    }
    return ret;
}

From source file:classif.dropx.DTWKNNClassifierSimpleRank.java

License:Open Source License

@Override
protected void buildSortedSequences(Instances data) {
    ArrayList<ClassedSequence> sortedSequencesTmp = new ArrayList<ClassedSequence>();
    int nbObjToRemove = data.numInstances();
    int nbClasses = data.numClasses();

    double[][] distances = new double[data.numInstances()][data.numInstances()];
    for (int i = 0; i < distances.length; i++) {
        for (int j = i + 1; j < distances[i].length; j++) {
            distances[i][j] = sequences[i].distance(sequences[j]);
            distances[j][i] = distances[i][j];
        }//w w w  .  j ava  2s  .  co m
    }

    // create temp structure to remove "bad" examples
    ArrayList<IndexedSequence> tmpSequences = new ArrayList<IndexedSequence>();
    ArrayList<String> tmpClassMap = new ArrayList<String>();

    // prune tmpSequences and tmpSclassMap

    // init temp structure
    for (int i = 0; i < sequences.length; i++) {
        tmpSequences.add(new IndexedSequence(sequences[i], i));
        tmpClassMap.add(classMap[i]);
    }

    for (int p = 0; p < nbObjToRemove - 2; p++) {
        // score for each point
        int scores[] = new int[tmpSequences.size()];
        // distance to nearest of the point of the same class
        double[] distToNearestOfSameClass = new double[tmpSequences.size()];

        for (int k = 0; k < distToNearestOfSameClass.length; k++) {
            distToNearestOfSameClass[k] = Double.MAX_VALUE;
        }
        ArrayList<Integer>[] nearestNeighbor = new ArrayList[tmpSequences.size()];

        // for each object
        for (int i = 0; i < tmpSequences.size(); i++) {
            // looking for the nearest
            double minD = Double.MAX_VALUE;
            int nn = -1;
            // we look for the NN
            for (int j = 0; j < tmpSequences.size(); j++) {
                // avoid diagonal
                if (j != i) {
                    // check distance
                    double tmpD = distances[tmpSequences.get(i).index][tmpSequences.get(j).index];
                    // if we found a new NN
                    if (tmpD < minD) {
                        nn = j;
                        minD = tmpD;
                    }
                    // if object is of same class
                    if (tmpClassMap.get(i).equals(tmpClassMap.get(j))) {
                        // if it is nearest
                        if (minD < distToNearestOfSameClass[i]) {
                            distToNearestOfSameClass[i] = minD;
                        }
                    }
                }
            }
            if (nearestNeighbor[nn] == null) {
                nearestNeighbor[nn] = new ArrayList<Integer>();
            }
            // we tell to the NN that he is the winner
            nearestNeighbor[nn].add(i);
        }

        for (int i = 0; i < nearestNeighbor.length; i++) {
            if (nearestNeighbor[i] == null) {
                scores[i] = 0;
            } else {
                ArrayList<Integer> nn = nearestNeighbor[i];
                int tmpScore = 0;
                for (Integer k : nn) {
                    // if k is of class i + 1
                    if (tmpClassMap.get(k).equals(tmpClassMap.get(i)))
                        tmpScore += 1;
                    else
                        tmpScore -= (2 / nbClasses);
                    // else -2/(nbC-1)
                }
                scores[i] = tmpScore;
            }
        }
        // find toRemove
        int toRemove = 0;
        for (int i = 1; i < scores.length; i++) {
            if (scores[i] <= scores[toRemove]) {
                if (distToNearestOfSameClass[i] < distToNearestOfSameClass[toRemove])
                    toRemove = i;
            }
        }
        sortedSequencesTmp
                .add(new ClassedSequence(tmpSequences.get(toRemove).sequence, tmpClassMap.get(toRemove)));
        tmpSequences.remove(toRemove);
        tmpClassMap.remove(toRemove);
    }

    for (int i = 0; i < tmpSequences.size(); i++) {
        sortedSequencesTmp.add(new ClassedSequence(tmpSequences.get(i).sequence, tmpClassMap.get(i)));
    }

    sortedSequences = new ArrayList<ClassedSequence>();
    // reorder
    for (int i = sortedSequencesTmp.size() - 1; i >= 0; i--) {
        sortedSequences.add(sortedSequencesTmp.get(i));
    }
}

From source file:classif.dropx.PrototyperSorted.java

License:Open Source License

@Override
protected void buildSpecificClassifier(Instances data) {
    if (sortedSequences == null) {
        buildSortedSequences(data);//from w  w  w.ja  v a 2 s .c o  m
    }

    int max = nbPrototypesPerClass;
    if (isNbPrototypesPerClass) {
        max = this.nbPrototypesPerClass * data.numClasses();
    }

    // add the number of required prototypes
    for (int i = 0; i < sortedSequences.size() && i < max; i++) {
        prototypes.add(sortedSequences.get(i));
    }
}

From source file:classif.dropx.PrototyperSorted.java

License:Open Source License

/**
 * Balance the classes in the train test
 * @param data// w  w  w. ja v a  2 s.  c  om
 */
protected void buildSpecificClassifierByClass(Instances data) {
    if (sortedSequences == null) {
        buildSortedSequences(data);
        buildSortedSequencesPerClass(data);
    }

    int max = nbPrototypesPerClass;
    if (isNbPrototypesPerClass) {
        max = this.nbPrototypesPerClass * data.numClasses();
    }

    Attribute classAttribute = data.classAttribute();
    for (int i = 0; i < classAttribute.numValues(); i++) {
        ArrayList<ClassedSequence> tmpClassObj = sortedSequencesByClass.get(classAttribute.value(i));
        for (int j = 0; j < max & j < tmpClassObj.size(); j++) {
            prototypes.add(tmpClassObj.get(j));
        }
    }
}

From source file:classif.Prototyper.java

License:Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    trainingData = data;/*from   w ww.jav a  2  s  .  co  m*/
    Attribute classAttribute = data.classAttribute();
    prototypes = new ArrayList<>();

    classedData = new HashMap<String, ArrayList<Sequence>>();
    indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>());
        indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>());
    }

    sequences = new Sequence[data.numInstances()];
    classMap = new String[sequences.length];
    for (int i = 0; i < sequences.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        sequences[i] = new Sequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(sequences[i]);
        indexClassedDataInFullData.get(clas).add(i);
        //         System.out.println("Element "+i+" of train is classed "+clas+" and went to element "+(indexClassedDataInFullData.get(clas).size()-1));
    }

    buildSpecificClassifier(data);

    if (fillPrototypes)
        addMissingPrototypesRandom();
}

From source file:classification.classifiers.LDA.java

License:Open Source License

/**
 * Modification on Dr. Wolfgang Lenhard's code.
 * This was necessary because this classifier had to implements
 * "buildClassifier" and "classifyInstance" to be like a classifier of WEKA(R).
 * /*w w w. ja  v  a2s  . co m*/
 * @param data
 * @throws Exception
 */
public void buildClassifier(Instances data) throws Exception {
    int n = data.numInstances();
    int a = data.numAttributes();
    int k = data.numClasses();
    int[] g = new int[n];

    double[][] d = new double[n][a];
    for (int i = 0; i < n; i++) {
        double[] d_i = data.instance(i).toDoubleArray();
        d[i] = d_i;

        /**
         * To print the attribute with the correspondent double
         *
         * System.out.print("\n"); for(int j=0; j<a; j++){
         * System.out.print(data.instance(i).stringValue(data.attribute(j))
         * + " = ");
         * System.out.print(data.instance(i).value(data.attribute(j)) +
         * ";  "); } System.out.print("\n"); /
         **/
    }

    // Gives the number of objects belonging to class i in the trainingSet.
    int classIndex = a - 1;
    valueClass = new double[k];

    data.setClassIndex(classIndex);

    for (int i = 0; i < k; i++) {
        // Reference class
        String refClass = data.classAttribute().value(i);
        //
        // System.out.println("refClass: " + refClass + " ");

        for (int j = 0; j < n; j++) {
            // Object class
            String objectClass = data.instance(j).stringValue(classIndex);
            //
            // System.out.println("objectClass: " + objectClass + " - value:
            // " + data.instance(j).value(data.attribute(classIndex)));

            // Building two vectors of classes, one in int format and
            // another in double format.
            if (objectClass == refClass) {

                // Object class as a double
                valueClass[i] = data.instance(j).value(data.attribute(classIndex));
                // Object class as an int
                g[j] = i;

                //
                // System.out.println("value of class (int): " + g[j] + "
                // ___ value (double): " + valueClass[i]);
            }
        }

    }

    this.BuildLDA(d, g, true);
}