Example usage for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


publicint numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:classif.Prototyper.java

License:Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    trainingData = data;/*from   w  w  w .  ja va  2  s . com*/
    Attribute classAttribute = data.classAttribute();
    prototypes = new ArrayList<>();

    classedData = new HashMap<String, ArrayList<Sequence>>();
    indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>());
        indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>());
    }

    sequences = new Sequence[data.numInstances()];
    classMap = new String[sequences.length];
    for (int i = 0; i < sequences.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        sequences[i] = new Sequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(sequences[i]);
        indexClassedDataInFullData.get(clas).add(i);
        //         System.out.println("Element "+i+" of train is classed "+clas+" and went to element "+(indexClassedDataInFullData.get(clas).size()-1));
    }

    buildSpecificClassifier(data);

    if (fillPrototypes)
        addMissingPrototypesRandom();
}

From source file:classif.Prototyper.java

License:Open Source License

public static ClassedSequence[] convertWekaSetToClassedSequence(Instances test) {

    Attribute classAttribute = test.classAttribute();
    ClassedSequence[] testSequences = new ClassedSequence[test.numInstances()];
    for (int i = 0; i < testSequences.length; i++) {
        Instance sample = test.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }//ww w  .ja v a2  s.  c o m
        String clas = sample.stringValue(classAttribute);
        testSequences[i] = new ClassedSequence(new Sequence(sequence), clas);
    }

    return testSequences;

}

From source file:classification.classifiers.LDA.java

License:Open Source License

/**
 * Modification on Dr. Wolfgang Lenhard's code.
 * This was necessary because this classifier had to implements
 * "buildClassifier" and "classifyInstance" to be like a classifier of WEKA(R).
 * //from   w  ww  .jav a 2  s  .  c om
 * @param data
 * @throws Exception
 */
public void buildClassifier(Instances data) throws Exception {
    int n = data.numInstances();
    int a = data.numAttributes();
    int k = data.numClasses();
    int[] g = new int[n];

    double[][] d = new double[n][a];
    for (int i = 0; i < n; i++) {
        double[] d_i = data.instance(i).toDoubleArray();
        d[i] = d_i;

        /**
         * To print the attribute with the correspondent double
         *
         * System.out.print("\n"); for(int j=0; j<a; j++){
         * System.out.print(data.instance(i).stringValue(data.attribute(j))
         * + " = ");
         * System.out.print(data.instance(i).value(data.attribute(j)) +
         * ";  "); } System.out.print("\n"); /
         **/
    }

    // Gives the number of objects belonging to class i in the trainingSet.
    int classIndex = a - 1;
    valueClass = new double[k];

    data.setClassIndex(classIndex);

    for (int i = 0; i < k; i++) {
        // Reference class
        String refClass = data.classAttribute().value(i);
        //
        // System.out.println("refClass: " + refClass + " ");

        for (int j = 0; j < n; j++) {
            // Object class
            String objectClass = data.instance(j).stringValue(classIndex);
            //
            // System.out.println("objectClass: " + objectClass + " - value:
            // " + data.instance(j).value(data.attribute(classIndex)));

            // Building two vectors of classes, one in int format and
            // another in double format.
            if (objectClass == refClass) {

                // Object class as a double
                valueClass[i] = data.instance(j).value(data.attribute(classIndex));
                // Object class as an int
                g[j] = i;

                //
                // System.out.println("value of class (int): " + g[j] + "
                // ___ value (double): " + valueClass[i]);
            }
        }

    }

    this.BuildLDA(d, g, true);
}

From source file:Classifiers.BRkNN.java

License:Open Source License

/**
 * Select the best value for k by hold-one-out cross-validation. Hamming
 * Loss is minimized/*w w w .  j a v a 2 s. c om*/
 *
 * @throws Exception Potential exception thrown. To be handled in an upper level.
 */
private void crossValidate() throws Exception {
    try {
        // the performance for each different k
        double[] hammingLoss = new double[cvMaxK];

        for (int i = 0; i < cvMaxK; i++) {
            hammingLoss[i] = 0;
        }

        Instances dataSet = train;
        Instance instance; // the hold out instance
        Instances neighbours; // the neighboring instances
        double[] origDistances, convertedDistances;
        for (int i = 0; i < dataSet.numInstances(); i++) {
            if (getDebug() && (i % 50 == 0)) {
                debug("Cross validating " + i + "/" + dataSet.numInstances() + "\r");
            }
            instance = dataSet.instance(i);
            neighbours = lnn.kNearestNeighbours(instance, cvMaxK);
            origDistances = lnn.getDistances();

            // gathering the true labels for the instance
            boolean[] trueLabels = new boolean[numLabels];
            for (int counter = 0; counter < numLabels; counter++) {
                int classIdx = labelIndices[counter];
                String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
                trueLabels[counter] = classValue.equals("1");
            }
            // calculate the performance metric for each different k
            for (int j = cvMaxK; j > 0; j--) {
                convertedDistances = new double[origDistances.length];
                System.arraycopy(origDistances, 0, convertedDistances, 0, origDistances.length);
                double[] confidences = this.getConfidences(neighbours, convertedDistances);
                boolean[] bipartition = null;

                switch (extension) {
                case NONE: // BRknn
                    MultiLabelOutput results;
                    results = new MultiLabelOutput(confidences, 0.5);
                    bipartition = results.getBipartition();
                    break;
                case EXTA: // BRknn-a
                    bipartition = labelsFromConfidences2(confidences);
                    break;
                case EXTB: // BRknn-b
                    bipartition = labelsFromConfidences3(confidences);
                    break;
                }

                double symmetricDifference = 0; // |Y xor Z|
                for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
                    boolean actual = trueLabels[labelIndex];
                    boolean predicted = bipartition[labelIndex];

                    if (predicted != actual) {
                        symmetricDifference++;
                    }
                }
                hammingLoss[j - 1] += (symmetricDifference / numLabels);

                neighbours = new IBk().pruneToK(neighbours, convertedDistances, j - 1);
            }
        }

        // Display the results of the cross-validation
        if (getDebug()) {
            for (int i = cvMaxK; i > 0; i--) {
                debug("Hold-one-out performance of " + (i) + " neighbors ");
                debug("(Hamming Loss) = " + hammingLoss[i - 1] / dataSet.numInstances());
            }
        }

        // Check through the performance stats and select the best
        // k value (or the lowest k if more than one best)
        double[] searchStats = hammingLoss;

        double bestPerformance = Double.NaN;
        int bestK = 1;
        for (int i = 0; i < cvMaxK; i++) {
            if (Double.isNaN(bestPerformance) || (bestPerformance > searchStats[i])) {
                bestPerformance = searchStats[i];
                bestK = i + 1;
            }
        }
        numOfNeighbors = bestK;
        if (getDebug()) {
            System.err.println("Selected k = " + bestK);
        }

    } catch (Exception ex) {
        throw new Error("Couldn't optimize by cross-validation: " + ex.getMessage());
    }
}

From source file:Classifiers.BRkNN.java

License:Open Source License

/**
 * Calculates the confidences of the labels, based on the neighboring
 * instances//from  w  ww  .  jav  a  2  s  .  c om
 *
 * @param neighbours
 *            the list of nearest neighboring instances
 * @param distances
 *            the distances of the neighbors
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    double total, weight;
    double neighborLabels = 0;
    double[] confidences = new double[numLabels];

    // Set up a correction to the estimator
    for (int i = 0; i < numLabels; i++) {
        confidences[i] = 1.0 / Math.max(1, train.numInstances());
    }
    total = (double) numLabels / Math.max(1, train.numInstances());

    for (int i = 0; i < neighbours.numInstances(); i++) {
        // Collect class counts
        Instance current = neighbours.instance(i);
        distances[i] = distances[i] * distances[i];
        distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels));
        weight = 1.0;
        weight *= current.weight();

        for (int j = 0; j < numLabels; j++) {
            double value = Double.parseDouble(
                    current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j])));
            if (Utils.eq(value, 1.0)) {
                confidences[j] += weight;
                neighborLabels += weight;
            }
        }
        total += weight;
    }

    avgPredictedLabels = (int) Math.round(neighborLabels / total);
    // Normalise distribution
    if (total > 0) {
        Utils.normalize(confidences, total);
    }
    return confidences;
}

From source file:classifiers.ComplexClassifier.java

@Override
public void bootstrapvalidierungsmenge(Instances inst) {
    if (inst.numAttributes() != 0) {
        int[] hilf = new int[inst.numInstances()];

        for (int i = 0; i < inst.numInstances(); i++) {
            int a = ((int) (Math.random() * inst.numInstances()));

            hilf[i] = a;/*from  w w w.j  av  a 2  s  .  c om*/
        }

        Modelsindexen = EliminiereDopelt(hilf);
        Modelmenge = new Instances(inst, Modelsindexen.length);
        for (int i = 0; i < Modelsindexen.length; i++) {

            Modelmenge.add(new Instance(inst.instance(Modelsindexen[i])));
        }

        validierungsindexen = new int[inst.numInstances() - Modelsindexen.length];
        validierungsmenge = new Instances(Modelmenge, validierungsindexen.length);

        for (int i = 0, j = 0; i < inst.numInstances() && j < validierungsindexen.length; i++, j++) {
            if (!(HasSet(Modelsindexen, i))) {
                validierungsindexen[j] = i;
                validierungsmenge.add(inst.instance(validierungsindexen[j]));

            }
        }

    }
}

From source file:classifiers.ComplexClassifier.java

@Override
public void train(Instances inst) throws Exception {
    Knoten[] k = Model.getDieknoten();/*from   www .ja v  a 2s. c om*/
    Enumeration<Attribute> enu = inst.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();
        if (k[attindex].hatEltern()) {

            switch (att.type()) {
            case Attribute.NUMERIC: {

                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {
                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());
                    int c = a.index();
                    switch (a.type()) {
                    case Attribute.NUMERIC:
                        list.add(attindex, (new NumericNumericDistribution(inst, attindex, c)));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    default:
                        throw new Exception("Attributetype unbekannt");
                    }
                }

            }
                break;
            case Attribute.NOMINAL: {
                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {

                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());

                    int c = a.index();

                    switch (a.type()) {

                    case Attribute.NUMERIC:
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    default: {
                        throw new Exception("Attributetype unbekannt");
                    }
                    }
                }
            }
                break;
            }
        } else {

            switch (att.type()) {
            case Attribute.NUMERIC:

                list.add(attindex, new NumericDistribution(inst, attindex));
                break;
            case Attribute.NOMINAL:

                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            case Attribute.STRING:
                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            default:
                throw new Exception("Attributetype unbekannt");

            }
        }
        attindex++;
    }

    for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = 0; j < inst.numInstances(); j++) {
            if (inst.instance(j).classValue() == i) {
                Classparam[i]++;
            }

        }

    }

    for (int i = 0; i < inst.numClasses(); i++) {
        Classparam[i] /= inst.numInstances();
    }

}

From source file:classifiers.ComplexClassifier.java

@Override
public double[][] test(Instances testinst) {
    double count = 0;
    long anfangszeit = System.currentTimeMillis();
    ;//w  w  w.  j  av a  2s  .  c o  m
    long endzeit;
    double[][] ausgabe = new double[1][2];
    if (testinst.numAttributes() != 0) {

        testinst.setClass(testinst.attribute(testinst.numAttributes() - 1));

        for (int i = 0; i < testinst.numInstances(); i++) {

            if (!Classify(testinst.instance(i))) {
                count++;
            } else {
            }

        }

        endzeit = System.currentTimeMillis();
        ausgabe[0][0] = (count / testinst.numInstances()) * 100;

        ausgabe[0][1] = ((endzeit - anfangszeit));
        // System.out.println(testinst);
        return ausgabe;
    } else {
        // System.out.println(testinst);
        return ausgabe;
    }

}

From source file:classifiers.ComplexClassifierZufall.java

public ComplexClassifierZufall(Instances inst, int anzahl) {
    super(inst);/*from w ww . j  a va 2  s .  c  om*/
    Datenbank = new Instances(super.getinst());
    this.vernetzung = (int) (Math.random() * 101);
    Model = new GraphMitAngabeVernetzungsgrad(inst, vernetzung);
    Model.strukturiereGraph();

    list = new ArrayList<>();
    Classparam = new double[inst.numInstances()];
    this.anzahldurchlauf = anzahl;
    trainergebnisse = new double[anzahldurchlauf][2];
    testergebnisse = new double[anzahldurchlauf][2];
    Modelergebnisse = new double[1][2];
    validierungsergebnisse = new double[1][2];
    struct = new BayesNetz(inst, Model);

}

From source file:classifiers.ComplexClassifierZufall.java

@Override
public void train(Instances inst) throws Exception {

    Knoten[] k = Model.getDieknoten();/*from w ww.ja v a  2s  . com*/
    Enumeration<Attribute> enu = inst.enumerateAttributes();
    int attindex = 0;
    while (enu.hasMoreElements()) {
        Attribute att = enu.nextElement();
        if (k[attindex].hatEltern()) {

            switch (att.type()) {
            case Attribute.NUMERIC: {

                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {
                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());
                    int c = a.index();
                    switch (a.type()) {
                    case Attribute.NUMERIC:
                        list.add(attindex, (new NumericNumericDistribution(inst, attindex, c)));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    default:
                        throw new Exception("Attributetype unbekannt");
                    }
                }

            }
                break;
            case Attribute.NOMINAL: {
                for (int i = 0; i < k[attindex].anzahlEltern(); i++) {

                    Attribute a = inst.attribute(k[attindex].getEltern(i).getID());

                    int c = a.index();

                    switch (a.type()) {

                    case Attribute.NUMERIC:
                        list.add(attindex, new NumericNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.NOMINAL):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    case (Attribute.STRING):
                        list.add(attindex, new NominalNominalDistribution(inst, attindex, c));
                        break;
                    default: {
                        throw new Exception("Attributetype unbekannt");
                    }
                    }
                }
            }
                break;
            }
        } else {

            switch (att.type()) {
            case Attribute.NUMERIC:

                list.add(attindex, new NumericDistribution(inst, attindex));
                break;
            case Attribute.NOMINAL:

                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            case Attribute.STRING:
                list.add(attindex, new NominalDistribution(inst, attindex));
                break;
            default:
                throw new Exception("Attributetype unbekannt");

            }
        }
        attindex++;
    }

    for (int i = 0; i < inst.numClasses(); i++) {
        for (int j = 0; j < inst.numInstances(); j++) {
            if (inst.instance(j).classValue() == i) {
                Classparam[i]++;
            }

        }

    }

    for (int i = 0; i < inst.numClasses(); i++) {
        Classparam[i] /= inst.numInstances();
    }

}