Example usage for weka.core Instance weight

List of usage examples for weka.core Instance weight

Introduction

On this page you can find example usages of the weka.core Instance.weight() method.

Prototype

public double weight();

Document

Returns the instance's weight.
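
A minimal sketch of reading and changing a weight (assuming Weka 3.7 or later, where DenseInstance is the standard concrete Instance implementation; an instance's weight defaults to 1.0):

import weka.core.DenseInstance;
import weka.core.Instance;

public class WeightDemo {
    public static void main(String[] args) {
        // DenseInstance(weight, attValues): the first argument is the weight.
        Instance inst = new DenseInstance(1.0, new double[] { 4.2, 1.0 });
        System.out.println(inst.weight()); // 1.0
        inst.setWeight(2.5);               // weights need not be integral
        System.out.println(inst.weight()); // 2.5
    }
}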

Usage

From source file:lascer.WekaClassifier.java

License:Open Source License

/**
 * Classifies a given instance.
 *
 * @param inst  the instance to be classified.
 *
 * @return  the classification of the instance.
 */
public double classifyInstance(Instance inst) {
    weka.coreExtended.Instance extendedInstance;
    de.unistuttgart.commandline.Option lokalGrenzwertOption;
    AttributSammlung attributSammlung;
    PraedErzParameter praedErzParameter = null;
    KonzErzParameter konzErzParameter = null;
    Konzept posFormelTrue, posFormelFalse;
    Konzept negFormelTrue, negFormelFalse;
    Konzept posKonzeptTrue, posKonzeptFalse;
    Konzept negKonzeptTrue, negKonzeptFalse;
    Beispiel testbeispiel;
    float fehlAnzPosTrue, fehlAnzPosFalse, fehlAnzPosDiff;
    float fehlAnzNegTrue, fehlAnzNegFalse, fehlAnzNegDiff;
    float komplexPosTrue, komplexPosFalse, komplexPosDiff;
    float komplexNegTrue, komplexNegFalse, komplexNegDiff;
    float bewertPosTrue, bewertPosFalse, bewertPosDiff;
    float bewertNegTrue, bewertNegFalse, bewertNegDiff;
    float lokalGrenzwert;

    extendedInstance = new weka.coreExtended.BasicInstance(inst.weight(), inst.toDoubleArray());
    attributSammlung = posDatensatz.attributSammlung();
    testbeispiel = ArffDateiEinlesen.instanzBeispiel(extendedInstance, attributListe, attributSammlung);

    lokalGrenzwert = Konstanten.LOKAL_GRENZWERT;
    lokalGrenzwertOption = parser.getOption("lokalGrenzwert");
    if (parser.isEnabled(lokalGrenzwertOption)) {
        lokalGrenzwert = parser.getParameterAsFloat(lokalGrenzwertOption);
    }

    if ((posFormel != null) && (negFormel != null) || (lokalGrenzwert > 0)) {
        // Determine the parameters.
        praedErzParameter = Steuerung.praedErzParameter(parser);
        konzErzParameter = Steuerung.konzErzParameter(parser);
    }

    if ((posFormel == null) && (negFormel == null)) {

        throw new RuntimeException("No formula available.");

    } else if (negFormel == null) {
        // Only a formula for the positive examples exists.

        if (lokalGrenzwert == 0) {
            // Classify using only the existing formula.
            if (posFormel.trifftZu(testbeispiel)) {
                return wekaClassTrue;
            } else {
                return wekaClassFalse;
            }
        } else {
            // Classify after generating formulas (and possibly concepts)
            // with the test example added to the data set.
            return relativeCassification(posDatensatz, praedErzParameter, konzErzParameter, posFormel,
                    testbeispiel, lokalGrenzwert, wekaClassTrue, wekaClassFalse);
        }

    } else if (posFormel == null) {
        // Only a formula for the negative examples exists.

        if (lokalGrenzwert == 0) {
            // Classify using only the existing formula.
            if (negFormel.trifftZu(testbeispiel)) {
                return wekaClassFalse;
            } else {
                return wekaClassTrue;
            }
        } else {
            // Classify after generating formulas (and possibly concepts)
            // with the test example added to the data set.
            return relativeCassification(negDatensatz, praedErzParameter, konzErzParameter, negFormel,
                    testbeispiel, lokalGrenzwert, wekaClassFalse, wekaClassTrue);
        }

    } else {
        // Formulas exist for both the positive and the negative
        // examples. Classification is therefore based on the relative
        // change in the complexity of the formulas when the example to
        // be classified is taken into account.

        // Generate a formula for the positive examples including the
        // test example.
        if (posFormel.trifftZu(testbeispiel)) {
            posFormelTrue = posFormel;
            posFormelFalse = generatedAddFormula(posDatensatz, praedErzParameter, konzErzParameter, posFormel,
                    testbeispiel, false);
        } else {
            posFormelFalse = posFormel;
            posFormelTrue = generatedAddFormula(posDatensatz, praedErzParameter, konzErzParameter, posFormel,
                    testbeispiel, true);
        }

        // Generate a formula for the negative examples including the
        // test example.
        if (negFormel.trifftZu(testbeispiel)) {
            negFormelTrue = negFormel;
            negFormelFalse = generatedAddFormula(negDatensatz, praedErzParameter, konzErzParameter, negFormel,
                    testbeispiel, false);
        } else {
            negFormelFalse = negFormel;
            negFormelTrue = generatedAddFormula(negDatensatz, praedErzParameter, konzErzParameter, negFormel,
                    testbeispiel, true);
        }

        // Determine the error counts of the formulas.
        fehlAnzPosTrue = (posFormelTrue.posFalschAnz() + posFormelTrue.negFalschAnz());
        fehlAnzPosFalse = (posFormelFalse.posFalschAnz() + posFormelFalse.negFalschAnz());
        fehlAnzNegTrue = (negFormelTrue.posFalschAnz() + negFormelTrue.negFalschAnz());
        fehlAnzNegFalse = (negFormelFalse.posFalschAnz() + negFormelFalse.negFalschAnz());
        fehlAnzPosDiff = fehlAnzPosTrue - fehlAnzPosFalse;
        fehlAnzNegDiff = fehlAnzNegTrue - fehlAnzNegFalse;

        // Determine the complexity of the formulas.
        komplexPosTrue = posFormelTrue.komplexitaet();
        komplexPosFalse = posFormelFalse.komplexitaet();
        komplexNegTrue = negFormelTrue.komplexitaet();
        komplexNegFalse = negFormelFalse.komplexitaet();
        komplexPosDiff = komplexPosTrue - komplexPosFalse;
        komplexNegDiff = komplexNegTrue - komplexNegFalse;

        // Compare the changes of the formulas.
        if ((fehlAnzPosDiff < 0) && (fehlAnzNegDiff > 0)) {

            return wekaClassTrue;

        } else if ((fehlAnzPosDiff > 0) && (fehlAnzNegDiff < 0)) {

            return wekaClassFalse;

        } else if (Math.abs(fehlAnzPosDiff) > Math.abs(fehlAnzNegDiff)) {

            // fehlAnzPosDiff is non-zero.
            if (fehlAnzPosDiff < 0) {
                return wekaClassTrue;
            } else {
                return wekaClassFalse;
            }

        } else if (Math.abs(fehlAnzPosDiff) < Math.abs(fehlAnzNegDiff)) {

            // fehlAnzNegDiff is non-zero.
            if (fehlAnzNegDiff > 0) {
                return wekaClassTrue;
            } else {
                return wekaClassFalse;
            }

        } else {
            // The change in the error count with respect to the class of
            // the test example is the same for both formulas over all
            // examples of the respective class.

            if ((Math.abs(komplexPosDiff - komplexNegDiff) >= 2 * lokalGrenzwert)
                    && (komplexPosDiff != komplexNegDiff)) {

                // Classify by the complexities.
                if (Math.abs(komplexPosDiff) > Math.abs(komplexNegDiff)) {

                    // komplexPosDiff is non-zero.
                    if (komplexPosDiff < 0) {
                        return wekaClassTrue;
                    } else {
                        return wekaClassFalse;
                    }

                } else {

                    // komplexNegDiff is larger in absolute value than
                    // komplexPosDiff and non-zero.
                    if (komplexNegDiff > 0) {
                        return wekaClassTrue;
                    } else {
                        return wekaClassFalse;
                    }

                }

            } else if (lokalGrenzwert > 0) {
                // Perform a local classification.

                // Generate the most efficient concepts after adding the
                // test example.
                posKonzeptTrue = generatedAddConcept(posDatensatz, praedErzParameter, konzErzParameter,
                        posFormel, testbeispiel, true);
                posKonzeptFalse = generatedAddConcept(posDatensatz, praedErzParameter, konzErzParameter,
                        posFormel, testbeispiel, false);
                negKonzeptTrue = generatedAddConcept(negDatensatz, praedErzParameter, konzErzParameter,
                        negFormel, testbeispiel, true);
                negKonzeptFalse = generatedAddConcept(negDatensatz, praedErzParameter, konzErzParameter,
                        negFormel, testbeispiel, false);

                // Determine the ratings of the concepts.
                bewertPosTrue = KonzeptErzeugungFacade.konzeptEffizienz(posKonzeptTrue);
                bewertPosFalse = KonzeptErzeugungFacade.konzeptEffizienz(posKonzeptFalse);
                bewertNegTrue = KonzeptErzeugungFacade.konzeptEffizienz(negKonzeptTrue);
                bewertNegFalse = KonzeptErzeugungFacade.konzeptEffizienz(negKonzeptFalse);
                bewertPosDiff = bewertPosTrue - bewertPosFalse;
                bewertNegDiff = bewertNegTrue - bewertNegFalse;

                if (Math.abs(bewertPosDiff) >= Math.abs(bewertNegDiff)) {
                    if (bewertPosDiff >= 0) {
                        return wekaClassTrue;
                    } else {
                        return wekaClassFalse;
                    }
                } else {
                    if (bewertNegDiff < 0) {
                        return wekaClassTrue;
                    } else {
                        return wekaClassFalse;
                    }
                }

            } else {
                // The change in complexity with respect to the class of
                // the test example is the same for both formulas over all
                // examples of the respective class, and no local
                // classification is to be performed. Therefore classify
                // by the number of examples in each class.

                if (posDatensatz.posBspAnz() >= posDatensatz.negBspAnz()) {
                    return wekaClassTrue;
                } else {
                    return wekaClassFalse;
                }
            }
        }
    }
}

From source file:LogReg.Logistic.java

License:Open Source License

/**
 * Builds the classifier.
 *
 * @param train the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values   
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed 
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's
    double[] sY = new double[nK + 1]; // Number of instances in each class
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];

        m_Data[i][0] = 1;
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }

        // Class count
        sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable     Avg       SD    ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data 
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough, but use the current value
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}
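
The extraction loop above accumulates weighted sums (xMean[j] += weights[i] * x, xSD[j] += weights[i] * x * x), and the normalisation step turns them into a weighted mean and a weighted sample standard deviation. A standalone sketch of that computation (a hypothetical helper, not part of the original class):

// Weighted mean and sample standard deviation, mirroring the
// xMean/xSD computation in buildClassifier() above.
static double[] weightedMeanSd(double[] xs, double[] ws) {
    double totW = 0, sum = 0, sumSq = 0;
    for (int i = 0; i < xs.length; i++) {
        totW += ws[i];
        sum += ws[i] * xs[i];
        sumSq += ws[i] * xs[i] * xs[i];
    }
    double mean = sum / totW;
    double sd = (totW > 1)
            ? Math.sqrt(Math.abs(sumSq - totW * mean * mean) / (totW - 1))
            : 0.0;
    return new double[] { mean, sd };
}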

From source file:main.NaiveBayes.java

License:Open Source License

/**
 * Updates the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the
 *              model.
 */
public void updateClassifier(Instance instance) throws Exception {

    if (!instance.classIsMissing()) {
        Enumeration<Attribute> enumAtts = m_Instances.enumerateAttributes();
        int attIndex = 0;
        while (enumAtts.hasMoreElements()) {
            Attribute attribute = enumAtts.nextElement();
            if (!instance.isMissing(attribute)) {
                m_Distributions[attIndex][(int) instance.classValue()].addValue(instance.value(attribute),
                        instance.weight());
            }
            attIndex++;
        }
        m_ClassDistribution.addValue(instance.classValue(), instance.weight());
    }
}
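
The weight scales how much evidence an instance contributes to the per-attribute estimators, so an up-weighted instance behaves like several identical copies. A hedged usage sketch (nb and data are hypothetical names for an initialised classifier and a compatible Instances object with its class index set):

// Count one instance as three ordinary observations.
Instance important = (Instance) data.instance(0).copy();
important.setWeight(3.0);
nb.updateClassifier(important); // weight() scales each addValue() call above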

From source file:meka.classifiers.multilabel.incremental.meta.BaggingMLUpdateable.java

License:Open Source License

@Override
public void updateClassifier(Instance x) throws Exception {

    for (int i = 0; i < m_NumIterations; i++) {
        // Oza-Bag style
        int k = poisson(1.0, random);
        if (m_BagSizePercent == 100) {
            // Train on all instances
            k = 1;
        }
        if (k > 0) {
            // Train on this instance only if k > 0
            Instance x_weighted = (Instance) x.copy();
            x_weighted.setWeight(x.weight() * (double) k);
            ((UpdateableClassifier) m_Classifiers[i]).updateClassifier(x_weighted);
        }
    }
}
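
The poisson(1.0, random) helper is defined elsewhere in the MEKA codebase and not shown here; a common Knuth-style sampler with the same contract might look like this sketch:

import java.util.Random;

// Draw k ~ Poisson(lambda) by multiplying uniform variates (Knuth's method).
static int poisson(double lambda, Random r) {
    double limit = Math.exp(-lambda);
    double product = 1.0;
    int k = 0;
    do {
        k++;
        product *= r.nextDouble();
    } while (product > limit);
    return k - 1;
}

Scaling the copied instance's weight by k is what makes this online bagging: base model i effectively sees the instance k times without the stream being replayed.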

From source file:meka.classifiers.multilabel.MajorityLabelset.java

License:Open Source License

protected void updateCount(Instance x, int L) {
    String y = MLUtils.toBitString(x, L);

    if (classFreqs.containsKey(y)) {
        double freq = classFreqs.get(y) + x.weight();
        classFreqs.put(y, freq);
        if (maxValue < freq) {
            maxValue = freq;
            this.prediction = MLUtils.fromBitString(y);
        }
    } else {
        classFreqs.put(y, x.weight());
    }
}
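
Note that the labelset frequencies are sums of x.weight() rather than raw counts, so a single heavily weighted instance can decide the majority labelset: with hypothetical values, two weight-1.0 instances with labelset "101" lose to one weight-3.0 instance with labelset "010".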

From source file:milk.classifiers.MIBoost.java

License:Open Source License

/**
  * Builds the classifier.
  *
  * @param exps the training exemplars to be used for generating the
  * boosted classifier.
  * @exception Exception if the classifier could not be built successfully
  */
 public void buildClassifier(Exemplars exps) throws Exception {

     Exemplars train = new Exemplars(exps);

     if (train.classAttribute().type() != Attribute.NOMINAL) {
         throw new Exception("Class attribute must be nominal.");
     }
     if (train.checkForStringAttributes()) {
         throw new Exception("Can't handle string attributes!");
     }

     m_ClassIndex = train.classIndex();
     m_IdIndex = train.idIndex();
     m_NumClasses = train.numClasses();
     m_NumIterations = m_MaxIterations;

     if (m_NumClasses > 2) {
         throw new Exception("Not yet prepared to deal with multiple classes!");
     }

     if (m_Classifier == null)
         throw new Exception("A base classifier has not been specified!");
     if (!(m_Classifier instanceof WeightedInstancesHandler))
         throw new Exception("Base classifier cannot handle weighted instances!");

     m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
     if (m_Debug)
         System.err.println("Base classifier: " + m_Classifier.getClass().getName());

     m_Beta = new double[m_NumIterations];
     m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

     double N = (double) train.numExemplars(), sumNi = 0;
     Instances data = new Instances(m_Attributes, 0);// Data to learn a model   
     data.deleteAttributeAt(m_IdIndex);// ID attribute useless   
     Instances dataset = new Instances(data, 0);

     // Initialize weights
     for (int i = 0; i < N; i++)
         sumNi += train.exemplar(i).getInstances().numInstances();

     for (int i = 0; i < N; i++) {
         Exemplar exi = train.exemplar(i);
         exi.setWeight(sumNi / N);
         Instances insts = exi.getInstances();
         double ni = (double) insts.numInstances();
         for (int j = 0; j < ni; j++) {
             Instance ins = new Instance(insts.instance(j));// Copy
             //insts.instance(j).setWeight(1.0);   

             ins.deleteAttributeAt(m_IdIndex);
             ins.setDataset(dataset);
             ins.setWeight(exi.weight() / ni);
             data.add(ins);
         }
     }

     // Assume the order of the instances is preserved in the Discretize filter
     if (m_DiscretizeBin > 0) {
         m_Filter = new Discretize();
         m_Filter.setInputFormat(new Instances(data, 0));
         m_Filter.setBins(m_DiscretizeBin);
         data = Filter.useFilter(data, m_Filter);
     }

     // Main algorithm
     int dataIdx;
     iterations: for (int m = 0; m < m_MaxIterations; m++) {
         if (m_Debug)
             System.err.println("\nIteration " + m);
         // Build a model
         m_Models[m].buildClassifier(data);

         // Prediction of each bag
         double[] err = new double[(int) N], weights = new double[(int) N];
         boolean perfect = true, tooWrong = true;
         dataIdx = 0;
         for (int n = 0; n < N; n++) {
             Exemplar exn = train.exemplar(n);
             // Prediction of each instance and the predicted class distribution
             // of the bag      
             double nn = (double) exn.getInstances().numInstances();
             for (int p = 0; p < nn; p++) {
                 Instance testIns = data.instance(dataIdx++);
                 if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue()) // Weighted instance-wise 0-1 errors
                     err[n]++;
             }
             weights[n] = exn.weight();
             err[n] /= nn;
             if (err[n] > 0.5)
                 perfect = false;
             if (err[n] < 0.5)
                 tooWrong = false;
         }

         if (perfect || tooWrong) { // No or 100% classification error, cannot find beta
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("No errors");
             break iterations;
         }

         double[] x = new double[1];
         x[0] = 0;
         double[][] b = new double[2][x.length];
         b[0][0] = Double.NaN;
         b[1][0] = Double.NaN;

         OptEng opt = new OptEng();
         opt.setWeights(weights);
         opt.setErrs(err);
         //opt.setDebug(m_Debug);
         if (m_Debug)
             System.out.println("Start searching for c... ");
         x = opt.findArgmin(x, b);
         while (x == null) {
             x = opt.getVarbValues();
             if (m_Debug)
                 System.out.println("200 iterations finished, not enough!");
             x = opt.findArgmin(x, b);
         }
         if (m_Debug)
             System.out.println("Finished.");
         m_Beta[m] = x[0];

         if (m_Debug)
             System.err.println("c = " + m_Beta[m]);

         // Stop if error too small or error too big and ignore this model
         if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("Errors out of range!");
             break iterations;
         }

         // Update weights of data and class label of wfData
         dataIdx = 0;
         double totWeights = 0;
         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
             totWeights += exr.weight();
         }

         if (m_Debug)
             System.err.println("Total weights = " + totWeights);

         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             double num = (double) exr.getInstances().numInstances();
             exr.setWeight(sumNi * exr.weight() / totWeights);
             //if(m_Debug)
             //    System.err.print("\nExemplar "+r+"="+exr.weight()+": \t");
             for (int s = 0; s < num; s++) {
                 Instance inss = data.instance(dataIdx);
                 inss.setWeight(exr.weight() / num);
                 //    if(m_Debug)
                 //  System.err.print("instance "+s+"="+inss.weight()+
                 //          "|ew*iw*sumNi="+data.instance(dataIdx).weight()+"\t");
                 if (Double.isNaN(inss.weight()))
                     throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                 dataIdx++;
             }
             //if(m_Debug)
             //    System.err.println();
         }
     }
 }

From source file:milk.classifiers.TLD.java

License:Open Source License

/**
 *
 * @param exs the training exemplars
 * @exception Exception if the model cannot be built properly
 */
public void buildClassifier(Exemplars exs) throws Exception {
    m_ClassIndex = exs.classIndex();
    m_IdIndex = exs.idIndex();
    int numegs = exs.numExemplars();
    m_Dimension = exs.numAttributes() - 2;

    Exemplars pos = new Exemplars(exs, 0), neg = new Exemplars(exs, 0);

    for (int u = 0; u < numegs; u++) {
        Exemplar example = exs.exemplar(u);
        if (example.classValue() == 0)
            pos.add(example);
        else
            neg.add(example);
    }

    int pnum = pos.numExemplars(), nnum = neg.numExemplars();

    m_MeanP = new double[pnum][m_Dimension];
    m_VarianceP = new double[pnum][m_Dimension];
    m_SumP = new double[pnum][m_Dimension];
    m_MeanN = new double[nnum][m_Dimension];
    m_VarianceN = new double[nnum][m_Dimension];
    m_SumN = new double[nnum][m_Dimension];
    m_ParamsP = new double[4 * m_Dimension];
    m_ParamsN = new double[4 * m_Dimension];

    // Estimation of the parameters: as the start value for search
    double[] pSumVal = new double[m_Dimension], // for m 
            nSumVal = new double[m_Dimension];
    double[] maxVarsP = new double[m_Dimension], // for a
            maxVarsN = new double[m_Dimension];
    // Mean of sample variances: for b, b=a/E(\sigma^2)+2
    double[] varMeanP = new double[m_Dimension], varMeanN = new double[m_Dimension];
    // Variances of sample means: for w, w=E[var(\mu)]/E[\sigma^2]
    double[] meanVarP = new double[m_Dimension], meanVarN = new double[m_Dimension];
    // number of exemplars without all values missing
    double[] numExsP = new double[m_Dimension], numExsN = new double[m_Dimension];

    // Extract metadata from both positive and negative bags
    for (int v = 0; v < pnum; v++) {
        Exemplar px = pos.exemplar(v);
        m_MeanP[v] = px.meanOrMode();
        m_VarianceP[v] = px.variance();
        Instances pxi = px.getInstances();

        for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
            if ((t == m_ClassIndex) || (t == m_IdIndex))
                t++;

            if (!Double.isNaN(m_MeanP[v][w])) {
                for (int u = 0; u < pxi.numInstances(); u++) {
                    Instance ins = pxi.instance(u);
                    if (!ins.isMissing(t))
                        m_SumP[v][w] += ins.weight();
                }
                numExsP[w]++;
                pSumVal[w] += m_MeanP[v][w];
                meanVarP[w] += m_MeanP[v][w] * m_MeanP[v][w];
                if (maxVarsP[w] < m_VarianceP[v][w])
                    maxVarsP[w] = m_VarianceP[v][w];
                varMeanP[w] += m_VarianceP[v][w];
                m_VarianceP[v][w] *= (m_SumP[v][w] - 1.0);
                if (m_VarianceP[v][w] < 0.0)
                    m_VarianceP[v][w] = 0.0;
            }
        }
    }

    for (int v = 0; v < nnum; v++) {
        Exemplar nx = neg.exemplar(v);
        m_MeanN[v] = nx.meanOrMode();
        m_VarianceN[v] = nx.variance();
        Instances nxi = nx.getInstances();

        for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
            if ((t == m_ClassIndex) || (t == m_IdIndex))
                t++;

            if (!Double.isNaN(m_MeanN[v][w])) {
                for (int u = 0; u < nxi.numInstances(); u++)
                    if (!nxi.instance(u).isMissing(t))
                        m_SumN[v][w] += nxi.instance(u).weight();
                numExsN[w]++;
                nSumVal[w] += m_MeanN[v][w];
                meanVarN[w] += m_MeanN[v][w] * m_MeanN[v][w];
                if (maxVarsN[w] < m_VarianceN[v][w])
                    maxVarsN[w] = m_VarianceN[v][w];
                varMeanN[w] += m_VarianceN[v][w];
                m_VarianceN[v][w] *= (m_SumN[v][w] - 1.0);
                if (m_VarianceN[v][w] < 0.0)
                    m_VarianceN[v][w] = 0.0;
            }
        }
    }

    for (int w = 0; w < m_Dimension; w++) {
        pSumVal[w] /= numExsP[w];
        nSumVal[w] /= numExsN[w];
        if (numExsP[w] > 1)
            meanVarP[w] = meanVarP[w] / (numExsP[w] - 1.0) - pSumVal[w] * numExsP[w] / (numExsP[w] - 1.0);
        if (numExsN[w] > 1)
            meanVarN[w] = meanVarN[w] / (numExsN[w] - 1.0) - nSumVal[w] * numExsN[w] / (numExsN[w] - 1.0);
        varMeanP[w] /= numExsP[w];
        varMeanN[w] /= numExsN[w];
    }

    //Bounds and parameter values for each run
    double[][] bounds = new double[2][4];
    double[] pThisParam = new double[4], nThisParam = new double[4];

    // Initial values for parameters
    double a, b, w, m;

    // Optimize for one dimension
    for (int x = 0; x < m_Dimension; x++) {
        System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Dimension #" + x);

        // Positive exemplars: first run
        a = (maxVarsP[x] > ZERO) ? maxVarsP[x] : 1.0;
        b = a / varMeanP[x] + 2.0; // a/(b-2) = E(\sigma^2)
        w = meanVarP[x] / varMeanP[x]; // E[var(\mu)] = w*E[\sigma^2]
        if (w <= ZERO)
            w = 1.0;
        m = pSumVal[x];
        pThisParam[0] = a; // a
        pThisParam[1] = b; // b
        pThisParam[2] = w; // w
        pThisParam[3] = m; // m

        // Negative exemplars: first run
        a = (maxVarsN[x] > ZERO) ? maxVarsN[x] : 1.0;
        b = a / varMeanN[x] + 2.0; // a/(b-2) = E(\sigma^2)
        w = meanVarN[x] / varMeanN[x]; // E[var(\mu)] = w*E[\sigma^2]
        if (w <= ZERO)
            w = 1.0;
        m = nSumVal[x];
        nThisParam[0] = a; // a
        nThisParam[1] = b; // b
        nThisParam[2] = w; // w
        nThisParam[3] = m; // m

        // Bound constraints
        bounds[0][0] = ZERO; // a > 0
        bounds[0][1] = 2.0 + ZERO; // b > 2 
        bounds[0][2] = ZERO; // w > 0
        bounds[0][3] = Double.NaN;

        for (int t = 0; t < 4; t++) {
            bounds[1][t] = Double.NaN;
            m_ParamsP[4 * x + t] = pThisParam[t];
            m_ParamsN[4 * x + t] = nThisParam[t];
        }
        double pminVal = Double.MAX_VALUE, nminVal = Double.MAX_VALUE;
        Random whichEx = new Random(m_Seed);
        TLD_Optm pOp = null, nOp = null;
        boolean isRunValid = true;
        double[] sumP = new double[pnum], meanP = new double[pnum], varP = new double[pnum];
        double[] sumN = new double[nnum], meanN = new double[nnum], varN = new double[nnum];

        // One dimension
        for (int p = 0; p < pnum; p++) {
            sumP[p] = m_SumP[p][x];
            meanP[p] = m_MeanP[p][x];
            varP[p] = m_VarianceP[p][x];
        }
        for (int q = 0; q < nnum; q++) {
            sumN[q] = m_SumN[q][x];
            meanN[q] = m_MeanN[q][x];
            varN[q] = m_VarianceN[q][x];
        }

        for (int y = 0; y < m_Run;) {
            System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Run #" + y);
            double thisMin;

            System.err.println("\nPositive exemplars");
            pOp = new TLD_Optm();
            pOp.setNum(sumP);
            pOp.setSSquare(varP);
            pOp.setXBar(meanP);

            pThisParam = pOp.findArgmin(pThisParam, bounds);
            while (pThisParam == null) {
                pThisParam = pOp.getVarbValues();
                System.err.println("!!! 200 iterations finished, not enough!");
                pThisParam = pOp.findArgmin(pThisParam, bounds);
            }

            thisMin = pOp.getMinFunction();
            if (!Double.isNaN(thisMin) && (thisMin < pminVal)) {
                pminVal = thisMin;
                for (int z = 0; z < 4; z++)
                    m_ParamsP[4 * x + z] = pThisParam[z];
            }

            if (Double.isNaN(thisMin)) {
                pThisParam = new double[4];
                isRunValid = false;
            }

            System.err.println("\nNegative exemplars");
            nOp = new TLD_Optm();
            nOp.setNum(sumN);
            nOp.setSSquare(varN);
            nOp.setXBar(meanN);

            nThisParam = nOp.findArgmin(nThisParam, bounds);
            while (nThisParam == null) {
                nThisParam = nOp.getVarbValues();
                System.err.println("!!! 200 iterations finished, not enough!");
                nThisParam = nOp.findArgmin(nThisParam, bounds);
            }
            thisMin = nOp.getMinFunction();
            if (!Double.isNaN(thisMin) && (thisMin < nminVal)) {
                nminVal = thisMin;
                for (int z = 0; z < 4; z++)
                    m_ParamsN[4 * x + z] = nThisParam[z];
            }

            if (Double.isNaN(thisMin)) {
                nThisParam = new double[4];
                isRunValid = false;
            }

            if (!isRunValid) {
                y--;
                isRunValid = true;
            }

            if (++y < m_Run) {
                // Change the initial parameters and restart             
                int pone = whichEx.nextInt(pnum), // Randomly pick one pos. exemplar
                        none = whichEx.nextInt(nnum);

                // Positive exemplars: next run 
                while ((m_SumP[pone][x] <= 1.0) || Double.isNaN(m_MeanP[pone][x]))
                    pone = whichEx.nextInt(pnum);

                a = m_VarianceP[pone][x] / (m_SumP[pone][x] - 1.0);
                if (a <= ZERO)
                    a = m_ParamsN[4 * x]; // Change to negative params
                m = m_MeanP[pone][x];
                double sq = (m - m_ParamsP[4 * x + 3]) * (m - m_ParamsP[4 * x + 3]);

                b = a * m_ParamsP[4 * x + 2] / sq + 2.0; // b=a/Var+2, assuming Var=Sq/w'
                if ((b <= ZERO) || Double.isNaN(b) || Double.isInfinite(b))
                    b = m_ParamsN[4 * x + 1];

                w = sq * (m_ParamsP[4 * x + 1] - 2.0) / m_ParamsP[4 * x];//w=Sq/Var, assuming Var=a'/(b'-2)
                if ((w <= ZERO) || Double.isNaN(w) || Double.isInfinite(w))
                    w = m_ParamsN[4 * x + 2];

                pThisParam[0] = a; // a
                pThisParam[1] = b; // b
                pThisParam[2] = w; // w
                pThisParam[3] = m; // m       

                // Negative exemplars: next run 
                while ((m_SumN[none][x] <= 1.0) || Double.isNaN(m_MeanN[none][x]))
                    none = whichEx.nextInt(nnum);

                a = m_VarianceN[none][x] / (m_SumN[none][x] - 1.0);
                if (a <= ZERO)
                    a = m_ParamsP[4 * x];
                m = m_MeanN[none][x];
                sq = (m - m_ParamsN[4 * x + 3]) * (m - m_ParamsN[4 * x + 3]);

                b = a * m_ParamsN[4 * x + 2] / sq + 2.0; // b=a/Var+2, assuming Var=Sq/w'
                if ((b <= ZERO) || Double.isNaN(b) || Double.isInfinite(b))
                    b = m_ParamsP[4 * x + 1];

                w = sq * (m_ParamsN[4 * x + 1] - 2.0) / m_ParamsN[4 * x];//w=Sq/Var, assuming Var=a'/(b'-2)
                if ((w <= ZERO) || Double.isNaN(w) || Double.isInfinite(w))
                    w = m_ParamsP[4 * x + 2];

                nThisParam[0] = a; // a
                nThisParam[1] = b; // b
                nThisParam[2] = w; // w
                nThisParam[3] = m; // m             
            }
        }
    }

    for (int x = 0, y = 0; x < m_Dimension; x++, y++) {
        if ((x == exs.classIndex()) || (x == exs.idIndex()))
            y++;
        a = m_ParamsP[4 * x];
        b = m_ParamsP[4 * x + 1];
        w = m_ParamsP[4 * x + 2];
        m = m_ParamsP[4 * x + 3];
        System.err.println(
                "\n\n???Positive: ( " + exs.attribute(y) + "): a=" + a + ", b=" + b + ", w=" + w + ", m=" + m);

        a = m_ParamsN[4 * x];
        b = m_ParamsN[4 * x + 1];
        w = m_ParamsN[4 * x + 2];
        m = m_ParamsN[4 * x + 3];
        System.err.println(
                "???Negative: (" + exs.attribute(y) + "): a=" + a + ", b=" + b + ", w=" + w + ", m=" + m);
    }

    if (m_UseEmpiricalCutOff) {
        // Find the empirical cut-off
        double[] pLogOdds = new double[pnum], nLogOdds = new double[nnum];
        for (int p = 0; p < pnum; p++)
            pLogOdds[p] = likelihoodRatio(m_SumP[p], m_MeanP[p], m_VarianceP[p]);

        for (int q = 0; q < nnum; q++)
            nLogOdds[q] = likelihoodRatio(m_SumN[q], m_MeanN[q], m_VarianceN[q]);

        // Update m_Cutoff
        findCutOff(pLogOdds, nLogOdds);
    } else
        m_Cutoff = -Math.log((double) pnum / (double) nnum);

    System.err.println("???Cut-off=" + m_Cutoff);
}

From source file:milk.visualize.PlotPanel.java

License:Open Source License

/**
 * Draws the distribution.
 * @param gx the graphics context
 */
private void paintData(Graphics gx) {
    if ((plotExemplars == null) || (x == null))
        return;

    setFonts(gx);
    int w = this.getWidth();
    boolean[] xlabels = new boolean[w];

    Attribute classAtt = plotExemplars.classAttribute(), id = plotExemplars.idAttribute();
    int hf = m_labelMetrics.getAscent();

    for (int i = 0; i < plotExemplars.numExemplars(); i++) {
        Exemplar ex = plotExemplars.exemplar(i);
        if (classAtt.isNominal())
            gx.setColor((Color) colorList.elementAt((int) ex.classValue()));
        else {
            double r = (ex.classValue() - m_minC) / (m_maxC - m_minC);
            r = (r * 240) + 15;
            gx.setColor(new Color((int) r, 150, (int) (255 - r)));
        }

        double preY = 0;
        for (int j = 0; j < ex.getInstances().numInstances(); j++) {
            Instance ins = ex.getInstances().instance(j);
            double xValue = convertToAttribX(m_XaxisStart);
            double tmp = -(xValue - ins.value(x.index)) * (xValue - ins.value(x.index))
                    / (2.0 * stdDev * stdDev);
            preY += Math.exp(tmp) * ins.weight();
        }
        preY /= ex.getInstances().sumOfWeights();
        preY *= maxY;

        for (int k = m_XaxisStart + 1; k < m_XaxisEnd; k++) {
            double currY = 0;
            for (int l = 0; l < ex.getInstances().numInstances(); l++) {
                Instance ins = ex.getInstances().instance(l);
                double xValue = convertToAttribX(k);
                double tmp = -(xValue - ins.value(x.index)) * (xValue - ins.value(x.index))
                        / (2.0 * stdDev * stdDev);
                currY += Math.exp(tmp) * ins.weight();
            }
            currY /= ex.getInstances().sumOfWeights();
            currY *= maxY;

            // Draw the distribution         
            int plotPreY = (int) convertToPanelY(preY);
            int plotCurrY = (int) convertToPanelY(currY);
            gx.drawLine(k - 1, plotPreY, k, plotCurrY);

            // If peak or valley appears, specify the x value
            if (isUp && (preY > currY)) {
                Font old = gx.getFont();
                gx.setFont(new Font("Monospaced", Font.PLAIN, 10));
                String idvalue = Integer.toString((int) ex.idValue());
                gx.drawString(idvalue, k - 1 - m_labelMetrics.stringWidth(idvalue) / 2, plotCurrY);
                xlabels[k - 1] = true;
                gx.setFont(old);
            }
            isUp = (currY >= preY);
            preY = currY;
        }
    }

    // Draw the number labels on x axis where various peaks gather
    gx.setColor(m_axisColour);
    int start = 0, end = 0;
    while (start < w) {
        if (xlabels[start]) {
            end = start;
            int falseCount = 0;
            while (end < w) {
                while (xlabels[end++])
                    ; // Find the first false from start
                int m = end;
                // Count the number of falses
                for (falseCount = 0; (m < xlabels.length) && (!xlabels[m]); m++, falseCount++)
                    ;

                if ((falseCount < 28) && (m < xlabels.length))
                    end = m;
                else
                    break;
            }
            if (!xlabels[end])
                --end;

            int avg = (start + end) / 2;
            double xValue = convertToAttribX(avg);
            String stringX = Utils.doubleToString(xValue, 1);
            Font old = gx.getFont();
            gx.setFont(new Font("Monospaced", Font.PLAIN, 10));
            gx.drawString(stringX, avg - m_labelMetrics.stringWidth(stringX) / 2, m_YaxisEnd + hf + m_tickSize);
            gx.drawLine(avg, m_YaxisEnd, avg, m_YaxisEnd + m_tickSize);
            gx.setFont(old);
            start = end;
        } else
            ++start;
    }
}
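
The preY/currY accumulation above is a weighted Gaussian kernel density estimate: each instance contributes exp(-(xValue - x_i)^2 / (2 * stdDev^2)) scaled by its weight(), and the sum is normalised by the exemplar's sumOfWeights() before being mapped to panel coordinates.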

From source file:moa.classifiers.AbstractClassifier.java

License:Open Source License

@Override
public void trainOnInstance(Instance inst) {
    boolean isTraining = (inst.weight() > 0.0);
    if (!(this instanceof SemiSupervisedLearner) && inst.classIsMissing()) {
        isTraining = false;
    }
    if (isTraining) {
        this.trainingWeightSeenByModel += inst.weight();
        trainOnInstanceImpl(inst);
    }
}
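
Because isTraining requires a strictly positive weight, setting an instance's weight to zero turns training into a no-op while the instance can still be used for prediction. A hedged sketch (learner stands for any concrete MOA classifier):

Instance holdout = (Instance) inst.copy();
holdout.setWeight(0.0);
learner.trainOnInstance(holdout); // skipped: weight is not > 0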

From source file:moa.classifiers.bayes.NaiveBayes.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    this.observedClassDistribution.addToValue((int) inst.classValue(), inst.weight());
    for (int i = 0; i < inst.numAttributes() - 1; i++) {
        int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst);
        AttributeClassObserver obs = this.attributeObservers.get(i);
        if (obs == null) {
            obs = inst.attribute(instAttIndex).isNominal() ? newNominalClassObserver()
                    : newNumericClassObserver();
            this.attributeObservers.set(i, obs);
        }
        obs.observeAttributeClass(inst.value(instAttIndex), (int) inst.classValue(), inst.weight());
    }
}
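
Both the class prior (observedClassDistribution) and every attribute observer receive inst.weight(), so for this learner a weighted stream behaves like a stream in which each instance is repeated in proportion to its weight.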