Example usage for weka.core.Instance.classIsMissing()

Introduction

On this page you can find usage examples for the classIsMissing() method of weka.core.Instance.

Prototype

public boolean classIsMissing();

Source Link

Document

Tests if an instance's class is missing.

Usage

From source file:moa.classifiers.functions.SGDMultiClass.java

License:Open Source License

/**
 * Applies a single stochastic gradient descent update to the weight vector
 * and bias stored for {@code classLabel}. Nothing happens when the instance
 * has no class value.
 *
 * @param instance   the training instance
 * @param classLabel index of the per-class model in {@code m_weights} and {@code m_bias}
 */
public void trainOnInstanceImpl(Instance instance, int classLabel) {
    if (instance.classIsMissing()) {
        return;
    }

    double wx = dotProd(instance, m_weights[classLabel], instance.classIndex());
    double output = wx + m_bias[classLabel];

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
        // Target is +1 when the instance's class equals classLabel, -1 otherwise.
        y = (instance.classValue() == classLabel) ? 1 : -1;
        z = y * output;
    } else {
        // Numeric class: z is the residual; y is fixed to 1 for the update factor.
        z = instance.classValue() - output;
        y = 1;
    }

    // Weight decay: shrink every stored weight before applying the gradient step.
    double decay = (m_numInstances == 0)
            ? 1.0 - (m_learningRate * m_lambda) / m_t
            : 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    for (int slot = 0; slot < m_weights[classLabel].numValues(); slot++) {
        m_weights[classLabel].setValue(slot, m_weights[classLabel].getValue(slot) * decay);
    }

    // With hinge loss the update is skipped unless z < 1.
    if (m_loss == HINGE && !(z < 1)) {
        return;
    }

    double factor = m_learningRate * y * dloss(z);

    // Walk the stored values, skipping the class attribute and missing values.
    for (int pos = 0, stored = instance.numValues(); pos < stored; pos++) {
        int attIndex = instance.index(pos);
        if (attIndex == instance.classIndex() || instance.isMissingSparse(pos)) {
            continue;
        }
        m_weights[classLabel].addToValue(attIndex, factor * instance.valueSparse(pos));
    }

    m_bias[classLabel] += factor;
}

From source file:moa.classifiers.functions.SGDOld.java

License:Open Source License

/**
 * Performs one stochastic gradient descent step using the supplied instance.
 * The weight array is allocated lazily on the first call; its last slot holds
 * the bias. Instances without a class value leave the model untouched.
 *
 * @param instance the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        // One weight per attribute plus a trailing bias slot.
        m_weights = new double[instance.numAttributes() + 1];
    }

    if (instance.classIsMissing()) {
        return;
    }

    final int biasSlot = m_weights.length - 1;
    double wx = dotProd(instance, m_weights, instance.classIndex());
    double output = wx + m_weights[biasSlot];

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
        // Class value 0 maps to -1, every other value to +1.
        y = (instance.classValue() == 0) ? -1 : 1;
        z = y * output;
    } else {
        // Numeric class: z is the residual; y is fixed to 1 for the update factor.
        z = instance.classValue() - output;
        y = 1;
    }

    // Weight decay applied to every attribute weight (the bias is excluded).
    double decay = (m_numInstances == 0)
            ? 1.0 - (m_learningRate * m_lambda) / m_t
            : 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    for (int w = 0; w < biasSlot; w++) {
        m_weights[w] *= decay;
    }

    // With hinge loss the gradient step is only taken when z < 1.
    boolean needsUpdate = (m_loss != HINGE) || (z < 1);
    if (needsUpdate) {
        double factor = m_learningRate * y * dloss(z);

        for (int pos = 0, stored = instance.numValues(); pos < stored; pos++) {
            int attIndex = instance.index(pos);
            if (attIndex == instance.classIndex() || instance.isMissingSparse(pos)) {
                continue;
            }
            m_weights[attIndex] += factor * instance.valueSparse(pos);
        }

        m_weights[biasSlot] += factor;
    }
    m_t++;
}

From source file:moa.classifiers.functions.SPegasos.java

License:Open Source License

/**
 * Updates the model with one training instance: scales the weights, applies
 * a gradient step when the loss condition holds, then rescales the weights
 * when {@code 1 / (m_lambda * squaredNorm)} is below 1. Instances without a
 * class value are ignored.
 *
 * @param instance the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        // One weight per attribute plus a trailing bias slot.
        m_weights = new double[instance.numAttributes() + 1];
    }
    if (instance.classIsMissing()) {
        return;
    }

    final int biasSlot = m_weights.length - 1;

    double learningRate = 1.0 / (m_lambda * m_t);
    double scale = 1.0 - 1.0 / m_t;
    // Class value 0 maps to -1, every other value to +1.
    double y = (instance.classValue() == 0) ? -1 : 1;
    double wx = dotProd(instance, m_weights, instance.classIndex());
    double z = y * (wx + m_weights[biasSlot]);

    // Shrink all attribute weights; the class slot and the bias are left alone.
    for (int a = 0; a < biasSlot; a++) {
        if (a == instance.classIndex()) {
            continue;
        }
        m_weights[a] *= scale;
    }

    if (m_loss == LOGLOSS || (z < 1)) {
        double step = learningRate * dloss(z);
        for (int pos = 0, stored = instance.numValues(); pos < stored; pos++) {
            int attIndex = instance.index(pos);
            if (attIndex == instance.classIndex() || instance.isMissingSparse(pos)) {
                continue;
            }
            m_weights[attIndex] += step * (instance.valueSparse(pos) * y);
        }

        // update the bias
        m_weights[biasSlot] += step * y;
    }

    // Squared norm of the attribute weights (class slot and bias excluded).
    double norm = 0;
    for (int a = 0; a < biasSlot; a++) {
        if (a == instance.classIndex()) {
            continue;
        }
        norm += (m_weights[a] * m_weights[a]);
    }

    double scale2 = Math.min(1.0, (1.0 / (m_lambda * norm)));
    if (scale2 < 1.0) {
        scale2 = Math.sqrt(scale2);
        for (int a = 0; a < biasSlot; a++) {
            if (a == instance.classIndex()) {
                continue;
            }
            m_weights[a] *= scale2;
        }
    }
    m_t++;
}

From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java

License:Open Source License

/**
 * Drains every queued training instance whose deadline has been reached and
 * hands the labelled ones to the learner, accumulating their weight per
 * class in {@code knownLabels}.
 *
 * @return the number of instances the learner was trained on
 */
private int train() {
    String activity = this.inWarmupPhase ? "Warmup Training" : "Online Training";
    this.monitor.setCurrentActivityDescription(activity);

    int trainedCount = 0;
    while (!this.latentTrainingInstQueue.isEmpty()
            && this.latentTrainingInstQueue.peek().deadline <= this.instancesProcessed) {
        Instance x = this.latentTrainingInstQueue.pop().inst;

        // Zero-weight instances are only forwarded when the option asks for it.
        boolean eligible = x.weight() > 0.0 || this.sendZeroWeightsOption.isSet();
        if (!eligible || x.classIsMissing()) {
            continue;
        }

        learner.trainOnInstance(x);
        this.knownLabels[(int) x.classValue()] += x.weight();
        trainedCount++;
    }
    assert this.latentTrainingInstQueue.size() < (this.trainingTimeDelayOption.getValue()
            + 1) : "Cache 'latentTrainingInstQueue' is larger than designed.";
    return trainedCount;
}

From source file:moa.tasks.EvaluatePrequential.java

License:Open Source License

/**
 * Runs a prequential (test-then-train) evaluation: each instance from the
 * stream is first used to test the learner (only when its class value is
 * present) and then to train it. Measurements are sampled into a learning
 * curve every {@code sampleFrequencyOption} instances and optionally
 * appended to a dump file; predictions can be written to a second file.
 *
 * <p>Both output streams are closed on every exit path, including an abort
 * requested through the monitor and any exception raised while evaluating.
 *
 * @param monitor    receives progress updates and is polled for abort requests
 * @param repository the object repository (not used by this method)
 * @return the learning curve, or {@code null} when the task was aborted
 * @throws RuntimeException if one of the output files cannot be opened
 */
@Override
protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
    Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
    InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
    ClassificationPerformanceEvaluator evaluator = (ClassificationPerformanceEvaluator) getPreparedClassOption(
            this.evaluatorOption);
    LearningCurve learningCurve = new LearningCurve("learning evaluation instances");

    // The width/alpha task options are deprecated: when they differ from their
    // defaults the user is told to configure the evaluator instead and the
    // (empty) curve is returned without evaluating anything.
    if (evaluator instanceof WindowClassificationPerformanceEvaluator) {
        if (widthOption.getValue() != 1000) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (WindowClassificationPerformanceEvaluator -w "
                            + widthOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof EWMAClassificationPerformanceEvaluator) {
        if (alphaOption.getValue() != .01) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (EWMAClassificationPerformanceEvaluator -a "
                            + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof FadingFactorClassificationPerformanceEvaluator) {
        if (alphaOption.getValue() != .01) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (FadingFactorClassificationPerformanceEvaluator -a "
                            + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }

    learner.setModelContext(stream.getHeader());
    int maxInstances = this.instanceLimitOption.getValue();
    long instancesProcessed = 0;
    int maxSeconds = this.timeLimitOption.getValue();
    int secondsElapsed = 0;
    monitor.setCurrentActivity("Evaluating learner...", -1.0);

    PrintStream immediateResultStream = null;
    PrintStream outputPredictionResultStream = null;
    try {
        // Existing files are appended to; new files are created.
        File dumpFile = this.dumpFileOption.getFile();
        if (dumpFile != null) {
            try {
                if (dumpFile.exists()) {
                    immediateResultStream = new PrintStream(new FileOutputStream(dumpFile, true), true);
                } else {
                    immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true);
                }
            } catch (Exception ex) {
                throw new RuntimeException("Unable to open immediate result file: " + dumpFile, ex);
            }
        }
        // File for output predictions
        File outputPredictionFile = this.outputPredictionFileOption.getFile();
        if (outputPredictionFile != null) {
            try {
                if (outputPredictionFile.exists()) {
                    outputPredictionResultStream = new PrintStream(
                            new FileOutputStream(outputPredictionFile, true), true);
                } else {
                    outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile),
                            true);
                }
            } catch (Exception ex) {
                throw new RuntimeException("Unable to open prediction result file: " + outputPredictionFile,
                        ex);
            }
        }

        boolean firstDump = true;
        boolean preciseCPUTiming = TimingUtils.enablePreciseTiming();
        long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
        long lastEvaluateStartTime = evaluateStartTime;
        double RAMHours = 0.0;
        while (stream.hasMoreInstances() && ((maxInstances < 0) || (instancesProcessed < maxInstances))
                && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) {
            Instance trainInst = stream.nextInstance();
            Instance testInst = (Instance) trainInst.copy();
            if (testInst.classIsMissing() == false) {
                // Added for semisupervised setting: test only if we have the label
                double[] prediction = learner.getVotesForInstance(testInst);
                // Output prediction
                if (outputPredictionFile != null) {
                    outputPredictionResultStream
                            .println(Utils.maxIndex(prediction) + "," + testInst.classValue());
                }
                evaluator.addResult(testInst, prediction);
            }
            learner.trainOnInstance(trainInst);
            instancesProcessed++;
            if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0
                    || stream.hasMoreInstances() == false) {
                long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
                double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime);
                double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime);
                double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs
                RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours
                RAMHours += RAMHoursIncrement;
                lastEvaluateStartTime = evaluateTime;
                learningCurve.insertEntry(new LearningEvaluation(
                        new Measurement[] { new Measurement("learning evaluation instances", instancesProcessed),
                                new Measurement("evaluation time (" + (preciseCPUTiming ? "cpu " : "") + "seconds)",
                                        time),
                                new Measurement("model cost (RAM-Hours)", RAMHours) },
                        evaluator, learner));

                if (immediateResultStream != null) {
                    if (firstDump) {
                        immediateResultStream.println(learningCurve.headerToString());
                        firstDump = false;
                    }
                    immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
                    immediateResultStream.flush();
                }
            }
            if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
                if (monitor.taskShouldAbort()) {
                    // The finally block below releases the output streams.
                    return null;
                }
                long estimatedRemainingInstances = stream.estimatedRemainingInstances();
                if (maxInstances > 0) {
                    long maxRemaining = maxInstances - instancesProcessed;
                    if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                        estimatedRemainingInstances = maxRemaining;
                    }
                }
                monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
                        : (double) instancesProcessed
                                / (double) (instancesProcessed + estimatedRemainingInstances));
                if (monitor.resultPreviewRequested()) {
                    monitor.setLatestResultPreview(learningCurve.copy());
                }
                secondsElapsed = (int) TimingUtils
                        .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - evaluateStartTime);
            }
        }
        return learningCurve;
    } finally {
        // Always release the file handles, whether we finished, aborted or failed.
        if (immediateResultStream != null) {
            immediateResultStream.close();
        }
        if (outputPredictionResultStream != null) {
            outputPredictionResultStream.close();
        }
    }
}

From source file:net.sf.jclal.sampling.supervised.Resample.java

License:Open Source License

/**
 * Draws a class-stratified subsample from a single-label Weka dataset. The
 * dataset is sorted on its class attribute, the start position of every
 * class is indexed, and the sampling itself is delegated to the with- or
 * without-replacement routine depending on {@code isNoReplacement()}.
 *
 * <p>Instances with a missing class value are left out of every class
 * range. This relies on the sort placing them after all labelled instances
 * (TODO confirm for the dataset implementation in use).
 *
 * @param dataSet The dataset to extract the instances.
 * @throws RuntimeException if {@code dataSet} is not a {@code WekaDataset}
 */
@Override
public void sampling(IDataset dataSet) {

    if (!(dataSet instanceof WekaDataset)) {
        throw new RuntimeException("This sample method only can be used with a single label weka dataset");
    }

    WekaDataset wekaDataSet = (WekaDataset) dataSet;

    int origSize = wekaDataSet.getNumInstances();
    int sampleSize = (int) (origSize * getPercentageInstancesToLabelled() / 100);

    // Subsample that takes class distribution into consideration
    // Sort according to class attribute.
    wekaDataSet.getDataset().sort(wekaDataSet.getClassIndex());

    // classIndices[c] is the position where class c starts; the extra last
    // slot marks the end of the labelled instances.
    int[] classIndices = new int[wekaDataSet.getNumClasses() + 1];

    int currentClass = 0;
    classIndices[currentClass] = 0;

    // End (exclusive) of the labelled range; shrinks to the position of the
    // first instance whose class is missing.
    int labelledEnd = dataSet.getNumInstances();

    for (int i = 0; i < dataSet.getNumInstances(); i++) {
        Instance current = dataSet.instance(i);

        if (current.classIsMissing()) {
            labelledEnd = i;
            break;
        } else if (current.classValue() != currentClass) {

            // Classes skipped between currentClass and the new value are empty
            // and therefore start (and end) at this position too.
            for (int j = currentClass + 1; j <= current.classValue(); j++) {
                classIndices[j] = i;
            }

            currentClass = (int) current.classValue();
        }
    }

    // Every class after the last one seen is empty and starts where the
    // labelled range ends. The original filled these slots with the dataset
    // size unconditionally, which folded missing-class instances into the
    // last observed class.
    for (int j = currentClass + 1; j < classIndices.length; j++) {
        classIndices[j] = labelledEnd;
    }

    // Count the classes that actually contain instances.
    int actualClasses = 0;

    for (int i = 0; i < classIndices.length - 1; i++) {
        if (classIndices[i] != classIndices[i + 1]) {
            actualClasses++;
        }
    }

    // Convert pending input instances
    if (isNoReplacement()) {
        createSubsampleWithoutReplacement(wekaDataSet, sampleSize, actualClasses, classIndices);
    } else {
        createSubsampleWithReplacement(wekaDataSet, sampleSize, actualClasses, classIndices);
    }
}

From source file:org.esa.nest.gpf.SGD.java

/**
 * Incorporates one training instance into the model with a single stochastic
 * gradient descent step. The last slot of {@code m_weights} is used as the
 * bias. Instances without a class value are skipped.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the
 * model.
 */
@Override
public void updateClassifier(Instance instance) throws Exception {

    if (instance.classIsMissing()) {
        return;
    }

    final int biasIndex = m_weights.length - 1;
    double wx = dotProd(instance, m_weights, instance.classIndex());
    double activation = wx + m_weights[biasIndex];

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
        // Class value 0 maps to -1, every other value to +1.
        y = (instance.classValue() == 0) ? -1 : 1;
        z = y * activation;
    } else {
        // Numeric class: z is the residual; y is fixed to 1 for the update factor.
        z = instance.classValue() - activation;
        y = 1;
    }

    // Compute multiplier for weight decay and apply it to the attribute weights.
    double shrink = (m_numInstances == 0)
            ? 1.0 - (m_learningRate * m_lambda) / m_t
            : 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    for (int idx = 0; idx < biasIndex; idx++) {
        m_weights[idx] *= shrink;
    }

    // With hinge loss the gradient step is only taken when z < 1.
    if (m_loss != HINGE || (z < 1)) {
        double factor = m_learningRate * y * dloss(z);

        int storedValues = instance.numValues();
        for (int s = 0; s < storedValues; s++) {
            int attr = instance.index(s);
            boolean usable = attr != instance.classIndex() && !instance.isMissingSparse(s);
            if (usable) {
                m_weights[attr] += factor * instance.valueSparse(s);
            }
        }

        // update the bias
        m_weights[biasIndex] += factor;
    }
    m_t++;
}

From source file:resample.OverSubsample.java

License:Open Source License

/**
 * Creates a subsample of the current set of input instances. The output
 * instances are pushed onto the output queue for collection.
 *
 * <p>The input is sorted on the class attribute, the per-class counts and
 * average weights are gathered, a target count per class is derived from the
 * largest class and {@code m_DistributionSpread} (optionally capped by
 * {@code m_MaxCount}), and that many instances are drawn with replacement
 * from each class. Classes that contain no instances are skipped, since
 * there is nothing to draw from.
 */
private void createSubsample() {

    int classI = getInputFormat().classIndex();
    // Sort according to class attribute.
    getInputFormat().sort(classI);
    // Determine where each class starts in the sorted dataset
    int[] classIndices = getClassIndices();

    // Get the existing class distribution
    int[] counts = new int[getInputFormat().numClasses()];
    double[] weights = new double[getInputFormat().numClasses()];
    int max = -1;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        if (current.classIsMissing() == false) {
            counts[(int) current.classValue()]++;
            weights[(int) current.classValue()] += current.weight();
        }
    }

    // Convert from total weight to average weight
    for (int i = 0; i < counts.length; i++) {
        if (counts[i] > 0) {
            weights[i] = weights[i] / counts[i];
        }
    }

    // find the non-empty class with the maximum number of instances
    int maxIndex = -1;
    for (int i = 0; i < counts.length; i++) {
        if ((max < 0) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        } else if ((counts[i] > max) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        }
    }

    if (max < 0) {
        System.err.println("SpreadSubsample: *warning* none of the classes have any values in them.");
        return;
    }

    // determine the new distribution
    int[] new_counts = new int[getInputFormat().numClasses()];
    for (int i = 0; i < counts.length; i++) {
        new_counts[i] = (int) Math.abs(Math.max(counts[i], max * m_DistributionSpread));
        if (i == maxIndex) {
            if (m_DistributionSpread > 0 && m_DistributionSpread < 1.0) {
                // don't undersample the majority class!
                new_counts[i] = counts[i];
            }
        }
        if (m_DistributionSpread == 0) {
            new_counts[i] = counts[i];
        }

        if (m_MaxCount > 0) {
            new_counts[i] = Math.min(new_counts[i], m_MaxCount);
        }
    }

    // Sample with replacement
    Random random = new Random(m_RandomSeed);
    for (int j = 0; j < new_counts.length; j++) {
        int classSize = classIndices[j + 1] - classIndices[j];
        if (classSize <= 0) {
            // Empty class: there is nothing to draw from, and taking the
            // remainder by zero below would throw an ArithmeticException.
            continue;
        }

        double newWeight = 1.0;
        if (m_AdjustWeights && (new_counts[j] > 0)) {
            newWeight = weights[j] * counts[j] / new_counts[j];
        }

        for (int k = 0; k < new_counts[j]; k++) {
            // Take the remainder first and the absolute value second: this
            // yields the same offset as abs(r) % size for every r except
            // Integer.MIN_VALUE, whose absolute value overflows to a negative
            // number and would produce an out-of-class index.
            int index = classIndices[j] + Math.abs(random.nextInt() % classSize);
            Instance newInst = (Instance) getInputFormat().instance(index).copy();
            if (m_AdjustWeights) {
                newInst.setWeight(newWeight);
            }
            push(newInst);
        }
    }
}

From source file:resample.OverSubsample.java

License:Open Source License

/**
 * Creates an index containing the position where each class starts in 
 * the getInputFormat(). m_InputFormat must be sorted on the class attribute.
 * /*  w  w w  . j ava2s. c o m*/
 * @return the positions
 */
private int[] getClassIndices() {

    // Create an index of where each class value starts
    int[] classIndices = new int[getInputFormat().numClasses() + 1];
    int currentClass = 0;
    classIndices[currentClass] = 0;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        if (current.classIsMissing()) {
            for (int j = currentClass + 1; j < classIndices.length; j++) {
                classIndices[j] = i;
            }
            break;
        } else if (current.classValue() != currentClass) {
            for (int j = currentClass + 1; j <= current.classValue(); j++) {
                classIndices[j] = i;
            }
            currentClass = (int) current.classValue();
        }
    }
    if (currentClass <= getInputFormat().numClasses()) {
        for (int j = currentClass + 1; j < classIndices.length; j++) {
            classIndices[j] = getInputFormat().numInstances();
        }
    }
    return classIndices;
}

From source file:test.org.moa.opencl.IBk.java

License:Open Source License

/**
 * Appends the given instance to the training data and to the neighbour
 * search structure. When a positive window size is configured, the oldest
 * instances are dropped until the training set fits the window again.
 * Instances without a class value are ignored.
 *
 * @param instance the instance to add
 * @throws Exception if the instance's header is incompatible with the
 * training data
 */
public void updateClassifier(Instance instance) throws Exception {

    if (!m_Train.equalHeaders(instance.dataset())) {
        throw new Exception("Incompatible instance types\n" + m_Train.equalHeadersMsg(instance.dataset()));
    }
    if (instance.classIsMissing()) {
        return;
    }

    m_Train.add(instance);
    m_NNSearch.update(instance);
    m_kNNValid = false;

    // Trim from the front until the window constraint holds again.
    boolean trimmed = false;
    while ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) {
        m_Train.delete(0);
        trimmed = true;
    }

    // rebuild datastructure KDTree currently can't delete
    if (trimmed) {
        m_NNSearch.setInstances(m_Train);
    }
}