Example usage for weka.core Instance isMissing

List of usage examples for weka.core Instance isMissing

Introduction

In this page you can find the example usage for weka.core Instance isMissing.

Prototype

public boolean isMissing(Attribute att);

Source Link

Document

Tests if a specific value is "missing".

Usage

From source file:moa.classifiers.rules.RuleClassifier.java

License:Apache License

public void updateRuleAttribStatistics(Instance inst, Rule rl, int ruleIndex) {
    rl.instancesSeen++;// w  w w .  ja  va2s .c  om
    if (rl.squaredAttributeStatisticsSupervised.size() == 0 && rl.attributeStatisticsSupervised.size() == 0) {
        for (int s = 0; s < inst.numAttributes() - 1; s++) {
            ArrayList<Double> temp1 = new ArrayList<Double>();
            ArrayList<Double> temp2 = new ArrayList<Double>();
            rl.attributeStatisticsSupervised.add(temp1);
            rl.squaredAttributeStatisticsSupervised.add(temp2);
            int instAttIndex = modelAttIndexToInstanceAttIndex(s, inst);
            if (instance.attribute(instAttIndex).isNumeric()) {
                for (int i = 0; i < inst.numClasses(); i++) {
                    rl.attributeStatisticsSupervised.get(s).add(0.0);
                    rl.squaredAttributeStatisticsSupervised.get(s).add(1.0);
                }
            }
        }
    }
    for (int s = 0; s < inst.numAttributes() - 1; s++) {
        int instAttIndex = modelAttIndexToInstanceAttIndex(s, inst);
        if (!inst.isMissing(instAttIndex)) {
            if (instance.attribute(instAttIndex).isNumeric()) {
                rl.attributeStatistics.addToValue(s, inst.value(s));
                rl.squaredAttributeStatistics.addToValue(s, inst.value(s) * inst.value(s));
                double sumValue = rl.attributeStatisticsSupervised.get(s).get((int) inst.classValue())
                        + inst.value(s);
                rl.attributeStatisticsSupervised.get(s).set((int) inst.classValue(), sumValue);
                double squaredSumvalue = rl.squaredAttributeStatisticsSupervised.get(s)
                        .get((int) inst.classValue()) + (inst.value(s) * inst.value(s));
                rl.squaredAttributeStatisticsSupervised.get(s).set((int) inst.classValue(), squaredSumvalue);
            }
        } else {
            rl.attributeMissingValues.addToValue(s, 1);
        }
    }
}

From source file:moa.classifiers.rules.RuleClassifier.java

License:Apache License

public double computeAnomalyUnsupervised(Rule rl, int ruleIndex, Instance inst) { //Unsupervised
    ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>();
    ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>();
    double D = 0.0;
    double N = 0.0;

    if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue()
            && this.anomalyDetectionOption.isSet()) {
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            if (!inst.isMissing(x)) {
                ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>();
                if (inst.attribute(x).isNumeric()) { //Numeric Attributes
                    if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) {
                        double mean = computeMean(rl.attributeStatistics.getValue(x), rl.instancesSeen);
                        double sd = computeSD(rl.squaredAttributeStatistics.getValue(x),
                                rl.attributeStatistics.getValue(x), rl.instancesSeen);
                        double probability = computeProbability(mean, sd, inst.value(x));
                        if (probability != 0.0) {
                            D = D + Math.log(probability);
                            if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                                N = N + Math.log(probability);
                                AttribAnomalyStatisticTemp.add((double) x);
                                AttribAnomalyStatisticTemp.add(inst.value(x));
                                AttribAnomalyStatisticTemp.add(mean);
                                AttribAnomalyStatisticTemp.add(sd);
                                AttribAnomalyStatisticTemp.add(probability);
                                AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                            }/*w ww .  ja v  a 2  s.  c o m*/
                        }
                    }
                } else { //Nominal Attributes
                    AutoExpandVector<DoubleVector> attribNominal = ((NominalAttributeClassObserver) rl.observers
                            .get(x)).attValDistPerClass; //Attributes values distribution per class
                    double numbAttribValue = 0.0;
                    double attribVal = inst.value(x); //Attribute value
                    for (int i = 0; i < attribNominal.size(); i++) {
                        if (attribNominal.get(i) != null) {
                            numbAttribValue = numbAttribValue + attribNominal.get(i).getValue((int) attribVal);
                        }
                    }
                    double probability = numbAttribValue / rl.instancesSeen;
                    if (probability != 0.0) {
                        D = D + Math.log(probability);
                        if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                            N = N + Math.log(probability);
                            AttribAnomalyStatisticTemp.add((double) x);
                            AttribAnomalyStatisticTemp.add(inst.value(x));
                            AttribAnomalyStatisticTemp.add(probability);
                            AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                        }
                    }
                }
            }
        }
    }
    double anomaly = 0.0;
    if (D != 0) {
        anomaly = Math.abs(N / D);
    }
    if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) {
        caseAnomalyTemp.add(this.numInstance);
        double val = anomaly * 100;
        caseAnomalyTemp.add((int) val);
        this.caseAnomaly.add(caseAnomalyTemp);
        Rule x = new Rule(this.ruleSet.get(ruleIndex));
        this.ruleSetAnomalies.add(x);
        this.ruleAnomaliesIndex.add(ruleIndex + 1);
        this.ruleAttribAnomalyStatistics.add(AttribAnomalyStatisticTemp2);

    }
    return anomaly;
}

From source file:moa.classifiers.rules.RuleClassifier.java

License:Apache License

public double computeAnomalySupervised(Rule rl, int ruleIndex, Instance inst) { //Not supervised
    ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>();
    ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>();
    double D = 0.0;
    double N = 0.0;
    if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue()
            && this.anomalyDetectionOption.isSet()) {
        for (int x = 0; x < inst.numAttributes() - 1; x++) {
            if (!inst.isMissing(x)) {
                ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>();
                if (inst.attribute(x).isNumeric()) { //Numeric Attributes
                    if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) {
                        double mean = computeMean(
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double sd = computeSD(
                                (double) rl.squaredAttributeStatisticsSupervised.get(x)
                                        .get((int) inst.classValue()),
                                (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()),
                                (int) rl.obserClassDistrib.getValue((int) inst.classValue()));
                        double probability = computeProbability(mean, sd, inst.value(x));
                        if (probability != 0.0) {
                            D = D + Math.log(probability);
                            if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                                N = N + Math.log(probability);
                                AttribAnomalyStatisticTemp.add((double) x);
                                AttribAnomalyStatisticTemp.add(inst.value(x));
                                AttribAnomalyStatisticTemp.add(mean);
                                AttribAnomalyStatisticTemp.add(sd);
                                AttribAnomalyStatisticTemp.add(probability);
                                AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                            }//from  w  w w.  j  a  v  a  2 s.  c  o  m
                        }
                    }
                } else { //Nominal
                    double attribVal = inst.value(x); //Attribute value
                    double classVal = inst.classValue(); //Attribute value
                    double probability = rl.observers.get(x).probabilityOfAttributeValueGivenClass(attribVal,
                            (int) classVal);
                    if (probability != 0.0) {
                        D = D + Math.log(probability);
                        if (probability < this.probabilityThresholdOption.getValue()) { //0.10
                            N = N + Math.log(probability);
                            AttribAnomalyStatisticTemp.add((double) x);
                            AttribAnomalyStatisticTemp.add(inst.value(x));
                            AttribAnomalyStatisticTemp.add(probability);
                            AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp);
                        }
                    }
                }
            }
        }
    }
    double anomaly = 0.0;
    if (D != 0) {
        anomaly = Math.abs(N / D);
    }
    if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) {
        caseAnomalyTemp.add(this.numInstance);
        double val = anomaly * 100;
        caseAnomalyTemp.add((int) val);
        this.caseAnomalySupervised.add(caseAnomalyTemp);
        Rule y = new Rule(this.ruleSet.get(ruleIndex));
        this.ruleSetAnomaliesSupervised.add(y);
        this.ruleAnomaliesIndexSupervised.add(ruleIndex + 1);
        this.ruleAttribAnomalyStatisticsSupervised.add(AttribAnomalyStatisticTemp2);
    }
    return anomaly;
}

From source file:moa.classifiers.rules.RuleClassifier.java

License:Apache License

public void theBestAttributes(Instance instance, AutoExpandVector<AttributeClassObserver> observersParameter) {
    for (int z = 0; z < instance.numAttributes() - 1; z++) {
        if (!instance.isMissing(z)) {
            int instAttIndex = modelAttIndexToInstanceAttIndex(z, instance);
            ArrayList<Double> attribBest = new ArrayList<Double>();
            if (instance.attribute(instAttIndex).isNominal()) {
                this.minEntropyNominalAttrib = Double.MAX_VALUE;
                AutoExpandVector<DoubleVector> attribNominal = ((NominalAttributeClassObserver) observersParameter
                        .get(z)).attValDistPerClass;
                findBestValEntropyNominalAtt(attribNominal, instance.attribute(z).numValues()); // The best value (lowest entropy) of a nominal attribute.
                attribBest.add(this.saveBestEntropyNominalAttrib.getValue(0));
                attribBest.add(this.saveBestEntropyNominalAttrib.getValue(1));
                attribBest.add(this.saveBestEntropyNominalAttrib.getValue(2));
                this.saveBestValGlobalEntropy.add(attribBest);
                this.saveBestGlobalEntropy.setValue(z, this.saveBestEntropyNominalAttrib.getValue(1));
            } else {
                this.root = ((BinaryTreeNumericAttributeClassObserver) observersParameter.get(z)).root;
                mainFindBestValEntropy(this.root); // The best value (lowest entropy) of a numeric attribute.
                attribBest.add(this.saveBestEntropy.getValue(0));
                attribBest.add(this.saveBestEntropy.getValue(1));
                attribBest.add(this.saveBestEntropy.getValue(2));
                attribBest.add(this.saveBestEntropy.getValue(4));
                this.saveBestValGlobalEntropy.add(attribBest);
                this.saveBestGlobalEntropy.setValue(z, this.saveBestEntropy.getValue(1));
            }//from  w  w w.j  ava  2 s .  c  om
        } else {
            double value = Double.MAX_VALUE;
            this.saveBestGlobalEntropy.setValue(z, value);
        }
    }
}

From source file:moa.streams.filters.AddNoiseFilter.java

License:Open Source License

@Override
public Instance nextInstance() {
    Instance inst = (Instance) this.inputStream.nextInstance().copy();
    for (int i = 0; i < inst.numAttributes(); i++) {
        double noiseFrac = i == inst.classIndex() ? this.classNoiseFractionOption.getValue()
                : this.attNoiseFractionOption.getValue();
        if (inst.attribute(i).isNominal()) {
            DoubleVector obs = (DoubleVector) this.attValObservers.get(i);
            if (obs == null) {
                obs = new DoubleVector();
                this.attValObservers.set(i, obs);
            }//from   w  w w. j a  va2 s. com
            int originalVal = (int) inst.value(i);
            if (!inst.isMissing(i)) {
                obs.addToValue(originalVal, inst.weight());
            }
            if ((this.random.nextDouble() < noiseFrac) && (obs.numNonZeroEntries() > 1)) {
                do {
                    inst.setValue(i, this.random.nextInt(obs.numValues()));
                } while (((int) inst.value(i) == originalVal) || (obs.getValue((int) inst.value(i)) == 0.0));
            }
        } else {
            GaussianEstimator obs = (GaussianEstimator) this.attValObservers.get(i);
            if (obs == null) {
                obs = new GaussianEstimator();
                this.attValObservers.set(i, obs);
            }
            obs.addObservation(inst.value(i), inst.weight());
            inst.setValue(i, inst.value(i) + this.random.nextGaussian() * obs.getStdDev() * noiseFrac);
        }
    }
    return inst;
}

From source file:moa.streams.filters.ReplacingMissingValuesFilter.java

License:Open Source License

@Override
public Instance nextInstance() {
    Instance inst = (Instance) this.inputStream.nextInstance().copy();

    // Initialization
    if (numAttributes < 0) {
        numAttributes = inst.numAttributes();
        columnsStatistics = new double[numAttributes];
        numberOfSamples = new long[numAttributes];
        lastNominalValues = new String[numAttributes];
        frequencies = new HashMap[numAttributes];
        for (int i = 0; i < inst.numAttributes(); i++) {
            if (inst.attribute(i).isNominal())
                frequencies[i] = new HashMap<String, Integer>();
        }/*w w w .j  a  v  a 2 s . c om*/

        numericalSelectedStrategy = this.numericReplacementStrategyOption.getChosenIndex();
        nominalSelectedStrategy = this.nominalReplacementStrategyOption.getChosenIndex();
    }

    for (int i = 0; i < numAttributes; i++) {

        // ---- Numerical values ----
        if (inst.attribute(i).isNumeric()) {
            // Handle missing value
            if (inst.isMissing(i)) {
                switch (numericalSelectedStrategy) {
                case 0: // NOTHING
                    break;
                case 1: // LAST KNOWN VALUE
                case 2: // MEAN
                case 3: // MAX
                case 4: // MIN
                    inst.setValue(i, columnsStatistics[i]);
                    break;
                case 5: // CONSTANT
                    inst.setValue(i, numericalConstantValueOption.getValue());
                    break;
                default:
                    continue;
                }
            }
            // Update statistics with non-missing values
            else {
                switch (numericalSelectedStrategy) {
                case 1: // LAST KNOWN VALUE
                    columnsStatistics[i] = inst.value(i);
                    break;
                case 2: // MEAN
                    numberOfSamples[i]++;
                    columnsStatistics[i] = columnsStatistics[i]
                            + (inst.value(i) - columnsStatistics[i]) / numberOfSamples[i];
                    break;
                case 3: // MAX
                    columnsStatistics[i] = columnsStatistics[i] < inst.value(i) ? inst.value(i)
                            : columnsStatistics[i];
                    break;
                case 4: // MIN
                    columnsStatistics[i] = columnsStatistics[i] > inst.value(i) ? inst.value(i)
                            : columnsStatistics[i];
                    break;
                default:
                    continue;
                }
            }
        }
        // ---- Nominal values ----
        else if (inst.attribute(i).isNominal()) {
            // Handle missing value
            if (inst.isMissing(i)) {
                switch (nominalSelectedStrategy) {
                case 0: // NOTHING
                    break;
                case 1: // LAST KNOWN VALUE
                    if (lastNominalValues[i] != null) {
                        inst.setValue(i, lastNominalValues[i]);
                    }
                    break;
                case 2: // MODE
                    if (!frequencies[i].isEmpty()) {
                        // Sort the map to get the most frequent value
                        Map<String, Integer> sortedMap = MapUtil.sortByValue(frequencies[i]);
                        inst.setValue(i, sortedMap.entrySet().iterator().next().getKey());
                    }
                    break;
                default:
                    continue;
                }
            }
            // Update statistics with non-missing values
            else {
                switch (nominalSelectedStrategy) {
                case 1: // LAST KNOWN VALUE
                    lastNominalValues[i] = inst.stringValue(i);
                    break;
                case 2: // MODE
                    Integer previousCounter = frequencies[i].get(inst.stringValue(i));
                    if (previousCounter == null)
                        previousCounter = 0;
                    frequencies[i].put(inst.stringValue(i), ++previousCounter);
                    break;
                default:
                    continue;
                }
            }
        }
    }

    return inst;
}

From source file:mulan.classifier.meta.ConstrainedKMeans.java

License:Open Source License

/**
 * Updates the minimum and maximum values for all the attributes
 * based on a new instance./*  ww w .  j a v  a 2  s .  c o m*/
 *
 * @param instance the new instance
 */
private void updateMinMax(Instance instance) {
    for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
        if (!instance.isMissing(j)) {
            if (Double.isNaN(m_Min[j])) {
                m_Min[j] = instance.value(j);
                m_Max[j] = instance.value(j);
            } else {
                if (instance.value(j) < m_Min[j]) {
                    m_Min[j] = instance.value(j);
                } else {
                    if (instance.value(j) > m_Max[j]) {
                        m_Max[j] = instance.value(j);
                    }
                }
            }
        }
    }
}

From source file:mulan.data.MultiLabelInstances.java

License:Open Source License

/**
 * Method that checks whether an instance has missing labels
 *
 * @param instance one instance of this dataset
 * @return true if the instance has missing labels
 *///  www  . j  ava2  s  .c  om
public boolean hasMissingLabels(Instance instance) {
    int numLabels = getNumLabels();
    int[] labelIndices = getLabelIndices();

    boolean missing = false;
    for (int j = 0; j < numLabels; j++) {
        if (instance.isMissing(labelIndices[j])) {
            missing = true;
            break;
        }
    }
    return missing;
}

From source file:myclassifier.myC45Pack.ClassDistribution.java

/**
 * Adds all instances with unknown values for given attribute, weighted
 * according to frequency of instances in each bag.
 *
 * @exception Exception if something goes wrong
 *///from www.  java2 s  .c  om
public void addInstWithMissValue(Instances dataSet, int attIndex) throws Exception {

    double[] valueProbs;
    double weight, newWeight;
    int classIndex;
    Instance instance;

    valueProbs = new double[w_perSubdataset.length];
    for (int i = 0; i < w_perSubdataset.length; i++) {
        if (totalWeights == 0) {
            valueProbs[i] = 1.0 / valueProbs.length;
        } else {
            valueProbs[i] = w_perSubdataset[i] / totalWeights;
        }
    }

    Enumeration E = dataSet.enumerateInstances();
    while (E.hasMoreElements()) {
        instance = (Instance) E.nextElement();
        if (instance.isMissing(attIndex)) {
            classIndex = (int) instance.classValue();
            weight = instance.weight();
            w_perClass[classIndex] = w_perClass[classIndex] + weight;
            totalWeights += weight;
            for (int i = 0; i < w_perSubdataset.length; i++) {
                newWeight = valueProbs[i] * weight;
                w_perClassPerSubdataset[i][classIndex] += newWeight;
                w_perSubdataset[i] += newWeight;
            }
        }
    }
}

From source file:myclassifier.myC45Pack.SplitModel.java

private void handleNominalAttribute(Instances dataSet) throws Exception {

    Instance instance;
    classDist = new ClassDistribution(numOfBranches, dataSet.numClasses());
    Enumeration instanceEnum = dataSet.enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
        instance = (Instance) instanceEnum.nextElement();
        if (!instance.isMissing(attribIndex)) {
            classDist.addInstance((int) instance.value(attribIndex), instance);
        }//from w w w.j  a  v  a  2  s  .  c  o m
    }

    // Check if minimum number of Instances in at least two
    // subsets.
    if (classDist.isSplitable(minInstances)) {
        numSubsets = numOfBranches;
        infoGain = classDist.calculateInfoGain(totalWeights);
        gainRatio = classDist.calculateGainRatio(infoGain);
    }
}