List of usage examples for weka.core Instance isMissing
/**
 * Signature of the {@code isMissing} overload that takes an {@code Attribute}.
 *
 * <p>NOTE(review): presumably reports whether this instance's value for {@code att}
 * is missing, mirroring the index-based overload used in the examples in this
 * listing; confirm against the Weka API documentation.
 */
public boolean isMissing(Attribute att);
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public void updateRuleAttribStatistics(Instance inst, Rule rl, int ruleIndex) { rl.instancesSeen++;// w w w . ja va2s .c om if (rl.squaredAttributeStatisticsSupervised.size() == 0 && rl.attributeStatisticsSupervised.size() == 0) { for (int s = 0; s < inst.numAttributes() - 1; s++) { ArrayList<Double> temp1 = new ArrayList<Double>(); ArrayList<Double> temp2 = new ArrayList<Double>(); rl.attributeStatisticsSupervised.add(temp1); rl.squaredAttributeStatisticsSupervised.add(temp2); int instAttIndex = modelAttIndexToInstanceAttIndex(s, inst); if (instance.attribute(instAttIndex).isNumeric()) { for (int i = 0; i < inst.numClasses(); i++) { rl.attributeStatisticsSupervised.get(s).add(0.0); rl.squaredAttributeStatisticsSupervised.get(s).add(1.0); } } } } for (int s = 0; s < inst.numAttributes() - 1; s++) { int instAttIndex = modelAttIndexToInstanceAttIndex(s, inst); if (!inst.isMissing(instAttIndex)) { if (instance.attribute(instAttIndex).isNumeric()) { rl.attributeStatistics.addToValue(s, inst.value(s)); rl.squaredAttributeStatistics.addToValue(s, inst.value(s) * inst.value(s)); double sumValue = rl.attributeStatisticsSupervised.get(s).get((int) inst.classValue()) + inst.value(s); rl.attributeStatisticsSupervised.get(s).set((int) inst.classValue(), sumValue); double squaredSumvalue = rl.squaredAttributeStatisticsSupervised.get(s) .get((int) inst.classValue()) + (inst.value(s) * inst.value(s)); rl.squaredAttributeStatisticsSupervised.get(s).set((int) inst.classValue(), squaredSumvalue); } } else { rl.attributeMissingValues.addToValue(s, 1); } } }
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public double computeAnomalyUnsupervised(Rule rl, int ruleIndex, Instance inst) { //Unsupervised ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>(); ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>(); double D = 0.0; double N = 0.0; if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue() && this.anomalyDetectionOption.isSet()) { for (int x = 0; x < inst.numAttributes() - 1; x++) { if (!inst.isMissing(x)) { ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>(); if (inst.attribute(x).isNumeric()) { //Numeric Attributes if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) { double mean = computeMean(rl.attributeStatistics.getValue(x), rl.instancesSeen); double sd = computeSD(rl.squaredAttributeStatistics.getValue(x), rl.attributeStatistics.getValue(x), rl.instancesSeen); double probability = computeProbability(mean, sd, inst.value(x)); if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(mean); AttribAnomalyStatisticTemp.add(sd); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); }/*w ww . ja v a 2 s. 
c o m*/ } } } else { //Nominal Attributes AutoExpandVector<DoubleVector> attribNominal = ((NominalAttributeClassObserver) rl.observers .get(x)).attValDistPerClass; //Attributes values distribution per class double numbAttribValue = 0.0; double attribVal = inst.value(x); //Attribute value for (int i = 0; i < attribNominal.size(); i++) { if (attribNominal.get(i) != null) { numbAttribValue = numbAttribValue + attribNominal.get(i).getValue((int) attribVal); } } double probability = numbAttribValue / rl.instancesSeen; if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); } } } } } } double anomaly = 0.0; if (D != 0) { anomaly = Math.abs(N / D); } if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) { caseAnomalyTemp.add(this.numInstance); double val = anomaly * 100; caseAnomalyTemp.add((int) val); this.caseAnomaly.add(caseAnomalyTemp); Rule x = new Rule(this.ruleSet.get(ruleIndex)); this.ruleSetAnomalies.add(x); this.ruleAnomaliesIndex.add(ruleIndex + 1); this.ruleAttribAnomalyStatistics.add(AttribAnomalyStatisticTemp2); } return anomaly; }
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public double computeAnomalySupervised(Rule rl, int ruleIndex, Instance inst) { //Not supervised ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>(); ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>(); double D = 0.0; double N = 0.0; if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue() && this.anomalyDetectionOption.isSet()) { for (int x = 0; x < inst.numAttributes() - 1; x++) { if (!inst.isMissing(x)) { ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>(); if (inst.attribute(x).isNumeric()) { //Numeric Attributes if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) { double mean = computeMean( (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()), (int) rl.obserClassDistrib.getValue((int) inst.classValue())); double sd = computeSD( (double) rl.squaredAttributeStatisticsSupervised.get(x) .get((int) inst.classValue()), (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()), (int) rl.obserClassDistrib.getValue((int) inst.classValue())); double probability = computeProbability(mean, sd, inst.value(x)); if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(mean); AttribAnomalyStatisticTemp.add(sd); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); }//from w w w. j a v a 2 s. 
c o m } } } else { //Nominal double attribVal = inst.value(x); //Attribute value double classVal = inst.classValue(); //Attribute value double probability = rl.observers.get(x).probabilityOfAttributeValueGivenClass(attribVal, (int) classVal); if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); } } } } } } double anomaly = 0.0; if (D != 0) { anomaly = Math.abs(N / D); } if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) { caseAnomalyTemp.add(this.numInstance); double val = anomaly * 100; caseAnomalyTemp.add((int) val); this.caseAnomalySupervised.add(caseAnomalyTemp); Rule y = new Rule(this.ruleSet.get(ruleIndex)); this.ruleSetAnomaliesSupervised.add(y); this.ruleAnomaliesIndexSupervised.add(ruleIndex + 1); this.ruleAttribAnomalyStatisticsSupervised.add(AttribAnomalyStatisticTemp2); } return anomaly; }
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public void theBestAttributes(Instance instance, AutoExpandVector<AttributeClassObserver> observersParameter) { for (int z = 0; z < instance.numAttributes() - 1; z++) { if (!instance.isMissing(z)) { int instAttIndex = modelAttIndexToInstanceAttIndex(z, instance); ArrayList<Double> attribBest = new ArrayList<Double>(); if (instance.attribute(instAttIndex).isNominal()) { this.minEntropyNominalAttrib = Double.MAX_VALUE; AutoExpandVector<DoubleVector> attribNominal = ((NominalAttributeClassObserver) observersParameter .get(z)).attValDistPerClass; findBestValEntropyNominalAtt(attribNominal, instance.attribute(z).numValues()); // The best value (lowest entropy) of a nominal attribute. attribBest.add(this.saveBestEntropyNominalAttrib.getValue(0)); attribBest.add(this.saveBestEntropyNominalAttrib.getValue(1)); attribBest.add(this.saveBestEntropyNominalAttrib.getValue(2)); this.saveBestValGlobalEntropy.add(attribBest); this.saveBestGlobalEntropy.setValue(z, this.saveBestEntropyNominalAttrib.getValue(1)); } else { this.root = ((BinaryTreeNumericAttributeClassObserver) observersParameter.get(z)).root; mainFindBestValEntropy(this.root); // The best value (lowest entropy) of a numeric attribute. attribBest.add(this.saveBestEntropy.getValue(0)); attribBest.add(this.saveBestEntropy.getValue(1)); attribBest.add(this.saveBestEntropy.getValue(2)); attribBest.add(this.saveBestEntropy.getValue(4)); this.saveBestValGlobalEntropy.add(attribBest); this.saveBestGlobalEntropy.setValue(z, this.saveBestEntropy.getValue(1)); }//from w w w.j ava 2 s . c om } else { double value = Double.MAX_VALUE; this.saveBestGlobalEntropy.setValue(z, value); } } }
From source file:moa.streams.filters.AddNoiseFilter.java
License:Open Source License
@Override public Instance nextInstance() { Instance inst = (Instance) this.inputStream.nextInstance().copy(); for (int i = 0; i < inst.numAttributes(); i++) { double noiseFrac = i == inst.classIndex() ? this.classNoiseFractionOption.getValue() : this.attNoiseFractionOption.getValue(); if (inst.attribute(i).isNominal()) { DoubleVector obs = (DoubleVector) this.attValObservers.get(i); if (obs == null) { obs = new DoubleVector(); this.attValObservers.set(i, obs); }//from w w w. j a va2 s. com int originalVal = (int) inst.value(i); if (!inst.isMissing(i)) { obs.addToValue(originalVal, inst.weight()); } if ((this.random.nextDouble() < noiseFrac) && (obs.numNonZeroEntries() > 1)) { do { inst.setValue(i, this.random.nextInt(obs.numValues())); } while (((int) inst.value(i) == originalVal) || (obs.getValue((int) inst.value(i)) == 0.0)); } } else { GaussianEstimator obs = (GaussianEstimator) this.attValObservers.get(i); if (obs == null) { obs = new GaussianEstimator(); this.attValObservers.set(i, obs); } obs.addObservation(inst.value(i), inst.weight()); inst.setValue(i, inst.value(i) + this.random.nextGaussian() * obs.getStdDev() * noiseFrac); } } return inst; }
From source file:moa.streams.filters.ReplacingMissingValuesFilter.java
License:Open Source License
@Override public Instance nextInstance() { Instance inst = (Instance) this.inputStream.nextInstance().copy(); // Initialization if (numAttributes < 0) { numAttributes = inst.numAttributes(); columnsStatistics = new double[numAttributes]; numberOfSamples = new long[numAttributes]; lastNominalValues = new String[numAttributes]; frequencies = new HashMap[numAttributes]; for (int i = 0; i < inst.numAttributes(); i++) { if (inst.attribute(i).isNominal()) frequencies[i] = new HashMap<String, Integer>(); }/*w w w .j a v a 2 s . c om*/ numericalSelectedStrategy = this.numericReplacementStrategyOption.getChosenIndex(); nominalSelectedStrategy = this.nominalReplacementStrategyOption.getChosenIndex(); } for (int i = 0; i < numAttributes; i++) { // ---- Numerical values ---- if (inst.attribute(i).isNumeric()) { // Handle missing value if (inst.isMissing(i)) { switch (numericalSelectedStrategy) { case 0: // NOTHING break; case 1: // LAST KNOWN VALUE case 2: // MEAN case 3: // MAX case 4: // MIN inst.setValue(i, columnsStatistics[i]); break; case 5: // CONSTANT inst.setValue(i, numericalConstantValueOption.getValue()); break; default: continue; } } // Update statistics with non-missing values else { switch (numericalSelectedStrategy) { case 1: // LAST KNOWN VALUE columnsStatistics[i] = inst.value(i); break; case 2: // MEAN numberOfSamples[i]++; columnsStatistics[i] = columnsStatistics[i] + (inst.value(i) - columnsStatistics[i]) / numberOfSamples[i]; break; case 3: // MAX columnsStatistics[i] = columnsStatistics[i] < inst.value(i) ? inst.value(i) : columnsStatistics[i]; break; case 4: // MIN columnsStatistics[i] = columnsStatistics[i] > inst.value(i) ? 
inst.value(i) : columnsStatistics[i]; break; default: continue; } } } // ---- Nominal values ---- else if (inst.attribute(i).isNominal()) { // Handle missing value if (inst.isMissing(i)) { switch (nominalSelectedStrategy) { case 0: // NOTHING break; case 1: // LAST KNOWN VALUE if (lastNominalValues[i] != null) { inst.setValue(i, lastNominalValues[i]); } break; case 2: // MODE if (!frequencies[i].isEmpty()) { // Sort the map to get the most frequent value Map<String, Integer> sortedMap = MapUtil.sortByValue(frequencies[i]); inst.setValue(i, sortedMap.entrySet().iterator().next().getKey()); } break; default: continue; } } // Update statistics with non-missing values else { switch (nominalSelectedStrategy) { case 1: // LAST KNOWN VALUE lastNominalValues[i] = inst.stringValue(i); break; case 2: // MODE Integer previousCounter = frequencies[i].get(inst.stringValue(i)); if (previousCounter == null) previousCounter = 0; frequencies[i].put(inst.stringValue(i), ++previousCounter); break; default: continue; } } } } return inst; }
From source file:mulan.classifier.meta.ConstrainedKMeans.java
License:Open Source License
/** * Updates the minimum and maximum values for all the attributes * based on a new instance./* ww w . j a v a 2 s . c o m*/ * * @param instance the new instance */ private void updateMinMax(Instance instance) { for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) { if (!instance.isMissing(j)) { if (Double.isNaN(m_Min[j])) { m_Min[j] = instance.value(j); m_Max[j] = instance.value(j); } else { if (instance.value(j) < m_Min[j]) { m_Min[j] = instance.value(j); } else { if (instance.value(j) > m_Max[j]) { m_Max[j] = instance.value(j); } } } } } }
From source file:mulan.data.MultiLabelInstances.java
License:Open Source License
/** * Method that checks whether an instance has missing labels * * @param instance one instance of this dataset * @return true if the instance has missing labels */// www . j ava2 s .c om public boolean hasMissingLabels(Instance instance) { int numLabels = getNumLabels(); int[] labelIndices = getLabelIndices(); boolean missing = false; for (int j = 0; j < numLabels; j++) { if (instance.isMissing(labelIndices[j])) { missing = true; break; } } return missing; }
From source file:myclassifier.myC45Pack.ClassDistribution.java
/** * Adds all instances with unknown values for given attribute, weighted * according to frequency of instances in each bag. * * @exception Exception if something goes wrong *///from www. java2 s .c om public void addInstWithMissValue(Instances dataSet, int attIndex) throws Exception { double[] valueProbs; double weight, newWeight; int classIndex; Instance instance; valueProbs = new double[w_perSubdataset.length]; for (int i = 0; i < w_perSubdataset.length; i++) { if (totalWeights == 0) { valueProbs[i] = 1.0 / valueProbs.length; } else { valueProbs[i] = w_perSubdataset[i] / totalWeights; } } Enumeration E = dataSet.enumerateInstances(); while (E.hasMoreElements()) { instance = (Instance) E.nextElement(); if (instance.isMissing(attIndex)) { classIndex = (int) instance.classValue(); weight = instance.weight(); w_perClass[classIndex] = w_perClass[classIndex] + weight; totalWeights += weight; for (int i = 0; i < w_perSubdataset.length; i++) { newWeight = valueProbs[i] * weight; w_perClassPerSubdataset[i][classIndex] += newWeight; w_perSubdataset[i] += newWeight; } } } }
From source file:myclassifier.myC45Pack.SplitModel.java
private void handleNominalAttribute(Instances dataSet) throws Exception { Instance instance; classDist = new ClassDistribution(numOfBranches, dataSet.numClasses()); Enumeration instanceEnum = dataSet.enumerateInstances(); while (instanceEnum.hasMoreElements()) { instance = (Instance) instanceEnum.nextElement(); if (!instance.isMissing(attribIndex)) { classDist.addInstance((int) instance.value(attribIndex), instance); }//from w w w.j a v a 2 s . c o m } // Check if minimum number of Instances in at least two // subsets. if (classDist.isSplitable(minInstances)) { numSubsets = numOfBranches; infoGain = classDist.calculateInfoGain(totalWeights); gainRatio = classDist.calculateGainRatio(infoGain); } }