List of usage examples for weka.core.Instance#attribute
public Attribute attribute(int index);
From source file:moa.classifiers.rules.AMRules.java
License:Apache License
protected double observersDistrib(Instance inst, AutoExpandVector<AttributeClassObserver> observerss) { double votes = 0.0; for (int z = 0; z < inst.numAttributes() - 1; z++) { int instAttIndex = modelAttIndexToInstanceAttIndex(z, inst); if (inst.attribute(instAttIndex).isNumeric()) { if (observerss.get(z) != null) { Node rootNode = ((BinaryTreeNumericAttributeClassObserverRegression) observerss.get(z)).root1; if (rootNode != null) { double sum = rootNode.greaterThan[0] + rootNode.lessThan[0]; double numTarget = rootNode.greaterThan[2] + rootNode.lessThan[2]; votes = sum / numTarget; break; }//from w ww . j a v a2 s . c o m } } } return votes; }
From source file:moa.classifiers.rules.functions.Perceptron.java
License:Apache License
public double updateWeights(Instance inst, double learningRatio) { // Normalize Instance double[] normalizedInstance = normalizedInstance(inst); // Compute the Normalized Prediction of Perceptron double normalizedPredict = prediction(normalizedInstance); double normalizedY = normalizeActualClassValue(inst); double sumWeights = 0.0; double delta = normalizedY - normalizedPredict; for (int j = 0; j < inst.numAttributes() - 1; j++) { int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst); if (inst.attribute(instAttIndex).isNumeric()) { this.weightAttribute[j] += learningRatio * delta * normalizedInstance[j]; sumWeights += Math.abs(this.weightAttribute[j]); }//w w w .j av a 2 s . c om } this.weightAttribute[inst.numAttributes() - 1] += learningRatio * delta; sumWeights += Math.abs(this.weightAttribute[inst.numAttributes() - 1]); if (sumWeights > inst.numAttributes()) { // Lasso regression for (int j = 0; j < inst.numAttributes() - 1; j++) { int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst); if (inst.attribute(instAttIndex).isNumeric()) { this.weightAttribute[j] = this.weightAttribute[j] / sumWeights; } } this.weightAttribute[inst.numAttributes() - 1] = this.weightAttribute[inst.numAttributes() - 1] / sumWeights; } return denormalizedPrediction(normalizedPredict); }
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
@Override public void trainOnInstanceImpl(Instance inst) { int countRuleFiredTrue = 0; boolean ruleFired = false; this.instance = inst; this.numAttributes = instance.numAttributes() - 1; this.numClass = instance.numClasses(); this.numInstance = numInstance + 1; int conta1 = 0; for (int j = 0; j < ruleSet.size(); j++) { if (this.ruleSet.get(j).ruleEvaluate(inst) == true) { countRuleFiredTrue = countRuleFiredTrue + 1; double anomaly = 0.0; if (this.Supervised.isSet()) { anomaly = computeAnomalySupervised(this.ruleSet.get(j), j, inst); // compute anomaly (Supervised method) } else if (this.Unsupervised.isSet()) { anomaly = computeAnomalyUnsupervised(this.ruleSet.get(j), j, inst); // compute anomaly (Unsupervised method) }/*from w ww.j a v a2s . c o m*/ if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) { conta1 = conta1 + 1; } // System.out.print(numInstance+";"+anomaly+"\n"); try { File dir = new File("SeaAnomaliesUnsupervised.txt"); FileWriter fileWriter = new FileWriter(dir, true); PrintWriter printWriter = new PrintWriter(fileWriter); printWriter.println(numInstance + ";" + anomaly); printWriter.flush(); printWriter.close(); } catch (IOException e) { e.printStackTrace(); } if ((this.ruleSet.get(j).instancesSeen <= this.anomalyNumInstThresholdOption.getValue()) || (anomaly < this.anomalyProbabilityThresholdOption.getValue() && this.anomalyDetectionOption.isSet()) || !this.anomalyDetectionOption.isSet()) { this.ruleSet.get(j).obserClassDistrib.addToValue((int) inst.classValue(), inst.weight()); for (int i = 0; i < inst.numAttributes() - 1; i++) { int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst); if (!inst.isMissing(instAttIndex)) { AttributeClassObserver obs = this.ruleSet.get(j).observers.get(i); // Nominal and binary tree. AttributeClassObserver obsGauss = this.ruleSet.get(j).observersGauss.get(i); // Gaussian. if (obs == null) { obs = inst.attribute(instAttIndex).isNominal() ? 
newNominalClassObserver() : newNumericClassObserver(); this.ruleSet.get(j).observers.set(i, obs); } if (obsGauss == null) { obsGauss = inst.attribute(instAttIndex).isNumeric() ? newNumericClassObserver2() : null; this.ruleSet.get(j).observersGauss.set(i, obsGauss); } obs.observeAttributeClass(inst.value(instAttIndex), (int) inst.classValue(), inst.weight()); if (inst.attribute(instAttIndex).isNumeric()) { obsGauss.observeAttributeClass(inst.value(instAttIndex), (int) inst.classValue(), inst.weight()); } } } expandeRule(this.ruleSet.get(j), inst, j); // This function expands the rule } if (this.orderedRulesOption.isSet()) { // Ordered rules break; } } } if (countRuleFiredTrue > 0) { ruleFired = true; } else { ruleFired = false; } if (ruleFired == false) { //If none of the rules cover the example update sufficient statistics of the default rule this.observedClassDistribution.addToValue((int) inst.classValue(), inst.weight()); for (int i = 0; i < inst.numAttributes() - 1; i++) { int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst); if (!inst.isMissing(instAttIndex)) { AttributeClassObserver obs = this.attributeObservers.get(i); AttributeClassObserver obsGauss = this.attributeObserversGauss.get(i); if (obs == null) { obs = inst.attribute(instAttIndex).isNominal() ? newNominalClassObserver() : newNumericClassObserver(); this.attributeObservers.set(i, obs); } if (obsGauss == null) { obsGauss = inst.attribute(instAttIndex).isNumeric() ? newNumericClassObserver2() : null; this.attributeObserversGauss.set(i, obsGauss); } obs.observeAttributeClass(inst.value(instAttIndex), (int) inst.classValue(), inst.weight()); if (inst.attribute(instAttIndex).isNumeric()) { obsGauss.observeAttributeClass(inst.value(instAttIndex), (int) inst.classValue(), inst.weight()); } } } createRule(inst); //This function creates a rule } }
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public double computeAnomalyUnsupervised(Rule rl, int ruleIndex, Instance inst) { //Unsupervised ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>(); ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>(); double D = 0.0; double N = 0.0; if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue() && this.anomalyDetectionOption.isSet()) { for (int x = 0; x < inst.numAttributes() - 1; x++) { if (!inst.isMissing(x)) { ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>(); if (inst.attribute(x).isNumeric()) { //Numeric Attributes if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) { double mean = computeMean(rl.attributeStatistics.getValue(x), rl.instancesSeen); double sd = computeSD(rl.squaredAttributeStatistics.getValue(x), rl.attributeStatistics.getValue(x), rl.instancesSeen); double probability = computeProbability(mean, sd, inst.value(x)); if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(mean); AttribAnomalyStatisticTemp.add(sd); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); }/*from ww w . ja va2s . 
com*/ } } } else { //Nominal Attributes AutoExpandVector<DoubleVector> attribNominal = ((NominalAttributeClassObserver) rl.observers .get(x)).attValDistPerClass; //Attributes values distribution per class double numbAttribValue = 0.0; double attribVal = inst.value(x); //Attribute value for (int i = 0; i < attribNominal.size(); i++) { if (attribNominal.get(i) != null) { numbAttribValue = numbAttribValue + attribNominal.get(i).getValue((int) attribVal); } } double probability = numbAttribValue / rl.instancesSeen; if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); } } } } } } double anomaly = 0.0; if (D != 0) { anomaly = Math.abs(N / D); } if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) { caseAnomalyTemp.add(this.numInstance); double val = anomaly * 100; caseAnomalyTemp.add((int) val); this.caseAnomaly.add(caseAnomalyTemp); Rule x = new Rule(this.ruleSet.get(ruleIndex)); this.ruleSetAnomalies.add(x); this.ruleAnomaliesIndex.add(ruleIndex + 1); this.ruleAttribAnomalyStatistics.add(AttribAnomalyStatisticTemp2); } return anomaly; }
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public double computeAnomalySupervised(Rule rl, int ruleIndex, Instance inst) { //Not supervised ArrayList<Integer> caseAnomalyTemp = new ArrayList<Integer>(); ArrayList<ArrayList<Double>> AttribAnomalyStatisticTemp2 = new ArrayList<ArrayList<Double>>(); double D = 0.0; double N = 0.0; if (rl.instancesSeen > this.anomalyNumInstThresholdOption.getValue() && this.anomalyDetectionOption.isSet()) { for (int x = 0; x < inst.numAttributes() - 1; x++) { if (!inst.isMissing(x)) { ArrayList<Double> AttribAnomalyStatisticTemp = new ArrayList<Double>(); if (inst.attribute(x).isNumeric()) { //Numeric Attributes if ((rl.instancesSeen - rl.attributeMissingValues.getValue(x)) > 30) { double mean = computeMean( (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()), (int) rl.obserClassDistrib.getValue((int) inst.classValue())); double sd = computeSD( (double) rl.squaredAttributeStatisticsSupervised.get(x) .get((int) inst.classValue()), (double) rl.attributeStatisticsSupervised.get(x).get((int) inst.classValue()), (int) rl.obserClassDistrib.getValue((int) inst.classValue())); double probability = computeProbability(mean, sd, inst.value(x)); if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(mean); AttribAnomalyStatisticTemp.add(sd); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); }//ww w . 
j av a 2 s.com } } } else { //Nominal double attribVal = inst.value(x); //Attribute value double classVal = inst.classValue(); //Attribute value double probability = rl.observers.get(x).probabilityOfAttributeValueGivenClass(attribVal, (int) classVal); if (probability != 0.0) { D = D + Math.log(probability); if (probability < this.probabilityThresholdOption.getValue()) { //0.10 N = N + Math.log(probability); AttribAnomalyStatisticTemp.add((double) x); AttribAnomalyStatisticTemp.add(inst.value(x)); AttribAnomalyStatisticTemp.add(probability); AttribAnomalyStatisticTemp2.add(AttribAnomalyStatisticTemp); } } } } } } double anomaly = 0.0; if (D != 0) { anomaly = Math.abs(N / D); } if (anomaly >= this.anomalyProbabilityThresholdOption.getValue()) { caseAnomalyTemp.add(this.numInstance); double val = anomaly * 100; caseAnomalyTemp.add((int) val); this.caseAnomalySupervised.add(caseAnomalyTemp); Rule y = new Rule(this.ruleSet.get(ruleIndex)); this.ruleSetAnomaliesSupervised.add(y); this.ruleAnomaliesIndexSupervised.add(ruleIndex + 1); this.ruleAttribAnomalyStatisticsSupervised.add(AttribAnomalyStatisticTemp2); } return anomaly; }
From source file:moa.classifiers.rules.RuleClassifier.java
License:Apache License
public void theBestAttributes(Instance instance, AutoExpandVector<AttributeClassObserver> observersParameter) { for (int z = 0; z < instance.numAttributes() - 1; z++) { if (!instance.isMissing(z)) { int instAttIndex = modelAttIndexToInstanceAttIndex(z, instance); ArrayList<Double> attribBest = new ArrayList<Double>(); if (instance.attribute(instAttIndex).isNominal()) { this.minEntropyNominalAttrib = Double.MAX_VALUE; AutoExpandVector<DoubleVector> attribNominal = ((NominalAttributeClassObserver) observersParameter .get(z)).attValDistPerClass; findBestValEntropyNominalAtt(attribNominal, instance.attribute(z).numValues()); // The best value (lowest entropy) of a nominal attribute. attribBest.add(this.saveBestEntropyNominalAttrib.getValue(0)); attribBest.add(this.saveBestEntropyNominalAttrib.getValue(1)); attribBest.add(this.saveBestEntropyNominalAttrib.getValue(2)); this.saveBestValGlobalEntropy.add(attribBest); this.saveBestGlobalEntropy.setValue(z, this.saveBestEntropyNominalAttrib.getValue(1)); } else { this.root = ((BinaryTreeNumericAttributeClassObserver) observersParameter.get(z)).root; mainFindBestValEntropy(this.root); // The best value (lowest entropy) of a numeric attribute. attribBest.add(this.saveBestEntropy.getValue(0)); attribBest.add(this.saveBestEntropy.getValue(1)); attribBest.add(this.saveBestEntropy.getValue(2)); attribBest.add(this.saveBestEntropy.getValue(4)); this.saveBestValGlobalEntropy.add(attribBest); this.saveBestGlobalEntropy.setValue(z, this.saveBestEntropy.getValue(1)); }// w ww .j av a 2 s. c om } else { double value = Double.MAX_VALUE; this.saveBestGlobalEntropy.setValue(z, value); } } }
From source file:moa.streams.filters.AddNoiseFilter.java
License:Open Source License
@Override public Instance nextInstance() { Instance inst = (Instance) this.inputStream.nextInstance().copy(); for (int i = 0; i < inst.numAttributes(); i++) { double noiseFrac = i == inst.classIndex() ? this.classNoiseFractionOption.getValue() : this.attNoiseFractionOption.getValue(); if (inst.attribute(i).isNominal()) { DoubleVector obs = (DoubleVector) this.attValObservers.get(i); if (obs == null) { obs = new DoubleVector(); this.attValObservers.set(i, obs); }//w ww . j a v a2 s. c om int originalVal = (int) inst.value(i); if (!inst.isMissing(i)) { obs.addToValue(originalVal, inst.weight()); } if ((this.random.nextDouble() < noiseFrac) && (obs.numNonZeroEntries() > 1)) { do { inst.setValue(i, this.random.nextInt(obs.numValues())); } while (((int) inst.value(i) == originalVal) || (obs.getValue((int) inst.value(i)) == 0.0)); } } else { GaussianEstimator obs = (GaussianEstimator) this.attValObservers.get(i); if (obs == null) { obs = new GaussianEstimator(); this.attValObservers.set(i, obs); } obs.addObservation(inst.value(i), inst.weight()); inst.setValue(i, inst.value(i) + this.random.nextGaussian() * obs.getStdDev() * noiseFrac); } } return inst; }
From source file:moa.streams.filters.ReplacingMissingValuesFilter.java
License:Open Source License
@Override public Instance nextInstance() { Instance inst = (Instance) this.inputStream.nextInstance().copy(); // Initialization if (numAttributes < 0) { numAttributes = inst.numAttributes(); columnsStatistics = new double[numAttributes]; numberOfSamples = new long[numAttributes]; lastNominalValues = new String[numAttributes]; frequencies = new HashMap[numAttributes]; for (int i = 0; i < inst.numAttributes(); i++) { if (inst.attribute(i).isNominal()) frequencies[i] = new HashMap<String, Integer>(); }//from w ww . j av a 2 s.com numericalSelectedStrategy = this.numericReplacementStrategyOption.getChosenIndex(); nominalSelectedStrategy = this.nominalReplacementStrategyOption.getChosenIndex(); } for (int i = 0; i < numAttributes; i++) { // ---- Numerical values ---- if (inst.attribute(i).isNumeric()) { // Handle missing value if (inst.isMissing(i)) { switch (numericalSelectedStrategy) { case 0: // NOTHING break; case 1: // LAST KNOWN VALUE case 2: // MEAN case 3: // MAX case 4: // MIN inst.setValue(i, columnsStatistics[i]); break; case 5: // CONSTANT inst.setValue(i, numericalConstantValueOption.getValue()); break; default: continue; } } // Update statistics with non-missing values else { switch (numericalSelectedStrategy) { case 1: // LAST KNOWN VALUE columnsStatistics[i] = inst.value(i); break; case 2: // MEAN numberOfSamples[i]++; columnsStatistics[i] = columnsStatistics[i] + (inst.value(i) - columnsStatistics[i]) / numberOfSamples[i]; break; case 3: // MAX columnsStatistics[i] = columnsStatistics[i] < inst.value(i) ? inst.value(i) : columnsStatistics[i]; break; case 4: // MIN columnsStatistics[i] = columnsStatistics[i] > inst.value(i) ? 
inst.value(i) : columnsStatistics[i]; break; default: continue; } } } // ---- Nominal values ---- else if (inst.attribute(i).isNominal()) { // Handle missing value if (inst.isMissing(i)) { switch (nominalSelectedStrategy) { case 0: // NOTHING break; case 1: // LAST KNOWN VALUE if (lastNominalValues[i] != null) { inst.setValue(i, lastNominalValues[i]); } break; case 2: // MODE if (!frequencies[i].isEmpty()) { // Sort the map to get the most frequent value Map<String, Integer> sortedMap = MapUtil.sortByValue(frequencies[i]); inst.setValue(i, sortedMap.entrySet().iterator().next().getKey()); } break; default: continue; } } // Update statistics with non-missing values else { switch (nominalSelectedStrategy) { case 1: // LAST KNOWN VALUE lastNominalValues[i] = inst.stringValue(i); break; case 2: // MODE Integer previousCounter = frequencies[i].get(inst.stringValue(i)); if (previousCounter == null) previousCounter = 0; frequencies[i].put(inst.stringValue(i), ++previousCounter); break; default: continue; } } } } return inst; }
From source file:mulan.classifier.lazy.BRkNN.java
License:Open Source License
/** * Select the best value for k by hold-one-out cross-validation. Hamming * Loss is minimized//from ww w . j av a 2 s .c o m * * @throws Exception */ protected void crossValidate() throws Exception { try { // the performance for each different k double[] hammingLoss = new double[cvMaxK]; for (int i = 0; i < cvMaxK; i++) { hammingLoss[i] = 0; } Instances dataSet = train; Instance instance; // the hold out instance Instances neighbours; // the neighboring instances double[] origDistances, convertedDistances; for (int i = 0; i < dataSet.numInstances(); i++) { if (getDebug() && (i % 50 == 0)) { debug("Cross validating " + i + "/" + dataSet.numInstances() + "\r"); } instance = dataSet.instance(i); neighbours = lnn.kNearestNeighbours(instance, cvMaxK); origDistances = lnn.getDistances(); // gathering the true labels for the instance boolean[] trueLabels = new boolean[numLabels]; for (int counter = 0; counter < numLabels; counter++) { int classIdx = labelIndices[counter]; String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx)); trueLabels[counter] = classValue.equals("1"); } // calculate the performance metric for each different k for (int j = cvMaxK; j > 0; j--) { convertedDistances = new double[origDistances.length]; System.arraycopy(origDistances, 0, convertedDistances, 0, origDistances.length); double[] confidences = this.getConfidences(neighbours, convertedDistances); boolean[] bipartition = null; switch (extension) { case NONE: // BRknn MultiLabelOutput results; results = new MultiLabelOutput(confidences, 0.5); bipartition = results.getBipartition(); break; case EXTA: // BRknn-a bipartition = labelsFromConfidences2(confidences); break; case EXTB: // BRknn-b bipartition = labelsFromConfidences3(confidences); break; } double symmetricDifference = 0; // |Y xor Z| for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) { boolean actual = trueLabels[labelIndex]; boolean predicted = bipartition[labelIndex]; if (predicted != actual) 
{ symmetricDifference++; } } hammingLoss[j - 1] += (symmetricDifference / numLabels); neighbours = new IBk().pruneToK(neighbours, convertedDistances, j - 1); } } // Display the results of the cross-validation if (getDebug()) { for (int i = cvMaxK; i > 0; i--) { debug("Hold-one-out performance of " + (i) + " neighbors "); debug("(Hamming Loss) = " + hammingLoss[i - 1] / dataSet.numInstances()); } } // Check through the performance stats and select the best // k value (or the lowest k if more than one best) double[] searchStats = hammingLoss; double bestPerformance = Double.NaN; int bestK = 1; for (int i = 0; i < cvMaxK; i++) { if (Double.isNaN(bestPerformance) || (bestPerformance > searchStats[i])) { bestPerformance = searchStats[i]; bestK = i + 1; } } numOfNeighbors = bestK; if (getDebug()) { System.err.println("Selected k = " + bestK); } } catch (Exception ex) { throw new Error("Couldn't optimize by cross-validation: " + ex.getMessage()); } }
From source file:mulan.classifier.lazy.BRkNN.java
License:Open Source License
/** * Calculates the confidences of the labels, based on the neighboring * instances/* w w w .j a v a 2 s. c o m*/ * * @param neighbours * the list of nearest neighboring instances * @param distances * the distances of the neighbors * @return the confidences of the labels */ private double[] getConfidences(Instances neighbours, double[] distances) { double total = 0, weight; double neighborLabels = 0; double[] confidences = new double[numLabels]; // Set up a correction to the estimator for (int i = 0; i < numLabels; i++) { confidences[i] = 1.0 / Math.max(1, train.numInstances()); } total = (double) numLabels / Math.max(1, train.numInstances()); for (int i = 0; i < neighbours.numInstances(); i++) { // Collect class counts Instance current = neighbours.instance(i); distances[i] = distances[i] * distances[i]; distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels)); switch (distanceWeighting) { case WEIGHT_INVERSE: weight = 1.0 / (distances[i] + 0.001); // to avoid division by // zero break; case WEIGHT_SIMILARITY: weight = 1.0 - distances[i]; break; default: // WEIGHT_NONE: weight = 1.0; break; } weight *= current.weight(); for (int j = 0; j < numLabels; j++) { double value = Double.parseDouble( current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j]))); if (Utils.eq(value, 1.0)) { confidences[j] += weight; neighborLabels += weight; } } total += weight; } avgPredictedLabels = (int) Math.round(neighborLabels / total); // Normalise distribution if (total > 0) { Utils.normalize(confidences, total); } return confidences; }