List of usage examples for the weka.core.Instance method numClasses()
public int numClasses();
From source file:moa.cluster.Riffle.java
License:Apache License
/** * Sanity check and initialization of dynamic fields * * @param x/*from w w w. j a va 2 s. com*/ */ protected final void safeInit(Instance x) { if (this.embeddedLearnerOption.getValueAsCLIString().contains("Majority class")) { this.excludeOutlierVoting = true; } if (centroid == null) { centroid = x.toDoubleArray(); } if (this.instances == null) { prepareEmbeddedClassifier(); ArrayList<Attribute> attribs = new ArrayList<>(); this.symbolFrequencies = new double[x.dataset().numAttributes()][]; for (int i = 0; i < x.dataset().numAttributes(); ++i) { Attribute a = (Attribute) x.dataset().attribute(i).copy(); if (i == x.classIndex()) { a.setWeight(0.0); } else { a.setWeight(1.0); } switch (a.type()) { case Attribute.STRING: case Attribute.NOMINAL: //UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i)); this.symbolFrequencies[i] = new double[a.numValues()]; break; case Attribute.NUMERIC: case Attribute.RELATIONAL: case Attribute.DATE: default: // UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i)); this.symbolFrequencies[i] = null; } attribs.add(a); } this.instances = new Instances("ClusterData", attribs, 1); this.instances.setClassIndex(x.classIndex()); } // else { // for (int i = 0; i < x.dataset().numAttributes() && i < this.header.numAttributes(); ++i) { // double val = x.value(i); // Attribute a = this.header.attribute(i); // // expand range as necessary // if (val < a.getLowerNumericBound() || val > a.getUpperNumericBound()){ // UnsafeUtils.setAttributeRange(a, Math.min(val,a.getLowerNumericBound()), Math.max(val,a.getUpperNumericBound())); // } // // increase frequency counts if new string value is encountered // if (a.type() == Attribute.STRING && (val >= Math.max(this.symbolFrequencies[i].length, a.numValues()))) { // double newArray[] = new double[Math.max(this.symbolFrequencies[i].length, a.numValues())]; // Arrays.fill(newArray, 0); // for(int j = 0; j <= this.symbolFrequencies[i].length; j++) { // newArray[j] = this.symbolFrequencies[i][j]; // } 
// this.symbolFrequencies[i] = newArray; // } // } // } if (this.variances == null) { this.variances = new double[x.numAttributes()]; Arrays.fill(this.variances, 1); } if (this.entropies == null) { this.entropies = new double[x.numAttributes()]; Arrays.fill(this.entropies, 0); } if (this.labelFrequencies == null) { this.labelFrequencies = new double[x.numClasses()]; Arrays.fill(this.labelFrequencies, 0); } if (this.gtLabelFrequencies == null) { this.gtLabelFrequencies = new double[x.numClasses()]; Arrays.fill(this.gtLabelFrequencies, 0); } if (this.rho == null) { this.rho = new double[x.numAttributes()]; Arrays.fill(this.rho, 0); } }
From source file:moa.clusterer.FeS2.java
License:Apache License
/** * Find the nearest cluster, and use its most frequent label. * If nearest cluster has no label, then we have a novel cluster * Unless data point is an outlier to all clusters, then it is just an outlier * @param inst//from w ww . j a v a 2 s.com * @return */ @Override public double[] getVotesForInstance(Instance inst) { assert (this.universalCluster != null) : "FeS2::getVotesForInstance() called without any initialization or training!"; int novelClassLabel = inst.numClasses(); int outlierLabel = novelClassLabel + 1; double[] votes = new double[inst.numClasses() + 2]; if (this.clusters.isEmpty()) { return votes; } double[] cumulativeVotes = new double[inst.numClasses()]; double[] cumulativeVotes_p = new double[inst.numClasses()]; double[] cumulativeVotes_pw = new double[inst.numClasses()]; double[] cumulativeVotes_n = new double[inst.numClasses()]; double[] cumulativeVotes_np = new double[inst.numClasses()]; double[] cumulativeVotes_npw = new double[inst.numClasses()]; double[] cumulativeWinnerTakesAllVotes = new double[inst.numClasses()]; Arrays.fill(votes, 0.0); Arrays.fill(cumulativeVotes, 0.0); Arrays.fill(cumulativeVotes_p, 0.0); Arrays.fill(cumulativeVotes_pw, 0.0); Arrays.fill(cumulativeVotes_n, 0.0); Arrays.fill(cumulativeVotes_np, 0.0); Arrays.fill(cumulativeVotes_npw, 0.0); Arrays.fill(cumulativeWinnerTakesAllVotes, 0.0); final int TRUE_CLASS = (int) inst.classValue(); // for debug watch windows only final SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(this.clusters, inst); boolean memberOfAtLeastOneTrueCluster = false; boolean universalOutlier = true; double bestProbability = 0; double universalProbability = this.universalCluster.getInclusionProbability(inst); NearestClusterTuple bestMatchCluster = null; // Gather data for (NearestClusterTuple nct : nearestClusters) { double p = nct.getDistance(); boolean localOutlier = nct.getCluster().isOutlier(inst); memberOfAtLeastOneTrueCluster = memberOfAtLeastOneTrueCluster || 
(!localOutlier && nct.getCluster().size() > this.minimumClusterSizeOption.getValue()); universalOutlier = universalOutlier && localOutlier; bestProbability = Math.max(p, bestProbability); if (p <= 0) { continue; } int localWinner = (int) nct.getCluster().getGroundTruth(); cumulativeWinnerTakesAllVotes[localWinner] += p; double clusterVotes[] = nct.getCluster().getVotes(); double clusterNormalizedVotes[] = nct.getCluster().getVotes().clone(); if (weka.core.Utils.sum(clusterNormalizedVotes) > 0) { weka.core.Utils.normalize(clusterNormalizedVotes); } for (int i = 0; i < clusterVotes.length; ++i) { cumulativeVotes[i] += clusterVotes[i]; cumulativeVotes_p[i] += clusterVotes[i] * p; cumulativeVotes_pw[i] += clusterVotes[i] * p * nct.getCluster().getWeight(); cumulativeVotes_n[i] += clusterNormalizedVotes[i]; cumulativeVotes_np[i] += clusterNormalizedVotes[i] * p; cumulativeVotes_npw[i] += clusterNormalizedVotes[i] * p * nct.getCluster().getWeight(); } if (!localOutlier) { bestMatchCluster = nct; } } // end for universalProbabilitySums += universalProbability; bestProbabilitySums += bestProbability; bestProbabilityCount += 1; if (nearestClusters.isEmpty()) { votes[outlierLabel] = 1.0; } else { if (weka.core.Utils.sum(cumulativeVotes) > 0) { weka.core.Utils.normalize(cumulativeVotes); } if (weka.core.Utils.sum(cumulativeVotes_p) > 0) { weka.core.Utils.normalize(cumulativeVotes_p); } if (weka.core.Utils.sum(cumulativeVotes_pw) > 0) { weka.core.Utils.normalize(cumulativeVotes_pw); } if (weka.core.Utils.sum(cumulativeVotes_n) > 0) { weka.core.Utils.normalize(cumulativeVotes_n); } if (weka.core.Utils.sum(cumulativeVotes_np) > 0) { weka.core.Utils.normalize(cumulativeVotes_np); } if (weka.core.Utils.sum(cumulativeVotes_npw) > 0) { weka.core.Utils.normalize(cumulativeVotes_npw); } if (weka.core.Utils.sum(cumulativeWinnerTakesAllVotes) > 0) { weka.core.Utils.normalize(cumulativeWinnerTakesAllVotes); } switch (this.votingStrategyOption.getChosenIndex()) { case 0: // 1-NN - usually 
not the strongest double[] nearestNeighborVotes = nearestClusters.last().getCluster().getVotes(); for (int i = 0; i < nearestNeighborVotes.length; ++i) { votes[i] = nearestNeighborVotes[i]; } break; case 1: // Global k-NN - this is a poor performer for (int i = 0; i < cumulativeVotes.length; ++i) { votes[i] = cumulativeVotes[i]; } break; case 2: // Globally probability-weighted k-NN - good, but biased towards heavy clusters for (int i = 0; i < cumulativeVotes_p.length; ++i) { votes[i] = cumulativeVotes_p[i]; } break; case 3: // Globally probability-utility-weighted k-NN - good, but overly complex for (int i = 0; i < cumulativeVotes_pw.length; ++i) { votes[i] = cumulativeVotes_pw[i]; } break; case 4: // Globally normalized k-NN - this is also usually a really really poor performer. Don't use it for (int i = 0; i < cumulativeVotes_n.length; ++i) { votes[i] = cumulativeVotes_n[i]; } break; case 5: // Globally normalized probability-weighted k-NN - a safe bet for (int i = 0; i < cumulativeVotes_np.length; ++i) { votes[i] = cumulativeVotes_np[i]; } break; case 6: // Globally normalized probability-utility-weighted k-NN - default and preferred method for (int i = 0; i < cumulativeVotes_npw.length; ++i) { votes[i] = cumulativeVotes_npw[i]; } break; case 7: // Globally weighted k-NN winner take all per cluster - Can avoid noise, but not usually the best default: for (int i = 0; i < cumulativeWinnerTakesAllVotes.length; ++i) { votes[i] = cumulativeWinnerTakesAllVotes[i]; } } // end switch double voteAccumulator = 0; for (double v : votes) { voteAccumulator += v; } // A novel cluster is one of sufficient size but no label if ((bestMatchCluster != null) // It matches a cluster && (bestMatchCluster.getCluster().size() > this.minimumClusterSizeOption.getValue()) // that is overall large enough && (bestMatchCluster.getCluster().getNumLabeledPoints() < 1)) { // but without labels votes[novelClassLabel] = 1.0; } // outlier detection if (universalOutlier) { int maxIdx = 
weka.core.Utils.maxIndex(votes); if (maxIdx < 0) { maxIdx = 0; } double outlierValue = votes[maxIdx]; if (outlierValue <= 0) { votes[novelClassLabel] = 1.0; // special case of novelty when we have absolutely no clue how to label an outlier outlierValue = 1e-16; } votes[outlierLabel] = outlierValue / 2.0; //Math.max(Math.abs(1.0 - bestProbability), Math.abs(1.0 - universalProbability)); } } // end if (nearestClusters not empty) return votes; }
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Fallback initialization for when this clusterer is driven outside of the
 * moa.tasks.EvaluateNonStationaryDynamicStream task, which would normally
 * perform this setup.
 *
 * @param x the instance whose dataset defines the attribute/class dimensions
 */
public final void safeInit(Instance x) {
    if (this.universalCluster == null) {
        universalCluster = new Riffle(x);
        universalCluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());
        // Start with unit variance in every dimension.
        final double[] initialVariances = new double[x.numAttributes()];
        Arrays.fill(initialVariances, 1.0);
        universalCluster.setVariances(initialVariances);
        universalCluster.setWeight(0);
        universalCluster.recompute();
        bestProbabilitySums = 0;
        bestProbabilityCount = 0;
    }
    if (this.knownLabels == null) {
        this.knownLabels = new int[x.numClasses()];
        Arrays.fill(knownLabels, 0);
        this.numAttributes = x.numAttributes();
    }
    if (this.header == null) {
        this.header = AbstractNovelClassClassifier.augmentInstances(x.dataset());
    }
}
From source file:moa.evaluation.ClassificationWithNovelClassPerformanceEvaluator.java
License:Open Source License
/** * //from w ww . ja va 2s .c om * Note that for novel class testing, an addition class value is added to the known classes. T * This extra "Label" represents a prediction of "Novel Class". This approach allows for * algorithms that do not have novel class prediction capabilities to still function, * as this method first bounds checks to see if the prediction array includes the added label * * @param inst instance under test * @param classVotes prediction table for this instance */ @Override public void addResult(Instance inst, double[] classVotes) { if (header == null) { header = AbstractNovelClassClassifier.augmentInstances(inst.dataset()); this.novelClassLabel = header.classAttribute() .indexOfValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR); this.outlierLabel = header.classAttribute() .indexOfValue(AbstractNovelClassClassifier.OUTLIER_LABEL_STR); this.rowKappa = new double[header.numClasses()]; Arrays.fill(this.rowKappa, 0.0); this.columnKappa = new double[header.numClasses()]; Arrays.fill(this.columnKappa, 0.0); this.knownTrueLabels = new int[header.numClasses()]; Arrays.fill(knownTrueLabels, 0); this.observedLabels = new int[header.numClasses()]; Arrays.fill(observedLabels, 0); } final int trueClass = (int) inst.classValue(); if (classVotes == null) { this.knownTrueLabels[trueClass]++; return; } final double[] labelsOnlyVotes = Arrays.copyOf(classVotes, inst.numClasses()); if (labelsOnlyVotes.length > this.novelClassLabel) { labelsOnlyVotes[novelClassLabel] = 0; } if (labelsOnlyVotes.length > this.outlierLabel) { labelsOnlyVotes[outlierLabel] = 0; } final double totalVoteQty = weka.core.Utils.sum(labelsOnlyVotes); final int predictedClass = weka.core.Utils.maxIndex(labelsOnlyVotes); // Don't count the special extended indexes for novel and outlier final boolean isMarkedOutlier = (weka.core.Utils.maxIndex(classVotes) == this.outlierLabel); if (predictedClass < inst.numClasses() && labelsOnlyVotes[predictedClass] > 0.0) { // Only if there is SOME vote 
(non-zero) this.observedLabels[predictedClass]++; // If we predict it, then it can't be novel! } //final boolean isTrueNovel = !(this.observedLabels[(int)trueClass] > observationsUntilNotNovelOption.getValue()); boolean predictedNovel = ((classVotes.length > this.novelClassLabel) && (classVotes[this.novelClassLabel] > 0));// this.thresholdOfNoveltyOption.getValue())); final boolean isVoteOutlier = (totalVoteQty <= (weka.core.Utils.SMALL * 10.0)); final boolean correctLabelPrediction = (predictedClass == trueClass); switch (this.outlierHandlingStrategyOption.getChosenIndex()) { case 0: // use anyway // keep on trucking... break; case 1: // ignore marked if (isMarkedOutlier) { return; } break; case 2: // ignore no vote if (isVoteOutlier) { return; } break; case 3: // ignore iff marked AND no vote if (isVoteOutlier && isMarkedOutlier) { return; } break; case 4: // ignore pure OR marked if (isVoteOutlier || isMarkedOutlier) { return; } break; case 5: // mark as novel predictedNovel = predictedNovel || isMarkedOutlier; break; default: break; } this.numberOfInstancesSeen++; this.weightObserved += inst.weight(); // /!\ IS THIS RIGHT??? //final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption.getValue()) && (this.observedLabels[trueClass] < observationsUntilNotNovelOption.getValue()); final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption .getValue()); // 8x different mutually exclusive options (i.e. 
3-bits) if ((!predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Should be most common this.novelClassDetectionTrueNegative++; this.weightCorrect++; } if ((predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Rare if ever this.novelClassDetectionTruePositive++; this.weightCorrect++; assert false : "Paradox 1 - true novel, but predicted the right label"; } if ((predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Error due to overly restrictive models this.novelClassDetectionFalsePositive++; if (this.goodIsGoodOption.isSet()) { this.weightCorrect++; } } if ((!predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Should never happen? Framework was wrong here, so TN this.novelClassDetectionTrueNegative++; this.weightCorrect++; assert false : "Paradox 2 - true novel, but predicted the right label"; } if ((predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // Should be most common when x is novel this.novelClassDetectionTruePositive++; this.weightCorrect++; } if ((predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Probably an Outlier case this.novelClassDetectionFalsePositive++; if (this.outlierHandlingStrategyOption.getChosenIndex() > 0) { this.weightCorrect++; } } if ((!predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // NCD failure FN this.novelClassDetectionFalseNegative++; } if ((!predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Correct NCD, but bad h(x) prediction this.novelClassDetectionTrueNegative++; } this.rowKappa[predictedClass]++; this.columnKappa[trueClass]++; this.knownTrueLabels[trueClass] += inst.weight(); }
From source file:myclassifier.myC45Pack.MyClassifierTree.java
/**
 * Returns class probabilities for a weighted instance.
 *
 * @param instance the instance to get the distribution for
 * @param useLaplace whether to apply Laplace smoothing
 * @return the class probability distribution
 * @throws Exception if something goes wrong
 */
public final double[] distributionForInstance(Instance instance, boolean useLaplace) throws Exception {
    final double[] probs = new double[instance.numClasses()];
    for (int classIdx = 0; classIdx < probs.length; classIdx++) {
        probs[classIdx] = useLaplace
                ? getProbsLaplace(classIdx, instance, 1)
                : getProbs(classIdx, instance, 1);
    }
    return probs;
}
From source file:NaiveBayesPckge.mushClass.java
@Override public double classifyInstance(Instance instance) throws java.lang.Exception { double classify = 0; // banyaknya kesimpulan. Misal T dan F berati ada 2 int numClasses = instance.numClasses(); double[] out = new double[numClasses]; //banyaknya kelas yang diuji int class_index = instance.classIndex(); //banyaknya atribut int num_attributes = instance.numAttributes(); double inputs[] = new double[num_attributes]; for (int i = 0; i < numClasses; i++) { out[i] = probabConclusion[i];// ww w . j av a2 s. c o m for (int j = 0; j < num_attributes - 1; j++) { int indexLabel = searchIndexLabel(j, instance.stringValue(j)); out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i); } } classify = getIndexBiggestProbability(out); return classify; }
From source file:NaiveBayesPckge.mushClass.java
@Override public double[] distributionForInstance(Instance instance) throws Exception { // banyaknya kesimpulan. Misal T dan F berati ada 2 int numClasses = instance.numClasses(); double[] out = new double[numClasses]; //banyaknya kelas yang diuji int class_index = instance.classIndex(); //banyaknya atribut int num_attributes = instance.numAttributes(); double inputs[] = new double[num_attributes]; for (int i = 0; i < numClasses; i++) { out[i] = probabConclusion[i];/*from w ww. j a va 2 s . com*/ // System.out.print("\n" + maxIterasi +". out["+i+"] = "); for (int j = 1; j < num_attributes; j++) { int indexLabel = searchIndexLabel(j, instance.stringValue(j)); out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i); // System.out.print(atribNom[j].getAttribObjectType(indexLabel, i) + "*"); } // System.out.println("\nout["+i+"] = "+out[i]); // System.out.println(instance.toString()); } // maxIterasi++; return out; }
From source file:NaiveBayesPckge.NaiveBayesCode.java
@Override public double[] distributionForInstance(Instance instance) throws Exception { // banyaknya kesimpulan. Misal T dan F berati ada 2 int numClasses = instance.numClasses(); double[] out = new double[numClasses]; //banyaknya kelas yang diuji int class_index = instance.classIndex(); //banyaknya atribut int num_attributes = instance.numAttributes(); double inputs[] = new double[num_attributes]; for (int i = 0; i < numClasses; i++) { out[i] = probabConclusion[i];// w ww . ja v a2s . c o m // System.out.print("\n" + maxIterasi +". out["+i+"] = "); for (int j = 0; j < num_attributes - 1; j++) { int indexLabel = searchIndexLabel(j, instance.stringValue(j)); out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i); // System.out.print(atribNom[j].getAttribObjectType(indexLabel, i) + "*"); } // System.out.println("\nout["+i+"] = "+out[i]); // System.out.println(instance.toString()); } // maxIterasi++; return out; }
From source file:net.paudan.evosvm.LibLINEAR.java
License:Open Source License
/** * Computes the distribution for a given instance. * * @param instance the instance for which distribution is computed * @return the distribution/*w ww . j ava2 s . c o m*/ * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance instance) throws Exception { if (!getDoNotReplaceMissingValues()) { m_ReplaceMissingValues.input(instance); m_ReplaceMissingValues.batchFinished(); instance = m_ReplaceMissingValues.output(); } if (getConvertNominalToBinary() && m_NominalToBinary != null) { m_NominalToBinary.input(instance); m_NominalToBinary.batchFinished(); instance = m_NominalToBinary.output(); } if (m_Filter != null) { m_Filter.input(instance); m_Filter.batchFinished(); instance = m_Filter.output(); } FeatureNode[] x = instanceToArray(instance); double[] result = new double[instance.numClasses()]; if (m_ProbabilityEstimates) { if (m_SolverType != SolverType.L2R_LR && m_SolverType != SolverType.L2R_LR_DUAL && m_SolverType != SolverType.L1R_LR) { throw new WekaException("probability estimation is currently only " + "supported for L2-regularized logistic regression"); } int[] labels = m_Model.getLabels(); double[] prob_estimates = new double[instance.numClasses()]; Linear.predictProbability(m_Model, x, prob_estimates); // Return order of probabilities to canonical weka attribute order for (int k = 0; k < labels.length; k++) { result[labels[k]] = prob_estimates[k]; } } else { int prediction = (int) Linear.predict(m_Model, x); assert (instance.classAttribute().isNominal()); result[prediction] = 1; } return result; }
From source file:net.sf.jclal.classifier.MOAWrapper.java
License:Open Source License
/** * {@inheritDoc}//from ww w . j a v a 2 s . com */ public double[] distributionForInstance(Instance instance) throws Exception { double[] result; result = classifier.getVotesForInstance(instance); // ensure that the array has as many elements as there are // class values! if (result.length < instance.numClasses()) { double[] newResult = new double[instance.numClasses()]; System.arraycopy(result, 0, newResult, 0, result.length); result = newResult; } try { Utils.normalize(result); } catch (Exception e) { result = new double[instance.numClasses()]; } return result; }