List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:meka.gui.dataviewer.DataViewerMainPanel.java
License: Open Source License
/** * displays some properties of the instances *///from www.j av a 2s . c om public void showProperties() { DataPanel panel; ListSelectorDialog dialog; Vector<String> props; Instances inst; panel = getCurrentPanel(); if (panel == null) { return; } inst = panel.getInstances(); if (inst == null) { return; } if (inst.classIndex() < 0) { inst.setClassIndex(inst.numAttributes() - 1); } // get some data props = new Vector<String>(); props.add("Filename: " + panel.getFilename()); props.add("Relation name: " + inst.relationName()); props.add("# of instances: " + inst.numInstances()); props.add("# of attributes: " + inst.numAttributes()); props.add("Class attribute: " + inst.classAttribute().name()); props.add("# of class labels: " + inst.numClasses()); dialog = new ListSelectorDialog(getParentFrame(), new JList(props)); dialog.showDialog(); }
From source file:ml.ann.MultiClassPTR.java
@Override public void buildClassifier(Instances instances) throws Exception { initAttributes(instances);//from ww w . j a va 2 s. com // REMEMBER: only works if class index is in the last position for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) { Instance instance = instances.get(instanceIdx); double[] inputInstance = inputInstances[instanceIdx]; inputInstance[0] = 1.0; // initialize bias value for (int attrIdx = 0; attrIdx < instance.numAttributes() - 1; attrIdx++) { inputInstance[attrIdx + 1] = instance.value(attrIdx); // the first index of input instance is for bias } } // Initialize target values if (instances.classAttribute().isNominal()) { for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) { Instance instance = instances.instance(instanceIdx); for (int classIdx = 0; classIdx < instances.numClasses(); classIdx++) { targetInstances[instanceIdx][classIdx] = 0.0; } targetInstances[instanceIdx][(int) instance.classValue()] = 1.0; } } else { for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) { Instance instance = instances.instance(instanceIdx); targetInstances[instanceIdx][0] = instance.classValue(); } } if (algo == 1) { setActFunction(); buildClassifier(); } else if (algo == 2) { buildClassifier(); } else if (algo == 3) { buildClassifierBatch(); } }
From source file:moa.streams.ConceptDriftRealStream.java
License:Open Source License
@Override public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { this.inputStream = (InstanceStream) getPreparedClassOption(this.streamOption); this.driftStream = (InstanceStream) getPreparedClassOption(this.driftstreamOption); this.random = new Random(this.randomSeedOption.getValue()); numberInstanceStream = 0;//from w w w . j a v a 2 s . c o m if (this.alphaOption.getValue() != 0.0) { this.widthOption.setValue((int) (1 / Math.tan(this.alphaOption.getValue() * Math.PI / 180))); } // generate header Instances first = this.inputStream.getHeader(); Instances second = this.driftStream.getHeader(); FastVector newAttributes = new FastVector(); for (int i = 0; i < first.numAttributes() - 1; i++) { newAttributes.addElement(first.attribute(i)); } for (int i = 0; i < second.numAttributes() - 1; i++) { newAttributes.addElement(second.attribute(i)); } Attribute classLabels; if (first.numClasses() < second.numClasses()) { classLabels = second.classAttribute(); } else { classLabels = first.classAttribute(); } newAttributes.addElement(classLabels); this.streamHeader = new InstancesHeader( new Instances(getCLICreationString(InstanceStream.class), newAttributes, 0)); this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1); restart(); }
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * Evaluates the learner on the given instance set, accounting for
 * novel-class labels and time-boxed outliers.
 *
 * First, any queued outlier whose label deadline has passed is forced to a
 * final decision and scored; then each incoming instance is either scored
 * immediately or, when the learner flags it as an outlier, queued until its
 * deadline. Results feed both {@code evaluator} and the confusion matrix
 * {@code cm}.
 *
 * @param testInstances instance set to evaluate accuracy on
 * @return number of instances actually tested (the size of the input set)
 */
private int test(Instances testInstances) {
    this.monitor.setCurrentActivityDescription("Testing Instances");
    int ret = testInstances.size();
    // NOTE(review): label index numClasses is used as the synthetic "novel"
    // label and numClasses+1 as the "outlier" vote slot — confirm this
    // matches the augmented label layout used by the learner.
    int novelClassLabel = testInstances.numClasses();
    int outlierLabel = novelClassLabel + 1;

    // For latent-label outliers that have reached their deadline, we must
    // now make a final decision:
    while (!this.pendingFinalLabelInstQueue.isEmpty()
            && this.pendingFinalLabelInstQueue.peek().deadline <= this.instancesProcessed) {
        TimeBoxedInstance ti = this.pendingFinalLabelInstQueue.pop();
        int y = (int) ti.inst.classValue();
        double[] prediction = null;
        // Label still unknown within the deadline window: score it as a
        // novel-class instance (marked via a special weight).
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            Instance novelInst = (Instance) ti.inst.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            novelInst.setWeight(NOVEL_WEIGHT);
            prediction = learner.getVotesForInstance(novelInst);
            evaluator.addResult(novelInst, prediction); // Outlier out of time. Remove it
        } else {
            prediction = learner.getVotesForInstance(ti.inst);
            evaluator.addResult(ti.inst, prediction); // Outlier out of time. Remove it
        }
        this.cm.add(weka.core.Utils.maxIndex(prediction), ti.inst.classValue());
    }

    // Run accuracy test for current instance(s)
    for (Instance i : testInstances) {
        int y = (int) i.classValue();
        double[] prediction = null;
        Instance instToActuallyPredict = i;
        // If novel, make a special instance
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            instToActuallyPredict = (Instance) i.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            // WARNING - this crashes other algorithms if not also done on training!
            instToActuallyPredict.setWeight(NOVEL_WEIGHT);
        }
        prediction = learner.getVotesForInstance(instToActuallyPredict);
        // Outlier vote above the uniform baseline: defer the decision until
        // the label deadline instead of scoring now.
        if ((prediction.length > outlierLabel) && (prediction[outlierLabel] > (1.0 / prediction.length))) {
            this.pendingFinalLabelInstQueue.add(new TimeBoxedInstance(i, this.instancesProcessed,
                    this.labelDeadlineOption.getValue(), prediction)); // Delay accuracy metrics until stale time
        } else {
            evaluator.addResult(instToActuallyPredict, prediction); // Not an outlier, so treat it like normal
            this.cm.add(weka.core.Utils.maxIndex(prediction), i.classValue());
        }
    } // end for

    assert this.pendingFinalLabelInstQueue.size() < (this.labelDeadlineOption
            .getValue() + 1) : "Cache 'pendingFinalLabelInstQueue' is larger than designed.";
    return ret;
}
From source file:myclassifier.MyC45.java
/**
 * Recursively builds the decision tree for the given data.
 *
 * Note: the original comment called this an "ID3 tree", but the split
 * criterion used here is the gain ratio, i.e. the C4.5 criterion.
 *
 * @param data the training data for this node
 * @throws Exception if the decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {
    // Check if no instances have reached this node: make an "empty" leaf
    // with an undefined class value and an all-zero distribution.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = -1; //Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute the gain ratio of every attribute and pick the maximum.
    double[] gainRatios = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        gainRatios[att.index()] = computeGainRatio(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gainRatios));

    // Make leaf if the best gain ratio is zero.
    // Otherwise create successors.
    if (Utils.eq(gainRatios[m_Attribute.index()], 0)) {
        m_Attribute = null;
        // Leaf: class distribution of the instances at this node; the
        // predicted class is the majority class.
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Split on the chosen attribute and grow one subtree per value.
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new MyC45[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyC45();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}
From source file:myclassifier.MyC45.java
private double computeEntropyFromData(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }//from w w w . ja v a 2 s . com double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) entropy -= (double) (classCounts[j] / data.numInstances()) * Utils.log2((double) classCounts[j] / data.numInstances()); } //return entropy + Utils.log2(data.numInstances()); return entropy; }
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Builds a class distribution over a single (whole) dataset: one subset
 * that receives every instance.
 *
 * @param dataSet the dataset to summarize
 * @throws Exception if an instance cannot be added
 */
public ClassDistribution(Instances dataSet) throws Exception {
    w_perClassPerSubdataset = new double[1][dataSet.numClasses()];
    w_perSubdataset = new double[1];
    w_perClass = new double[dataSet.numClasses()];
    totalWeights = 0;

    // every instance goes into the single subset 0
    Enumeration instances = dataSet.enumerateInstances();
    while (instances.hasMoreElements()) {
        addInstance(0, (Instance) instances.nextElement());
    }
}
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Creates a distribution of the given instances over the subsets defined by
 * a split model. Instances whose subset cannot be determined (index -1,
 * e.g. a missing split value) are spread across subsets according to the
 * model's weights.
 *
 * @param source the instances to distribute
 * @param modelToUse the split model defining the subsets
 * @throws Exception if an instance cannot be added
 */
public ClassDistribution(Instances source, C45ClassifierSplitModel modelToUse) throws Exception {
    w_perClassPerSubdataset = new double[modelToUse.numSubsets()][0];
    w_perSubdataset = new double[modelToUse.numSubsets()];
    w_perClass = new double[source.numClasses()];
    totalWeights = 0;
    for (int subset = 0; subset < modelToUse.numSubsets(); subset++) {
        w_perClassPerSubdataset[subset] = new double[source.numClasses()];
    }

    Enumeration instances = source.enumerateInstances();
    while (instances.hasMoreElements()) {
        Instance current = (Instance) instances.nextElement();
        int subsetIndex = modelToUse.getSubsetIndex(current);
        if (subsetIndex == -1) {
            // subset unknown: distribute by the model's per-subset weights
            addWeights(current, modelToUse.getWeights(current));
        } else {
            addInstance(subsetIndex, current);
        }
    }
}
From source file:myclassifier.myC45Pack.SplitModel.java
private void handleNominalAttribute(Instances dataSet) throws Exception { Instance instance;/* w ww .j a v a 2s . c o m*/ classDist = new ClassDistribution(numOfBranches, dataSet.numClasses()); Enumeration instanceEnum = dataSet.enumerateInstances(); while (instanceEnum.hasMoreElements()) { instance = (Instance) instanceEnum.nextElement(); if (!instance.isMissing(attribIndex)) { classDist.addInstance((int) instance.value(attribIndex), instance); } } // Check if minimum number of Instances in at least two // subsets. if (classDist.isSplitable(minInstances)) { numSubsets = numOfBranches; infoGain = classDist.calculateInfoGain(totalWeights); gainRatio = classDist.calculateGainRatio(infoGain); } }
From source file:myclassifier.myC45Pack.SplitModel.java
private void handleNumericAttribute(Instances dataSet) throws Exception { int firstMiss; int next = 1; int last = 0; int splitIndex = -1; double currentInfoGain; double currentGainRatio; double minSplit; Instance instance;// ww w .ja v a 2 s. co m int i; boolean instanceMissing = false; // Current attribute is a numeric attribute. classDist = new ClassDistribution(2, dataSet.numClasses()); // Only Instances with known values are relevant. Enumeration instanceEnum = dataSet.enumerateInstances(); i = 0; while ((instanceEnum.hasMoreElements() && (!instanceMissing))) { instance = (Instance) instanceEnum.nextElement(); if (instance.isMissing(attribIndex)) { instanceMissing = true; } else { classDist.addInstance(1, instance); i++; } } firstMiss = i; // Compute minimum number of Instances required in each // subset. minSplit = 0.1 * (classDist.getTotalWeight()) / ((double) dataSet.numClasses()); if (minSplit <= minInstances) { minSplit = minInstances; } else if (minSplit > 25) { minSplit = 25; } // Enough Instances with known values? if ((double) firstMiss < 2 * minSplit) { return; } // Compute values of criteria for all possible split // indices. //defaultEnt = infoGainCrit.oldEnt(m_distribution); while (next < firstMiss) { if (dataSet.instance(next - 1).value(attribIndex) + 1e-5 < dataSet.instance(next).value(attribIndex)) { // Move class values for all Instances up to next // possible split point. classDist.moveInstancesWithRange(1, 0, dataSet, last, next); // Check if enough Instances in each subset and compute // values for criteria. if ((classDist.w_perSubdataset[0] >= minSplit) && (classDist.w_perSubdataset[1] >= minSplit)) { currentInfoGain = classDist.calculateInfoGain(totalWeights); currentGainRatio = classDist.calculateGainRatio(totalWeights); if (currentGainRatio >= gainRatio) { infoGain = currentInfoGain; gainRatio = currentGainRatio; splitIndex = next - 1; } numOfSplitPoints++; } last = next; } next++; } // Was there any useful split? 
if (numOfSplitPoints == 0) { return; } // Compute modified information gain for best split. infoGain = infoGain - (classDist.log2(numOfSplitPoints) / totalWeights); if (infoGain > 0) { // Set instance variables' values to values for // best split. numSubsets = 2; splitPointValue = (dataSet.instance(splitIndex + 1).value(attribIndex) + dataSet.instance(splitIndex).value(attribIndex)) / 2; // In case we have a numerical precision problem we need to choose the // smaller value if (splitPointValue == dataSet.instance(splitIndex + 1).value(attribIndex)) { splitPointValue = dataSet.instance(splitIndex).value(attribIndex); } // Restore distributioN for best split. classDist = new ClassDistribution(2, dataSet.numClasses()); classDist.addRange(0, dataSet, 0, splitIndex + 1); classDist.addRange(1, dataSet, splitIndex + 1, firstMiss); // Compute modified gain ratio for best split. gainRatio = classDist.calculateGainRatio(infoGain); } }