List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:meka.gui.dataviewer.DataViewerMainPanel.java
License: Open Source License
/** * displays some properties of the instances *///from www.j av a 2s . c om public void showProperties() { DataPanel panel; ListSelectorDialog dialog; Vector<String> props; Instances inst; panel = getCurrentPanel(); if (panel == null) { return; } inst = panel.getInstances(); if (inst == null) { return; } if (inst.classIndex() < 0) { inst.setClassIndex(inst.numAttributes() - 1); } // get some data props = new Vector<String>(); props.add("Filename: " + panel.getFilename()); props.add("Relation name: " + inst.relationName()); props.add("# of instances: " + inst.numInstances()); props.add("# of attributes: " + inst.numAttributes()); props.add("Class attribute: " + inst.classAttribute().name()); props.add("# of class labels: " + inst.numClasses()); dialog = new ListSelectorDialog(getParentFrame(), new JList(props)); dialog.showDialog(); }
From source file:ml.ann.MultiClassPTR.java
@Override public void buildClassifier(Instances instances) throws Exception { initAttributes(instances);//from ww w . j a va 2 s. com // REMEMBER: only works if class index is in the last position for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) { Instance instance = instances.get(instanceIdx); double[] inputInstance = inputInstances[instanceIdx]; inputInstance[0] = 1.0; // initialize bias value for (int attrIdx = 0; attrIdx < instance.numAttributes() - 1; attrIdx++) { inputInstance[attrIdx + 1] = instance.value(attrIdx); // the first index of input instance is for bias } } // Initialize target values if (instances.classAttribute().isNominal()) { for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) { Instance instance = instances.instance(instanceIdx); for (int classIdx = 0; classIdx < instances.numClasses(); classIdx++) { targetInstances[instanceIdx][classIdx] = 0.0; } targetInstances[instanceIdx][(int) instance.classValue()] = 1.0; } } else { for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) { Instance instance = instances.instance(instanceIdx); targetInstances[instanceIdx][0] = instance.classValue(); } } if (algo == 1) { setActFunction(); buildClassifier(); } else if (algo == 2) { buildClassifier(); } else if (algo == 3) { buildClassifierBatch(); } }
From source file:moa.streams.ConceptDriftRealStream.java
License:Open Source License
@Override public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { this.inputStream = (InstanceStream) getPreparedClassOption(this.streamOption); this.driftStream = (InstanceStream) getPreparedClassOption(this.driftstreamOption); this.random = new Random(this.randomSeedOption.getValue()); numberInstanceStream = 0;//from w w w . j a v a 2 s . c o m if (this.alphaOption.getValue() != 0.0) { this.widthOption.setValue((int) (1 / Math.tan(this.alphaOption.getValue() * Math.PI / 180))); } // generate header Instances first = this.inputStream.getHeader(); Instances second = this.driftStream.getHeader(); FastVector newAttributes = new FastVector(); for (int i = 0; i < first.numAttributes() - 1; i++) { newAttributes.addElement(first.attribute(i)); } for (int i = 0; i < second.numAttributes() - 1; i++) { newAttributes.addElement(second.attribute(i)); } Attribute classLabels; if (first.numClasses() < second.numClasses()) { classLabels = second.classAttribute(); } else { classLabels = first.classAttribute(); } newAttributes.addElement(classLabels); this.streamHeader = new InstancesHeader( new Instances(getCLICreationString(InstanceStream.class), newAttributes, 0)); this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1); restart(); }
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * Evaluates the learner on the given instance set, accounting for
 * novel-class labels and time-boxed outliers.
 *
 * First, any queued outlier whose label deadline has passed is forced to a
 * final decision and scored; then each incoming instance is either scored
 * immediately or, when the learner flags it as an outlier, queued until its
 * deadline. Results feed both {@code evaluator} and the confusion matrix
 * {@code cm}.
 *
 * @param testInstances instance set to evaluate accuracy on
 * @return number of instances actually tested (the size of the input set)
 */
private int test(Instances testInstances) {
    this.monitor.setCurrentActivityDescription("Testing Instances");
    int ret = testInstances.size();
    // NOTE(review): label index numClasses is used as the synthetic "novel"
    // label and numClasses+1 as the "outlier" vote slot — confirm this
    // matches the augmented label layout used by the learner.
    int novelClassLabel = testInstances.numClasses();
    int outlierLabel = novelClassLabel + 1;

    // For latent-label outliers that have reached their deadline, we must
    // now make a final decision:
    while (!this.pendingFinalLabelInstQueue.isEmpty()
            && this.pendingFinalLabelInstQueue.peek().deadline <= this.instancesProcessed) {
        TimeBoxedInstance ti = this.pendingFinalLabelInstQueue.pop();
        int y = (int) ti.inst.classValue();
        double[] prediction = null;
        // Label still unknown within the deadline window: score it as a
        // novel-class instance (marked via a special weight).
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            Instance novelInst = (Instance) ti.inst.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            novelInst.setWeight(NOVEL_WEIGHT);
            prediction = learner.getVotesForInstance(novelInst);
            evaluator.addResult(novelInst, prediction); // Outlier out of time. Remove it
        } else {
            prediction = learner.getVotesForInstance(ti.inst);
            evaluator.addResult(ti.inst, prediction); // Outlier out of time. Remove it
        }
        this.cm.add(weka.core.Utils.maxIndex(prediction), ti.inst.classValue());
    }

    // Run accuracy test for current instance(s)
    for (Instance i : testInstances) {
        int y = (int) i.classValue();
        double[] prediction = null;
        Instance instToActuallyPredict = i;
        // If novel, make a special instance
        if (y >= 0 && y < knownLabels.length && knownLabels[y] <= this.labelDeadlineOption.getValue()) {
            instToActuallyPredict = (Instance) i.copy();
            //novelInst.setDataset(AbstractNovelClassClassifier.augmentInstances(novelInst.dataset()));
            //novelInst.setClassValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR);
            // WARNING - this crashes other algorithms if not also done on training!
            instToActuallyPredict.setWeight(NOVEL_WEIGHT);
        }
        prediction = learner.getVotesForInstance(instToActuallyPredict);
        // Outlier vote above the uniform baseline: defer the decision until
        // the label deadline instead of scoring now.
        if ((prediction.length > outlierLabel) && (prediction[outlierLabel] > (1.0 / prediction.length))) {
            this.pendingFinalLabelInstQueue.add(new TimeBoxedInstance(i, this.instancesProcessed,
                    this.labelDeadlineOption.getValue(), prediction)); // Delay accuracy metrics until stale time
        } else {
            evaluator.addResult(instToActuallyPredict, prediction); // Not an outlier, so treat it like normal
            this.cm.add(weka.core.Utils.maxIndex(prediction), i.classValue());
        }
    } // end for

    assert this.pendingFinalLabelInstQueue.size() < (this.labelDeadlineOption
            .getValue() + 1) : "Cache 'pendingFinalLabelInstQueue' is larger than designed.";
    return ret;
}
From source file:myclassifier.MyC45.java
/**
 * Recursively builds the decision tree for the given data.
 *
 * Note: the original comment called this an "ID3 tree", but the split
 * criterion used here is the gain ratio, i.e. the C4.5 criterion.
 *
 * @param data the training data for this node
 * @throws Exception if the decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {
    // Check if no instances have reached this node: make an "empty" leaf
    // with an undefined class value and an all-zero distribution.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = -1; //Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute the gain ratio of every attribute and pick the maximum.
    double[] gainRatios = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        gainRatios[att.index()] = computeGainRatio(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(gainRatios));

    // Make leaf if the best gain ratio is zero.
    // Otherwise create successors.
    if (Utils.eq(gainRatios[m_Attribute.index()], 0)) {
        m_Attribute = null;
        // Leaf: class distribution of the instances at this node; the
        // predicted class is the majority class.
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Split on the chosen attribute and grow one subtree per value.
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new MyC45[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyC45();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}
From source file:myclassifier.MyC45.java
private double computeEntropyFromData(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }//from w w w . ja v a 2 s . com double entropy = 0; for (int j = 0; j < data.numClasses(); j++) { if (classCounts[j] > 0) entropy -= (double) (classCounts[j] / data.numInstances()) * Utils.log2((double) classCounts[j] / data.numInstances()); } //return entropy + Utils.log2(data.numInstances()); return entropy; }
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Builds a class distribution over a single (whole) dataset: one subset
 * that receives every instance.
 *
 * @param dataSet the dataset to summarize
 * @throws Exception if an instance cannot be added
 */
public ClassDistribution(Instances dataSet) throws Exception {
    w_perClassPerSubdataset = new double[1][dataSet.numClasses()];
    w_perSubdataset = new double[1];
    w_perClass = new double[dataSet.numClasses()];
    totalWeights = 0;

    // every instance goes into the single subset 0
    Enumeration instances = dataSet.enumerateInstances();
    while (instances.hasMoreElements()) {
        addInstance(0, (Instance) instances.nextElement());
    }
}
From source file:myclassifier.myC45Pack.ClassDistribution.java
/**
 * Creates a distribution of the given instances over the subsets defined by
 * a split model. Instances whose subset cannot be determined (index -1,
 * e.g. a missing split value) are spread across subsets according to the
 * model's weights.
 *
 * @param source the instances to distribute
 * @param modelToUse the split model defining the subsets
 * @throws Exception if an instance cannot be added
 */
public ClassDistribution(Instances source, C45ClassifierSplitModel modelToUse) throws Exception {
    w_perClassPerSubdataset = new double[modelToUse.numSubsets()][0];
    w_perSubdataset = new double[modelToUse.numSubsets()];
    w_perClass = new double[source.numClasses()];
    totalWeights = 0;
    for (int subset = 0; subset < modelToUse.numSubsets(); subset++) {
        w_perClassPerSubdataset[subset] = new double[source.numClasses()];
    }

    Enumeration instances = source.enumerateInstances();
    while (instances.hasMoreElements()) {
        Instance current = (Instance) instances.nextElement();
        int subsetIndex = modelToUse.getSubsetIndex(current);
        if (subsetIndex == -1) {
            // subset unknown: distribute by the model's per-subset weights
            addWeights(current, modelToUse.getWeights(current));
        } else {
            addInstance(subsetIndex, current);
        }
    }
}
From source file:myclassifier.myC45Pack.SplitModel.java
private void handleNominalAttribute(Instances dataSet) throws Exception { Instance instance;/* w ww .j a v a 2s . c o m*/ classDist = new ClassDistribution(numOfBranches, dataSet.numClasses()); Enumeration instanceEnum = dataSet.enumerateInstances(); while (instanceEnum.hasMoreElements()) { instance = (Instance) instanceEnum.nextElement(); if (!instance.isMissing(attribIndex)) { classDist.addInstance((int) instance.value(attribIndex), instance); } } // Check if minimum number of Instances in at least two // subsets. if (classDist.isSplitable(minInstances)) { numSubsets = numOfBranches; infoGain = classDist.calculateInfoGain(totalWeights); gainRatio = classDist.calculateGainRatio(infoGain); } }
From source file:myclassifier.myC45Pack.SplitModel.java
private void handleNumericAttribute(Instances dataSet) throws Exception { int firstMiss; int next = 1; int last = 0; int splitIndex = -1; double currentInfoGain; double currentGainRatio; double minSplit; Instance instance;// ww w .ja v a 2 s. co m int i; boolean instanceMissing = false; // Current attribute is a numeric attribute. classDist = new ClassDistribution(2, dataSet.numClasses()); // Only Instances with known values are relevant. Enumeration instanceEnum = dataSet.enumerateInstances(); i = 0; while ((instanceEnum.hasMoreElements() && (!instanceMissing))) { instance = (Instance) instanceEnum.nextElement(); if (instance.isMissing(attribIndex)) { instanceMissing = true; } else { classDist.addInstance(1, instance); i++; } } firstMiss = i; // Compute minimum number of Instances required in each // subset. minSplit = 0.1 * (classDist.getTotalWeight()) / ((double) dataSet.numClasses()); if (minSplit <= minInstances) { minSplit = minInstances; } else if (minSplit > 25) { minSplit = 25; } // Enough Instances with known values? if ((double) firstMiss < 2 * minSplit) { return; } // Compute values of criteria for all possible split // indices. //defaultEnt = infoGainCrit.oldEnt(m_distribution); while (next < firstMiss) { if (dataSet.instance(next - 1).value(attribIndex) + 1e-5 < dataSet.instance(next).value(attribIndex)) { // Move class values for all Instances up to next // possible split point. classDist.moveInstancesWithRange(1, 0, dataSet, last, next); // Check if enough Instances in each subset and compute // values for criteria. if ((classDist.w_perSubdataset[0] >= minSplit) && (classDist.w_perSubdataset[1] >= minSplit)) { currentInfoGain = classDist.calculateInfoGain(totalWeights); currentGainRatio = classDist.calculateGainRatio(totalWeights); if (currentGainRatio >= gainRatio) { infoGain = currentInfoGain; gainRatio = currentGainRatio; splitIndex = next - 1; } numOfSplitPoints++; } last = next; } next++; } // Was there any useful split? 
if (numOfSplitPoints == 0) { return; } // Compute modified information gain for best split. infoGain = infoGain - (classDist.log2(numOfSplitPoints) / totalWeights); if (infoGain > 0) { // Set instance variables' values to values for // best split. numSubsets = 2; splitPointValue = (dataSet.instance(splitIndex + 1).value(attribIndex) + dataSet.instance(splitIndex).value(attribIndex)) / 2; // In case we have a numerical precision problem we need to choose the // smaller value if (splitPointValue == dataSet.instance(splitIndex + 1).value(attribIndex)) { splitPointValue = dataSet.instance(splitIndex).value(attribIndex); } // Restore distributioN for best split. classDist = new ClassDistribution(2, dataSet.numClasses()); classDist.addRange(0, dataSet, 0, splitIndex + 1); classDist.addRange(1, dataSet, splitIndex + 1, firstMiss); // Compute modified gain ratio for best split. gainRatio = classDist.calculateGainRatio(infoGain); } }