weka.core.AbstractInstance.java Source code

Java tutorial

Introduction

Here is the source code for weka.core.AbstractInstance.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    AbstractInstance.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core;

import java.io.Serializable;
import java.util.Enumeration;

/**
 * Abstract class providing common functionality for the original instance
 * implementations.
 * 
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision$
 */
public abstract class AbstractInstance implements Instance, Serializable, RevisionHandler {

    /** for serialization */
    static final long serialVersionUID = 1482635194499365155L;

    /**
     * The dataset the instance has access to. Null if the instance doesn't have
     * access to any dataset. Only if an instance has access to a dataset, it
     * knows about the actual attribute types.
     */
    protected/* @spec_public@ */Instances m_Dataset;

    /** The instance's attribute values. */
    protected/* @spec_public non_null@ */double[] m_AttValues;

    /** The instance's weight. */
    protected double m_Weight;

    /** Default max number of digits after the decimal point for numeric values */
    public static int s_numericAfterDecimalPoint = 6;

    /**
     * Returns the attribute with the given index.
     * 
     * @param index the attribute's index
     * @return the attribute at the given position
     * @throws UnassignedDatasetException if instance doesn't have access to a
     *           dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public/* @pure@ */Attribute attribute(int index) {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return m_Dataset.attribute(index);
    }

    /**
     * Returns the attribute with the given index in the sparse representation.
     * 
     * @param indexOfIndex the index of the attribute's index
     * @return the attribute at the given position
     * @throws UnassignedDatasetException if instance doesn't have access to a
     *           dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public/* @pure@ */Attribute attributeSparse(int indexOfIndex) {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return m_Dataset.attribute(index(indexOfIndex));
    }

    /**
     * Returns class attribute.
     * 
     * @return the class attribute
     * @throws UnassignedDatasetException if the class is not set or the instance
     *           doesn't have access to a dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public/* @pure@ */Attribute classAttribute() {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return m_Dataset.classAttribute();
    }

    /**
     * Returns the class attribute's index.
     * 
     * @return the class index as an integer
     * @throws UnassignedDatasetException if instance doesn't have access to a
     *           dataset
     */
    // @ requires m_Dataset != null;
    // @ ensures \result == m_Dataset.classIndex();
    @Override
    public/* @pure@ */int classIndex() {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return m_Dataset.classIndex();
    }

    /**
     * Tests if an instance's class is missing.
     * 
     * @return true if the instance's class is missing
     * @throws UnassignedClassException if the class is not set or the instance
     *           doesn't have access to a dataset
     */
    // @ requires classIndex() >= 0;
    @Override
    public/* @pure@ */boolean classIsMissing() {

        int classIndex = classIndex();
        if (classIndex < 0) {
            throw new UnassignedClassException("Class is not set!");
        }
        return isMissing(classIndex);
    }

    /**
     * Returns an instance's class value in internal format. (ie. as a
     * floating-point number)
     * 
     * @return the corresponding value as a double (If the corresponding attribute
     *         is nominal (or a string) then it returns the value's index as a
     *         double).
     * @throws UnassignedClassException if the class is not set or the instance
     *           doesn't have access to a dataset
     */
    // @ requires classIndex() >= 0;
    @Override
    public/* @pure@ */double classValue() {

        int classIndex = classIndex();
        if (classIndex < 0) {
            throw new UnassignedClassException("Class is not set!");
        }
        return value(classIndex);
    }

    /**
     * Returns the dataset this instance has access to. (ie. obtains information
     * about attribute types from) Null if the instance doesn't have access to a
     * dataset.
     * 
     * @return the dataset the instance has accesss to
     */
    // @ ensures \result == m_Dataset;
    @Override
    public/* @pure@ */Instances dataset() {

        return m_Dataset;
    }

    /**
     * Deletes an attribute at the given position (0 to numAttributes() - 1). Only
     * succeeds if the instance does not have access to any dataset because
     * otherwise inconsistencies could be introduced.
     * 
     * @param position the attribute's position
     * @throws RuntimeException if the instance has access to a dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public void deleteAttributeAt(int position) {

        if (m_Dataset != null) {
            throw new RuntimeException("Instance has access to a dataset!");
        }
        forceDeleteAttributeAt(position);
    }

    /**
     * Returns an enumeration of all the attributes.
     * 
     * @return enumeration of all the attributes
     * @throws UnassignedDatasetException if the instance doesn't have access to a
     *           dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public/* @pure@ */Enumeration<Attribute> enumerateAttributes() {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return m_Dataset.enumerateAttributes();
    }

    /**
     * Tests if the headers of two instances are equivalent.
     * 
     * @param inst another instance
     * @return true if the header of the given instance is equivalent to this
     *         instance's header
     * @throws UnassignedDatasetException if instance doesn't have access to any
     *           dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public/* @pure@ */boolean equalHeaders(Instance inst) {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return m_Dataset.equalHeaders(inst.dataset());
    }

    /**
     * Checks if the headers of two instances are equivalent. If not, then returns
     * a message why they differ.
     * 
     * @param dataset another instance
     * @return null if the header of the given instance is equivalent to this
     *         instance's header, otherwise a message with details on why they
     *         differ
     */
    @Override
    public String equalHeadersMsg(Instance inst) {
        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }

        return m_Dataset.equalHeadersMsg(inst.dataset());
    }

    /**
     * Tests whether an instance has a missing value. Skips the class attribute if
     * set.
     * 
     * @return true if instance has a missing value.
     * @throws UnassignedDatasetException if instance doesn't have access to any
     *           dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public/* @pure@ */boolean hasMissingValue() {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        int classIndex = classIndex();
        for (int i = 0; i < numValues(); i++) {
            if (index(i) != classIndex) {
                if (isMissingSparse(i)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Inserts an attribute at the given position (0 to numAttributes()). Only
     * succeeds if the instance does not have access to any dataset because
     * otherwise inconsistencies could be introduced.
     * 
     * @param position the attribute's position
     * @throws RuntimeException if the instance has accesss to a dataset
     * @throws IllegalArgumentException if the position is out of range
     */
    // @ requires m_Dataset == null;
    // @ requires 0 <= position && position <= numAttributes();
    @Override
    public void insertAttributeAt(int position) {

        if (m_Dataset != null) {
            throw new RuntimeException("Instance has accesss to a dataset!");
        }
        if ((position < 0) || (position > numAttributes())) {
            throw new IllegalArgumentException("Can't insert attribute: index out " + "of range");
        }
        forceInsertAttributeAt(position);
    }

    /**
     * Tests if a specific value is "missing".
     * 
     * @param attIndex the attribute's index
     * @return true if the value is "missing"
     */
    @Override
    public/* @pure@ */boolean isMissing(int attIndex) {

        if (Utils.isMissingValue(value(attIndex))) {
            return true;
        }
        return false;
    }

    /**
     * Tests if a specific value is "missing", given an index in the sparse
     * representation.
     * 
     * @param indexOfIndex the index of the attribute's index
     * @return true if the value is "missing"
     */
    @Override
    public/* @pure@ */boolean isMissingSparse(int indexOfIndex) {

        if (Utils.isMissingValue(valueSparse(indexOfIndex))) {
            return true;
        }
        return false;
    }

    /**
     * Tests if a specific value is "missing". The given attribute has to belong
     * to a dataset.
     * 
     * @param att the attribute
     * @return true if the value is "missing"
     */
    @Override
    public/* @pure@ */boolean isMissing(Attribute att) {

        return isMissing(att.index());
    }

    /**
     * Returns the number of class labels.
     * 
     * @return the number of class labels as an integer if the class attribute is
     *         nominal, 1 otherwise.
     * @throws UnassignedDatasetException if instance doesn't have access to any
     *           dataset
     */
    // @ requires m_Dataset != null;
    @Override
    public/* @pure@ */int numClasses() {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return m_Dataset.numClasses();
    }

    /**
     * Sets the class value of an instance to be "missing". A deep copy of the
     * vector of attribute values is performed before the value is set to be
     * missing.
     * 
     * @throws UnassignedClassException if the class is not set
     * @throws UnassignedDatasetException if the instance doesn't have access to a
     *           dataset
     */
    // @ requires classIndex() >= 0;
    @Override
    public void setClassMissing() {

        int classIndex = classIndex();
        if (classIndex < 0) {
            throw new UnassignedClassException("Class is not set!");
        }
        setMissing(classIndex);
    }

    /**
     * Sets the class value of an instance to the given value (internal
     * floating-point format). A deep copy of the vector of attribute values is
     * performed before the value is set.
     * 
     * @param value the new attribute value (If the corresponding attribute is
     *          nominal (or a string) then this is the new value's index as a
     *          double).
     * @throws UnassignedClassException if the class is not set
     * @throws UnaddignedDatasetException if the instance doesn't have access to a
     *           dataset
     */
    // @ requires classIndex() >= 0;
    @Override
    public void setClassValue(double value) {

        int classIndex = classIndex();
        if (classIndex < 0) {
            throw new UnassignedClassException("Class is not set!");
        }
        setValue(classIndex, value);
    }

    /**
     * Sets the class value of an instance to the given value. A deep copy of the
     * vector of attribute values is performed before the value is set.
     * 
     * @param value the new class value (If the class is a string attribute and
     *          the value can't be found, the value is added to the attribute).
     * @throws UnassignedClassException if the class is not set
     * @throws UnassignedDatasetException if the dataset is not set
     * @throws IllegalArgumentException if the attribute is not nominal or a
     *           string, or the value couldn't be found for a nominal attribute
     */
    // @ requires classIndex() >= 0;
    @Override
    public final void setClassValue(String value) {

        int classIndex = classIndex();
        if (classIndex < 0) {
            throw new UnassignedClassException("Class is not set!");
        }
        setValue(classIndex, value);
    }

    /**
     * Sets the reference to the dataset. Does not check if the instance is
     * compatible with the dataset. Note: the dataset does not know about this
     * instance. If the structure of the dataset's header gets changed, this
     * instance will not be adjusted automatically.
     * 
     * @param instances the reference to the dataset
     */
    @Override
    public final void setDataset(Instances instances) {

        m_Dataset = instances;
    }

    /**
     * Sets a specific value to be "missing". Performs a deep copy of the vector
     * of attribute values before the value is set to be missing.
     * 
     * @param attIndex the attribute's index
     */
    @Override
    public final void setMissing(int attIndex) {

        setValue(attIndex, Utils.missingValue());
    }

    /**
     * Sets a specific value to be "missing". Performs a deep copy of the vector
     * of attribute values before the value is set to be missing. The given
     * attribute has to belong to a dataset.
     * 
     * @param att the attribute
     */
    @Override
    public final void setMissing(Attribute att) {

        setMissing(att.index());
    }

    /**
     * Sets a value of a nominal or string attribute to the given value. Performs
     * a deep copy of the vector of attribute values before the value is set.
     * 
     * @param attIndex the attribute's index
     * @param value the new attribute value (If the attribute is a string
     *          attribute and the value can't be found, the value is added to the
     *          attribute).
     * @throws UnassignedDatasetException if the dataset is not set
     * @throws IllegalArgumentException if the selected attribute is not nominal
     *           or a string, or the supplied value couldn't be found for a
     *           nominal attribute
     */
    // @ requires m_Dataset != null;
    @Override
    public final void setValue(int attIndex, String value) {

        int valIndex;

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        if (!attribute(attIndex).isNominal() && !attribute(attIndex).isString()) {
            throw new IllegalArgumentException("Attribute neither nominal nor string!");
        }
        valIndex = attribute(attIndex).indexOfValue(value);
        if (valIndex == -1) {
            if (attribute(attIndex).isNominal()) {
                throw new IllegalArgumentException("Value not defined for given nominal attribute!");
            } else {
                attribute(attIndex).forceAddValue(value);
                valIndex = attribute(attIndex).indexOfValue(value);
            }
        }
        setValue(attIndex, valIndex);
    }

    /**
     * Sets a specific value in the instance to the given value (internal
     * floating-point format). Performs a deep copy of the vector of attribute
     * values before the value is set, so if you are planning on calling setValue
     * many times it may be faster to create a new instance using toDoubleArray.
     * The given attribute has to belong to a dataset.
     * 
     * @param att the attribute
     * @param value the new attribute value (If the corresponding attribute is
     *          nominal (or a string) then this is the new value's index as a
     *          double).
     */
    @Override
    public final void setValue(Attribute att, double value) {

        setValue(att.index(), value);
    }

    /**
     * Sets a value of an nominal or string attribute to the given value. Performs
     * a deep copy of the vector of attribute values before the value is set, so
     * if you are planning on calling setValue many times it may be faster to
     * create a new instance using toDoubleArray. The given attribute has to
     * belong to a dataset.
     * 
     * @param att the attribute
     * @param value the new attribute value (If the attribute is a string
     *          attribute and the value can't be found, the value is added to the
     *          attribute).
     * @throws IllegalArgumentException if the the attribute is not nominal or a
     *           string, or the value couldn't be found for a nominal attribute
     */
    @Override
    public final void setValue(Attribute att, String value) {

        if (!att.isNominal() && !att.isString()) {
            throw new IllegalArgumentException("Attribute neither nominal nor string!");
        }
        int valIndex = att.indexOfValue(value);
        if (valIndex == -1) {
            if (att.isNominal()) {
                throw new IllegalArgumentException("Value not defined for given nominal attribute!");
            } else {
                att.forceAddValue(value);
                valIndex = att.indexOfValue(value);
            }
        }
        setValue(att.index(), valIndex);
    }

    /**
     * Sets the weight of an instance.
     * 
     * @param weight the weight
     */
    @Override
    public final void setWeight(double weight) {

        m_Weight = weight;
    }

    /**
     * Returns the relational value of a relational attribute.
     * 
     * @param attIndex the attribute's index
     * @return the corresponding relation as an Instances object
     * @throws IllegalArgumentException if the attribute is not a relation-valued
     *           attribute
     * @throws UnassignedDatasetException if the instance doesn't belong to a
     *           dataset.
     */
    // @ requires m_Dataset != null;
    @Override
    public final/* @pure@ */Instances relationalValue(int attIndex) {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return relationalValue(m_Dataset.attribute(attIndex));
    }

    /**
     * Returns the relational value of a relational attribute.
     * 
     * @param att the attribute
     * @return the corresponding relation as an Instances object, null if missing
     * @throws IllegalArgumentException if the attribute is not a relation-valued
     *           attribute
     * @throws UnassignedDatasetException if the instance doesn't belong to a
     *           dataset.
     */
    @Override
    public final/* @pure@ */Instances relationalValue(Attribute att) {

        int attIndex = att.index();
        if (att.isRelationValued()) {
            if (isMissing(attIndex)) {
                return null;
            }
            return att.relation((int) value(attIndex));
        } else {
            throw new IllegalArgumentException("Attribute isn't relation-valued!");
        }
    }

    /**
     * Returns the value of a nominal, string, date, or relational attribute for
     * the instance as a string.
     * 
     * @param attIndex the attribute's index
     * @return the value as a string
     * @throws IllegalArgumentException if the attribute is not a nominal, string,
     *           date, or relation-valued attribute.
     * @throws UnassignedDatasetException if the instance doesn't belong to a
     *           dataset.
     */
    // @ requires m_Dataset != null;
    @Override
    public final/* @pure@ */String stringValue(int attIndex) {

        if (m_Dataset == null) {
            throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
        }
        return stringValue(m_Dataset.attribute(attIndex));
    }

    /**
     * Returns the value of a nominal, string, date, or relational attribute for
     * the instance as a string.
     * 
     * @param att the attribute
     * @return the value as a string
     * @throws IllegalArgumentException if the attribute is not a nominal, string,
     *           date, or relation-valued attribute.
     * @throws UnassignedDatasetException if the instance doesn't belong to a
     *           dataset.
     */
    @Override
    public final/* @pure@ */String stringValue(Attribute att) {

        int attIndex = att.index();
        if (isMissing(attIndex)) {
            return "?";
        }
        switch (att.type()) {
        case Attribute.NOMINAL:
        case Attribute.STRING:
            return att.value((int) value(attIndex));
        case Attribute.DATE:
            return att.formatDate(value(attIndex));
        case Attribute.RELATIONAL:
            return att.relation((int) value(attIndex)).stringWithoutHeader();
        default:
            throw new IllegalArgumentException("Attribute isn't nominal, string or date!");
        }
    }

    /**
     * Returns the description of one instance with any numeric values printed at
     * the supplied maximum number of decimal places. If the instance doesn't have
     * access to a dataset, it returns the internal floating-point values. Quotes
     * string values that contain whitespace characters.
     * 
     * @param afterDecimalPoint the maximum number of digits permitted after the
     *          decimal point for a numeric value
     * 
     * @return the instance's description as a string
     */
    @Override
    public final String toStringMaxDecimalDigits(int afterDecimalPoint) {
        StringBuffer text = new StringBuffer(toStringNoWeight(afterDecimalPoint));

        if (m_Weight != 1.0) {
            text.append(",{" + Utils.doubleToString(m_Weight, afterDecimalPoint) + "}");
        }

        return text.toString();
    }

    /**
     * Returns the description of one instance. If the instance doesn't have
     * access to a dataset, it returns the internal floating-point values. Quotes
     * string values that contain whitespace characters.
     * 
     * @return the instance's description as a string
     */
    @Override
    public String toString() {

        return toStringMaxDecimalDigits(s_numericAfterDecimalPoint);
    }

    /**
     * Returns the description of one value of the instance as a string. If the
     * instance doesn't have access to a dataset, it returns the internal
     * floating-point value. Quotes string values that contain whitespace
     * characters, or if they are a question mark.
     * 
     * @param attIndex the attribute's index
     * @return the value's description as a string
     */
    @Override
    public final String toString(int attIndex) {
        return toString(attIndex, s_numericAfterDecimalPoint);
    }

    /**
     * Returns the description of one value of the instance as a string. If the
     * instance doesn't have access to a dataset, it returns the internal
     * floating-point value. Quotes string values that contain whitespace
     * characters, or if they are a question mark.
     * 
     * @param attIndex the attribute's index
     * @param afterDecimalPoint the maximum number of digits permitted after the
     *          decimal point for numeric values
     * @return the value's description as a string
     */
    @Override
    public final/* @pure@ */String toString(int attIndex, int afterDecimalPoint) {

        StringBuffer text = new StringBuffer();

        if (isMissing(attIndex)) {
            text.append("?");
        } else {
            if (m_Dataset == null) {
                text.append(Utils.doubleToString(value(attIndex), afterDecimalPoint));
            } else {
                switch (m_Dataset.attribute(attIndex).type()) {
                case Attribute.NOMINAL:
                case Attribute.STRING:
                case Attribute.DATE:
                case Attribute.RELATIONAL:
                    text.append(Utils.quote(stringValue(attIndex)));
                    break;
                case Attribute.NUMERIC:
                    text.append(Utils.doubleToString(value(attIndex), afterDecimalPoint));
                    break;
                default:
                    throw new IllegalStateException("Unknown attribute type");
                }
            }
        }
        return text.toString();
    }

    /**
     * Returns the description of one value of the instance as a string. If the
     * instance doesn't have access to a dataset it returns the internal
     * floating-point value. Quotes string values that contain whitespace
     * characters, or if they are a question mark. The given attribute has to
     * belong to a dataset.
     * 
     * @param att the attribute
     * @return the value's description as a string
     */
    @Override
    public final String toString(Attribute att) {

        return toString(att.index());
    }

    /**
     * Returns the description of one value of the instance as a string. If the
     * instance doesn't have access to a dataset it returns the internal
     * floating-point value. Quotes string values that contain whitespace
     * characters, or if they are a question mark. The given attribute has to
     * belong to a dataset.
     * 
     * @param att the attribute
     * @param afterDecimalPoint the maximum number of decimal places to print
     * @return the value's description as a string
     */
    @Override
    public final String toString(Attribute att, int afterDecimalPoint) {

        return toString(att.index(), afterDecimalPoint);
    }

    /**
     * Returns an instance's attribute value in internal format. The given
     * attribute has to belong to a dataset.
     * 
     * @param att the attribute
     * @return the specified value as a double (If the corresponding attribute is
     *         nominal (or a string) then it returns the value's index as a
     *         double).
     */
    @Override
    public/* @pure@ */double value(Attribute att) {

        return value(att.index());
    }

    /**
     * Returns an instance's attribute value in internal format, given an index in
     * the sparse representation.
     * 
     * @param indexOfIndex the index of the attribute's index
     * @return the specified value as a double (If the corresponding attribute is
     *         nominal (or a string) then it returns the value's index as a
     *         double).
     */
    @Override
    public/* @pure@ */double valueSparse(int indexOfIndex) {

        return m_AttValues[indexOfIndex];
    }

    /**
     * Returns the instance's weight.
     * 
     * @return the instance's weight as a double
     */
    @Override
    public final/* @pure@ */double weight() {

        return m_Weight;
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }

    /**
     * Deletes an attribute at the given position (0 to numAttributes() - 1).
     * 
     * @param position the attribute's position
     */
    protected abstract void forceDeleteAttributeAt(int position);

    /**
     * Inserts an attribute at the given position (0 to numAttributes()) and sets
     * its value to be missing.
     * 
     * @param position the attribute's position
     */
    protected abstract void forceInsertAttributeAt(int position);
}