weka.core.InstanceComparator.java Source code

Java tutorial

Introduction

Here is the source code for weka.core.InstanceComparator.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * InstanceComparator.java
 * Copyright (C) 2005-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.Serializable;
import java.util.Collections;
import java.util.Comparator;

/**
 * A comparator for the Instance class. it can be used with or without the
 * class label. Missing values are sorted at the beginning.</br>
 * Can be used as comparator in the sorting and binary search algorithms of
 * <code>Arrays</code> and <code>Collections</code>.
 * Relational values are compared instance by instance with a nested 
 * InstanceComparator.
 *
 * @see     Instance
 * @author  FracPete (fracpete at cs dot waikato dot ac dot nz)
 * @version $Revision$
 * @see     java.util.Arrays
 * @see     java.util.Collections
 */
public class InstanceComparator implements Comparator<Instance>, Serializable, RevisionHandler {

    /** for serialization */
    private static final long serialVersionUID = -6589278678230949683L;

    /** whether to include the class in the comparison */
    protected boolean m_IncludeClass;

    /** the range of attributes to use for comparison. */
    protected Range m_Range;

    /**
     * Initializes the comparator and includes the class in the comparison
     * and all attributes included.
     */
    public InstanceComparator() {
        this(true);
    }

    /**
     * Initializes the comparator with all attributes included.
     * 
     * @param includeClass   whether to include the class in the comparison
     */
    public InstanceComparator(boolean includeClass) {
        this(includeClass, "first-last", false);
    }

    /**
     * Initializes the comparator.
     * 
     * @param includeClass   whether to include the class in the comparison
     * @param range      the attribute range string
     * @param invert      whether to invert the matching sense of the att range
     */
    public InstanceComparator(boolean includeClass, String range, boolean invert) {
        super();

        m_Range = new Range();

        setIncludeClass(includeClass);
        setRange(range);
        setInvert(invert);
    }

    /**
     * Sets whether the class should be included in the comparison.
     * 
     * @param includeClass   true if to include the class in the comparison 
     */
    public void setIncludeClass(boolean includeClass) {
        m_IncludeClass = includeClass;
    }

    /**
     * Returns whether the class is included in the comparison.
     * 
     * @return         true if the class is included
     */
    public boolean getIncludeClass() {
        return m_IncludeClass;
    }

    /**
     * Sets the attribute range to use for comparison.
     * 
     * @param value      the attribute range
     */
    public void setRange(String value) {
        m_Range.setRanges(value);
    }

    /**
     * Returns the attribute range to use in the comparison.
     * 
     * @return         the attribute range
     */
    public String getRange() {
        return m_Range.getRanges();
    }

    /**
     * Sets whether to invert the matching sense of the attribute range.
     * 
     * @param invert      true if to invert the matching sense
     */
    public void setInvert(boolean invert) {
        m_Range.setInvert(invert);
    }

    /**
     * Returns whether the matching sense of the attribute range is inverted.
     * 
     * @return         true if the matching sense is inverted
     */
    public boolean getInvert() {
        return m_Range.getInvert();
    }

    /**
     * compares the two instances, returns -1 if o1 is smaller than o2, 0
     * if equal and +1 if greater. The method assumes that both instance objects
     * have the same attributes, they don't have to belong to the same dataset.
     * 
     * @param inst1   the first instance to compare
     * @param inst2   the second instance to compare
     * @return      returns -1 if inst1 is smaller than inst2, 0 if equal and +1 
     *          if greater
     */
    public int compare(Instance inst1, Instance inst2) {
        int result;
        int classindex;
        int i;
        Instances data1;
        Instances data2;
        int n;
        InstanceComparator comp;

        m_Range.setUpper(inst1.numAttributes() - 1);

        // get class index
        if (inst1.classIndex() == -1)
            classindex = inst1.numAttributes() - 1;
        else
            classindex = inst1.classIndex();

        result = 0;
        for (i = 0; i < inst1.numAttributes(); i++) {
            // in selected range?
            if (!m_Range.isInRange(i))
                continue;

            // exclude class?
            if (!getIncludeClass() && (i == classindex))
                continue;

            // comparing attribute values
            // 1. special handling if missing value (NaN) is involved:
            if (inst1.isMissing(i) || inst2.isMissing(i)) {
                if (inst1.isMissing(i) && inst2.isMissing(i)) {
                    continue;
                } else {
                    if (inst1.isMissing(i))
                        result = -1;
                    else
                        result = 1;
                    break;
                }
            }
            // 2. regular values:
            else {
                switch (inst1.attribute(i).type()) {
                case Attribute.STRING:
                    result = inst1.stringValue(i).compareTo(inst2.stringValue(i));
                    break;
                case Attribute.RELATIONAL:
                    data1 = inst1.relationalValue(i);
                    data2 = inst2.relationalValue(i);
                    n = 0;
                    comp = new InstanceComparator();
                    while ((n < data1.numInstances()) && (n < data2.numInstances()) && (result == 0)) {
                        result = comp.compare(data1.instance(n), data2.instance(n));
                        n++;
                    }
                    break;
                default:
                    if (inst1.value(i) == inst2.value(i)) {
                        continue;
                    } else {
                        if (inst1.value(i) < inst2.value(i))
                            result = -1;
                        else
                            result = 1;
                        break;
                    }
                }
            }
            if (result != 0)
                break;
        }

        return result;
    }

    /**
     * Returns the revision string.
     * 
     * @return      the revision
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }

    /**
     * for testing only. takes an ARFF-filename as first argument to perform
     * some tests. 
     */
    public static void main(String[] args) throws Exception {
        Instances inst;
        Comparator<Instance> comp;

        if (args.length == 0)
            return;

        // read instances
        inst = new Instances(new BufferedReader(new FileReader(args[0])));
        inst.setClassIndex(inst.numAttributes() - 1);

        // compare incl. class
        comp = new InstanceComparator();
        System.out.println("\nIncluding the class");
        System.out.println("comparing 1. instance with 1.: " + comp.compare(inst.instance(0), inst.instance(0)));
        System.out.println("comparing 1. instance with 2.: " + comp.compare(inst.instance(0), inst.instance(1)));
        System.out.println("comparing 2. instance with 1.: " + comp.compare(inst.instance(1), inst.instance(0)));

        // compare excl. class
        comp = new InstanceComparator(false);
        System.out.println("\nExcluding the class");
        System.out.println("comparing 1. instance with 1.: " + comp.compare(inst.instance(0), inst.instance(0)));
        System.out.println("comparing 1. instance with 2.: " + comp.compare(inst.instance(0), inst.instance(1)));
        System.out.println("comparing 2. instance with 1.: " + comp.compare(inst.instance(1), inst.instance(0)));

        // sort the data on all attributes
        Instances tmp = new Instances(inst);
        Collections.sort(tmp, new InstanceComparator(false));
        System.out.println("\nSorted on all attributes");
        System.out.println(tmp);

        // sort the data on 2nd attribute
        tmp = new Instances(inst);
        Collections.sort(tmp, new InstanceComparator(false, "2", false));
        System.out.println("\nSorted on 2nd attribute");
        System.out.println(tmp);
    }
}