moa.classifiers.meta.OzaBagASHT.java Source code

Introduction

Here is the source code for moa.classifiers.meta.OzaBagASHT.java
Source

/*
 *    OzaBagASHT.java
 *    Copyright (C) 2008 University of Waikato, Hamilton, New Zealand
 *    @author Albert Bifet (abifet at cs dot waikato dot ac dot nz)
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program. If not, see <http://www.gnu.org/licenses/>.
 *    
 */
package moa.classifiers.meta;

import moa.classifiers.Classifier;
import moa.classifiers.trees.ASHoeffdingTree;
import moa.core.DoubleVector;
import moa.core.MiscUtils;
import moa.options.IntOption;
import moa.options.FlagOption;
import weka.core.Instance;
import weka.core.Utils;

/**
 * Bagging using trees of different size.
 * The Adaptive-Size Hoeffding Tree (ASHT) is derived from the Hoeffding Tree
 * algorithm with the following differences:
 * <ul>
 * <li> it has a maximum number of split nodes, or size
 * <li> after one node splits, if the number of split nodes of the ASHT tree
 * is higher than the maximum value, then it deletes some nodes to reduce its size
 * </ul>
 * The intuition behind this method is as follows: smaller trees adapt
 * more quickly to changes, and larger trees do better during periods with
 * no or little change, simply because they were built on more data. Trees
 * limited to size s will be reset about twice as often as trees with a size
 * limit of 2s. This creates a set of different reset-speeds for an ensemble of such
 * trees, and therefore a subset of trees that are a good approximation for the
 * current rate of change. It is important to note that resets will happen all
 * the time, even for stationary datasets, but this behaviour should not have
 * a negative impact on the ensemble's predictive performance.
 * When the tree size exceeds the maximum size value, there are two different
 * delete options: <ul>
 * <li> delete the oldest node, the root, and all of its children except the one
 * where the split has been made. After that, the root of the child not
 * deleted becomes the new root
 * <li> delete all the nodes of the tree, i.e., restart from a new root.
 * </ul>
 * The maximum allowed size for the n-th ASHT tree is twice the maximum
 * allowed size for the (n-1)-th tree. Moreover, each tree has a weight
 * proportional to the inverse of the square of its error, and it monitors its
 * error with an exponential weighted moving average (EWMA) with alpha = .01.
 * The size of the first tree is set by the -f option (default 1; the paper
 * cited below uses 2).
 * <br/><br/>
 * With this new method, it is attempted to improve bagging performance
 * by increasing tree diversity. It has been observed that boosting tends to
 * produce a more diverse set of classifiers than bagging, and this has been
 * cited as a factor in increased performance.<br/>
 * See more details in:<br/><br/>
 * Albert Bifet, Geoff Holmes, Bernhard Pfahringer, Richard Kirkby,
 * and Ricard Gavald&agrave;. New ensemble methods for evolving data
 * streams. In 15th ACM SIGKDD International Conference on Knowledge
 * Discovery and Data Mining, 2009.<br/><br/>
 * The learner must be ASHoeffdingTree, a Hoeffding Tree with a maximum
 * size value.<br/><br/>
 * Example:<br/><br/>
 * <code>OzaBagASHT -l ASHoeffdingTree -s 10 -u -e </code>
 * Parameters:<ul>
 * <li>Same parameters as <code>OzaBag</code>
 * <li>-f : the size of first classifier in the bag.
 * <li>-u : Enable weight classifiers
 * <li>-e : Reset trees when size is higher than the max
 * </ul>
 *
 * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz)
 * @version $Revision: 7 $
 */
public class OzaBagASHT extends OzaBag {

    private static final long serialVersionUID = 1L;

    /**
     * Lower bound applied to a member's squared error before it is inverted
     * into a vote weight. A member that has not yet misclassified anything has
     * an error of exactly 0.0; without this floor its weight would be
     * {@code 1.0 / 0.0 == Infinity}, and scaling a vote that contains zeros by
     * infinity yields {@code NaN} entries that corrupt the combined vote.
     */
    private static final double MIN_SQUARED_ERROR = 1e-12;

    @Override
    public String getPurposeString() {
        return "Bagging using trees of different size.";
    }

    /** Maximum size given to the first ensemble member; each later member gets twice the previous one. */
    public IntOption firstClassifierSizeOption = new IntOption("firstClassifierSize", 'f',
            "The size of first classifier in the bag.", 1, 1, Integer.MAX_VALUE);

    /** When set, each member's vote is weighted by the inverse of its squared error estimate. */
    public FlagOption useWeightOption = new FlagOption("useWeight", 'u', "Enable weight classifiers.");

    /** When set, {@code setResetTree()} is called on every member tree during reset. */
    public FlagOption resetTreesOption = new FlagOption("resetTrees", 'e',
            "Reset trees when size is higher than the max.");

    /** Per-member error estimate, maintained as an exponentially weighted moving average. */
    protected double[] error;

    /** Smoothing factor of the error moving average. */
    protected double alpha = 0.01;

    /**
     * Rebuilds the ensemble from copies of the configured base learner.
     * Member {@code i} receives a maximum size of
     * {@code firstClassifierSize * 2^i}, saturated at
     * {@link Integer#MAX_VALUE} so that large ensembles do not wrap around to
     * zero or negative sizes. All error estimates start at 0.0.
     *
     * @throws ClassCastException if the base learner is not an
     *         {@code ASHoeffdingTree}
     */
    @Override
    public void resetLearningImpl() {
        int ensembleSize = this.ensembleSizeOption.getValue();
        this.ensemble = new Classifier[ensembleSize];
        // A freshly allocated double[] is already zero-filled.
        this.error = new double[ensembleSize];
        Classifier baseLearner = (Classifier) getPreparedClassOption(this.baseLearnerOption);
        baseLearner.resetLearning();
        boolean resetTrees = (this.resetTreesOption != null) && this.resetTreesOption.isSet();
        int maxSize = this.firstClassifierSizeOption.getValue(); //EXTENSION TO ASHT
        for (int i = 0; i < this.ensemble.length; i++) {
            this.ensemble[i] = baseLearner.copy();
            ASHoeffdingTree tree = (ASHoeffdingTree) this.ensemble[i];
            tree.setMaxSize(maxSize); //EXTENSION TO ASHT
            if (resetTrees) {
                tree.setResetTree();
            }
            maxSize = doubleSaturating(maxSize); //EXTENSION TO ASHT
        }
    }

    /**
     * Trains every member on the instance with a weight multiplier {@code k}
     * drawn from {@code MiscUtils.poisson(1.0, classifierRandom)}. Members
     * that draw {@code k == 0} skip the instance entirely. Before a member is
     * trained, its prediction on the instance is used to update its error
     * moving average.
     *
     * @param inst the training instance
     */
    @Override
    public void trainOnInstanceImpl(Instance inst) {
        int trueClass = (int) inst.classValue();
        for (int i = 0; i < this.ensemble.length; i++) {
            int k = MiscUtils.poisson(1.0, this.classifierRandom);
            if (k > 0) {
                Instance weightedInst = (Instance) inst.copy();
                weightedInst.setWeight(inst.weight() * k);
                // Test-then-train: score the member before it sees the instance.
                boolean correct =
                        Utils.maxIndex(this.ensemble[i].getVotesForInstance(inst)) == trueClass;
                double loss = correct ? 0.0 : 1.0;
                this.error[i] += alpha * (loss - this.error[i]); //EWMA
                this.ensemble[i].trainOnInstance(weightedInst);
            }
        }
    }

    /**
     * Combines the members' votes for an instance. Each member's vote is
     * normalized; members whose votes do not sum to a positive value are
     * ignored. With the useWeight option set, every normalized vote is scaled
     * by the inverse of the member's squared error, where the squared error is
     * bounded below by {@link #MIN_SQUARED_ERROR} so the weight stays finite.
     *
     * @param inst the instance to classify
     * @return the combined vote array
     */
    public double[] getVotesForInstance(Instance inst) {
        boolean weighted = (this.useWeightOption != null) && this.useWeightOption.isSet();
        DoubleVector combinedVote = new DoubleVector();
        for (int i = 0; i < this.ensemble.length; i++) {
            DoubleVector vote = new DoubleVector(this.ensemble[i].getVotesForInstance(inst));
            if (vote.sumOfValues() > 0.0) {
                vote.normalize();
                if (weighted) {
                    vote.scaleValues(voteWeight(this.error[i]));
                }
                combinedVote.addValues(vote);
            }
        }
        return combinedVote.getArrayRef();
    }

    /** Intentionally writes nothing: no model description is implemented for this ensemble. */
    @Override
    public void getModelDescription(StringBuilder out, int indent) {
        // TODO Auto-generated method stub
    }

    /** Returns {@code size * 2}, or {@link Integer#MAX_VALUE} if doubling would overflow an int. */
    private static int doubleSaturating(int size) {
        return (size > Integer.MAX_VALUE / 2) ? Integer.MAX_VALUE : size * 2;
    }

    /** Returns the finite vote weight for an error estimate: the inverse of its floored square. */
    private static double voteWeight(double memberError) {
        return 1.0 / Math.max(memberError * memberError, MIN_SQUARED_ERROR);
    }
}