main.NaiveBayes.java Source code

Java tutorial

Introduction

Here is the source code for main.NaiveBayes.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    NaiveBayes.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package main;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.AbstractClassifier;
import weka.core.Aggregateable;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.estimators.DiscreteEstimator;
import weka.estimators.Estimator;
import weka.estimators.KernelEstimator;
import weka.estimators.NormalEstimator;

/**
 * <!-- globalinfo-start --> Class for a Naive Bayes classifier using estimator
 * classes. Numeric estimator precision values are chosen based on analysis of
 * the training data. For this reason, the classifier is not an
 * UpdateableClassifier (which in typical usage are initialized with zero
 * training instances) -- if you need the UpdateableClassifier functionality,
 * use the NaiveBayesUpdateable classifier. The NaiveBayesUpdateable classifier
 * will use a default precision of 0.1 for numeric attributes when
 * buildClassifier is called with zero training instances.<br/>
 * <br/>
 * For more information on Naive Bayes classifiers, see<br/>
 * <br/>
 * George H. John, Pat Langley: Estimating Continuous Distributions in Bayesian
 * Classifiers. In: Eleventh Conference on Uncertainty in Artificial
 * Intelligence, San Mateo, 338-345, 1995.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;inproceedings{John1995,
 *    address = {San Mateo},
 *    author = {George H. John and Pat Langley},
 *    booktitle = {Eleventh Conference on Uncertainty in Artificial Intelligence},
 *    pages = {338-345},
 *    publisher = {Morgan Kaufmann},
 *    title = {Estimating Continuous Distributions in Bayesian Classifiers},
 *    year = {1995}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -K
 *  Use kernel density estimator rather than normal
 *  distribution for numeric attributes
 * </pre>
 * 
 * <pre>
 * -D
 *  Use supervised discretization to process numeric attributes
 * </pre>
 * 
 * <pre>
 * -O
 *  Display model in old format (good when there are many classes)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 11741 $
 */
public class NaiveBayes extends AbstractClassifier
        implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler, Aggregateable<NaiveBayes> {

    /** for serialization */
    static final long serialVersionUID = 5995231201785697655L;

    /**
     * The attribute estimators, indexed first by the position of the attribute
     * among the non-class attributes and then by class value.
     */
    protected Estimator[][] m_Distributions;

    /** The class estimator (estimates the prior probability of each class). */
    protected Estimator m_ClassDistribution;

    /**
     * Whether to use kernel density estimator rather than normal distribution for
     * numeric attributes
     */
    protected boolean m_UseKernelEstimator = false;

    /**
     * Whether to use supervised discretization rather than a normal
     * distribution for numeric attributes
     */
    protected boolean m_UseDiscretization = false;

    /** The number of classes (or 1 for numeric class) */
    protected int m_NumClasses;

    /**
     * The dataset header for the purposes of printing out a semi-intelligible
     * model. Holds the training data while the classifier is being built and
     * is cut down to an empty copy (header only) once training has finished.
     */
    protected Instances m_Instances;

    /**
     * The default precision used for numeric attributes; it applies when no
     * precision can be derived from the training data (i.e. no two distinct
     * adjacent values were found for the attribute).
     */
    protected static final double DEFAULT_NUM_PRECISION = 0.01;

    /**
     * The discretization filter. Created by buildClassifier when supervised
     * discretization is switched on, reset to null otherwise.
     */
    protected weka.filters.supervised.attribute.Discretize m_Disc = null;

    /**
     * Whether toString() renders the model in the old format, i.e. delegates
     * to toStringOriginal().
     */
    protected boolean m_displayModelInOldFormat = false;

    /**
     * Provides the human-readable description of this classifier.
     * 
     * @return the descriptive text, followed by the technical reference
     *         obtained from getTechnicalInformation(); suitable for displaying
     *         in the explorer/experimenter gui
     */
    public String globalInfo() {
        final StringBuilder description = new StringBuilder();

        description.append("Class for a Naive Bayes classifier using estimator classes. Numeric");
        description.append(" estimator precision values are chosen based on analysis of the ");
        description.append(" training data. For this reason, the classifier is not an");
        description.append(" UpdateableClassifier (which in typical usage are initialized with zero");
        description.append(" training instances) -- if you need the UpdateableClassifier functionality,");
        description.append(" use the NaiveBayesUpdateable classifier. The NaiveBayesUpdateable");
        description.append(" classifier will  use a default precision of 0.1 for numeric attributes");
        description.append(" when buildClassifier is called with zero training instances.\n\n");
        description.append("For more information on Naive Bayes classifiers, see\n\n");

        // The literature reference is rendered last
        description.append(getTechnicalInformation().toString());

        return description.toString();
    }

    /**
     * Builds the bibliographic record of the publication this classifier is
     * based on (John and Langley, 1995).
     * 
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        final TechnicalInformation paper = new TechnicalInformation(Type.INPROCEEDINGS);

        // who / what
        paper.setValue(Field.AUTHOR, "George H. John and Pat Langley");
        paper.setValue(Field.TITLE, "Estimating Continuous Distributions in Bayesian Classifiers");

        // where / when
        paper.setValue(Field.BOOKTITLE, "Eleventh Conference on Uncertainty in Artificial Intelligence");
        paper.setValue(Field.YEAR, "1995");
        paper.setValue(Field.PAGES, "338-345");
        paper.setValue(Field.PUBLISHER, "Morgan Kaufmann");
        paper.setValue(Field.ADDRESS, "San Mateo");

        return paper;
    }

    /**
     * Describes what kind of data this classifier accepts: nominal and numeric
     * attributes (with missing values), a nominal class (with missing class
     * values) and no minimum number of training instances.
     * 
     * @return the capabilities of this classifier
     */
    @Override
    public Capabilities getCapabilities() {
        final Capabilities supported = super.getCapabilities();

        // Start from a clean slate, then switch on what is handled
        supported.disableAll();

        final Capability[] handled = {
                // attributes
                Capability.NOMINAL_ATTRIBUTES, Capability.NUMERIC_ATTRIBUTES, Capability.MISSING_VALUES,
                // class
                Capability.NOMINAL_CLASS, Capability.MISSING_CLASS_VALUES };
        for (Capability capability : handled) {
            supported.enable(capability);
        }

        // instances
        supported.setMinimumNumberInstances(0);

        return supported;
    }

    /**
     * Builds the classifier from the supplied training data.
     * <p/>
     * Instances without a class value are dropped from a private copy of the
     * data. If supervised discretization is switched on, the copy is run
     * through a newly created discretization filter. One estimator is then set
     * up for every combination of non-class attribute and class value; numeric
     * estimators receive a precision equal to the average gap between adjacent
     * distinct values of the attribute (or the default precision if there is no
     * such gap). Finally every instance is fed to updateClassifier and only the
     * dataset header is retained.
     * 
     * @param instances set of instances serving as training data
     * @exception Exception if the classifier has not been generated successfully
     */
    @Override
    public void buildClassifier(Instances instances) throws Exception {

        // Refuse data this classifier cannot cope with
        getCapabilities().testWithFail(instances);

        // Work on a copy that holds no instances with a missing class
        instances = new Instances(instances);
        instances.deleteWithMissingClass();

        m_NumClasses = instances.numClasses();

        // Keep our own copy of the training data
        m_Instances = new Instances(instances);

        // Optionally replace numeric attributes by discretized ones
        if (!m_UseDiscretization) {
            m_Disc = null;
        } else {
            m_Disc = new weka.filters.supervised.attribute.Discretize();
            m_Disc.setInputFormat(m_Instances);
            m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
        }

        // Allocate one estimator slot per (non-class attribute, class) pair
        final int numClasses = m_Instances.numClasses();
        m_Distributions = new Estimator[m_Instances.numAttributes() - 1][numClasses];
        m_ClassDistribution = new DiscreteEstimator(numClasses, true);

        int slot = 0;
        for (Enumeration<Attribute> atts = m_Instances.enumerateAttributes(); atts.hasMoreElements(); slot++) {
            final Attribute attribute = atts.nextElement();
            final boolean numeric = attribute.type() == Attribute.NUMERIC;

            // For numeric attributes the precision is the mean difference
            // between adjacent distinct values in sorted order
            double precision = DEFAULT_NUM_PRECISION;
            if (numeric) {
                m_Instances.sort(attribute);
                final int total = m_Instances.numInstances();
                if ((total > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                    double previous = m_Instances.instance(0).value(attribute);
                    double gapSum = 0;
                    int gaps = 0;
                    for (int row = 1; row < total; row++) {
                        final Instance candidate = m_Instances.instance(row);
                        if (candidate.isMissing(attribute)) {
                            // first missing value reached: stop scanning
                            break;
                        }
                        final double value = candidate.value(attribute);
                        if (value != previous) {
                            gapSum += value - previous;
                            previous = value;
                            gaps++;
                        }
                    }
                    if (gaps > 0) {
                        precision = gapSum / gaps;
                    }
                }
            }

            // Create the estimator matching the attribute type for each class
            for (int cls = 0; cls < numClasses; cls++) {
                final Estimator estimator;
                if (numeric) {
                    if (m_UseKernelEstimator) {
                        estimator = new KernelEstimator(precision);
                    } else {
                        estimator = new NormalEstimator(precision);
                    }
                } else if (attribute.type() == Attribute.NOMINAL) {
                    estimator = new DiscreteEstimator(attribute.numValues(), true);
                } else {
                    throw new Exception("Attribute type unknown to NaiveBayes");
                }
                m_Distributions[slot][cls] = estimator;
            }
        }

        // Feed every training instance to the estimators
        for (Enumeration<Instance> rows = m_Instances.enumerateInstances(); rows.hasMoreElements();) {
            updateClassifier(rows.nextElement());
        }

        // Only the header is needed from here on
        m_Instances = new Instances(m_Instances, 0);
    }

    /**
     * Incorporates a single training instance into the model. Instances whose
     * class value is missing are ignored; missing attribute values are skipped.
     * 
     * @param instance the new training instance to include in the model
     * @exception Exception if the instance could not be incorporated in the
     *              model.
     */
    public void updateClassifier(Instance instance) throws Exception {

        // Nothing can be learned without a class label
        if (instance.classIsMissing()) {
            return;
        }

        int slot = 0;
        for (Enumeration<Attribute> atts = m_Instances.enumerateAttributes(); atts.hasMoreElements(); slot++) {
            final Attribute attribute = atts.nextElement();
            if (instance.isMissing(attribute)) {
                continue;
            }
            // Update the estimator belonging to this attribute and the instance's class
            final Estimator estimator = m_Distributions[slot][(int) instance.classValue()];
            estimator.addValue(instance.value(attribute), instance.weight());
        }

        // Update the class prior as well
        m_ClassDistribution.addValue(instance.classValue(), instance.weight());
    }

    /**
     * Calculates the class membership probabilities for the given test instance.
     * <p/>
     * Starting from the class priors, the probability of every observed
     * (non-missing) attribute value is multiplied in for each class, raised to
     * the power of the attribute's weight as recorded in the training header.
     * The result is normalized before it is returned.
     * 
     * @param instance the instance to be classified
     * @return predicted class probability distribution
     * @exception Exception if there is a problem generating the prediction
     */
    @Override
    public double[] distributionForInstance(Instance instance) throws Exception {

        // Apply the same discretization that was used during training
        if (m_UseDiscretization) {
            m_Disc.input(instance);
            instance = m_Disc.output();
        }

        // Start from the class priors
        double[] probs = new double[m_NumClasses];
        for (int j = 0; j < m_NumClasses; j++) {
            probs[j] = m_ClassDistribution.getProbability(j);
        }

        Enumeration<Attribute> enumAtts = instance.enumerateAttributes();
        // attIndex counts non-class attributes only: it addresses m_Distributions,
        // NOT the attributes of the dataset header
        int attIndex = 0;
        while (enumAtts.hasMoreElements()) {
            Attribute attribute = enumAtts.nextElement();
            if (!instance.isMissing(attribute)) {
                // Look the weight up at the attribute's real position in the
                // header. Using attIndex here picks the wrong attribute as soon
                // as the class attribute is not the last one.
                double attWeight = m_Instances.attribute(attribute.index()).weight();
                double value = instance.value(attribute);
                double temp, max = 0;
                for (int j = 0; j < m_NumClasses; j++) {
                    // Floor each factor so a single zero estimate cannot wipe out a class
                    temp = Math.max(1e-75, Math.pow(m_Distributions[attIndex][j].getProbability(value), attWeight));
                    probs[j] *= temp;
                    if (probs[j] > max) {
                        max = probs[j];
                    }
                    if (Double.isNaN(probs[j])) {
                        throw new Exception("NaN returned from estimator for attribute " + attribute.name() + ":\n"
                                + m_Distributions[attIndex][j].toString());
                    }
                }
                if ((max > 0) && (max < 1e-75)) { // Danger of probability underflow
                    for (int j = 0; j < m_NumClasses; j++) {
                        probs[j] *= 1e75;
                    }
                }
            }
            attIndex++;
        }

        // Turn the products into a proper probability distribution
        Utils.normalize(probs);
        return probs;
    }

    /**
     * Lists the command-line options understood by this classifier: the three
     * options defined here (-K, -D, -O) followed by those of the superclass.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {

        final Vector<Option> available = new Vector<Option>(3);

        final String kernelHelp = "\tUse kernel density estimator rather than normal\n"
                + "\tdistribution for numeric attributes";
        available.add(new Option(kernelHelp, "K", 0, "-K"));

        final String discretizeHelp = "\tUse supervised discretization to process numeric attributes\n";
        available.add(new Option(discretizeHelp, "D", 0, "-D"));

        final String oldFormatHelp = "\tDisplay model in old format (good when there are " + "many classes)\n";
        available.add(new Option(oldFormatHelp, "O", 0, "-O"));

        // Inherited options come last
        available.addAll(Collections.list(super.listOptions()));

        return available.elements();
    }

    /**
     * Configures the classifier from a list of command-line options. The
     * options of the superclass are processed first; -K and -D are mutually
     * exclusive.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -K
     *  Use kernel density estimator rather than normal
     *  distribution for numeric attributes
     * </pre>
     * 
     * <pre>
     * -D
     *  Use supervised discretization to process numeric attributes
     * </pre>
     * 
     * <pre>
     * -O
     *  Display model in old format (good when there are many classes)
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @exception Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        super.setOptions(options);

        final boolean useKernel = Utils.getFlag('K', options);
        final boolean useDiscretization = Utils.getFlag('D', options);

        // The two ways of treating numeric attributes exclude each other
        if (useKernel && useDiscretization) {
            throw new IllegalArgumentException("Can't use both kernel density estimation and discretization!");
        }

        setUseSupervisedDiscretization(useDiscretization);
        setUseKernelEstimator(useKernel);

        final boolean oldFormat = Utils.getFlag('O', options);
        setDisplayModelInOldFormat(oldFormat);

        // Anything still left over is not a supported option
        Utils.checkForRemainingOptions(options);
    }

    /**
     * Reports the current configuration as command-line options: the options
     * of the superclass followed by whichever of -K, -D and -O are switched on.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {

        final Vector<String> settings = new Vector<String>();
        Collections.addAll(settings, super.getOptions());

        // Each flag is emitted only if the corresponding switch is on
        final String[] flags = { "-K", "-D", "-O" };
        final boolean[] switchedOn = { m_UseKernelEstimator, m_UseDiscretization, m_displayModelInOldFormat };
        for (int i = 0; i < flags.length; i++) {
            if (switchedOn[i]) {
                settings.add(flags[i]);
            }
        }

        return settings.toArray(new String[0]);
    }

    /**
     * Returns a description of the classifier.
     * <p/>
     * If the old output format has been requested, the work is delegated to
     * toStringOriginal(). Otherwise the model is rendered as a table with one
     * label column (attribute names and per-attribute row labels) followed by
     * one column per class value. The method first determines the two column
     * widths from everything that will be printed, then writes the header
     * (class labels and class priors) and finally one group of rows per
     * non-class attribute, whose layout depends on the type of estimator used
     * for that attribute.
     * 
     * @return a description of the classifier as a string.
     */
    @Override
    public String toString() {
        if (m_displayModelInOldFormat) {
            return toStringOriginal();
        }

        StringBuffer temp = new StringBuffer();
        temp.append("Naive Bayes Classifier");
        if (m_Instances == null) {
            temp.append(": No model built yet.");
        } else {

            // maxWidth: width of each class column
            // maxAttWidth: width of the leading label column
            int maxWidth = 0;
            int maxAttWidth = 0;
            // set when any attribute is modelled with kernel estimators
            boolean containsKernel = false;

            // set up max widths
            // class values
            for (int i = 0; i < m_Instances.numClasses(); i++) {
                if (m_Instances.classAttribute().value(i).length() > maxWidth) {
                    maxWidth = m_Instances.classAttribute().value(i).length();
                }
            }
            // attributes
            for (int i = 0; i < m_Instances.numAttributes(); i++) {
                if (i != m_Instances.classIndex()) {
                    Attribute a = m_Instances.attribute(i);
                    if (a.name().length() > maxAttWidth) {
                        maxAttWidth = m_Instances.attribute(i).name().length();
                    }
                    if (a.isNominal()) {
                        // check values
                        for (int j = 0; j < a.numValues(); j++) {
                            // the two extra characters account for the indent
                            // nominal values are printed with
                            String val = a.value(j) + "  ";
                            if (val.length() > maxAttWidth) {
                                maxAttWidth = val.length();
                            }
                        }
                    }
                }
            }

            // widen the columns for the numbers the estimators will print;
            // the estimator type is decided from the estimator of class 0
            for (Estimator[] m_Distribution : m_Distributions) {
                for (int j = 0; j < m_Instances.numClasses(); j++) {
                    if (m_Distribution[0] instanceof NormalEstimator) {
                        // check mean/precision dev against maxWidth
                        NormalEstimator n = (NormalEstimator) m_Distribution[j];
                        // base-10 logarithms of the magnitudes
                        double mean = Math.log(Math.abs(n.getMean())) / Math.log(10.0);
                        double precision = Math.log(Math.abs(n.getPrecision())) / Math.log(10.0);
                        double width = (mean > precision) ? mean : precision;
                        if (width < 0) {
                            width = 1;
                        }
                        // decimal + # decimal places + 1
                        width += 6.0;
                        if ((int) width > maxWidth) {
                            maxWidth = (int) width;
                        }
                    } else if (m_Distribution[0] instanceof KernelEstimator) {
                        containsKernel = true;
                        KernelEstimator ke = (KernelEstimator) m_Distribution[j];
                        int numK = ke.getNumKernels();
                        // longest row label this estimator can produce
                        String temps = "K" + numK + ": mean (weight)";
                        if (maxAttWidth < temps.length()) {
                            maxAttWidth = temps.length();
                        }
                        // check means + weights against maxWidth
                        if (ke.getNumKernels() > 0) {
                            double[] means = ke.getMeans();
                            double[] weights = ke.getWeights();
                            for (int k = 0; k < ke.getNumKernels(); k++) {
                                // same "mean (weight)" cell text as printed further down
                                String m = Utils.doubleToString(means[k], maxWidth, 4).trim();
                                m += " (" + Utils.doubleToString(weights[k], maxWidth, 1).trim() + ")";
                                if (maxWidth < m.length()) {
                                    maxWidth = m.length();
                                }
                            }
                        }
                    } else if (m_Distribution[0] instanceof DiscreteEstimator) {
                        DiscreteEstimator d = (DiscreteEstimator) m_Distribution[j];
                        // counts per symbol
                        for (int k = 0; k < d.getNumSymbols(); k++) {
                            String size = "" + d.getCount(k);
                            if (size.length() > maxWidth) {
                                maxWidth = size.length();
                            }
                        }
                        // and the total shown in the "[total]" row
                        int sum = ("" + d.getSumOfCounts()).length();
                        if (sum > maxWidth) {
                            maxWidth = sum;
                        }
                    }
                }
            }

            // Check width of class labels
            for (int i = 0; i < m_Instances.numClasses(); i++) {
                String cSize = m_Instances.classAttribute().value(i);
                if (cSize.length() > maxWidth) {
                    maxWidth = cSize.length();
                }
            }

            // Check width of class priors
            for (int i = 0; i < m_Instances.numClasses(); i++) {
                String priorP = Utils
                        .doubleToString(((DiscreteEstimator) m_ClassDistribution).getProbability(i), maxWidth, 2)
                        .trim();
                priorP = "(" + priorP + ")";
                if (priorP.length() > maxWidth) {
                    maxWidth = priorP.length();
                }
            }

            // make sure the fixed row labels fit into the label column
            if (maxAttWidth < "Attribute".length()) {
                maxAttWidth = "Attribute".length();
            }

            if (maxAttWidth < "  weight sum".length()) {
                maxAttWidth = "  weight sum".length();
            }

            if (containsKernel) {
                if (maxAttWidth < "  [precision]".length()) {
                    maxAttWidth = "  [precision]".length();
                }
            }

            // gap between the label column and the first class column
            maxAttWidth += 2;

            // ---- table header ----
            temp.append("\n\n");
            temp.append(pad("Class", " ", (maxAttWidth + maxWidth + 1) - "Class".length(), true));

            temp.append("\n");
            temp.append(pad("Attribute", " ", maxAttWidth - "Attribute".length(), false));
            // class labels
            for (int i = 0; i < m_Instances.numClasses(); i++) {
                String classL = m_Instances.classAttribute().value(i);
                temp.append(pad(classL, " ", maxWidth + 1 - classL.length(), true));
            }
            temp.append("\n");
            // class priors
            temp.append(pad("", " ", maxAttWidth, true));
            for (int i = 0; i < m_Instances.numClasses(); i++) {
                String priorP = Utils
                        .doubleToString(((DiscreteEstimator) m_ClassDistribution).getProbability(i), maxWidth, 2)
                        .trim();
                priorP = "(" + priorP + ")";
                temp.append(pad(priorP, " ", maxWidth + 1 - priorP.length(), true));
            }
            temp.append("\n");
            // separator line spanning the whole table
            temp.append(pad("", "=",
                    maxAttWidth + (maxWidth * m_Instances.numClasses()) + m_Instances.numClasses() + 1, true));
            temp.append("\n");

            // loop over the attributes
            // counter indexes m_Distributions, which has no entry for the class attribute
            int counter = 0;
            for (int i = 0; i < m_Instances.numAttributes(); i++) {
                if (i == m_Instances.classIndex()) {
                    continue;
                }
                String attName = m_Instances.attribute(i).name();
                temp.append(attName + "\n");

                if (m_Distributions[counter][0] instanceof NormalEstimator) {
                    // rows: mean, std. dev., weight sum, precision
                    String meanL = "  mean";
                    temp.append(pad(meanL, " ", maxAttWidth + 1 - meanL.length(), false));
                    for (int j = 0; j < m_Instances.numClasses(); j++) {
                        // means
                        NormalEstimator n = (NormalEstimator) m_Distributions[counter][j];
                        String mean = Utils.doubleToString(n.getMean(), maxWidth, 4).trim();
                        temp.append(pad(mean, " ", maxWidth + 1 - mean.length(), true));
                    }
                    temp.append("\n");
                    // now do std deviations
                    String stdDevL = "  std. dev.";
                    temp.append(pad(stdDevL, " ", maxAttWidth + 1 - stdDevL.length(), false));
                    for (int j = 0; j < m_Instances.numClasses(); j++) {
                        NormalEstimator n = (NormalEstimator) m_Distributions[counter][j];
                        String stdDev = Utils.doubleToString(n.getStdDev(), maxWidth, 4).trim();
                        temp.append(pad(stdDev, " ", maxWidth + 1 - stdDev.length(), true));
                    }
                    temp.append("\n");
                    // now the weight sums
                    String weightL = "  weight sum";
                    temp.append(pad(weightL, " ", maxAttWidth + 1 - weightL.length(), false));
                    for (int j = 0; j < m_Instances.numClasses(); j++) {
                        NormalEstimator n = (NormalEstimator) m_Distributions[counter][j];
                        String weight = Utils.doubleToString(n.getSumOfWeights(), maxWidth, 4).trim();
                        temp.append(pad(weight, " ", maxWidth + 1 - weight.length(), true));
                    }
                    temp.append("\n");
                    // now the precisions
                    String precisionL = "  precision";
                    temp.append(pad(precisionL, " ", maxAttWidth + 1 - precisionL.length(), false));
                    for (int j = 0; j < m_Instances.numClasses(); j++) {
                        NormalEstimator n = (NormalEstimator) m_Distributions[counter][j];
                        String precision = Utils.doubleToString(n.getPrecision(), maxWidth, 4).trim();
                        temp.append(pad(precision, " ", maxWidth + 1 - precision.length(), true));
                    }
                    temp.append("\n\n");

                } else if (m_Distributions[counter][0] instanceof DiscreteEstimator) {
                    // one row of counts per nominal value, then a totals row
                    Attribute a = m_Instances.attribute(i);
                    for (int j = 0; j < a.numValues(); j++) {
                        String val = "  " + a.value(j);
                        temp.append(pad(val, " ", maxAttWidth + 1 - val.length(), false));
                        for (int k = 0; k < m_Instances.numClasses(); k++) {
                            DiscreteEstimator d = (DiscreteEstimator) m_Distributions[counter][k];
                            String count = "" + d.getCount(j);
                            temp.append(pad(count, " ", maxWidth + 1 - count.length(), true));
                        }
                        temp.append("\n");
                    }
                    // do the totals
                    String total = "  [total]";
                    temp.append(pad(total, " ", maxAttWidth + 1 - total.length(), false));
                    for (int k = 0; k < m_Instances.numClasses(); k++) {
                        DiscreteEstimator d = (DiscreteEstimator) m_Distributions[counter][k];
                        String count = "" + d.getSumOfCounts();
                        temp.append(pad(count, " ", maxWidth + 1 - count.length(), true));
                    }
                    temp.append("\n\n");
                } else if (m_Distributions[counter][0] instanceof KernelEstimator) {
                    // rows: number of kernels, std. dev., precision, then one
                    // "mean (weight)" row per kernel
                    String kL = "  [# kernels]";
                    temp.append(pad(kL, " ", maxAttWidth + 1 - kL.length(), false));
                    for (int k = 0; k < m_Instances.numClasses(); k++) {
                        KernelEstimator ke = (KernelEstimator) m_Distributions[counter][k];
                        String nk = "" + ke.getNumKernels();
                        temp.append(pad(nk, " ", maxWidth + 1 - nk.length(), true));
                    }
                    temp.append("\n");
                    // now the std. devs
                    String stdDevL = "  [std. dev]";
                    temp.append(pad(stdDevL, " ", maxAttWidth + 1 - stdDevL.length(), false));
                    for (int k = 0; k < m_Instances.numClasses(); k++) {
                        KernelEstimator ke = (KernelEstimator) m_Distributions[counter][k];
                        String stdD = Utils.doubleToString(ke.getStdDev(), maxWidth, 4).trim();
                        temp.append(pad(stdD, " ", maxWidth + 1 - stdD.length(), true));
                    }
                    temp.append("\n");
                    // and the precisions
                    String precL = "  [precision]";
                    temp.append(pad(precL, " ", maxAttWidth + 1 - precL.length(), false));
                    for (int k = 0; k < m_Instances.numClasses(); k++) {
                        KernelEstimator ke = (KernelEstimator) m_Distributions[counter][k];
                        String prec = Utils.doubleToString(ke.getPrecision(), maxWidth, 4).trim();
                        temp.append(pad(prec, " ", maxWidth + 1 - prec.length(), true));
                    }
                    temp.append("\n");
                    // first determine max number of kernels across the classes
                    int maxK = 0;
                    for (int k = 0; k < m_Instances.numClasses(); k++) {
                        KernelEstimator ke = (KernelEstimator) m_Distributions[counter][k];
                        if (ke.getNumKernels() > maxK) {
                            maxK = ke.getNumKernels();
                        }
                    }
                    for (int j = 0; j < maxK; j++) {
                        // means first
                        String meanL = "  K" + (j + 1) + ": mean (weight)";
                        temp.append(pad(meanL, " ", maxAttWidth + 1 - meanL.length(), false));
                        for (int k = 0; k < m_Instances.numClasses(); k++) {
                            KernelEstimator ke = (KernelEstimator) m_Distributions[counter][k];
                            double[] means = ke.getMeans();
                            double[] weights = ke.getWeights();
                            // "--" marks a class that has fewer than j + 1 kernels,
                            // "0" a class without any kernel
                            String m = "--";
                            if (ke.getNumKernels() == 0) {
                                m = "" + 0;
                            } else if (j < ke.getNumKernels()) {
                                m = Utils.doubleToString(means[j], maxWidth, 4).trim();
                                m += " (" + Utils.doubleToString(weights[j], maxWidth, 1).trim() + ")";
                            }
                            temp.append(pad(m, " ", maxWidth + 1 - m.length(), true));
                        }
                        temp.append("\n");
                    }
                    temp.append("\n");
                }

                counter++;
            }
        }

        return temp.toString();
    }

    /**
     * Returns a description of the classifier in the old format: for every
     * class value the prior probability is printed, followed by the estimator
     * of each attribute (with weight greater than zero) for that class.
     * 
     * @return a description of the classifier as a string; when m_Instances is
     *         null the text only states that no model has been built yet.
     */
    protected String toStringOriginal() {

        // The buffer never leaves this method, so the unsynchronized
        // StringBuilder is sufficient.
        StringBuilder text = new StringBuilder();

        text.append("Naive Bayes Classifier");
        if (m_Instances == null) {
            text.append(": No model built yet.");
        } else {
            try {
                // Ask the header for the number of classes instead of reading
                // m_Distributions[0].length: that row does not exist when there
                // are no conditional estimators, and the class priors should
                // still be printed in that case.
                final int numClasses = m_Instances.numClasses();
                for (int i = 0; i < numClasses; i++) {
                    text.append("\n\nClass ").append(m_Instances.classAttribute().value(i))
                            .append(": Prior probability = ")
                            .append(Utils.doubleToString(m_ClassDistribution.getProbability(i), 4, 2))
                            .append("\n\n");
                    Enumeration<Attribute> enumAtts = m_Instances.enumerateAttributes();
                    int attIndex = 0;
                    while (enumAtts.hasMoreElements()) {
                        Attribute attribute = enumAtts.nextElement();
                        // Attributes whose weight is not positive are left out of
                        // the listing, but attIndex still advances so that it
                        // stays aligned with the rows of m_Distributions.
                        if (attribute.weight() > 0) {
                            text.append(attribute.name()).append(":  ").append(m_Distributions[attIndex][i]);
                        }
                        attIndex++;
                    }
                }
            } catch (Exception ex) {
                // Best effort: a failure while printing must not propagate out of
                // a description method, so the message is appended instead.
                text.append(ex.getMessage());
            }
        }

        return text.toString();
    }

    /**
     * Pads a string on one side with repeated copies of a padding string.
     *
     * @param source the text to pad
     * @param padChar the string that is added once per padding step
     * @param length how many copies of padChar to add; zero or a negative value
     *          adds none
     * @param leftPad true to place the padding before source, false to place
     *          it after source
     * @return source together with the requested padding
     */
    private String pad(String source, String padChar, int length, boolean leftPad) {
        StringBuilder filler = new StringBuilder();
        for (int remaining = length; remaining > 0; remaining--) {
            filler.append(padChar);
        }
        return leftPad ? filler + source : source + filler;
    }

    /**
     * Returns the tip text for the useKernelEstimator property.
     * 
     * @return a short description of the property, suitable for displaying in
     *         the explorer/experimenter gui
     */
    public String useKernelEstimatorTipText() {
        final String tip = "Use a kernel estimator for numeric attributes "
                + "rather than a normal distribution.";
        return tip;
    }

    /**
     * Reports whether the kernel estimator option is switched on.
     * 
     * @return the current value of m_UseKernelEstimator.
     */
    public boolean getUseKernelEstimator() {
        return m_UseKernelEstimator;
    }

    /**
     * Switches the kernel estimator option on or off. Switching it on also
     * switches supervised discretization off.
     * 
     * @param v the new value for m_UseKernelEstimator.
     */
    public void setUseKernelEstimator(boolean v) {
        m_UseKernelEstimator = v;
        if (!v) {
            return;
        }
        // Enabling the kernel estimator turns the discretization option off.
        setUseSupervisedDiscretization(false);
    }

    /**
     * Returns the tip text for the useSupervisedDiscretization property.
     * 
     * @return a short description of the property, suitable for displaying in
     *         the explorer/experimenter gui
     */
    public String useSupervisedDiscretizationTipText() {
        final String tip = "Use supervised discretization to convert "
                + "numeric attributes to nominal ones.";
        return tip;
    }

    /**
     * Reports whether the supervised discretization option is switched on.
     * 
     * @return the current value of m_UseDiscretization; true if supervised
     *         discretization is to be used.
     */
    public boolean getUseSupervisedDiscretization() {
        return m_UseDiscretization;
    }

    /**
     * Switches supervised discretization on or off. Switching it on also
     * switches the kernel estimator option off.
     * 
     * @param newblah true if supervised discretization is to be used.
     */
    public void setUseSupervisedDiscretization(boolean newblah) {
        m_UseDiscretization = newblah;
        if (!newblah) {
            return;
        }
        // Enabling discretization turns the kernel estimator option off.
        setUseKernelEstimator(false);
    }

    /**
     * Returns the tip text for the displayModelInOldFormat property.
     * 
     * @return a short description of the property, suitable for displaying in
     *         the explorer/experimenter gui
     */
    public String displayModelInOldFormatTipText() {
        final String tip = "Use old format for model output. "
                + "The old format is better when there are many class values. "
                + "The new format is better when there are fewer classes and many attributes.";
        return tip;
    }

    /**
     * Chooses between the old (original) and the new format for model output.
     * 
     * @param d true if model output is to be shown in the old format
     */
    public void setDisplayModelInOldFormat(boolean d) {
        // Only the flag is stored here.
        m_displayModelInOldFormat = d;
    }

    /**
     * Reports whether model output is shown in the old, original format.
     * 
     * @return the current value of m_displayModelInOldFormat; true if model
     *         output is to be shown in the old format
     */
    public boolean getDisplayModelInOldFormat() {
        // Plain accessor for the flag.
        return m_displayModelInOldFormat;
    }

    /**
     * Gives access to the header this classifier was trained with.
     *
     * @return the m_Instances object itself (not a copy); a null value is
     *         treated elsewhere in this class as "no model built yet"
     */
    public Instances getHeader() {
        // The stored reference is handed out directly.
        return m_Instances;
    }

    /**
     * Gives access to all the conditional estimators.
     *
     * @return the m_Distributions array itself (not a copy); this class indexes
     *         it as [attribute index][class value index]
     */
    public Estimator[][] getConditionalEstimators() {
        // The internal array is handed out directly.
        return m_Distributions;
    }

    /**
     * Gives access to the estimator of the class distribution.
     *
     * @return the m_ClassDistribution estimator itself (not a copy)
     */
    public Estimator getClassEstimator() {
        // The internal estimator is handed out directly.
        return m_ClassDistribution;
    }

    /**
     * Returns the revision string of this class.
     * 
     * @return the result of RevisionUtils.extract applied to the revision
     *         keyword string
     */
    @Override
    public String getRevision() {
        final String revisionKeyword = "$Revision: 11741 $";
        return RevisionUtils.extract(revisionKeyword);
    }

    /**
     * Aggregates another NaiveBayes model into this one: the class estimator
     * and every conditional estimator of this classifier aggregate their
     * counterpart from the supplied classifier.
     *
     * @param toAggregate the classifier whose estimators are aggregated into
     *          this one
     * @return this classifier
     * @throws Exception if toAggregate is null, if supervised discretization is
     *           turned on for either classifier, if either classifier has no
     *           model yet, or if the two data headers do not match
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    public NaiveBayes aggregate(NaiveBayes toAggregate) throws Exception {

        // Reject a missing argument up front instead of failing with a bare
        // NullPointerException on the first dereference.
        if (toAggregate == null) {
            throw new Exception("Can't aggregate - no classifier to aggregate was supplied");
        }

        // Highly unlikely that discretization intervals will match between the
        // two classifiers
        if (m_UseDiscretization || toAggregate.getUseSupervisedDiscretization()) {
            throw new Exception("Unable to aggregate when supervised discretization " + "has been turned on");
        }

        // A null m_Instances is what this class reports as "no model built yet";
        // without a model there is nothing to compare or aggregate.
        if (m_Instances == null || toAggregate.m_Instances == null) {
            throw new Exception("Can't aggregate - both classifiers must have been built first");
        }

        if (!m_Instances.equalHeaders(toAggregate.m_Instances)) {
            throw new Exception("Can't aggregate - data headers don't match: "
                    + m_Instances.equalHeadersMsg(toAggregate.m_Instances));
        }

        // The estimators are cast to the raw Aggregateable type, hence the
        // rawtypes/unchecked suppression on this method.
        ((Aggregateable) m_ClassDistribution).aggregate(toAggregate.m_ClassDistribution);

        // aggregate all conditional estimators
        for (int i = 0; i < m_Distributions.length; i++) {
            for (int j = 0; j < m_Distributions[i].length; j++) {
                ((Aggregateable) m_Distributions[i][j]).aggregate(toAggregate.m_Distributions[i][j]);
            }
        }

        return this;
    }

    /**
     * Finalization step for aggregation. This implementation is intentionally
     * empty: aggregate(NaiveBayes) already applies each aggregation to this
     * classifier's estimators when it is called.
     *
     * @throws Exception declared by the signature; this implementation does not
     *           throw
     */
    @Override
    public void finalizeAggregation() throws Exception {
        // nothing to do
    }

    /**
     * Command-line entry point, intended for testing this class: creates a new
     * NaiveBayes instance and passes it to runClassifier with the options.
     * 
     * @param argv the options
     */
    public static void main(String[] argv) {
        NaiveBayes classifier = new NaiveBayes();
        runClassifier(classifier, argv);
    }
}