com.davidbracewell.ml.classification.bayes.NaiveBayes.java Source code

Java tutorial

Introduction

Here is the source code for com.davidbracewell.ml.classification.bayes.NaiveBayes.java

Source

/*
 * (c) 2005 David B. Bracewell
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.davidbracewell.ml.classification.bayes;

import com.davidbracewell.collection.CollectionUtils;
import com.davidbracewell.collection.Counter;
import com.davidbracewell.collection.Counters;
import com.davidbracewell.math.DoubleEntry;
import com.davidbracewell.ml.Instance;
import com.davidbracewell.ml.classification.ClassificationModel;
import com.davidbracewell.ml.classification.ClassificationResult;
import org.apache.commons.math3.util.FastMath;

import java.util.HashMap;
import java.util.Map;

/**
 * Naive Bayes classification model.
 *
 * <p>An instance is scored per class by combining the class prior with the conditional
 * probability of every feature that is non-zero in the instance. Only the presence of a
 * feature is used; the feature's value does not enter the score.</p>
 *
 * @author David B. Bracewell
 */
public class NaiveBayes extends ClassificationModel {

    private static final long serialVersionUID = 1L;

    /**
     * Prior probability of each class, indexed by class index of the target feature.
     * Not assigned in this class (presumably set by a learner in this package -- confirm).
     */
    double[] priors;

    /**
     * Conditional probabilities indexed as {@code [feature index][class index]}.
     * Not assigned in this class (presumably set by a learner in this package -- confirm).
     */
    double[][] conditionals;

    /**
     * Computes the normalized posterior distribution over the classes for an instance.
     *
     * <p>Scores are accumulated as natural logarithms and normalized with the log-sum-exp
     * shift (the largest log-score is subtracted before exponentiating). The logarithm and
     * the exponential therefore use the same base, and instances with many active features
     * do not underflow to an all-zero score vector.</p>
     *
     * @param instance the instance to classify
     * @return the classification result holding one probability per class
     */
    @Override
    protected ClassificationResult classifyImpl(Instance instance) {
        final int numClasses = getTargetFeature().alphabetSize();
        final double[] probabilities = new double[numClasses];

        // Pass 1: unnormalized log-posterior per class, tracking the maximum for the shift.
        double maxLogScore = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < numClasses; i++) {
            // prior
            double logScore = FastMath.log(priors[i]);
            // likelihood of every feature present in the instance
            for (DoubleEntry entry : CollectionUtils.asIterable(instance.nonZeroIterator())) {
                logScore += FastMath.log(conditionals[entry.index][i]);
            }
            probabilities[i] = logScore;
            if (logScore > maxLogScore) {
                maxLogScore = logScore;
            }
        }

        // Pass 2: shift by the maximum so the best class maps to exp(0) = 1, which keeps
        // the sum strictly positive whenever at least one class has a finite log-score.
        // If every class has probability zero, all log-scores are -infinity and the
        // result is NaN for every class.
        double sum = 0d;
        for (int i = 0; i < numClasses; i++) {
            probabilities[i] = FastMath.exp(probabilities[i] - maxLogScore);
            sum += probabilities[i];
        }

        // Pass 3: normalize to make probabilities add to one.
        for (int i = 0; i < numClasses; i++) {
            probabilities[i] = probabilities[i] / sum;
        }

        return new ClassificationResult(getTargetFeature(), probabilities);
    }

    /**
     * @return {@code true} when both the priors and the conditionals have been set
     */
    @Override
    public boolean isTrained() {
        return priors != null && conditionals != null;
    }

    /**
     * Exposes the conditional probabilities keyed by feature name.
     *
     * <p>Reads {@link #conditionals} directly, so the model must be trained before this
     * is called.</p>
     *
     * @return a map from feature name to a counter that maps each class value to the
     *         conditional probability stored for that feature and class
     */
    public Map<String, Counter<String>> getFeatureWeights() {
        Map<String, Counter<String>> map = new HashMap<>();
        for (int fi = 0; fi < getFeatures().size(); fi++) {
            Counter<String> weights = Counters.newLinkedHashMapCounter();
            map.put(getFeatures().get(fi).getName(), weights);
            for (int ci = 0; ci < getTargetFeature().alphabetSize(); ci++) {
                weights.set(getTargetFeature().valueAtIndex(ci), conditionals[fi][ci]);
            }
        }
        return map;
    }

}//END OF NaiveBayes