keel.Algorithms.Rule_Learning.C45Rules.C45.java Source code

Java tutorial

Introduction

Here is the source code for keel.Algorithms.Rule_Learning.C45Rules.C45.java

Source

/***********************************************************************
    
   This file is part of KEEL-software, the Data Mining tool for regression, 
   classification, clustering, pattern mining and so on.
    
   Copyright (C) 2004-2010
       
   F. Herrera (herrera@decsai.ugr.es)
   L. Sánchez (luciano@uniovi.es)
   J. Alcalá-Fdez (jalcala@decsai.ugr.es)
   S. García (sglopez@ujaen.es)
   A. Fernández (alberto.fernandez@ujaen.es)
   J. Luengo (julianlm@decsai.ugr.es)
    
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
    
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
    
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see http://www.gnu.org/licenses/
      
**********************************************************************/

/**
 * <p>
 * @author Written by Cristóbal Romero (University of Oviedo) 01/07/2008
 * @author Modified by Xavi Solé (La Salle, Ramón Llull University - Barcelona) 12/12/2008
 * @version 1.1
 * @since JDK1.2
 * </p>
 */

package keel.Algorithms.Rule_Learning.C45Rules;

import java.io.FileWriter;
import java.io.PrintWriter;
import java.io.StreamTokenizer;
import java.io.IOException;

/* For commons.configuration:
 import org.apache.commons.configuration.*;
 */

public class C45 extends Algorithm {
    /**
     * <p>
     * Class to implement the C4.5 algorithm
     * </p>
     */

    /** Decision tree. */
    private Tree root;

    /** Is the tree pruned or not. */
    private boolean prune = false;

    /** Confidence level. */
    private float confidence = 0.25f;

    /** Minimum number of itemsets per leaf. */
    private int minItemsets = 2;

    /** The prior probabilities of the classes. */
    private double[] priorsProbabilities;

    /** Resolution of the margin histogram. */
    private static int marginResolution = 500;

    /** Cumulative margin classification. */
    private double marginCounts[];

    /** The sum of counts for priors. */
    private double classPriorsSum;

    /** Constructor.
     *
     * @param paramFile The parameters file.
     *
     * @throws Exception If the algorithm cannot be executed.
     */
    public C45(parseParameters paramFile) throws Exception {
        try {

            // starts the time
            long startTime = System.currentTimeMillis();

            /* Sets the options of the execution from text file*/
            //StreamTokenizer tokenizer = new StreamTokenizer( new BufferedReader( new FileReader( paramFile ) ) );
            //initTokenizer( tokenizer) ;
            //setOptions( tokenizer );

            //File Names
            modelFileName = paramFile.getTrainingInputFile();
            trainFileName = paramFile.getValidationInputFile();
            testFileName = paramFile.getTestInputFile();
            //Options
            confidence = Float.parseFloat(paramFile.getParameter(1)); //confidence level for the uniform distribution
            minItemsets = Integer.parseInt(paramFile.getParameter(2)); //itemset per Leaf
            if (confidence < 0 || confidence > 1) {
                confidence = 0.25F;
                System.err.println("Error: Confidence must be in the interval [0,1]");
                System.err.println("Using default value: 0.25");
            }
            if (minItemsets <= 0) {
                minItemsets = 2;
                System.err.println("Error: itemsetPerLeaf must be greater than 0");
                System.err.println("Using default value: 2");
            }
            prune = false;

            /* Initializes the dataset. */
            modelDataset = new MyDataset(modelFileName, true);
            trainDataset = new MyDataset(trainFileName, false);
            testDataset = new MyDataset(testFileName, false);

            priorsProbabilities = new double[modelDataset.numClasses()];
            priorsProbabilities();
            marginCounts = new double[marginResolution + 1];

            // generate the tree
            generateTree(modelDataset);

        } catch (Exception e) {
            System.err.println(e.getMessage());
            System.exit(-1);
        }
    }

    /** Generates the tree.
     *
     * @param itemsets The dataset used to build the tree.
     *
     * @throws Exception If the tree cannot be built.
     */
    public void generateTree(MyDataset itemsets) throws Exception {
        SelectCut selectCut;

        selectCut = new SelectCut(minItemsets, itemsets);
        root = new Tree(selectCut, prune, confidence);
        root.buildTree(itemsets);
    }

    /** Function to evaluate the class which the itemset must have according to the classification of the tree.
     *
     * @param itemset The itemset to evaluate.
     * @throws Exception If cannot compute the classification.
     * @return The index of the class index predicted.
     */
    public double evaluateItemset(Itemset itemset) throws Exception {
        Itemset classMissing = (Itemset) itemset.copy();
        double prediction = 0;
        classMissing.setDataset(itemset.getDataset());
        classMissing.setClassMissing();

        double[] classification = classificationForItemset(classMissing);
        prediction = maxIndex(classification);
        updateStats(classification, itemset, itemset.numClasses());

        //itemset.setPredictedValue( prediction );

        return prediction;
    }

    /** Updates all the statistics for the current itemset.
     *
     * @param predictedClassification Distribution of class values predicted for the itemset.
     * @param itemset The itemset.
     * @param nClasses The number of classes.
     *
     */
    private void updateStats(double[] predictedClassification, Itemset itemset, int nClasses) {
        int actualClass = (int) itemset.getClassValue();

        if (!itemset.classIsMissing()) {
            updateMargins(predictedClassification, actualClass, nClasses);

            // Determine the predicted class (doesn't detect multiple classifications)
            int predictedClass = -1;
            double bestProb = 0.0;

            for (int i = 0; i < nClasses; i++) {
                if (predictedClassification[i] > bestProb) {
                    predictedClass = i;
                    bestProb = predictedClassification[i];
                }
            }

            // Update counts when no class was predicted
            if (predictedClass < 0) {
                return;
            }

            double predictedProb = Math.max(Double.MIN_VALUE, predictedClassification[actualClass]);
            double priorProb = Math.max(Double.MIN_VALUE, priorsProbabilities[actualClass] / classPriorsSum);
        }
    }

    /** Returns class probabilities for an itemset.
     *
     * @param itemset The itemset.
     *
     * @throws Exception If cannot compute the classification.
     * @return class probabilities for an itemset.
     */
    public final double[] classificationForItemset(Itemset itemset) throws Exception {
        return root.classificationForItemset(itemset);
    }

    /** Update the cumulative record of classification margins.
     *
     * @param predictedClassification Distribution of class values predicted for the itemset.
     * @param actualClass The class value.
     * @param nClasses Number of classes.
     */
    private void updateMargins(double[] predictedClassification, int actualClass, int nClasses) {
        double probActual = predictedClassification[actualClass];
        double probNext = 0;

        for (int i = 0; i < nClasses; i++)
            if ((i != actualClass) && ( //Comparators.isGreater( predictedClassification[i], probNext ) ) )
            predictedClassification[i] > probNext))
                probNext = predictedClassification[i];

        double margin = probActual - probNext;
        int bin = (int) ((margin + 1.0) / 2.0 * marginResolution);
        marginCounts[bin]++;
    }

    /** Evaluates if a string is a boolean value.
     *
     * @param value The string to evaluate.
     *
     * @return True if value is a boolean value. False otherwise.
     */
    private boolean isBoolean(String value) {
        if (value.equalsIgnoreCase("TRUE") || value.equalsIgnoreCase("FALSE"))
            return true;
        else
            return false;
    }

    /** Returns index of maximum element in a given array of doubles. First maximum is returned.
     *
     * @param doubles The array of elements.
     *
     * @return index of maximum element in a given array of doubles. First maximum is returned.
     */
    public static int maxIndex(double[] doubles) {
        double maximum = 0;
        int maxIndex = 0;

        for (int i = 0; i < doubles.length; i++) {
            if ((i == 0) || //
                    doubles[i] > maximum) {
                maxIndex = i;
                maximum = doubles[i];
            }
        }

        return maxIndex;
    }

    /** Sets the class prior probabilities.
     *
     * @throws Exception If cannot compute the probabilities.
     */
    public void priorsProbabilities() throws Exception {
        for (int i = 0; i < modelDataset.numClasses(); i++)
            priorsProbabilities[i] = 1;

        classPriorsSum = modelDataset.numClasses();

        for (int i = 0; i < modelDataset.numItemsets(); i++) {
            if (!modelDataset.itemset(i).classIsMissing()) {
                try {
                    priorsProbabilities[(int) modelDataset.itemset(i).getClassValue()] += modelDataset.itemset(i)
                            .getWeight();
                    classPriorsSum += modelDataset.itemset(i).getWeight();
                } catch (Exception e) {
                    System.err.println(e.getMessage());
                }
            }
        }
    }

    /** Function to print the tree.
     *
     * @return a string representation of the C4.5 tree
     */
    public String toString() {
        return root.toString();
    }

    /**
     * Returns the C4.5 tree
     * @return the C4.5 tree
     */
    public Tree getTree() {
        return root;
    }

}