clus.heuristic.FTest.java Source code

Java tutorial

Introduction

Here is the source code for clus.heuristic.FTest.java

Source

/*************************************************************************
 * Clus - Software for Predictive Clustering                             *
 * Copyright (C) 2007                                                    *
 *    Katholieke Universiteit Leuven, Leuven, Belgium                    *
 *    Jozef Stefan Institute, Ljubljana, Slovenia                        *
 *                                                                       *
 * This program is free software: you can redistribute it and/or modify  *
 * it under the terms of the GNU General Public License as published by  *
 * the Free Software Foundation, either version 3 of the License, or     *
 * (at your option) any later version.                                   *
 *                                                                       *
 * This program is distributed in the hope that it will be useful,       *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 * GNU General Public License for more details.                          *
 *                                                                       *
 * You should have received a copy of the GNU General Public License     *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
 *                                                                       *
 * Contact information: <http://www.cs.kuleuven.be/~dtai/clus/>.         *
 *************************************************************************/

package clus.heuristic;

import clus.main.*;
import clus.util.*;
import jeans.math.*;

import java.util.*;

import org.apache.commons.math.*;
import org.apache.commons.math.distribution.*;

/**
 * F-test support for split evaluation, restricted to F distributions with 1 numerator degree of
 * freedom and {@code df} denominator degrees of freedom.
 *
 * <p>Significance is addressed by a <em>level</em> index into {@link #FTEST_SIG}. Levels 1-5 use
 * hard-coded critical-value tables; the last level is a user-defined significance whose critical
 * values are computed once by {@link #initializeFTable(double)} and cached in {@link #FTEST_VALUE}.
 *
 * <p>All state is kept in mutable static fields and a shared {@link #m_FDist} instance that is
 * reconfigured on every call to {@link #getCriticalFCommonsMath(double, double)}; nothing in this
 * class synchronizes access to them.
 */
public class FTest {

    /**
     * Significance value per level. The last slot is a placeholder that
     * {@link #getLevelAndComputeArray(double)} overwrites with a custom significance.
     */
    public static double[] FTEST_SIG = { 1.0, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0 };

    /** Critical value used for the custom level when {@code df} is beyond {@link #FTEST_VALUE}. */
    public static double FTEST_LIMIT;

    /** Critical values for the custom level, indexed directly by {@code df}; null until initialized. */
    public static double[] FTEST_VALUE;

    /** Shared F distribution with 1 numerator d.f.; its denominator d.f. is changed per query. */
    public final static FDistribution m_FDist = DistributionFactory.newInstance().createFDistribution(1, 1);

    protected final static double critical_f_01[] = { 39.8161, 8.5264, 5.5225, 4.5369, 4.0804, 3.7636, 3.61, 3.4596,
            3.3489, 3.2761, 3.24, 3.1684, 3.1329, 3.0976, 3.0625, 3.0625, 3.0276, 2.9929, 2.9929, 2.9584 };

    protected final static double critical_f_005[] = { 161.0, 18.5, 10.1, 7.71, 6.61, 5.99, 5.59, 5.32, 5.12, 4.96,
            4.84, 4.75, 4.67, 4.6, 4.54, 4.49, 4.45, 4.41, 4.38, 4.35, 4.32, 4.3, 4.28, 4.26, 4.24, 4.23, 4.21, 4.2,
            4.18, 4.17 };

    protected final static double critical_f_001[] = { 4052.0, 98.5, 34.1, 21.2, 16.3, 13.7, 12.2, 11.3, 10.6, 10.0,
            9.65, 9.33, 9.07, 8.86, 8.68, 8.53, 8.40, 8.29, 8.18, 8.1, 8.02, 7.95, 7.88, 7.82, 7.77, 7.72, 7.68,
            7.64, 7.6, 7.56 };

    // for 0.005: derived from t-table,  squared values (=> approximate!)
    protected final static double critical_f_0005[] = { 15876, 198.81, 55.5025, 31.36, 22.7529, 18.6624, 16.2409,
            14.6689, 13.6161, 12.8164, 12.25, 11.7649, 11.2896, 11.0889, 10.8241, 10.5625, 10.3684, 10.24, 10.0489,
            9.9225, 9.8596, 9.7344, 9.61, 9.5481, 9.4864, 9.4249, 9.3636, 9.3025, 9.2416, 9.1809 };

    // for 0.001: derived from t-table,  squared values (=> approximate!)
    protected final static double critical_f_0001[] = { 405769, 998.56, 166.41, 74.1321, 47.0596, 35.5216, 29.16,
            25.4016, 22.8484, 21.0681, 19.7136, 18.6624, 17.8084, 17.1396, 16.5649, 16.0801, 15.6025, 15.3664,
            15.0544, 14.8225, 14.5924, 14.3641, 14.2129, 13.9876, 13.8384, 13.7641, 13.6161, 13.4689, 13.3956,
            13.3225 };

    /** First denominator d.f. whose critical value is obtained numerically for the custom table. */
    private static final int FIRST_NUMERIC_DF = 3;

    /** Denominator d.f. used to approximate the large-sample limit of the critical value. */
    private static final int LIMIT_DF = 100000;

    /** The custom table stops once a value is within this relative distance of the limit. */
    private static final double TABLE_TOLERANCE = 0.05;

    /**
     * Maps a significance value to a level index, building the custom table when needed.
     *
     * <p>If {@code significance} is within 1% (relative) of one of the predefined entries of
     * {@link #FTEST_SIG}, that entry's index is returned. Otherwise the value is stored in the last
     * slot of {@link #FTEST_SIG}, {@link #initializeFTable(double)} is run for it, and the index of
     * that last slot is returned.
     *
     * @param significance requested significance
     * @return level index usable with {@link #getCriticalF(int, int)} and {@link #ftest}
     */
    public static int getLevelAndComputeArray(double significance) {
        int maxlevel = FTEST_SIG.length - 1;
        for (int level = 0; level < maxlevel; level++) {
            if (Math.abs(significance - FTEST_SIG[level]) / FTEST_SIG[level] < 0.01) {
                return level;
            }
        }
        FTEST_SIG[maxlevel] = significance;
        initializeFTable(significance);
        return maxlevel;
    }

    /**
     * Returns the critical value of F(1, {@code df}) for the given level.
     *
     * <ul>
     * <li>Level 0 (significance 1.0) yields 0.0.</li>
     * <li>Levels 1-5 use the hard-coded tables, with step-wise constants for larger {@code df}.</li>
     * <li>Any other level uses the custom table built by {@link #initializeFTable(double)}.</li>
     * </ul>
     *
     * @param level index into {@link #FTEST_SIG}
     * @param df denominator degrees of freedom
     * @return the critical value; {@code Double.POSITIVE_INFINITY} when {@code df < 1}, so that no
     *         finite F statistic is ever judged significant without denominator d.f.
     * @throws IllegalStateException if the custom table is required but has not been initialized
     */
    public static double getCriticalF(int level, int df) {
        if (df < 1) {
            // The tables are indexed by df - 1; a non-positive df used to index out of bounds.
            return Double.POSITIVE_INFINITY;
        }
        switch (level) {
        case 0: // 1.0: every non-negative F is "significant"
            return 0.0;
        case 1: // 0.1 (this table only covers df <= 20 and uses different break points)
            if (df <= 20) {
                return critical_f_01[df - 1];
            } else if (df <= 30) {
                return 2.9;
            } else if (df <= 40) {
                return 2.86;
            } else if (df <= 120) {
                return 2.79;
            } else {
                return 2.7;
            }
        case 2: // 0.05
            return tabulated(critical_f_005, df, 4.08, 4.00, 3.92, 3.84);
        case 3: // 0.01
            return tabulated(critical_f_001, df, 7.31, 7.08, 6.85, 6.63);
        case 4: // 0.005
            return tabulated(critical_f_0005, df, 8.82, 8.47, 8.18, 7.90);
        case 5: // 0.001
            return tabulated(critical_f_0001, df, 12.60, 11.98, 11.36, 10.82);
        default:
            if (FTEST_VALUE == null) {
                throw new IllegalStateException(
                        "Custom F-test table not initialized; call getLevelAndComputeArray() or initializeFTable() first");
            }
            return df < FTEST_VALUE.length ? FTEST_VALUE[df] : FTEST_LIMIT;
        }
    }

    /** Looks up a 30-entry table, falling back to step-wise constants for df up to 40, 60, 120 and beyond. */
    private static double tabulated(double[] exact, int df, double upTo40, double upTo60, double upTo120,
            double beyond) {
        if (df <= 30) {
            return exact[df - 1];
        } else if (df <= 40) {
            return upTo40;
        } else if (df <= 60) {
            return upTo60;
        } else if (df <= 120) {
            return upTo120;
        } else {
            return beyond;
        }
    }

    /**
     * Computes the critical value of F(1, {@code df}) at significance {@code sig} via Commons Math.
     *
     * <p>Reconfigures the shared {@link #m_FDist} instance, so concurrent calls can interfere.
     *
     * @param sig significance; the quantile queried is {@code 1 - sig}
     * @param df denominator degrees of freedom
     * @return the critical value, or 0.0 if the library reports a {@code MathException} (the error is
     *         printed to {@code System.err})
     */
    public static double getCriticalFCommonsMath(double sig, double df) {
        try {
            m_FDist.setDenominatorDegreesOfFreedom(df);
            return m_FDist.inverseCumulativeProbability(1 - sig);
        } catch (MathException e) {
            System.err.println("F-Distribution error: " + e.getMessage());
            return 0.0;
        }
    }

    /**
     * Builds the custom critical-value table for significance {@code sig}.
     *
     * <p>Calling {@link #getCriticalFCommonsMath(double, double)} is slow, so values are computed
     * once for df = 3, 4, ... until one lies within 5% of the large-df limit, and cached in
     * {@link #FTEST_VALUE} (indexed by df). {@link #FTEST_LIMIT} receives the limit itself.
     *
     * <p>Slots for df 1 and 2 are filled from closed forms (see {@link #exactCriticalFOneDf} and
     * {@link #exactCriticalFTwoDf}); previously they were left at 0.0, which made every positive F
     * statistic significant for those df. The numeric part starts at df 3, presumably because the
     * library inversion is unreliable below that; TODO confirm against the Commons Math version in use.
     *
     * @param sig significance to tabulate
     */
    public static void initializeFTable(double sig) {
        double limit = getCriticalFCommonsMath(sig, LIMIT_DF);
        // A zero (error fallback), infinite or NaN limit makes the relative test meaningless; with a
        // zero limit and positive values the old loop never terminated.
        boolean limitUsable = limit > 0.0 && !Double.isInfinite(limit);

        double[] computed = new double[64];
        int count = 0;
        int df = FIRST_NUMERIC_DF;
        double value;
        do {
            value = getCriticalFCommonsMath(sig, df);
            if (count == computed.length) {
                double[] grown = new double[2 * computed.length];
                System.arraycopy(computed, 0, grown, 0, count);
                computed = grown;
            }
            computed[count++] = value;
            df++;
        } while (limitUsable && df < LIMIT_DF && (value - limit) / limit > TABLE_TOLERANCE);

        System.out.println("F-Test = " + sig + " limit = " + ClusFormat.TWO_AFTER_DOT.format(limit) + " values = "
                + count);

        double[] table = new double[count + FIRST_NUMERIC_DF];
        // Slot 0 is never a valid df and stays 0.0.
        table[1] = exactCriticalFOneDf(sig);
        table[2] = exactCriticalFTwoDf(sig);
        System.arraycopy(computed, 0, table, FIRST_NUMERIC_DF, count);

        FTEST_LIMIT = limit;
        FTEST_VALUE = table;
    }

    /**
     * Critical value of F(1, 1): the square of the two-sided t quantile with 1 d.f. (Cauchy),
     * i.e. {@code tan(pi * (1 - sig) / 2)^2}. For sig = 0.05 this gives about 161.4.
     */
    private static double exactCriticalFOneDf(double sig) {
        double t = Math.tan(Math.PI * (1.0 - sig) / 2.0);
        return t * t;
    }

    /**
     * Critical value of F(1, 2): the square of the two-sided t quantile with 2 d.f.,
     * i.e. {@code 2 (1 - sig)^2 / (sig (2 - sig))}. For sig = 0.05 this gives about 18.5.
     */
    private static double exactCriticalFTwoDf(double sig) {
        double oneMinusSig = 1.0 - sig;
        return 2.0 * oneMinusSig * oneMinusSig / (sig * (2.0 - sig));
    }

    /**
     * Performs the F-test; returns true iff H0 is rejected at the given level.
     *
     * <p>This implementation only works for F tests with 1 and {@code df} degrees of freedom. The
     * statistic is {@code df * (sst - ssr) / ssr}.
     *
     * @param level index into {@link #FTEST_SIG}; level 0 always succeeds
     * @param sst total sum of squares
     * @param ssr residual sum of squares
     * @param df second (denominator) degrees of freedom; the first is 1
     * @return whether the statistic exceeds the critical value for {@code level} and {@code df}
     */
    public static boolean ftest(int level, double sst, double ssr, int df) {
        if (level == 0) {
            return true;
        }
        if (sst <= 0.0) {
            return false; // added to avoid 0/0; SST = 0 => no improvement possible
        }
        if (ssr == 0.0) {
            return true; // avoid x/0; SSR = 0 => F is infinite
        }
        if (df <= 0) {
            return false; // no denominator d.f.: nothing can be concluded (used to index out of bounds)
        }
        double f = (double) df * (sst - ssr) / ssr;
        double cf = getCriticalF(level, df);
        return f > cf;
    }

    /** Returns the significance value of the level configured in {@code Settings.FTEST_LEVEL}. */
    public static double getSettingSig() {
        return FTEST_SIG[Settings.FTEST_LEVEL];
    }

    /**
     * Variance-reduction heuristic gated by the configured F-test.
     *
     * @param n_tot total number (weight) of examples; {@code n_tot - 2}, rounded to the nearest
     *        integer, is used as the denominator d.f.
     * @param ss_tot total sum of squares before the split
     * @param ss_sum summed sum of squares after the split
     * @return {@code ss_tot - ss_sum} if it is at least {@code MathUtil.C1E_9} and the F-test (when
     *         enabled) succeeds; {@code Double.NEGATIVE_INFINITY} otherwise
     */
    public static double calcVarianceReductionHeuristic(double n_tot, double ss_tot, double ss_sum) {
        double value = ss_tot - ss_sum;
        if (value < MathUtil.C1E_9) {
            // Gain too small
            return Double.NEGATIVE_INFINITY;
        }
        if (Settings.FTEST_LEVEL == 0) {
            // No F-test -> just return value
            return value;
        }
        int n_2 = (int) Math.floor(n_tot - 2.0 + 0.5);
        if (n_2 <= 0) {
            return Double.NEGATIVE_INFINITY;
        }
        return FTest.ftest(Settings.FTEST_LEVEL, ss_tot, ss_sum, n_2) ? value : Double.NEGATIVE_INFINITY;
    }
}