bachelorthesis.methods.detection.bayesian.BayesianDetection.java Source code

Java tutorial

Introduction

Here is the source code for bachelorthesis.methods.detection.bayesian.BayesianDetection.java

Source

package bachelorthesis.methods.detection.bayesian;

import static bachelorthesis.methods.UtilityMethods.*;
import static bachelorthesis.utility.Configurations.*;
import static bachelorthesis.utility.Constants.EARLIEST_YEAR;

import java.util.Arrays;
import java.util.Random;
import java.util.function.DoubleFunction;
import java.util.zip.DataFormatException;

import org.apache.commons.lang3.ArrayUtils;

import bachelorthesis.data.DataPoint;
import bachelorthesis.data.Result;
import bachelorthesis.data.Value;
import bachelorthesis.methods.detection.DetectionMethod;
import bachelorthesis.utility.ExceptionHandler;

/**
 * Translated from a Python implementation with following license
 * 
 * The MIT License (MIT)
    
Copyright (c) 2014 Johannes Kulick
    
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
    
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
    
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
 * 
 *
 */
public class BayesianDetection extends DetectionMethod {

    @Override
    protected Result detect() {
        double[] logLikelihoods = expSum(offlineCpd(series.getAllValues(0)));
        for (int i = 1; i < logLikelihoods.length - 1; i++) {
            if (logLikelihoods[i] >= BAYESIAN_THRESHOLD) {
                result.addInterestingPoint(i + EARLIEST_YEAR);
            }
            if (logLikelihoods[i - 1] < logLikelihoods[i] && logLikelihoods[i] >= logLikelihoods[i + 1]) {
                result.addInterestingPoint(new DataPoint(i + EARLIEST_YEAR, 0.5f));
            }
        }
        return result;
    }

    /**
     * Generates time series for code testing
     * @param numberOfParts number of parts with constant mean
     * @param samePointsMin   min number of points per part
     * @param samePointsMax max number of points per part
     * @param maxDeviation maximal deviation of mean (e.g. 0.1)
     * @return generated time series with given specs
     */
    private Value[] generate(int numberOfParts, int samePointsMin, int samePointsMax, double maxDeviation) {
        Value[] data = new Value[0];
        int sum = EARLIEST_YEAR;
        for (int i = 0; i < numberOfParts; i++) {
            Random rand = new Random();
            int numberOfPoints = rand.nextInt(samePointsMax - samePointsMin + 1) + samePointsMin;
            double mean = rand.nextDouble();
            double var = rand.nextDouble() * maxDeviation;
            System.out.println(sum + " - " + (sum + numberOfPoints) + "\t" + mean);
            sum += numberOfPoints;
            Value[] add = new Value[numberOfPoints];
            for (int j = 0; j < numberOfPoints; j++) {
                add[j] = new Value(mean + ((rand.nextDouble() * 2 - 1) * var));
            }
            data = ArrayUtils.addAll(data, add);
        }
        return data;
    }

    private double[][] offlineCpd(Value[] data) {
        int n = data.length;
        double[] Q = new double[n];
        double[] g = new double[n];
        double[] G = new double[n];
        double[][] P = new double[n][n];

        Arrays.fill(g, Math.log(1.d / (data.length + 1)));
        G[0] = g[0];
        for (int i = 1; i < G.length; i++) {
            G[i] = Math.log((Math.exp(G[i - 1]) + Math.exp(g[i])));
        }
        for (double[] array : P) {
            Arrays.fill(array, Double.NEGATIVE_INFINITY);
        }

        P[n - 1][n - 1] = gaussianObsLogLikelihood(data, n - 1, n);
        Q[n - 1] = P[n - 1][n - 1];

        for (int t = n - 2; t >= 0; t--) {
            double p_next_cp = Double.NEGATIVE_INFINITY;
            for (int s = t; s < n - 1; s++) {
                P[t][s] = gaussianObsLogLikelihood(data, t, s + 1);
                double summand = P[t][s] + Q[s + 1] + g[s + 1 - t];
                p_next_cp = Math.log((Math.exp(p_next_cp) + Math.exp(summand)));
                if (summand - p_next_cp < BAYESIAN_TRUNCATE) {
                    break;
                }
            }
            P[t][n - 1] = gaussianObsLogLikelihood(data, t, n);
            double antiG;
            if (G[n - 1 - t] < -1e-15) {
                antiG = Math.log(1.d - Math.exp(G[n - 1 - t]));
            } else {
                antiG = Math.log(-G[n - 1 - t]);
            }
            Q[t] = Math.log((Math.exp(p_next_cp) + Math.exp(P[t][n - 1] + antiG)));
        }

        double[][] Pcp = new double[n - 1][n - 1];
        for (double[] array : Pcp) {
            Arrays.fill(array, Double.NEGATIVE_INFINITY);
        }
        for (int t = 0; t < n - 1; t++) {
            Pcp[0][t] = P[0][t] + Q[t + 1] + g[t] - Q[0];
            if (Double.isNaN(Pcp[0][t])) {
                Pcp[0][t] = Double.NEGATIVE_INFINITY;
            }
        }
        for (int j = 1; j < n - 1; j++) {
            for (int t = j; t < n - 1; t++) {
                double[] tmp_cond = copyOfRange(Pcp[j - 1], j - 1, t);

                tmp_cond = add(tmp_cond, getSameEntryOfAllArrays(copyOfRange(P, j, t + 1), t));
                double summand = Q[t + 1];
                tmp_cond = forEach(tmp_cond, value -> value + summand);
                tmp_cond = add(tmp_cond, copyOfRange(g, 0, t - j + 1));
                double[] negativePart = forEach(copyOfRange(Q, j, t + 1), value -> -value);
                tmp_cond = add(tmp_cond, negativePart);

                double[] tempArray = forEach(tmp_cond, value -> Math.exp(value));
                Pcp[j][t] = Math.log(sum(tempArray));
                if (Double.isNaN(Pcp[j][t])) {
                    Pcp[j][t] = Double.NEGATIVE_INFINITY;
                }
            }
        }
        return Pcp;
    }

    private double[] add(double[] array1, double[] array2) {
        if (array1.length != array2.length) {
            ExceptionHandler.handleOwnException(new DataFormatException("Arrays are of different length"));
        }
        double[] result = new double[array1.length];
        for (int i = 0; i < array1.length; i++) {
            result[i] = array1[i] + array2[i];
        }
        return result;
    }

    private double gaussianObsLogLikelihood(Value[] data, int t, int s) {
        s++;
        double n = s - t;
        double mean = calculateMean(data, t, s).doubleValue();

        double muT = mean * n / (1 + n);
        double nuT = n + 1;
        double alphaT = n / 2 + 1;
        double betaT = calculateQuadSum(data, t, s).doubleValue() / 2 + 1 + (n / (n + 1)) * Math.pow(mean, 2) / 2;
        double scale = (betaT * (nuT + 1)) / (alphaT * nuT);

        double prob = getProb(data, t, s, muT, nuT, scale);
        double lgA = getLgA(nuT, scale);
        return n * lgA - (nuT + 1) / 2 * prob;
    }

    private double getLgA(double nuT, double scale) {
        double result = Gamma.logGamma((nuT + 1) / 2);
        result -= Math.log(Math.sqrt(Math.PI * nuT * scale));
        result -= Gamma.logGamma(nuT / 2);
        return result;
    }

    private double getProb(Value[] data, int t, int s, double muT, double nuT, double scale) {
        double[] array = Value.getDoubleArray(copyOfRange(data, t, s));
        array = forEach(array, value -> Math.pow(value - muT, 2));
        array = forEach(array, value -> value / (nuT * scale) + 1);
        array = forEach(array, value -> Math.log(value));
        return sum(array);
    }

    private double[] expSum(double[][] likelihoods) {
        double[] result = new double[likelihoods.length];
        for (double[] subLikelihood : likelihoods) {
            for (int i = 0; i < subLikelihood.length; i++) {
                result[i] = result[i] + Math.exp(subLikelihood[i]);
            }
        }
        return result;
    }

    private double sum(double[] array) {
        double result = 0;
        for (int i = 0; i < array.length; i++) {
            result += array[i];
        }
        return result;
    }

    private double[] forEach(double[] values, DoubleFunction<Double> function) {
        double[] result = new double[values.length];
        for (int i = 0; i < values.length; i++) {
            result[i] = function.apply(values[i]);
        }
        return result;
    }

    private double[] getSameEntryOfAllArrays(double[][] arrays, int t) {
        double[] result = new double[arrays.length];
        for (int i = 0; i < result.length; i++) {
            result[i] = arrays[i][t];
        }
        return result;
    }

    private Value[] copyOfRange(Value[] array, int from, int to) {
        return Arrays.copyOfRange(array, from, Math.min(to, array.length));
    }

    private double[] copyOfRange(double[] array, int from, int to) {
        return Arrays.copyOfRange(array, from, Math.min(to, array.length));
    }

    private double[][] copyOfRange(double[][] array, int from, int to) {
        return Arrays.copyOfRange(array, from, Math.min(to, array.length));
    }
}