Compute the covariance matrix between all column pairs (variables) in the multivariate data set - Java java.lang

Java examples for java.lang:Math Matrix

Description

Compute the covariance matrix between all column pairs (variables) in the multivariate data set

Demo Code

/*
 *  Java Information Dynamics Toolkit (JIDT)
 *  Copyright (C) 2012, Joseph T. Lizier
 *  
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
//package com.java2s;

public class Main {
    /**
     * Compute the covariance matrix between all column pairs (variables) in the
     *  multivariate data set
     * 
     * @param data multivariate array of data; first index is time, second is 
     *    variable number
     * @return covariance matrix
     */
    public static double[][] covarianceMatrix(double[][] data) {
        return covarianceMatrix(data, means(data));
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in the
     *  multivariate data set
     * 
     * @param data multivariate array of data; first index is time, second is 
     *    variable number
     * @param means the mean of each variable (column) in the data
     * @return covariance matrix
     */
    public static double[][] covarianceMatrix(double[][] data,
            double[] means) {
        int numVariables = data[0].length;
        double[][] covariances = new double[numVariables][numVariables];
        for (int r = 0; r < numVariables; r++) {
            for (int c = r; c < numVariables; c++) {
                // Compute the covariance between variable r and c:
                covariances[r][c] = covarianceTwoColumns(data, r, c,
                        means[r], means[c]);
                // And of course this is symmetric between c and r:
                covariances[c][r] = covariances[r][c];
            }
        }
        return covariances;
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in the
     *  multivariate data set, which consists of two separate
     *  multivariate vectors.
     * 
     * @param data1 multivariate array of data; first index is time, second is 
     *    variable number
     * @param data2 a second multivariate array of data, which can be though
     *    of as extensions of rows of the first.
     * @return covariance matrix, where the columns of dat1 are numbered
     *   first, and the columns of data2 after that.
     */
    public static double[][] covarianceMatrix(double[][] data1,
            double[][] data2) {
        return covarianceMatrix(data1, data2, 0);
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in the
     *  multivariate data set, which consists of two separate
     *  multivariate vectors.
     * 
     * @param data1 multivariate array of data; first index is time, second is 
     *    variable number
     * @param data2 a second multivariate array of data, which can be thought
     *    of as extensions of rows of the first.
     * @param delay compute the lagged covariance of the given delay from
     *    data1 to data2 (assumes delay >= 0); i.e. compute correlation
     *    between data1[x] and data2[x+delay]. 
     * @return covariance matrix, where the columns of data1 are numbered
     *   first, and the columns of data2 after that.
     */
    public static double[][] covarianceMatrix(double[][] data1,
            double[][] data2, int delay) {
        if (delay > 0) {
            // Trim out the last delay rows of data1, and the
            //  first delay rows of data2:
            double[][] data1Trimmed = new double[data1.length - delay][];
            double[][] data2Trimmed = new double[data2.length - delay][];
            for (int x = 0; x < data1.length - delay; x++) {
                data1Trimmed[x] = data1[x];
                data2Trimmed[x] = data2[x + delay];
            }
            // Just overwrite our local copy of the pointers to the
            //  original data
            data1 = data1Trimmed;
            data2 = data2Trimmed;
        }

        int numVariables1 = data1[0].length;
        int numVariables2 = data2[0].length;
        int numVariables = numVariables1 + numVariables2;
        double[][] covariances = new double[numVariables][numVariables];
        // Compute means of each variable once up front to save time
        double[] means1 = new double[numVariables1];
        double[] means2 = new double[numVariables2];
        for (int r = 0; r < numVariables1; r++) {
            means1[r] = mean(data1, r);
        }
        for (int r = 0; r < numVariables2; r++) {
            means2[r] = mean(data2, r);
        }
        // Now compute the covariances:
        for (int r = 0; r < numVariables1; r++) {
            // Compute the covariances internal to data1:
            for (int c = r; c < numVariables1; c++) {
                // Compute the covariance between variable r and c:
                covariances[r][c] = covarianceTwoColumns(data1, r, c,
                        means1[r], means1[c]);
                // And of course this is symmetric between c and r:
                covariances[c][r] = covariances[r][c];
            }
            // Compute the covariances between data1 and data2
            for (int c = 0; c < numVariables2; c++) {
                // Compute the covariance between variable r and c:
                covariances[r][numVariables1 + c] = covarianceTwoColumns(
                        data1, data2, r, c, means1[r], means2[c]);
                // And of course this is symmetric between c and r:
                covariances[numVariables1 + c][r] = covariances[r][numVariables1
                        + c];
            }
        }
        // Now compute the covariances internal to data2:
        for (int r = 0; r < numVariables2; r++) {
            for (int c = r; c < numVariables2; c++) {
                // Compute the covariance between variable r and c:
                covariances[numVariables1 + r][numVariables1 + c] = covarianceTwoColumns(
                        data2, r, c, means2[r], means2[c]);
                // And of course this is symmetric between c and r:
                covariances[numVariables1 + c][numVariables1 + r] = covariances[numVariables1
                        + r][numVariables1 + c];
            }
        }
        return covariances;
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in the
     *  multivariate data set, which consists of three separate
     *  multivariate vectors.
     * 
     * @param data1 multivariate array of data; first index is time, second is 
     *    variable number
     * @param data2 a second multivariate array of data, which can be thought
     *    of as extensions of rows of the first.
     * @param data2 a third multivariate array of data, which can be thought
     *    of as extensions of rows of the first and second.
     * @return covariance matrix, where the columns of data1 are numbered
     *   first, the columns of data2 after that, and finally the columns
     *   of data3.
     */
    public static double[][] covarianceMatrix(double[][] data1,
            double[][] data2, double[][] data3) {
        int numVariables1 = data1[0].length;
        int numVariables2 = data2[0].length;
        int numVariables3 = data3[0].length;
        int numVariables = numVariables1 + numVariables2 + numVariables3;
        double[][] covariances = new double[numVariables][numVariables];
        // Compute means of each variable once up front to save time
        double[] means1 = new double[numVariables1];
        double[] means2 = new double[numVariables2];
        double[] means3 = new double[numVariables3];
        for (int r = 0; r < numVariables1; r++) {
            means1[r] = mean(data1, r);
        }
        for (int r = 0; r < numVariables2; r++) {
            means2[r] = mean(data2, r);
        }
        for (int r = 0; r < numVariables3; r++) {
            means3[r] = mean(data3, r);
        }
        // Now compute the covariances:
        for (int r = 0; r < numVariables1; r++) {
            // Compute the covariances internal to data1:
            for (int c = r; c < numVariables1; c++) {
                // Compute the covariance between variable r and c:
                covariances[r][c] = covarianceTwoColumns(data1, r, c,
                        means1[r], means1[c]);
                // And of course this is symmetric between c and r:
                covariances[c][r] = covariances[r][c];
            }
            // Compute the covariances between data1 and data2
            for (int c = 0; c < numVariables2; c++) {
                // Compute the covariance between variable r and c:
                covariances[r][numVariables1 + c] = covarianceTwoColumns(
                        data1, data2, r, c, means1[r], means2[c]);
                // And of course this is symmetric between c and r:
                covariances[numVariables1 + c][r] = covariances[r][numVariables1
                        + c];
            }
            // Compute the covariances between data1 and data3
            for (int c = 0; c < numVariables3; c++) {
                // Compute the covariance between variable r and c:
                covariances[r][numVariables1 + numVariables2 + c] = covarianceTwoColumns(
                        data1, data3, r, c, means1[r], means3[c]);
                // And of course this is symmetric between c and r:
                covariances[numVariables1 + numVariables2 + c][r] = covariances[r][numVariables1
                        + numVariables2 + c];
            }
        }
        // Compute the other covariances for data2
        for (int r = 0; r < numVariables2; r++) {
            // Compute the covariances internal to data2:
            for (int c = r; c < numVariables2; c++) {
                // Compute the covariance between variable r and c:
                covariances[numVariables1 + r][numVariables1 + c] = covarianceTwoColumns(
                        data2, r, c, means2[r], means2[c]);
                // And of course this is symmetric between c and r:
                covariances[numVariables1 + c][numVariables1 + r] = covariances[numVariables1
                        + r][numVariables1 + c];
            }
            // Compute the covariances between data2 and data3
            for (int c = 0; c < numVariables3; c++) {
                // Compute the covariance between variable r and c:
                covariances[numVariables1 + r][numVariables1
                        + numVariables2 + c] = covarianceTwoColumns(data2,
                        data3, r, c, means2[r], means3[c]);
                // And of course this is symmetric between c and r:
                covariances[numVariables1 + numVariables2 + c][numVariables1
                        + r] = covariances[numVariables1 + r][numVariables1
                        + numVariables2 + c];
            }
        }
        // Compute the internal covariances for data3
        for (int r = 0; r < numVariables3; r++) {
            for (int c = r; c < numVariables3; c++) {
                // Compute the covariance between variable r and c:
                covariances[numVariables1 + numVariables2 + r][numVariables1
                        + numVariables2 + c] = covarianceTwoColumns(data3,
                        r, c, means3[r], means3[c]);
                // And of course this is symmetric between c and r:
                covariances[numVariables1 + numVariables2 + c][numVariables1
                        + numVariables2 + r] = covariances[numVariables1
                        + numVariables2 + r][numVariables1 + numVariables2
                        + c];
            }
        }
        return covariances;
    }

    /**
     * Return an array of the means of each column in the 2D input
     * 
     * @param input
     * @return
     */
    public static double[] means(double[][] input) {
        double[] theMeans = sums(input);
        for (int i = 0; i < theMeans.length; i++) {
            theMeans[i] = theMeans[i] / input.length;
        }
        return theMeans;
    }

    /**
     * Return an array of the means of each column in the 2D input
     * 
     * @param input
     * @param startRow which row to start from
     * @param length how many rows to take the mean over
     * @return
     */
    public static double[] means(double[][] input, int startRow, int length) {
        double[] theMeans = sums(input, startRow, length);
        for (int i = 0; i < theMeans.length; i++) {
            theMeans[i] = theMeans[i] / length;
        }
        return theMeans;
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  a multivariate array.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     * 
     * @param data multivariate array of data; first index is time, second is 
     *    variable number
     * @param col1 variable number 1 to compute the covariance to
     * @param col2 variable number 2 to compute the covariance to
     * @return the covariance
     */
    public static double covarianceTwoColumns(double[][] data, int col1,
            int col2) {
        double mean1 = mean(data, col1);
        double mean2 = mean(data, col2);
        return covarianceTwoColumns(data, col1, col2, mean1, mean2);
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  a multivariate array.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     * 
     * @param data multivariate array of data; first index is time, second is 
     *    variable number
     * @param col1 variable number 1 to compute the covariance to
     * @param col2 variable number 2 to compute the covariance to
     * @param mean1 mean of variable 1
     * @param mean2 mean of variable 2
     * @return the covariance
     */
    public static double covarianceTwoColumns(double[][] data, int col1,
            int col2, double mean1, double mean2) {
        double c = 0;
        for (int t = 0; t < data.length; t++) {
            c += (data[t][col1] - mean1) * (data[t][col2] - mean2);
        }
        return c / (double) (data.length - 1);
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  two multivariate arrays.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     * 
     * @param data1 first multivariate array of data; first index is time, second is 
     *    variable number
     * @param data2 second multivariate array of data; first index is time, second is 
     *    variable number
     * @param col1 variable number 1 to compute the covariance to
     * @param col2 variable number 2 to compute the covariance to
     * @param mean1 mean of variable 1
     * @param mean2 mean of variable 2
     * @return the covariance
     */
    public static double covarianceTwoColumns(double[][] data1,
            double[][] data2, int col1, int col2, double mean1, double mean2) {
        double c = 0;
        for (int t = 0; t < data1.length; t++) {
            c += (data1[t][col1] - mean1) * (data2[t][col2] - mean2);
        }
        return c / (double) (data1.length - 1);
    }

    public static double mean(int[] input) {
        return sum(input) / (double) input.length;
    }

    public static double mean(double[] input) {
        return sum(input) / (double) input.length;
    }

    public static double mean(double[] input, int startIndex, int length) {
        return sum(input, startIndex, length) / (double) length;
    }

    public static double mean(double[][] input) {
        return sum(input) / (double) (input.length * input[0].length);
    }

    /**
     * Compute the mean along the given column 
     * 
     * @param input
     * @param column
     * @return
     */
    public static double mean(double[][] input, int column) {
        return sum(input, column) / (double) input.length;
    }

    /**
     * Return an array of the sums for each column in the 2D input
     * 
     * @param input
     * @return
     */
    public static double[] sums(double[][] input) {
        double[] theSums = new double[input[0].length];
        for (int r = 0; r < input.length; r++) {
            for (int c = 0; c < input[r].length; c++) {
                theSums[c] += input[r][c];
            }
        }
        return theSums;
    }

    /**
     * Return an array of the sums for each column in the 2D input
     * 
     * @param input
     * @param startRow which row to start from
     * @param length how many rows to take the sum over
     * @return
     */
    public static double[] sums(double[][] input, int startRow, int length) {
        double[] theSums = new double[input[0].length];
        for (int r = startRow; r < startRow + length; r++) {
            for (int c = 0; c < input[r].length; c++) {
                theSums[c] += input[r][c];
            }
        }
        return theSums;
    }

    public static double sum(double[] input) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i];
        }
        return total;
    }

    public static double sum(double[] input, int startIndex, int length) {
        double total = 0;
        for (int i = startIndex; i < startIndex + length; i++) {
            total += input[i];
        }
        return total;
    }

    public static double sum(double[][] input) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            for (int j = 0; j < input[i].length; j++) {
                total += input[i][j];
            }
        }
        return total;
    }

    public static double sum(double[][] input, int column) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i][column];
        }
        return total;
    }

    public static int sum(int[] input) {
        int total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i];
        }
        return total;
    }

    public static int sum(int[][] input) {
        int total = 0;
        for (int i = 0; i < input.length; i++) {
            for (int j = 0; j < input[i].length; j++) {
                total += input[i][j];
            }
        }
        return total;
    }
}

Related Tutorials