Java Matrix Covariance covarianceMatrix(double[][] data)

Here you can find the source of covarianceMatrix(double[][] data)

Description

Compute the covariance matrix between all column pairs (variables) in the multivariate data set

License

Apache License

Parameter

Parameter Description
data multivariate array of data; first index is time, second is variable number

Return

covariance matrix

Declaration

public static double[][] covarianceMatrix(double[][] data) 

Method Source Code

//package com.java2s;
/*******************************************************************************
 * Copyright 2013 Karlsruhe Institute of Technology. This Work has been partially supported by the EIT ICT Labs funded research project Towards a Mobile Cloud (activity CLD 12206).
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/

public class Main {
    /**
     * Computes the covariance matrix between all column pairs (variables) in the
     * multivariate data set, deriving the column means from the data itself.
     *
     * <p>Covariances are normalised by the number of rows (N), not N - 1.</p>
     *
     * @param data multivariate array of data; first index is time, second is
     *    variable number. Must contain at least one row, since the number of
     *    variables is read from the first row.
     * @return square, symmetric covariance matrix with one row and column per
     *    variable
     */
    public static double[][] covarianceMatrix(double[][] data) {
        return covarianceMatrix(data, means(data));
    }

    /**
     * Computes the covariance matrix between all column pairs (variables) in the
     * multivariate data set, using pre-computed column means.
     *
     * <p>Covariances are normalised by the number of rows (N), not N - 1.</p>
     *
     * @param data multivariate array of data; first index is time, second is
     *    variable number. Must contain at least one row.
     * @param means the mean of each variable (column) in the data
     * @return square, symmetric covariance matrix with one row and column per
     *    variable
     */
    public static double[][] covarianceMatrix(double[][] data, double[] means) {
        int numVariables = data[0].length;
        double[][] covariances = new double[numVariables][numVariables];
        for (int r = 0; r < numVariables; r++) {
            // Only the upper triangle is computed; the lower one is mirrored.
            for (int c = r; c < numVariables; c++) {
                double covariance = covarianceTwoColumns(data, r, c, means[r], means[c]);
                covariances[r][c] = covariance;
                covariances[c][r] = covariance;
            }
        }
        return covariances;
    }

    /**
     * Computes the covariance matrix between all column pairs (variables) in the
     * multivariate data set, which consists of two separate multivariate
     * vectors. Equivalent to calling
     * {@link #covarianceMatrix(double[][], double[][], int)} with a delay of 0.
     *
     * @param data1 multivariate array of data; first index is time, second is
     *    variable number
     * @param data2 a second multivariate array of data, which can be thought
     *    of as extensions of rows of the first
     * @return covariance matrix, where the columns of data1 are numbered
     *    first, and the columns of data2 after that
     */
    public static double[][] covarianceMatrix(double[][] data1, double[][] data2) {
        return covarianceMatrix(data1, data2, 0);
    }

    /**
     * Computes the covariance matrix between all column pairs (variables) in the
     * multivariate data set, which consists of two separate multivariate
     * vectors, optionally lagging the second against the first.
     *
     * <p>The rows of data1 and data2 are paired by index, so both arrays are
     * expected to have the same number of rows; the cross-covariances iterate
     * over the row count of data1 only. Covariances are normalised by the
     * number of rows (N), not N - 1.</p>
     *
     * @param data1 multivariate array of data; first index is time, second is
     *    variable number
     * @param data2 a second multivariate array of data, which can be thought
     *    of as extensions of rows of the first
     * @param delay compute the lagged covariance of the given delay from
     *    data1 to data2 (assumes delay >= 0); i.e. compute the covariance
     *    between data1[x] and data2[x+delay]. Values that are not positive
     *    leave the data untrimmed.
     * @return covariance matrix, where the columns of data1 are numbered
     *    first, and the columns of data2 after that
     */
    public static double[][] covarianceMatrix(double[][] data1, double[][] data2, int delay) {
        if (delay > 0) {
            // Trim out the last delay rows of data1, and the
            //  first delay rows of data2, so that row x of data1 lines up
            //  with row x + delay of the original data2:
            double[][] data1Trimmed = new double[data1.length - delay][];
            double[][] data2Trimmed = new double[data2.length - delay][];
            for (int x = 0; x < data1.length - delay; x++) {
                data1Trimmed[x] = data1[x];
                data2Trimmed[x] = data2[x + delay];
            }
            // Just overwrite our local copy of the references to the
            //  original data; the caller's arrays are not modified.
            data1 = data1Trimmed;
            data2 = data2Trimmed;
        }

        int numVariables1 = data1[0].length;
        int numVariables2 = data2[0].length;
        int numVariables = numVariables1 + numVariables2;
        double[][] covariances = new double[numVariables][numVariables];
        // Compute means of each variable once up front to save time
        double[] means1 = new double[numVariables1];
        double[] means2 = new double[numVariables2];
        for (int r = 0; r < numVariables1; r++) {
            means1[r] = mean(data1, r);
        }
        for (int r = 0; r < numVariables2; r++) {
            means2[r] = mean(data2, r);
        }
        // Now compute the covariances:
        for (int r = 0; r < numVariables1; r++) {
            // Covariances internal to data1 (upper triangle, then mirrored):
            for (int c = r; c < numVariables1; c++) {
                double covariance = covarianceTwoColumns(data1, r, c, means1[r], means1[c]);
                covariances[r][c] = covariance;
                covariances[c][r] = covariance;
            }
            // Cross-covariances between data1 and data2; the data2 variables
            //  are offset by numVariables1 in the result matrix:
            for (int c = 0; c < numVariables2; c++) {
                double covariance = covarianceTwoColumns(data1, data2, r, c, means1[r], means2[c]);
                covariances[r][numVariables1 + c] = covariance;
                covariances[numVariables1 + c][r] = covariance;
            }
        }
        // Now compute the covariances internal to data2:
        for (int r = 0; r < numVariables2; r++) {
            for (int c = r; c < numVariables2; c++) {
                double covariance = covarianceTwoColumns(data2, r, c, means2[r], means2[c]);
                covariances[numVariables1 + r][numVariables1 + c] = covariance;
                covariances[numVariables1 + c][numVariables1 + r] = covariance;
            }
        }
        return covariances;
    }

    /**
     * Returns an array of the means of each column in the 2D input.
     *
     * @param input 2D array; first index is row, second is column. Must
     *    contain at least one row.
     * @return array holding, for each column, the column sum divided by the
     *    number of rows
     */
    public static double[] means(double[][] input) {
        double[] theMeans = sums(input);
        for (int i = 0; i < theMeans.length; i++) {
            theMeans[i] = theMeans[i] / input.length;
        }
        return theMeans;
    }

    /**
     * Returns an array of the means of each column in the 2D input, taken
     * over a contiguous range of rows.
     *
     * @param input 2D array; first index is row, second is column
     * @param startRow which row to start from
     * @param length how many rows to take the mean over
     * @return array holding, for each column, the sum over the selected rows
     *    divided by {@code length}
     */
    public static double[] means(double[][] input, int startRow, int length) {
        double[] theMeans = sums(input, startRow, length);
        for (int i = 0; i < theMeans.length; i++) {
            theMeans[i] = theMeans[i] / length;
        }
        return theMeans;
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  a multivariate array, computing the column means first.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     *
     * @param data multivariate array of data; first index is time, second is
     *    variable number
     * @param col1 variable number 1 to compute the covariance to
     * @param col2 variable number 2 to compute the covariance to
     * @return the covariance, normalised by the number of rows
     */
    public static double covarianceTwoColumns(double[][] data, int col1, int col2) {
        double mean1 = mean(data, col1);
        double mean2 = mean(data, col2);
        return covarianceTwoColumns(data, col1, col2, mean1, mean2);
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  a multivariate array, using pre-computed means.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     *
     * @param data multivariate array of data; first index is time, second is
     *    variable number
     * @param col1 variable number 1 to compute the covariance to
     * @param col2 variable number 2 to compute the covariance to
     * @param mean1 mean of variable 1
     * @param mean2 mean of variable 2
     * @return the covariance, normalised by the number of rows
     */
    public static double covarianceTwoColumns(double[][] data, int col1, int col2, double mean1, double mean2) {
        double c = 0;
        for (int t = 0; t < data.length; t++) {
            c += (data[t][col1] - mean1) * (data[t][col2] - mean2);
        }
        return c / data.length;
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  two multivariate arrays, using pre-computed means.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     *
     * @param data1 first multivariate array of data; first index is time, second is
     *    variable number
     * @param data2 second multivariate array of data; first index is time, second is
     *    variable number. Rows are paired by index with data1, and only the
     *    first {@code data1.length} rows are read.
     * @param col1 variable number 1 (a column of data1) to compute the covariance to
     * @param col2 variable number 2 (a column of data2) to compute the covariance to
     * @param mean1 mean of variable 1
     * @param mean2 mean of variable 2
     * @return the covariance, normalised by the number of rows of data1
     */
    public static double covarianceTwoColumns(double[][] data1, double[][] data2, int col1, int col2, double mean1,
            double mean2) {
        double c = 0;
        for (int t = 0; t < data1.length; t++) {
            c += (data1[t][col1] - mean1) * (data2[t][col2] - mean2);
        }
        return c / data1.length;
    }

    /**
     * Returns the arithmetic mean of the given integers.
     *
     * @param input values to average
     * @return the mean as a double; NaN for an empty array
     */
    public static double mean(int[] input) {
        // Accumulate in a long rather than delegating to sum(int[]): that
        // method returns an int and wraps silently once the total exceeds
        // the int range, which would corrupt the mean.
        long total = 0;
        for (int value : input) {
            total += value;
        }
        return total / (double) input.length;
    }

    /**
     * Returns the arithmetic mean of the given values.
     *
     * @param input values to average
     * @return the mean; NaN for an empty array
     */
    public static double mean(double[] input) {
        return sum(input) / input.length;
    }

    /**
     * Returns the arithmetic mean of a contiguous slice of the given values.
     *
     * @param input values to average
     * @param startIndex index of the first element to include
     * @param length how many elements to take the mean over
     * @return the sum of the selected elements divided by {@code length}
     */
    public static double mean(double[] input, int startIndex, int length) {
        return sum(input, startIndex, length) / length;
    }

    /**
     * Returns the mean over all elements of the 2D input, treating it as a
     * rectangular matrix whose column count is that of the first row.
     *
     * @param input 2D array; must contain at least one row
     * @return the total of all elements divided by rows times columns
     */
    public static double mean(double[][] input) {
        // Multiply in double so that the element count cannot wrap around
        // the int range for very large matrices.
        return sum(input) / ((double) input.length * input[0].length);
    }

    /**
     * Computes the mean along the given column.
     *
     * @param input 2D array; first index is row, second is column
     * @param column index of the column to average
     * @return the column sum divided by the number of rows
     */
    public static double mean(double[][] input, int column) {
        return sum(input, column) / input.length;
    }

    /**
     * Returns an array of the sums for each column in the 2D input.
     *
     * @param input 2D array; first index is row, second is column. Must
     *    contain at least one row; the result is sized from the first row.
     * @return array holding the sum of each column
     */
    public static double[] sums(double[][] input) {
        double[] theSums = new double[input[0].length];
        for (int r = 0; r < input.length; r++) {
            for (int c = 0; c < input[r].length; c++) {
                theSums[c] += input[r][c];
            }
        }
        return theSums;
    }

    /**
     * Returns an array of the sums for each column in the 2D input, taken
     * over a contiguous range of rows.
     *
     * @param input 2D array; first index is row, second is column. Must
     *    contain at least one row; the result is sized from the first row.
     * @param startRow which row to start from
     * @param length how many rows to take the sum over
     * @return array holding the sum of each column over the selected rows
     */
    public static double[] sums(double[][] input, int startRow, int length) {
        double[] theSums = new double[input[0].length];
        for (int r = startRow; r < startRow + length; r++) {
            for (int c = 0; c < input[r].length; c++) {
                theSums[c] += input[r][c];
            }
        }
        return theSums;
    }

    /**
     * Returns the sum of all elements of the array.
     *
     * @param input values to add up
     * @return the total; 0 for an empty array
     */
    public static double sum(double[] input) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i];
        }
        return total;
    }

    /**
     * Returns the sum of a contiguous slice of the array.
     *
     * @param input values to add up
     * @param startIndex index of the first element to include
     * @param length how many elements to include
     * @return the total of the selected elements
     */
    public static double sum(double[] input, int startIndex, int length) {
        double total = 0;
        for (int i = startIndex; i < startIndex + length; i++) {
            total += input[i];
        }
        return total;
    }

    /**
     * Returns the sum of all elements of the 2D array.
     *
     * @param input 2D array of values to add up
     * @return the total over every row and column
     */
    public static double sum(double[][] input) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            for (int j = 0; j < input[i].length; j++) {
                total += input[i][j];
            }
        }
        return total;
    }

    /**
     * Returns the sum of one column of the 2D array.
     *
     * @param input 2D array; first index is row, second is column
     * @param column index of the column to add up
     * @return the total of the given column over all rows
     */
    public static double sum(double[][] input, int column) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i][column];
        }
        return total;
    }

    /**
     * Returns the sum of all elements of the array using int arithmetic.
     *
     * @param input values to add up
     * @return the total; wraps around silently if it exceeds the int range
     */
    public static int sum(int[] input) {
        int total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i];
        }
        return total;
    }

    /**
     * Returns the sum of all elements of the 2D array using int arithmetic.
     *
     * @param input 2D array of values to add up
     * @return the total; wraps around silently if it exceeds the int range
     */
    public static int sum(int[][] input) {
        int total = 0;
        for (int i = 0; i < input.length; i++) {
            for (int j = 0; j < input[i].length; j++) {
                total += input[i][j];
            }
        }
        return total;
    }
}

Related

  1. covarianceMatrix(double[][] data)