Here you can find the source of covarianceMatrix(double[][] data)
Parameter | Description |
---|---|
data | multivariate array of data; first index is time, second is variable number |
public static double[][] covarianceMatrix(double[][] data)
//package com.java2s; /******************************************************************************* * Copyright 2013 Karlsruhe Institute of Technology. This Work has been partially supported by the EIT ICT Labs funded research project Towards a Mobile Cloud (activity CLD 12206). * // w ww. jav a 2 s .c o m * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ public class Main { /** * Compute the covariance matrix between all column pairs (variables) in the * multivariate data set * * @param data multivariate array of data; first index is time, second is * variable number * @return covariance matrix */ public static double[][] covarianceMatrix(double[][] data) { return covarianceMatrix(data, means(data)); } /** * Compute the covariance matrix between all column pairs (variables) in the * multivariate data set * * @param data multivariate array of data; first index is time, second is * variable number * @param means the mean of each variable (column) in the data * @return covariance matrix */ public static double[][] covarianceMatrix(double[][] data, double[] means) { int numVariables = data[0].length; double[][] covariances = new double[numVariables][numVariables]; for (int r = 0; r < numVariables; r++) { for (int c = r; c < numVariables; c++) { // Compute the covariance between variable r and c: covariances[r][c] = covarianceTwoColumns(data, r, c, means[r], means[c]); // And of course this is symmetric between c and r: covariances[c][r] = covariances[r][c]; } } return covariances; } /** * Compute the covariance matrix between all column pairs (variables) in the * multivariate data set, which consists of two separate * multivariate vectors. * * @param data1 multivariate array of data; first index is time, second is * variable number * @param data2 a second multivariate array of data, which can be though * of as extensions of rows of the first. * @return covariance matrix, where the columns of dat1 are numbered * first, and the columns of data2 after that. */ public static double[][] covarianceMatrix(double[][] data1, double[][] data2) { return covarianceMatrix(data1, data2, 0); } /** * Compute the covariance matrix between all column pairs (variables) in the * multivariate data set, which consists of two separate * multivariate vectors. * * @param data1 multivariate array of data; first index is time, second is * variable number * @param data2 a second multivariate array of data, which can be though * of as extensions of rows of the first. * @param delay compute the lagged covariance of the given delay from * data1 to data2 (assumes delay >= 0); i.e. compute correlation * between data1[x] and data2[x+delay]. * @return covariance matrix, where the columns of dat1 are numbered * first, and the columns of data2 after that. */ public static double[][] covarianceMatrix(double[][] data1, double[][] data2, int delay) { if (delay > 0) { // Trim out the last delay rows of data1, and the // first delay rows of data2: double[][] data1Trimmed = new double[data1.length - delay][]; double[][] data2Trimmed = new double[data2.length - delay][]; for (int x = 0; x < data1.length - delay; x++) { data1Trimmed[x] = data1[x]; data2Trimmed[x] = data2[x + delay]; } // Just overwrite our local copy of the pointers to the // original data data1 = data1Trimmed; data2 = data2Trimmed; } int numVariables1 = data1[0].length; int numVariables2 = data2[0].length; int numVariables = numVariables1 + numVariables2; double[][] covariances = new double[numVariables][numVariables]; // Compute means of each variable once up front to save time double[] means1 = new double[numVariables1]; double[] means2 = new double[numVariables2]; for (int r = 0; r < numVariables1; r++) { means1[r] = mean(data1, r); } for (int r = 0; r < numVariables2; r++) { means2[r] = mean(data2, r); } // Now compute the covariances: for (int r = 0; r < numVariables1; r++) { // Compute the covariances internal to data1: for (int c = r; c < numVariables1; c++) { // Compute the covariance between variable r and c: covariances[r][c] = covarianceTwoColumns(data1, r, c, means1[r], means1[c]); // And of course this is symmetric between c and r: covariances[c][r] = covariances[r][c]; } // Compute the covariances between data1 and data2 for (int c = 0; c < numVariables2; c++) { // Compute the covariance between variable r and c: covariances[r][numVariables1 + c] = covarianceTwoColumns(data1, data2, r, c, means1[r], means2[c]); // And of course this is symmetric between c and r: covariances[numVariables1 + c][r] = covariances[r][numVariables1 + c]; } } // Now compute the covariances internal to data2: for (int r = 0; r < numVariables2; r++) { for (int c = r; c < numVariables2; c++) { // Compute the covariance between variable r and c: covariances[numVariables1 + r][numVariables1 + c] = covarianceTwoColumns(data2, r, c, means2[r], means2[c]); // And of course this is symmetric between c and r: covariances[numVariables1 + c][numVariables1 + r] = covariances[numVariables1 + r][numVariables1 + c]; } } return covariances; } /** * Return an array of the means of each column in the 2D input * * @param input * @return */ public static double[] means(double[][] input) { double[] theMeans = sums(input); for (int i = 0; i < theMeans.length; i++) { theMeans[i] = theMeans[i] / input.length; } return theMeans; } /** * Return an array of the means of each column in the 2D input * * @param input * @param startRow which row to start from * @param length how many rows to take the mean over * @return */ public static double[] means(double[][] input, int startRow, int length) { double[] theMeans = sums(input, startRow, length); for (int i = 0; i < theMeans.length; i++) { theMeans[i] = theMeans[i] / length; } return theMeans; } /** * <p>Returns the covariance between two columns of data in * a multivariate array.</p> * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a> * </p> * * @param data multivariate array of data; first index is time, second is * variable number * @param col1 variable number 1 to compute the covariance to * @param col2 variable number 2 to compute the covariance to * @return the covariance */ public static double covarianceTwoColumns(double[][] data, int col1, int col2) { double mean1 = mean(data, col1); double mean2 = mean(data, col2); return covarianceTwoColumns(data, col1, col2, mean1, mean2); } /** * <p>Returns the covariance between two columns of data in * a multivariate array.</p> * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a> * </p> * * @param data multivariate array of data; first index is time, second is * variable number * @param col1 variable number 1 to compute the covariance to * @param col2 variable number 2 to compute the covariance to * @param mean1 mean of variable 1 * @param mean2 mean of variable 2 * @return the covariance */ public static double covarianceTwoColumns(double[][] data, int col1, int col2, double mean1, double mean2) { double c = 0; for (int t = 0; t < data.length; t++) { c += (data[t][col1] - mean1) * (data[t][col2] - mean2); } return c / data.length; } /** * <p>Returns the covariance between two columns of data in * two multivariate arrays.</p> * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a> * </p> * * @param data1 first multivariate array of data; first index is time, second is * variable number * @param data2 second multivariate array of data; first index is time, second is * variable number * @param col1 variable number 1 to compute the covariance to * @param col2 variable number 2 to compute the covariance to * @param mean1 mean of variable 1 * @param mean2 mean of variable 2 * @return the covariance */ public static double covarianceTwoColumns(double[][] data1, double[][] data2, int col1, int col2, double mean1, double mean2) { double c = 0; for (int t = 0; t < data1.length; t++) { c += (data1[t][col1] - mean1) * (data2[t][col2] - mean2); } return c / data1.length; } public static double mean(int[] input) { return sum(input) / (double) input.length; } public static double mean(double[] input) { return sum(input) / input.length; } public static double mean(double[] input, int startIndex, int length) { return sum(input, startIndex, length) / length; } public static double mean(double[][] input) { return sum(input) / (input.length * input[0].length); } /** * Compute the mean along the given column * * @param input * @param column * @return */ public static double mean(double[][] input, int column) { return sum(input, column) / input.length; } /** * Return an array of the sums for each column in the 2D input * * @param input * @return */ public static double[] sums(double[][] input) { double[] theSums = new double[input[0].length]; for (int r = 0; r < input.length; r++) { for (int c = 0; c < input[r].length; c++) { theSums[c] += input[r][c]; } } return theSums; } /** * Return an array of the sums for each column in the 2D input * * @param input * @param startRow which row to start from * @param length how many rows to take the sum over * @return */ public static double[] sums(double[][] input, int startRow, int length) { double[] theSums = new double[input[0].length]; for (int r = startRow; r < startRow + length; r++) { for (int c = 0; c < input[r].length; c++) { theSums[c] += input[r][c]; } } return theSums; } public static double sum(double[] input) { double total = 0; for (int i = 0; i < input.length; i++) { total += input[i]; } return total; } public static double sum(double[] input, int startIndex, int length) { double total = 0; for (int i = startIndex; i < startIndex + length; i++) { total += input[i]; } return total; } public static double sum(double[][] input) { double total = 0; for (int i = 0; i < input.length; i++) { for (int j = 0; j < input[i].length; j++) { total += input[i][j]; } } return total; } public static double sum(double[][] input, int column) { double total = 0; for (int i = 0; i < input.length; i++) { total += input[i][column]; } return total; } public static int sum(int[] input) { int total = 0; for (int i = 0; i < input.length; i++) { total += input[i]; } return total; } public static int sum(int[][] input) { int total = 0; for (int i = 0; i < input.length; i++) { for (int j = 0; j < input[i].length; j++) { total += input[i][j]; } } return total; } }