Java examples for java.lang:Math Matrix
Compute the covariance matrix between all column pairs (variables) in the multivariate data set
/*
 *  Java Information Dynamics Toolkit (JIDT)
 *  Copyright (C) 2012, Joseph T. Lizier
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

//package com.java2s;

/**
 * Static helpers for sums, means and sample covariances of data held in
 * plain arrays.
 *
 * <p>For every two-dimensional array the first index is time (the row, i.e.
 * one observation) and the second index is the variable number (the column).
 * The number of variables is always read from the first row, so arrays are
 * expected to be non-empty and rectangular.</p>
 *
 * <p>All covariances are sample covariances: the sum of products of
 * deviations from the means is divided by (number of rows - 1).</p>
 */
public class Main {

    /**
     * Compute the covariance matrix between all column pairs (variables) in
     * the multivariate data set.
     *
     * @param data multivariate array of data; first index is time, second is
     *  variable number
     * @return symmetric covariance matrix, one row/column per variable
     */
    public static double[][] covarianceMatrix(double[][] data) {
        return covarianceMatrix(data, means(data));
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in
     * the multivariate data set, using pre-computed column means.
     *
     * @param data multivariate array of data; first index is time, second is
     *  variable number
     * @param means the mean of each variable (column) in the data
     * @return symmetric covariance matrix, one row/column per variable
     */
    public static double[][] covarianceMatrix(double[][] data, double[] means) {
        int numVariables = data[0].length;
        double[][] covariances = new double[numVariables][numVariables];
        for (int r = 0; r < numVariables; r++) {
            // Only the upper triangle is computed; the matrix is symmetric
            //  so each value is mirrored into the lower triangle.
            for (int c = r; c < numVariables; c++) {
                double covariance =
                        covarianceTwoColumns(data, r, c, means[r], means[c]);
                covariances[r][c] = covariance;
                covariances[c][r] = covariance;
            }
        }
        return covariances;
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in
     * the multivariate data set, which consists of two separate
     * multivariate vectors.
     *
     * @param data1 multivariate array of data; first index is time, second is
     *  variable number
     * @param data2 a second multivariate array of data, which can be thought
     *  of as extensions of rows of the first.
     * @return covariance matrix, where the columns of data1 are numbered
     *  first, and the columns of data2 after that.
     */
    public static double[][] covarianceMatrix(double[][] data1,
            double[][] data2) {
        return covarianceMatrix(data1, data2, 0);
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in
     * the multivariate data set, which consists of two separate
     * multivariate vectors, optionally lagging the second one.
     *
     * @param data1 multivariate array of data; first index is time, second is
     *  variable number
     * @param data2 a second multivariate array of data, which can be thought
     *  of as extensions of rows of the first.
     * @param delay compute the lagged covariance of the given delay from
     *  data1 to data2; i.e. compute the covariance between data1[x] and
     *  data2[x+delay]. Values of zero or less apply no lag. A positive
     *  delay must be smaller than the number of rows of data1.
     * @return covariance matrix, where the columns of data1 are numbered
     *  first, and the columns of data2 after that.
     */
    public static double[][] covarianceMatrix(double[][] data1,
            double[][] data2, int delay) {
        double[][] rows1 = data1;
        double[][] rows2 = data2;
        if (delay > 0) {
            // Drop the last delay rows of data1 and the first delay rows of
            //  data2 so that row x of one lines up with row x + delay of the
            //  other. Only the row references are copied, not the row data.
            // Both views get the same number of rows, so no unfilled (null)
            //  rows are left behind when data2 is longer than data1.
            int numRows = data1.length - delay;
            rows1 = new double[numRows][];
            rows2 = new double[numRows][];
            for (int x = 0; x < numRows; x++) {
                rows1[x] = data1[x];
                rows2[x] = data2[x + delay];
            }
        }
        return jointCovarianceMatrix(new double[][][] {rows1, rows2});
    }

    /**
     * Compute the covariance matrix between all column pairs (variables) in
     * the multivariate data set, which consists of three separate
     * multivariate vectors.
     *
     * @param data1 multivariate array of data; first index is time, second is
     *  variable number
     * @param data2 a second multivariate array of data, which can be thought
     *  of as extensions of rows of the first.
     * @param data3 a third multivariate array of data, which can be thought
     *  of as extensions of rows of the first and second.
     * @return covariance matrix, where the columns of data1 are numbered
     *  first, the columns of data2 after that, and finally the columns
     *  of data3.
     */
    public static double[][] covarianceMatrix(double[][] data1,
            double[][] data2, double[][] data3) {
        return jointCovarianceMatrix(new double[][][] {data1, data2, data3});
    }

    /**
     * Build the covariance matrix of several data sets viewed as one wide
     * data set whose rows are the concatenation of the rows of each block.
     *
     * @param blocks the data sets, in the order their columns are numbered
     *  in the result; each has time as first index and variable number as
     *  second index
     * @return symmetric covariance matrix over all columns of all blocks
     */
    private static double[][] jointCovarianceMatrix(double[][][] blocks) {
        int numBlocks = blocks.length;
        // offsets[b] is the index of block b's first column in the result
        int[] offsets = new int[numBlocks];
        double[][] blockMeans = new double[numBlocks][];
        int numVariables = 0;
        for (int b = 0; b < numBlocks; b++) {
            offsets[b] = numVariables;
            int width = blocks[b][0].length;
            numVariables += width;
            // Compute means of each variable once up front to save time
            blockMeans[b] = new double[width];
            for (int v = 0; v < width; v++) {
                blockMeans[b][v] = mean(blocks[b], v);
            }
        }

        double[][] covariances = new double[numVariables][numVariables];
        for (int a = 0; a < numBlocks; a++) {
            for (int b = a; b < numBlocks; b++) {
                for (int r = 0; r < blockMeans[a].length; r++) {
                    // Within a single block only the upper triangle needs
                    //  computing; across blocks every pair is needed.
                    int firstColumn = (a == b) ? r : 0;
                    for (int c = firstColumn; c < blockMeans[b].length; c++) {
                        double covariance = covarianceTwoColumns(
                                blocks[a], blocks[b], r, c,
                                blockMeans[a][r], blockMeans[b][c]);
                        covariances[offsets[a] + r][offsets[b] + c] = covariance;
                        // And of course this is symmetric:
                        covariances[offsets[b] + c][offsets[a] + r] = covariance;
                    }
                }
            }
        }
        return covariances;
    }

    /**
     * Return an array of the means of each column in the 2D input.
     *
     * @param input 2D array; first index is row, second is column
     * @return the mean of each column, taken over all rows
     */
    public static double[] means(double[][] input) {
        double[] theMeans = sums(input);
        for (int i = 0; i < theMeans.length; i++) {
            theMeans[i] = theMeans[i] / input.length;
        }
        return theMeans;
    }

    /**
     * Return an array of the means of each column in the 2D input, taken
     * over a contiguous range of rows.
     *
     * @param input 2D array; first index is row, second is column
     * @param startRow which row to start from
     * @param length how many rows to take the mean over
     * @return the mean of each column over the selected rows
     */
    public static double[] means(double[][] input, int startRow, int length) {
        double[] theMeans = sums(input, startRow, length);
        for (int i = 0; i < theMeans.length; i++) {
            theMeans[i] = theMeans[i] / length;
        }
        return theMeans;
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  a multivariate array.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     *
     * @param data multivariate array of data; first index is time, second is
     *  variable number
     * @param col1 variable number 1 to compute the covariance to
     * @param col2 variable number 2 to compute the covariance to
     * @return the covariance
     */
    public static double covarianceTwoColumns(double[][] data, int col1,
            int col2) {
        double mean1 = mean(data, col1);
        double mean2 = mean(data, col2);
        return covarianceTwoColumns(data, col1, col2, mean1, mean2);
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  a multivariate array, using pre-computed means.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     *
     * @param data multivariate array of data; first index is time, second is
     *  variable number
     * @param col1 variable number 1 to compute the covariance to
     * @param col2 variable number 2 to compute the covariance to
     * @param mean1 mean of variable 1
     * @param mean2 mean of variable 2
     * @return the covariance
     */
    public static double covarianceTwoColumns(double[][] data, int col1,
            int col2, double mean1, double mean2) {
        // Same computation as the two-array form with both columns drawn
        //  from the one array.
        return covarianceTwoColumns(data, data, col1, col2, mean1, mean2);
    }

    /**
     * <p>Returns the covariance between two columns of data in
     *  two multivariate arrays.</p>
     * <p>See - <a href="http://mathworld.wolfram.com/Covariance.html">Mathworld</a>
     * </p>
     *
     * <p>The number of rows summed over, and the divisor, are taken from
     *  data1; data2 needs at least as many rows.</p>
     *
     * @param data1 first multivariate array of data; first index is time,
     *  second is variable number
     * @param data2 second multivariate array of data; first index is time,
     *  second is variable number
     * @param col1 variable number (in data1) to compute the covariance to
     * @param col2 variable number (in data2) to compute the covariance to
     * @param mean1 mean of variable 1
     * @param mean2 mean of variable 2
     * @return the covariance
     */
    public static double covarianceTwoColumns(double[][] data1,
            double[][] data2, int col1, int col2, double mean1, double mean2) {
        double c = 0;
        for (int t = 0; t < data1.length; t++) {
            c += (data1[t][col1] - mean1) * (data2[t][col2] - mean2);
        }
        // Sample covariance: divide by (number of observations - 1)
        return c / (double) (data1.length - 1);
    }

    /**
     * Compute the mean of an array of ints.
     *
     * @param input values to average
     * @return the mean of the values
     */
    public static double mean(int[] input) {
        // Accumulate in a long: an int total can wrap around for large
        //  values, which would silently corrupt the mean.
        long total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i];
        }
        return total / (double) input.length;
    }

    /**
     * Compute the mean of an array of doubles.
     *
     * @param input values to average
     * @return the mean of the values
     */
    public static double mean(double[] input) {
        return sum(input) / (double) input.length;
    }

    /**
     * Compute the mean of a contiguous section of an array of doubles.
     *
     * @param input values to average
     * @param startIndex index of the first value to include
     * @param length how many values to include
     * @return the mean of the selected values
     */
    public static double mean(double[] input, int startIndex, int length) {
        return sum(input, startIndex, length) / (double) length;
    }

    /**
     * Compute the mean over all entries of a 2D array.
     *
     * @param input 2D array; the entry count is taken as the number of rows
     *  times the length of the first row
     * @return the mean of all entries
     */
    public static double mean(double[][] input) {
        // Multiply as doubles so that the entry count cannot overflow an int
        return sum(input) / ((double) input.length * input[0].length);
    }

    /**
     * Compute the mean along the given column.
     *
     * @param input 2D array; first index is row, second is column
     * @param column which column to average
     * @return the mean of that column over all rows
     */
    public static double mean(double[][] input, int column) {
        return sum(input, column) / (double) input.length;
    }

    /**
     * Return an array of the sums for each column in the 2D input.
     *
     * @param input 2D array; first index is row, second is column
     * @return the sum of each column; sized by the length of the first row
     */
    public static double[] sums(double[][] input) {
        double[] theSums = new double[input[0].length];
        for (int r = 0; r < input.length; r++) {
            double[] row = input[r];
            for (int c = 0; c < row.length; c++) {
                theSums[c] += row[c];
            }
        }
        return theSums;
    }

    /**
     * Return an array of the sums for each column in the 2D input, taken
     * over a contiguous range of rows.
     *
     * @param input 2D array; first index is row, second is column
     * @param startRow which row to start from
     * @param length how many rows to take the sum over
     * @return the sum of each column over the selected rows; sized by the
     *  length of the first row
     */
    public static double[] sums(double[][] input, int startRow, int length) {
        double[] theSums = new double[input[0].length];
        int endRow = startRow + length;
        for (int r = startRow; r < endRow; r++) {
            double[] row = input[r];
            for (int c = 0; c < row.length; c++) {
                theSums[c] += row[c];
            }
        }
        return theSums;
    }

    /**
     * Sum all values of an array of doubles.
     *
     * @param input values to add up
     * @return the total
     */
    public static double sum(double[] input) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i];
        }
        return total;
    }

    /**
     * Sum a contiguous section of an array of doubles.
     *
     * @param input values to add up
     * @param startIndex index of the first value to include
     * @param length how many values to include
     * @return the total of the selected values
     */
    public static double sum(double[] input, int startIndex, int length) {
        double total = 0;
        int endIndex = startIndex + length;
        for (int i = startIndex; i < endIndex; i++) {
            total += input[i];
        }
        return total;
    }

    /**
     * Sum all entries of a 2D array of doubles.
     *
     * @param input 2D array; rows may have different lengths
     * @return the total of all entries
     */
    public static double sum(double[][] input) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            for (int j = 0; j < input[i].length; j++) {
                total += input[i][j];
            }
        }
        return total;
    }

    /**
     * Sum the entries of one column of a 2D array of doubles.
     *
     * @param input 2D array; first index is row, second is column
     * @param column which column to add up
     * @return the total of that column over all rows
     */
    public static double sum(double[][] input, int column) {
        double total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i][column];
        }
        return total;
    }

    /**
     * Sum all values of an array of ints.
     *
     * <p>The total is accumulated as an int, so it wraps around if it
     *  leaves the int range.</p>
     *
     * @param input values to add up
     * @return the total
     */
    public static int sum(int[] input) {
        int total = 0;
        for (int i = 0; i < input.length; i++) {
            total += input[i];
        }
        return total;
    }

    /**
     * Sum all entries of a 2D array of ints.
     *
     * <p>The total is accumulated as an int, so it wraps around if it
     *  leaves the int range.</p>
     *
     * @param input 2D array; rows may have different lengths
     * @return the total of all entries
     */
    public static int sum(int[][] input) {
        int total = 0;
        for (int i = 0; i < input.length; i++) {
            for (int j = 0; j < input[i].length; j++) {
                total += input[i][j];
            }
        }
        return total;
    }
}