Java tutorial
/////////////////////////////////////////////////////////////////////////////// // For information as to what this class does, see the Javadoc, below. // // Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, // // 2007, 2008, 2009, 2010, 2014, 2015 by Peter Spirtes, Richard Scheines, Joseph // // Ramsey, and Clark Glymour. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // /////////////////////////////////////////////////////////////////////////////// package edu.cmu.tetrad.data; //import cern.colt.matrix.DoubleMatrix2D; import edu.cmu.tetrad.graph.Node; import edu.cmu.tetrad.stat.Variance; import edu.cmu.tetrad.util.*; import org.apache.commons.math3.linear.RealMatrix; import java.io.IOException; import java.io.ObjectInputStream; import java.text.NumberFormat; import java.util.*; import java.util.concurrent.RecursiveTask; /** * Stores a covariance matrix together with variable names and sample size, * intended as a representation of a data set. When constructed from a * continuous data set, the matrix is not checked for positive definiteness; * however, when a covariance matrix is supplied, its positive definiteness is * always checked. If the sample size is less than the number of variables, the * positive definiteness is "spot-checked"--that is, checked for various * submatrices. * * @author Joseph Ramsey jdramsey@andrew.cmu.edu * @see edu.cmu.tetrad.data.CorrelationMatrix */ public class CovarianceMatrix implements ICovarianceMatrix { static final long serialVersionUID = 23L; /** * The name of the covariance matrix. * * @serial May be null. */ private String name; /** * The variables (in order) for this covariance matrix. * * @serial Cannot be null. */ private List<Node> variables; /** * The size of the sample from which this covariance matrix was calculated. * * @serial Range > 0. */ private int sampleSize; /** * Stored matrix data. Should be square. This may be set by derived classes, * but it must always be set to a legitimate covariance matrix. * * @serial Cannot be null. Must be symmetric and positive definite. */ private TetradMatrix matrix; /** * The list of selected variables. * * @serial Cannot be null. */ private Set<Node> selectedVariables = new HashSet<>(); /** * The knowledge for this data. * * @serial Cannot be null. */ private IKnowledge knowledge = new Knowledge2(); private double[][] vectors = null; //=============================CONSTRUCTORS=========================// /** * Constructs a new covariance matrix from the given data set. * * @throws IllegalArgumentException if this is not a continuous data set. */ public CovarianceMatrix(DataSet dataSet) { if (!dataSet.isContinuous()) { throw new IllegalArgumentException("Not a continuous data set."); } this.matrix = new TetradMatrix(dataSet.getNumColumns(), dataSet.getNumColumns()); this.variables = Collections.unmodifiableList(dataSet.getVariables()); this.sampleSize = dataSet.getNumRows(); if (dataSet instanceof BoxDataSet) { DataBox box = ((BoxDataSet) dataSet).getDataBox().copy(); if (box instanceof VerticalDoubleDataBox) { if (!dataSet.getVariables().equals(variables)) throw new IllegalArgumentException(); vectors = ((VerticalDoubleDataBox) box).getVariableVectors(); // final TetradMatrix doubleData = dataSet.getDoubleData(); // DataUtils.remean(doubleData, means); } } if (vectors == null) { final TetradMatrix doubleData = dataSet.getDoubleData().copy(); TetradVector means = DataUtils.means(doubleData); DataUtils.demean(doubleData, means); final RealMatrix realMatrix = doubleData.getRealMatrix(); vectors = new double[variables.size()][]; for (int i = 0; i < variables.size(); i++) { vectors[i] = realMatrix.getColumnVector(i).toArray(); } } TetradVector means = DataUtils.means(vectors); int NTHREADS = Runtime.getRuntime().availableProcessors() * 10; int _chunk = variables.size() / NTHREADS + 1; int minChunk = 100; final int chunk = _chunk < minChunk ? minChunk : _chunk; class VarianceTask extends RecursiveTask<Boolean> { private int chunk; private int from; private int to; public VarianceTask(int chunk, int from, int to) { this.chunk = chunk; this.from = from; this.to = to; } @Override protected Boolean compute() { if (to - from <= chunk) { for (int i = from; i < to; i++) { double d = 0.0D; int count = 0; double[] v1 = vectors[i]; for (int k = 0; k < sampleSize; ++k) { if (Double.isNaN(v1[k])) { continue; } d += v1[k] * v1[k]; count++; } double v = d; v /= (count - 1); matrix.set(i, i, v); if (v == 0) { System.out.println("Zero variance! " + variables.get(i)); } } return true; } else { int mid = (to + from) / 2; VarianceTask left = new VarianceTask(chunk, from, mid); VarianceTask right = new VarianceTask(chunk, mid, to); left.fork(); right.compute(); left.join(); return true; } } } class RestOfThemTask extends RecursiveTask<Boolean> { private int chunk; private int from; private int to; public RestOfThemTask(int chunk, int from, int to) { this.chunk = chunk; this.from = from; this.to = to; } @Override protected Boolean compute() { if (to - from <= chunk) { for (int i = from; i < to; i++) { for (int j = 0; j < i; j++) { double d = 0.0D; double[] v1 = vectors[i]; double[] v2 = vectors[j]; int count = 0; for (int k = 0; k < sampleSize; k++) { if (Double.isNaN(v1[k])) continue; if (Double.isNaN(v2[k])) continue; d += v1[k] * v2[k]; count++; } double v = d; v /= (count - 1); matrix.set(i, j, v); matrix.set(j, i, v); } } return true; } else { int mid = (to + from) / 2; RestOfThemTask left = new RestOfThemTask(chunk, from, mid); RestOfThemTask right = new RestOfThemTask(chunk, mid, to); left.fork(); right.compute(); left.join(); return true; } } } VarianceTask task = new VarianceTask(chunk, 0, variables.size()); ForkJoinPoolInstance.getInstance().getPool().invoke(task); RestOfThemTask task2 = new RestOfThemTask(chunk, 0, variables.size()); ForkJoinPoolInstance.getInstance().getPool().invoke(task2); DataUtils.demean(vectors, means); this.variables = Collections.unmodifiableList(dataSet.getVariables()); this.sampleSize = dataSet.getNumRows(); } /** * Protected constructor to construct a new covariance matrix using the * supplied continuous variables and the the given symmetric, positive * definite matrix and sample size. The number of variables must equal the * dimension of the array. * * @param variables the list of variables (in order) for the covariance * matrix. * @param matrix an square array of containing covariances. * @param sampleSize the sample size of the data for these covariances. * @throws IllegalArgumentException if the given matrix is not symmetric (to * a tolerance of 1.e-5) and positive * definite, if the number of variables * does not equal the dimension of m, or if * the sample size is not positive. */ public CovarianceMatrix(List<Node> variables, TetradMatrix matrix, int sampleSize) { if (variables.size() != matrix.rows() && variables.size() != matrix.columns()) { throw new IllegalArgumentException("# variables not equal to matrix dimension."); } this.variables = Collections.unmodifiableList(variables); this.sampleSize = sampleSize; this.matrix = matrix; checkMatrix(); } /** * Copy constructor. */ public CovarianceMatrix(CovarianceMatrix covMatrix) { this(covMatrix.variables, covMatrix.matrix, covMatrix.sampleSize); } public CovarianceMatrix(ICovarianceMatrix covMatrix) { this(covMatrix.getVariables(), covMatrix.getMatrix(), covMatrix.getSampleSize()); } /** * Generates a simple exemplar of this class to test serialization. */ public static ICovarianceMatrix serializableInstance() { List<Node> variables = new ArrayList<>(); Node x = new ContinuousVariable("X"); variables.add(x); TetradMatrix matrix = TetradAlgebra.identity(1); return new CovarianceMatrix(variables, matrix, 100); } //============================PUBLIC METHODS=========================// /** * @return the list of variables (unmodifiable). */ public final List<Node> getVariables() { return this.variables; } /** * @return the variable names, in order. */ public final List<String> getVariableNames() { List<String> names = new ArrayList<>(); for (int i = 0; i < getVariables().size(); i++) { Node variable = getVariables().get(i); names.add(variable.getName()); } return names; } /** * @return the variable name at the given index. */ public final String getVariableName(int index) { if (index >= getVariables().size()) { throw new IllegalArgumentException("Index out of range: " + index); } Node variable = getVariables().get(index); return variable.getName(); } /** * @return the dimension of the covariance matrix. */ public final int getDimension() { return variables.size(); } /** * The size of the sample used to calculated this covariance matrix. * * @return The sample size (> 0). */ public final int getSampleSize() { return this.sampleSize; } /** * Gets the name of the covariance matrix. */ public final String getName() { return this.name; } /** * Sets the name of the covariance matrix. */ public final void setName(String name) { this.name = name; } /** * @return the knowledge associated with this data. */ public final IKnowledge getKnowledge() { return this.knowledge.copy(); } /** * Associates knowledge with this data. */ public final void setKnowledge(IKnowledge knowledge) { if (knowledge == null) { throw new NullPointerException(); } this.knowledge = knowledge.copy(); } /** * @return a submatrix of the covariance matrix with variables in the * given order. */ public final ICovarianceMatrix getSubmatrix(int[] indices) { List<Node> submatrixVars = new LinkedList<>(); for (int indice : indices) { submatrixVars.add(variables.get(indice)); } TetradMatrix cov = matrix.getSelection(indices, indices); return new CovarianceMatrix(submatrixVars, cov, getSampleSize()); } public final ICovarianceMatrix getSubmatrix(List<String> submatrixVarNames) { String[] varNames = new String[submatrixVarNames.size()]; for (int i = 0; i < submatrixVarNames.size(); i++) { varNames[i] = submatrixVarNames.get(i); } return getSubmatrix(varNames); } /** * @return a submatrix of this matrix, with variables in the given * order. */ public final CovarianceMatrix getSubmatrix(String[] submatrixVarNames) { List<Node> submatrixVars = new LinkedList<>(); for (String submatrixVarName : submatrixVarNames) { submatrixVars.add(getVariable(submatrixVarName)); } if (!getVariables().containsAll(submatrixVars)) { throw new IllegalArgumentException("The variables in the submatrix " + "must be in the original matrix: original==" + getVariables() + ", sub==" + submatrixVars); } for (int i = 0; i < submatrixVars.size(); i++) { if (submatrixVars.get(i) == null) { throw new NullPointerException("The variable name at index " + i + " is null."); } } int[] indices = new int[submatrixVars.size()]; for (int i = 0; i < indices.length; i++) { indices[i] = getVariables().indexOf(submatrixVars.get(i)); } TetradMatrix cov = matrix.getSelection(indices, indices); return new CovarianceMatrix(submatrixVars, cov, getSampleSize()); } /** * @return the value of element (i,j) in the matrix */ public final double getValue(int i, int j) { return matrix.get(i, j); } public void setMatrix(TetradMatrix matrix) { this.matrix = matrix; checkMatrix(); } public final void setSampleSize(int sampleSize) { if (sampleSize <= 0) { throw new IllegalArgumentException("Sample size must be > 0."); } this.sampleSize = sampleSize; } /** * @return the size of the square matrix. */ public final int getSize() { return matrix.rows(); } /** * @return a copy of the covariance matrix. */ public final TetradMatrix getMatrix() { return matrix; } public final void select(Node variable) { if (variables.contains(variable)) { getSelectedVariables().add(variable); } } public final void clearSelection() { getSelectedVariables().clear(); } public final boolean isSelected(Node variable) { if (variable == null) { throw new NullPointerException("Null variable. Try again."); } return getSelectedVariables().contains(variable); } public final List<String> getSelectedVariableNames() { List<String> selectedVariableNames = new LinkedList<>(); for (Node variable : selectedVariables) { selectedVariableNames.add(variable.getName()); } return selectedVariableNames; } /** * Prints out the matrix */ public final String toString() { NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat(); StringBuilder buf = new StringBuilder(); int numVars = getVariableNames().size(); buf.append(getSampleSize()).append("\n"); for (int i = 0; i < numVars; i++) { String name = getVariableNames().get(i); buf.append(name).append("\t"); } buf.append("\n"); for (int j = 0; j < numVars; j++) { for (int i = 0; i <= j; i++) { buf.append(nf.format(getValue(i, j))).append("\t"); } buf.append("\n"); } // buf.append("\nCovariance matrix:"); // buf.append("\n\tVariables = ").append(getVariables()); // buf.append("\n\tSample size = ").append(getSampleSize()); // buf.append("\n"); // buf.append(MatrixUtils.toString(matrixC.toArray())); // // if (getKnowledge() != null && !getKnowledge().isEmpty()) { // buf.append(getKnowledge()); // } return buf.toString(); } public void setVariables(List<Node> variables) { if (variables.size() != this.variables.size()) throw new IllegalArgumentException("Wrong # of variables."); for (int i = 0; i < variables.size(); i++) { if (!variables.get(i).getName().equals(variables.get(i).getName())) { throw new IllegalArgumentException("Variable in index " + (i + 1) + " does not have the same name " + "as the variable being substituted for it."); } this.variables = variables; } } @Override public TetradMatrix getSelection(int[] rows, int[] cols) { return matrix.getSelection(rows, cols); } //========================PRIVATE METHODS============================// public Node getVariable(String name) { for (int i = 0; i < getVariables().size(); i++) { Node variable = getVariables().get(i); if (name.equals(variable.getName())) { return variable; } } return null; } @Override public void setValue(int i, int j, double v) { if (i == j) { matrix.set(i, j, v); } else { matrix.set(i, j, v); matrix.set(j, i, v); } } @Override public void removeVariables(List<String> remaining) { ICovarianceMatrix cov = getSubmatrix(remaining); this.matrix = cov.getMatrix(); this.variables = cov.getVariables(); clearSelection(); } private Set<Node> getSelectedVariables() { return selectedVariables; } /** * Checks the sample size, variable, and matrix information. */ private void checkMatrix() { for (Node variable : variables) { if (variable == null) { throw new NullPointerException(); } } if (sampleSize < 1) { throw new IllegalArgumentException("Sample size must be at least 1."); } for (int i = 0; i < matrix.rows(); i++) { for (int j = 0; j < matrix.columns(); j++) { if (Double.isNaN(matrix.get(i, j))) { throw new IllegalArgumentException("Please remove or impute missing values."); } } } } /** * Adds semantic checks to the default deserialization method. This method * must have the standard signature for a readObject method, and the body of * the method must begin with "s.defaultReadObject();". Other than that, any * semantic checks can be specified and do not need to stay the same from * version to version. A readObject method of this form may be added to any * class, even if Tetrad sessions were previously saved out using a version * of the class that didn't include it. (That's what the * "s.defaultReadObject();" is for. See J. Bloch, Effective Java, for help. * * @throws java.io.IOException * @throws ClassNotFoundException */ private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException { s.defaultReadObject(); if (getVariables() == null) { throw new NullPointerException(); } // if (matrixC != null) { // matrix = new TetradMatrix(matrixC.toArray()); // matrixC = null; // } if (knowledge == null) { throw new NullPointerException(); } if (sampleSize < -1) { throw new IllegalStateException(); } if (selectedVariables == null) { selectedVariables = new HashSet<>(); } } }