edu.cmu.tetrad.data.CovarianceMatrixOnTheFly.java Source code

Java tutorial

Introduction

Here is the source code for edu.cmu.tetrad.data.CovarianceMatrixOnTheFly.java

Source

///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015 by Peter Spirtes, Richard Scheines, Joseph   //
// Ramsey, and Clark Glymour.                                                //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetrad.data;

import cern.colt.matrix.DoubleMatrix2D;
import edu.cmu.tetrad.graph.Node;
import edu.cmu.tetrad.util.*;
import org.apache.commons.math3.linear.RealMatrix;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.text.NumberFormat;
import java.util.*;
import java.util.concurrent.RecursiveTask;

/**
 * Stores a covariance matrix together with variable names and sample size,
 * intended as a representation of a data set. When constructed from a
 * continuous data set, the matrix is not checked for positive definiteness;
 * however, when a covariance matrix is supplied, its positive definiteness is
 * always checked. If the sample size is less than the number of variables, the
 * positive definiteness is "spot-checked"--that is, checked for various
 * submatrices.
 *
 * @author Joseph Ramsey jdramsey@andrew.cmu.edu
 * @see CorrelationMatrix
 */
public class CovarianceMatrixOnTheFly implements ICovarianceMatrix {
    static final long serialVersionUID = 23L;

    /**
     * The name of the covariance matrix.
     *
     * @serial May be null.
     */
    private String name;

    /**
     * The variables (in order) for this covariance matrix.
     *
     * @serial Cannot be null.
     */
    private List<Node> variables;

    /**
     * The size of the sample from which this covariance matrix was calculated.
     *
     * @serial Range > 0.
     */
    private int sampleSize;

    /**
     * Stored matrix data. Should be square. This may be set by derived classes,
     * but it must always be set to a legitimate covariance matrix.
     *
     * @serial Cannot be null. Must be symmetric and positive definite.
     */
    private TetradMatrix matrix;

    /**
     * @serial Do not remove this field; it is needed for serialization.
     */
    private DoubleMatrix2D matrixC;

    /**
     * The list of selected variables.
     *
     * @serial Cannot be null.
     */
    private Set<Node> selectedVariables = new HashSet<Node>();

    /**
     * The knowledge for this data.
     *
     * @serial Cannot be null.
     */
    private IKnowledge knowledge = new Knowledge2();

    private double[][] vectors = null;

    private double[] variances;

    private int NTHREADS = Runtime.getRuntime().availableProcessors() * 10;
    private final int minChunk = 100;

    //=============================CONSTRUCTORS=========================//

    /**
     * Constructs a new covariance matrix from the given data set.
     *
     * @throws IllegalArgumentException if this is not a continuous data set.
     */
    public CovarianceMatrixOnTheFly(DataSet dataSet) {
        if (!dataSet.isContinuous()) {
            throw new IllegalArgumentException("Not a continuous data set.");
        }

        this.variables = Collections.unmodifiableList(dataSet.getVariables());
        this.sampleSize = dataSet.getNumRows();

        if (dataSet instanceof BoxDataSet) {

            DataBox box = ((BoxDataSet) dataSet).getDataBox();

            if (box instanceof VerticalDoubleDataBox) {
                if (!dataSet.getVariables().equals(variables))
                    throw new IllegalArgumentException();

                vectors = ((VerticalDoubleDataBox) box).getVariableVectors();

                //                final TetradMatrix doubleData = dataSet.getDoubleData();
                TetradVector means = DataUtils.means(vectors);
                DataUtils.demean(vectors, means);

                //                DataUtils.remean(doubleData, means);
            }

        }

        if (vectors == null) {
            final TetradMatrix doubleData = dataSet.getDoubleData();
            TetradVector means = DataUtils.means(doubleData);
            DataUtils.demean(doubleData, means);

            final RealMatrix realMatrix = doubleData.getRealMatrix();

            vectors = new double[variables.size()][];

            for (int i = 0; i < variables.size(); i++) {
                vectors[i] = realMatrix.getColumnVector(i).toArray();
            }

            DataUtils.remean(doubleData, means);
        }

        this.variances = new double[variables.size()];

        class VarianceTask extends RecursiveTask<Boolean> {
            private int chunk;
            private int from;
            private int to;

            public VarianceTask(int chunk, int from, int to) {
                this.chunk = chunk;
                this.from = from;
                this.to = to;
            }

            @Override
            protected Boolean compute() {
                if (to - from <= chunk) {
                    for (int i = from; i < to; i++) {
                        double d = 0.0D;

                        double[] v1 = vectors[i];

                        for (int k = 0; k < sampleSize; ++k) {
                            d += v1[k] * v1[k];
                        }

                        double v = d;
                        v /= sampleSize;

                        variances[i] = v;

                        if (v == 0)
                            System.out.println("Zero variance! " + variables.get(i));
                    }

                    return true;
                } else {
                    final int numIntervals = 4;

                    int step = (to - from) / numIntervals + 1;

                    List<VarianceTask> tasks = new ArrayList<VarianceTask>();

                    for (int i = 0; i < numIntervals; i++) {
                        tasks.add(new VarianceTask(chunk, from + i * step, Math.min(from + (i + 1) * step, to)));
                    }

                    invokeAll(tasks);

                    return true;
                }
            }
        }

        int _chunk = variables.size() / NTHREADS + 1;
        final int chunk = _chunk < minChunk ? minChunk : _chunk;

        ForkJoinPoolInstance.getInstance().getPool().invoke(new VarianceTask(chunk, 0, variables.size()));

        System.out.println("Done with variances.");

    }

    /**
     * Protected constructor to construct a new covariance matrix using the
     * supplied continuous variables and the the given symmetric, positive
     * definite matrix and sample size. The number of variables must equal the
     * dimension of the array.
     *
     * @param variables  the list of variables (in order) for the covariance
     *                   matrix.
     * @param matrix     an square array of containing covariances.
     * @param sampleSize the sample size of the data for these covariances.
     * @throws IllegalArgumentException if the given matrix is not symmetric (to
     *                                  a tolerance of 1.e-5) and positive
     *                                  definite, if the number of variables
     *                                  does not equal the dimension of m, or if
     *                                  the sample size is not positive.
     */
    public CovarianceMatrixOnTheFly(List<Node> variables, TetradMatrix matrix, int sampleSize) {
        throw new UnsupportedOperationException();
    }

    /**
     * Copy constructor.
     */
    public CovarianceMatrixOnTheFly(CovarianceMatrixOnTheFly covMatrix) {
        this(covMatrix.variables, covMatrix.matrix, covMatrix.sampleSize);
    }

    public CovarianceMatrixOnTheFly(ICovarianceMatrix covMatrix) {
        this(covMatrix.getVariables(), covMatrix.getMatrix(), covMatrix.getSampleSize());
    }

    /**
     * Generates a simple exemplar of this class to test serialization.
     *
     * @see edu.cmu.TestSerialization
     * @see edu.cmu.tetradapp.util.TetradSerializableUtils
     */
    public static ICovarianceMatrix serializableInstance() {
        List<Node> variables = new ArrayList<Node>();
        Node x = new ContinuousVariable("X");
        variables.add(x);
        TetradMatrix matrix = TetradAlgebra.identity(1);
        return new CovarianceMatrix(variables, matrix, 100); // TODO make one that works for CovarianceMatrixOnTheFly
    }

    //============================PUBLIC METHODS=========================//

    /**
     * Returns the list of variables (unmodifiable).
     */
    public final List<Node> getVariables() {
        return this.variables;
    }

    /**
     * Returns the variable names, in order.
     */
    public final List<String> getVariableNames() {
        List<String> names = new ArrayList<String>();

        for (int i = 0; i < getVariables().size(); i++) {
            Node variable = getVariables().get(i);
            names.add(variable.getName());
        }

        return names;
    }

    /**
     * Returns the variable name at the given index.
     */
    public final String getVariableName(int index) {
        if (index >= getVariables().size()) {
            throw new IllegalArgumentException("Index out of range: " + index);
        }

        Node variable = getVariables().get(index);
        return variable.getName();
    }

    /**
     * Returns the dimension of the covariance matrix.
     */
    public final int getDimension() {
        return variables.size();
    }

    /**
     * The size of the sample used to calculated this covariance matrix.
     *
     * @return The sample size (> 0).
     */
    public final int getSampleSize() {
        return this.sampleSize;
    }

    /**
     * Gets the name of the covariance matrix.
     */
    public final String getName() {
        return this.name;
    }

    /**
     * Sets the name of the covariance matrix.
     */
    public final void setName(String name) {
        this.name = name;
    }

    /**
     * Returns the knowledge associated with this data.
     */
    public final IKnowledge getKnowledge() {
        return this.knowledge.copy();
    }

    /**
     * Associates knowledge with this data.
     */
    public final void setKnowledge(IKnowledge knowledge) {
        if (knowledge == null) {
            throw new NullPointerException();
        }

        this.knowledge = knowledge.copy();
    }

    /**
     * Returns a submatrix of the covariance matrix with variables in the
     * given order.
     */
    public final ICovarianceMatrix getSubmatrix(int[] indices) {
        List<Node> submatrixVars = new LinkedList<Node>();

        for (int indice : indices) {
            submatrixVars.add(variables.get(indice));
        }

        TetradMatrix cov = matrix.getSelection(indices, indices);
        return new CovarianceMatrixOnTheFly(submatrixVars, cov, getSampleSize());
    }

    public final ICovarianceMatrix getSubmatrix(List<String> submatrixVarNames) {
        String[] varNames = new String[submatrixVarNames.size()];

        for (int i = 0; i < submatrixVarNames.size(); i++) {
            varNames[i] = submatrixVarNames.get(i);
        }

        return getSubmatrix(varNames);
    }

    /**
     * Returns a submatrix of this matrix, with variables in the given
     * order.
     */
    public final CovarianceMatrixOnTheFly getSubmatrix(String[] submatrixVarNames) {
        List<Node> submatrixVars = new LinkedList<Node>();

        for (String submatrixVarName : submatrixVarNames) {
            submatrixVars.add(getVariable(submatrixVarName));
        }

        if (!getVariables().containsAll(submatrixVars)) {
            throw new IllegalArgumentException("The variables in the submatrix "
                    + "must be in the original matrix: original==" + getVariables() + ", sub==" + submatrixVars);
        }

        for (int i = 0; i < submatrixVars.size(); i++) {
            if (submatrixVars.get(i) == null) {
                throw new NullPointerException("The variable name at index " + i + " is null.");
            }
        }

        int[] indices = new int[submatrixVars.size()];
        for (int i = 0; i < indices.length; i++) {
            indices[i] = getVariables().indexOf(submatrixVars.get(i));
        }

        TetradMatrix cov = matrix.getSelection(indices, indices);
        return new CovarianceMatrixOnTheFly(submatrixVars, cov, getSampleSize());
    }

    /**
     * Returns the value of element (i,j) in the matrix
     */
    public final double getValue(int i, int j) {
        if (i == j) {
            return variances[i];
        }

        double d = 0.0D;

        double[] v1 = vectors[i];
        double[] v2 = vectors[j];

        for (int k = 0; k < sampleSize; ++k) {
            d += v1[k] * v2[k];
        }

        double v = d;
        v /= sampleSize;
        return v;
    }

    public void setMatrix(TetradMatrix matrix) {
        this.matrix = matrix;
        checkMatrix();
    }

    public final void setSampleSize(int sampleSize) {
        if (sampleSize <= 0) {
            throw new IllegalArgumentException("Sample size must be > 0.");
        }

        this.sampleSize = sampleSize;
    }

    /**
     * Returns the size of the square matrix.
     */
    public final int getSize() {
        return matrix.rows();
    }

    /**
     * Returns a copy of the covariance matrix.
     */
    public final TetradMatrix getMatrix() {
        return matrix;
    }

    public final void select(Node variable) {
        if (variables.contains(variable)) {
            getSelectedVariables().add(variable);
        }
    }

    public final void clearSelection() {
        getSelectedVariables().clear();
    }

    public final boolean isSelected(Node variable) {
        if (variable == null) {
            throw new NullPointerException("Null variable. Try again.");
        }

        return getSelectedVariables().contains(variable);
    }

    public final List<String> getSelectedVariableNames() {
        List<String> selectedVariableNames = new LinkedList<String>();

        for (Node variable : selectedVariables) {
            selectedVariableNames.add(variable.getName());
        }

        return selectedVariableNames;
    }

    /**
     * Prints out the matrix
     */
    public final String toString() {
        NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat();

        StringBuilder buf = new StringBuilder();

        int numVars = getVariableNames().size();
        buf.append(getSampleSize()).append("\n");

        for (int i = 0; i < numVars; i++) {
            String name = getVariableNames().get(i);
            buf.append(name + "\t");
        }

        buf.append("\n");

        for (int j = 0; j < numVars; j++) {
            for (int i = 0; i <= j; i++) {
                buf.append(nf.format(getValue(i, j)) + "\t");
            }
            buf.append("\n");
        }

        //        buf.append("\nCovariance matrix:");
        //        buf.append("\n\tVariables = ").append(getVariables());
        //        buf.append("\n\tSample size = ").append(getSampleSize());
        //        buf.append("\n");
        //        buf.append(MatrixUtils.toString(matrixC.toArray()));
        //
        //        if (getKnowledge() != null && !getKnowledge().isEmpty()) {
        //            buf.append(getKnowledge());
        //        }

        return buf.toString();
    }

    public void setVariables(List<Node> variables) {
        if (variables.size() != this.variables.size())
            throw new IllegalArgumentException("Wrong # of variables.");
        this.variables = variables;
    }

    @Override
    public TetradMatrix getSelection(int[] rows, int[] cols) {
        TetradMatrix m = new TetradMatrix(rows.length, cols.length);

        for (int i = 0; i < rows.length; i++) {
            for (int j = 0; j < cols.length; j++) {
                m.set(i, j, getValue(rows[i], cols[j]));
            }
        }

        return m;
    }

    //========================PRIVATE METHODS============================//

    public Node getVariable(String name) {
        for (int i = 0; i < getVariables().size(); i++) {
            Node variable = getVariables().get(i);
            if (name.equals(variable.getName())) {
                return variable;
            }
        }

        return null;
    }

    @Override
    public void setValue(int i, int j, double v) {
        throw new IllegalArgumentException();
        //        if (i == j) {
        //            matrix.set(i, j, v);
        //        } else {
        //            matrix.set(i, j, v);
        //            matrix.set(j, i, v);
        //        }
    }

    @Override
    public void removeVariables(List<String> remaining) {
        ICovarianceMatrix cov = getSubmatrix(remaining);
        this.matrix = cov.getMatrix();
        this.variables = cov.getVariables();
        clearSelection();
    }

    private Set<Node> getSelectedVariables() {
        return selectedVariables;
    }

    /**
     * Checks the sample size, variable, and matrix information.
     */
    private void checkMatrix() {
        int numVars = variables.size();

        for (int i = 0; i < numVars; i++) {
            if (variables.get(i) == null) {
                throw new NullPointerException();
            }

            //            if (!(variables.get(i) instanceof ContinuousVariable)) {
            //                throw new IllegalArgumentException();
            //            }
        }

        if (sampleSize < 1) {
            throw new IllegalArgumentException("Sample size must be at least 1.");
        }

        if (numVars != matrix.rows() || numVars != matrix.columns()) {
            throw new IllegalArgumentException(
                    "Number of variables does not " + "equal the dimension of the matrix.");
        }

        if (sampleSize > matrix.rows()) {
            //            if (!MatrixUtils.isSymmetricPositiveDefinite(matrixC)) {
            //                throw new IllegalArgumentException("Matrix is not positive definite.");
            //                System.out.println("Matrix is not positive definite.");
            //            }
        } else {
            //            System.out.println(
            //                    "Covariance matrix cannot be positive definite since " +
            //                            "\nthere are more variables than sample points. Spot-checking " +
            //                            "\nsome submatrices.");
            //
            //            for (int from = 0; from < numVars; from += sampleSize / 2) {
            //                int to = from + sampleSize / 2;
            //
            //                if (to > numVars) {
            //                    to = numVars;
            //                }
            //
            //                int[] indices = new int[to - from];
            //
            //                for (int i = 0; i < indices.length; i++) {
            //                    indices[i] = from + i;
            //                }
            //
            //                TetradMatrix m2 = matrixC.viewSelection(indices, indices);
            //
            //                if (!MatrixUtils.isPositiveDefinite(m2)) {
            //                    System.out.println("Positive definite spot-check failed.");
            //                }
            //            }
        }
    }

    /**
     * Adds semantic checks to the default deserialization method. This method
     * must have the standard signature for a readObject method, and the body of
     * the method must begin with "s.defaultReadObject();". Other than that, any
     * semantic checks can be specified and do not need to stay the same from
     * version to version. A readObject method of this form may be added to any
     * class, even if Tetrad sessions were previously saved out using a version
     * of the class that didn't include it. (That's what the
     * "s.defaultReadObject();" is for. See J. Bloch, Effective Java, for help.
     *
     * @throws java.io.IOException
     * @throws ClassNotFoundException
     */
    private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
        s.defaultReadObject();

        if (getVariables() == null) {
            throw new NullPointerException();
        }

        if (matrixC != null) {
            matrix = new TetradMatrix(matrixC.toArray());
            matrixC = null;
        }

        if (knowledge == null) {
            throw new NullPointerException();
        }

        if (sampleSize < -1) {
            throw new IllegalStateException();
        }

        if (selectedVariables == null) {
            selectedVariables = new HashSet<Node>();
        }
    }
}