ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2006 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.datastructure.matrix;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.io.ByteArrayConverter;
import ubic.gemma.model.common.quantitationtype.PrimitiveType;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector;
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

import java.text.NumberFormat;
import java.util.*;

/**
 * A data structure that holds a reference to the data for a given expression experiment. The data can be queried by row
 * or column, returning data for a specific DesignElement or data for a specific BioAssay. This class is not database
 * aware so the vectors provided must already be 'thawed'.
 *
 * @author pavlidis
 * @author keshav
 */
public class ExpressionDataDoubleMatrix extends BaseExpressionDataMatrix<Double> {

    /** Row limit that {@link #toString()} refers to when deciding how much of the matrix to print. */
    private static final int MAX_ROWS_TO_STRING = 200;
    private static final long serialVersionUID = 1L;
    private static final Log log = LogFactory.getLog(ExpressionDataDoubleMatrix.class.getName());
    /** Backing data; rows are keyed by design element, columns by biomaterial. */
    private DoubleMatrix<CompositeSequence, BioMaterial> matrix;

    /**
     * Rank-by-mean for each design element. Only filled by {@code vectorsToMatrix} for vectors that are
     * {@link ProcessedExpressionDataVector}s; otherwise it stays empty.
     */
    private Map<CompositeSequence, Double> ranks = new HashMap<>();

    /**
     * To comply with bean specifications. Not to be instantiated.
     * <p>
     * Unlike the other constructors, this one does not call {@code init()} and leaves the backing matrix unset.
     */
    public ExpressionDataDoubleMatrix() {
    }

    /**
     * Build a matrix from the given vectors, all of which must have a quantitation type represented as
     * {@link PrimitiveType#DOUBLE}.
     *
     * @param vectors the vectors to convert; must not be null
     * @throws IllegalArgumentException if vectors is null (or, via {@code vectorsToMatrix}, empty)
     * @throws IllegalStateException if any vector has a non-double representation
     */
    public ExpressionDataDoubleMatrix(Collection<? extends DesignElementDataVector> vectors) {
        // Fail with the same message vectorsToMatrix uses instead of an NPE from the loop below.
        if (vectors == null) {
            throw new IllegalArgumentException("No vectors!");
        }

        this.init();

        for (DesignElementDataVector dedv : vectors) {
            if (!dedv.getQuantitationType().getRepresentation().equals(PrimitiveType.DOUBLE)) {
                // Message format matches the other constructors (space after the colon).
                throw new IllegalStateException("Cannot convert non-double quantitation types into double matrix: "
                        + dedv.getQuantitationType());
            }
        }

        this.selectVectors(vectors);
        this.vectorsToMatrix(vectors);
    }

    /**
     * Build a matrix from the vectors selected for the given quantitation types.
     *
     * @param dataVectors candidate vectors
     * @param quantitationTypes quantitation types to select on; each must be represented as
     *        {@link PrimitiveType#DOUBLE}
     * @throws IllegalStateException if any quantitation type has a non-double representation
     */
    public ExpressionDataDoubleMatrix(Collection<? extends DesignElementDataVector> dataVectors,
            Collection<QuantitationType> quantitationTypes) {
        this.init();

        for (QuantitationType candidate : quantitationTypes) {
            boolean isDouble = candidate.getRepresentation().equals(PrimitiveType.DOUBLE);
            if (isDouble) {
                continue;
            }
            throw new IllegalStateException(
                    "Cannot convert non-double quantitation types into double matrix: " + candidate);
        }

        Collection<DesignElementDataVector> chosen = this.selectVectors(dataVectors, quantitationTypes);
        this.vectorsToMatrix(chosen);
    }

    public ExpressionDataDoubleMatrix(Collection<? extends DesignElementDataVector> dataVectors,
            QuantitationType quantitationType) {
        this.init();
        if (!quantitationType.getRepresentation().equals(PrimitiveType.DOUBLE)) {
            throw new IllegalStateException(
                    "Cannot convert non-double quantitation types into double matrix: " + quantitationType);
        }
        Collection<DesignElementDataVector> selectedVectors = this.selectVectors(dataVectors, quantitationType);
        this.vectorsToMatrix(selectedVectors);
    }

    /**
     * Create a data matrix like sourceMatrix but backed by the values in dataMatrix. The experiment, bioassay
     * dimensions, column maps and quantitation types are taken by reference from sourceMatrix (not copied); the row
     * maps are rebuilt from the row names of dataMatrix.
     *
     * @param sourceMatrix source matrix
     * @param dataMatrix the rows can be different than the original matrix, but the columns must be the same.
     */
    public ExpressionDataDoubleMatrix(ExpressionDataDoubleMatrix sourceMatrix,
            DoubleMatrix<CompositeSequence, BioMaterial> dataMatrix) {
        this.init();

        // the data itself
        this.matrix = dataMatrix;

        // experiment-level metadata, shared with the source
        this.expressionExperiment = sourceMatrix.expressionExperiment;
        this.quantitationTypes = sourceMatrix.quantitationTypes;
        this.bioAssayDimensions = sourceMatrix.bioAssayDimensions;

        // column lookups, shared with the source
        this.columnAssayMap = sourceMatrix.columnAssayMap;
        this.columnBioAssayMapByInteger = sourceMatrix.columnBioAssayMapByInteger;
        this.columnBioMaterialMap = sourceMatrix.columnBioMaterialMap;
        this.columnBioMaterialMapByInteger = sourceMatrix.columnBioMaterialMapByInteger;

        // row lookups come from the new data
        for (int rowIndex = 0; rowIndex < dataMatrix.rows(); rowIndex++) {
            CompositeSequence rowName = dataMatrix.getRowName(rowIndex);
            this.addToRowMaps(rowIndex, rowName);
        }
    }

    /**
     * Create a matrix based on another one's selected rows. Column metadata is shared by reference with the source;
     * row values are copied into a new dense matrix in the order given by rowsToUse.
     *
     * @param rowsToUse rows
     * @param sourceMatrix matrix
     * @throws IllegalArgumentException if a requested row is not present in the source matrix
     */
    public ExpressionDataDoubleMatrix(ExpressionDataDoubleMatrix sourceMatrix, List<CompositeSequence> rowsToUse) {
        this.init();
        this.expressionExperiment = sourceMatrix.expressionExperiment;
        this.bioAssayDimensions = sourceMatrix.bioAssayDimensions;
        this.columnAssayMap = sourceMatrix.columnAssayMap;
        this.columnBioAssayMapByInteger = sourceMatrix.columnBioAssayMapByInteger;
        this.columnBioMaterialMap = sourceMatrix.columnBioMaterialMap;
        this.columnBioMaterialMapByInteger = sourceMatrix.columnBioMaterialMapByInteger;
        this.quantitationTypes = sourceMatrix.getQuantitationTypes();
        this.matrix = new DenseDoubleMatrix<>(rowsToUse.size(), sourceMatrix.columns());
        this.matrix.setColumnNames(sourceMatrix.getMatrix().getColNames());

        ExpressionDataDoubleMatrix.log
                .debug("Creating a filtered matrix " + rowsToUse.size() + " x " + sourceMatrix.columns());

        int i = 0;
        for (CompositeSequence element : rowsToUse) {
            super.addToRowMaps(i, element);
            Double[] rowVals = sourceMatrix.getRow(element);
            // An assert alone is skipped when assertions are disabled, leaving a bare NPE below.
            if (rowVals == null) {
                throw new IllegalArgumentException("Source matrix does not have row for " + element);
            }

            this.matrix.addRowName(element);

            for (int j = 0; j < rowVals.length; j++) {
                this.set(i, j, rowVals[j]);
            }
            i++;
        }
    }

    /**
     * Create a matrix given a 'raw' matrix that uses the same samples as the experiment. Only simple situations are
     * supported (one platform, not subsetting the dataset). A single new {@link BioAssayDimension} is built from the
     * matrix columns and associated with every row.
     *
     * @param ee to be associated with this
     * @param qt to be associated with this
     * @param matrix with valid row and column elements, and the data
     * @throws IllegalArgumentException if ee or matrix is null, the matrix is empty or lacks row/column names, a
     *         biomaterial has no bioassay, or the bioassays do not match those of the experiment one-to-one
     * @throws UnsupportedOperationException if a biomaterial is used in more than one bioassay
     */
    public ExpressionDataDoubleMatrix(ExpressionExperiment ee, QuantitationType qt,
            DoubleMatrix<CompositeSequence, BioMaterial> matrix) {

        if (ee == null) {
            throw new IllegalArgumentException("Experiment cannot be null");
        }

        if (matrix == null) {
            throw new IllegalArgumentException("Matrix cannot be null");
        }

        if (matrix.rows() == 0 || matrix.columns() == 0 || matrix.getRowNames().isEmpty()
                || matrix.getColNames().isEmpty()) {
            throw new IllegalArgumentException("Matrix is invalid");
        }

        this.init();
        this.expressionExperiment = ee;
        this.matrix = matrix;
        this.quantitationTypes.add(qt);

        BioAssayDimension dim = BioAssayDimension.Factory.newInstance();

        // Fetched once; the original re-fetched it for every column.
        Collection<BioAssay> experimentBioAssays = ee.getBioAssays();

        List<BioAssay> bioassays = new ArrayList<>();
        for (BioMaterial bm : matrix.getColNames()) {
            Collection<BioAssay> bioAssaysUsedIn = bm.getBioAssaysUsedIn();
            // Without this, iterator().next() below fails with a message-less NoSuchElementException.
            if (bioAssaysUsedIn == null || bioAssaysUsedIn.isEmpty()) {
                throw new IllegalArgumentException("Biomaterial " + bm + " is not used in any bioassay");
            }
            if (bioAssaysUsedIn.size() > 1) {
                throw new UnsupportedOperationException(
                        "Can't make new data from matrix that has multiple bioassays per biomaterial");
            }

            BioAssay bioAssay = bioAssaysUsedIn.iterator().next();

            if (!experimentBioAssays.contains(bioAssay)) {
                throw new IllegalArgumentException("Bioassays in the matrix must match those in the experiment");
            }

            bioassays.add(bioAssay);

        }

        if (bioassays.size() != experimentBioAssays.size()) {
            throw new IllegalArgumentException("All bioassays in the experiment must be used in the matrix");
        }

        dim.setBioAssays(bioassays);
        dim.setDescription("Built from matrix supplied to Constructor for " + ee + " from matrix");
        dim.setName(StringUtils.abbreviate("For " + ee.getShortName() + " from matrix", 255));

        assert !matrix.getRowNames().isEmpty();
        int i = 0;
        for (CompositeSequence cs : matrix.getRowNames()) {
            bioAssayDimensions.put(cs, dim);
            this.addToRowMaps(i, cs);
            i++;
        }

        assert !bioAssayDimensions.isEmpty();

        this.setUpColumnElements();

    }

    /**
     * Create a matrix based on another one's selected columns. The results will be somewhat butchered - only a single
     * BioAssayDimension is used for every row, and the ranks are taken over from the source as-is (not recomputed
     * for the selected columns).
     *
     * @param columnsToUse columns
     * @param sourceMatrix matrix
     * @param reorderedDim the reordered bioAssayDimension.
     */
    public ExpressionDataDoubleMatrix(ExpressionDataDoubleMatrix sourceMatrix, List<BioMaterial> columnsToUse,
            BioAssayDimension reorderedDim) {
        this.init();
        this.expressionExperiment = sourceMatrix.expressionExperiment;

        this.matrix = new DenseDoubleMatrix<>(sourceMatrix.rows(), columnsToUse.size());
        this.matrix.setRowNames(sourceMatrix.getMatrix().getRowNames());
        this.matrix.setColumnNames(columnsToUse);

        // same map instance as the source; not strictly correct if we are using subcolumns
        this.ranks = sourceMatrix.ranks;

        this.getQuantitationTypes().addAll(sourceMatrix.getQuantitationTypes());

        // where each selected biomaterial lives in the source matrix
        List<Integer> sourceColumns = new ArrayList<>();
        for (BioMaterial selected : columnsToUse) {
            sourceColumns.add(sourceMatrix.getColumnIndex(selected));
        }

        this.bioAssayDimensions.clear();

        int targetRow = 0;
        for (ExpressionDataMatrixRowElement rowElement : sourceMatrix.getRowElements()) {
            CompositeSequence probe = rowElement.getDesignElement();
            super.addToRowMaps(targetRow, probe);

            Double[] sourceValues = sourceMatrix.getRow(probe);

            assert sourceValues != null : "Source matrix does not have row for " + probe;
            bioAssayDimensions.put(probe, reorderedDim);

            int targetColumn = 0;
            for (Integer sourceColumn : sourceColumns) {
                this.set(targetRow, targetColumn, sourceValues[sourceColumn]);
                targetColumn++;
            }
            targetRow++;
        }

        super.setUpColumnElements();
    }

    /**
     * @return the number of columns in the backing matrix
     */
    @Override
    public int columns() {
        return this.matrix.columns();
    }

    /**
     * Look up a single value by design element and bioassay.
     *
     * @param designElement row key
     * @param bioAssay column key
     * @return the value, or null (after logging a warning) if either key is not in this matrix
     */
    @Override
    public Double get(CompositeSequence designElement, BioAssay bioAssay) {
        Integer rowIndex = this.rowElementMap.get(designElement);
        Integer columnIndex = this.columnAssayMap.get(bioAssay);

        boolean found = rowIndex != null && columnIndex != null;
        if (found) {
            return this.matrix.get(rowIndex, columnIndex);
        }

        ExpressionDataDoubleMatrix.log.warn("No matrix element for " + designElement + ", " + bioAssay);
        return null;
    }

    /**
     * @param row row index
     * @param column column index
     * @return the value stored in the backing matrix at that position
     */
    @Override
    public Double get(int row, int column) {
        return this.matrix.get(row, column);
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Double[][] get(List<CompositeSequence> designElements, List<BioAssay> bioAssays) {
        throw new UnsupportedOperationException("Sorry, not implemented yet");
    }

    /**
     * Get the values for one bioassay.
     *
     * @param bioAssay column key
     * @return the column values, or null if the bioassay is not a column of this matrix (mirrors
     *         {@link #getRow(CompositeSequence)}, which returns null for an unknown design element)
     */
    @Override
    public Double[] getColumn(BioAssay bioAssay) {
        // Keep the lookup boxed: unboxing a missing entry would throw a NullPointerException.
        Integer index = this.columnAssayMap.get(bioAssay);
        if (index == null) {
            return null;
        }
        return this.getColumn(index);
    }

    /**
     * Get the values of one column as boxed doubles.
     *
     * @param index column index
     * @return a newly allocated array holding the boxed column values
     */
    @Override
    public Double[] getColumn(Integer index) {
        double[] rawResult = this.matrix.getColumn(index);
        assert rawResult != null;
        // Same conversion getRow(Integer) uses, instead of a hand-written boxing loop.
        return ArrayUtils.toObject(rawResult);
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Double[][] getColumns(List<BioAssay> bioAssays) {
        throw new UnsupportedOperationException("Sorry, not implemented yet");
    }

    /**
     * @return all values as a rows-by-columns array of boxed doubles; each row is whatever the backing matrix
     *         returns from {@code getRowObj}
     */
    @Override
    public Double[][] getRawMatrix() {
        int rowCount = matrix.rows();

        // Allocate only the outer array: every row slot is assigned below, so pre-allocating the inner
        // arrays (as new Double[rows][columns] would) is wasted work.
        Double[][] dMatrix = new Double[rowCount][];
        for (int i = 0; i < rowCount; i++) {
            dMatrix[i] = matrix.getRowObj(i);
        }

        return dMatrix;
    }

    /**
     * Get the values for one design element.
     *
     * @param designElement row key
     * @return the row values, or null if the design element is not a row of this matrix
     */
    @Override
    public Double[] getRow(CompositeSequence designElement) {
        Integer rowIndex = this.rowElementMap.get(designElement);
        return rowIndex == null ? null : this.getRow(rowIndex);
    }

    /**
     * Get the values of one row as boxed doubles.
     *
     * @param index row index
     * @return the boxed row values
     */
    @Override
    public Double[] getRow(Integer index) {
        return ArrayUtils.toObject(this.matrix.getRow(index));
    }

    /**
     * Get the rows for several design elements, in the order given.
     *
     * @param designElements row keys; may be null
     * @return one entry per requested element (null entries for elements not in this matrix), or null if
     *         designElements is null
     */
    @Override
    public Double[][] getRows(List<CompositeSequence> designElements) {
        if (designElements == null) {
            return null;
        }

        Double[][] rows = new Double[designElements.size()][];
        int next = 0;
        for (CompositeSequence requested : designElements) {
            rows[next++] = this.getRow(requested);
        }
        return rows;
    }

    /**
     * @return true if any cell of the backing matrix is NaN
     */
    @Override
    public boolean hasMissingValues() {
        int rowCount = matrix.rows();
        int columnCount = matrix.columns();

        for (int r = 0; r < rowCount; r++) {
            for (int c = 0; c < columnCount; c++) {
                if (Double.isNaN(matrix.get(r, c))) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * @return the number of rows in the backing matrix
     */
    @Override
    public int rows() {
        return this.matrix.rows();
    }

    /**
     * Store a value at the given position; a null value is stored as NaN.
     *
     * @param row row index
     * @param column column index
     * @param value value to store, or null for missing
     */
    @Override
    public void set(int row, int column, Double value) {
        if (value != null) {
            matrix.set(row, column, value);
            return;
        }
        matrix.set(row, column, Double.NaN);
    }

    /**
     * Convert this to a collection of {@link ProcessedExpressionDataVector}s, one per row. Ranks are not filled in.
     *
     * @return the vectors
     * @throws NoSuchElementException if this matrix has no quantitation type
     * @throws UnsupportedOperationException if this matrix has more than one quantitation type
     */
    public Collection<ProcessedExpressionDataVector> toProcessedDataVectors() {
        // Validate before doing any work, and give the empty case a descriptive message.
        Collection<QuantitationType> qts = this.getQuantitationTypes();
        if (qts.isEmpty()) {
            throw new NoSuchElementException("Cannot convert matrix that has no quantitation type");
        }
        if (qts.size() > 1) {
            throw new UnsupportedOperationException(
                    "Cannot convert matrix that has more than one quantitation type");
        }
        QuantitationType qt = qts.iterator().next();

        int numRows = this.rows();
        // Loop-invariant lookups are resolved once; the dimension is only resolved when there is a row to
        // attach it to, as was the case when it was looked up inside the loop.
        BioAssayDimension dimension = numRows > 0 ? this.getBestBioAssayDimension() : null;
        List<CompositeSequence> rowNames = this.getRowNames();

        ByteArrayConverter bac = new ByteArrayConverter();
        Collection<ProcessedExpressionDataVector> result = new HashSet<>();

        for (int i = 0; i < numRows; i++) {

            Double[] data = this.getRow(i);

            ProcessedExpressionDataVector v = ProcessedExpressionDataVector.Factory.newInstance();
            v.setBioAssayDimension(dimension);
            v.setDesignElement(rowNames.get(i));
            v.setQuantitationType(qt);
            v.setData(bac.doubleArrayToBytes(data));
            v.setExpressionExperiment(this.expressionExperiment);
            // we don't fill in the ranks because we only have the mean value here.

            result.add(v);
        }

        return result;
    }

    /**
     * Same as toProcessedDataVectors but uses RawExpressionDataVector: one vector per row, ranks not filled in.
     *
     * @return Convert this to a collection of vectors.
     * @throws NoSuchElementException if this matrix has no quantitation type
     * @throws UnsupportedOperationException if this matrix has more than one quantitation type
     */
    public Collection<RawExpressionDataVector> toRawDataVectors() {
        // Validate before doing any work, and give the empty case a descriptive message.
        Collection<QuantitationType> qts = this.getQuantitationTypes();
        if (qts.isEmpty()) {
            throw new NoSuchElementException("Cannot convert matrix that has no quantitation type");
        }
        if (qts.size() > 1) {
            throw new UnsupportedOperationException(
                    "Cannot convert matrix that has more than one quantitation type");
        }
        QuantitationType qt = qts.iterator().next();

        int numRows = this.rows();
        // Loop-invariant lookups are resolved once; the dimension is only resolved when there is a row to
        // attach it to, as was the case when it was looked up inside the loop.
        BioAssayDimension dimension = numRows > 0 ? this.getBestBioAssayDimension() : null;
        List<CompositeSequence> rowNames = this.getRowNames();

        ByteArrayConverter bac = new ByteArrayConverter();
        Collection<RawExpressionDataVector> result = new HashSet<>();

        for (int i = 0; i < numRows; i++) {

            Double[] data = this.getRow(i);

            RawExpressionDataVector v = RawExpressionDataVector.Factory.newInstance();
            v.setBioAssayDimension(dimension);
            v.setDesignElement(rowNames.get(i));
            v.setQuantitationType(qt);
            v.setData(bac.doubleArrayToBytes(data));
            v.setExpressionExperiment(this.expressionExperiment);
            // we don't fill in the ranks because we only have the mean value here.

            result.add(v);
        }

        assert result.size() == numRows;

        return result;
    }

    /**
     * @return the backing matrix itself (not a copy)
     */
    public DoubleMatrix<CompositeSequence, BioMaterial> getMatrix() {
        return this.matrix;
    }

    /**
     * Expression level ranks (based on mean signal intensity in the vectors), keyed by design element. The map is
     * empty if the vectors used to construct the matrix were not ProcessedExpressionDataVectors.
     *
     * @return the internal rank map (not a copy)
     */
    public Map<CompositeSequence, Double> getRanks() {
        return ranks;
    }

    /**
     * Get one row as primitives, exactly as the backing matrix returns it.
     *
     * @param index row index
     * @return the row values
     */
    public double[] getRawRow(Integer index) {
        return this.matrix.getRow(index);
    }

    /**
     * @return the row names of the backing matrix
     */
    public List<CompositeSequence> getRowNames() {
        DoubleMatrix<CompositeSequence, BioMaterial> backing = this.getMatrix();
        return backing.getRowNames();
    }

    /**
     * Store a value for the given design element and bioassay; a null value is stored as NaN, as in
     * {@link #set(int, int, Double)}.
     *
     * @param designElement row key
     * @param bioAssay column key
     * @param value value to store, or null for missing
     */
    public void set(CompositeSequence designElement, BioAssay bioAssay, Double value) {
        int row = this.getRowIndex(designElement);
        int column = this.getColumnIndex(bioAssay);
        // Go through the index-based setter so null is mapped to NaN rather than handed to the backing matrix.
        this.set(row, column, value);
    }

    /**
     * Sets the row of matrix to the input data. If data is shorter than the row, only the leading columns are
     * replaced. Null elements are stored as NaN.
     *
     * @param rowIndex The row index of the data in the matrix to be replaced.
     * @param data The input data.
     * @throws IndexOutOfBoundsException if rowIndex is negative or not less than the number of rows
     * @throws IllegalArgumentException if data has more elements than the matrix has columns
     */
    @SuppressWarnings("unused") // Useful interface
    public void setRow(int rowIndex, Double[] data) {
        int numRows = this.matrix.rows();
        // Valid indices are 0 .. rows-1; the previous check (rowIndex > rows) let rowIndex == rows through.
        if (rowIndex < 0 || rowIndex >= numRows) {
            throw new IndexOutOfBoundsException(
                    "Specified row index " + rowIndex + " is outside the matrix of size " + numRows + ".");
        }

        // Reject over-long input up front so the row is never left partially written.
        int numColumns = this.matrix.columns();
        if (data.length > numColumns) {
            throw new IllegalArgumentException(
                    "Data has " + data.length + " values but the matrix has only " + numColumns + " columns.");
        }

        for (int j = 0; j < data.length; j++) {
            // delegate so null is stored as NaN
            this.set(rowIndex, j, data[j]);
        }
    }

    /**
     * Tab-delimited text rendering: a size line, a header line of "biomaterial:bioassay,..." per column, then one
     * line per row with values formatted to at most four fraction digits. At most
     * {@code MAX_ROWS_TO_STRING} rows are printed; if rows were omitted a trailing note says so.
     */
    @Override
    public String toString() {
        int columns = this.columns();
        int rows = this.rows();
        // Cap the printed rows exactly at the advertised limit (the old counter check let two extra rows through).
        int rowsToShow = Math.min(rows, ExpressionDataDoubleMatrix.MAX_ROWS_TO_STRING);

        NumberFormat nf = NumberFormat.getInstance();
        nf.setMaximumFractionDigits(4);

        StringBuilder buf = new StringBuilder();
        if (rows <= ExpressionDataDoubleMatrix.MAX_ROWS_TO_STRING) {
            buf.append(rows).append(" x ").append(columns).append(" matrix of double values\n");
        } else {
            buf.append(rows).append(" x ").append(columns).append(" matrix of double values, showing up to ")
                    .append(ExpressionDataDoubleMatrix.MAX_ROWS_TO_STRING).append(" rows\n");
        }

        buf.append("Probe");
        for (int i = 0; i < columns; i++) {
            buf.append("\t").append(this.getBioMaterialForColumn(i).getName()).append(":");
            for (BioAssay ba : this.getBioAssaysForColumn(i)) {
                buf.append(ba.getName()).append(",");
            }
        }
        buf.append("\n");

        for (int j = 0; j < rowsToShow; j++) {

            buf.append(this.rowDesignElementMapByInteger.get(j).getName());
            for (int i = 0; i < columns; i++) {
                // fetch each cell once
                Double val = this.get(j, i);
                if (Double.isNaN(val)) {
                    buf.append("\t").append(val);
                } else {
                    buf.append("\t").append(nf.format(val));
                }
            }

            buf.append("\n");
        }

        if (rows > rowsToShow) {
            buf.append("\n(Stopping after ").append(ExpressionDataDoubleMatrix.MAX_ROWS_TO_STRING)
                    .append(" rows) ...\n");
        }
        return buf.toString();
    }

    /**
     * Convert {@link DesignElementDataVector}s into Double matrix. Ranks are recorded for any
     * {@link ProcessedExpressionDataVector}s before the column elements are set up and the matrix is filled.
     *
     * @param vectors the vectors to convert
     * @throws IllegalArgumentException if vectors is null or empty
     */
    @Override
    protected void vectorsToMatrix(Collection<? extends DesignElementDataVector> vectors) {
        if (vectors == null || vectors.isEmpty()) {
            throw new IllegalArgumentException("No vectors!");
        }

        for (DesignElementDataVector candidate : vectors) {
            if (!(candidate instanceof ProcessedExpressionDataVector)) {
                continue;
            }
            ProcessedExpressionDataVector processed = (ProcessedExpressionDataVector) candidate;
            this.ranks.put(candidate.getDesignElement(), processed.getRankByMean());
        }

        int columnCount = this.setUpColumnElements();
        this.matrix = this.createMatrix(vectors, columnCount);

    }

    /**
     * Fill in the data. Cells are first set to -Infinity as a "not yet written" marker, each vector's values are
     * then placed via {@code setMatBioAssayValues}, and any cell still holding the marker afterwards becomes NaN.
     *
     * @param vectors the vectors supplying the data
     * @param maxSize number of columns to allocate
     * @return DoubleMatrixNamed
     * @throws IllegalStateException if a vector's value count differs from its dimension's bioassay count
     */
    private DoubleMatrix<CompositeSequence, BioMaterial> createMatrix(
            Collection<? extends DesignElementDataVector> vectors, int maxSize) {

        int rowCount = this.rowDesignElementMapByInteger.size();

        DoubleMatrix<CompositeSequence, BioMaterial> filled = new DenseDoubleMatrix<>(rowCount, maxSize);

        for (int col = 0; col < filled.columns(); col++) {
            filled.addColumnName(this.getBioMaterialForColumn(col));
        }

        // -Infinity marks cells that have not been written yet.
        for (int row = 0; row < filled.rows(); row++) {
            for (int col = 0; col < filled.columns(); col++) {
                filled.set(row, col, Double.NEGATIVE_INFINITY);
            }
        }

        ByteArrayConverter converter = new ByteArrayConverter();

        // sorted by row index so names can be attached in row order afterwards
        Map<Integer, CompositeSequence> namesByRow = new TreeMap<>();
        for (DesignElementDataVector vector : vectors) {
            BioAssayDimension dimension = vector.getBioAssayDimension();
            byte[] rawBytes = vector.getData();

            CompositeSequence designElement = vector.getDesignElement();
            assert designElement != null : "No design element for " + vector;

            Integer rowIndex = this.rowElementMap.get(designElement);
            assert rowIndex != null;

            namesByRow.put(rowIndex, designElement);

            double[] values = converter.byteArrayToDoubles(rawBytes);

            Collection<BioAssay> bioAssays = dimension.getBioAssays();
            if (bioAssays.size() != values.length) {
                throw new IllegalStateException("Mismatch: " + values.length + " values in vector ( "
                        + rawBytes.length + " bytes) for " + designElement + " got " + bioAssays.size()
                        + " bioassays in the bioAssayDimension");
            }

            this.setMatBioAssayValues(filled, rowIndex, ArrayUtils.toObject(values), bioAssays,
                    bioAssays.iterator());
        }

        /*
         * Note: these row names aren't that important unless we use the bare matrix.
         */
        for (int row = 0; row < filled.rows(); row++) {
            filled.addRowName(namesByRow.get(row));
        }
        assert filled.getRowNames().size() == filled.rows();

        // anything still carrying the marker was never supplied: record it as missing.
        for (int row = 0; row < filled.rows(); row++) {
            for (int col = 0; col < filled.columns(); col++) {
                if (filled.get(row, col) == Double.NEGATIVE_INFINITY) {
                    filled.set(row, col, Double.NaN);
                }
            }
        }
        ExpressionDataDoubleMatrix.log
                .debug("Created a " + filled.rows() + " x " + filled.columns() + " matrix");
        return filled;
    }

}