ubic.pubmedgate.interactions.focusedAnalysis.FilterConnectionMatrix.java Source code

Java tutorial

Introduction

Here is the source code for ubic.pubmedgate.interactions.focusedAnalysis.FilterConnectionMatrix.java

Source

/*
 * The WhiteText project
 * 
 * Copyright (c) 2012 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.pubmedgate.interactions.focusedAnalysis;

import java.io.File;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.BAMSandAllen.ABAMSDataMatrix;
import ubic.BAMSandAllen.Util;
import ubic.BAMSandAllen.AnalyzeBAMSandAllenGenes.Direction;
import ubic.BAMSandAllen.BAMSDataLoaders.BAMSDataLoader;
import ubic.BAMSandAllen.BAMSDataLoaders.BAMSFromNIFDataLoader;
import ubic.BAMSandAllen.MatrixPairs.MatrixPair;
import ubic.BAMSandAllen.MatrixPairs.SimpleMatrixPair;
import ubic.BAMSandAllen.adjacency.CorrelationAdjacency;
import ubic.BAMSandAllen.adjacency.IdentityAdjacency;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.dataStructure.params.ParamKeeper;
import ubic.basecode.io.reader.DoubleMatrixReader;
import ubic.pubmedgate.Config;
import ubic.pubmedgate.interactions.NormalizePairs;

/**
 * Propagates, filters and analyzes literature-based connectivity matrices.
 * <p>
 * An instance wraps one region-by-region connection matrix read from a text file and offers operations to
 * threshold it, sum it up the BAMS region hierarchy ({@link #propigate()}), compare it against the BAMS
 * connectivity matrix ({@link #compareToReal(boolean, double)}) and write it back out as a table or an image.
 * 
 * @author leon
 */
public class FilterConnectionMatrix {
    /** Path the matrix was loaded from; {@link #propigate()} appends ".propigated" to it. */
    String filename;
    /** Source of the region hierarchy (parents / children lookups). */
    BAMSDataLoader bamsLoader;
    /** The literature-derived matrix; replaced wholesale by {@link #propigate()}. */
    DoubleMatrix<String, String> connectionMatrix;
    protected static Log log = LogFactory.getLog(FilterConnectionMatrix.class);
    /** BAMS connectivity loaded through the NIF loader, with all-zero rows and columns removed. */
    ABAMSDataMatrix BAMSNIFConnections;
    /** Direction used for every BAMS matrix lookup; set to ANYDIRECTION by the constructor. */
    Direction direction;

    /**
     * Loads the matrix in {@code filename} and the NIF-based BAMS connectivity used as a secondary reference.
     * <p>
     * The first and last character of every row name are dropped (presumably the quotes written around names in
     * the table file -- confirm against the writer), and the resulting names are applied to both rows and
     * columns. That only makes sense for a square matrix whose rows and columns are in the same order; the file
     * is assumed to satisfy this.
     * 
     * @param filename path of the matrix file to read
     * @throws Exception if reading the matrix or loading BAMS data fails
     */
    public FilterConnectionMatrix(String filename) throws Exception {
        direction = Direction.ANYDIRECTION;
        this.filename = filename;

        DoubleMatrixReader matrixReader = new DoubleMatrixReader();
        // matrixReader.setTopLeft( false );
        connectionMatrix = matrixReader.read(filename);
        log.info("Loaded matrix: " + connectionMatrix.rows() + " X " + connectionMatrix.columns());

        // NOTE(review): a row name shorter than two characters makes substring throw here.
        LinkedList<String> trimmedNames = new LinkedList<String>();
        for (String row : connectionMatrix.getRowNames()) {
            trimmedNames.addLast(row.substring(1, row.length() - 1));
        }
        connectionMatrix.setRowNames(trimmedNames);
        connectionMatrix.setColumnNames(trimmedNames);

        bamsLoader = new BAMSDataLoader();

        // load BAMS matrix from NIF (not up-propigated)
        BAMSFromNIFDataLoader connectionLoader = new BAMSFromNIFDataLoader();
        boolean skipFibers = true;
        DoubleMatrix<String, String> nifMatrix = connectionLoader.getBAMSMatrix(direction, false, false,
                skipFibers);
        BAMSNIFConnections = new ABAMSDataMatrix(nifMatrix, "NIFConnectivity", new CorrelationAdjacency(nifMatrix));
        BAMSNIFConnections = BAMSNIFConnections.removeZeroColumns();
        BAMSNIFConnections = BAMSNIFConnections.removeZeroRows();
        log.info("NIF Connections:" + BAMSNIFConnections.getDimensionString());
    }

    /**
     * Compares this literature matrix with the BAMS connection matrix at a given threshold.
     * <p>
     * Only one triangle of the matrix (including the diagonal) is visited. A literature connection is an entry
     * {@code >= threshold}; a BAMS connection is an entry equal to 1. The returned map holds the counts and
     * derived statistics as strings, keyed by human-readable labels ("Connections", "Precision", "Recall", ...).
     * "Precision", "Recall" and "Average pair depth" are NaN when their denominator is zero.
     * 
     * @param propigated passed to {@code NormalizePairs.getConnectionMatrix} to select the BAMS matrix variant
     * @param threshold minimum matrix value for an entry to count as a literature connection
     * @return label-to-value map of the comparison results
     * @throws RuntimeException if the BAMS matrix row or column names differ from this matrix
     * @throws Exception if loading or correlating the matrices fails
     */
    public Map<String, String> compareToReal(boolean propigated, double threshold) throws Exception {
        Map<String, String> results = new HashMap<String, String>();

        DoubleMatrix<String, String> BAMSconnectionMatrix = NormalizePairs.getConnectionMatrix(propigated,
                direction);

        // Check the two matrices share a name space before any by-name lookups are made on them.
        if (!BAMSconnectionMatrix.getRowNames().equals(connectionMatrix.getRowNames()))
            throw new RuntimeException("Error mismatched matrices");
        if (!BAMSconnectionMatrix.getColNames().equals(connectionMatrix.getColNames()))
            throw new RuntimeException("Error mismatched matrices");

        int totalDepth = 0;
        int matchedConnections = 0;
        int weightedMatchedConnections = 0;
        int BAMSConnectionCount = 0;
        int connectionCount = 0;

        // get parents call is very slow, so each region's depth is looked up at most once per comparison
        Map<String, Integer> depthCache = new HashMap<String, Integer>();

        List<String> regions = connectionMatrix.getRowNames();
        for (String row : regions) {
            int rowIndex = connectionMatrix.getRowIndexByName(row);
            for (String col : regions) {
                // just do the triangle
                if (rowIndex < connectionMatrix.getRowIndexByName(col))
                    continue;

                double connectionMatrixValue = connectionMatrix.getByKeys(row, col);
                double BAMSValue = BAMSconnectionMatrix.getByKeys(row, col);

                boolean inBams = BAMSValue == 1d;
                boolean inLiterature = connectionMatrixValue >= threshold;

                boolean inNifBams = false;
                try {
                    inNifBams = BAMSNIFConnections.getByKeys(row, col) > 0;
                } catch (Exception ignored) {
                    // Best effort: the NIF matrix had its all-zero rows/columns removed, so a lookup can fail
                    // for a region it no longer contains. Such a pair is treated as not connected.
                }

                int rowDepth = 0;
                int colDepth = 0;
                if (inLiterature) {
                    rowDepth = depthOf(row, depthCache);
                    colDepth = depthOf(col, depthCache);
                    connectionCount++;
                    totalDepth += rowDepth + colDepth;
                }

                if (inBams) {
                    BAMSConnectionCount++;
                }

                if (!inBams && inLiterature && inNifBams) {
                    log.info("Connection in new BAMS but not old BAMS");
                    log.info(row + "->" + col);
                }

                if (inBams && inLiterature) {
                    if (row.equals(col)) {
                        log.info("Self connect");
                    }

                    if (connectionMatrixValue > 10d) { // print some out
                        log.info(connectionMatrixValue + " connections:" + row + " -> " + col);
                        log.info("Depth:" + rowDepth + " -> " + colDepth);
                    }

                    matchedConnections++;
                    // Compound assignment narrows the double sum back to int on every addition, so any
                    // fractional part of a matrix value is dropped.
                    weightedMatchedConnections += connectionMatrixValue;
                }
            }
        }

        ABAMSDataMatrix BAMSConnection = new ABAMSDataMatrix(BAMSconnectionMatrix, "BAMSconnectionMatrix",
                new IdentityAdjacency(BAMSconnectionMatrix));
        ABAMSDataMatrix literatureConnections = new ABAMSDataMatrix(connectionMatrix, "LiteratureConnectionMatrix",
                new IdentityAdjacency(connectionMatrix));
        MatrixPair pair = new SimpleMatrixPair(BAMSConnection, literatureConnections);

        ABAMSDataMatrix LiteratureConnectionsThreshold = new ABAMSDataMatrix(threshold(threshold),
                "LiteratureConnectionMatrixThresh", new IdentityAdjacency(connectionMatrix));
        MatrixPair pairThresh = new SimpleMatrixPair(BAMSConnection, LiteratureConnectionsThreshold);

        pair.printDimensions();

        boolean fast = true;
        results.put("Correlation of matrix values", "" + pair.getCorrelation(fast));
        results.put("Correlation of thresholded matrix values", "" + pairThresh.getCorrelation(fast));

        literatureConnections = literatureConnections.removeZeroColumns();
        literatureConnections = literatureConnections.removeZeroRows();
        results.put("Nonzero matrix rows", "" + literatureConnections.rows());
        results.put("Nonzero matrix cols", "" + literatureConnections.columns());

        // restrict BAMS to the regions that survive in the literature matrix before summing it
        BAMSConnection = BAMSConnection.retainColumns(literatureConnections.getColNames());
        BAMSConnection = BAMSConnection.retainRows(literatureConnections.getRowNames());

        results.put("Full zSum", "" + Util.zSum(connectionMatrix));
        results.put("Full zSum on matching BAMS matrix", "" + Util.zSum(BAMSConnection));

        results.put("Threshold", "" + threshold);
        results.put("Connections", "" + connectionCount);
        results.put("BAMS Connections", "" + BAMSConnectionCount);
        results.put("Matched connections", "" + matchedConnections);
        results.put("Precision", "" + (matchedConnections / (double) connectionCount));
        results.put("Recall", "" + (matchedConnections / (double) BAMSConnectionCount));

        results.put("Weighted Matched connections", "" + weightedMatchedConnections);
        // Previously this entry repeated the connection count instead of the summed depth.
        results.put("TotalDepth", "" + totalDepth);
        results.put("Average pair depth", "" + (totalDepth / (double) connectionCount));
        results.put("Propigated BAMS", "" + propigated);
        // An AUC is not reported: the connections are visited in matrix order, not sorted by score.
        results.put("Filename", filename);
        results.put("BaseFilename", getBaseFilename());

        return results;
    }

    /**
     * Returns the number of parents the BAMS loader reports for a region, remembering the answer in
     * {@code cache} so the slow lookup is made once per region.
     */
    private int depthOf(String region, Map<String, Integer> cache) {
        Integer depth = cache.get(region);
        if (depth == null) {
            depth = bamsLoader.getParents(region).size();
            cache.put(region, depth);
        }
        return depth;
    }

    /**
     * @return the last path element of {@link #filename}
     */
    public String getBaseFilename() {
        return new File(filename).getName();
    }

    /**
     * Builds a 0/1 copy of the connection matrix.
     * 
     * @param threshold entries {@code >= threshold} become 1, all others 0
     * @return a new matrix with the same row and column names; the current matrix is not modified
     */
    public DoubleMatrix<String, String> threshold(double threshold) {
        List<String> regions = connectionMatrix.getRowNames();
        DoubleMatrix<String, String> binary = new DenseDoubleMatrix<String, String>(regions.size(),
                regions.size());
        binary.setRowNames(regions);
        binary.setColumnNames(regions);

        for (String row : regions) {
            for (String col : regions) {
                boolean connected = connectionMatrix.getByKeys(row, col) >= threshold;
                binary.setByKeys(row, col, connected ? 1d : 0d);
            }
        }
        return binary;
    }

    /**
     * Replaces the connection matrix with one summed up the region hierarchy and appends ".propigated" to
     * {@link #filename}.
     * <p>
     * For every pair of distinct regions the new value is the sum of the old entries between the first region's
     * subtree (the region plus all of its indirect children) and the second region's subtree. Weights are simply
     * added. The diagonal of the new matrix is never written.
     */
    public void propigate() {
        List<String> regions = connectionMatrix.getRowNames();

        DoubleMatrix<String, String> newConnectionMatrix = new DenseDoubleMatrix<String, String>(regions.size(),
                regions.size());
        newConnectionMatrix.setRowNames(regions);
        newConnectionMatrix.setColumnNames(regions);

        // Resolve each region's subtree once. The loader's set is copied before the region itself is added so
        // that whatever collection the loader handed out is left untouched.
        boolean indirect = true;
        Map<String, Set<String>> subtrees = new HashMap<String, Set<String>>();
        for (String region : regions) {
            Set<String> children = bamsLoader.getChildren(region, indirect);
            Set<String> subtree;
            if (children == null) {
                log.info("Row null:" + region);
                subtree = new HashSet<String>();
            } else {
                subtree = new HashSet<String>(children);
            }
            subtree.add(region);
            subtrees.put(region, subtree);
        }

        for (String row : regions) {
            Set<String> rowSubtree = subtrees.get(row);
            for (String col : regions) {
                if (col.equals(row))
                    continue;
                Set<String> colSubtree = subtrees.get(col);

                // count any connections between the two trees
                double connections = 0;
                for (String rowChild : rowSubtree) {
                    for (String colChild : colSubtree) {
                        connections += connectionMatrix.getByKeys(rowChild, colChild);
                    }
                }
                // Both cells are written on every visit, so the value left in place for a pair is the one
                // computed on its later visit (when the later-listed region is the row).
                newConnectionMatrix.setByKeys(row, col, connections);
                newConnectionMatrix.setByKeys(col, row, connections);
            }
        }
        connectionMatrix = newConnectionMatrix;
        filename = filename + ".propigated";
    }

    /**
     * Writes the current matrix as an R table to {@link #filename}.
     * 
     * @throws Exception if writing fails
     */
    public void writeMatrix() throws Exception {
        log.info("Writing to:" + filename);
        Util.writeRTable(filename, connectionMatrix);
    }

    /**
     * Writes an image of the matrix, without all-zero rows and columns and with regions sorted by name, to
     * {@code filename + ".zeroes.removed.png"}.
     * 
     * @throws Exception if building or writing the image fails
     */
    public void writeMatrixImage() throws Exception {
        ABAMSDataMatrix literatureConnections = new ABAMSDataMatrix(connectionMatrix, "LiteratureConnectionMatrix",
                new IdentityAdjacency(connectionMatrix));

        // remove zero rows and cols
        literatureConnections = literatureConnections.removeZeroColumns();
        literatureConnections = literatureConnections.removeZeroRows();

        // order rows and cols
        List<String> sortedRegions = new LinkedList<String>(literatureConnections.getColNames());
        Collections.sort(sortedRegions);
        literatureConnections = literatureConnections.orderRows(sortedRegions);
        literatureConnections = literatureConnections.orderCols(sortedRegions);

        String imageFilename = filename + ".zeroes.removed.png";
        Util.writeImage(imageFilename, literatureConnections);
        log.info("Write image to:" + imageFilename);
    }

    /**
     * Propagates and rewrites every matrix in {@code folder} whose name ends with
     * "WhiteTextUnseenMScan.predictions.matrix.txt". Each result is written next to its input with
     * ".propigated" appended.
     * 
     * @param folder directory to scan
     * @throws IllegalArgumentException if {@code folder} cannot be listed
     * @throws Exception if loading or writing a matrix fails
     */
    public static void makepropigates(String folder) throws Exception {
        for (String f : listFolder(folder)) {
            if (f.endsWith("WhiteTextUnseenMScan.predictions.matrix.txt")) {
                log.info(folder);
                FilterConnectionMatrix test = new FilterConnectionMatrix(inFolder(folder, f));
                test.propigate();
                test.writeMatrix();
            }
        }
    }

    /**
     * Lists the entries of a directory, failing with a clear message instead of a NullPointerException when the
     * path is not a listable directory.
     */
    private static String[] listFolder(String folder) {
        String[] names = new File(folder).list();
        if (names == null) {
            throw new IllegalArgumentException("Not a readable directory: " + folder);
        }
        return names;
    }

    /**
     * Joins a folder and a file name, whether or not the folder ends with a separator. An empty or null folder
     * yields the bare name.
     */
    private static String inFolder(String folder, String name) {
        if (folder == null || folder.length() == 0) {
            return name;
        }
        return new File(folder, name).getPath();
    }

    /**
     * @return the matrix currently held by this instance (not a copy)
     */
    public DoubleMatrix<String, String> getConnectionMatrix() {
        return connectionMatrix;
    }

    /**
     * Logs how similar two literature matrices are.
     * <p>
     * The correlation of the pair is always logged. When {@code threshold} is true both matrices are first
     * reduced to 0/1 at a threshold of 1, and the number of connections present in both, together with each
     * matrix's own connection count and the shared percentage, is logged as well.
     * 
     * @param firstMatrix first matrix to compare
     * @param secondMatrix second matrix to compare
     * @param threshold whether to binarise the matrices and report shared-connection counts
     * @throws Exception if pairing or correlating the matrices fails
     */
    public static void quickTestTwo(FilterConnectionMatrix firstMatrix, FilterConnectionMatrix secondMatrix,
            boolean threshold) throws Exception {
        DoubleMatrix<String, String> first;
        DoubleMatrix<String, String> second;
        if (threshold) {
            first = firstMatrix.threshold(1);
            second = secondMatrix.threshold(1);
        } else {
            first = firstMatrix.getConnectionMatrix();
            second = secondMatrix.getConnectionMatrix();
        }

        ABAMSDataMatrix literatureA = new ABAMSDataMatrix(first, firstMatrix.getBaseFilename(),
                new IdentityAdjacency(first));
        ABAMSDataMatrix literatureB = new ABAMSDataMatrix(second, secondMatrix.getBaseFilename(),
                new IdentityAdjacency(second));
        MatrixPair pair = new SimpleMatrixPair(literatureA, literatureB);

        log.info("Pairs:" + firstMatrix.getBaseFilename() + " and " + secondMatrix.getBaseFilename());
        log.info("  Correlation:" + pair.getCorrelation(true));
        log.info("  threshold:" + threshold);

        if (!threshold) {
            return;
        }

        ABAMSDataMatrix matrixA = pair.getMatrixA();
        ABAMSDataMatrix matrixB = pair.getMatrixB();
        // Util.union is declared with a looser return type; the elements are the matrices' column names.
        @SuppressWarnings("unchecked")
        Set<String> regions = (Set<String>) Util.union(matrixA.getColNames(), matrixB.getColNames());

        // Every off-diagonal cell is visited, so a symmetric connection is counted twice here and the
        // counts are halved when reported.
        int intersection = 0;
        for (String row : matrixA.getRowNames()) {
            for (String col : regions) {
                if (row.equals(col))
                    continue;
                if (matrixA.getByKeys(row, col) > 0 && matrixB.getByKeys(row, col) > 0) {
                    intersection++;
                }
            }
        }
        int matrixACons = ((int) Util.zSum(matrixA)) / 2;
        int matrixBCons = ((int) Util.zSum(matrixB)) / 2;
        log.info("  " + "Common connections:" + intersection / 2);
        // 50 * intersection is 100 * (intersection / 2), i.e. the shared connections as a percentage
        log.info("  " + "Connections " + matrixA.getName() + ":" + matrixACons + " "
                + 50d * intersection / matrixACons);
        log.info("  " + "Connections " + matrixB.getName() + ":" + matrixBCons + " "
                + 50d * intersection / matrixBCons);
    }

    /**
     * Logs correlations between the thresholded literature matrix and the propagated, any-direction BAMS
     * matrix: the two "flattened" correlations (labelled spearman and pearson) and the pair correlation.
     * 
     * @param threshold threshold applied to this matrix via {@link #threshold(double)}
     * @throws RuntimeException if the BAMS matrix column names differ from this matrix
     * @throws Exception if loading or correlating the matrices fails
     */
    public void degreeTest(double threshold) throws Exception {
        boolean propigated = true;
        Direction anyDirection = Direction.ANYDIRECTION;
        DoubleMatrix<String, String> BAMSconnectionMatrix = NormalizePairs.getConnectionMatrix(propigated,
                anyDirection);

        DoubleMatrix<String, String> connectionMatrixForUse = threshold(threshold);

        // Check the name spaces agree before doing any of the heavier matrix work.
        if (!BAMSconnectionMatrix.getColNames().equals(connectionMatrixForUse.getColNames()))
            throw new RuntimeException("Error mismatched matrices");

        ABAMSDataMatrix BAMSConnection = new ABAMSDataMatrix(BAMSconnectionMatrix, "BAMSconnectionMatrix",
                new CorrelationAdjacency(BAMSconnectionMatrix));
        BAMSConnection = BAMSConnection.removeZeroColumns();

        ABAMSDataMatrix literatureConnections = new ABAMSDataMatrix(connectionMatrixForUse,
                "LiteratureConnectionMatrix", new CorrelationAdjacency(connectionMatrixForUse));
        literatureConnections = literatureConnections.removeZeroColumns();

        MatrixPair pair = new SimpleMatrixPair(BAMSConnection, literatureConnections);
        pair.sameSpace();
        pair.printDimensions();

        boolean spearman = true;
        log.info("Threshold:" + threshold);
        log.info("Degree, spearman:" + pair.getFlattenedCorrelation(spearman));
        log.info("Degree, pearson:" + pair.getFlattenedCorrelation(!spearman));
        log.info("Mantel (shared connections):" + pair.getCorrelation(true));
    }

    /**
     * Runs {@link #degreeTest(double)} at threshold 2 on a hard-coded propagated rat matrix.
     * 
     * @throws Exception if loading or analysing the matrix fails
     */
    public static void testDegree() throws Exception {
        // tests all columns
        String base = "/grp/java/workspace/PubMedIDtoGate/spreadsheets/interaction.results/matrices/";
        // alternative input: "Positives.WhiteTextUnseen.matrix.txt.propigated"
        String filename = base + "Positives.rat.WhiteTextUnseen.matrix.txt.propigated";

        FilterConnectionMatrix matrix = new FilterConnectionMatrix(filename);
        double threshold = 2;
        matrix.degreeTest(threshold);
    }

    /**
     * Loads two matrices from the configured matrices folder and compares them with
     * {@link #quickTestTwo(FilterConnectionMatrix, FilterConnectionMatrix, boolean)}, first thresholded and
     * then on the raw values.
     */
    private static void quickTest(String first, String second) throws Exception {
        String base = Config.config.getString("whitetext.iteractions.matricesFolder");
        FilterConnectionMatrix a = new FilterConnectionMatrix(inFolder(base, first));
        FilterConnectionMatrix b = new FilterConnectionMatrix(inFolder(base, second));
        quickTestTwo(a, b, true);
        quickTestTwo(a, b, false);
    }

    /**
     * Command-line entry point.
     * <p>
     * As written, only the propagation step runs: the process exits immediately afterwards, so the pairwise
     * comparison and threshold sweep stages below it are never reached. They are kept so a stage can be
     * re-enabled by moving or removing the corresponding exit.
     * 
     * @param args ignored
     * @throws Exception if any stage fails
     */
    public static void main(String[] args) throws Exception {
        String folder = Config.config.getString("whitetext.iteractions.matricesFolder");

        makepropigates(folder);
        // NOTE(review): terminates the JVM here, and with status 1 even though the step succeeded.
        System.exit(1);

        // TODO add in degree correlation statistics
        // testDegree();

        comparePredictionPairs();
        System.exit(1);

        sweepThresholds(folder);
    }

    /** Compares the curated matrices with the MScan prediction matrices, positives and negatives. */
    private static void comparePredictionPairs() throws Exception {
        // other pairings that have been run:
        // quickTest( "Positives.WhiteTextUnseen.matrix.txt", "Negatives.WhiteTextUnseen.matrix.txt" );
        // quickTest( "Positives.WhiteTextUnseen.matrix.txt.propigated",
        // "Negatives.WhiteTextUnseen.matrix.txt.propigated" );
        // quickTest( "Positives.rat.WhiteTextUnseen.matrix.txt", "Negatives.rat.WhiteTextUnseen.matrix.txt" );
        // quickTest( "Positives.rat.Annotated.matrix.txt", "Negatives.rat.Annotated.matrix.txt" );
        // quickTest( "Positives.Annotated.matrix.txt", "Negatives.Annotated.matrix.txt" );
        quickTest("Positives.WhiteTextUnseen.matrix.txt", "Positives.WhiteTextUnseenMScan.predictions.matrix.txt");
        quickTest("Negatives.WhiteTextUnseenMScan.predictions.matrix.txt", "Negatives.WhiteTextUnseen.matrix.txt");
    }

    /**
     * For every matching matrix in {@code folder}, runs {@link #compareToReal(boolean, double)} against both the
     * propagated and unpropagated BAMS matrix at thresholds 1 to 6, stopping early once a threshold leaves no
     * connections, and writes all results to a timestamped spreadsheet in the configured results folder.
     */
    private static void sweepThresholds(String folder) throws Exception {
        // other suffixes that have been used: "matrix.txt", "all.ratWhiteTextUnseen.all.matrix.txt",
        // "Positives.rat.WhiteTextUnseen.matrix.txt"
        String endfix = "WhiteTextUnseenMScan.predictions.matrix.txt";
        ParamKeeper keeper = new ParamKeeper();

        for (String f : listFolder(folder)) {
            if (!f.endsWith(endfix)) {
                continue;
            }
            String filename = inFolder(folder, f);
            log.info(filename);
            FilterConnectionMatrix test = new FilterConnectionMatrix(filename);
            for (double threshold = 1; threshold < 7d; threshold++) {
                Map<String, String> results = test.compareToReal(true, threshold);
                keeper.addParamInstance(results);

                results = test.compareToReal(false, threshold);
                keeper.addParamInstance(results);

                if (threshold == 1)
                    test.writeMatrixImage();

                if (results.get("Connections").equals("0"))
                    break;
            }
        }
        String outFilename = Config.config.getString("whitetext.iteractions.results.folder") + endfix + ".compared."
                + System.currentTimeMillis() + ".xls";
        keeper.writeExcel(outFilename);
        log.info("Wrote to:" + outFilename);
    }

}