ubic.BAMSandAllen.TopTenInfo.java Source code

Java tutorial

Introduction

Here is the source code for ubic.BAMSandAllen.TopTenInfo.java

Source

/*******************************************************************************
 * The ABAMS project
 * 
 * Copyright (c) 2012 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package ubic.BAMSandAllen;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.BAMSandAllen.AllenDataLoaders.AllenAtlasAnnotationLoader;
import ubic.BAMSandAllen.AllenDataLoaders.ImageSeriesInfoLoader;
import ubic.BAMSandAllen.AllenDataLoaders.StructureCatalogLoader;
import ubic.BAMSandAllen.AnalyzeBAMSandAllenGenes.Direction;
import ubic.BAMSandAllen.FocusedAnalysis.GeneCoordinateCorrelation;
import ubic.BAMSandAllen.MatrixPairs.ConnectivityAndAllenExpressionMatrixPair;
import ubic.BAMSandAllen.MatrixPairs.ExpressionMatrixPairFactory;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.dataStructure.params.ParamKeeper;
import ubic.basecode.math.CorrelationStats;
import ubic.basecode.math.DescriptiveWithMissing;
import ubic.basecode.math.Rank;
import ubic.basecode.math.Wilcoxon;
import cern.colt.list.DoubleArrayList;

public class TopTenInfo {
    private static Log log = LogFactory.getLog(TopTenInfo.class.getName());

    RankedGeneListLoader loader;
    ConnectivityAndAllenExpressionMatrixPair pair;
    DoubleMatrix<String, String> expression;
    DoubleMatrix<String, String> connectivity;
    String baseFileName;
    Set<String> motorRegions;
    GeneCoordinateCorrelation forCoords;

    public TopTenInfo(RankedGeneListLoader loader, ConnectivityAndAllenExpressionMatrixPair pair) throws Exception {
        super();
        this.baseFileName = loader.filename;
        this.loader = loader;
        this.pair = pair;
        // assume its a connectivity and expression pair, can be generalized later
        expression = pair.getMatrixB();
        connectivity = pair.getMatrixA();
        motorRegions = getMotorRegions();
        forCoords = new GeneCoordinateCorrelation(pair);

    }

    public void writeExpressionInfo() throws Exception {

        //        AllenBrainAtlasService abaService = new AllenBrainAtlasService();
        ImageSeriesInfoLoader imageInfo = new ImageSeriesInfoLoader();

        getMotorRegions();

        List<String> rows;
        if (loader != null) {
            rows = loader.getRows();
        } else {
            // may be too many
            rows = expression.getRowNames();
        }

        DoubleArrayList averageExp = new DoubleArrayList();
        DoubleArrayList sdExp = new DoubleArrayList();
        for (String geneRow : expression.getRowNames()) {
            double[] expValues = expression.getRowByName(geneRow);

            DoubleArrayList expValuesDAL = new DoubleArrayList(expValues);
            double mean = DescriptiveWithMissing.mean(expValuesDAL);
            averageExp.add(mean);

            double sampleStandardDeviation = Math.sqrt(DescriptiveWithMissing.sampleVariance(expValuesDAL, mean));
            sdExp.add(sampleStandardDeviation);
        }
        DoubleArrayList averageRanks = Rank.rankTransform(averageExp);
        DoubleArrayList sdRanks = Rank.rankTransform(sdExp);

        log.info("degree correlation to X:" + forCoords.getDegreeCorrelation("x"));
        log.info("degree correlation to Y:" + forCoords.getDegreeCorrelation("y"));
        log.info("degree correlation to Z:" + forCoords.getDegreeCorrelation("z"));

        // //do motor regions have less connections
        DoubleArrayList motor1 = new DoubleArrayList();
        DoubleArrayList notMotor1 = new DoubleArrayList();

        ABAMSDataMatrix matrixA = pair.getMatrixA();
        DoubleMatrix<String, String> degNamed = Util.columnSums(matrixA);

        for (String region : degNamed.getColNames()) {
            double expvalue = degNamed.getByKeys("Sums", region);
            if (Double.isNaN(expvalue))
                continue;
            if (motorRegions.contains(region))
                motor1.add(expvalue);
            else
                notMotor1.add(expvalue);
        }
        log.info("Wilcox Motor P " + Wilcoxon.exactWilcoxonP(motor1.elements(), notMotor1.elements()));
        log.info("Wilcox Motor P Rev " + Wilcoxon.exactWilcoxonP(notMotor1.elements(), motor1.elements()));
        // //delete

        int rowsInMatrix = expression.rows();

        ParamKeeper stats = new ParamKeeper();
        int columns = pair.getMatrixA().columns();
        int count = 0;

        for (String geneRow : rows) {
            count++;
            if (count % 100 == 0)
                log.info("Count:" + count);

            Map<String, String> geneStats = new HashMap<String, String>();
            geneStats.put("Name", geneRow);
            String geneName = ImageSeriesInfoLoader.getGeneNameFromRowName(geneRow);
            String imageSeriesID = ImageSeriesInfoLoader.getImageIDFromRowLabel(geneRow);
            // String imageSeriesID = geneRow.substring( geneRow.indexOf( "[" ) + 1, geneRow.indexOf( "]" ) );

            geneStats.put("Gene", "'" + geneName);

            geneStats.put("imageSeriesID", "'" + imageSeriesID);

            geneStats.put("NCBI ID", "" + imageInfo.getNCBIIDFromRowName(geneRow));

            Set<String> allImageSets = imageInfo.getRowsFromGene(geneName);

            geneStats.put("ImageSeriesCount", "" + allImageSets.size());

            // do any of it's image series sets have a coronal image set?
            geneStats.put("HasCoronalImage", "" + imageInfo.hasCoronalImageFromRowName(geneName));

            // find out how many other imageseries are in the top list
            allImageSets.retainAll(rows);
            geneStats.put("ImageSeriesInList", "" + allImageSets.size());

            // AbaGene gene = abaService.getGene( geneName );

            // geneStats.put( "plane", imageInfo.getPlaneFromRowName( geneRow ) );

            geneStats.put("Gene Name", imageInfo.getNameFromImageID(imageSeriesID));

            // DoubleArrayList rx = Rank.rankTransform( x );

            int index = rows.indexOf(geneRow);
            geneStats.put("Index", index + "");

            int fullMatrixIndex = expression.getRowNames().indexOf(geneRow);
            geneStats.put("meanRank", "" + (rowsInMatrix - averageRanks.get(fullMatrixIndex)));
            geneStats.put("sdRank", "" + (rowsInMatrix - sdRanks.get(fullMatrixIndex)));

            double[] expValues = expression.getRowByName(geneRow);
            int nans = Util.countNaNs(expValues);
            geneStats.put("NaNs", "" + nans);
            int columnsMinusNaN = columns - nans;

            double pearsonDegreeCor = pair.getDegreeCorrelation(geneRow);
            geneStats.put("Degree correlation", pearsonDegreeCor + "");
            geneStats.put("Degree correlation pval",
                    "" + CorrelationStats.pvalue(pearsonDegreeCor, columnsMinusNaN) * rows.size());

            double spearmanDegreeCor = pair.getRankDegreeCorrelation(geneRow);
            geneStats.put("Rank degree correlation", spearmanDegreeCor + "");
            geneStats.put("Rank degree correlation pval",
                    "" + CorrelationStats.spearmanPvalue(spearmanDegreeCor, columnsMinusNaN) * rows.size());

            boolean removeNaNs = true;
            geneStats.put("expSum", "" + Util.sum(expValues, removeNaNs));

            geneStats.put("plane", imageInfo.getPlaneFromRowName(geneRow));

            double coordCorrelation = forCoords.getXRankCorrelation(geneRow);
            geneStats.put("x correlation", "" + coordCorrelation);
            geneStats.put("x correlation pvalue",
                    "" + CorrelationStats.spearmanPvalue(coordCorrelation, columnsMinusNaN) * rows.size());

            coordCorrelation = forCoords.getYRankCorrelation(geneRow);
            geneStats.put("y correlation", "" + coordCorrelation);
            geneStats.put("y correlation pvalue",
                    "" + CorrelationStats.spearmanPvalue(coordCorrelation, columnsMinusNaN) * rows.size());
            coordCorrelation = forCoords.getZRankCorrelation(geneRow);
            geneStats.put("z correlation", "" + coordCorrelation);
            geneStats.put("z correlation pvalue",
                    "" + CorrelationStats.spearmanPvalue(coordCorrelation, columnsMinusNaN) * rows.size());

            DoubleArrayList expValuesDAL = new DoubleArrayList(expValues);
            double mean = DescriptiveWithMissing.mean(expValuesDAL);
            geneStats.put("mean", "" + mean);

            double sampleStandardDeviation = Math.sqrt(DescriptiveWithMissing.sampleVariance(expValuesDAL, mean));
            geneStats.put("sampleStandardDeviation", "" + sampleStandardDeviation);

            // // do test for genes enriched in motor regions
            DoubleArrayList motor = new DoubleArrayList();
            DoubleArrayList notMotor = new DoubleArrayList();
            for (String region : expression.getColNames()) {
                double expvalue = expression.getByKeys(geneRow, region);
                if (Double.isNaN(expvalue))
                    continue;
                if (motorRegions.contains(region))
                    motor.add(expvalue);
                else
                    notMotor.add(expvalue);
            }
            geneStats.put("Wilcox Motor P",
                    "" + Wilcoxon.exactWilcoxonP(motor.elements(), notMotor.elements()) * rows.size());
            geneStats.put("Wilcox Motor P Rev",
                    "" + Wilcoxon.exactWilcoxonP(notMotor.elements(), motor.elements()) * rows.size());

            // url
            String url = "http://mouse.brain-map.org/brain/" + geneName + "/" + imageSeriesID
                    + ".html?ispopup=true";
            url = "HYPERLINK(\"" + url + "\",\"Series summary\")";
            geneStats.put("URL", url);

            String urlThumbs = "http://mouse.brain-map.org/brain/" + geneName + "/" + imageSeriesID
                    + "/thumbnails.html?ispopup=true";
            urlThumbs = "HYPERLINK(\"" + urlThumbs + "\",\"Thumbnails\")";
            geneStats.put("Thumbs URL", urlThumbs);

            // regions above average?

            stats.addParamInstance(geneStats);

        }
        stats.writeExcel(baseFileName + ".geneInfo.xls");
    }

    private Set<String> getMotorRegions() throws Exception {
        StructureCatalogLoader allenCatalog = new StructureCatalogLoader();
        Set<String> motorRegionsAllen = allenCatalog.getMotorNuclei();
        Set<String> motorRegions = new HashSet<String>();

        // convert to BAMS names
        for (String allenRegion : motorRegionsAllen) {
            motorRegions.addAll(pair.convertBNametoA(allenRegion));
        }
        return motorRegions;
    }

    public void writeColumnInfo() throws Exception {
        ParamKeeper stats = new ParamKeeper();

        for (String colname : connectivity.getColNames()) {
            Map<String, String> colStats = getColInfo(colname);
            stats.addParamInstance(colStats);
        }

        stats.writeExcel(baseFileName + ".colInfo.xls");
    }

    private Map<String, String> getColInfo(String colname) throws Exception {
        Map<String, String> colStats = new HashMap<String, String>();

        colStats.put("Name", colname);
        double[] conValues = connectivity.getColumnByName(colname);
        String connections = "";
        for (String conRow : connectivity.getRowNames()) {
            if (connectivity.getByKeys(conRow, colname) == 1d) {
                connections += "|" + conRow;
            }
        }
        colStats.put("connections", connections);

        double[] expValues = expression.getColumnByName(colname);
        colStats.put("connectionCount", "" + Util.sum(conValues));

        // assume one allen name
        StructureCatalogLoader allenCatalog = new StructureCatalogLoader();

        colStats.put("MotorRegion", "" + motorRegions.contains(colname));
        colStats.put("x", "" + forCoords.getX(colname));
        colStats.put("y", "" + forCoords.getY(colname));
        colStats.put("z", "" + forCoords.getZ(colname));

        if (pair.isVirtualRegion(colname)) {
            colStats.put("allen names", colname);
            colStats.put("virtual", "true");
        } else {
            colStats.put("virtual", "false");
            Set<String> allennames = allenCatalog.getAllenMappedRegions(colname);
            String allenName;
            colStats.put("allen names", allennames.toString());
            if (!allennames.isEmpty()) {
                allenName = allennames.iterator().next();
                AllenAtlasAnnotationLoader spaceLoader = new AllenAtlasAnnotationLoader();
                DoubleMatrix<String, String> volumeMatrix = spaceLoader.getVolumeMatrix();
                colStats.put("volume", "" + volumeMatrix.getByKeys("volume", allenName));
            }
        }

        boolean removeNaNs = true;
        colStats.put("expSum", "" + Util.sum(expValues, removeNaNs));
        colStats.put("expNaNs", "" + Util.countNaNs(expValues));
        return colStats;
    }

    /**
     * @param args
     */
    public static void main(String[] args) throws Exception {
        boolean removeNonExp = true;
        boolean useVirtual = false;
        Direction direction = AnalyzeBAMSandAllenGenes.Direction.APPENDED;
        ConnectivityAndAllenExpressionMatrixPair pair = ExpressionMatrixPairFactory
                .connectivityAndExpression(direction, useVirtual, removeNonExp);

        //        log.info( pair.getCorrelation() );
        //        pair.writeRMatrices();
        //        System.exit( 1 );
        //
        // pair.shuffleDataRows( 1 );

        RankedGeneListLoader loader = new RankedGeneListLoader(pair.getMatrixBDataRows(), "AllGenesAppendedInfo");

        loader = new RankedGeneListLoader(
                "/grp/java/workspace/BAMSandAllen/data/ABACellEnriched/patterns/top100-pattern-b.txt", false);

        TopTenInfo test = new TopTenInfo(loader, pair);
        // test.writeColumnInfo();
        test.writeExpressionInfo();
        System.exit(1);
        //
        // // String endFile = "LOOGenesInOrder.outgoing.partialcon.txt";
        // String endFile = "LOOGenesInOrder.outgoing.partialcon.txt.topGenes.txt";
        // String baseFolder = SetupParameters.config.getString( "abams.dataFolder" ) + "rankedGenes/near final/";
        // // String outputFile = SetupParameters.config.getString( "abams.dataFolder" ) + "/topten/" + endFile;
        // String outputFile = baseFolder + endFile;

        // RankedGeneListLoader loader = new RankedGeneListLoader( baseFolder + endFile, false );

        // TopTenInfo test = new TopTenInfo( loader, pair );
        // test.writeColumnInfo();
        // // test.writeExpressionInfo();
    }
}