org.fhcrc.cpl.viewer.align.commandline.CollapseFractionsCLM.java Source code

Java tutorial

Introduction

Here is the source code for org.fhcrc.cpl.viewer.align.commandline.CollapseFractionsCLM.java

Source

/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.viewer.align.commandline;

import org.fhcrc.cpl.viewer.commandline.modules.BaseViewerCommandLineModuleImpl;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.matching.FeatureSetMatcher;
import org.fhcrc.cpl.viewer.amt.*;
import org.fhcrc.cpl.viewer.align.Aligner;
import org.fhcrc.cpl.viewer.align.SplineAligner;
import org.fhcrc.cpl.viewer.align.QuantileRegressionAligner;
import org.fhcrc.cpl.viewer.align.PeptideArrayAnalyzer;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException;
import org.fhcrc.cpl.toolbox.commandline.CommandLineModule;
import org.fhcrc.cpl.toolbox.commandline.arguments.*;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import org.fhcrc.cpl.toolbox.statistics.RegressionUtilities;
import org.fhcrc.cpl.toolbox.statistics.BasicStatistics;
import org.fhcrc.cpl.toolbox.statistics.MatrixUtil;
import org.fhcrc.cpl.toolbox.gui.chart.PanelWithScatterPlot;
import org.fhcrc.cpl.toolbox.gui.chart.PanelWithHistogram;
import org.apache.log4j.Logger;
import org.jfree.chart.plot.XYPlot;

import java.util.*;
import java.util.List;
import java.io.File;
import java.io.IOException;
import java.awt.*;

/**
 * Command linemodule for plotting the mass calibration of a feature file
 */
public class CollapseFractionsCLM extends BaseViewerCommandLineModuleImpl implements CommandLineModule {
    protected static Logger _log = Logger.getLogger(CollapseFractionsCLM.class);

    protected boolean showCharts = true;

    protected PeptideArrayAnalyzer arrayAnalyzer;

    protected boolean normalizeIntensities = true;

    protected File outFile;

    protected int minFeaturesInFracAndUnfrac = 15;
    protected int minFeaturesForRegress = 15;

    public CollapseFractionsCLM() {
        init();
    }

    protected void init() {
        mCommandName = "collapsefractions";
        mShortDescription = "Collapse multiple fractions into a single featureset";
        mHelpMessage = mShortDescription;
        CommandLineArgumentDefinition[] argDefs = {
                createUnnamedFileArgumentDefinition(true,
                        "peptide array in which first run is unfractionated, all the rest fractionated"),
                new BooleanArgumentDefinition("showcharts", false, "show charts?", showCharts),
                new BooleanArgumentDefinition("normalize", false, "Normalize fraction intensities?",
                        normalizeIntensities),
                new FileToWriteArgumentDefinition("out", true, "output file"), };
        addArgumentDefinitions(argDefs);
    }

    public void assignArgumentValues() throws ArgumentValidationException {
        showCharts = getBooleanArgumentValue("showcharts");
        normalizeIntensities = getBooleanArgumentValue("normalize");
        try {
            arrayAnalyzer = new PeptideArrayAnalyzer(getUnnamedFileArgumentValue());
        } catch (IOException e) {
            throw new ArgumentValidationException("Failed to load peptide array", e);
        }
        outFile = getFileArgumentValue("out");
    }

    /**
     * do the actual work
     */
    public void execute() throws CommandLineModuleExecutionException {
        List<Double[]> intensitiesAllRuns = new ArrayList<Double[]>();

        for (int i = 0; i < arrayAnalyzer.getRunCount(); i++)
            intensitiesAllRuns.add(arrayAnalyzer.getRunIntensities(arrayAnalyzer.getRunNames().get(i)));
        Double[] firstRunIntensities = intensitiesAllRuns.get(0);

        if (normalizeIntensities)
            normalizeIntensities(intensitiesAllRuns);
        int nrow = arrayAnalyzer.getRowMaps().length;

        Double[] featureFracIntensitySums = new Double[nrow];
        //Do not add a sum value if there's a missing value between two non-missing values
        int numFoundMissingBracketedByPresent = 0;
        List<Integer> fractionCounts = new ArrayList<Integer>();
        for (int i = 0; i < nrow; i++) {
            double rowFracSum = 0;
            boolean foundNonMissingFrac = false;
            boolean foundMissingAfterNonMissing = false;
            boolean foundNonMissingAfterMissingAfterNonMissing = false;
            int fractionCount = 0;
            for (int j = 1; j < arrayAnalyzer.getRunCount(); j++) {
                if (intensitiesAllRuns.get(j)[i] != null) {
                    fractionCount++;
                    foundNonMissingFrac = true;
                    if (foundMissingAfterNonMissing)
                        foundNonMissingAfterMissingAfterNonMissing = true;
                    rowFracSum += intensitiesAllRuns.get(j)[i];

                    //                    else ApplicationContext.infoMessage("Present, missing, present: " + intensitiesAllRuns.get(1)[i] + ", " + intensitiesAllRuns.get(2)[i] + ", " + intensitiesAllRuns.get(3)[i] + ", " + intensitiesAllRuns.get(4)[i]);
                } else if (foundNonMissingFrac)
                    foundMissingAfterNonMissing = true;
            }
            if (foundNonMissingAfterMissingAfterNonMissing)
                numFoundMissingBracketedByPresent++;
            if (rowFracSum > 0 && !foundNonMissingAfterMissingAfterNonMissing) {
                featureFracIntensitySums[i] = rowFracSum;
                fractionCounts.add(fractionCount);
            } else
                fractionCounts.add(0);
        }
        ApplicationContext.infoMessage(
                "Number discarded because found missing in between present: " + numFoundMissingBracketedByPresent);

        Map<Integer, List<Double>> unfracFractionInCommonLogIntensitiesMap = new HashMap<Integer, List<Double>>();
        Map<Integer, List<Double>> fracFractionInCommonLogIntensitiesMap = new HashMap<Integer, List<Double>>();

        for (int i = 0; i < nrow; i++) {
            if (featureFracIntensitySums[i] != null && firstRunIntensities[i] != null) {
                int numFractionsUsed = fractionCounts.get(i);
                List<Double> unfracLogIntensitiesThisFractionCount = unfracFractionInCommonLogIntensitiesMap
                        .get(numFractionsUsed);
                List<Double> fracLogIntensitiesThisFractionCount = fracFractionInCommonLogIntensitiesMap
                        .get(numFractionsUsed);
                if (unfracLogIntensitiesThisFractionCount == null) {
                    unfracLogIntensitiesThisFractionCount = new ArrayList<Double>();
                    unfracFractionInCommonLogIntensitiesMap.put(numFractionsUsed,
                            unfracLogIntensitiesThisFractionCount);
                    fracLogIntensitiesThisFractionCount = new ArrayList<Double>();
                    fracFractionInCommonLogIntensitiesMap.put(numFractionsUsed,
                            fracLogIntensitiesThisFractionCount);
                }
                unfracLogIntensitiesThisFractionCount.add(Math.log(firstRunIntensities[i]));
                fracLogIntensitiesThisFractionCount.add(Math.log(featureFracIntensitySums[i]));
            }
        }

        //todo: technically this should only need to be done for runCount > 1

        for (int i = 1; i <= arrayAnalyzer.getRunCount(); i++) {
            if (unfracFractionInCommonLogIntensitiesMap.containsKey(i)) {
                System.err.println("Found " + unfracFractionInCommonLogIntensitiesMap.get(i).size() + " points in "
                        + i + " fractions");
                if (unfracFractionInCommonLogIntensitiesMap.get(i).size() < minFeaturesForRegress) {
                    ApplicationContext.infoMessage("\tToo few for regression, so removing instead of adjusting");
                    int numRemoved = 0;

                    for (int j = 0; j < featureFracIntensitySums.length; j++) {
                        if (fractionCounts.get(j) == i && featureFracIntensitySums[j] != null) {
                            featureFracIntensitySums[j] = null;
                            numRemoved++;
                        }

                    }
                    ApplicationContext
                            .infoMessage("\tRemoved " + numRemoved + " intensities found in " + i + " fractions");
                    continue;
                }

                //normalize intensities per fraction
                double[] coeffs = calcGoodRegressionCoeffs(fracFractionInCommonLogIntensitiesMap.get(i),
                        unfracFractionInCommonLogIntensitiesMap.get(i));

                if (normalizeIntensities) {
                    for (int j = 0; j < featureFracIntensitySums.length; j++) {

                        if (fractionCounts.get(j) == i && featureFracIntensitySums[j] != null) {
                            featureFracIntensitySums[j] = RegressionUtilities.mapValueUsingCoefficients(coeffs,
                                    featureFracIntensitySums[j]);
                        }

                    }
                }

                if (showCharts) {
                    PanelWithScatterPlot pwsp = new PanelWithScatterPlot(
                            fracFractionInCommonLogIntensitiesMap.get(i),
                            unfracFractionInCommonLogIntensitiesMap.get(i), "in" + i + "fracs");
                    pwsp.setAxisLabels("Fractionated", "Unfractionated");
                    pwsp.addLineOrCurve(coeffs);
                    pwsp.addLineOrCurve(new double[] { 0, 1 });
                    pwsp.displayInTab();
                }

                if (normalizeIntensities) {
                    for (int j = 0; j < fracFractionInCommonLogIntensitiesMap.get(i).size(); j++)
                        fracFractionInCommonLogIntensitiesMap.get(i).set(j,
                                RegressionUtilities.mapValueUsingCoefficients(coeffs,
                                        fracFractionInCommonLogIntensitiesMap.get(i).get(j)));
                } else
                    ApplicationContext.infoMessage(
                            "Note: Skipping intensity normalization for features in " + i + " fractions");

                if (showCharts) {
                    PanelWithScatterPlot pwsp = new PanelWithScatterPlot(
                            fracFractionInCommonLogIntensitiesMap.get(i),
                            unfracFractionInCommonLogIntensitiesMap.get(i), "FracUnfrac" + i);
                    pwsp.addLineOrCurve(new double[] { 0, 1 });
                    pwsp.displayInTab();
                    ApplicationContext.infoMessage(
                            "Log-intensity correlation between fractionated and unfrac for peptides in " + i
                                    + " fractions: "
                                    + BasicStatistics.correlationCoefficient(
                                            unfracFractionInCommonLogIntensitiesMap.get(i),
                                            fracFractionInCommonLogIntensitiesMap.get(i)));
                }
            }
        }

        //Now we write out a feature file with features with appropriate intensities
        Map<Integer, Map<String, List<Feature>>> detailsMap = null;
        try {
            ApplicationContext.infoMessage("Loading details file...");
            detailsMap = arrayAnalyzer.loadDetailsRowMapsById();
            ApplicationContext
                    .infoMessage("Loaded " + detailsMap.size() + " array rows' details from details file");

        } catch (IOException e) {
            throw new CommandLineModuleExecutionException("Failed to process details file", e);
        }

        List<Feature> outFeatures = new ArrayList<Feature>();
        for (int i = 0; i < nrow; i++) {
            int id = i + 1;
            if (featureFracIntensitySums[i] == null)
                continue;
            double newIntensity = featureFracIntensitySums[i];
            String firstAvailableRun = detailsMap.get(id).keySet().iterator().next();
            Feature firstDetailFeature = detailsMap.get(id).get(firstAvailableRun).get(0);
            double intensityScaleFactor = newIntensity / firstDetailFeature.getIntensity();
            firstDetailFeature.setIntensity((float) newIntensity);
            firstDetailFeature
                    .setTotalIntensity((float) intensityScaleFactor * firstDetailFeature.getTotalIntensity());

            outFeatures.add(firstDetailFeature);
        }
        ApplicationContext.infoMessage("Created " + outFeatures.size() + " features to save");

        FeatureSet outFeatureSet = new FeatureSet();
        outFeatureSet.setFeatures(outFeatures.toArray(new Feature[outFeatures.size()]));
        try {
            outFeatureSet.save(outFile);
        } catch (IOException e) {
            throw new CommandLineModuleExecutionException("Failed to save output file", e);
        }

        if (showCharts) {
            new PanelWithHistogram(fractionCounts, "FractionCount").displayInTab();

            List<Double> unfracInCommonLogIntensities = new ArrayList<Double>();
            List<Double> fracInCommonLogIntensities = new ArrayList<Double>();

            for (int i = 0; i < nrow; i++) {
                if (featureFracIntensitySums[i] != null && firstRunIntensities[i] != null) {
                    unfracInCommonLogIntensities.add(Math.log(firstRunIntensities[i]));
                    fracInCommonLogIntensities.add(Math.log(featureFracIntensitySums[i]));
                }
            }

            PanelWithScatterPlot fracUnfracScatter = new PanelWithScatterPlot(unfracInCommonLogIntensities,
                    fracInCommonLogIntensities, "Frac vs unfrac");
            fracUnfracScatter.setAxisLabels("Unfractionated", "Fractionated");
            fracUnfracScatter.addLineOrCurve(new double[] { 0, 1 });
            fracUnfracScatter.displayInTab();

            for (int i = 2; i <= arrayAnalyzer.getRunCount(); i++)
                if (unfracFractionInCommonLogIntensitiesMap.containsKey(i)) {

                }
            //            fracUnfracScatterByFracCount.setAxisLabels("Unfractionated","Fractionated");
            //            fracUnfracScatterByFracCount.setPointSize(2);
            //            fracUnfracScatterByFracCount.setShowLegend(true);
            //            fracUnfracScatterByFracCount.setSeriesColors(new Color[]{Color.BLUE, Color.RED, Color.GREEN, Color.ORANGE});
            //            fracUnfracScatterByFracCount.setSeriesColor(0, Color.BLUE);
            //            fracUnfracScatterByFracCount.setSeriesColor(1, Color.RED);
            //            fracUnfracScatterByFracCount.setSeriesColor(2, Color.GREEN);
            //            fracUnfracScatterByFracCount.setSeriesColor(3, Color.ORANGE);
            //            fracUnfracScatterByFracCount.addLineOrCurve(new double[] {0,1});
            //
            //            fracUnfracScatterByFracCount.displayInTab();

            ApplicationContext
                    .infoMessage("Log-intensity correlation between fractionated and unfrac: " + BasicStatistics
                            .correlationCoefficient(unfracInCommonLogIntensities, fracInCommonLogIntensities));
            ApplicationContext.infoMessage(
                    "In-common median intensity, frac: " + BasicStatistics.median(fracInCommonLogIntensities)
                            + ", no frac: " + BasicStatistics.median(unfracInCommonLogIntensities));

            List<Double> unfracNotNullLogIntensities = new ArrayList<Double>();
            for (Double intensity : firstRunIntensities)
                if (intensity != null)
                    unfracNotNullLogIntensities.add(Math.log(intensity));
            new PanelWithHistogram(unfracNotNullLogIntensities, "Unfractionated").displayInTab();
            ApplicationContext.infoMessage("Unfractionated intensities: n=" + unfracNotNullLogIntensities.size()
                    + ", median=" + BasicStatistics.median(unfracNotNullLogIntensities));

            List<Double> fracNotNullLogIntensities = new ArrayList<Double>();
            for (Double intensity : featureFracIntensitySums)
                if (intensity != null) {
                    fracNotNullLogIntensities.add(Math.log(intensity));
                }
            new PanelWithHistogram(fracNotNullLogIntensities, "Fractionated").displayInTab();
            ApplicationContext.infoMessage("Fractionated intensities: n=" + fracNotNullLogIntensities.size()
                    + ", median=" + BasicStatistics.median(fracNotNullLogIntensities));
        }
    }

    protected void normalizeIntensities(List<Double[]> intensitiesAllRuns)
            throws CommandLineModuleExecutionException {
        Double[] firstRunIntensities = intensitiesAllRuns.get(0);

        PanelWithScatterPlot pwspAllRunsNorm = null;
        if (showCharts) {
            pwspAllRunsNorm = new PanelWithScatterPlot();
            pwspAllRunsNorm.setAxisLabels("Original", "Mapped");
            pwspAllRunsNorm.setName("RunNormalization");
        }

        ApplicationContext.infoMessage("Rows in array: " + firstRunIntensities.length);
        for (int i = 1; i < arrayAnalyzer.getRunCount(); i++) {
            ApplicationContext.infoMessage("Normalizing fraction " + i);
            Double[] intensitiesThisRun = intensitiesAllRuns.get(i);
            List<Double> firstRunIntensitiesJustThisRun = new ArrayList<Double>();
            List<Double> thisRunIntensitiesJustThisRun = new ArrayList<Double>();

            for (int j = 0; j < firstRunIntensities.length; j++) {
                if (intensitiesThisRun[j] == null || firstRunIntensities[j] == null)
                    continue;
                boolean foundInAnotherRun = false;
                for (int k = 1; k < arrayAnalyzer.getRunCount(); k++) {
                    if (k != i && intensitiesAllRuns.get(k)[j] != null) {
                        foundInAnotherRun = true;
                        break;
                    }
                }
                if (!foundInAnotherRun) {
                    firstRunIntensitiesJustThisRun.add(firstRunIntensities[j]);
                    thisRunIntensitiesJustThisRun.add(intensitiesThisRun[j]);
                }

            }
            ApplicationContext.infoMessage("values in common between ONLY unfrac and fraction " + i + ":"
                    + firstRunIntensitiesJustThisRun.size());
            List<Double> logFirst = new ArrayList<Double>();
            List<Double> logThis = new ArrayList<Double>();
            double minx = Double.MAX_VALUE;
            double maxx = Double.MIN_VALUE;
            for (int l = 0; l < firstRunIntensitiesJustThisRun.size(); l++) {
                logFirst.add(Math.log(firstRunIntensitiesJustThisRun.get(l)));
                logThis.add(Math.log(thisRunIntensitiesJustThisRun.get(l)));
                minx = Math.min(Math.log(thisRunIntensitiesJustThisRun.get(l)), minx);
                maxx = Math.max(Math.log(thisRunIntensitiesJustThisRun.get(l)), maxx);
            }

            if (logFirst.size() < minFeaturesInFracAndUnfrac) {
                ApplicationContext.infoMessage(
                        "WARNING: unable to normalize run " + i + ", not enough in common with unfractionated run");
                continue;
            }

            double[] coeffs = null;
            //                try
            //                {
            //                    //todo: un-hardcode
            ////                    coeffs = RegressionUtilities.modalRegression(logFirst, logThis, 1, 6, 2.0);
            //                }
            //                catch (IOException e)
            //                {
            //                    throw new CommandLineModuleExecutionException("Failure in modal regression",e);
            //                }
            //todo: un-hardcode
            Pair<double[], double[]> lowLeverageLowResPoints = RegressionUtilities
                    .selectValuesWithLowLeverageAndStudentizedResidual(logThis, logFirst, 17, 1.8, false, 1, false,
                            true);
            //try
            //{
            //            coeffs = RegressionUtilities.modalRegression(
            //                    lowLeverageLowResPoints.first, lowLeverageLowResPoints.second);
            //}
            //catch (IOException e)
            //{throw new CommandLineModuleExecutionException(e);}
            coeffs = calcGoodRegressionCoeffs(logThis, logFirst);

            if (showCharts) {
                PanelWithScatterPlot pwsp = new PanelWithScatterPlot(lowLeverageLowResPoints.first,
                        lowLeverageLowResPoints.second, "logintensities" + i);
                pwsp.addData(logThis, logFirst, "used datapoints");
                pwsp.setAxisLabels("current fraction", "unfractionated run");
                pwsp.addLineOrCurve(coeffs, minx, maxx);
                pwsp.displayInTab();
            }

            List<Double> origNotNullIntensities = null;
            if (showCharts) {
                origNotNullIntensities = new ArrayList<Double>();
                for (int l = 0; l < intensitiesThisRun.length; l++) {
                    if (intensitiesThisRun[l] != null) {
                        origNotNullIntensities.add(Math.log(intensitiesThisRun[l]));
                    }
                }
            }

            for (int l = 0; l < intensitiesThisRun.length; l++) {
                if (intensitiesThisRun[l] != null)
                    intensitiesThisRun[l] = Math.exp(
                            RegressionUtilities.mapValueUsingCoefficients(coeffs, Math.log(intensitiesThisRun[l])));
            }

            if (showCharts) {
                List<Double> mappedNotNullIntensities = new ArrayList<Double>();

                for (int l = 0; l < intensitiesThisRun.length; l++) {
                    if (intensitiesThisRun[l] != null) {
                        mappedNotNullIntensities.add(Math.log(intensitiesThisRun[l]));
                    }
                }

                pwspAllRunsNorm.addData(origNotNullIntensities, mappedNotNullIntensities, "mappedintensities" + i);
            }

        }
        if (showCharts)
            pwspAllRunsNorm.displayInTab();

    }

    protected double[] calcGoodRegressionCoeffs(List<Double> xvalsList, List<Double> yvalsList) {
        double[] xvals = new double[xvalsList.size()];
        double[] yvals = new double[xvalsList.size()];
        for (int i = 0; i < xvals.length; i++) {
            xvals[i] = xvalsList.get(i);
            yvals[i] = yvalsList.get(i);
        }
        return calcGoodRegressionCoeffs(xvals, yvals);
    }

    protected double[] calcGoodRegressionCoeffs(double[] xvals, double[] yvals) {
        Pair<double[], double[]> lowLeverageLowResPoints = RegressionUtilities
                .selectValuesWithLowLeverageAndStudentizedResidual(xvals, yvals, 17, 1.8, false, 1, false, true);
        //try
        //{
        //            coeffs = RegressionUtilities.modalRegression(
        //                    lowLeverageLowResPoints.first, lowLeverageLowResPoints.second);
        //}
        //catch (IOException e)
        //{throw new CommandLineModuleExecutionException(e);}
        return RegressionUtilities.symmetricalRegression(lowLeverageLowResPoints.first,
                lowLeverageLowResPoints.second);
    }

}