Java tutorial
/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.align; import org.apache.log4j.Logger; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.statistics.BasicStatistics; import org.fhcrc.cpl.toolbox.datastructure.Pair; import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.proteomics.feature.matching.FeatureSetMatcher; import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef; import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.AmtExtraInfoDef; import org.fhcrc.cpl.toolbox.filehandler.TempFileManager; import org.fhcrc.cpl.toolbox.proteomics.ProteomicsRegressionUtilities; import org.fhcrc.cpl.toolbox.proteomics.MassUtilities; import org.fhcrc.cpl.viewer.amt.AmtUtilities; import org.fhcrc.cpl.viewer.amt.AmtDatabaseMatcher; import org.fhcrc.cpl.toolbox.gui.chart.*; import org.jfree.data.xy.XYSeriesCollection; import org.jfree.data.xy.XYSeries; import org.jfree.chart.JFreeChart; import org.jfree.chart.ChartFactory; import org.jfree.chart.plot.PlotOrientation; import java.io.*; import java.util.*; import java.util.List; /** * Created by IntelliJ IDEA. * User: migra * Date: Mar 1, 2005 * Time: 10:10:36 AM * * dhmay, 5/7/08. Making this class abstract. Alignment can be performed via two different algorithms: a spline-based * regression, or a quantile regression similar to what's used in AMT. The two algorithms perform very similarly * on data that are tightly filtered, but the quantile regression performs much better on noisy data. * * dhmay, 20100310. Abstracting "elution" so that mapping can be from scan to scan or from time to time. */ public abstract class Aligner { private static Logger _log = Logger.getLogger(Aligner.class); public static final FeaturePairSelector DEFAULT_FEATURE_PAIR_SELECTOR = new MzFeaturePairSelector(); protected FeaturePairSelector featurePairSelector = DEFAULT_FEATURE_PAIR_SELECTOR; protected List<int[]> warpingMaps = null; protected boolean buildCharts = false; protected PanelWithChart alignmentChart = null; double[] mappingSourceValues = null; double[] mappingMappedValues = null; protected double maxStudRes = AmtDatabaseMatcher.DEFAULT_MAX_STUDENTIZED_RESIDUAL; protected double maxLeverageNumerator = AmtDatabaseMatcher.DEFAULT_LEVERAGE_NUMERATOR; //dhmay adding 20100310. Different sources of data for alignment public static final int ALIGNMENT_DATASOURCE_TIME = 0; public static final int ALIGNMENT_DATASOURCE_SCAN = 1; public static final int ALIGNMENT_DATASOURCE_DEFAULT = ALIGNMENT_DATASOURCE_TIME; protected int alignmentDatasource = ALIGNMENT_DATASOURCE_DEFAULT; //dhmay adding 20100201. Defining different ways of ordering the runs for alignment. //align all runs to the first run. "Legacy" behavior public static final int ALIGNMENT_ORDER_MODE_ALIGNTOFIRST = 0; //daisychain the alignment. Align run 1 to run 0, run 2 to run 1.... public static final int ALIGNMENT_ORDER_MODE_DAISYCHAIN = 1; //accumulate a 'cumulative' dataset that we align to public static final int ALIGNMENT_ORDER_MODE_CUMULATIVE = 2; public static final int[] alignmentOrderModes = new int[] { ALIGNMENT_ORDER_MODE_ALIGNTOFIRST, ALIGNMENT_ORDER_MODE_DAISYCHAIN }; public static final int ALIGNMENT_ORDER_MODE_DEFAULT = ALIGNMENT_ORDER_MODE_ALIGNTOFIRST; public static final String[] alignmentOrderModeDescs = new String[] { "alltofirst", "daisychain", "cumulative", }; protected int alignmentOrderMode = ALIGNMENT_ORDER_MODE_DEFAULT; public Aligner() { } /** * Calculate the maximum time of any of these features * @param features * @return */ public double getMaxElution(Feature[] features) { double maxElution = 0; for (Feature feature : features) { if (getFeatureElutionValue(feature) > maxElution) maxElution = getFeatureElutionValue(feature); } return maxElution; } protected double getFeatureElutionValue(Feature feature) { switch (alignmentDatasource) { case ALIGNMENT_DATASOURCE_SCAN: return feature.getScan(); default: return feature.getTime(); } } /** * If scan mode, get time; if time mode, get scan * @param feature * @return */ protected double getFeatureOppositeElutionValue(Feature feature) { switch (alignmentDatasource) { case ALIGNMENT_DATASOURCE_TIME: return feature.getScan(); default: return feature.getTime(); } } /** * Build a map from scan to RT or RT to scan, based on an array of features. In the resulting array, * index 0 represents minValueForResult, last index = maxValueForResult. minScanForResult can be negative. * Fill in values before first and after last. * * Between scan and time, which is the index and which is the value depends on alignmentDatasource: * SCAN: scan is index, time is value * TIME: time is index, scan is value * @param features * @return */ protected float[] createElutionTypeMap(Feature[] features, int minIndexValForResult, int maxIndexValForResult, boolean showCharts) { Feature[] featuresCopy = new Feature[features.length]; System.arraycopy(features, 0, featuresCopy, 0, featuresCopy.length); Arrays.sort(featuresCopy, new Feature.ScanAscComparator()); //Calculate mean amount of time between scans. We will use this to fill in gaps List<Float> elutionValuesBetweenTicks = new ArrayList<Float>(); for (int i = 1; i < featuresCopy.length; i++) { int numTicksBetween = (int) (getFeatureElutionValue(featuresCopy[i]) - getFeatureElutionValue(featuresCopy[i - 1])); if (numTicksBetween > 0) { float valueBetweenTicks = (float) (getFeatureOppositeElutionValue(featuresCopy[i]) - getFeatureOppositeElutionValue(featuresCopy[i - 1])); elutionValuesBetweenTicks.add(valueBetweenTicks / numTicksBetween); } } float meanValueBetweenTicks = (float) BasicStatistics.mean(elutionValuesBetweenTicks); int numScansInResult = maxIndexValForResult - minIndexValForResult + 1; float[] result = new float[numScansInResult]; int featureIndex = 0; List<Integer> xValsForChart = new ArrayList<Integer>(); List<Float> yValsForChart = new ArrayList<Float>(); for (int i = 0; i < numScansInResult; i++) { int integerizedValue = i + minIndexValForResult; //advance the feature array pointer to the first feature that has a higher or equivalent scan number to i while (integerizedValue > getFeatureElutionValue(featuresCopy[featureIndex]) && featureIndex < featuresCopy.length - 1) featureIndex++; Feature firstSameOrHigherScanFeature = featuresCopy[featureIndex]; //Assign the RT for i the value RT value for this feature. But if the feature scan != i, adjust //by adding/subtracting the appropriate number of meanTimeBetweenScans float mappedVal = (float) getFeatureOppositeElutionValue(firstSameOrHigherScanFeature); result[i] = mappedVal + (float) ((integerizedValue - getFeatureElutionValue(featuresCopy[featureIndex])) * meanValueBetweenTicks); //For really nonlinear relationships, we might have created a situation in which one scan's value is lower //than the previous. Bump up. if (i > 0) result[i] = Math.max(result[i - 1], result[i]); //System.err.println(i + ", " + result[i]); if (showCharts) xValsForChart.add(integerizedValue); yValsForChart.add(result[i]); } if (showCharts) { PanelWithScatterPlot pwsp = new PanelWithScatterPlot(xValsForChart, yValsForChart, "scan vs rt, base run"); pwsp.setAxisLabels("scan", "time"); if (alignmentDatasource == ALIGNMENT_DATASOURCE_TIME) pwsp.setAxisLabels("time", "scan"); pwsp.displayInTab(); } return result; } /** * Perform the non-linear mapping between feature sets */ public List<FeatureSet> alignFeatureSets(List<FeatureSet> sets, boolean showCharts) { warpingMaps = new ArrayList<int[]>(sets.size() - 1); if (showCharts) buildCharts = true; List<FeatureSet> alignedSets = new ArrayList<FeatureSet>(); alignedSets.add(sets.get(0)); _log.debug("Align: base featureset has " + sets.get(0).getFeatures().length + " features"); double minElutionAllSetsPostAlign = Double.MAX_VALUE; double maxElutionAllSetsPostAlign = 0; for (Feature feature : sets.get(0).getFeatures()) { double elutionVal = getFeatureElutionValue(feature); if (elutionVal > maxElutionAllSetsPostAlign) maxElutionAllSetsPostAlign = elutionVal; if (elutionVal < minElutionAllSetsPostAlign) minElutionAllSetsPostAlign = elutionVal; } //dhmay adding 20100201 //keep track of the FeatureSet we're aligning to. For align-to-first mode, always the first set. //For daisychain mode, the most recent (post-alignment) set //todo: if adding a new alignment mode, need to mess with this FeatureSet alignToSet = sets.get(0); if (alignmentOrderMode == ALIGNMENT_ORDER_MODE_CUMULATIVE) alignToSet = alignToSet.deepCopy(); for (int i = 1; i < sets.size(); i++) { String sourceFilename = "???"; if (sets.get(i).getSourceFile() != null) sourceFilename = sets.get(i).getSourceFile().getAbsolutePath(); _log.debug( "Aligning Feature File " + sourceFilename + ", features: " + sets.get(i).getFeatures().length); Pair<Feature, Feature>[] pairedFeatures = featurePairSelector.selectPairs(sets.get(i), alignToSet); _log.debug("Aligner.alignFeatureSets: selected " + pairedFeatures.length + " pairs"); double maxElution = 0; for (Feature feature : sets.get(i).getFeatures()) { if (getFeatureElutionValue(feature) > maxElution) maxElution = getFeatureElutionValue(feature); } //necessary cast Pair<Double, Double>[] pairedElutions = (Pair<Double, Double>[]) new Pair[pairedFeatures.length]; for (int j = 0; j < pairedElutions.length; j++) { pairedElutions[j] = new Pair<Double, Double>(getFeatureElutionValue(pairedFeatures[j].first), getFeatureElutionValue(pairedFeatures[j].second)); } Pair<Double, Double>[] restrictedPairs = restrictPairsForRegression(pairedElutions); _log.debug("Aligner.alignFeatureSets: restricted to " + restrictedPairs.length + " pairs"); String mapFileNameStart = (i + 1) + "_onto_1"; //this is where the work is done double[] elutionMap = alignPairs(restrictedPairs, maxElution, mapFileNameStart); //It's lame to take up extra memory this way, but it's nice to be //able to persist this scan map for later investigation int[] intElutionMap = new int[elutionMap.length]; for (int j = 0; j < elutionMap.length; j++) intElutionMap[j] = (int) Math.round(elutionMap[j]); warpingMaps.add(intElutionMap); FeatureSet aligned = sets.get(i).deepCopy(); //indicate that this featureset has been aligned by putting "align" //in the source filename. OK, so this is a bit of a hack for backward //compatibility of the detail files aligned.setSourceFile(new File(getAlignedFileName(sets, i))); for (Feature feature : aligned.getFeatures()) { double newElution = intElutionMap[(int) getFeatureElutionValue(feature)]; switch (alignmentDatasource) { case ALIGNMENT_DATASOURCE_SCAN: feature.setScan((int) newElution); break; default: feature.setTime((float) newElution); break; } if (newElution > maxElutionAllSetsPostAlign) maxElutionAllSetsPostAlign = newElution; if (newElution < minElutionAllSetsPostAlign) { minElutionAllSetsPostAlign = newElution; } } alignedSets.add(aligned); //dhmay adding 20100201 //If doing daisy-chain alignment, save the current aligned set as the one to align the next one to //todo: if adding a new alignment mode, need to mess with this if (alignmentOrderMode == ALIGNMENT_ORDER_MODE_DAISYCHAIN) alignToSet = aligned; else if (alignmentOrderMode == ALIGNMENT_ORDER_MODE_CUMULATIVE) { Feature[] cumFeatures = new Feature[alignToSet.getFeatures().length + aligned.getFeatures().length]; System.arraycopy(alignToSet.getFeatures(), 0, cumFeatures, 0, alignToSet.getFeatures().length); System.arraycopy(aligned.getFeatures(), 0, cumFeatures, alignToSet.getFeatures().length, aligned.getFeatures().length); alignToSet.setFeatures(cumFeatures); } if (buildCharts) { createChart(pairedElutions, restrictedPairs, elutionMap); if (showCharts) { alignmentChart.setName("Alignment " + i); alignmentChart.displayInTab(); double[] massesPPM = new double[pairedFeatures.length]; double[] deltaMassesPPM = new double[pairedFeatures.length]; for (int j = 0; j < deltaMassesPPM.length; j++) { deltaMassesPPM[j] = (pairedFeatures[j].second.getMass() - pairedFeatures[j].first.getMass()) * 1000000 / pairedFeatures[j].first.getMass(); massesPPM[j] = pairedFeatures[j].first.getMass(); } //todo: fix these so extreme values are gone, or just get rid of them permanently // PanelWithHistogram deltaMassHist = new PanelWithHistogram(deltaMassesPPM); // deltaMassHist.setName("massMatchDeltaPPM"); // deltaMassHist.displayInTab(); // // // PanelWithScatterPlot massMassDevPlot = new PanelWithScatterPlot( // massesPPM, deltaMassesPPM, "Mass (x) vs. deltaMass (y), ppm"); // massMassDevPlot.displayInTab(); } } } //maps from scan to time, or from time to scan, depending on alignmentDatasource. // SCAN: scan is index, time is value // TIME: time is index, scan is value //This is so that both values will be in sync. float[] baseElutionTypeMap = createElutionTypeMap(sets.get(0).getFeatures(), (int) minElutionAllSetsPostAlign, (int) maxElutionAllSetsPostAlign, showCharts); for (FeatureSet featureSet : alignedSets) { for (Feature feature : featureSet.getFeatures()) { switch (alignmentDatasource) { case ALIGNMENT_DATASOURCE_SCAN: feature.setTime(baseElutionTypeMap[(int) (feature.getScan() - minElutionAllSetsPostAlign)]); break; default: feature.setScan( (int) baseElutionTypeMap[(int) (feature.getTime() - minElutionAllSetsPostAlign)]); break; } } } //clean up TempFileManager.deleteTempFiles(this); return alignedSets; } protected Pair<Double, Double>[] restrictPairsForRegression(Pair<Double, Double>[] allPairs) { Feature[] featuresForRegression = new Feature[allPairs.length]; double[] baseTimes = new double[allPairs.length]; double[] toAlignTimes = new double[allPairs.length]; //this is a bit silly -- we are modeling this as an AMT regression, so we're setting Time and //Observed Hydrophobicity appropriately in a dummy feature for (int j = 0; j < allPairs.length; j++) { Pair<Double, Double> matchedPair = allPairs[j]; Feature dummyFeature = new Feature(); baseTimes[j] = matchedPair.first; toAlignTimes[j] = matchedPair.second; dummyFeature.setTime((float) baseTimes[j]); AmtExtraInfoDef.setObservedHydrophobicity(dummyFeature, toAlignTimes[j]); featuresForRegression[j] = dummyFeature; } double[] baseTimesForRegression = new double[featuresForRegression.length]; double[] toAlignTimesForRegression = new double[featuresForRegression.length]; for (int j = 0; j < featuresForRegression.length; j++) { //this call to getTime() is OK -- this is the dummy value for regression, which could be time or scan baseTimesForRegression[j] = featuresForRegression[j].getTime(); toAlignTimesForRegression[j] = AmtExtraInfoDef.getObservedHydrophobicity(featuresForRegression[j]); } featuresForRegression = ProteomicsRegressionUtilities.selectFeaturesWithLowLeverageAndStudentizedResidual( featuresForRegression, baseTimes, toAlignTimes, maxLeverageNumerator, maxStudRes, false, 0, false, true); Pair<Double, Double>[] result = (Pair<Double, Double>[]) new Pair[featuresForRegression.length]; _log.debug(featuresForRegression.length + " pairs left after exclusion"); for (int j = 0; j < featuresForRegression.length; j++) { //this call to getTime() is OK -- this is the dummy value for regression, which could be time or scan result[j] = new Pair<Double, Double>((double) featuresForRegression[j].getTime(), AmtExtraInfoDef.getObservedHydrophobicity(featuresForRegression[j])); } ApplicationContext.setMessage("Using " + featuresForRegression.length + " features (out of " + allPairs.length + " mass-matched) for regression"); return result; } /** * Cover method. In the absence of filename information, use a filename that's * likely to be unique * @param pairs * @param maxValueToWarp * @return */ public double[] alignPairs(Pair<Double, Double>[] pairs, int maxValueToWarp) { return alignPairs(pairs, maxValueToWarp, "length" + pairs.length); } /** * Generic method for creating a warping based on pairs of values. * Create temp files with names based on tempFileNameStart, in case the user * wants to peruse them * @param pairs * @param maxValueToWarp * @return */ public abstract double[] alignPairs(Pair<Double, Double>[] pairs, double maxValueToWarp, String tempFileNameStart); protected PanelWithChart createChart(Pair<Double, Double>[] allPairedScans, Pair<Double, Double>[] restrictedPairs, double[] warpedValues) { PanelWithScatterPlot pwsp = new PanelWithScatterPlot(); double[] restrictedScans1 = new double[restrictedPairs.length]; double[] restrictedScans2 = new double[restrictedPairs.length]; for (int i = 0; i < restrictedScans1.length; i++) { restrictedScans1[i] = restrictedPairs[i].first; restrictedScans2[i] = restrictedPairs[i].second; } pwsp.addData(restrictedScans1, restrictedScans2, "Used in regression"); double[] scans1 = new double[allPairedScans.length]; double[] scans2 = new double[allPairedScans.length]; double minScan1 = Double.MAX_VALUE; double maxScan1 = Double.MIN_VALUE; for (int i = 0; i < scans1.length; i++) { scans1[i] = allPairedScans[i].first; scans2[i] = allPairedScans[i].second; if (scans1[i] < minScan1) minScan1 = scans1[i]; if (scans1[i] > maxScan1) maxScan1 = scans1[i]; } pwsp.addData(scans1, scans2, "all"); int firstBaseVal = Math.max((int) minScan1 - 100, 0); int lastBaseVal = Math.min((int) maxScan1 + 100, warpedValues.length - 1); double[] plottedWarpedValues = new double[lastBaseVal - firstBaseVal + 1]; System.arraycopy(warpedValues, firstBaseVal, plottedWarpedValues, 0, plottedWarpedValues.length); double[] unwarpedValues = new double[plottedWarpedValues.length]; for (int i = 0; i < plottedWarpedValues.length; i++) unwarpedValues[i] = i + firstBaseVal; pwsp.addData(unwarpedValues, plottedWarpedValues, "alignment map"); pwsp.setAxisLabels("Time in run 1", "Time in run 2"); alignmentChart = pwsp; return pwsp; } /** * Write a file containing pairs * @param fileName * @throws FileNotFoundException */ protected File writePairsFile(Pair<Double, Double>[] pairs, String fileName) throws FileNotFoundException { File currentPairsFile = TempFileManager.createTempFile(fileName, this); PrintWriter currentPairsPW = new PrintWriter(currentPairsFile); currentPairsPW.println("source\tdest"); for (Pair<Double, Double> pair : pairs) currentPairsPW.println(pair.first + "\t" + pair.second); currentPairsPW.flush(); _log.debug("wrote feature pairs to file " + currentPairsFile.getAbsolutePath()); return currentPairsFile; } /** * Return a filename with an "align" in it. If there's a source file for the * featureset, use that name to start with. If not, index. * @param featureSets * @param i * @return */ protected String getAlignedFileName(List featureSets, int i) { FeatureSet fs = (FeatureSet) featureSets.get(i); String baseFileName; String fileName = fs.getSourceFile() == null ? "set" + i + ".tsv" : fs.getSourceFile().getName(); int dotPos = fileName.lastIndexOf('.'); if (dotPos < 0) baseFileName = fileName; else baseFileName = fileName.substring(0, dotPos); return baseFileName + ".align.tsv"; } /** * Provide access to the scan maps created for individual feature sets. * Obviously, only applicable if you've just done alignment on a bunch of * feature sets * @return */ public List<int[]> getWarpingMaps() { return warpingMaps; } public void plotWarpings() { if (warpingMaps == null) return; XYSeriesCollection dataset = new XYSeriesCollection(); for (int i = 0; i < warpingMaps.size(); i++) { XYSeries series = new XYSeries("run " + (i + 1) + (i == 0 ? " (reference)" : "")); int[] setScanMap = warpingMaps.get(i); for (int j = 0; j < setScanMap.length; j++) series.add(j, setScanMap[j]); dataset.addSeries(series); } JFreeChart chart = ChartFactory.createXYLineChart(null, null, null, dataset, PlotOrientation.VERTICAL, true, false, false); ChartDialog chartDialog = new ChartDialog(chart); chartDialog.setSize(800, 600); chartDialog.setVisible(true); } public void plotWarpings(double[][] warpings) { XYSeriesCollection dataset = new XYSeriesCollection(); for (double[] warping : warpings) { XYSeries series = new XYSeries(""); for (int j = 0; j < warping.length; j++) series.add(j, warping[j]); dataset.addSeries(series); } JFreeChart chart = ChartFactory.createXYLineChart(null, null, null, dataset, PlotOrientation.VERTICAL, true, false, false); ChartDialog chartDialog = new ChartDialog(chart); chartDialog.setSize(800, 600); chartDialog.setVisible(true); } public void plotWarping(double[] warping) { double[][] warpings = new double[1][warping.length]; warpings[0] = warping; plotWarpings(warpings); } /** * Interface for classes that provide a way of selecting pairs of features. * Each implementing class will define a different strategy for selecing pairs. */ public static interface FeaturePairSelector { public abstract Pair<Feature, Feature>[] selectPairs(FeatureSet sourceFeatureSet, FeatureSet destFeatureSet); } /** * Selecte pairs of features based on peptide agreement */ public static class PeptideFeaturePairSelector implements FeaturePairSelector { public Pair<Feature, Feature>[] selectPairs(FeatureSet sourceFeatureSet, FeatureSet destFeatureSet) { List<Pair<Feature, Feature>> resultList = new ArrayList<Pair<Feature, Feature>>(); for (Feature sourceFeature : sourceFeatureSet.getFeatures()) { String sourcePeptide = MS2ExtraInfoDef.getFirstPeptide(sourceFeature); if (sourcePeptide == null) continue; for (Feature destFeature : destFeatureSet.getFeatures()) { String destPeptide = MS2ExtraInfoDef.getFirstPeptide(destFeature); if (destPeptide != null && destPeptide.equals(sourcePeptide)) resultList.add(new Pair<Feature, Feature>(sourceFeature, destFeature)); } } //have to cast, because Java is not good return (Pair<Feature, Feature>[]) resultList.toArray(new Pair[0]); } } /** * Select pairs of features based on peptide agreement, and also mass tolerance */ public static class HybridFeaturePairSelector implements FeaturePairSelector { float massTolerance = 0.1f; public Pair<Feature, Feature>[] selectPairs(FeatureSet sourceFeatureSet, FeatureSet destFeatureSet) { List<Pair<Feature, Feature>> resultList = new ArrayList<Pair<Feature, Feature>>(); for (Feature sourceFeature : sourceFeatureSet.getFeatures()) { String sourcePeptide = MS2ExtraInfoDef.getFirstPeptide(sourceFeature); if (sourcePeptide == null) continue; for (Feature destFeature : destFeatureSet.getFeatures()) { String destPeptide = MS2ExtraInfoDef.getFirstPeptide(destFeature); if (destPeptide != null && destPeptide.equals(sourcePeptide)) resultList.add(new Pair<Feature, Feature>(sourceFeature, destFeature)); } } List<Pair<Feature, Feature>> forRemoval = new ArrayList<Pair<Feature, Feature>>(); for (Pair<Feature, Feature> resultPair : resultList) { if (Math.abs(resultPair.first.getMass() - resultPair.second.getMass()) > massTolerance) forRemoval.add(resultPair); } for (Pair<Feature, Feature> pairToRemove : forRemoval) resultList.remove(pairToRemove); _log.debug("Removed " + forRemoval.size() + " pairs out of mass tolerance"); //have to cast, because Java is not good return (Pair<Feature, Feature>[]) resultList.toArray(new Pair[0]); } } public static class MassFeaturePairSelector extends MassOrMzFeaturePairSelector implements FeaturePairSelector { public static final float DEFAULT_DELTA_MASS = 10f; public MassFeaturePairSelector() { super(); setMassOrMzMode(MODE_MASS); setDeltaMass(DEFAULT_DELTA_MASS); } public void setDeltaMass(float deltaMass) { setDeltaMassOrMz(deltaMass); } public float getDeltaMass() { return getDeltaMassOrMz(); } } public static class MzFeaturePairSelector extends MassOrMzFeaturePairSelector implements FeaturePairSelector { public static final float DEFAULT_DELTA_MZ = 0.1f; public MzFeaturePairSelector() { super(); setMassOrMzMode(MODE_MZ); setDeltaMz(DEFAULT_DELTA_MZ); } public void setDeltaMz(float deltaMz) { setDeltaMassOrMz(deltaMz); } public float getDeltaMz() { return getDeltaMassOrMz(); } } /** * This class selects pairs of features purely based on m/z -- it pairs up features * that are within a given m/z tolerance. This emulates the old behavior, which was * implemented in R */ public static class MassOrMzFeaturePairSelector implements FeaturePairSelector { public static final float DEFAULT_DELTA_MASS_OR_MZ = 10f; public static final float DEFAULT_MIN_INTENSITY = 100; protected int topN = -1; protected float deltaMassOrMz = DEFAULT_DELTA_MASS_OR_MZ; protected float minIntensity = DEFAULT_MIN_INTENSITY; public static final int MODE_MASS = 0; public static final int MODE_MZ = 1; protected int massOrMzMode = MODE_MZ; protected int deltaMassType = DEFAULT_DELTA_MASS_TYPE; public static final int DEFAULT_DELTA_MASS_TYPE = FeatureSetMatcher.DELTA_MASS_TYPE_PPM; public void setTopN(int newTopN) { topN = newTopN; } public int getTopN() { return topN; } public float getDeltaMassOrMz() { return deltaMassOrMz; } public void setDeltaMassOrMz(float deltaMz) { this.deltaMassOrMz = deltaMz; } public float getMinIntensity() { return minIntensity; } public void setMinIntensity(float minIntensity) { this.minIntensity = minIntensity; } /** * This will return more than N entries when more than one entry shares the * _exact_ value of the boundry element; we do this to avoid making random exclusions * of "equivalent" values. * * Note that, in the degenerate case, this will return * all features if all members of the input list have the same value. */ protected FeatureSet stripAllButTopNFeaturesByIntensity(FeatureSet featureSet) { Feature[] features = featureSet.getFeatures(); Arrays.sort(features, new Feature.IntensityDescComparator()); if (features.length == 0) ApplicationContext.infoMessage("ERROR! 0 features in this FeatureSet"); float minIntensity = features[Math.min(features.length - 1, topN - 1)].getIntensity(); FeatureSet.FeatureSelector sel = new FeatureSet.FeatureSelector(); sel.setMinIntensity(minIntensity); return featureSet.filter(sel); } /** * * Select pairs based on m/z or * @return */ public Pair<Feature, Feature>[] selectPairs(FeatureSet sourceFeatureSet, FeatureSet destFeatureSet) { _log.debug("selectPairs: topN=" + topN + ", mode (mass==0): " + massOrMzMode + ", minIntensity: " + minIntensity + " sourceFeatures: " + sourceFeatureSet.getFeatures().length + ", destFeatures: " + destFeatureSet.getFeatures().length + ", deltaMassOrMz: " + deltaMassOrMz + ", deltaMassType (ppm==1): " + deltaMassType); FeatureSet workingSourceFeatureSet = (FeatureSet) sourceFeatureSet.clone(); FeatureSet workingDestFeatureSet = (FeatureSet) destFeatureSet.clone(); if (topN <= 0) { FeatureSet.FeatureSelector sel = new FeatureSet.FeatureSelector(); sel.setMinIntensity(minIntensity); workingSourceFeatureSet = workingSourceFeatureSet.filter(sel); workingDestFeatureSet = workingDestFeatureSet.filter(sel); } else { workingSourceFeatureSet = stripAllButTopNFeaturesByIntensity(workingSourceFeatureSet); workingDestFeatureSet = stripAllButTopNFeaturesByIntensity(workingDestFeatureSet); } Feature[] sourceFeatures = workingSourceFeatureSet.getFeatures(); Feature[] destFeatures = workingDestFeatureSet.getFeatures(); Comparator<Feature> featureMassOrMzAscComparator = null; switch (massOrMzMode) { case MODE_MASS: featureMassOrMzAscComparator = new Feature.MassAscComparator(); break; case MODE_MZ: featureMassOrMzAscComparator = new Feature.MzAscComparator(); break; } Arrays.sort(sourceFeatures, featureMassOrMzAscComparator); Arrays.sort(destFeatures, featureMassOrMzAscComparator); List<Pair<Feature, Feature>> resultList = new ArrayList<Pair<Feature, Feature>>(); _log.debug( "selectPairs: pairing sets of size " + sourceFeatures.length + " and " + destFeatures.length); switch (massOrMzMode) { case MODE_MASS: _log.debug(" deltaMass: " + deltaMassOrMz); break; case MODE_MZ: _log.debug(" deltaMz: " + deltaMassOrMz); break; } int i2top = 0; int i2; for (Feature sourceFeature : sourceFeatures) { float sourceMassOrMz = sourceFeature.getMz(); switch (massOrMzMode) { case MODE_MASS: sourceMassOrMz = sourceFeature.getMass(); break; case MODE_MZ: sourceMassOrMz = sourceFeature.getMz(); break; } i2 = i2top; while (i2 < destFeatures.length) { float destMassOrMz = destFeatures[i2].getMz(); float effectiveDeltaMassOrMz = deltaMassOrMz; switch (massOrMzMode) { case MODE_MASS: destMassOrMz = destFeatures[i2].getMass(); effectiveDeltaMassOrMz = MassUtilities.calculateAbsoluteDeltaMass(sourceMassOrMz, deltaMassOrMz, deltaMassType); break; case MODE_MZ: destMassOrMz = destFeatures[i2].getMz(); effectiveDeltaMassOrMz = deltaMassOrMz; break; } if (destMassOrMz < sourceMassOrMz - effectiveDeltaMassOrMz) { i2++; i2top = i2; continue; } if (destMassOrMz > sourceMassOrMz + effectiveDeltaMassOrMz) break; //this 'if' should not be necessary. However, for some reason, //there seems to be some rounding going on up above, and things close to //the boundary don't always get assigned the right way. //For some reason the 'if' below works. if (Math.abs(destMassOrMz - sourceMassOrMz) <= effectiveDeltaMassOrMz) resultList.add(new Pair<Feature, Feature>(sourceFeature, destFeatures[i2])); i2++; } } //have to cast, because Java is not good return (Pair<Feature, Feature>[]) resultList.toArray(new Pair[0]); } public int getMassOrMzMode() { return massOrMzMode; } public void setMassOrMzMode(int massOrMzMode) { this.massOrMzMode = massOrMzMode; } public int getDeltaMassType() { return deltaMassType; } public void setDeltaMassType(int deltaMassType) { this.deltaMassType = deltaMassType; } } public FeaturePairSelector getFeaturePairSelector() { return featurePairSelector; } public void setFeaturePairSelector(FeaturePairSelector featurePairSelector) { this.featurePairSelector = featurePairSelector; } public boolean isBuildCharts() { return buildCharts; } public void setBuildCharts(boolean buildCharts) { this.buildCharts = buildCharts; } public PanelWithChart getAlignmentChart() { return alignmentChart; } public void setAlignmentChart(PanelWithChart alignmentChart) { this.alignmentChart = alignmentChart; } public double getMaxStudRes() { return maxStudRes; } public void setMaxStudRes(double maxStudRes) { this.maxStudRes = maxStudRes; } public double getMaxLeverageNumerator() { return maxLeverageNumerator; } public void setMaxLeverageNumerator(double maxLeverageNumerator) { this.maxLeverageNumerator = maxLeverageNumerator; } public int getAlignmentOrderMode() { return alignmentOrderMode; } public void setAlignmentOrderMode(int alignmentOrderMode) { this.alignmentOrderMode = alignmentOrderMode; } public int getAlignmentDatasource() { return alignmentDatasource; } public void setAlignmentDatasource(int alignmentDatasource) { this.alignmentDatasource = alignmentDatasource; } }