edu.umn.msi.tropix.proteomics.itraqquantitation.impl.ITraqMatchBuilderImpl.java Source code

Introduction

Here is the source code for edu.umn.msi.tropix.proteomics.itraqquantitation.impl.ITraqMatchBuilderImpl.java
Source

/********************************************************************************
 * Copyright (c) 2009 Regents of the University of Minnesota
 *
 * This Software was written at the Minnesota Supercomputing Institute
 * http://msi.umn.edu
 *
 * All rights reserved. The following statement of license applies
 * only to this file, and not to the other files distributed with it
 * or derived therefrom.  This file is made available under the terms of
 * the Eclipse Public License v1.0 which accompanies this distribution,
 * and is available at http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 * Minnesota Supercomputing Institute - initial API and implementation
 *******************************************************************************/

package edu.umn.msi.tropix.proteomics.itraqquantitation.impl;

import java.io.File;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FilenameUtils;

import com.google.common.base.Function;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

import edu.umn.msi.tropix.common.collect.Closure;
import edu.umn.msi.tropix.common.io.FileUtils;
import edu.umn.msi.tropix.common.io.FileUtilsFactory;
import edu.umn.msi.tropix.common.io.IOUtils;
import edu.umn.msi.tropix.common.io.IOUtilsFactory;
import edu.umn.msi.tropix.proteomics.conversion.Scan;
import edu.umn.msi.tropix.proteomics.conversion.impl.XmlPeakListParser;
import edu.umn.msi.tropix.proteomics.conversion.impl.XmlPeakListParserImpl;
import edu.umn.msi.tropix.proteomics.itraqquantitation.impl.ReportExtractor.ReportType;

class ITraqMatchBuilderImpl implements ITraqMatchBuilder {
    // Shared helper used to open files as input streams.
    private static final FileUtils FILE_UTILS = FileUtilsFactory.getInstance();
    // Shared helper used to close streams quietly once they are no longer needed.
    private static final IOUtils IO_UTILS = IOUtilsFactory.getInstance();
    // Parses the input report into report entries; replaceable through setReportParser.
    private ReportExtractor reportParser = new ReportExtractorImpl();
    // Parses peak-list input streams into scans; replaceable through setXmlPeakListParser.
    private XmlPeakListParser peakListParser = new XmlPeakListParserImpl();
    // No default is provided here: must be supplied through setItraqMatcher before buildDataEntries is called.
    private ITraqMatcher iTraqMatcher;

    /**
     * Parses one peak-list file and stores an {@link ITraqScanSummary} for every scan it contains.
     *
     * Each scan is keyed by a {@link ScanIndex} built from the scan's parent name, the supplied file
     * index, the scan's identifying number and its precursor charge. The parent name, the file's base
     * name and the file's full name are attached to the key as alternative names.
     *
     * @param mzxmlFile     file to read scans from
     * @param fileIndex     index of the file among the inputs, recorded in each key
     * @param scanMap       destination map; entries are added to it
     * @param options       supplies the iTRAQ labels used to build each summary
     * @param useScanNumber when true the scan's number identifies it, otherwise the scan's index does
     */
    private void addSummariesForFile(final File mzxmlFile, final Integer fileIndex,
            final Map<ScanIndex, ITraqScanSummary> scanMap, final ITraqMatchBuilderOptions options,
            boolean useScanNumber) {
        final InputStream mzxmlStream = FILE_UTILS.getFileInputStream(mzxmlFile);
        try {
            for (final Iterator<Scan> scanIterator = peakListParser.parse(mzxmlStream); scanIterator.hasNext();) {
                final Scan scan = scanIterator.next();

                final int number;
                if (useScanNumber) {
                    number = scan.getNumber();
                } else {
                    number = scan.getIndex();
                }
                final short charge = scan.getPrecursorCharge();
                final double[] peaks = scan.getPeaks();

                final String parentName = scan.getParentName();
                final String fileName = mzxmlFile.getName();
                final List<String> alternativeNames = Lists.newArrayList(parentName,
                        FilenameUtils.getBaseName(fileName), fileName);
                final ScanIndex key = new ScanIndex(parentName, fileIndex, number, charge, alternativeNames);

                // NOTE(review): the same number is passed for both leading arguments of fromPeaks;
                // confirm against ITraqScanSummary that this is intended.
                final ITraqScanSummary summary = ITraqScanSummary.fromPeaks(number, number, charge,
                        options.getITraqLabels(), peaks);
                scanMap.put(key, summary);
            }
        } finally {
            // Always release the file handle, even when parsing fails part-way through.
            IO_UTILS.closeQuietly(mzxmlStream);
        }
    }

    /**
     * Immutable pairing of an input file with the index assigned to it by the code that
     * creates the instance.
     */
    private static class InputSource {
        private final File file;
        private final int index;

        /**
         * @param index index to associate with the file
         * @param file  the input file
         */
        public InputSource(int index, File file) {
            this.file = file;
            this.index = index;
        }

        /** @return the input file */
        protected File getFile() {
            return file;
        }

        /** @return the index associated with the file */
        protected int getIndex() {
            return index;
        }
    }

    /**
     * Wraps each input file in an {@link InputSource} numbered by its position in iteration
     * order, starting at zero.
     *
     * @param inputFiles files to wrap
     * @return a new list with one source per file, in iteration order
     */
    private List<InputSource> getInputSources(final Iterable<File> inputFiles) {
        final List<InputSource> sources = Lists.newArrayList();
        for (final File file : inputFiles) {
            // The number of sources collected so far is exactly the next zero-based index.
            final int position = sources.size();
            sources.add(new InputSource(position, file));
        }
        return sources;
    }

    /**
     * Reads all of the supplied peak-list files and returns a map from the {@link ScanIndex} of
     * each scan to the {@link ITraqScanSummary} built for it.
     *
     * When the options request exactly one thread the files are read one after another on the
     * calling thread; otherwise the work is handed to {@code Concurrent.processInNThreads}.
     *
     * @param mzxmlInputs   files to read
     * @param options       supplies the thread count and is forwarded to the per-file reader
     * @param useScanNumber forwarded to the per-file reader to select scan number versus scan index
     * @return the populated map
     * @throws RuntimeException if the concurrent processing reports failure
     */
    private Map<ScanIndex, ITraqScanSummary> parseScans(final Iterable<File> mzxmlInputs,
            final ITraqMatchBuilderOptions options, final boolean useScanNumber) {
        // A concurrent map is used because the multi-threaded branch fills it from the closure below.
        final Map<ScanIndex, ITraqScanSummary> summaries = Maps.newConcurrentMap();
        final List<InputSource> sources = getInputSources(mzxmlInputs);

        if (options.getThreads() != 1) {
            final Closure<InputSource> readSource = new Closure<InputSource>() {
                public void apply(final InputSource source) {
                    addSummariesForFile(source.getFile(), source.getIndex(), summaries, options, useScanNumber);
                }
            };
            final boolean completed = Concurrent.processInNThreads(options.getThreads(), readSource, sources);
            if (!completed) {
                throw new RuntimeException("Failed to produce scan map");
            }
            return summaries;
        }

        // Single-threaded: read each file in order on the calling thread.
        for (final InputSource source : sources) {
            addSummariesForFile(source.getFile(), source.getIndex(), summaries, options, useScanNumber);
        }
        return summaries;
    }

    /**
     * Builds the iTRAQ matches for an input report and its peak-list files.
     *
     * The report is parsed into report entries, the peak-list files are parsed into a map of scan
     * summaries, and both are handed to the configured {@link ITraqMatcher}. Scans are identified
     * by scan number unless the report type is {@code ReportType.PEPXML}, in which case the scan
     * index is used.
     *
     * The report stream is closed once matching has finished (or failed); previously it was never
     * closed and the file handle leaked.
     *
     * @param mzxmlInputs peak-list files to read scans from
     * @param inputReport report file together with its type
     * @param options     matching options, including the number of threads to use
     * @return the matches produced by the configured matcher
     */
    public List<ITraqMatch> buildDataEntries(final Iterable<File> mzxmlInputs, final InputReport inputReport,
            final ITraqMatchBuilderOptions options) {
        System.out.println("Running with " + options.getThreads() + " threads.");
        final InputStream reportStream = FILE_UTILS.getFileInputStream(inputReport.getReport());
        try {
            // Parse the specified spectrum report
            final Iterable<ReportEntry> reportEntries = reportParser.parse(reportStream,
                    inputReport.getReportType());

            // Parse the MzXML files and obtain the scan summaries for each
            System.out.println("Building scansMap");
            final Map<ScanIndex, ITraqScanSummary> scansMap = parseScans(mzxmlInputs, options,
                    (inputReport.getReportType() != ReportType.PEPXML));

            System.out.println("Matching build itraq matches");
            return iTraqMatcher.match(reportEntries, new ScanFunctionImpl(scansMap), options);
        } finally {
            // Closed only after matching so that a parser which reads the report lazily while the
            // entries are iterated still sees an open stream.
            IO_UTILS.closeQuietly(reportStream);
        }
    }

    /**
     * Strategy for deciding whether a requested scan index corresponds to a candidate scan index.
     */
    interface ScanIndexMatcher {
        /**
         * @param query  the scan index being looked up
         * @param target a candidate scan index to compare against
         * @return true if the candidate is considered a match for the query
         */
        boolean match(final ScanIndex query, final ScanIndex target);
    }

    /**
     * Matcher that delegates to {@code ScanIndex.numberChargeAndAlternativeNameMatch}.
     */
    static class AlternativeNameExactScanIndexMatcher implements ScanIndexMatcher {

        public boolean match(ScanIndex query, ScanIndex target) {
            return query.numberChargeAndAlternativeNameMatch(target);
        }

    }

    // Shared instance of the alternative-name matcher.
    private static final ScanIndexMatcher ALTERNATIVE_NAME_MATCHER = new AlternativeNameExactScanIndexMatcher();

    /**
     * Matcher that delegates to {@code ScanIndex.numberAndNameMatch}.
     */
    static class NameAndScanNumberMatcher implements ScanIndexMatcher {

        public boolean match(ScanIndex query, ScanIndex target) {
            return query.numberAndNameMatch(target);
        }

    }

    // Shared instance of the name-and-scan-number matcher.
    private static final ScanIndexMatcher NAME_AND_SCAN_NUMBER_MATCHER = new NameAndScanNumberMatcher();

    /**
     * Matcher that delegates to {@code ScanIndex.numberAndChargeMatch}.
     */
    static class NumberAndChargeMatcher implements ScanIndexMatcher {

        public boolean match(ScanIndex query, ScanIndex target) {
            return query.numberAndChargeMatch(target);
        }

    }

    /**
     * Matcher that delegates to {@code ScanIndex.numberAndFileIndexMatch}.
     */
    static class FileIndexMatcher implements ScanIndexMatcher {

        public boolean match(ScanIndex query, ScanIndex target) {
            return query.numberAndFileIndexMatch(target);
        }

    }

    // Shared instance of FileIndexMatcher.
    // NOTE(review): the constant name mentions "charge" but the matcher delegates to
    // numberAndFileIndexMatch - confirm against ScanIndex whether charge is actually compared.
    private static final ScanIndexMatcher NUMBER_CHARGE_AND_FILE_INDEX_MATCHER = new FileIndexMatcher();

    // Shared instance of NumberAndChargeMatcher.
    private static final ScanIndexMatcher NUMBER_AND_CHARGE_MATCHER = new NumberAndChargeMatcher();

    // Fallback matchers in the order they are tried when a direct map lookup finds nothing.
    private static final ScanIndexMatcher[] MATCHING_ALGORITHMS = new ScanIndexMatcher[] {
            NUMBER_CHARGE_AND_FILE_INDEX_MATCHER, ALTERNATIVE_NAME_MATCHER, NAME_AND_SCAN_NUMBER_MATCHER,
            NUMBER_AND_CHARGE_MATCHER };

    private final class ScanFunctionImpl implements Function<ScanIndex, ITraqScanSummary> {
        private final Map<ScanIndex, ITraqScanSummary> scansMap;
        private final Multimap<Integer, ScanIndex> scanIndices;

        private ScanFunctionImpl(final Map<ScanIndex, ITraqScanSummary> scansMap) {
            this.scansMap = scansMap;
            scanIndices = HashMultimap.create();
            for (final Map.Entry<ScanIndex, ITraqScanSummary> entry : scansMap.entrySet()) {
                scanIndices.put(entry.getKey().getNumber(), entry.getKey());
            }
        }

        private List<ScanIndex> getMatches(final ScanIndex scanIndex, ScanIndexMatcher matcher) {
            final List<ScanIndex> matches = Lists.newArrayList();
            final int number = scanIndex.getNumber();
            for (final ScanIndex possibleIndex : scanIndices.get(number)) {
                if (matcher.match(scanIndex, possibleIndex)) {
                    matches.add(possibleIndex);
                }
            }
            return matches;
        }

        private ITraqScanSummary tryFindSummary(final ScanIndex scanIndex, final List<ScanIndex> potentialMatches) {
            ITraqScanSummary match = null;
            if (potentialMatches.size() > 1) {
                final String ambiguousMatches = Iterables.toString(potentialMatches);
                final String errorMessage = String.format(
                        "Ambigious match for scan %s possible matches include %s ", scanIndex, ambiguousMatches);
                throw new IllegalStateException(errorMessage);
            } else if (!potentialMatches.isEmpty()) {
                match = scansMap.get(potentialMatches.get(0));
                if (match == null) {
                    throw new IllegalStateException("Programming error, invalid HashMap construction.");
                }
            }
            return match;
        }

        public ITraqScanSummary apply(final ScanIndex scanIndex) {
            ITraqScanSummary summary = scansMap.get(scanIndex);
            // Try a little harder if an exact match cannot be found...
            for (ScanIndexMatcher matcher : MATCHING_ALGORITHMS) {
                if (summary != null) {
                    break;
                }
                final List<ScanIndex> potentialMatches = getMatches(scanIndex, matcher);
                summary = tryFindSummary(scanIndex, potentialMatches);
            }
            if (summary == null) {
                throw new IllegalStateException(
                        "Found no match for scan " + scanIndex + "in scans map " + summarizeScansMap());
            }
            return summary;
        }

        private String summarizeScansMap() {
            final Set<String> names = Sets.newHashSet();
            for (final ScanIndex index : scansMap.keySet()) {
                names.add(index.getName());
            }
            return "ScansMap{entries=" + scansMap.size() + ", uniqueNames=" + Iterables.toString(names) + "}";
        }
    }

    /**
     * Replaces the report parser used by buildDataEntries.
     *
     * @param reportParser parser to use in place of the default ReportExtractorImpl
     */
    public void setReportParser(final ReportExtractor reportParser) {
        this.reportParser = reportParser;
    }

    /**
     * Replaces the peak-list parser used to read scans from the input files.
     *
     * @param peakListParser parser to use in place of the default XmlPeakListParserImpl
     */
    public void setXmlPeakListParser(final XmlPeakListParser peakListParser) {
        this.peakListParser = peakListParser;
    }

    /**
     * Sets the matcher that buildDataEntries delegates to; the field has no default value.
     *
     * @param iTraqMatcher matcher to use
     */
    public void setItraqMatcher(final ITraqMatcher iTraqMatcher) {
        this.iTraqMatcher = iTraqMatcher;
    }

}