com.compomics.colims.distributed.playground.AnnotatedSpectraParser.java Source code

Java tutorial

Introduction

Here is the source code for com.compomics.colims.distributed.playground.AnnotatedSpectraParser.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.compomics.colims.distributed.playground;

import com.compomics.colims.distributed.io.maxquant.MaxQuantConstants;
import com.compomics.colims.distributed.io.maxquant.TabularFileIterator;
import com.compomics.colims.distributed.io.maxquant.headers.MsmsHeader;
import com.compomics.colims.distributed.io.maxquant.headers.MsmsHeaders;
import com.compomics.colims.distributed.io.maxquant.parsers.MaxQuantAplParser;
import com.compomics.colims.distributed.io.maxquant.parsers.ParseUtils;
import com.compomics.util.experiment.massspectrometry.Peak;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Parse MSMS and APL files for given MSMS IDs and keep the peak points.
 * <p>
 * The parsed annotations, APL keys, APL file paths and spectrum peaks are stored in instance fields that are
 * never cleared by this class, so the results of successive {@link #parseSpectra(Path, Path, List)} calls
 * accumulate in the same instance.
 *
 * @author demet
 */
@Component("annotatedSpectraParser")
public class AnnotatedSpectraParser {

    /**
     * Logger instance.
     */
    private static final Logger LOGGER = Logger.getLogger(AnnotatedSpectraParser.class);
    /**
     * The start of the APL key as built from the msms file ("RawFile: " + raw file name).
     */
    private static final String KEY_START = "RawFile: ";
    /**
     * The middle part of the APL key (" Index: " + scan number).
     */
    private static final String KEY_MIDDLE = " Index: ";
    /**
     * Marks the first line of a spectrum entry in an apl file.
     */
    private static final String APL_SPECTRUM_START = "peaklist start";
    /**
     * Marks the last line of a spectrum entry in an apl file.
     */
    private static final String APL_SPECTRUM_END = "peaklist end";
    /**
     * Separates key and value on a spectrum header line in an apl file.
     */
    private static final String APL_HEADER_DELIMITER = "=";
    /**
     * The key of the spectrum header line that holds the APL key of the spectrum.
     */
    private static final String APL_HEADER = "header";
    /**
     * Spectrum peaks from APL files. key: APL key of the spectrum, value :list of peaks.
     */
    private final Map<String, List<Peak>> spectrumPeaks = new HashMap<>();
    /**
     * Annotations from MSMS file
     * key:APL key of the spectrum, value map( key : Peak that keeps mass and intensity, value : match).
     */
    private final Map<String, Map<Peak, String>> annotations = new HashMap<>();
    /**
     * The aplKeys from the MSMS file; a hash based set so the per spectrum lookups don't scan a list.
     */
    private final Set<String> aplKeys = new LinkedHashSet<>();
    /**
     * The apl spectrum file paths map (key: apl file path; value: apl param file path);
     */
    private final Map<Path, Path> aplFilePaths = new HashMap<>();
    /**
     * The msms headers, used to get the mandatory headers of the msms file.
     */
    private final MsmsHeaders msmsHeaders;

    /**
     * No-arg constructor.
     *
     * @throws IOException in case of an Input/Output related problem while parsing the headers.
     */
    public AnnotatedSpectraParser() throws IOException {
        msmsHeaders = new MsmsHeaders();
    }

    /**
     * Parse spectra for given MSMS IDs: first the msms file, then the apl summary file and finally the apl
     * spectrum files listed in that summary.
     *
     * @param msmsFile           the msms file
     * @param andromedaDirectory the directory that contains the apl summary file and the apl spectrum files
     * @param msmsIDs            the msms IDs to parse, must not be null
     * @throws IOException in case of a missing file, an unexpected end of an apl file or any other
     *                     Input/Output related problem
     */
    public void parseSpectra(Path msmsFile, Path andromedaDirectory, List<String> msmsIDs) throws IOException {
        parseMSMS(msmsFile, msmsIDs);
        parseAplFilePaths(andromedaDirectory);
        parseAplFile();
    }

    /**
     * Parse msms file only for given ID numbers.
     *
     * @param msmsFile the msms.txt file
     * @param msmsIDs  the list of msms IDs to parse
     * @throws IOException in case of an Input/Output related problem while reading the msms file
     */
    private void parseMSMS(Path msmsFile, List<String> msmsIDs) throws IOException {
        TabularFileIterator valuesIterator = new TabularFileIterator(msmsFile, msmsHeaders.getMandatoryHeaders());
        //copy the IDs into a set once; the membership test below runs for every row of the msms file
        Set<String> requestedIds = new HashSet<>(msmsIDs);
        for (Map<String, String> spectrumValues : valuesIterator) {
            if (!requestedIds.contains(spectrumValues.get(MsmsHeader.ID))) {
                continue;
            }
            //concatenate the RAW file name and scan index
            String aplKey = KEY_START + spectrumValues.get(MsmsHeader.RAW_FILE) + KEY_MIDDLE
                    + spectrumValues.get(MsmsHeader.SCAN_NUMBER);

            //map the spectrum, only the first occurrence of an APL key is kept
            if (!aplKeys.contains(aplKey)) {
                Map<Peak, String> annotatedPeakList = parsePeakList(spectrumValues.get(MsmsHeader.MATCHES),
                        spectrumValues.get(MsmsHeader.INTENSITIES), spectrumValues.get(MsmsHeader.MASSES));
                annotations.put(aplKey, annotatedPeakList);
                aplKeys.add(aplKey);
            }
        }
    }

    /**
     * Parse peakList. The three arguments are ';' separated lists that are combined position by position.
     *
     * @param matches     the ';' separated matches
     * @param intensities the ';' separated intensities
     * @param masses      the ';' separated masses
     * @return the matches mapped by peak (mass and intensity); empty if one of the arguments is null or empty
     * @throws IllegalArgumentException if the three lists don't have the same number of entries
     */
    private Map<Peak, String> parsePeakList(String matches, String intensities, String masses) {
        Map<Peak, String> annotatedPeakList = new HashMap<>();
        //a missing (null) column value is handled like an empty one: nothing to annotate
        if (matches == null || intensities == null || masses == null
                || matches.isEmpty() || intensities.isEmpty() || masses.isEmpty()) {
            return annotatedPeakList;
        }

        String[] matchList = matches.split(";");
        String[] intensityList = intensities.split(";");
        String[] massList = masses.split(";");

        if (intensityList.length != matchList.length || massList.length != matchList.length) {
            throw new IllegalArgumentException("Input lists are not equal length");
        }
        for (int i = 0; i < matchList.length; i++) {
            Peak peak = new Peak(Double.parseDouble(massList[i]), Double.parseDouble(intensityList[i]));
            annotatedPeakList.put(peak, matchList[i]);
        }
        return annotatedPeakList;
    }

    /**
     * Parse APL File Paths. Put all the apl files to be used in aplFilePaths list.
     *
     * @param andromedaDirectory the directory that contains the apl summary file
     * @throws FileNotFoundException if the andromeda directory or the apl summary file doesn't exist
     * @throws IOException           in case of an Input/Output related problem while reading the summary file
     */
    private void parseAplFilePaths(Path andromedaDirectory) throws FileNotFoundException, IOException {
        if (!Files.exists(andromedaDirectory)) {
            throw new FileNotFoundException(
                    "The andromeda directory " + andromedaDirectory.toString() + " could not be found.");
        }

        //the apl summary file holds the apl spectrum file paths and the spectrum parameter file paths
        Path aplSummaryPath = Paths.get(andromedaDirectory.toString(), MaxQuantConstants.APL_SUMMARY_FILE.value());
        if (!Files.exists(aplSummaryPath)) {
            //report the location that was actually looked up
            throw new FileNotFoundException(
                    "The apl summary file " + aplSummaryPath.toString() + " could not be found.");
        }
        Map<String, String> allAplFilePaths = ParseUtils.parseParameters(aplSummaryPath,
                MaxQuantConstants.PARAM_TAB_DELIMITER.value());
        allAplFilePaths.forEach((aplFile, spectrumParametersFile) -> {
            //use paths relative to the andromeda directory: only the file names of the summary entries are kept
            Path relativeAplfilePath = Paths.get(andromedaDirectory.toString(),
                    FilenameUtils.getName(aplFile));
            Path relativeSpectrumParametersfilePath = Paths.get(andromedaDirectory.toString(),
                    FilenameUtils.getName(spectrumParametersFile));
            this.aplFilePaths.put(relativeAplfilePath, relativeSpectrumParametersfilePath);
        });
    }

    /**
     * Parse the APL files for given aplKeys and put the peaks in the spectrumPeaks list.
     *
     * @throws FileNotFoundException if one of the apl spectrum files doesn't exist
     * @throws IOException           if an apl file ends in the middle of a spectrum entry, contains a
     *                               malformed peak line or can't be read
     */
    private void parseAplFile() throws IOException {
        for (Path aplFilePath : aplFilePaths.keySet()) {
            if (!Files.exists(aplFilePath)) {
                throw new FileNotFoundException(
                        "The apl spectrum file " + aplFilePath.toString() + " could not be found.");
            }
            try (BufferedReader bufferedReader = Files.newBufferedReader(aplFilePath)) {
                Map<String, String> headers = new HashMap<>();
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    //look for a spectrum entry
                    if (!line.startsWith(APL_SPECTRUM_START)) {
                        continue;
                    }
                    //parse spectrum header part, start from an empty headers map for every spectrum
                    headers.clear();
                    line = readSpectrumHeaders(bufferedReader, headers, aplFilePath);
                    //" Precursor: 0 _multi_" is removed before looking up the key in the spectra map
                    String header = org.apache.commons.lang3.StringUtils
                            .substringBefore(headers.get(APL_HEADER), " Precursor");
                    //check if the spectrum was identified and therefore can be found in the spectra map
                    if (aplKeys.contains(header)) {
                        spectrumPeaks.put(header, readPeaks(bufferedReader, line, aplFilePath));
                    }
                }
            }
        }
    }

    /**
     * Read the header lines that follow a spectrum start line and store them as key/value pairs.
     * Reading stops at the first peak line (a line starting with a digit) or at the spectrum end line, so a
     * spectrum entry without peaks is handled as well.
     *
     * @param bufferedReader the reader, positioned right after the spectrum start line
     * @param headers        the map the header key/value pairs are put in
     * @param aplFilePath    the apl file being read, only used in messages
     * @return the line that ended the header part: the first peak line or the spectrum end line
     * @throws IOException if the file ends before the header part is complete
     */
    private String readSpectrumHeaders(BufferedReader bufferedReader, Map<String, String> headers,
                                       Path aplFilePath) throws IOException {
        String line = bufferedReader.readLine();
        while (line != null && !line.startsWith(APL_SPECTRUM_END) && !startsWithDigit(line)) {
            //split on the first delimiter only so a value that contains the delimiter is kept complete
            int delimiterIndex = line.indexOf(APL_HEADER_DELIMITER);
            if (delimiterIndex >= 0) {
                headers.put(line.substring(0, delimiterIndex),
                        line.substring(delimiterIndex + APL_HEADER_DELIMITER.length()));
            } else if (!line.trim().isEmpty()) {
                LOGGER.warn("Skipping the spectrum header line '" + line + "' without a '"
                        + APL_HEADER_DELIMITER + "' delimiter in apl file " + aplFilePath.toString());
            }
            line = bufferedReader.readLine();
        }
        if (line == null) {
            throw new IOException("Unexpected end of the apl spectrum file " + aplFilePath.toString()
                    + " while reading a spectrum header.");
        }
        return line;
    }

    /**
     * Read the peak lines of a spectrum entry up to the spectrum end line.
     *
     * @param bufferedReader the reader, positioned right after {@code firstLine}
     * @param firstLine      the line that ended the header part (first peak line or spectrum end line)
     * @param aplFilePath    the apl file being read, only used in messages
     * @return the list of peaks, empty if the spectrum entry has no peak lines
     * @throws IOException if a peak line has less than two columns or the file ends before the spectrum end
     *                     line
     */
    private List<Peak> readPeaks(BufferedReader bufferedReader, String firstLine, Path aplFilePath)
            throws IOException {
        List<Peak> peakList = new ArrayList<>();
        String line = firstLine;
        while (line != null && !line.startsWith(APL_SPECTRUM_END)) {
            //blank lines carry no peak
            if (!line.trim().isEmpty()) {
                String[] splitLine = line.split(MaxQuantConstants.PARAM_TAB_DELIMITER.value());
                if (splitLine.length < 2) {
                    throw new IOException("Malformed peak line '" + line + "' in apl spectrum file "
                            + aplFilePath.toString() + ".");
                }
                peakList.add(new Peak(Double.parseDouble(splitLine[0]), Double.parseDouble(splitLine[1])));
            }
            line = bufferedReader.readLine();
        }
        if (line == null) {
            throw new IOException("Unexpected end of the apl spectrum file " + aplFilePath.toString()
                    + " while reading spectrum peaks.");
        }
        return peakList;
    }

    /**
     * Check whether the given line starts with a digit, which marks a peak line in an apl spectrum entry.
     *
     * @param line the line to check, not null
     * @return true if the line is not empty and its first character is a digit
     */
    private static boolean startsWithDigit(String line) {
        return !line.isEmpty() && Character.isDigit(line.charAt(0));
    }
}