bookChapter.theoretical.AnalyzeTheoreticalMSMSCalculation.java Source code

Java tutorial

Introduction

Here is the source code for bookChapter.theoretical.AnalyzeTheoreticalMSMSCalculation.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package bookChapter.theoretical;

import com.compomics.dbtoolkit.io.DBLoaderLoader;
import com.compomics.dbtoolkit.io.interfaces.DBLoader;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.massspectrometry.MSnSpectrum;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.gui.waiting.waitinghandlers.WaitingHandlerCLIImpl;
import config.ConfigHolder;
import com.compomics.util.protein.Protein;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import uk.ac.ebi.jmzml.xml.io.MzMLUnmarshallerException;
import util.CalculateMS1Err;

/**
 * This class runs SEQUEST-like and Andromeda-like scoring for given spectra
 * (within a selected folder) and in silico digested protein database The
 * settings are at BookChapte.properties
 *
 * @author Sule
 */
public class AnalyzeTheoreticalMSMSCalculation {

    /**
     *
     * @param args
     * @throws IOException
     * @throws FileNotFoundException
     * @throws ClassNotFoundException
     * @throws InterruptedException
     * @throws MzMLUnmarshallerException
     */
    public static void main(String[] args) throws IOException, FileNotFoundException, ClassNotFoundException,
            IOException, InterruptedException, MzMLUnmarshallerException {
        Logger l = Logger.getLogger("AnalyzeTheoreticalMSMSCalculation");
        Date date = Calendar.getInstance().getTime();
        DateFormat formatter = new SimpleDateFormat("EEEE, dd MMMM yyyy, hh:mm:ss.SSS a");
        String now = formatter.format(date);
        l.log(Level.INFO, "Calculation starts at {0}", now);
        double precursorTolerance = ConfigHolder.getInstance().getDouble("precursor.tolerance"),
                fragmentTolerance = ConfigHolder.getInstance().getDouble("fragment.tolerance");
        String databaseName = ConfigHolder.getInstance().getString("database.name"),
                spectraName = ConfigHolder.getInstance().getString("spectra.name"),
                output = ConfigHolder.getInstance().getString("output");
        int correctionFactor = ConfigHolder.getInstance().getInt("correctionFactor");
        boolean theoFromAllCharges = ConfigHolder.getInstance().getBoolean("hasAllPossCharge");
        BufferedWriter bw = new BufferedWriter(new FileWriter(output));
        bw.write("SpectrumTitle" + "\t" + "PrecursorMZ" + "\t" + "PrecursorCharge" + "\t" + "Observed Mass (M+H)"
                + "\t" + "AndromedaLikeScore" + "\t" + "SequestLikeScore" + "\t" + "PeptideByAndromedaLikeScore"
                + "\t" + "PeptideBySequestLikeScore" + "\t" + "LevenshteinDistance" + "\t" + "TotalScoredPeps"
                + "\t" + "isCorrectMatchByAndromedaLike" + "\t" + "isCorrectMatchBySequestLikeScore" + "\n");
        l.info("Getting database entries");
        // first load all sequences into the memory 
        HashSet<DBEntry> dbEntries = getDBEntries(databaseName);
        // for every spectrum-calculate both score...
        // now convert to binExperimental spectrum
        int num = 0;
        SpectrumFactory fct = SpectrumFactory.getInstance();
        num = 0;
        File f = new File(spectraName);
        if (spectraName.endsWith(".mgf")) {
            fct.addSpectra(f, new WaitingHandlerCLIImpl());
            l.log(Level.INFO, "Spectra scoring starts at {0}", now);
            for (String title : fct.getSpectrumTitles(f.getName())) {
                num++;
                MSnSpectrum ms = (MSnSpectrum) fct.getSpectrum(f.getName(), title);
                // here calculate all except this is an empty spectrum...
                if (ms.getPeakList().size() > 2) {
                    // to check a spectrum with negative values..
                    String text = result(ms, precursorTolerance, dbEntries, fragmentTolerance, correctionFactor,
                            theoFromAllCharges);
                    if (!text.isEmpty()) {
                        bw.write(text);
                    }
                }
                if (num % 500 == 0) {
                    l.info("Running " + num + " spectra." + Calendar.getInstance().getTime());
                }
            }
        }
        l.info("Program finished at " + Calendar.getInstance().getTime());

        bw.close();
    }

    private static String result(MSnSpectrum msms, double precursorTolerance, HashSet<DBEntry> peptideAndMass,
            double fragmentTolerance, int correctionFactor, boolean hasAllPossCharge)
            throws IllegalArgumentException, IOException, MzMLUnmarshallerException {
        String res = "";
        HashMap<Peptide, Boolean> allSelectedPeps = getSelectedTheoPeps(msms, precursorTolerance, peptideAndMass); // select peptides within a given precursor tolerance
        int scoredPeps = allSelectedPeps.size();
        ArrayList<Identify> sequestResults = new ArrayList<Identify>(),
                andromedaResults = new ArrayList<Identify>();
        // for every peptide... calculate each score...
        for (Peptide selectedPep : allSelectedPeps.keySet()) {
            Identify toCalculateSequest = new Identify(msms, selectedPep, fragmentTolerance, true,
                    allSelectedPeps.get(selectedPep), scoredPeps, correctionFactor, hasAllPossCharge),
                    toCalculateAndromeda = new Identify(msms, selectedPep, fragmentTolerance, false,
                            allSelectedPeps.get(selectedPep), scoredPeps, correctionFactor, hasAllPossCharge);
            if (toCalculateSequest.getScore() != Double.NEGATIVE_INFINITY) {
                sequestResults.add(toCalculateSequest);
                andromedaResults.add(toCalculateAndromeda);
            }
        }
        if (!sequestResults.isEmpty()) {
            HashSet<Identify> theBestSEQUESTResults = getBestResult(sequestResults),
                    theBestAndromedaResults = getBestResult(andromedaResults);
            res = printInfo(theBestAndromedaResults, theBestSEQUESTResults);
        }
        return res;
    }

    /**
     * This method load all sequences in a memory
     *
     * @param databaseName
     * @return
     */
    private static HashSet<DBEntry> getDBEntries(String databaseName) throws IOException {
        HashSet<DBEntry> dbEntries = new HashSet<DBEntry>();
        DBLoader loader = DBLoaderLoader.loadDB(new File(databaseName));
        Protein protein = null;
        // get a crossLinkerName object        
        while ((protein = loader.nextProtein()) != null) {
            String sequence = protein.getSequence().getSequence();
            String descrp = protein.getHeader().getDescription(), acc = protein.getHeader().getAccession();
            Peptide tmpPep = new Peptide(sequence, new ArrayList<ModificationMatch>());
            double tmpPepMass = tmpPep.getMass();
            DBEntry dbEntry = new DBEntry(tmpPep, descrp, acc, tmpPepMass);
            dbEntries.add(dbEntry);
        }
        return dbEntries;
    }

    private static HashMap<Peptide, Boolean> getSelectedTheoPeps(MSnSpectrum msms, double precursorTolerance,
            HashSet<DBEntry> dbEntries) throws IOException, IllegalArgumentException {
        // select peptides to be compared...
        HashMap<Peptide, Boolean> allTheoPeps = new HashMap<Peptide, Boolean>();
        for (DBEntry dbEntry : dbEntries) {
            Peptide p = dbEntry.getPeptide();
            double peptideMass = dbEntry.getPeptideMass();
            String acc = dbEntry.getProteinAccession(), descrp = dbEntry.getProteinDescription();
            double tmpMS1Tolerance = CalculateMS1Err.getMS1Err(true, peptideMass,
                    msms.getPrecursor().getMass(msms.getPrecursor().getPossibleCharges().get(0).value));
            boolean isCorrect = false;
            if (Math.abs(tmpMS1Tolerance) <= precursorTolerance && !descrp.contains("contaminant")
                    && acc.contains("ups")) {
                isCorrect = true;
                allTheoPeps.put(p, isCorrect);
            } else if (Math.abs(tmpMS1Tolerance) <= precursorTolerance && !descrp.contains("contaminant")) {
                isCorrect = false;
                allTheoPeps.put(p, isCorrect);
            }
        }
        return allTheoPeps;
    }

    private static HashSet<Identify> getBestResult(ArrayList<Identify> results) {
        Collections.sort(results, Identify.ScoreDESC);
        double bestScore = results.get(0).getScore();
        String pep = results.get(0).getPeptide().getSequence();
        HashSet<Identify> selectedTops = new HashSet<Identify>();
        selectedTops.add(results.get(0));
        for (int i = 1; i < results.size(); i++) {
            Identify ind = results.get(i);
            double tmpScore = ind.getScore();
            String tmpPep = ind.getPeptide().getSequence();
            // making sure that non-redundant peptide would be only found
            if (tmpScore == bestScore && !pep.equals(tmpPep)) {
                bestScore = tmpScore;
                selectedTops.add(ind);
            }
        }
        return selectedTops;
    }

    private static String printInfo(HashSet<Identify> theBestAndromedaResults,
            HashSet<Identify> theBestSEQUESTResults) {
        ///    bw.write("Score" + "\t" + "LevenshteinDistance" + "\t" + "Sequence" + "\t" + "Charge" + "\t" + "Correct" + "\t" + "Spec" + "\n");
        String result = "";
        HashMap<String, Integer> results_and_levDist = new HashMap<String, Integer>();
        for (Identify andromeda : theBestAndromedaResults) {
            for (Identify sequest : theBestSEQUESTResults) {
                double observedMass = CalculateObservedMass.calculateMass(andromeda.getSpectrum());
                int levenshteinDistance = StringUtils.getLevenshteinDistance(andromeda.getPeptide().getSequence(),
                        sequest.getPeptide().getSequence());
                result = andromeda.getSpectrum().getSpectrumTitle() + "\t"
                        + andromeda.getSpectrum().getPrecursor().getMz() + "\t"
                        + andromeda.getSpectrum().getPrecursor().getPossibleChargesAsString() + "\t" + observedMass
                        + "\t" + andromeda.getScore() + "\t" + sequest.getScore() + "\t"
                        + andromeda.getPeptide().getSequence() + "\t" + sequest.getPeptide().getSequence() + "\t"
                        + levenshteinDistance + "\t" + sequest.getTotalScoredPeps() + "\t"
                        + andromeda.isIsCorrectMatch() + "\t" + sequest.isIsCorrectMatch() + "\n";
                results_and_levDist.put(result, levenshteinDistance);
            }
        }
        int maxDist = Integer.MAX_VALUE, dist = 0;
        for (String res : results_and_levDist.keySet()) {
            dist = results_and_levDist.get(res);
            if (dist < maxDist) {
                result = res;
                maxDist = dist;
            }
        }
        return result;
    }

}