MSUmpire.SearchResultWriter.PepXMLWriter.java Source code

Java tutorial

Introduction

Here is the source code for MSUmpire.SearchResultWriter.PepXMLWriter.java

Source

/* 
 * Author: Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 *             Nesvizhskii Lab, Department of Computational Medicine and Bioinformatics, 
 *             University of Michigan, Ann Arbor
 *
 * Copyright 2014 University of Michigan, Ann Arbor, MI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package MSUmpire.SearchResultWriter;

import MSUmpire.BaseDataStructure.UmpireInfo;
import MSUmpire.FastaParser.FastaParser_V2;
import MSUmpire.PSMDataStructure.PepIonID;
import Utility.DateTimeTag;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Logger;
import org.xmlpull.v1.XmlPullParserException;

/**
 * Serializes a list of {@link PepIonID} identifications into a pepXML document.
 *
 * <p>Typical use: construct with the output file name and the FASTA path, supply the
 * peptide ions through one of the {@code SetPepList} overloads, then call {@link #Write()}.
 * The whole document is assembled in memory and written to {@code Filename} in one step.
 *
 * @author Chih-Chiang Tsou
 */
public class PepXMLWriter {

    /** Peptide ions to export; must be set through {@code SetPepList} before {@link #Write()}. */
    ArrayList<PepIonID> PepList;
    /** In-memory buffer holding the document while it is being assembled. */
    StringBuilder sb = new StringBuilder();
    /** Path of the pepXML file to create. */
    String Filename;
    /** Path of the FASTA database used to look up parent proteins. */
    String Fasta;

    /**
     * A protein containing the peptide, together with the residues flanking the peptide in
     * that protein. {@code '-'} marks a flank that does not exist because the peptide sits at
     * the very start or end of the protein sequence.
     */
    // Kept as a (non-static) inner class so existing code that instantiates it keeps compiling.
    public class ParentProtein {

        String Acc;
        char PreAA = '-';
        char NextAA = '-';
        String Des;
    }

    /**
     * Creates a writer; nothing is read or written until {@link #Write()} is called.
     *
     * @param Filename path of the pepXML file to create
     * @param Fasta path of the FASTA database used for protein lookup
     * @throws IOException declared for backward compatibility; not thrown by this constructor
     * @throws XmlPullParserException declared for backward compatibility; not thrown by this
     *     constructor
     */
    public PepXMLWriter(String Filename, String Fasta) throws IOException, XmlPullParserException {
        this.Filename = Filename;
        this.Fasta = Fasta;
    }

    /**
     * Sets the peptide ions to export from the values of a map (the keys are ignored).
     *
     * @param Import map whose values are copied into a new list
     */
    public void SetPepList(HashMap<String, PepIonID> Import) {
        this.PepList = new ArrayList<>(Import.values());
    }

    /**
     * Sets the peptide ions to export. The list is used directly, not copied.
     *
     * @param Import list of peptide ions to export
     */
    public void SetPepList(ArrayList<PepIonID> Import) {
        this.PepList = Import;
    }

    /**
     * Builds the pepXML document for the current peptide list and writes it to {@code Filename}.
     *
     * <p>Peptides for which no parent protein can be located in the FASTA database are logged
     * and left out of the output.
     *
     * @throws IOException if the output file cannot be written
     * @throws XmlPullParserException declared because it is declared by this method's original
     *     signature (presumably raised while loading the FASTA database)
     */
    public void Write() throws IOException, XmlPullParserException {
        // Start from an empty buffer so that calling Write() twice does not emit two documents.
        sb.setLength(0);

        int minlength = Integer.MAX_VALUE;
        int maxlength = 0;
        int maxmiss = 0;
        for (PepIonID pepIonID : PepList) {
            int length = pepIonID.Sequence.length();
            maxlength = Math.max(maxlength, length);
            minlength = Math.min(minlength, length);
            maxmiss = Math.max(maxmiss, pepIonID.getNMissedCleavages());
        }

        // Prefer the serialized FASTA index; fall back to parsing and digesting the FASTA file
        // with limits derived from the peptides that are about to be written.
        FastaParser_V2 fastaparser = FastaParser_V2.FasterSerialzationRead(Fasta);
        if (fastaparser == null) {
            fastaparser = new FastaParser_V2(Fasta);
            fastaparser.digestion(maxmiss, minlength, maxlength, "DECOY");
        }

        Header();
        sb.append("<msms_run_summary base_name=\"" + escapeXml(FilenameUtils.getBaseName(Filename))
                + "\" search_engine=\"Umpire\" >\n");
        SearchSummary();

        int index = 0;
        for (PepIonID pepIonID : PepList) {
            ArrayList<ParentProtein> parentprots = new ArrayList<>();
            if (!fastaparser.PeptideList.containsKey(pepIonID.Sequence)) {
                Logger.getRootLogger().warn(pepIonID.Sequence + " is not found as tryptic peptides in " + Fasta
                        + ". Searching protein sequences..");
                // Not in the digested peptide index: scan every protein sequence instead.
                for (String acc : fastaparser.ProteinList.keySet()) {
                    addParentIfContained(parentprots, acc, fastaparser.ProteinList.get(acc).Seq,
                            fastaparser.ProteinList.get(acc).Des, pepIonID.Sequence);
                }
            } else {
                for (String acc : fastaparser.PeptideList.get(pepIonID.Sequence).Proteins) {
                    addParentIfContained(parentprots, acc, fastaparser.ProteinList.get(acc).Seq,
                            fastaparser.ProteinList.get(acc).Des, pepIonID.Sequence);
                }
            }

            if (parentprots.isEmpty()) {
                Logger.getRootLogger().warn(pepIonID.Sequence + " is not found in " + Fasta);
                continue;
            }

            // The first parent protein is reported on the search hit itself; the remaining
            // ones follow as <alternative_protein> elements.
            ParentProtein first = parentprots.get(0);
            sb.append("<spectrum_query spectrum=\"" + escapeXml(pepIonID.GetKey())
                    + "\" precursor_neutral_mass=\"" + pepIonID.CalcNeutralPepMass()
                    + "\" assumed_charge=\"" + pepIonID.Charge + "\" index=\"" + (index++)
                    + "\" retention_time_sec=\"" + pepIonID.PeakRT * 60f + "\">\n"
                    + "<search_result>\n" + "<search_hit hit_rank=\"1\" peptide=\""
                    + escapeXml(pepIonID.Sequence) + "\" peptide_prev_aa=\"" + first.PreAA
                    + "\" peptide_next_aa=\"" + first.NextAA + "\" protein=\"" + escapeXml(first.Acc)
                    + "\" protein_descr=\"" + escapeXml(first.Des) + "\" num_tot_proteins=\""
                    + parentprots.size() + "\" num_matched_ions=\"" + pepIonID.GetFragCount()
                    + "\" tot_num_ions=\"" + 2 * (pepIonID.Sequence.length() - 1)
                    + "\" calc_neutral_pep_mass=\"" + pepIonID.CalcNeutralPepMass() + "\" massdiff=\""
                    + (pepIonID.CalcNeutralPepMass() - pepIonID.ObservedMass())
                    + "\" num_missed_cleavages=\"" + pepIonID.getNMissedCleavages()
                    + "\" is_rejected=\"0\">\n");

            for (int i = 1; i < parentprots.size(); i++) {
                ParentProtein alternative = parentprots.get(i);
                sb.append("<alternative_protein protein=\"" + escapeXml(alternative.Acc)
                        + "\" protein_descr=\"" + escapeXml(alternative.Des)
                        + "\" num_tol_term=\"2\" peptide_prev_aa=\"" + alternative.PreAA
                        + "\" peptide_next_aa=\"" + alternative.NextAA + "\"/>");
            }

            sb.append("<analysis_result analysis=\"interprophet\">\n" + "<interprophet_result probability=\""
                    + pepIonID.TargetedProbability() + "\">\n"
                    + "</interprophet_result>\n" + "</analysis_result>\n" + "</search_hit>\n"
                    + "</search_result>\n" + "</spectrum_query>\n");
        }
        sb.append("</msms_run_summary>\n");
        sb.append("</msms_pipeline_analysis>\n");

        // Encode with the charset announced in the XML declaration, and close the stream even
        // when the write fails.
        try (Writer writer = new OutputStreamWriter(new FileOutputStream(Filename),
                StandardCharsets.ISO_8859_1)) {
            writer.write(sb.toString());
        }
    }

    /**
     * Appends a {@link ParentProtein} for {@code acc} to {@code parents} when the protein
     * sequence contains the peptide; does nothing otherwise. The flanking residues are taken
     * around the first occurrence of the peptide in the protein.
     */
    private void addParentIfContained(ArrayList<ParentProtein> parents, String acc, String protSeq,
            String des, String pepSeq) {
        int start = protSeq.indexOf(pepSeq);
        if (start < 0) {
            return;
        }
        ParentProtein prot = new ParentProtein();
        prot.Acc = acc;
        prot.Des = des;
        if (start > 0) {
            prot.PreAA = protSeq.charAt(start - 1);
        }
        int end = start + pepSeq.length();
        if (end < protSeq.length()) {
            prot.NextAA = protSeq.charAt(end);
        }
        parents.add(prot);
    }

    /**
     * Returns the string form of {@code value} with the five XML special characters replaced
     * by their predefined entities, so the result is safe inside a quoted attribute value.
     * A {@code null} value yields {@code "null"}, the same text string concatenation produces.
     */
    private static String escapeXml(Object value) {
        String text = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&apos;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Appends the {@code <sample_enzyme>} and {@code <search_summary>} elements. Apart from
     * the base name and the FASTA path, every value written here is a fixed literal.
     */
    private void SearchSummary() {
        sb.append("<sample_enzyme name=\"trypsin\">\n" + "<specificity cut=\"KR\" no_cut=\"P\" sense=\"C\"/>\n"
                + "</sample_enzyme>\n" + "<search_summary base_name=\""
                + escapeXml(FilenameUtils.getBaseName(Filename))
                + "\" search_engine=\"Umpire\" precursor_mass_type=\"monoisotopic\" fragment_mass_type=\"monoisotopic\" search_id=\"1\">\n"
                + "<search_database local_path=\"" + escapeXml(Fasta) + "\" type=\"AA\"/>\n"
                + "<enzymatic_search_constraint enzyme=\"trypsin\" max_num_internal_cleavages=\"1\" min_number_termini=\"2\"/>\n"
                + "<aminoacid_modification aminoacid=\"C\" massdiff=\"57.0215\" mass=\"160.0306\" variable=\"N\"/>\n"
                + "<aminoacid_modification aminoacid=\"C\" massdiff=\"-17.0265\" mass=\"143.0041\" variable=\"Y\" symbol=\"^\"/>\n"
                + "<!--X! Tandem n-terminal AA variable modification-->\n"
                + "<aminoacid_modification aminoacid=\"E\" massdiff=\"-18.0106\" mass=\"111.0320\" variable=\"Y\" symbol=\"^\"/>\n"
                + "<!--X! Tandem n-terminal AA variable modification-->\n"
                + "<aminoacid_modification aminoacid=\"M\" massdiff=\"15.9949\" mass=\"147.0354\" variable=\"Y\"/>\n"
                + "<aminoacid_modification aminoacid=\"Q\" massdiff=\"-17.0265\" mass=\"111.0321\" variable=\"Y\" symbol=\"^\"/>\n"
                + "<!--X! Tandem n-terminal AA variable modification-->\n"
                + "<terminal_modification terminus=\"n\" massdiff=\"42.0106\" mass=\"43.0184\" protein_terminus=\"N\" variable=\"Y\" symbol=\"^\"/>"
                + "    </search_summary>\n");
    }

    /**
     * Appends the XML declaration, the opening {@code <msms_pipeline_analysis>} tag and the
     * {@code <analysis_summary>} element.
     */
    private void Header() {
        sb.append("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n");
        sb.append("<msms_pipeline_analysis date=\"" + DateTimeTag.GetTag() + "\" summary_xml=\""
                + escapeXml(Filename)
                + "\" xmlns=\"http://regis-web.systemsbiology.net/pepXML\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://sashimi.sourceforge.net/schema_revision/pepXML/pepXML_v18.xsd\">\n");
        sb.append("<analysis_summary analysis=\"Umpire\" version=\"" + UmpireInfo.GetInstance().Version
                + "\" time=\"" + DateTimeTag.GetTag() + "\"/>\n");
    }
}