edu.monash.merc.system.parser.GPMDbParser.java Source code

Java tutorial

Introduction

Here is the source code for edu.monash.merc.system.parser.GPMDbParser.java

Source

/*
 * Copyright (c) 2011-2013, Monash e-Research Centre
 * (Monash University, Australia)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the Monash University nor the names of its
 *      contributors may be used to endorse or promote products derived from
 *      this software without specific prior written permission.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You should have received a copy of the GNU Affero General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

package edu.monash.merc.system.parser;

import edu.monash.merc.common.name.*;
import edu.monash.merc.dto.*;
import edu.monash.merc.dto.gpm.GPMDbBean;
import edu.monash.merc.dto.gpm.GPMDbEntryBean;
import edu.monash.merc.exception.DMFileException;
import edu.monash.merc.exception.DMParserException;
import edu.monash.merc.util.DMUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * @author Simon Yu
 *         <p/>
 *         Email: xiaoming.yu@monash.edu
 * @version 1.0
 * @since 1.0
 *        <p/>
 *        Date: 11/06/13 11:47 AM
 */
public class GPMDbParser {

    private static String RELEASE_DATE = "date:";

    private static String NOMINAL_MASS = "nominal_mass:";

    private static String SEQUNCE_ASEMBLY = "sequnce_assembly:";

    private static String SEQUENCE_ASEMBLY = "sequence_assembly:";

    private static String SEQUENCE_SOURCE = "sequence_source:";

    private static String MAXIMUM_LOG_E = "maximum_log_e:";

    private static String PROTEIN = "protein:";

    private static String GENE = "gene:";

    private static String TRANSCRIPT = "transcript:";

    private static String CHROMOSOME = "chromosome:";

    private static String CHROM_START = "start:";

    private static String CHROM_END = "end:";

    private static String CHROM_STRAND = "strand:";

    private static String DES_ENSG_ENSP = "desc:";

    private static String MODIFIED_PEPTIDE_OBS = "modified_peptide_obs:";

    private static String POS = "pos:";

    private static String RES = "res:";

    private static String OBS = "obs:";

    private static String COLON_DELIM = ":";

    private static String GPMDB_BASE_QUERY = "http://psyt.thegpm.org/thegpm-cgi/dblist_pep_modmass.pl";

    private static String PARAM_LABEL = "?label=";

    private static String PARAM_MODMASS = "&modmass=";

    private static String PARAM_AND = "@";

    private static String PARAM_MAXEXPECT = "&maxexpect=";

    private static final Logger logger = Logger.getLogger(GPMDbParser.class.getName());

    public GPMDbBean parse(InputStream ins, String encoding, GPMDbType gpmDbType) {
        try {
            String defaultEncoding = "UTF-8";
            if (StringUtils.isNotBlank(encoding)) {
                defaultEncoding = encoding;
            }
            InputStreamReader insReader = new InputStreamReader(ins, Charset.forName(defaultEncoding));
            BufferedReader reader = new BufferedReader(insReader);

            //create a GPMDbBean
            GPMDbBean gpmDbBean = new GPMDbBean();
            //set the GPMDbType
            gpmDbBean.setGpmDbType(gpmDbType);

            List<GPMDbEntryBean> gpmDbEntryBeanList = new ArrayList<GPMDbEntryBean>();
            GPMDbEntryBean gpmDbEntryBean = null;

            String releaseDate = null;
            String nominalMass = null;
            String sequenceAssembly = null;
            String sequenceSource = null;
            String maximumLoge = null;
            String enspAccession = null;
            String ensgAccession = null;
            String enstAccession = null;
            String chromName = null;
            int chromStart = 0;
            int chromEnd = 0;
            String chromStrand = null;
            int modifiedPeptideObs = 0;
            int pos = 0;
            String res = null;
            int obs = 0;
            String line = null;
            int counterIndex = 0;
            while ((line = reader.readLine()) != null) {

                if (StringUtils.isNotBlank(line) && !StringUtils.startsWith(line, "#")) {
                    //start to parse the head of psyt file from gpm
                    if (StringUtils.startsWith(line, RELEASE_DATE)) {
                        String[] dateLineFields = DMUtil.splitByDelims(line, COLON_DELIM);
                        if (dateLineFields.length != 2) {
                            throw new DMFileException("Invalid gpm psyt file,  the release date is not specified");
                        }
                        //There are a total of 26 release dates, just add the first release date as a primary release date
                        if (StringUtils.isBlank(releaseDate)) {
                            releaseDate = dateLineFields[1];
                            gpmDbBean.setReleaseToken(releaseDate);
                        }
                    }

                    if (StringUtils.startsWith(line, NOMINAL_MASS)) {
                        String[] nominalMassLineFields = DMUtil.splitByDelims(line, COLON_DELIM);
                        if (nominalMassLineFields.length != 2) {
                            throw new DMFileException(
                                    "Invalid gpm psyt file,  the nominal mass number is not specified");
                        }
                        if (StringUtils.isBlank(nominalMass)) {
                            nominalMass = nominalMassLineFields[1];
                            gpmDbBean.setNominalMass(nominalMass);
                        }
                    }

                    if (StringUtils.startsWith(line, SEQUENCE_ASEMBLY)
                            || StringUtils.startsWith(line, SEQUENCE_ASEMBLY)) {
                        String[] sequenceAssemblyLineFields = DMUtil.splitByDelims(line, COLON_DELIM);
                        if (sequenceAssemblyLineFields.length != 2) {
                            throw new DMFileException(
                                    "Invalid gpm psyt file,  the sequence assembly version is not specified");
                        }
                        if (StringUtils.isBlank(sequenceAssembly)) {
                            sequenceAssembly = sequenceAssemblyLineFields[1];
                            gpmDbBean.setSequenceAssembly(sequenceAssembly);
                        }
                    }

                    if (StringUtils.startsWith(line, SEQUENCE_SOURCE)) {
                        String[] sequenceSourceLineFields = DMUtil.splitByDelims(line, COLON_DELIM);
                        if (sequenceSourceLineFields.length != 2) {
                            throw new DMFileException(
                                    "Invalid gpm psyt file,  the sequence source number is not specified");
                        }
                        if (StringUtils.isBlank(sequenceSource)) {
                            sequenceSource = sequenceSourceLineFields[1];
                            gpmDbBean.setSequenceSource(sequenceSource);
                        }
                    }

                    if (StringUtils.startsWith(line, MAXIMUM_LOG_E)) {
                        String[] maxLogELineFields = DMUtil.splitByDelims(line, COLON_DELIM);
                        if (maxLogELineFields.length != 2) {
                            throw new DMFileException(
                                    "Invalid gpm psyt file,  the maximum log(e) number is not specified");
                        }
                        if (StringUtils.isBlank(maximumLoge)) {
                            maximumLoge = maxLogELineFields[1];
                            gpmDbBean.setMaximumLoge(maximumLoge);
                        }
                    }

                    if (StringUtils.startsWith(line, PROTEIN)) {
                        gpmDbEntryBean = new GPMDbEntryBean();
                        //create the primary dbsource bean;
                        DBSourceBean gpmDbSourceBean = new DBSourceBean();
                        //all genes come from the gpm psty file as a datasource
                        if (gpmDbType.equals(GPMDbType.GPMDB_PSYT)) {
                            gpmDbSourceBean.setDbName(DbAcType.GPMPSYT.type());
                        }
                        if (gpmDbType.equals(GPMDbType.GPMDB_LYS)) {
                            gpmDbSourceBean.setDbName(DbAcType.GPMLYS.type());
                        }
                        if (gpmDbType.equals(GPMDbType.GPMDB_NTA)) {
                            gpmDbSourceBean.setDbName(DbAcType.GPMNTA.type());
                        }
                        gpmDbSourceBean.setPrimaryEvidences(true);
                        gpmDbEntryBean.setPrimaryDbSourceBean(gpmDbSourceBean);

                        //parse the protein accession
                        enspAccession = DMUtil.splitStrByDelim(line, COLON_DELIM)[1];
                        if (StringUtils.isBlank(enspAccession)) {
                            throw new DMFileException("The protein accession number not found");
                        } else {
                            //create an identified accession bean
                            AccessionBean identAccessionBean = createAcBean(enspAccession, DbAcType.Protein.type());
                            gpmDbEntryBean.setIdentifiedAccessionBean(identAccessionBean);
                        }
                    }

                    //parse gene accession
                    if (StringUtils.startsWith(line, GENE)) {
                        ensgAccession = DMUtil.splitStrByDelim(line, COLON_DELIM)[1];
                    }

                    //parse transcript accession
                    if (StringUtils.startsWith(line, TRANSCRIPT)) {
                        enstAccession = DMUtil.splitStrByDelim(line, COLON_DELIM)[1];
                    }

                    //chromosome fields
                    if (StringUtils.startsWith(line, CHROMOSOME)) {
                        String[] chromLineFields = DMUtil.splitByDelims(line, "\t", "\r", "\n");
                        for (String chromField : chromLineFields) {
                            if (StringUtils.startsWith(chromField, CHROMOSOME)) {
                                String[] chromNameFileds = DMUtil.splitStrByDelim(chromField, COLON_DELIM);
                                if (chromNameFileds.length == 2) {
                                    chromName = chromNameFileds[1].trim();
                                } else {
                                    chromName = NameType.UNKNOWN.cn();
                                }
                            }
                            if (StringUtils.startsWith(chromField, CHROM_START)) {
                                String[] chromStartFields = DMUtil.splitStrByDelim(chromField, COLON_DELIM);
                                if (chromStartFields.length == 2) {
                                    chromStart = Integer.valueOf(chromStartFields[1].trim());
                                }
                            }
                            if (StringUtils.startsWith(chromField, CHROM_END)) {
                                String[] chromEndFields = DMUtil.splitStrByDelim(chromField, COLON_DELIM);
                                if (chromEndFields.length == 2) {
                                    chromEnd = Integer.valueOf(chromEndFields[1].trim());
                                }
                            }
                            if (StringUtils.startsWith(chromField, CHROM_STRAND)) {
                                String[] chromStrandFields = DMUtil.splitStrByDelim(chromField, COLON_DELIM);
                                if (chromStrandFields.length == 2) {
                                    chromStrand = chromStrandFields[1].trim();
                                }
                            }
                        }
                        //create GeneBean
                        GeneBean geneBean = new GeneBean();
                        geneBean.setEnsgAccession(ensgAccession);
                        geneBean.setChromosome(chromName);
                        geneBean.setStartPosition(chromStart);
                        geneBean.setEndPosition(chromEnd);
                        geneBean.setStrand(chromStrand);
                        gpmDbEntryBean.setGeneBean(geneBean);
                    }

                    //Gene or Protein Desc
                    if (StringUtils.startsWith(line, DES_ENSG_ENSP)) {
                        String[] desLineFields = DMUtil.splitByDelims(line, COLON_DELIM);
                        if (desLineFields.length == 2) {
                            String descValue = desLineFields[1];
                            String[] descValueFields = DMUtil.splitByDelims(descValue, ",", "\t");
                            String desc = "";
                            if (descValueFields.length >= 2) {
                                gpmDbEntryBean.getGeneBean().setDisplayName(descValueFields[0]);
                                for (int i = 1; i < descValueFields.length; i++) {
                                    desc += descValueFields[i];
                                }
                            } else {
                                desc = descValueFields[0];
                            }
                            gpmDbEntryBean.getGeneBean().setDescription(StringUtils.trim(desc));
                        } else {
                            gpmDbEntryBean.getGeneBean().setDisplayName(NameType.UNKNOWN.cn());
                        }

                    }

                    //we add non evidence bean first once we meet the tag : modified_peptide_obs
                    if (StringUtils.startsWith(line, MODIFIED_PEPTIDE_OBS)) {
                        //create dbsource and accession entry bean
                        List<DbSourceAcEntryBean> dbSourceAcEntryBeanList = parseDBSourceAcEntryBeans(enspAccession,
                                ensgAccession, enstAccession);
                        gpmDbEntryBean.setDbSourceAcEntryBeans(dbSourceAcEntryBeanList);

                        if (gpmDbType.equals(GPMDbType.GPMDB_PSYT)) {
                            //create a non phs s evidence bean
                            PTMEvidenceBean nonPhsSEvidenceBean = createNonEvidenceBean(enspAccession, nominalMass,
                                    maximumLoge, GPMPTMSubType.PHS_S, gpmDbType);
                            gpmDbEntryBean.setPtmEvidenceBean(nonPhsSEvidenceBean, GPMPTMSubType.NON_PHS_S);
                            //create a non phs t evidence bean
                            PTMEvidenceBean nonPhsTEvidenceBean = createNonEvidenceBean(enspAccession, nominalMass,
                                    maximumLoge, GPMPTMSubType.PHS_T, gpmDbType);
                            gpmDbEntryBean.setPtmEvidenceBean(nonPhsTEvidenceBean, GPMPTMSubType.NON_PHS_T);
                            //create a non phs y evidence bean
                            PTMEvidenceBean nonPhsYEvidenceBean = createNonEvidenceBean(enspAccession, nominalMass,
                                    maximumLoge, GPMPTMSubType.PHS_Y, gpmDbType);
                            gpmDbEntryBean.setPtmEvidenceBean(nonPhsYEvidenceBean, GPMPTMSubType.NON_PHS_Y);

                        } else if (gpmDbType.equals(GPMDbType.GPMDB_LYS)) {
                            // Create Non LYS evidence bean first
                            PTMEvidenceBean nonLysEvidenceBean = createNonEvidenceBean(enspAccession, nominalMass,
                                    maximumLoge, GPMPTMSubType.LYS, gpmDbType);
                            gpmDbEntryBean.setPtmEvidenceBean(nonLysEvidenceBean, GPMPTMSubType.NON_LYS);

                        } else if (gpmDbType.equals(GPMDbType.GPMDB_NTA)) {
                            //create non nta evidence bean first
                            PTMEvidenceBean nonNtaEvidenceBean = createNonEvidenceBean(enspAccession, nominalMass,
                                    maximumLoge, GPMPTMSubType.NTA, gpmDbType);
                            gpmDbEntryBean.setPtmEvidenceBean(nonNtaEvidenceBean, GPMPTMSubType.NON_NTA);
                        }

                        gpmDbEntryBeanList.add(gpmDbEntryBean);
                        counterIndex = gpmDbEntryBeanList.size() - 1;
                    }

                    //parse the RES: S, T, Y or Others and POS and OBS
                    if (StringUtils.startsWith(line, POS)) {
                        String[] posLineFields = DMUtil.splitByDelims(line, "\t", "\r", "\n");
                        for (String posLineField : posLineFields) {
                            if (StringUtils.startsWith(posLineField, POS)) {
                                String[] posFileds = DMUtil.splitByDelims(posLineField, COLON_DELIM);
                                if (posFileds.length == 2) {
                                    pos = Integer.valueOf(posFileds[1]);
                                } else {
                                    throw new DMFileException("The position value not found.");
                                }
                            }
                            if (StringUtils.startsWith(posLineField, RES)) {
                                String[] resFileds = DMUtil.splitByDelims(posLineField, COLON_DELIM);
                                if (resFileds.length == 2) {
                                    res = resFileds[1];
                                } else {
                                    throw new DMFileException("The res value not found.");
                                }
                            }
                            if (StringUtils.startsWith(posLineField, OBS)) {
                                String[] obsFileds = DMUtil.splitByDelims(posLineField, COLON_DELIM);
                                if (obsFileds.length == 2) {
                                    obs = Integer.valueOf(obsFileds[1].trim());
                                } else {
                                    throw new DMFileException("The obs value not found.");
                                }
                            }
                        }

                        PTMEvidenceBean ptmEvidenceBean = createPTMEvidenceBean(nominalMass, pos, res, obs,
                                enspAccession, maximumLoge, gpmDbType);

                        //Identify the type
                        GPMPTMSubType ptmSubType = GPMPTMSubType.PHS_S;

                        if (gpmDbType.equals(GPMDbType.GPMDB_PSYT)) {
                            ptmSubType = GPMPTMSubType.fromType(res);
                        } else if (gpmDbType.equals(GPMDbType.GPMDB_LYS)) {
                            ptmSubType = GPMPTMSubType.LYS;
                        } else if (gpmDbType.equals(GPMDbType.GPMDB_NTA)) {
                            ptmSubType = GPMPTMSubType.NTA;
                        }
                        //add the ptm evidence bean
                        gpmDbEntryBeanList.get(counterIndex).setPtmEvidenceBean(ptmEvidenceBean, ptmSubType);
                    }
                }
            }
            logger.info("The total entry size of the " + gpmDbType.type() + " is :" + gpmDbEntryBeanList.size());
            gpmDbBean.setPgmDbEntryBeans(gpmDbEntryBeanList);
            return gpmDbBean;
        } catch (Exception ex) {
            logger.error(ex);
            throw new DMParserException(ex);
        } finally {
            if (ins != null) {
                try {
                    ins.close();
                } catch (Exception e) {
                    //ignore whatever caught.
                }
            }
        }
    }

    private PTMEvidenceBean createPTMEvidenceBean(String nominalMass, int pos, String res, int obs,
            String enspAccession, String maxexpect, GPMDbType gpmDbType) {

        PTMEvidenceBean ptmEvidenceBean = new PTMEvidenceBean();
        ptmEvidenceBean.setPos(pos);
        ptmEvidenceBean.setEvidenceValue(String.valueOf(obs));

        //evidence hyper link
        String hyperLink = GPMDB_BASE_QUERY + PARAM_LABEL + enspAccession + PARAM_MODMASS + nominalMass + PARAM_AND
                + res + PARAM_MAXEXPECT + maxexpect;
        //set the evidence hyperlink
        ptmEvidenceBean.setHyperlink(hyperLink);

        TPBDataTypeBean tpbDataTypeBean = new TPBDataTypeBean();
        //set the data type level to 3
        tpbDataTypeBean.setLevel(TLLevel.TL3.level());

        //tpb data type
        if (gpmDbType.equals(GPMDbType.GPMDB_PSYT)) {
            if (StringUtils.equals(res, GPMPTMSubType.PHS_S.type())) {
                //set the data type
                tpbDataTypeBean.setDataType(DataType.PTM_PHS_SER.type());
            }

            if (StringUtils.equals(res, GPMPTMSubType.PHS_Y.type())) {
                //set the data type
                tpbDataTypeBean.setDataType(DataType.PTM_PHS_TYR.type());
            }

            if (StringUtils.equals(res, GPMPTMSubType.PHS_T.type())) {
                //set the data type
                tpbDataTypeBean.setDataType(DataType.PTM_PHS_THR.type());
            }
        } else if (gpmDbType.equals(GPMDbType.GPMDB_LYS)) {
            tpbDataTypeBean.setDataType(DataType.PTM_ACE_LYS.type());
        } else if (gpmDbType.equals(GPMDbType.GPMDB_NTA)) {
            tpbDataTypeBean.setDataType(DataType.PTM_ACE_NTA.type());
        }
        //set the tpb data type
        ptmEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);

        int colorLevel = ColorType.BLACK.color();
        //evidence color level
        if (obs >= 50) {
            colorLevel = ColorType.GREEN.color();
        } else if (obs >= 20 && obs < 50) {
            colorLevel = ColorType.YELLOW.color();
        } else if (obs >= 5 && obs < 20) {
            colorLevel = ColorType.RED.color();
        }
        //set the color level
        ptmEvidenceBean.setColorLevel(colorLevel);
        return ptmEvidenceBean;
    }

    private PTMEvidenceBean createNonEvidenceBean(String enspAccession, String nominalMass, String maxexpect,
            GPMPTMSubType gpmptmSubType, GPMDbType gpmDbType) {
        if (StringUtils.isBlank(enspAccession)) {
            return null;
        }
        PTMEvidenceBean ptmEvidenceBean = new PTMEvidenceBean();
        ptmEvidenceBean.setPos(0);
        ptmEvidenceBean.setEvidenceValue("0");
        ptmEvidenceBean.setColorLevel(ColorType.BLACK.color());

        TPBDataTypeBean tpbDataTypeBean = new TPBDataTypeBean();
        //set the data type level to 3
        tpbDataTypeBean.setLevel(TLLevel.TL3.level());

        if (gpmDbType.equals(GPMDbType.GPMDB_PSYT)) {
            if (gpmptmSubType.equals(GPMPTMSubType.PHS_S)) {
                //set the data type
                tpbDataTypeBean.setDataType(DataType.PTM_PHS_SER.type());
                ptmEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);
                String hyperLink = GPMDB_BASE_QUERY + PARAM_LABEL + enspAccession + PARAM_MODMASS + nominalMass
                        + PARAM_AND + gpmptmSubType.type() + PARAM_MAXEXPECT + maxexpect;
                ptmEvidenceBean.setHyperlink(hyperLink);
                return ptmEvidenceBean;
            } else if (gpmptmSubType.equals(GPMPTMSubType.PHS_T)) {
                //set the data type
                tpbDataTypeBean.setDataType(DataType.PTM_PHS_THR.type());
                ptmEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);

                String hyperLink = GPMDB_BASE_QUERY + PARAM_LABEL + enspAccession + PARAM_MODMASS + nominalMass
                        + PARAM_AND + gpmptmSubType.type() + PARAM_MAXEXPECT + maxexpect;
                ptmEvidenceBean.setHyperlink(hyperLink);
                return ptmEvidenceBean;
            } else if (gpmptmSubType.equals(GPMPTMSubType.PHS_Y)) {
                //set the data type
                tpbDataTypeBean.setDataType(DataType.PTM_PHS_TYR.type());
                ptmEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);

                String hyperLink = GPMDB_BASE_QUERY + PARAM_LABEL + enspAccession + PARAM_MODMASS + nominalMass
                        + PARAM_AND + gpmptmSubType.type() + PARAM_MAXEXPECT + maxexpect;
                ptmEvidenceBean.setHyperlink(hyperLink);
                return ptmEvidenceBean;
            }
        } else if (gpmDbType.equals(GPMDbType.GPMDB_LYS)) {
            //set the data type
            tpbDataTypeBean.setDataType(DataType.PTM_ACE_LYS.type());
            ptmEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);
            String hyperLink = GPMDB_BASE_QUERY + PARAM_LABEL + enspAccession + PARAM_MODMASS + nominalMass
                    + PARAM_MAXEXPECT + maxexpect;
            ptmEvidenceBean.setHyperlink(hyperLink);
            return ptmEvidenceBean;

        } else if (gpmDbType.equals(GPMDbType.GPMDB_NTA)) {
            //set the data type
            tpbDataTypeBean.setDataType(DataType.PTM_ACE_NTA.type());
            ptmEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);
            String hyperLink = GPMDB_BASE_QUERY + PARAM_LABEL + enspAccession + PARAM_MODMASS + nominalMass
                    + PARAM_MAXEXPECT + maxexpect;
            ptmEvidenceBean.setHyperlink(hyperLink);
            return ptmEvidenceBean;
        }
        return null;
    }

    private AccessionBean createAcBean(String accession, String accessionType) {
        if (StringUtils.isNotBlank(accession)) {
            AccessionBean accessionBean = new AccessionBean();
            accessionBean.setAccession(accession);
            accessionBean.setAcType(accessionType);
            return accessionBean;
        }
        return null;
    }

    private List<DbSourceAcEntryBean> parseDBSourceAcEntryBeans(String enspAc, String ensgAc, String enstAc) {
        List<DbSourceAcEntryBean> dbSourceAcEntryBeans = new ArrayList<DbSourceAcEntryBean>();
        DbSourceAcEntryBean dbSourceAcEntryBeanEnsp = generateDbSourceAcEntry(enspAc, DbAcType.Protein);
        if (dbSourceAcEntryBeanEnsp != null) {
            dbSourceAcEntryBeans.add(dbSourceAcEntryBeanEnsp);
        }
        DbSourceAcEntryBean dbSourceAcEntryBeanEnsg = generateDbSourceAcEntry(ensgAc, DbAcType.Gene);
        if (dbSourceAcEntryBeanEnsg != null) {
            dbSourceAcEntryBeans.add(dbSourceAcEntryBeanEnsg);
        }
        DbSourceAcEntryBean dbSourceAcEntryBeanEnst = generateDbSourceAcEntry(enstAc, DbAcType.Transcript);
        if (dbSourceAcEntryBeanEnsg != null) {
            dbSourceAcEntryBeans.add(dbSourceAcEntryBeanEnst);
        }
        return dbSourceAcEntryBeans;
    }

    private DbSourceAcEntryBean generateDbSourceAcEntry(String accession, DbAcType dbAcType) {
        if (StringUtils.isNotBlank(accession)) {
            DbSourceAcEntryBean dbSourceAcEntryBean = new DbSourceAcEntryBean();
            AccessionBean accessionBean = new AccessionBean();
            accessionBean.setAccession(accession);
            if (dbAcType.equals(DbAcType.Gene)) {
                accessionBean.setAcType(DbAcType.Gene.type());
            }
            if (dbAcType.equals(DbAcType.Protein)) {
                accessionBean.setAcType(DbAcType.Protein.type());
            }

            if (dbAcType.equals(DbAcType.Transcript)) {
                accessionBean.setAcType(DbAcType.Transcript.type());
            }
            DBSourceBean dbSourceBean = new DBSourceBean();
            dbSourceBean.setDbName(DbAcType.Ensembl.type());

            dbSourceAcEntryBean.setAccessionBean(accessionBean);
            dbSourceAcEntryBean.setDbSourceBean(dbSourceBean);
            return dbSourceAcEntryBean;
        }
        return null;
    }

    public static void main(String[] arg) throws Exception {

        String filename = "./testData/human_all_chr.mod.txt";
        //String filename = "./testData/nta_hs_all_chr.mod.txt";
        //String filename = "./testData/lys_hs_all_chr.mod.txt";

        FileInputStream fileInputStream = new FileInputStream(new File(filename));

        GPMDbParser parser = new GPMDbParser();
        GPMDbBean gpmDbBean = parser.parse(fileInputStream, "utf-8", GPMDbType.GPMDB_LYS);
        List<GPMDbEntryBean> gpmDbEntryBeans = gpmDbBean.getPgmDbEntryBeans();

        for (GPMDbEntryBean gpmDbEntryBean : gpmDbEntryBeans) {
            AccessionBean identifiedAcBean = gpmDbEntryBean.getIdentifiedAccessionBean();
            System.out.println("============== ensp : " + identifiedAcBean.getAccession());
            GeneBean geneBean = gpmDbEntryBean.getGeneBean();

            System.out.println("================= gene symbol : " + geneBean.getDisplayName() + " desc: "
                    + geneBean.getDescription());

            List<DbSourceAcEntryBean> dbSourceAcEntryBeans = gpmDbEntryBean.getDbSourceAcEntryBeans();

            for (DbSourceAcEntryBean dbSourceAcEntryBean : dbSourceAcEntryBeans) {
                AccessionBean accessionBean = dbSourceAcEntryBean.getAccessionBean();
                System.out.println("============ accession: " + accessionBean.getAccession() + " type: "
                        + accessionBean.getAcType());
                DBSourceBean dbSourceBean = dbSourceAcEntryBean.getDbSourceBean();
                System.out.println("============ dbsource: " + dbSourceBean.getDbName());
            }

            List<PTMEvidenceBean> ptmEvidenceBeans = gpmDbEntryBean.getPtmEvidenceBeans();
            System.out.println("========== ptm evidence bean size: " + ptmEvidenceBeans.size());
            for (PTMEvidenceBean ptmEvidenceBean : ptmEvidenceBeans) {
                TPBDataTypeBean tpbDataTypeBean = ptmEvidenceBean.getTpbDataTypeBean();
                System.out.println("============ tpb data type: " + tpbDataTypeBean.getDataType());
                System.out.println("============ pos: " + ptmEvidenceBean.getPos() + " obs: "
                        + ptmEvidenceBean.getEvidenceValue());
                System.out.println("============ color : " + ptmEvidenceBean.getColorLevel());
                System.out.println("============ hyper link: " + ptmEvidenceBean.getHyperlink());
            }
            if (ptmEvidenceBeans.size() == 0) {
                System.out.println("============= No peptides found for " + identifiedAcBean.getAccession());
            }
            System.out.println("\n");
        }

        System.out.println("================= total size: " + gpmDbEntryBeans.size());

    }
}