edu.monash.merc.system.parser.nextprot.NXHandler.java Source code

Java tutorial

Introduction

Here is the source code for edu.monash.merc.system.parser.nextprot.NXHandler.java

Source

/*
 * Copyright (c) 2011-2013, Monash e-Research Centre
 * (Monash University, Australia)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the Monash University nor the names of its
 *      contributors may be used to endorse or promote products derived from
 *      this software without specific prior written permission.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You should have received a copy of the GNU Affero General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

package edu.monash.merc.system.parser.nextprot;

import edu.monash.merc.common.name.*;
import edu.monash.merc.dto.*;
import org.apache.commons.lang.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.ArrayList;
import java.util.List;

/**
 * SAX handler that collects {@link NXEntryBean} objects while a NextProt XML export is parsed.
 * <p/>
 * The handler pushes every element name onto an {@link XMLStack} and compares the resulting path
 * (case-insensitively) with the paths declared in {@link NXFields}. A new {@link NXEntryBean} is created when a
 * protein element starts and is appended to the result list when that element ends. In between, the handler
 * fills in the accession, gene, identifier and xref based evidence beans of that entry.
 * <p/>
 * The handler keeps its parsing state in plain instance fields and never clears the result list, so every parse
 * run through the same instance appends to the list returned by {@link #getNxEntryBeans()}.
 *
 * @author Simon Yu
 *         <p/>
 *         Email: xiaoming.yu@monash.edu
 * @version 1.0
 * @since 1.0
 *        <p/>
 *        Date: 24/07/13 12:31 PM
 */
public class NXHandler extends DefaultHandler {

    //all entries completed so far
    private final List<NXEntryBean> nxEntryBeans = new ArrayList<NXEntryBean>();

    //the entry of the protein element currently being parsed
    private NXEntryBean nxEntryBean;

    //the NextProt accession of the current entry
    private AccessionBean identifiedAccession;

    private GeneBean geneBean;

    //accessions and dbsource in the identifiers
    private List<DbSourceAcEntryBean> dbAcEntryBeans;

    private NXPeMsAntiEntryBean peMsAntiEntryBean;

    //all pe ms ann evidences for a xrefs
    //NOTE(review): package-private unlike the other fields; kept as is in case same-package code reads it
    List<PEEvidenceBean> peMsAnnEvidenceBeans;

    //individual pe ms ann evidence in the xrefs/xref if any
    private PEEvidenceBean peMsAnnEvidenceBean;

    //individual pe anti ann evidence in the xrefs/xref if any
    private PEEvidenceBean peAntiAnnEvidenceBean;

    // xref three attributes 1. database, 2. category, 3. accession
    private String xrefDatabase;

    private String xrefCategory;

    private String xrefAccession;

    //number of HPA / CAB antibody xrefs seen in the current xrefs element
    private int peAntiAnnCounter = 0;

    private String peAntiURL;

    //true while the current xref carries a pe ms ann evidence
    private boolean peMsAnnEvExisted = false;

    //true once the current xrefs element contained an antibody database xref
    private boolean peAntiAnnEvExisted = false;

    //value of the "main name" attribute of the protein name entity being parsed
    private boolean mainProteinDesc = false;

    //value of the "main name" attribute of the gene name entity being parsed
    private boolean mainGeneSymbol = false;

    //true once a protein description has been stored for the current entry
    private boolean proteinDescAssigned = false;

    //true once a gene symbol has been stored for the current entry
    private boolean geneSymbolAssigned = false;

    private final XMLStack pathStack = new XMLStack();

    //text content collected since the most recent start tag
    private final StringBuilder stringBuilder = new StringBuilder();

    /**
     * Returns the entries collected so far.
     *
     * @return the handler's own list (not a copy) of completed entries
     */
    public List<NXEntryBean> getNxEntryBeans() {
        return nxEntryBeans;
    }

    /**
     * Pushes the element onto the path stack, clears the text buffer and dispatches on the resulting path.
     *
     * @param uri        not used
     * @param localName  not used
     * @param qName      the qualified element name, used as path segment
     * @param attributes the attributes of the element
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        pathStack.push(qName);
        //Reset the StringBuilder so that it only holds the text of the innermost open element
        stringBuilder.setLength(0);

        String currentPath = pathStack.currentPath();

        //start - nextprotExport/proteins/protein
        if (currentPath.equalsIgnoreCase(NXFields.PATH_PROTEIN)) {
            startProtein(attributes);
        }

        //start -- nextprotExport/proteins/protein/proteinExistence
        if (currentPath.equalsIgnoreCase(NXFields.PATH_PROTEIN_EXISTENCE)) {
            startProteinExistence(attributes);
        }

        //start - protein/proteinNames/entityName Element
        if (currentPath.equalsIgnoreCase(NXFields.PATH_PROTEIN_DESC_MAIN_ENTITY)) {
            mainProteinDesc = Boolean.parseBoolean(attributes.getValue(NXFields.ATTR_MAIN_NAME));
        }

        //start - nextprotExport/proteins/protein/chromosomalLocations/chromosomalLocation
        if (currentPath.equalsIgnoreCase(NXFields.PATH_CHROM_LOCATION)) {
            startChromosomalLocation(attributes);
        }

        //start - nextprotExport/proteins/protein/geneNames/entityName
        if (currentPath.equalsIgnoreCase(NXFields.PATH_GENE_SYMBOL_MAIN_ENTITY)) {
            mainGeneSymbol = Boolean.parseBoolean(attributes.getValue(NXFields.ATTR_MAIN_NAME));
        }

        //start -- nextprotExport/proteins/protein/identifiers/identifier
        if (currentPath.equalsIgnoreCase(NXFields.PATH_IDENTIFIER)) {
            startIdentifier(attributes);
        }

        //start -- nextprotExport/proteins/protein/xrefs
        if (currentPath.equalsIgnoreCase(NXFields.PATH_XREFS)) {
            startXrefs();
        }

        //start -- nextprotExport/proteins/protein/xrefs/xref
        if (currentPath.equalsIgnoreCase(NXFields.PATH_XREF)) {
            startXref(attributes);
        }
    }

    /**
     * Finishes whatever the ending element contributes to the current entry and then pops the path stack.
     *
     * @param uri       not used
     * @param localName not used
     * @param qName     not used; the path is taken from the stack
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        String currentPath = pathStack.currentPath();

        //end - nextprotExport/proteins/protein
        if (currentPath.equalsIgnoreCase(NXFields.PATH_PROTEIN)) {
            nxEntryBeans.add(nxEntryBean);
        }

        //end - protein/proteinNames/entityName/value end
        if (currentPath.equalsIgnoreCase(NXFields.PATH_PROTEIN_DESC)) {
            endProteinDesc();
        }

        //end - nextprotExport/proteins/protein/geneNames/entityName/value
        if (currentPath.equalsIgnoreCase(NXFields.PATH_GENE_SYMBOL)) {
            endGeneSymbol();
        }

        //end - nextprotExport/proteins/protein/identifiers
        if (currentPath.equalsIgnoreCase(NXFields.PATH_IDENTIFIERS)) {
            nxEntryBean.setDbSourceAcEntryBeans(dbAcEntryBeans);
        }

        //end of nextprotExport/proteins/protein/xrefs/xref/url
        if (currentPath.equalsIgnoreCase(NXFields.PATH_XREF_URL)) {
            endXrefUrl();
        }

        //end -- nextprotExport/proteins/protein/xrefs/xref
        if (currentPath.equalsIgnoreCase(NXFields.PATH_XREF)) {
            endXref();
        }

        //end -- nextprotExport/proteins/protein/xrefs
        if (currentPath.equalsIgnoreCase(NXFields.PATH_XREFS)) {
            endXrefs();
        }

        //finally pop the current path as it's the end of the current element.
        pathStack.pop();
    }

    /**
     * Appends the reported characters to the text buffer; the buffer is read when the element ends.
     *
     * @param ch     the character array supplied by the parser
     * @param start  the first position to read
     * @param length the number of characters to read
     * @throws SAXException declared by the SAX contract
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        //a parser may deliver the text of one element in several chunks, hence append rather than replace
        stringBuilder.append(ch, start, length);
    }

    //starts a new entry: creates the entry, its accession, a placeholder gene and the identifier list
    private void startProtein(Attributes attributes) {
        String nxAc = attributes.getValue(NXFields.ATTR_PROTEIN_UNIQUE_NAME);

        nxEntryBean = new NXEntryBean();
        nxEntryBean.setDbSourceName(DbAcType.NextProt.type());

        identifiedAccession = new AccessionBean();
        identifiedAccession.setAccession(nxAc);
        identifiedAccession.setAcType(DbAcType.NextProt.type());
        nxEntryBean.setIdentifiedAccessionBean(identifiedAccession);

        //the gene starts out as unknown and is refined by later elements
        geneBean = new GeneBean();
        geneBean.setDisplayName(NameType.UNKNOWN.cn());
        geneBean.setChromosome(ChromType.UNKNOWN.chm());
        nxEntryBean.setGeneBean(geneBean);

        //no name has been recorded for this entry yet
        proteinDescAssigned = false;
        geneSymbolAssigned = false;

        //the list always starts with the NextProt accession itself
        dbAcEntryBeans = new ArrayList<DbSourceAcEntryBean>();
        DbSourceAcEntryBean nxDbSourceAcEntryBean = createDbAcEntryBean(DbAcType.NextProt.type(), nxAc,
                DbAcType.NextProt.type());
        //createDbAcEntryBean returns null for a blank accession; never store a null element
        if (nxDbSourceAcEntryBean != null) {
            dbAcEntryBeans.add(nxDbSourceAcEntryBean);
        }
    }

    //creates the PE / TE "oth cur" evidences from the value attribute of the proteinExistence element
    private void startProteinExistence(Attributes attributes) {
        String evidence = attributes.getValue(NXFields.ATTR_VALUE);
        if (StringUtils.isNotBlank(evidence)) {
            String nxAc = nxEntryBean.getIdentifiedAccessionBean().getAccession();
            nxEntryBean.setNxPeTeOthEntryBean(createPeTeOthEvidencesBean(evidence, nxAc));
        }
    }

    //copies the non-blank chromosome, band, strand and accession attributes into the gene of the entry
    private void startChromosomalLocation(Attributes attributes) {
        String chrom = attributes.getValue(NXFields.ATTR_CHROMOSOME);
        String band = attributes.getValue(NXFields.ATTR_BAND);
        String strand = attributes.getValue(NXFields.ATTR_STRAND);
        String ensgAc = attributes.getValue(NXFields.ATTR_ACCESSION);

        GeneBean gene = nxEntryBean.getGeneBean();
        if (StringUtils.isNotBlank(chrom)) {
            gene.setChromosome(chrom);
        }
        if (StringUtils.isNotBlank(band)) {
            gene.setBand(band);
        }
        if (StringUtils.isNotBlank(strand)) {
            gene.setStrand(strand);
        }
        if (StringUtils.isNotBlank(ensgAc)) {
            gene.setEnsgAccession(ensgAc);
        }
    }

    //adds the accession described by an identifier element to the identifier list, if it is usable
    private void startIdentifier(Attributes attributes) {
        String type = attributes.getValue(NXFields.ATTR_TYPE);
        String acValue = attributes.getValue(NXFields.ATTR_NAME);
        String dbName = attributes.getValue(NXFields.ATTR_DATABASE);

        DbSourceAcEntryBean dbSourceAcEntryBean = createDbAcEntryBean(type, acValue, dbName);
        if (dbSourceAcEntryBean != null) {
            dbAcEntryBeans.add(dbSourceAcEntryBean);
        }
    }

    //prepares the containers and the antibody counter for the xref elements that follow
    private void startXrefs() {
        peMsAntiEntryBean = new NXPeMsAntiEntryBean();
        //stores all pe ms ann evidences found under xrefs/xref
        peMsAnnEvidenceBeans = new ArrayList<PEEvidenceBean>();
        //filled in at the end of xrefs, and only when an antibody xref was seen
        peAntiAnnEvidenceBean = new PEEvidenceBean();
        peAntiAnnCounter = 0;
    }

    //remembers the xref attributes, starts a pe ms ann evidence if the xref qualifies and counts antibodies
    private void startXref(Attributes attributes) {
        String database = attributes.getValue(NXFields.ATTR_XREF_DATABASE);
        String category = attributes.getValue(NXFields.ATTR_XREF_CATEGORY);
        String accession = attributes.getValue(NXFields.ATTR_XREF_ACCESSION);
        //kept until the url child element ends, see endXrefUrl
        this.xrefDatabase = database;
        this.xrefCategory = category;
        this.xrefAccession = accession;

        int colorLevel = findPeMsAnnColorLevel(database, category);
        if (colorLevel != -1) {
            peMsAnnEvidenceBean = new PEEvidenceBean();
            peMsAnnEvidenceBean.setColorLevel(colorLevel);
            peMsAnnEvidenceBean.setEvidenceValue(database + " - " + accession);
            peMsAnnEvidenceBean.setTpbDataTypeBean(createLevel3DataType(DataType.PE_MS_ANN));
            peMsAnnEvExisted = true;
        }
        //count pe anti ann
        countPeAntiEvidences(database, category, accession);
    }

    //stores the protein description; a name flagged as main always wins, any other name is only a fallback
    private void endProteinDesc() {
        if (mainProteinDesc || !proteinDescAssigned) {
            identifiedAccession.setDescription(stringBuilder.toString());
            proteinDescAssigned = true;
        }
        mainProteinDesc = false;
    }

    //stores the gene symbol; a name flagged as main always wins, any other name is only a fallback
    private void endGeneSymbol() {
        if (mainGeneSymbol || !geneSymbolAssigned) {
            nxEntryBean.getGeneBean().setDisplayName(stringBuilder.toString());
            geneSymbolAssigned = true;
        }
        mainGeneSymbol = false;
    }

    //uses the url text as hyperlink of the pe ms ann evidence and as candidate antibody hyperlink
    private void endXrefUrl() {
        String xrefUrl = StringUtils.trim(stringBuilder.toString());
        if (peMsAnnEvExisted) {
            peMsAnnEvidenceBean.setHyperlink(xrefUrl);
        }
        findPeAnTiAnnURL(xrefDatabase, xrefCategory, xrefAccession, xrefUrl);
        //the attributes of this xref are not needed any more
        xrefDatabase = null;
        xrefCategory = null;
        xrefAccession = null;
    }

    //moves the pe ms ann evidence of the ending xref, if there is one, into the evidence list
    private void endXref() {
        if (peMsAnnEvExisted) {
            peMsAnnEvidenceBeans.add(peMsAnnEvidenceBean);
            peMsAnnEvExisted = false;
        }
    }

    //attaches the collected pe ms ann evidences and the summarised antibody evidence to the entry
    private void endXrefs() {
        peMsAntiEntryBean.setPeMsAnnEvidenceBeans(peMsAnnEvidenceBeans);

        if (peAntiAnnEvExisted) {
            //no countable antibody: black, exactly one: red, more than one: yellow
            if (peAntiAnnCounter == 0) {
                peAntiAnnEvidenceBean.setColorLevel(ColorType.BLACK.color());
            } else if (peAntiAnnCounter == 1) {
                peAntiAnnEvidenceBean.setColorLevel(ColorType.RED.color());
            } else if (peAntiAnnCounter > 1) {
                peAntiAnnEvidenceBean.setColorLevel(ColorType.YELLOW.color());
            }

            peAntiAnnEvidenceBean.setEvidenceValue(peAntiAnnCounter + " " + NXConts.XREF_HPA_ANTIBODY_DESC);
            peAntiAnnEvidenceBean.setHyperlink(peAntiURL);
            peAntiAnnEvidenceBean.setTpbDataTypeBean(createLevel3DataType(DataType.PE_ANTI_ANN));
            peMsAntiEntryBean.setPeAntiAnnEvidenceBean(peAntiAnnEvidenceBean);
            //reset the antibody state so that it does not leak into the next xrefs element
            peAntiAnnEvExisted = false;
            peAntiURL = null;
        }

        nxEntryBean.setNxPeMsAntiEntryBean(peMsAntiEntryBean);
    }

    //creates a TPBDataTypeBean of the given data type with the traffic lights level set to 3
    private TPBDataTypeBean createLevel3DataType(DataType dataType) {
        TPBDataTypeBean tpbDataTypeBean = new TPBDataTypeBean();
        tpbDataTypeBean.setDataType(dataType.type());
        tpbDataTypeBean.setLevel(TLLevel.TL3.level());
        return tpbDataTypeBean;
    }

    /**
     * Creates the PE and TE "oth cur" evidences for a protein existence value.
     * <p/>
     * The PE evidence is green when the value contains {@code NXFields.EV_PROTEIN_LEVEL}; the TE evidence is green
     * when it contains that or {@code NXFields.EV_TRANSCRIPT_LEVEL} (both compared ignoring case). Otherwise the
     * evidence is black. Both link to {@code NXFields.NX_BASE_URL} followed by the accession.
     *
     * @param evidence    the protein existence value
     * @param nxAccession the NextProt accession appended to the base URL
     * @return the bean holding both evidences, or null if the evidence is blank
     */
    private NXPeTeOthEntryBean createPeTeOthEvidencesBean(String evidence, String nxAccession) {
        if (StringUtils.isBlank(evidence)) {
            return null;
        }
        boolean proteinLevel = StringUtils.containsIgnoreCase(evidence, NXFields.EV_PROTEIN_LEVEL);
        boolean transcriptLevel = StringUtils.containsIgnoreCase(evidence, NXFields.EV_TRANSCRIPT_LEVEL);
        String hyperlink = NXFields.NX_BASE_URL + nxAccession;

        //nextprot pe oth cur evidence
        PEEvidenceBean nxPeOthEvidenceBean = new PEEvidenceBean();
        if (proteinLevel) {
            nxPeOthEvidenceBean.setColorLevel(ColorType.GREEN.color());
        } else {
            nxPeOthEvidenceBean.setColorLevel(ColorType.BLACK.color());
        }
        nxPeOthEvidenceBean.setTpbDataTypeBean(createLevel3DataType(DataType.PE_OTH_CUR));
        nxPeOthEvidenceBean.setEvidenceValue(evidence);
        nxPeOthEvidenceBean.setHyperlink(hyperlink);

        //nextprot te oth cur evidence
        TEEvidenceBean nxTeOthEvidenceBean = new TEEvidenceBean();
        if (proteinLevel || transcriptLevel) {
            nxTeOthEvidenceBean.setColorLevel(ColorType.GREEN.color());
        } else {
            nxTeOthEvidenceBean.setColorLevel(ColorType.BLACK.color());
        }
        nxTeOthEvidenceBean.setTpbDataTypeBean(createLevel3DataType(DataType.TE_OTH_CUR));
        nxTeOthEvidenceBean.setEvidenceValue(evidence);
        nxTeOthEvidenceBean.setHyperlink(hyperlink);

        NXPeTeOthEntryBean nxPeTeOthEntryBean = new NXPeTeOthEntryBean();
        nxPeTeOthEntryBean.setNxPeOthEvidenceBean(nxPeOthEvidenceBean);
        nxPeTeOthEntryBean.setNxTeOthEvidenceBean(nxTeOthEvidenceBean);
        return nxPeTeOthEntryBean;
    }

    /**
     * Creates an accession / db source pair.
     *
     * @param acType    the accession type
     * @param accession the accession
     * @param dbName    the db source name; {@code DbAcType.Unknown} is used when it is blank
     * @return the new bean, or null if the accession type or the accession is blank
     */
    private DbSourceAcEntryBean createDbAcEntryBean(String acType, String accession, String dbName) {
        if (StringUtils.isBlank(acType) || StringUtils.isBlank(accession)) {
            return null;
        }
        AccessionBean accessionBean = new AccessionBean();
        accessionBean.setAcType(acType);
        accessionBean.setAccession(accession);

        DBSourceBean dbSourceBean = new DBSourceBean();
        if (StringUtils.isNotBlank(dbName)) {
            dbSourceBean.setDbName(dbName);
        } else {
            dbSourceBean.setDbName(DbAcType.Unknown.type());
        }

        DbSourceAcEntryBean nxDbSourceAcEntryBean = new DbSourceAcEntryBean();
        nxDbSourceAcEntryBean.setAccessionBean(accessionBean);
        nxDbSourceAcEntryBean.setDbSourceBean(dbSourceBean);
        return nxDbSourceAcEntryBean;
    }

    /**
     * Finds the pe ms ann color level of an xref.
     *
     * @param database the xref database
     * @param category the xref category
     * @return red for a PRIDE xref and yellow for a PeptideAtlas xref of the proteomic databases category,
     *         -1 if the xref is no pe ms ann evidence
     */
    private int findPeMsAnnColorLevel(String database, String category) {
        if (StringUtils.isNotBlank(database) && StringUtils.isNotBlank(category)
                && StringUtils.equalsIgnoreCase(category, NXFields.XREF_CA_PROTEOMIC_DATABASES)) {
            if (StringUtils.equalsIgnoreCase(database, NXFields.XREF_DB_PRIDE)) {
                return ColorType.RED.color();
            }
            if (StringUtils.equalsIgnoreCase(database, NXFields.XREF_DB_PEPTIDE_ATLAS)) {
                return ColorType.YELLOW.color();
            }
        }
        // negative 1 means no pe ms ann evidence
        return -1;
    }

    /**
     * Records an HPA antibody xref: flags that antibody evidence exists and increases the counter when the
     * accession starts with the HPA or CAB prefix.
     *
     * @param database  the xref database
     * @param category  the xref category
     * @param accession the xref accession
     */
    private void countPeAntiEvidences(String database, String category, String accession) {
        if (StringUtils.isBlank(database) || StringUtils.isBlank(category) || StringUtils.isBlank(accession)) {
            return;
        }
        if (StringUtils.equalsIgnoreCase(database, NXFields.XREF_DB_HPA)
                && StringUtils.equalsIgnoreCase(category, NXFields.XREF_CA_ANTIBODY_DATABASES)) {
            //only hpa and cab accessions are counted
            if (StringUtils.startsWithIgnoreCase(accession, NXFields.XREF_AC_PREFIX_HPA)
                    || StringUtils.startsWithIgnoreCase(accession, NXFields.XREF_AC_PREFIX_CAB)) {
                peAntiAnnCounter++;
            }
            //the flag is raised for every xref of the HPA antibody database, counted or not
            peAntiAnnEvExisted = true;
        }
    }

    /**
     * Updates the pe anti ann hyperlink from the url of an HPA antibody xref.
     * <p/>
     * The url of an ENSG accession is preferred and replaces any url recorded before; the url of an HPA or CAB
     * accession is only used while no url has been recorded.
     *
     * @param xdatabase  the xref database
     * @param xcategory  the xref category
     * @param xaccession the xref accession
     * @param xrefUrl    the trimmed text of the url element of the xref
     */
    private void findPeAnTiAnnURL(String xdatabase, String xcategory, String xaccession, String xrefUrl) {
        if (StringUtils.isBlank(xdatabase) || StringUtils.isBlank(xcategory) || StringUtils.isBlank(xaccession)) {
            return;
        }
        if (!StringUtils.equalsIgnoreCase(xdatabase, NXFields.XREF_DB_HPA)
                || !StringUtils.equalsIgnoreCase(xcategory, NXFields.XREF_CA_ANTIBODY_DATABASES)) {
            return;
        }
        //using the ENSG accession as preferred hyperlink
        if (StringUtils.startsWithIgnoreCase(xaccession, NXConts.XREF_AC_PREFIX_ENSG)
                && StringUtils.isNotBlank(xrefUrl)) {
            peAntiURL = StringUtils.removeEndIgnoreCase(xrefUrl, NXFields.XREF_AC_ENSG_URL_END_PART);
        }
        //if there is no hyperlink for ensg (yet) then we have to use the others
        boolean hpaOrCab = StringUtils.startsWithIgnoreCase(xaccession, NXFields.XREF_AC_PREFIX_HPA)
                || StringUtils.startsWithIgnoreCase(xaccession, NXFields.XREF_AC_PREFIX_CAB);
        if (hpaOrCab && StringUtils.isBlank(peAntiURL)) {
            peAntiURL = xrefUrl;
        }
    }
}