Java tutorial
/* * Copyright (c) 2011-2013, Monash e-Research Centre * (Monash University, Australia) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the Monash University nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Affero General Public License for more details. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * You should have received a copy of the GNU Affero General Public License along with * this program. If not, see <http://www.gnu.org/licenses/>. 
*/
package edu.monash.merc.system.parser.nextprot;

import edu.monash.merc.common.name.ColorType;
import edu.monash.merc.common.name.DataType;
import edu.monash.merc.common.name.DbAcType;
import edu.monash.merc.common.name.TLLevel;
import edu.monash.merc.dto.*;
import edu.monash.merc.exception.DMXMLParserException;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.jaxen.jdom.JDOMXPath;
import org.jdom.Attribute;
import org.jdom.DataConversionException;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.input.SAXBuilder;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * NxXMLParser class which parses the nextprot xml file by building a complete JDOM document
 * and walking its {@code protein} elements. Using this parser is not recommended, please use
 * {@code NxXMLSAXParser} instead.
 *
 * @author Simon Yu
 * <p/>
 * Email: xiaoming.yu@monash.edu
 * @version 1.0
 * @since 1.0
 * <p/>
 * Date: 24/02/12 1:03 PM
 * @deprecated use {@code NxXMLSAXParser} instead
 */
@Deprecated
public class NxXMLParser {

    private static final String PROTEIN_PATH = "//proteins/protein";

    private static final String ATTR_PROTEIN_NEXTPROT_AC = "uniqueName";

    private static final String ELE_PROTEIN_EXISTENCE = "proteinExistence";

    private static final String ATTR_PROTEIN_EXISTENCE_VALUE = "value";

    private static final String ELE_NEXTPROT_PROTEIN_NAMES = "proteinNames";

    private static final String ELE_NEXTPROT_GENE_NAMES = "geneNames";

    private static final String ELE_NEXTPROT_CHROMOSOMAL_LOCATIONS = "chromosomalLocations";

    private static final String ELE_NEXTPROT_CHROMOSOMAL_LOCATION = "chromosomalLocation";

    private static final String ATTR_NEXTPROT_CHROMOSOME = "chromosome";

    private static final String ATTR_NEXTPROT_CHROMOSOMAL_BAND = "band";

    private static final String ATTR_NEXTPROT_CHROMOSOMAL_STRAND = "strand";

    private static final String ATTR_NEXTPROT_CHROMOSOMAL_ACCESSION = "accession";

    private static final String ELE_NEXTPROT_IDENTIFIERS = "identifiers";

    private static final String ELE_NEXTPROT_IDENTIFIER = "identifier";

    private static final String ATTR_NEXTPROT_IDENTIFIER_TYPE = "type";

    private static final String ATTR_NEXTPROT_IDENTIFIER_NAME = "name";

    private static final String ATTR_NEXTPROT_IDENTIFIER_DATABASE = "database";

    private static final String ELE_ENTITY_NAME = "entityName";

    private static final String ATTR_ENTITY_NAME_MAIN = "isMain";

    private static final String ELE_ENTITY_NAME_VALUE = "value";

    private static final String ELE_ANNOTATIONS = "annotations";

    private static final String ELE_ANNOTATION_LIST = "annotationList";

    private static final String ATTR_CATEGORY = "category";

    private static final String ELE_ANNOTATION = "annotation";

    private static final String ATTR_UNIQUE_NAME = "uniqueName";

    private static final String ATTR_QUALITY_QUALIFIER = "qualityQualifier";

    private static final String QUALITY_QUALIFIER_GOLD = "GOLD";

    private static final String QUALITY_QUALIFIER_SILVER = "SILVER";

    private static final String QUALITY_QUALIFIER_BRONZE = "BRONZE";

    private static final String ELE_CV_TERM = "cvTerm";

    private static final String ATTR_ACCESSION = "accession";

    private static final String ELE_CV_NAME = "cvName";

    //annotation description element
    private static final String ELE_DESCRIPTION = "description";

    private static final String ELE_VARIANT = "variant";

    private static final String ATTR_ORIGINAL = "original";

    private static final String ATTR_VARIATION = "variation";

    private static final String ELE_PROPERTIES = "properties";

    private static final String ELE_PROPERTY = "property";

    private static final String ATTR_PROPERTY_AC = "accession";

    private static final String ATTR_PROPERTY_NAME = "propertyName";

    private static final String ATTR_PROPERTY_VALUE = "value";

    private static final String ELE_EXP_EVIDENCES = "experimentalEvidences";

    private static final String ATTR_EXP_EVI_METHOD = "method";

    private static final String ATTR_EXP_EVI_RESULT = "result";

    private static final String ELE_EVIDENCES = "evidences";

    private static final String ELE_EVIDENCE = "evidence";

    private static final String ATTR_IS_NEGATIVE = "isNegative";

    private static final String ATTR_QUALIFIER_TYPE = "qualifierType";

    private static final String ATTR_RESOURCE_REF = "resourceRef";

    private static final String ATTR_RESOURCE_ASSOC_TYPE = "resourceAssocType";

    private static final String ELE_ISO_FORM_SPECIFICITY = "isoformSpecificity";

    private static final String ELE_ISO_FORM_ANNOT = "isoformAnnot";

    private static final String ELE_POSITIONS = "positions";

    private static final String ATTR_ISO_FORM_REF = "isoformRef";

    private static final String ELE_POSITION = "position";

    private static final String ATTR_POSITION_FIRST = "first";

    private static final String ATTR_POSITION_FIRST_STATUS = "firstStatus";

    private static final String ATTR_POSITION_LAST = "last";

    private static final String ATTR_POSITION_LAST_STATUS = "lastStatus";

    //XRef Elements
    private static final String ELE_XREFS = "xrefs";

    private static final String ELE_XREF = "xref";

    private static final String ATTR_XREF_DATABASE = "database";

    private static final String ATTR_XREF_CATEGORY = "category";

    private static final String ATTR_XREF_ACCESSION = "accession";

    private static final String ATTR_XREF_ID = "id";

    private static final String ELE_XREF_URL = "url";

    private final SAXBuilder parser = new SAXBuilder();

    private final Logger logger = Logger.getLogger(this.getClass().getName());

    /**
     * Parses a nextprot xml stream into a list of entry beans, one per element matched by
     * the {@code //proteins/protein} XPath. The stream is closed before this method returns,
     * whether parsing succeeds or fails.
     *
     * @param xmlStream the nextprot xml input stream
     * @return the parsed entries in the order the protein elements were selected
     * @throws DMXMLParserException if the XPath cannot be built or any step of the parsing fails
     */
    @SuppressWarnings("unchecked")
    public List<NXEntryBean> parseNextProtXML(InputStream xmlStream) {
        List<NXEntryBean> nxEntryBeanList = new ArrayList<NXEntryBean>();
        try {
            // The XPath is built inside the try so the stream is still closed if this step fails.
            JDOMXPath proteinPath = new JDOMXPath(PROTEIN_PATH);
            Document xmlDoc = this.parser.build(xmlStream);
            List<Element> proteinElements = proteinPath.selectNodes(xmlDoc);
            logger.info("NextProt total proteins size: " + proteinElements.size());

            for (Element protein : proteinElements) {
                nxEntryBeanList.add(parseProtein(protein));
            }
        } catch (Exception ex) {
            // Pass the throwable separately so log4j records the stack trace.
            logger.error("Failed to parse the nextprot xml", ex);
            throw new DMXMLParserException(ex);
        } finally {
            closeQuietly(xmlStream);
        }
        return nxEntryBeanList;
    }

    /**
     * Builds the entry bean for a single {@code protein} element: nextprot accession, gene,
     * db source/accession list, protein existence evidence and the xref based evidences.
     *
     * @param protein a protein element
     * @return the populated entry bean
     */
    private NXEntryBean parseProtein(Element protein) {
        NXEntryBean nxEntryBean = new NXEntryBean();

        //create accession and db source for nextprot
        String nxAc = attributeValue(protein, ATTR_PROTEIN_NEXTPROT_AC);
        if (nxAc != null) {
            AccessionBean nextprotAc = new AccessionBean();
            nextprotAc.setAccession(nxAc);
            nextprotAc.setAcType(DbAcType.NextProt.type());
            nxEntryBean.setIdentifiedAccessionBean(nextprotAc);
        }

        //parse the gene information
        nxEntryBean.setGeneBean(parseGene(protein));

        //set the db source name
        nxEntryBean.setDbSourceName(DbAcType.NextProt.type());

        //NextProt Accession
        AccessionBean accessionBean = nxEntryBean.getIdentifiedAccessionBean();
        String nxAccession = accessionBean.getAccession();

        //parse the dbsource and accession list
        nxEntryBean.setDbSourceAcEntryBeans(parseDbSourceAc(protein, nxAccession));

        //parse the proteinExistence evidence - PEEvidence
        PEEvidenceBean peEvidenceBean = parsePEOthCurEvidence(protein, nxAccession);
        // NOTE(review): in the whitespace-flattened source this call directly follows a bare "//"
        // marker, so it may originally have been commented out - confirm against the repository.
        nxEntryBean.setPeOthCurEvidenceBean(peEvidenceBean);

        // Annotation parsing (parseAnnotationList) was used for prototyping purposes only
        // and is not wired into the entry bean.

        //XRefs
        NXPeMsAntiEntryBean nxPeMsAntiEntryBean = parseNXPeMSAntiAnn(protein);
        if (nxPeMsAntiEntryBean != null) {
            nxEntryBean.setNxPeMsAntiEntryBean(nxPeMsAntiEntryBean);
        }
        return nxEntryBean;
    }

    /**
     * Closes the stream, ignoring any failure while closing.
     *
     * @param stream the stream to close, may be null
     */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (Exception ignored) {
            // best effort: a failure to close must not mask the parsing outcome
        }
    }

    /**
     * Reads an attribute value from an element.
     *
     * @param element       the element to read from
     * @param attributeName the attribute name
     * @return the attribute value, or null if the element has no such attribute
     */
    private static String attributeValue(Element element, String attributeName) {
        Attribute attribute = element.getAttribute(attributeName);
        if (attribute == null) {
            return null;
        }
        return attribute.getValue();
    }

    /**
     * Finds the text of the {@code value} child of the {@code entityName} entry flagged
     * {@code isMain="true"} (case-insensitive). If several entries qualify, the last one wins.
     *
     * @param namesEle the element holding the {@code entityName} children
     * @return the main entity name, or null if no entry is flagged as main with a value child
     */
    @SuppressWarnings("unchecked")
    private static String findMainEntityName(Element namesEle) {
        String mainName = null;
        List<Element> entityNameEles = namesEle.getChildren(ELE_ENTITY_NAME);
        for (Element entityNameEle : entityNameEles) {
            String isMain = attributeValue(entityNameEle, ATTR_ENTITY_NAME_MAIN);
            if (!StringUtils.equalsIgnoreCase(isMain, "true")) {
                continue;
            }
            Element valueEle = entityNameEle.getChild(ELE_ENTITY_NAME_VALUE);
            if (valueEle != null) {
                mainName = valueEle.getText();
            }
        }
        return mainName;
    }

    /**
     * Parses the gene information of a protein: display name (main gene name), description
     * (main protein name) and the chromosome, band, strand and accession attributes of its
     * chromosomal locations. A protein without a {@code geneNames} element gets
     * {@code NXConts.UNKNOWN} as display name; missing protein names or chromosomal locations
     * leave the corresponding fields unset.
     *
     * @param protein a protein element
     * @return the gene bean associated with this protein
     */
    @SuppressWarnings("unchecked")
    private GeneBean parseGene(Element protein) {
        GeneBean geneBean = new GeneBean();

        //get the gene name
        Element geneNamesEle = protein.getChild(ELE_NEXTPROT_GENE_NAMES);
        if (geneNamesEle == null) {
            geneBean.setDisplayName(NXConts.UNKNOWN);
        } else {
            String geneDisplayName = findMainEntityName(geneNamesEle);
            if (geneDisplayName != null) {
                geneBean.setDisplayName(geneDisplayName);
            }
        }

        //get the gene description
        Element proteinNamesEle = protein.getChild(ELE_NEXTPROT_PROTEIN_NAMES);
        if (proteinNamesEle != null) {
            String proteinDesc = findMainEntityName(proteinNamesEle);
            if (proteinDesc != null) {
                geneBean.setDescription(proteinDesc);
            }
        }

        //get the chromosome name, gene band and strand values
        Element chromosomalLocationsEle = protein.getChild(ELE_NEXTPROT_CHROMOSOMAL_LOCATIONS);
        if (chromosomalLocationsEle == null) {
            // No location information: tolerated like the other optional elements above.
            return geneBean;
        }
        List<Element> locationEles = chromosomalLocationsEle.getChildren(ELE_NEXTPROT_CHROMOSOMAL_LOCATION);
        for (Element locationEle : locationEles) {
            String chromosomeValue = attributeValue(locationEle, ATTR_NEXTPROT_CHROMOSOME);
            if (chromosomeValue != null) {
                geneBean.setChromosome(chromosomeValue);
            }
            String bandValue = attributeValue(locationEle, ATTR_NEXTPROT_CHROMOSOMAL_BAND);
            if (bandValue != null) {
                geneBean.setBand(bandValue);
            }
            String strandValue = attributeValue(locationEle, ATTR_NEXTPROT_CHROMOSOMAL_STRAND);
            if (strandValue != null) {
                geneBean.setStrand(strandValue);
            }
            //the location accession is stored as the ensembl ensg accession
            String ensgAcValue = attributeValue(locationEle, ATTR_NEXTPROT_CHROMOSOMAL_ACCESSION);
            if (ensgAcValue != null) {
                geneBean.setEnsgAccession(ensgAcValue);
            }
        }
        return geneBean;
    }

    /**
     * Parses the {@code identifier} elements of a protein into db source / accession pairs and
     * appends one extra pair for nextprot itself, flagged as primary evidence.
     *
     * @param protein a protein element
     * @param nxAc    the nextprot accession of this protein
     * @return the db source / accession entries; the nextprot entry is always the last one
     */
    @SuppressWarnings("unchecked")
    private List<DbSourceAcEntryBean> parseDbSourceAc(Element protein, String nxAc) {
        List<DbSourceAcEntryBean> nxDbSourceAcEntryBeanList = new ArrayList<DbSourceAcEntryBean>();

        Element identifiersEle = protein.getChild(ELE_NEXTPROT_IDENTIFIERS);
        if (identifiersEle != null) {
            List<Element> identifierElements = identifiersEle.getChildren(ELE_NEXTPROT_IDENTIFIER);
            for (Element identifierEle : identifierElements) {
                AccessionBean accessionBean = new AccessionBean();
                String typeValue = attributeValue(identifierEle, ATTR_NEXTPROT_IDENTIFIER_TYPE);
                if (typeValue != null) {
                    accessionBean.setAcType(typeValue);
                }
                // The accession depends on the name attribute only, not on the type attribute.
                String nameValue = attributeValue(identifierEle, ATTR_NEXTPROT_IDENTIFIER_NAME);
                if (nameValue != null) {
                    accessionBean.setAccession(nameValue);
                }

                DBSourceBean dbSourceBean = new DBSourceBean();
                String databaseValue = attributeValue(identifierEle, ATTR_NEXTPROT_IDENTIFIER_DATABASE);
                if (databaseValue != null) {
                    dbSourceBean.setDbName(databaseValue);
                } else {
                    dbSourceBean.setDbName(DbAcType.Unknown.type());
                }

                DbSourceAcEntryBean nxDbSourceAcEntryBean = new DbSourceAcEntryBean();
                nxDbSourceAcEntryBean.setAccessionBean(accessionBean);
                nxDbSourceAcEntryBean.setDbSourceBean(dbSourceBean);
                nxDbSourceAcEntryBeanList.add(nxDbSourceAcEntryBean);
            }
        }

        //create accession and db source for nextprot
        AccessionBean nextprotAc = new AccessionBean();
        nextprotAc.setAccession(nxAc);
        nextprotAc.setAcType(DbAcType.NextProt.type());

        DBSourceBean nxDbs = new DBSourceBean();
        nxDbs.setDbName(DbAcType.NextProt.type());
        nxDbs.setPrimaryEvidences(true);

        DbSourceAcEntryBean nxDbSAcEntryBean = new DbSourceAcEntryBean();
        nxDbSAcEntryBean.setAccessionBean(nextprotAc);
        nxDbSAcEntryBean.setDbSourceBean(nxDbs);
        nxDbSourceAcEntryBeanList.add(nxDbSAcEntryBean);

        return nxDbSourceAcEntryBeanList;
    }

    /**
     * Parses the {@code proteinExistence} value of a protein into a PE_OTH_CUR evidence at
     * traffic light level TL3. The color is GREEN when the value contains
     * {@code NXConts.PE_PROTEIN_LEVEL}, RED when it contains {@code NXConts.PE_TRANSCRIPT_LEVEL}
     * and BLACK for any other value (both checks are case-insensitive).
     *
     * @param protein a protein element
     * @param nxAc    the nextprot accession, appended to the base url to form the hyperlink
     * @return the evidence bean, or null if the element or its value attribute is missing
     */
    private PEEvidenceBean parsePEOthCurEvidence(Element protein, String nxAc) {
        Element proteinExistenceEle = protein.getChild(ELE_PROTEIN_EXISTENCE);
        if (proteinExistenceEle == null) {
            return null;
        }
        String proteinExistence = attributeValue(proteinExistenceEle, ATTR_PROTEIN_EXISTENCE_VALUE);
        if (proteinExistence == null) {
            return null;
        }

        PEEvidenceBean peOthCurEvidenceBean = new PEEvidenceBean();
        if (StringUtils.containsIgnoreCase(proteinExistence, NXConts.PE_PROTEIN_LEVEL)) {
            peOthCurEvidenceBean.setColorLevel(ColorType.GREEN.color());
        } else if (StringUtils.containsIgnoreCase(proteinExistence, NXConts.PE_TRANSCRIPT_LEVEL)) {
            peOthCurEvidenceBean.setColorLevel(ColorType.RED.color());
        } else {
            //homology, predicted, uncertain or any other value
            peOthCurEvidenceBean.setColorLevel(ColorType.BLACK.color());
        }

        TPBDataTypeBean tpbDataTypeBean = new TPBDataTypeBean();
        tpbDataTypeBean.setDataType(DataType.PE_OTH_CUR.type());
        tpbDataTypeBean.setLevel(TLLevel.TL3.level());

        peOthCurEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);
        peOthCurEvidenceBean.setEvidenceValue(proteinExistence);
        peOthCurEvidenceBean.setHyperlink(NXConts.PE_OTH_CUR_NX_BASE_URL + nxAc);
        return peOthCurEvidenceBean;
    }

    /**
     * Parses the {@code xref} elements of a protein into:
     * <ul>
     * <li>one PE MS annotation evidence per PRIDE (RED) or PeptideAtlas (YELLOW) xref in the
     * proteomic databases category;</li>
     * <li>a single PE ANTI annotation evidence counting the HPA antibody xrefs whose accession
     * starts with the HPA or CAB prefix (RED for one antibody, YELLOW for more than one).</li>
     * </ul>
     * An xref without a {@code url} child is processed with a null url.
     *
     * @param protein a protein element
     * @return the entry bean; it is returned without any evidence set when the protein has
     * no {@code xrefs} element
     */
    @SuppressWarnings("unchecked")
    private NXPeMsAntiEntryBean parseNXPeMSAntiAnn(Element protein) {
        NXPeMsAntiEntryBean peMsAntiEntryBean = new NXPeMsAntiEntryBean();
        Element xrefsEle = protein.getChild(ELE_XREFS);
        if (xrefsEle == null) {
            return peMsAntiEntryBean;
        }

        List<PEEvidenceBean> peMsEvidenceBeans = new ArrayList<PEEvidenceBean>();
        //PE ANTI ANN url and antibody counter
        String peMsAntiURL = null;
        int peMsAntiCounter = 0;

        List<Element> xrefElements = xrefsEle.getChildren(ELE_XREF);
        for (Element xrefEle : xrefElements) {
            String xrefDatabase = attributeValue(xrefEle, ATTR_XREF_DATABASE);
            String xrefCategory = attributeValue(xrefEle, ATTR_XREF_CATEGORY);
            String xrefAccession = attributeValue(xrefEle, ATTR_XREF_ACCESSION);

            String xrefUrl = null;
            Element xrefUrlEle = xrefEle.getChild(ELE_XREF_URL);
            if (xrefUrlEle != null) {
                xrefUrl = xrefUrlEle.getTextNormalize();
            }

            //both evidence kinds need a database and a category
            if (StringUtils.isBlank(xrefDatabase) || StringUtils.isBlank(xrefCategory)) {
                continue;
            }

            //find the PE MS ANN: category = Proteomic databases and database = PRIDE or PeptideAtlas
            if (StringUtils.equalsIgnoreCase(xrefCategory, NXConts.XREF_CA_PROTEOMIC_DATABASES)) {
                if (StringUtils.equalsIgnoreCase(xrefDatabase, NXConts.XREF_DB_PRIDE)) {
                    PEEvidenceBean prideEvidence = newPeMsAnnEvidence(xrefDatabase, xrefAccession, xrefUrl);
                    prideEvidence.setColorLevel(ColorType.RED.color());
                    peMsEvidenceBeans.add(prideEvidence);
                } else if (StringUtils.equalsIgnoreCase(xrefDatabase, NXConts.XREF_DB_PEPTIDE_ATLAS)) {
                    PEEvidenceBean atlasEvidence = newPeMsAnnEvidence(xrefDatabase, xrefAccession, xrefUrl);
                    atlasEvidence.setColorLevel(ColorType.YELLOW.color());
                    peMsEvidenceBeans.add(atlasEvidence);
                }
            }

            //find the PE ANTI ANN: category = Antibody databases and database = HPA
            if (StringUtils.isNotBlank(xrefAccession)
                    && StringUtils.equalsIgnoreCase(xrefDatabase, NXConts.XREF_DB_HPA)
                    && StringUtils.equalsIgnoreCase(xrefCategory, NXConts.XREF_CA_ANTIBODY_DATABASES)) {
                //an ENSG xref supplies the link, with the url end part stripped
                if (StringUtils.startsWithIgnoreCase(xrefAccession, NXConts.XREF_AC_PREFIX_ENSG)
                        && StringUtils.isNotBlank(xrefUrl)) {
                    peMsAntiURL = StringUtils.removeEndIgnoreCase(xrefUrl, NXConts.XREF_AC_ENSG_URL_END_PART);
                }
                //HPA / CAB xrefs are the counted antibodies; their url is only a fallback link
                if (StringUtils.startsWithIgnoreCase(xrefAccession, NXConts.XREF_AC_PREFIX_HPA)
                        || StringUtils.startsWithIgnoreCase(xrefAccession, NXConts.XREF_AC_PREFIX_CAB)) {
                    if (StringUtils.isBlank(peMsAntiURL)) {
                        peMsAntiURL = xrefUrl;
                    }
                    peMsAntiCounter++;
                }
            }
        }

        //add the PE MS Ann Evidences if Any
        peMsAntiEntryBean.setPeMsAnnEvidenceBeans(peMsEvidenceBeans);

        //PE ANTI ANN
        if (peMsAntiCounter > 0) {
            peMsAntiEntryBean.setPeAntiAnnEvidenceBean(newPeAntiAnnEvidence(peMsAntiCounter, peMsAntiURL));
        }
        return peMsAntiEntryBean;
    }

    /**
     * Creates a PE_MS_ANN evidence at traffic light level TL3 for a proteomic database xref.
     * The evidence value is the database name, followed by {@code " - "} and the accession when
     * the accession is not blank; the hyperlink is only set when the accession is not blank.
     * The color level is left for the caller to set.
     *
     * @param xrefDatabase  the xref database name, expected not blank
     * @param xrefAccession the xref accession, may be blank
     * @param xrefUrl       the xref url, may be null
     * @return the new evidence bean
     */
    private static PEEvidenceBean newPeMsAnnEvidence(String xrefDatabase, String xrefAccession, String xrefUrl) {
        PEEvidenceBean peMsAnnEvidenceBean = new PEEvidenceBean();

        String evidence = xrefDatabase;
        if (StringUtils.isNotBlank(xrefAccession)) {
            peMsAnnEvidenceBean.setHyperlink(xrefUrl);
            evidence += " - " + xrefAccession;
        }
        if (StringUtils.isNotBlank(evidence)) {
            peMsAnnEvidenceBean.setEvidenceValue(evidence);
        }

        TPBDataTypeBean tpbDataTypeBean = new TPBDataTypeBean();
        tpbDataTypeBean.setDataType(DataType.PE_MS_ANN.type());
        tpbDataTypeBean.setLevel(TLLevel.TL3.level());
        peMsAnnEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);
        return peMsAnnEvidenceBean;
    }

    /**
     * Creates the PE_ANTI_ANN evidence at traffic light level TL3 for the counted antibodies:
     * RED for exactly one antibody, YELLOW for more than one.
     *
     * @param antibodyCount the number of antibody xrefs, expected to be greater than zero
     * @param url           the hyperlink, only set when not blank
     * @return the new evidence bean
     */
    private static PEEvidenceBean newPeAntiAnnEvidence(int antibodyCount, String url) {
        PEEvidenceBean peAntiAnnEvidenceBean = new PEEvidenceBean();
        if (antibodyCount == 1) {
            peAntiAnnEvidenceBean.setColorLevel(ColorType.RED.color());
        }
        if (antibodyCount > 1) {
            peAntiAnnEvidenceBean.setColorLevel(ColorType.YELLOW.color());
        }
        peAntiAnnEvidenceBean.setEvidenceValue(antibodyCount + " " + NXConts.XREF_HPA_ANTIBODY_DESC);
        if (StringUtils.isNotBlank(url)) {
            peAntiAnnEvidenceBean.setHyperlink(url);
        }

        TPBDataTypeBean tpbDataTypeBean = new TPBDataTypeBean();
        tpbDataTypeBean.setDataType(DataType.PE_ANTI_ANN.type());
        tpbDataTypeBean.setLevel(TLLevel.TL3.level());
        peAntiAnnEvidenceBean.setTpbDataTypeBean(tpbDataTypeBean);
        return peAntiAnnEvidenceBean;
    }

    /**
     * Parses every {@code annotation} of every {@code annotationList} of a protein. Each
     * annotation carries the category of its enclosing list.
     *
     * @param protein a protein element
     * @return the annotation entries; empty if the protein has no {@code annotations} element
     * @throws DMXMLParserException if a boolean or integer attribute cannot be converted
     */
    @SuppressWarnings("unchecked")
    private List<NXAnnEntryBean> parseAnnotationList(Element protein) {
        List<NXAnnEntryBean> nxAnnEntryBeanList = new ArrayList<NXAnnEntryBean>();
        Element annotationsEle = protein.getChild(ELE_ANNOTATIONS);
        if (annotationsEle == null) {
            return nxAnnEntryBeanList;
        }

        List<Element> annotationListEles = annotationsEle.getChildren(ELE_ANNOTATION_LIST);
        for (Element annListEle : annotationListEles) {
            //the category is shared by all annotations of this list, null if absent
            String annotationCategoryValue = attributeValue(annListEle, ATTR_CATEGORY);
            List<Element> annotationEles = annListEle.getChildren(ELE_ANNOTATION);
            for (Element annotationEle : annotationEles) {
                nxAnnEntryBeanList.add(parseAnnotation(annotationEle, annotationCategoryValue));
            }
        }
        return nxAnnEntryBeanList;
    }

    /**
     * Parses a single {@code annotation} element: quality qualifier, unique name, cvTerm,
     * description, evidences and isoform specificity.
     *
     * @param ann      an annotation element
     * @param category the category of the enclosing annotation list, may be null
     * @return the annotation entry bean
     * @throws DMXMLParserException if a boolean or integer attribute cannot be converted
     */
    private NXAnnEntryBean parseAnnotation(Element ann, String category) {
        NXAnnEntryBean nxAnnEntryBean = new NXAnnEntryBean();
        nxAnnEntryBean.setCategory(category);

        String qualityQualifier = attributeValue(ann, ATTR_QUALITY_QUALIFIER);
        if (qualityQualifier != null) {
            nxAnnEntryBean.setQualityQualifier(qualityQualifier);
        }

        // Read from the uniqueName attribute itself, not from the quality qualifier.
        String uniqueName = attributeValue(ann, ATTR_UNIQUE_NAME);
        if (uniqueName != null) {
            nxAnnEntryBean.setUniqueName(uniqueName);
        }

        //cvTerm element
        Element cvTermEle = ann.getChild(ELE_CV_TERM);
        if (cvTermEle != null) {
            String cvTermAc = attributeValue(cvTermEle, ATTR_ACCESSION);
            if (cvTermAc != null) {
                nxAnnEntryBean.setCvTermAccession(cvTermAc);
            }
            Element cvNameEle = cvTermEle.getChild(ELE_CV_NAME);
            if (cvNameEle != null) {
                nxAnnEntryBean.setCvName(cvNameEle.getText());
            }
        }

        //Description element
        Element descEle = ann.getChild(ELE_DESCRIPTION);
        if (descEle != null) {
            nxAnnEntryBean.setDescription(descEle.getTextTrim());
        }

        try {
            List<NXAnnEvidenceBean> nxAnnEvidenceBeans = parseAnnEvidences(ann);
            if (!nxAnnEvidenceBeans.isEmpty()) {
                nxAnnEntryBean.setNxAnnEvidenceBeans(nxAnnEvidenceBeans);
            }
            List<NXIsoFormAnnBean> nxisoFormAnnBeans = parseIsoFormAnns(ann);
            if (!nxisoFormAnnBeans.isEmpty()) {
                nxAnnEntryBean.setNxisoFormAnnBeans(nxisoFormAnnBeans);
            }
        } catch (DataConversionException ex) {
            throw new DMXMLParserException(ex);
        }
        return nxAnnEntryBean;
    }

    /**
     * Parses the {@code evidence} children of the {@code evidences} element of an annotation.
     *
     * @param ann an annotation element
     * @return one bean per evidence element; empty if there is no {@code evidences} element
     * @throws DataConversionException if {@code isNegative} is not a boolean or
     *                                 {@code resourceRef} is not an integer
     */
    @SuppressWarnings("unchecked")
    private static List<NXAnnEvidenceBean> parseAnnEvidences(Element ann) throws DataConversionException {
        List<NXAnnEvidenceBean> nxAnnEvidenceBeans = new ArrayList<NXAnnEvidenceBean>();
        Element evidencesEle = ann.getChild(ELE_EVIDENCES);
        if (evidencesEle == null) {
            return nxAnnEvidenceBeans;
        }

        List<Element> evidenceEles = evidencesEle.getChildren(ELE_EVIDENCE);
        for (Element evidenceEle : evidenceEles) {
            NXAnnEvidenceBean nxAnnEvidenceBean = new NXAnnEvidenceBean();

            Attribute isNegAttr = evidenceEle.getAttribute(ATTR_IS_NEGATIVE);
            if (isNegAttr != null) {
                nxAnnEvidenceBean.setNegative(isNegAttr.getBooleanValue());
            }
            String qualifierType = attributeValue(evidenceEle, ATTR_QUALIFIER_TYPE);
            if (qualifierType != null) {
                nxAnnEvidenceBean.setQualifierType(qualifierType);
            }
            String resAssocType = attributeValue(evidenceEle, ATTR_RESOURCE_ASSOC_TYPE);
            if (resAssocType != null) {
                nxAnnEvidenceBean.setResourceAssocType(resAssocType);
            }
            Attribute resourceRefAttr = evidenceEle.getAttribute(ATTR_RESOURCE_REF);
            if (resourceRefAttr != null) {
                nxAnnEvidenceBean.setResourceRef(resourceRefAttr.getIntValue());
            }
            nxAnnEvidenceBeans.add(nxAnnEvidenceBean);
        }
        return nxAnnEvidenceBeans;
    }

    /**
     * Parses the {@code isoformAnnot} children of the {@code isoformSpecificity} element of an
     * annotation. Only the first {@code position} of each {@code positions} element is read.
     *
     * @param ann an annotation element
     * @return one bean per isoformAnnot element; empty if there is no
     * {@code isoformSpecificity} element
     * @throws DataConversionException if a {@code first} or {@code last} position is not an integer
     */
    @SuppressWarnings("unchecked")
    private static List<NXIsoFormAnnBean> parseIsoFormAnns(Element ann) throws DataConversionException {
        List<NXIsoFormAnnBean> nxisoFormAnnBeans = new ArrayList<NXIsoFormAnnBean>();
        Element isoFormSpecEle = ann.getChild(ELE_ISO_FORM_SPECIFICITY);
        if (isoFormSpecEle == null) {
            return nxisoFormAnnBeans;
        }

        List<Element> isoFormAnnots = isoFormSpecEle.getChildren(ELE_ISO_FORM_ANNOT);
        for (Element isoformAnn : isoFormAnnots) {
            NXIsoFormAnnBean nxisoFormAnnBean = new NXIsoFormAnnBean();

            String isoFormRef = attributeValue(isoformAnn, ATTR_ISO_FORM_REF);
            if (isoFormRef != null) {
                nxisoFormAnnBean.setIsoFormRef(isoFormRef);
            }

            Element positionsEle = isoformAnn.getChild(ELE_POSITIONS);
            Element positionEle = positionsEle != null ? positionsEle.getChild(ELE_POSITION) : null;
            if (positionEle != null) {
                Attribute firstAttr = positionEle.getAttribute(ATTR_POSITION_FIRST);
                if (firstAttr != null) {
                    nxisoFormAnnBean.setFirstPosition(firstAttr.getIntValue());
                }
                String firstStatus = attributeValue(positionEle, ATTR_POSITION_FIRST_STATUS);
                if (firstStatus != null) {
                    nxisoFormAnnBean.setFirstStatus(firstStatus);
                }
                Attribute lastAttr = positionEle.getAttribute(ATTR_POSITION_LAST);
                if (lastAttr != null) {
                    nxisoFormAnnBean.setLastPosition(lastAttr.getIntValue());
                }
                String lastStatus = attributeValue(positionEle, ATTR_POSITION_LAST_STATUS);
                if (lastStatus != null) {
                    nxisoFormAnnBean.setLastStatus(lastStatus);
                }
            }
            nxisoFormAnnBeans.add(nxisoFormAnnBean);
        }
        return nxisoFormAnnBeans;
    }

    /**
     * Manual smoke test: parses {@code ./testData/nextprot_chromosome_7.xml} and prints the
     * number of parsed entries.
     *
     * @param args not used
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        String filename = "./testData/nextprot_chromosome_7.xml";
        // The parser closes the stream once parsing has finished.
        FileInputStream fileInputStream = new FileInputStream(new File(filename));
        NxXMLParser parser = new NxXMLParser();
        List<NXEntryBean> nxEntryBeans = parser.parseNextProtXML(fileInputStream);
        System.out.println("======== total size of nextprot entry: " + nxEntryBeans.size());
    }
}