Java tutorial
package org.biopax.ols.impl; /* * #%L * Ontologies Access * %% * Copyright (C) 2008 - 2013 University of Toronto (baderlab.org) and Memorial Sloan-Kettering Cancer Center (cbio.mskcc.org) * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Lesser Public License for more details. * * You should have received a copy of the GNU General Lesser Public * License along with this program. If not, see * <http://www.gnu.org/licenses/lgpl-3.0.html>. * #L% */ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.biopax.ols.Annotation; import org.biopax.ols.Constants; import org.biopax.ols.DbXref; import org.biopax.ols.Loader; import org.biopax.ols.Ontology; import org.biopax.ols.Parser; import org.biopax.ols.Term; import org.biopax.ols.TermPath; import org.biopax.ols.TermRelationship; import org.biopax.ols.TermSynonym; import org.obo.datamodel.*; import java.io.IOException; import java.util.*; /** * Base class for all loaders. This class will perform OBO-to-OLS object conversion, creating all relevant * objects. Three methods are listed as abstract: configure, parse and printusage, which depend on the OBO * format being used (GOFF or OBO). * * @author Richard Cote */ public abstract class BaseOBO2AbstractLoader implements Loader { protected static Log logger = LogFactory.getLog(BaseOBO2AbstractLoader.class); protected HashMap<String, Term> ontologyTerms = new HashMap<String, Term>(5000); protected String ONTOLOGY_DEFINITION; protected String FULL_NAME; protected String SHORT_NAME; protected String QUERY_URL = null; protected String SOURCE_URL = null; protected OntologyBean ontBean = null; //common objects protected TermBean IS_A = null; protected TermBean PART_OF = null; protected TermBean DEVELOPS_FROM = null; protected TermBean ALT_ID = null; protected TermBean EXACT = null; protected TermBean NARROW = null; protected TermBean BROAD = null; protected TermBean RELATED = null; protected TermBean SYNONYM = null; protected final HashSet<String> PART_OF_SET = new HashSet<String>(); protected final HashSet<String> IS_A_SET = new HashSet<String>(); protected final HashSet<String> DEV_FROM_SET = new HashSet<String>(); private HashMap<String, Term> unknown_relations = new HashMap<String, Term>(); private TreeSet<String> rootTerms = new TreeSet<String>(); protected Parser parser; //for PSI-MOD private Set<String> MOD_NUMERIC_ANNOTATIONS = null; private Set<String> MOD_STRING_ANNOTATIONS = null; /** * batch mode will optimize inserts. set to false if having wonky errors or rdbms does not support it. */ protected static final boolean BATCH_MODE = true; protected void setParser(Parser parser) { this.parser = parser; } /** * determine which root detection algorithm will be used */ protected boolean useGreedy = false; protected void setUseGreedy(boolean useGreedy) { this.useGreedy = useGreedy; } /** * will return a collection of TermBean accesssion strings that correspond to the root term * * @throws IllegalStateException if the parser has not been initialized */ protected Collection<String> getRootTerms() { HashSet<String> retval = new HashSet<String>(); if (parser != null) { Set<OBOObject> roots = parser.getRootTerms(useGreedy); if (roots != null) { for (OBOObject root : roots) { retval.add(safeTrim(root.getID())); } } } else { throw new IllegalStateException("Parser has not been initialized. Did you run configure()?"); } return retval; } /** * Will iterate through all parsed terms and generate memory structure suitable for loading to db * * @throws IllegalStateException if the parser has not been initialized */ protected void process() { //returns unmodifiable set, so need to create a modifiable one Set<OBOObject> terms = new HashSet<OBOObject>(); //sanity check if (parser == null) { throw new IllegalStateException("parser has not been initialized. Did you run configure()?"); } //tmp collection to store terms Collection<OBOObject> toAdd; //add all terms - this will include obsolete and roots //sanity check to avoid NPE toAdd = parser.getTerms(); if (toAdd != null) { terms.addAll(toAdd); } //create common objects needed to build references and synonyms initializeCommonObjects(); //process all terms logger.info("Total Terms to process: " + terms.size()); int count = 0; for (Object obj : terms) { count++; if (obj instanceof OBOObject) { processTerm((OBOObject) obj); if (count % 1000 == 0) { logger.debug("Terms Processed: " + count); } } else { logger.info("Ignored object: " + obj.toString()); } } logger.info("Term processing done"); //need to iterate again to build relationships logger.info("Creating relationships"); count = 0; for (Object obj : terms) { count++; if (obj instanceof OBOObject) { processTermRelationships((OBOObject) obj); if (count % 1000 == 0) { logger.debug("Relationships Processed: " + count); } } else { logger.info("Ignored object: " + obj.toString()); } } logger.info("Relationship processing done"); //add to OntologyAccess ontBean.setTerms(ontologyTerms.values()); } /** * inetrnal helper method to initialize and reset shared objects */ protected void initializeCommonObjects() { //no need to check if parser is not null because //of previous sanity check at the start of process() //this is not true in the cast of the NEWT loader!!! String version; if (parser != null) { version = parser.getSession().getCurrentHistory().getVersion(); //if version is not set, set it to file date if (version == null) { Date tmp = parser.getSession().getCurrentHistory().getDate(); //if date is not set, set it to current date if (tmp != null) version = tmp.toString(); else version = (new Date()).toString(); } } else { version = (new Date()).toString(); } //create ontology ontBean = new OntologyBean(); if (ONTOLOGY_DEFINITION != null && ONTOLOGY_DEFINITION.length() > 2000) { logger.warn("ontology definition longer than allowed database column length - truncating"); ONTOLOGY_DEFINITION = ONTOLOGY_DEFINITION.substring(0, 2000); } ontBean.setDefinition(ONTOLOGY_DEFINITION); if (FULL_NAME != null && FULL_NAME.length() > 128) { logger.warn("ontology full name longer than allowed database column length - truncating"); FULL_NAME = FULL_NAME.substring(0, 128); } ontBean.setFullOntologyName(FULL_NAME); ontBean.setLoadDate(new java.sql.Date(GregorianCalendar.getInstance().getTime().getTime())); if (SHORT_NAME != null && SHORT_NAME.length() > 32) { logger.warn("ontology short name longer than allowed database column length - truncating"); SHORT_NAME = SHORT_NAME.substring(0, 32); } ontBean.setShortOntologyName(SHORT_NAME); if (version != null && version.length() > 128) { logger.warn("ontology version longer than allowed database column length - truncating"); version = version.substring(0, 128); } ontBean.setVersion(version); ontBean.setFullyLoaded(false); if (QUERY_URL != null && QUERY_URL.length() > 255) { logger.warn("ontology query url longer than allowed database column length - truncating"); QUERY_URL = QUERY_URL.substring(0, 255); } ontBean.setQueryURL(QUERY_URL); if (SOURCE_URL != null && SOURCE_URL.length() > 255) { logger.warn("ontology source url longer than allowed database column length - truncating"); SOURCE_URL = SOURCE_URL.substring(0, 255); } ontBean.setSourceURL(SOURCE_URL); //make certain there's no dirty data (esp if we're using it to load multiple ontologies) ontologyTerms.clear(); //create mapping sets IS_A_SET.clear(); IS_A_SET.add(Constants.IS_A_RELATION_TYPE); IS_A_SET.add(Constants.IS_A_RELATION_TYPE.toUpperCase()); //other mappings seen IS_A_SET.add("isa"); IS_A_SET.add("ISA"); IS_A_SET.add("OBO_REL:is_a"); PART_OF_SET.clear(); PART_OF_SET.add(Constants.PART_OF_RELATION_TYPE); PART_OF_SET.add(Constants.PART_OF_RELATION_TYPE.toUpperCase()); //other mappings seen PART_OF_SET.add("partof"); PART_OF_SET.add("PARTOF"); PART_OF_SET.add("OBO_REL:part_of"); PART_OF_SET.add("is_part_of"); DEV_FROM_SET.clear(); DEV_FROM_SET.add(Constants.DEVELOPS_FROM_RELATION_TYPE); DEV_FROM_SET.add(Constants.DEVELOPS_FROM_RELATION_TYPE.toUpperCase()); //other mappings seen DEV_FROM_SET.add("DERIVED/DEVELOPS_FROM"); //set PSI-MOD specific xrefs that will be converted to annotations MOD_NUMERIC_ANNOTATIONS = new HashSet<String>(); MOD_NUMERIC_ANNOTATIONS.add("DiffAvg"); MOD_NUMERIC_ANNOTATIONS.add("DiffMono"); MOD_NUMERIC_ANNOTATIONS.add("MassAvg"); MOD_NUMERIC_ANNOTATIONS.add("MassMono"); MOD_STRING_ANNOTATIONS = new HashSet<String>(); MOD_STRING_ANNOTATIONS.add("DiffFormula"); MOD_STRING_ANNOTATIONS.add("Formula"); MOD_STRING_ANNOTATIONS.add("Source"); MOD_STRING_ANNOTATIONS.add("Origin"); MOD_STRING_ANNOTATIONS.add("TermSpec"); //create relations IS_A = initializeTermBean(Constants.IS_A_RELATION_TYPE, Loader.RELATION_TYPE); ontologyTerms.put(IS_A.getIdentifier(), IS_A); PART_OF = initializeTermBean(Constants.PART_OF_RELATION_TYPE, Loader.RELATION_TYPE); ontologyTerms.put(PART_OF.getIdentifier(), PART_OF); DEVELOPS_FROM = initializeTermBean(Constants.DEVELOPS_FROM_RELATION_TYPE, Loader.RELATION_TYPE); ontologyTerms.put(DEVELOPS_FROM.getIdentifier(), DEVELOPS_FROM); //create synonyms ALT_ID = initializeTermBean(Constants.ALT_ID_SYNONYM_TYPE, Loader.SYNONYM_TYPE); ontologyTerms.put(ALT_ID.getIdentifier(), ALT_ID); EXACT = initializeTermBean(Constants.EXACT_SYNONYM_TYPE, Loader.SYNONYM_TYPE); ontologyTerms.put(EXACT.getIdentifier(), EXACT); NARROW = initializeTermBean(Constants.NARROW_SYNONYM_TYPE, Loader.SYNONYM_TYPE); ontologyTerms.put(NARROW.getIdentifier(), NARROW); BROAD = initializeTermBean(Constants.BROAD_SYNONYM_TYPE, Loader.SYNONYM_TYPE); ontologyTerms.put(BROAD.getIdentifier(), BROAD); RELATED = initializeTermBean(Constants.RELATED_SYNONYM_TYPE, Loader.SYNONYM_TYPE); ontologyTerms.put(RELATED.getIdentifier(), RELATED); SYNONYM = initializeTermBean(Constants.DEFAULT_SYNONYM_TYPE, Loader.SYNONYM_TYPE); ontologyTerms.put(SYNONYM.getIdentifier(), SYNONYM); //initialize synonymTypeDefs if (parser != null) { Collection<SynonymType> synonymTypes = parser.getSession().getSynonymTypes(); if (synonymTypes != null && !synonymTypes.isEmpty()) { for (SynonymType st : synonymTypes) { ontologyTerms.put(st.getID(), initializeTermBean(st.getName(), SHORT_NAME + ":" + st.getID(), getSynonymTypeDef(st.getScope()))); } } } // //get rid of stale data // instances.clear(); //get rid of stale data and get root terms rootTerms.clear(); if (parser != null) { rootTerms.addAll(getRootTerms()); } } /** * helper method to create a TermBean given a name and a definition. This is mostly for terms * associated with an ontology but not defined by it (eg relations types, synonym types, etc) * * @param name - this value will be used to set the term.name and term.identifier * @param def - the term definition. * @return a valid TermBean object * @throws IllegalArgumentException if the term name is null. */ private TermBean initializeTermBean(String name, String def) { if (name != null && !"".equals(name.trim())) { TermBean bean = new TermBean(); bean.setName(name.trim()); bean.setIdentifier(SHORT_NAME + ":" + name.toUpperCase().trim()); bean.setDefinition(safeTrim(def)); bean.setParentOntology(ontBean); //must set PK here because OJB will now not set it automatically //PK will be term_ac+ont+fully_loaded_false bean.setTermPk(bean.getIdentifier() + SHORT_NAME + "0"); return bean; } else { throw new IllegalArgumentException("Can't have a non-null term name!"); } } /** * helper method to create a TermBean given a name, an accession and a definition. * * @param name - this value will be used to set the term.name * @param accession - this value will be used to set the term.identifier * @param def - the term definition. * @return a valid TermBean object * @throws IllegalArgumentException if the term name or accession is null. */ protected TermBean initializeTermBean(String name, String accession, String def) { if (accession != null && !"".equals(accession.trim())) { TermBean bean = initializeTermBean(name, def); bean.setIdentifier(accession.trim()); bean.setTermPk(bean.getIdentifier() + SHORT_NAME + "0"); return bean; } else { throw new IllegalArgumentException("Can't have a non-null term name!"); } } /** * This method will convert an OBOEdit model term into a valid TermBean, while creating synonyms, * xrefs and annotations. The valid TermBean generated will be added to a global HashMap that will * be used at a later stage. * * @param obj being an OBOObject object obtained from the parser */ protected void processTerm(OBOObject obj) { if (obj.getID().startsWith("obo:")) { /* obo:datatype obo:property obo:class */ logger.debug("bogus term: " + obj.getID()); return; } TermBean trm = new TermBean(); //must set PK here because OJB will now not set it automatically //PK will be term_ac+ont+fully_loaded_false trm.setTermPk(safeTrim(obj.getID()) + SHORT_NAME + "0"); if (trm.getTermPk().length() > 255) { throw new IllegalStateException( "term PK longer than allowed database column length: " + trm.getTermPk()); } //trim definition trm.setDefinition(safeTrim(obj.getDefinition())); if (trm.getDefinition() != null && trm.getDefinition().length() > 4000) { logger.warn("term definition longer than allowed database column length - truncating" + trm.getIdentifier()); trm.setDefinition(trm.getDefinition().substring(0, 4000)); } //trim ID trm.setIdentifier(safeTrim(obj.getID())); if (trm.getIdentifier() != null && trm.getIdentifier().length() > 255) { logger.warn("term identifier longer than allowed database column length - truncating" + trm.getIdentifier()); trm.setIdentifier(trm.getIdentifier().substring(0, 255)); } //set as root term if required if (rootTerms.contains(safeTrim(obj.getID()))) { trm.setRootTerm(true); logger.info(obj.getID() + " is a root term"); } //trim name trm.setName(safeTrim(obj.getName())); //trim namespace if (trm.getName() != null && trm.getName().length() > 2000) { logger.warn("term name longer than allowed database column length - truncating" + trm.getIdentifier()); trm.setName(trm.getName().substring(0, 2000)); } Namespace nspace = obj.getNamespace(); if (nspace != null) { trm.setNamespace(safeTrim(nspace.getID())); if (trm.getNamespace() != null && trm.getNamespace().length() > 255) { logger.warn("term namespace longer than allowed database column length - truncating" + trm.getIdentifier()); trm.setNamespace(trm.getNamespace().substring(0, 255)); } } //set if obsolete trm.setObsolete(obj.isObsolete()); //set parent ontology trm.setParentOntology(ontBean); //process synonyms trm.setSynonyms(processSynonyms(obj, trm)); //process xrefs trm.setXrefs(processXrefs(obj, trm)); //process annotations trm.setAnnotations(processAnnotations(obj, trm)); //set number of children int nbChild = 0; if (obj.getChildren() != null) { nbChild = obj.getChildren().size(); } //set leaf status if (nbChild > 0) { trm.setLeaf(false); } else { trm.setLeaf(true); } // if (obj instanceof Instance) { // trm.setInstance(true); // //store type (eg objID is_instance_of typeID // instances.put(obj.getType().getID(), obj.getID()); // } else { // trm.setInstance(false); // } //add to global storage ontologyTerms.put(trm.getIdentifier(), trm); } /** * internal method to create AnnotationBeans objects from values extracted from an OBOEdit term object * and properly setup associations to the parent OLS term objet. Annotations can include comments and * replacement term ids for obsolete or misused terms, as well as defined subsets. * * @param obj - the OBOEdit object to extract information from * @param trm - the parent term to link the annotations to * @return a collection of properly created AnnotationBeans */ private Collection<Annotation> processAnnotations(OBOObject obj, TermBean trm) { ArrayList<Annotation> retval = new ArrayList<Annotation>(); String comment = safeTrim(obj.getComment()); if (comment != null) { AnnotationBean annot = new AnnotationBean(); annot.setAnnotationStringValue(comment); annot.setAnnotationType(Annotation.OBO_COMMENT); if (comment != null && comment.length() > 2000) { logger.warn("annotation comment longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationStringValue(annot.getAnnotationStringValue().substring(0, 2000)); } annot.setParentTerm(trm); retval.add(annot); } Set<ObsoletableObject> considers = obj.getConsiderReplacements(); for (ObsoletableObject obsolete : considers) { AnnotationBean annot = new AnnotationBean(); annot.setAnnotationType(Annotation.OBO_CONSIDER_REPLACEMENT); String val = obsolete.getID(); if (obsolete.getName() != null) { val += ": " + obsolete.getName(); } annot.setAnnotationStringValue(val); if (val != null && val.length() > 2000) { logger.warn("annotation value longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationStringValue(annot.getAnnotationStringValue().substring(0, 2000)); } annot.setParentTerm(trm); retval.add(annot); } Set<ObsoletableObject> replacers = obj.getReplacedBy(); for (ObsoletableObject replacedby : replacers) { AnnotationBean annot = new AnnotationBean(); annot.setAnnotationType(Annotation.OBO_REPLACED_BY); String val = replacedby.getID(); if (replacedby.getName() != null) { val += ": " + replacedby.getName(); } annot.setAnnotationStringValue(val); if (val != null && val.length() > 2000) { logger.warn("annotation value longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationStringValue(annot.getAnnotationStringValue().substring(0, 2000)); } annot.setParentTerm(trm); retval.add(annot); } Set<TermSubset> subsets = obj.getSubsets(); for (TermSubset subset : subsets) { AnnotationBean annot = new AnnotationBean(); annot.setAnnotationType(Annotation.SUBSET + "_" + subset.getName()); String val = subset.getDesc(); annot.setAnnotationStringValue(val); if (val != null && val.length() > 2000) { logger.warn("annotation value longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationStringValue(annot.getAnnotationStringValue().substring(0, 2000)); } annot.setParentTerm(trm); retval.add(annot); } Set<PropertyValue> propVal = obj.getPropertyValues(); for (PropertyValue pv : propVal) { AnnotationBean annot = new AnnotationBean(); //property_value: EFO:definition_editor "James Malone" xsd:string //parses to property = property_value // value = EFO:definition_editor "James Malone" xsd:string //so manually process the value to something more informative // property = EFO:definition_editor // value = "James Malone" try { if (pv.getValue() == null) { //invalidly constucted property_value! logger.warn("Error parsing property_value - Ignoring null value: " + pv.toString()); continue; } int ndx; //parse property type String tmpStr = pv.getValue(); ndx = tmpStr.indexOf(' '); if (ndx > 0) { tmpStr = tmpStr.substring(0, ndx).trim(); if (tmpStr.endsWith(":")) { tmpStr = tmpStr.substring(0, tmpStr.length() - 1); } logger.debug("Setting property type: " + tmpStr); annot.setAnnotationType(tmpStr.trim()); if (annot.getAnnotationType() != null && annot.getAnnotationType().length() > 2000) { logger.warn("annotation type longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationType(annot.getAnnotationType().substring(0, 2000)); } //parse property value tmpStr = pv.getValue(); tmpStr = tmpStr.substring(ndx + 1); ndx = tmpStr.indexOf("xsd:"); if (ndx > 0) { tmpStr = tmpStr.substring(0, ndx); } tmpStr = tmpStr.trim(); if (tmpStr.startsWith("\"") && tmpStr.endsWith("\"")) { tmpStr = tmpStr.substring(1, tmpStr.length() - 1); } logger.debug("Setting property value: " + tmpStr.trim()); annot.setAnnotationStringValue(tmpStr.trim()); if (tmpStr != null && tmpStr.length() > 2000) { logger.warn("annotation value longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationStringValue(annot.getAnnotationStringValue().substring(0, 2000)); } } else { //this will capture occations where people just put random key-value tags //as annotations annot.setAnnotationType(pv.getProperty()); if (pv.getProperty() != null && pv.getProperty().length() > 2000) { logger.warn("annotation type longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationType(annot.getAnnotationType().substring(0, 2000)); } annot.setAnnotationStringValue(pv.getValue()); if (pv.getValue() != null && pv.getValue().length() > 2000) { logger.warn("annotation value longer than allowed database column length - truncating " + trm.getIdentifier()); annot.setAnnotationStringValue(annot.getAnnotationStringValue().substring(0, 2000)); } } annot.setParentTerm(trm); } catch (RuntimeException re) { logger.warn("Error parsing property_value - Ignoring : " + pv.toString()); logger.debug("pv.getProperty() = " + pv.getProperty()); logger.debug("pv.getValue() = " + pv.getValue()); continue; } retval.add(annot); } if ("MOD".equals(SHORT_NAME)) { for (Object xrObj : obj.getDbxrefs()) { Dbxref xref = (Dbxref) xrObj; if (MOD_STRING_ANNOTATIONS.contains(safeTrim(xref.getDatabase()))) { //create string annotation AnnotationBean annot = new AnnotationBean(); annot.setAnnotationType(safeTrim(xref.getDatabase())); annot.setAnnotationStringValue(safeTrim(xref.getDesc())); annot.setParentTerm(trm); retval.add(annot); } else if (MOD_NUMERIC_ANNOTATIONS.contains(safeTrim(xref.getDatabase()))) { //create numeric annotation AnnotationBean annot = new AnnotationBean(); annot.setAnnotationType(safeTrim(xref.getDatabase())); annot.setAnnotationDoubleValue(safeTrim(xref.getDesc())); annot.setParentTerm(trm); retval.add(annot); } } } return retval; } /** * internal method to create DbXrefBean objects from values extracted from an OBOEdit term object * and properly setup associations to the parent OLS term objet. Xrefs will be generated for analog * xrefs, definition xrefs and synonym xrefs. * * @param obj - the OBOEdit object to extract information from * @param trm - the parent term to link the annotations to * @return a collection of properly created DbXrefBean */ private Collection<DbXref> processXrefs(OBOObject obj, TermBean trm) { HashSet<DbXref> retval = new HashSet<DbXref>(); for (Object xrObj : obj.getDbxrefs()) { Dbxref xref = (Dbxref) xrObj; if (!"MOD".equals(SHORT_NAME)) { //todo - hardcode analog for now and remove it when bug is fixed //fix oboedit codebase error retval.add(createDbXref(xref, Dbxref.ANALOG)); } else { if (MOD_STRING_ANNOTATIONS.contains(safeTrim(xref.getDatabase()))) { //do nothing - annotation will be created later } else if (MOD_NUMERIC_ANNOTATIONS.contains(safeTrim(xref.getDatabase()))) { //do nothing - annotation will be created later } else { //todo - hardcode analog for now and remove it when bug is fixed //fix oboedit codebase error retval.add(createDbXref(xref, Dbxref.ANALOG)); } } } //todo - remove this when bug is fixed for (Object xrObj : obj.getDefDbxrefs()) { Dbxref xref = (Dbxref) xrObj; retval.add(createDbXref(xref, Dbxref.DEFINITION)); } return retval; } /** * helper method to create and populate a DbXrefBean object from an OBOEdit Dbxref object * * @param xref - the OBOEdit object to extract information from * @param xrefType - the xref type * @return a valid OLS model DbXrefBean object */ private DbXref createDbXref(Dbxref xref, int xrefType) { DbXrefBean retval = new DbXrefBean(); retval.setDbName(safeTrim(xref.getDatabase())); if (retval.getDbName() != null && retval.getDbName().length() > 255) { logger.warn( "dbxref dbname longer than allowed database column length - truncating " + retval.getDbName()); retval.setDbName(retval.getDbName().substring(0, 255)); } //stupid oboeit artifact if (xref.getDatabaseID() != null && !xref.getID().trim().equals("none")) { retval.setAccession(safeTrim(xref.getDatabaseID())); // if (retval.getAccession() != null && retval.getAccession().length() > 512) { if (retval.getAccession() != null && retval.getAccession().length() > 256) { System.out.println("retval.getAccession().length() = " + retval.getAccession().length()); logger.warn( "dbxref accession longer than allowed database column length: " + retval.getAccession()); retval.setAccession(retval.getAccession().substring(0, 256)); // retval.setAccession(retval.getAccession().substring(0, 512)); } } //stupid oboeit artifact if (xref.getDesc() != null && !xref.getDesc().trim().equals("none")) { retval.setDescription(safeTrim(xref.getDesc())); if (retval.getDescription() != null && retval.getDescription().length() > 2000) { logger.warn("dbxref description longer than allowed database column length: " + retval.getDescription()); retval.setDescription(retval.getDescription().substring(0, 2000)); } } retval.setXrefType(xrefType); return retval; } /** * Once all the term objects have been created, this method will create the relationships and paths * to link all the children terms to a given parent term and update the OLS TermBean object with * the proper information from the global term storage. * * @param obj - the OBOEdit term object that will be used to extract information from */ private void processTermRelationships(OBOObject obj) { if (obj.getID().startsWith("obo:")) { /* obo:datatype obo:property obo:class */ logger.debug("bogus term: " + obj.getID()); return; } TermBean trm = (TermBean) ontologyTerms.get(safeTrim(obj.getID())); trm.setRelationships(processRelationships(obj, trm)); trm.setPaths(processPaths(obj, trm)); //update term object ontologyTerms.put(trm.getIdentifier(), trm); } /** * internal helper method to create TermRelationshipBeans for a given term. * <pre> * term1 * |_ child1 child1 IS_A term1 * |_ child2 child2 IS_A term1 * subject pred object * </pre> * * @param obj - the OBOEdit term object to extract information from * @param trm - the OLS parent term to link to * @return a Collection of valid TermRelationshipBeans */ private Collection<TermRelationship> processRelationships(OBOObject obj, TermBean trm) { HashSet<TermRelationship> retval = new HashSet<TermRelationship>(); // //check to see if a term has instances // //if there are instances, add them as children so that the relationship can be created by the loader // if (instances.get(obj.getID()) != null && !instances.get(obj.getID()).isEmpty()){ // for(String inst : instances.get(obj.getID())){ // // Term childTrm = ontologyTerms.get(inst); // TermRelationshipBean trb = new TermRelationshipBean(); // trb.setSubjectTerm(childTrm); // trb.setObjectTerm(trm); // TermBean otherRelation = getUnknownRelationTermBean("is_instance_of"); // if (otherRelation != null) { // trb.setPredicateTerm(otherRelation); // } // //set ontology // trb.setParentOntology(ontBean); // //add to retval // retval.add(trb); // // } // } Collection<Link> children = obj.getChildren(); for (Link lnk : children) { //get the child term from the link //use its ID to lookup in the map we created //if null, continue /* term1 |_ child1 child1 IS_A term1 |_ child2 child2 IS_A term1 subject pred object */ Term childTrm = ontologyTerms.get(safeTrim(lnk.getChild().getID())); if (childTrm != null) { TermRelationshipBean trb = new TermRelationshipBean(); trb.setSubjectTerm(childTrm); trb.setObjectTerm(trm); //set predicate type if (IS_A_SET.contains(lnk.getType().getID())) { trb.setPredicateTerm(IS_A); } else if (PART_OF_SET.contains(lnk.getType().getID())) { trb.setPredicateTerm(PART_OF); } else if (DEV_FROM_SET.contains(lnk.getType().getID())) { trb.setPredicateTerm(DEVELOPS_FROM); } else { TermBean otherRelation = getUnknownRelationTermBean(lnk.getType().getID()); if (otherRelation != null) { trb.setPredicateTerm(otherRelation); } else { logger.warn("Unable to create unknown relation type: >" + lnk.getType().getID() + "<"); continue; } } //set ontology trb.setParentOntology(ontBean); //add to retval retval.add(trb); } else { logger.debug("No object term found for link: " + lnk.toString()); } } return retval; } /** * helper method to create unknow relationship terms as they are required. These terms will also * be added to the global term storage for persistence with the ontology. * * @param relationType - the string that defines the relationship from the ontology * @return a valid TermBean */ protected TermBean getUnknownRelationTermBean(String relationType) { TermBean retval = null; if (relationType != null) { retval = (TermBean) unknown_relations.get(relationType.trim().toUpperCase()); if (retval == null) { retval = initializeTermBean(relationType.trim(), Loader.RELATION_TYPE); logger.info("Created unkonwn relation type: " + relationType); unknown_relations.put(relationType.trim().toUpperCase(), retval); //add to storage map so it'll get persisted with the rest ontologyTerms.put(retval.getIdentifier(), retval); } } return retval; } /** * internal helper method to create TermPathBeans for a given term. This method will * precompute all paths from a parent to all its children for the 3 major relationship types: * IS_A, PART_OF and DEVELOPS_FROM. The PART_OF and DEVELOPS_FROM relations can traverse IS_A * relations for maximal completeness and still be semantically correct, but IS_A relationships * cannot traverse other relation types. * <pre> * term1 * |_ child1 child1 IS_A term1 * |_ child2 child2 IS_A term1 * subject pred object * </pre> * * @param obj - the OBOEdit term object to extract information from * @param trm - the OLS parent term to link to * @return a Collection of valid TermRelationshipBeans */ private Collection<TermPath> processPaths(OBOObject obj, TermBean trm) { HashSet<TermPath> retval = new HashSet<TermPath>(); HashMap<String, Integer> paths = parser.computeChildPaths(1, IS_A_SET, obj); retval.addAll(createTermPathBeans(paths, Constants.IS_A_RELATION_TYPE_ID, IS_A, trm)); //the part_of relation can traverse is_a relations to generate term_paths //so the set passed to computeChildPaths needs to contain both PART_OF and IS_A labels. HashSet<String> traversingSet = new HashSet<String>(); traversingSet.addAll(PART_OF_SET); traversingSet.addAll(IS_A_SET); paths = parser.computeChildPaths(1, traversingSet, obj); retval.addAll(createTermPathBeans(paths, Constants.PART_OF_RELATION_TYPE_ID, PART_OF, trm)); //the dev_from relation can traverse is_a relations to generate term_paths //so the set passed to computeChildPaths needs to contain both DEV_FROM and IS_A labels. traversingSet.clear(); traversingSet.addAll(DEV_FROM_SET); traversingSet.addAll(IS_A_SET); paths = parser.computeChildPaths(1, traversingSet, obj); retval.addAll(createTermPathBeans(paths, Constants.DEVELOPS_FROM_RELATION_TYPE_ID, DEVELOPS_FROM, trm)); return retval; } /** * Internal method that actually does all the precomputing of paths */ private Collection<TermPath> createTermPathBeans(HashMap<String, Integer> paths, int relationTypeId, TermBean relationBean, TermBean trm) { HashSet<TermPath> retval = new HashSet<TermPath>(); //get the child term from the link //use its ID to lookup in the map we created //if null, continue /* term1 |_ child1 child1 IS_A term1 |_ child2 child2 IS_A term1 subject pred object */ Term objTrm; for (String termId : paths.keySet()) { //key = termID, value = distance int distance = paths.get(termId); objTrm = ontologyTerms.get(termId); if (objTrm != null) { //create bean TermPathBean tpb = new TermPathBean(); //set distance tpb.setDistance(distance); //set subject term tpb.setSubjectTerm(objTrm); //set object tpb.setObjectTerm(trm); //set predicateTerm - is_a, part_of, develops_from tpb.setPredicateTerm(relationBean); //set relationshipType tpb.setRelationshipTypeId((long) relationTypeId); //set ontology tpb.setParentOntology(ontBean); //add to retval retval.add(tpb); } else { logger.debug("No object term found for term path: " + trm.getIdentifier() + "->" + termId); } } return retval; } private String getSynonymTypeDef(int scope) { switch (scope) { case Synonym.EXACT_SYNONYM: return "Exact synonym type"; case Synonym.NARROW_SYNONYM: return "Narrow synonym type"; case Synonym.BROAD_SYNONYM: return "Broad synonym type"; case Synonym.RELATED_SYNONYM: return "Related synonym type"; case Synonym.UNKNOWN_SCOPE: default: return "Unknown synonym type"; } } /** * Builds the synonyms for a given term * * @param obj OBOObject representing the term * @param trm TermBean to link to the TermSynonym objects being created * @return a collection of properly constructed and linked TermSynonymBean objects */ private Collection<TermSynonym> processSynonyms(OBOObject obj, TermBean trm) { HashSet<TermSynonym> retval = new HashSet<TermSynonym>(); //loop over synonyms Set<Synonym> syns = obj.getSynonyms(); int synCount = 1; for (Synonym aSyn : syns) { TermSynonymBean tsb = new TermSynonymBean(); //link parent term tsb.setParentTerm(trm); String synVal = safeTrim(aSyn.getText()); if (synVal != null) { //set value tsb.setSynonym(synVal); if (synVal.length() > 2000) { logger.warn("synonym value longer than allowed database column length - truncating " + trm.getIdentifier()); tsb.setSynonym(tsb.getSynonym().substring(0, 2000)); } //check to see if there's a defined synonymType for it if (aSyn.getSynonymType() != null) { logger.debug("using user-defined synonym type: " + aSyn.getSynonymType().getName()); Term synTrm = ontologyTerms.get(aSyn.getSynonymType().getID()); if (synTrm != null) { tsb.setSynonymType(synTrm); } else { throw new IllegalStateException( "Attempting to use user-defined synonym type that has not been initialized in common objects: " + aSyn.getSynonymType().getID()); } } else { //logger.debug("using old-style synonym types"); //link synonymType Term switch (aSyn.getScope()) { case Synonym.EXACT_SYNONYM: tsb.setSynonymType(EXACT); break; case Synonym.NARROW_SYNONYM: tsb.setSynonymType(NARROW); break; case Synonym.BROAD_SYNONYM: tsb.setSynonymType(BROAD); break; case Synonym.RELATED_SYNONYM: tsb.setSynonymType(RELATED); break; case Synonym.UNKNOWN_SCOPE: default: tsb.setSynonymType(SYNONYM); break; } } Collection<Dbxref> oboSynXrefs = aSyn.getXrefs(); if (oboSynXrefs != null) { Collection<DbXref> xrefs = new HashSet<DbXref>(); for (Dbxref xref : oboSynXrefs) { xrefs.add(createDbXref(xref, Dbxref.RELATED_SYNONYM)); } tsb.setSynonymXrefs(xrefs); } //set synonym primary key because it is no longer being set by OJB //use syncount to avoid tsb hashcode collisions for a single trm tsb.setSynonymPk(SHORT_NAME + (synCount++ * tsb.hashCode()) + "!" + trm.getTermPk()); if (tsb.getSynonymPk().length() > 255) { throw new IllegalStateException( "synonym PK longer than allowed database column length: " + tsb.getSynonymPk()); } //add TermSynonym to retval collection retval.add(tsb); } else { logger.debug("Null Synonym value encountered for " + trm.getIdentifier()); } } //check for alt_ids Set<String> altIDs = obj.getSecondaryIDs(); String altID; for (Iterator<String> i = altIDs.iterator(); i.hasNext();) { altID = i.next(); TermSynonymBean tsb = new TermSynonymBean(); //link parent term tsb.setParentTerm(trm); //set def tsb.setSynonym(safeTrim(altID)); if (altID != null && altID.length() > 2000) { logger.warn("synonym value longer than allowed database column length - truncating " + trm.getIdentifier()); tsb.setSynonym(tsb.getSynonym().substring(0, 2000)); } //set synType tsb.setSynonymType(ALT_ID); //set synonym primary key because it is no longer being set by OJB //use syncount to avoid tsb hashcode collisions for a single trm tsb.setSynonymPk(SHORT_NAME + (synCount++ * tsb.hashCode()) + "!" + trm.getTermPk()); if (tsb.getSynonymPk().length() > 255) { throw new IllegalStateException( "synonym PK longer than allowed database column length: " + tsb.getSynonymPk()); } //add TermSynonym to retval collection retval.add(tsb); } //if main termID is URL, eg http://www.ebi.ac.uk/EFO_1234 //add synonym with just final portion of URL, eg EFO_1234 try { if (trm.getIdentifier().toLowerCase().startsWith("http:")) { altID = trm.getIdentifier().substring(trm.getIdentifier().lastIndexOf("/") + 1); TermSynonymBean tsb = new TermSynonymBean(); //link parent term tsb.setParentTerm(trm); //set def tsb.setSynonym(safeTrim(altID)); if (altID != null && altID.length() > 2000) { logger.warn("synonym value longer than allowed database column length - truncating " + trm.getIdentifier()); tsb.setSynonym(tsb.getSynonym().substring(0, 2000)); } //set synType tsb.setSynonymType(ALT_ID); //set synonym primary key because it is no longer being set by OJB //use syncount to avoid tsb hashcode collisions for a single trm tsb.setSynonymPk(SHORT_NAME + (synCount++ * tsb.hashCode()) + "!" + trm.getTermPk()); if (tsb.getSynonymPk().length() > 255) { throw new IllegalStateException( "synonym PK longer than allowed database column length: " + tsb.getSynonymPk()); } //add TermSynonym to retval collection retval.add(tsb); } } catch (IndexOutOfBoundsException e) { logger.debug("Could not create alt_id from URL from term: " + trm.getIdentifier()); } return retval; } /** * for debugging */ protected void dumpOntology() { for (String id : getRootTerms()) { logger.debug("Root term: " + id); dumpTerm(ontologyTerms.get(id), ""); } } /** * for debugging */ protected void dumpOntologyStats() { int ts = 0, tp = 0, tr = 0, ta = 0; for (Term tb : ontologyTerms.values()) { if (tb.getSynonyms() != null) { ts += tb.getSynonyms().size(); } if (tb.getPaths() != null) { tp += tb.getPaths().size(); } if (tb.getRelationships() != null) { tr += tb.getRelationships().size(); } if (tb.getAnnotations() != null) { ta += tb.getAnnotations().size(); } } logger.info("Number of terms: " + ontologyTerms.size()); logger.info("Number of synonyms: " + ts); logger.info("Number of relationships: " + tr); logger.info("Number of paths: " + tp); logger.info("Number of annotations: " + ta); } /** * for debugging * * @param term - term to dump * @param indent - spaces to indent */ protected void dumpTerm(Term term, String indent) { if (indent.length() > 15) { return; } if (term != null) { logger.debug(indent + "id: " + term.getIdentifier()); logger.debug(indent + "name: " + term.getName()); if (term.getSynonyms() != null) logger.debug(indent + "nb syn: " + term.getSynonyms().size()); if (term.getAnnotations() != null) logger.debug(indent + "nb annot: " + term.getAnnotations().size()); if (term.getRelationships() != null) { for (TermRelationship tr : term.getRelationships()) { String relationStr = indent + tr.getSubjectTerm().getName() + " " + tr.getPredicateTerm().getName() + " " + tr.getObjectTerm().getName(); logger.debug(relationStr); dumpTerm(tr.getSubjectTerm(), indent + " "); } } } } /** * takes a string and trims whitespace. if resulting string is empty, return null; * * @param inStr - string to trim. if null, return null; */ protected String safeTrim(String inStr) { if (inStr != null) { String tmp = inStr.trim(); if (tmp.length() > 0) { return tmp; } else { return null; } } else { return null; } } /** * returns the OntologyAccess that has been loaded from file. * * @return returns the OntologyAccess that has been loaded from file. * @throws IllegalStateException if the bean has not been properly initialized. */ public Ontology getOntology() throws IOException { if (ontBean != null) return ontBean; else { throw new IllegalStateException( "OntologyAccess bean not properly initialized. Did you call the proper sequence of methods: configure(), parse(), process()?"); } } }