act.installer.brenda.BrendaChebiOntology.java Source code

Java tutorial

Introduction

Here is the source code for act.installer.brenda.BrendaChebiOntology.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.installer.brenda;

import act.server.DBIterator;
import act.server.MongoDB;
import act.shared.Chemical;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.annotation.JsonProperty;

import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

public class BrendaChebiOntology {

    private static final Logger LOGGER = LogManager.getFormatterLogger(BrendaChebiOntology.class);

    // Values bound to the 'type' column of the ontology_chebi_Relations table when querying relationships.
    private static final int IS_SUBTYPE_OF_RELATIONSHIP_TYPE = 1;
    private static final int HAS_ROLE_RELATIONSHIP_TYPE = 12;

    // Shared JSON mapper. It is never reassigned, so it is declared final.
    private static final ObjectMapper mapper = new ObjectMapper();

    // This ChEBI ID corresponds to the ontology 'Application' which is a top-level role.
    // The method getApplications then traverses the ontologies down from this ontology.
    // The effect is to consider only roles that are applications, defined in the user manual as 'classifying [entities]
    // on the basis of their intended use by humans'.
    private static final String APPLICATION_CHEBI_ID = "CHEBI:33232";

    /**
     * The ChebiOntology class holds an ontology, defined as an ID (the ChEBI ID, for example 'CHEBI:16708' for adenine),
     * a term (a short string naming the ontology) and a longer definition.
     * These are queried from 2 different tables in the Brenda database: ontology_chebi_{Definitions,Terms}
     * We use a workaround (see http://stackoverflow.com/questions/4796872/full-outer-join-in-mysql) to mimic the
     * full outer join in MySQL. That allows us to merge information from both tables irrespective of the presence of
     * an ontology in one or the other. As a consequence of the outer joins, either the term or the definition may be
     * null for a given row.
     *
     * Two ChebiOntology objects are considered equal when their ChEBI IDs are equal; the term and definition do not
     * take part in equality or hashing.
     */
    public static class ChebiOntology {

        // The following query retrieves the terms (basic string defining an ontology) and definitions
        // (when they exist) corresponding to a ChEBI id (ex: "CHEBI:46195") to create ChebiOntology objects.
        public static final String QUERY = StringUtils.join(new String[] { "SELECT", "  terms.id_go,",
                "  terms.term,", "  definitions.definition", "FROM ontology_chebi_Terms terms",
                "LEFT OUTER JOIN ontology_chebi_Definitions definitions", "ON terms.id_go = definitions.id_go",
                "UNION", "SELECT", "  definitions.id_go,", "  terms.term,", "  definitions.definition",
                "FROM ontology_chebi_Terms terms", "RIGHT OUTER JOIN ontology_chebi_Definitions definitions",
                "ON terms.id_go = definitions.id_go" }, " ");

        @JsonProperty("chebi_id")
        private String chebiId;

        @JsonProperty("term")
        private String term;

        @JsonProperty("definition")
        private String definition;

        /**
         * @param chebiId the ChEBI ID of the ontology (ex: "CHEBI:46195")
         * @param term the short term naming the ontology
         * @param definition the longer definition of the ontology
         */
        public ChebiOntology(String chebiId, String term, String definition) {
            this.chebiId = chebiId;
            this.term = term;
            this.definition = definition;
        }

        public String getChebiId() {
            return this.chebiId;
        }

        public String getTerm() {
            return this.term;
        }

        public String getDefinition() {
            return this.definition;
        }

        // We override the equals and hashCode methods to make a ChebiOntology object hashable and allow construction of
        // HashSet and HashMap of ChebiOntology objects. Both methods are null-safe with respect to the ChEBI ID, so
        // that an object with a null ID can still be stored in a hash-based collection.
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }

            ChebiOntology that = (ChebiOntology) o;
            return Objects.equals(chebiId, that.chebiId);
        }

        @Override
        public int hashCode() {
            // Objects.hashCode returns 0 for null, keeping hashCode consistent with the null-tolerant equals above.
            return Objects.hashCode(chebiId);
        }

        /* This function creates a ChebiOntology object from a ResultSet resulting from a SQL query.
         * It pulls the 3 first fields from the current row, assuming the order:
         * ChebiId,
         * Term,
         * Definition
         */
        public static ChebiOntology fromResultSet(ResultSet resultSet) throws SQLException {
            return new ChebiOntology(resultSet.getString(1), resultSet.getString(2), resultSet.getString(3));
        }

        /* Converts this ontology to a BasicDBObject with the keys "chebi_id", "term" and "definition". */
        public BasicDBObject toBasicDBObject() {
            BasicDBObject o = new BasicDBObject();
            o.put("chebi_id", getChebiId());
            o.put("term", getTerm());
            o.put("definition", getDefinition());
            return o;
        }

    }

    public static class ChebiRelationship {

        // The following query allows to retrieve the relations of a given type, passed as argument.
        // It is restricted to ids starting with the string 'CHEBI:'
        public static final String QUERY = StringUtils
                .join(new String[] { "SELECT", " id_go,", " rel_id_go", "FROM ontology_chebi_Relations",
                        "WHERE type = ?", "AND id_go like 'CHEBI:%'", "AND rel_id_go like 'CHEBI:%'" }, " ");

        public static void bindType(PreparedStatement stmt, Integer relationshipType) throws SQLException {
            stmt.setInt(1, relationshipType);
        }

        private String chebiId;
        private String parentChebiId;

        public ChebiRelationship(String chebiId, String parentChebiId) {
            this.chebiId = chebiId;
            this.parentChebiId = parentChebiId;
        }

        public String getChebiId() {
            return chebiId;
        }

        public String getParentChebiId() {
            return parentChebiId;
        }

        /* This function creates a ChebiOntology object from a ResultSet resulting from a SQL query.
         * It pulls the 3 first fields from the query, assuming the order:
         * chebiId,
         * parentChebiId
         * if type = 1, chebiId refers to a subtype of the ontology parentChebiId
         * if type = 12, parentChebiId refers to a role of the ontology chebiId
         */
        public static ChebiRelationship fromResultSet(ResultSet resultSet) throws SQLException {
            return new ChebiRelationship(resultSet.getString(1), resultSet.getString(2));
        }
    }

    /**
     * A ChebiApplicationSet groups the two sets of application ontologies attached to a chemical entity:
     * its direct applications and its main applications.
     */
    public static class ChebiApplicationSet {

        @JsonProperty("direct_applications")
        private Set<ChebiOntology> directApplications;

        @JsonProperty("main_applications")
        private Set<ChebiOntology> mainApplications;

        public ChebiApplicationSet(Set<ChebiOntology> directApplications, Set<ChebiOntology> mainApplications) {
            this.directApplications = directApplications;
            this.mainApplications = mainApplications;
        }

        public Set<ChebiOntology> getMainApplications() {
            return this.mainApplications;
        }

        public Set<ChebiOntology> getDirectApplications() {
            return this.directApplications;
        }

        /* Converts this application set to a BasicDBObject holding two lists of ontology documents, under the
         * keys "direct_applications" and "main_applications".
         */
        public BasicDBObject toBasicDBObject() {
            BasicDBList directList = new BasicDBList();
            BasicDBList mainList = new BasicDBList();

            for (ChebiOntology application : getDirectApplications()) {
                directList.add(application.toBasicDBObject());
            }
            for (ChebiOntology application : getMainApplications()) {
                mainList.add(application.toBasicDBObject());
            }

            BasicDBObject document = new BasicDBObject();
            document.append("direct_applications", directList);
            document.append("main_applications", mainList);
            return document;
        }
    }

    /**
     * Builds the ontology map (ChEBI ID -> ChebiOntology) from the ontologies returned by the BRENDA DB.
     * @param brendaDB a SQLConnection object to the BRENDA DB
     * @return a map from ChEBI ID to ChebiOntology objects
     * @throws SQLException
     */
    public static Map<String, ChebiOntology> fetchOntologyMap(SQLConnection brendaDB) throws SQLException {
        Map<String, ChebiOntology> chebiIdToOntology = new HashMap<>();
        int rowCount = 0;

        // We should not see collisions with the ChEBI ID as key.
        // The number of distinct ChEBI ID in the DB is the same as the number of rows.
        for (Iterator<ChebiOntology> cursor = brendaDB.getChebiOntologies(); cursor.hasNext(); rowCount++) {
            ChebiOntology current = cursor.next();
            chebiIdToOntology.put(current.getChebiId(), current);
        }

        LOGGER.debug("Done processing ontologies");
        LOGGER.debug("Found %d ontologies", rowCount);

        return chebiIdToOntology;
    }

    /**
     * Collects all relationships of type 'isSubTypeOf' between ChEBI IDs from the BRENDA DB and groups them
     * by parent.
     * @param brendaDB a SQLConnection object to the BRENDA DB
     * @return a map from a ChEBI ID (String) to the set of its subtypes' ChEBI IDs.
     * @throws SQLException
     */
    public static Map<String, Set<String>> fetchIsSubtypeOfRelationships(SQLConnection brendaDB)
            throws SQLException {

        Map<String, Set<String>> parentToSubtypes = new HashMap<>();
        int edgeCount = 0;

        // Iterate over all ChEBI relationships of type "is subtype of".
        Iterator<ChebiRelationship> edges = brendaDB.getChebiRelationships(IS_SUBTYPE_OF_RELATIONSHIP_TYPE);
        while (edges.hasNext()) {
            ChebiRelationship edge = edges.next();

            // Register the child under its parent, creating the parent's set on first sight.
            parentToSubtypes.computeIfAbsent(edge.getParentChebiId(), parent -> new HashSet<>())
                    .add(edge.getChebiId());
            edgeCount++;
        }

        LOGGER.debug("Done processing 'is subtype of' relationships");
        LOGGER.debug("Found %d 'is subtype of' relationships", edgeCount);

        return parentToSubtypes;
    }

    /**
     * Collects all relationships of type 'hasRole' between ChEBI IDs from the BRENDA DB and groups them by
     * the ChEBI ID that holds the role.
     * @param brendaDB a SQLConnection object to the BRENDA DB
     * @return a map from a ChEBI ID (String) to the set of its roles' ChEBI IDs.
     * @throws SQLException
     */
    public static Map<String, Set<String>> fetchHasRoleRelationships(SQLConnection brendaDB) throws SQLException {

        Map<String, Set<String>> baseToRoles = new HashMap<>();
        int edgeCount = 0;

        // Iterate over all ChEBI relationships of type "has role".
        Iterator<ChebiRelationship> edges = brendaDB.getChebiRelationships(HAS_ROLE_RELATIONSHIP_TYPE);
        while (edges.hasNext()) {
            // In a "has role" relationship, the child is the "base chebi id" and the parent is the "role chebi id".
            ChebiRelationship edge = edges.next();
            String role = edge.getParentChebiId();
            String base = edge.getChebiId();

            // Add the role to the base's set of roles, creating that set on first sight.
            baseToRoles.computeIfAbsent(base, unused -> new HashSet<>()).add(role);
            edgeCount++;
        }

        LOGGER.debug("Done processing 'has role' relationships");
        LOGGER.debug("Found %s 'has role' relationships", edgeCount);

        return baseToRoles;
    }

    /**
     * This method processes relationships "is subtype of" to produce a mapping between each application and its main
     * applications, used subsequently (outside of this) to compute each ontology's main applications.
     *
     * The main applications are the direct subtypes of the ontology identified by applicationChebiId. Each main
     * application maps to a set containing at least itself; every ontology reachable from a main application
     * through "is subtype of" relationships maps to the union of the main applications it descends from.
     *
     * The traversal is a worklist fixpoint: an application is (re)visited only when its set of main applications
     * grew, so the method terminates even if the relationship graph contains cycles.
     *
     * @param isSubtypeOfRelationships map {chebi id -> subtype's chebi ids}
     * @param applicationChebiId chebi id of the top-level 'application' ontology
     * @return a map {application's chebi id -> related main application's chebi ids}; empty when
     *         applicationChebiId has no known subtypes
     */
    public static Map<String, Set<String>> getApplicationToMainApplicationsMap(
            Map<String, Set<String>> isSubtypeOfRelationships, String applicationChebiId) {

        Map<String, Set<String>> applicationToMainApplicationsMap = new HashMap<>();

        // Compute the set of main applications. These are the ontologies that are subtypes of the ontology 'application'.
        Set<String> mainApplicationsChebiId = isSubtypeOfRelationships.get(applicationChebiId);
        if (mainApplicationsChebiId == null) {
            // The 'application' ontology has no known subtype: there is nothing to traverse.
            return applicationToMainApplicationsMap;
        }

        // Compute the initial list of applications to visit from the set of main applications.
        ArrayList<String> applicationsToVisit = new ArrayList<>(mainApplicationsChebiId);

        // For each main application, map it to a set containing only itself. The set must be mutable since a main
        // application can also be a subtype of another application, in which case it inherits more main applications.
        for (String mainApplication : mainApplicationsChebiId) {
            Set<String> itself = new HashSet<>();
            itself.add(mainApplication);
            applicationToMainApplicationsMap.put(mainApplication, itself);
        }

        // Then visit all applications in a BFS fashion, propagating/merging the set of main applications as we
        // progress down the relationship graph.
        int currentIndex = 0;
        while (currentIndex < applicationsToVisit.size()) {

            String currentApplication = applicationsToVisit.get(currentIndex);
            currentIndex++;

            Set<String> subApplications = isSubtypeOfRelationships.get(currentApplication);
            if (subApplications == null) {
                continue;
            }

            // Snapshot the set to propagate: with a self-referencing relationship, the sub-application's set is the
            // very set we are reading from.
            Set<String> inheritedMainApplications =
                    new HashSet<>(applicationToMainApplicationsMap.get(currentApplication));

            for (String subApplication : subApplications) {
                Set<String> mainApplicationsSet = applicationToMainApplicationsMap.get(subApplication);
                boolean newlyDiscovered = mainApplicationsSet == null;
                if (newlyDiscovered) {
                    mainApplicationsSet = new HashSet<>();
                    applicationToMainApplicationsMap.put(subApplication, mainApplicationsSet);
                }
                boolean grew = mainApplicationsSet.addAll(inheritedMainApplications);

                // Only (re)visit a sub-application when it carries new information to push further down.
                if (newlyDiscovered || grew) {
                    applicationsToVisit.add(subApplication);
                }
            }
        }

        return applicationToMainApplicationsMap;
    }

    /**
     * This function constructs the set of main and direct applications for each ontology that has a role.
     *
     * A role is kept as a direct application only if it is an application, i.e. if it is a key of the map computed
     * by getApplicationToMainApplicationsMap for the top-level 'application' ontology. The main applications of a
     * chemical entity are the union of the main applications of its direct applications.
     *
     * Chemical entities, and applications, whose ChEBI ID is absent from ontologyMap are skipped: they have no
     * ChebiOntology object to represent them, and using the resulting null as a map key would merge unrelated
     * entities together.
     *
     * @param ontologyMap map {chebi id -> ChebiOntology object}
     * @param isSubtypeOfRelationships map {chebi id -> set of chebi id for its subtypes}
     * @param hasRoleRelationships map {chebi id -> set of chebi id for its roles}
     * @return a map from ChebiOntology objects to a ChebiApplicationSet object; only entities with at least one
     *         direct or main application are present
     */
    public static Map<ChebiOntology, ChebiApplicationSet> getApplications(Map<String, ChebiOntology> ontologyMap,
            Map<String, Set<String>> isSubtypeOfRelationships, Map<String, Set<String>> hasRoleRelationships) {

        Map<String, Set<String>> applicationToMainApplicationsMap = getApplicationToMainApplicationsMap(
                isSubtypeOfRelationships, APPLICATION_CHEBI_ID);

        Map<ChebiOntology, ChebiApplicationSet> chemicalEntityToApplicationsMap = new HashMap<>();

        for (Map.Entry<String, Set<String>> entityToRoles : hasRoleRelationships.entrySet()) {
            ChebiOntology chemicalEntity = ontologyMap.get(entityToRoles.getKey());
            if (chemicalEntity == null) {
                // No ontology object for this ChEBI ID: skip it rather than storing results under a null key.
                continue;
            }

            Set<ChebiOntology> directApplications = new HashSet<>();
            Set<ChebiOntology> mainApplications = new HashSet<>();

            for (String role : entityToRoles.getValue()) {
                // Filter out the roles that are not applications.
                Set<String> mainApplicationIds = applicationToMainApplicationsMap.get(role);
                if (mainApplicationIds == null) {
                    continue;
                }

                ChebiOntology directApplication = ontologyMap.get(role);
                if (directApplication != null) {
                    directApplications.add(directApplication);
                }

                for (String mainApplicationId : mainApplicationIds) {
                    ChebiOntology mainApplication = ontologyMap.get(mainApplicationId);
                    if (mainApplication != null) {
                        mainApplications.add(mainApplication);
                    }
                }
            }

            // Only keep the chemical entities that have at least one application.
            if (!directApplications.isEmpty() || !mainApplications.isEmpty()) {
                chemicalEntityToApplicationsMap.put(chemicalEntity,
                        new ChebiApplicationSet(directApplications, mainApplications));
            }
        }

        return chemicalEntityToApplicationsMap;
    }

    /**
     * This function contains the main logic for adding ChEBI applications to the Installer database.
     * Provided with a connection to both the Mongo instance on which the database "actv01" lives and a SQL connection
     * to Brenda, it retrieves the application sets corresponding to each ChEBI chemical and updates the chemicals
     * for which an application set was found.
     *
     * Chemicals are skipped when they have no ChEBI ID, when their ChEBI ID is unknown to the ontology map, or when
     * no application was computed for their ontology.
     *
     * @param db a MongoDB object representing the connection to the main MongoDB instance
     * @param brendaDB a SQLConnection to the Brenda database, on which to find the ChEBI ontologies and relationships
     * @throws SQLException
     * @throws IOException
     */
    public void addChebiApplications(MongoDB db, SQLConnection brendaDB) throws SQLException, IOException {

        // Get the ontology map (ChebiId -> ChebiOntology object)
        Map<String, ChebiOntology> ontologyMap = fetchOntologyMap(brendaDB);
        LOGGER.info("Done fetching ontology map: ChEBI ID -> ontology object (id, term, definition)");

        // Get relationships of type 'isSubtypeOf'
        Map<String, Set<String>> isSubtypeOfRelationships = fetchIsSubtypeOfRelationships(brendaDB);
        LOGGER.info("Done fetching 'is subtype of' relationships");

        // Get relationships of type 'hasRole'
        Map<String, Set<String>> hasRoleRelationships = fetchHasRoleRelationships(brendaDB);
        LOGGER.info("Done fetching 'has role' relationships");

        // Get the applications for all chemical entities
        Map<ChebiOntology, ChebiApplicationSet> chemicalEntityToApplicationsMap = getApplications(ontologyMap,
                isSubtypeOfRelationships, hasRoleRelationships);
        LOGGER.info("Done computing applications");

        DBIterator chemicalsIterator = db.getIteratorOverChemicals();
        // Iterate over all chemicals
        while (chemicalsIterator.hasNext()) {
            Chemical chemical = db.getNextChemical(chemicalsIterator);
            String inchi = chemical.getInChI();
            String chebiId = db.getChebiIDFromInchi(inchi);

            if (chebiId == null || chebiId.isEmpty()) {
                continue;
            }

            LOGGER.info("Processing Chemical with InChI: %s and ChEBI ID: %s", inchi, chebiId);
            ChebiOntology ontology = ontologyMap.get(chebiId);
            if (ontology == null) {
                // Never look up the applications map with a null key: an entry stored under null would belong to
                // some other entity and must not be attached to this chemical.
                LOGGER.debug("Found no ontology for %s. Skipping database update for this chemical.", chebiId);
                continue;
            }

            ChebiApplicationSet applicationSet = chemicalEntityToApplicationsMap.get(ontology);
            if (applicationSet == null) {
                LOGGER.debug("Found no applications for %s. Skipping database update for this chemical.", chebiId);
                continue;
            }
            db.updateChemicalWithChebiApplications(chebiId, applicationSet);
        }
    }

    /**
     * Proof of concept: connects to a local BRENDA DB, computes the applications of all chemical entities and
     * pretty-prints, as JSON, the application set found for the ontology "CHEBI:46195".
     * This should later be moved to either a test or removed.
     * @param args unused
     * @throws Exception if the database access or the JSON serialization fails
     */
    public static void main(String[] args) throws Exception {
        // Connect to the BRENDA DB
        SQLConnection brendaDB = new SQLConnection();
        brendaDB.connect("127.0.0.1", 3306, "brenda_user", "");

        try {
            // Get the ontology map (ChebiId -> ChebiOntology object)
            Map<String, ChebiOntology> ontologyMap = fetchOntologyMap(brendaDB);

            // Get "is subtype of" relationships
            Map<String, Set<String>> isSubTypeOfRelationships = fetchIsSubtypeOfRelationships(brendaDB);

            // Get "has role" relationships
            Map<String, Set<String>> hasRoleRelationships = fetchHasRoleRelationships(brendaDB);

            // Get the applications for all chemical entities
            Map<ChebiOntology, ChebiApplicationSet> chemicalEntityToApplicationsMap = getApplications(ontologyMap,
                    isSubTypeOfRelationships, hasRoleRelationships);

            ChebiOntology applicationOntology = ontologyMap.get("CHEBI:46195");

            // Convert ChebiApplicationSet to JSON string and pretty print
            String chebiApplicationSetString = mapper.writerWithDefaultPrettyPrinter()
                    .writeValueAsString(chemicalEntityToApplicationsMap.get(applicationOntology));

            System.out.println(chebiApplicationSetString);
        } finally {
            // Disconnect from the BRENDA DB, even when one of the steps above failed
            brendaDB.disconnect();
        }
    }
}