com.act.biointerpretation.networkanalysis.MetabolismNetwork.java Source code

Java tutorial

Introduction

Here is the source code for com.act.biointerpretation.networkanalysis.MetabolismNetwork.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.networkanalysis;

import act.server.DBIterator;
import act.server.MongoDB;
import act.shared.Reaction;
import com.act.biointerpretation.l2expansion.L2Prediction;
import com.act.biointerpretation.l2expansion.L2PredictionCorpus;
import com.act.workflow.tool_manager.workflow.workflow_mixins.mongo.ReactionKeywords;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jacob.com.NotImplementedException;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONObject;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Represents a metabolism network, cataloging all possible predicted chemical transformations that could be happening
 * in a given sample.
 */
public class MetabolismNetwork implements ImmutableNetwork {

    private static transient final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private static final Logger LOGGER = LogManager.getFormatterLogger(MetabolismNetwork.class);

    private static final String ORG_FIELD = ReactionKeywords.ORGANISM$.MODULE$.toString();

    // Map from inchis to nodes.
    // TODO: generalize to case when we no longer exclusively use inchis
    @JsonProperty("nodes")
    List<NetworkNode> nodes;

    @JsonProperty("edges")
    List<NetworkEdge> edges;

    @JsonIgnore
    Map<Integer, NetworkNode> UIDIndex;
    @JsonIgnore
    Map<String, NetworkNode> inchiIndex;

    @JsonCreator
    private MetabolismNetwork(@JsonProperty("nodes") List<NetworkNode> nodes,
            @JsonProperty("edges") List<NetworkEdge> edges) {
        this();
        nodes.forEach(this::addNode);
        edges.forEach(this::addEdge);
    }

    public MetabolismNetwork() {
        nodes = new ArrayList<>();
        edges = new ArrayList<>();
        UIDIndex = new HashMap<>();
        inchiIndex = new HashMap<>();
    }

    @Override
    public NetworkNode getNodeByUID(Integer uid) {
        NetworkNode result = UIDIndex.get(uid);
        if (result == null) {
            throw new IllegalArgumentException("Node with given UID not found!");
        }
        return result;
    }

    @Override
    public Optional<NetworkNode> getNodeOptionByUID(Integer uid) {
        return Optional.ofNullable(UIDIndex.get(uid));
    }

    @Override
    public NetworkNode getNodeByInchi(String inchi) {
        NetworkNode result = inchiIndex.get(inchi);
        if (result == null) {
            throw new IllegalArgumentException("Didn't find node with inchi " + inchi);
        }
        return result;
    }

    @Override
    public Optional<NetworkNode> getNodeOptionByInchi(String inchi) {
        return Optional.ofNullable(inchiIndex.get(inchi));
    }

    @Override
    public List<NetworkNode> getNodesByMass(Double mass, Double massTolerance) {
        throw new NotImplementedException("Mass indexing not yet implemented.");
    }

    @JsonIgnore
    @Override
    public Collection<NetworkNode> getNodes() {
        return Collections.unmodifiableCollection(nodes);
    }

    /**
     * Get all edges from the graph.
     *
     * @return An unmodifiable collection of the graph's edges.
     */
    public Collection<NetworkEdge> getEdges() {
        return Collections.unmodifiableCollection(edges);
    }

    @Override
    public Set<NetworkNode> getSubstrates(NetworkEdge edge) {
        return edge.getSubstrates().stream().map(this::getNodeByUID).collect(Collectors.toSet());
    }

    @Override
    public Set<NetworkNode> getProducts(NetworkEdge edge) {
        return edge.getProducts().stream().map(this::getNodeByUID).collect(Collectors.toSet());
    }

    /**
     * Get all nodes that are one step forward from this node. These are predicted products of reactions that have this
     * node as a substrate.
     *
     * @param node The starting node.
     * @return The list of potential product nodes.
     */
    @Override
    public List<NetworkNode> getDerivatives(NetworkNode node) {
        List<NetworkNode> derivatives = new ArrayList<>();
        for (NetworkEdge edge : node.getOutEdges()) {
            edge.getProducts().forEach(p -> derivatives.add(getNodeByUID(p)));
        }
        return derivatives;
    }

    /**
     * Get all nodes that are one step before this node. These are substrates of reactions that are predicted to produce
     * this node as a product.
     *
     * @param node The starting node.
     * @return The list of potential substrate nodes.
     */
    @Override
    public List<NetworkNode> getPrecursors(NetworkNode node) {
        List<NetworkNode> precursors = new ArrayList<>();
        for (NetworkEdge edge : node.getInEdges()) {
            edge.getSubstrates().forEach(s -> precursors.add(getNodeByUID(s)));
        }
        return precursors;
    }

    /**
     * Trace the pathway back from the given startNode for up to numSteps steps, and return the subgraph of all
     * precursors found.  This is intended to supply explanatory pathways for the input node.
     *
     * @param startNode The node to explain.
     * @param numSteps The number of steps back from the node to search.
     * @return A report representing the precursors of the given starting metabolite.
     */
    public PrecursorReport getPrecursorReport(NetworkNode startNode, int numSteps) {
        if (numSteps <= 0) {
            throw new IllegalArgumentException("Precursor graph is only well-defined for numSteps > 0");
        }

        MetabolismNetwork subgraph = new MetabolismNetwork();
        Map<NetworkNode, Integer> levelMap = new HashMap<>();
        Set<NetworkNode> frontier = new HashSet<>();
        frontier.add(startNode);
        levelMap.put(startNode, 0);

        for (MutableInt l = new MutableInt(1); l.toInteger() <= numSteps; l.increment()) {
            // Get edges leading into the derivative frontier
            List<NetworkEdge> edges = frontier.stream().flatMap(n -> n.getInEdges().stream())
                    .collect(Collectors.toList());
            // Add all of the nodes adjacent to the edges, and the edges themselves, to the subgraph
            edges.forEach(e -> this.getSubstrates(e).forEach(subgraph::addNode));
            edges.forEach(e -> this.getProducts(e).forEach(subgraph::addNode));
            edges.forEach(subgraph::addEdge);
            // Calculate new frontier, excluding already-labeled nodes to avoid cycles
            frontier = edges.stream().flatMap(e -> this.getSubstrates(e).stream()).collect(Collectors.toSet());
            frontier.removeIf(levelMap::containsKey);
            // Label remaining nodes with appropriate level.
            frontier.forEach(n -> levelMap.put(n, l.toInteger()));
        }

        return new PrecursorReport(startNode.getMetabolite(), subgraph, levelMap);
    }

    /**
     * Load all reactions from a given DB into the network.
     *
     * @param db The DB.
     */
    public void loadAllEdgesFromDb(MongoDB db) {
        DBIterator iterator = db.getIteratorOverReactions();
        Reaction reaction;
        int count = 0;
        while ((reaction = db.getNextReaction(iterator)) != null) {
            this.addEdgeFromReaction(db, reaction);
            if (count % 1000 == 0) {
                LOGGER.info("Processed %d reactions.", count);
            }
            count++;
        }
    }

    /**
     * Loads an edge from a DB reaction.
     * TODO: optimize number of DB calls made so that this will run faster.
     *
     * @return The added edge if any, or null if the reaction's substrates or products were empty.
     */
    private NetworkEdge addEdgeFromReaction(MongoDB db, Reaction reaction) {
        List<Long> substrateIds = Arrays.asList(reaction.getSubstrates());
        List<String> substrates = new ArrayList<>();
        for (Long s : substrateIds) {
            String inchi = db.getChemicalFromChemicalUUID(s).getInChI();
            for (int i = 0; i < denullCoeff(reaction.getSubstrateCoefficient(s)); i++) {
                substrates.add(inchi);
            }
        }

        List<Long> productIds = Arrays.asList(reaction.getProducts());
        List<String> products = new ArrayList<>();
        for (Long p : productIds) {
            String inchi = db.getChemicalFromChemicalUUID(p).getInChI();
            for (int i = 0; i < denullCoeff(reaction.getProductCoefficient(p)); i++) {
                products.add(inchi);
            }
        }

        if (substrates.isEmpty() || products.isEmpty()) {
            return null;
        }

        NetworkEdge edge = addEdgeFromInchis(substrates, products);
        edge.addReactionId(reaction.getUUID());

        for (JSONObject protein : reaction.getProteinData()) {
            if (protein.has(ORG_FIELD)) {
                edge.addOrg(db.getOrganismNameFromId(protein.getLong(ORG_FIELD)));
            }
        }

        return edge;
    }

    /**
     * Assumes any coefficient which is null should be 1. Null coefficients were given NullPointerExceptions previously.
     *
     * @param coeffOrNull The Integer value directly from the DB.
     * @return The input value if not null; otherwise 1.
     */
    private Integer denullCoeff(Integer coeffOrNull) {
        if (coeffOrNull == null) {
            return 1;
        }
        return coeffOrNull;
    }

    /**
     * Loads all predictions from a prediction corpus into the network as edges.
     *
     * @param predictionCorpus
     */
    public void loadPredictions(L2PredictionCorpus predictionCorpus) {
        predictionCorpus.getCorpus().forEach(prediction -> loadEdgeFromPrediction(prediction));
    }

    /**
     * Loads a single prediction into the graph as an edge or edges.
     *
     * @param prediction The prediction to load.
     */
    public void loadEdgeFromPrediction(L2Prediction prediction) {
        List<String> substrates = prediction.getSubstrateInchis();
        List<String> products = prediction.getProductInchis();

        NetworkEdge edge = addEdgeFromInchis(substrates, products);
        edge.addProjectorName(prediction.getProjectorName());
    }

    /**
     * Adds an edge; assumes all nodes pointed to by the edge exist
     */
    public NetworkEdge addEdge(NetworkEdge edge) {
        edge.getSubstrates().forEach(s -> getNodeByUID(s).addOutEdge(edge));
        edge.getProducts().forEach(p -> getNodeByUID(p).addInEdge(edge));
        edges.add(edge);
        return edge;
    }

    /**
     * Adds a given edge to the graph. Creates new nodes from inchis where there aren't already existing nodes.
     * First, adds the substrate and product nodes to the graph, if they don't already exist.
     * Then, checks for an already existing edge with the same substrate and product; if such an edge exists, this edge's
     * auxiliary data is merged into the already existing edge.  If no such edge exists, a new edge is added.
     *
     * @return The added edge.
     */
    public NetworkEdge addEdgeFromInchis(List<String> substrates, List<String> products) {
        List<Integer> sNodes = substrates.stream().map(this::createOrGetNodeFromInchi).map(NetworkNode::getUID)
                .collect(Collectors.toList());
        List<Integer> pNodes = products.stream().map(this::createOrGetNodeFromInchi).map(NetworkNode::getUID)
                .collect(Collectors.toList());

        NetworkEdge edge = new NetworkEdge(sNodes, pNodes);

        List<NetworkEdge> equivalentEdges = getNodeByUID(sNodes.get(0)).getOutEdges().stream()
                .filter(e -> e.hasSameSubstratesAndProducts(edge)).collect(Collectors.toList());
        if (equivalentEdges.size() > 1) {
            // Should be at most one edge with a given substrate, product pair
            throw new IllegalStateException("Two edges with same substrates and products found in the same graph");
        }

        if (equivalentEdges.isEmpty()) { // If no equivalent edge exists, add the new edge
            return addEdge(new NetworkEdge(sNodes, pNodes));
        } else { // If there is an equivalent edge, merge the data into that edge.
            return equivalentEdges.get(0).merge(edge);
        }
    }

    /**
     * Checks if a node with a given inchi is already in the map.  If so, returns the node. If not, creates a new node
     * with that inchi and returns it.
     * TODO: generalize this to handle metabolites rather than just inchis
     *
     * @param inchi The inchi.
     * @return The node.
     */
    private NetworkNode createOrGetNodeFromInchi(String inchi) {
        NetworkNode node = inchiIndex.get(inchi);
        if (node == null) {
            return addNode(new NetworkNode(new InchiMetabolite(inchi)));
        }
        return node;
    }

    /**
     * Adds a node to network if its UID is unique. If a node already exists with this UID, returns the existing node
     * without modifying the graph.
     *
     * @param node The node to add.
     * @return The node added, or the existing node.
     */
    public NetworkNode addNode(NetworkNode node) {
        if (UIDIndex.get(node.getUID()) != null) {
            return UIDIndex.get(node.getUID());
        }
        nodes.add(node);
        UIDIndex.put(node.getUID(), node);
        node.getMetabolite().getStructure().ifPresent(s -> inchiIndex.put(s.getInchi(), node));
        return node;
    }

    public void writeToJsonFile(File outputFile) throws IOException {
        try (BufferedWriter predictionWriter = new BufferedWriter(new FileWriter(outputFile))) {
            OBJECT_MAPPER.writeValue(predictionWriter, this);
        }
    }

    public static MetabolismNetwork getNetworkFromJsonFile(File inputFile) throws IOException {
        return OBJECT_MAPPER.readValue(inputFile, MetabolismNetwork.class);
    }
}