// Java tutorial
/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program. If not, see <http://www.gnu.org/licenses/>.  *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.networkanalysis;

import act.server.DBIterator;
import act.server.MongoDB;
import act.shared.Reaction;
import com.act.biointerpretation.l2expansion.L2Prediction;
import com.act.biointerpretation.l2expansion.L2PredictionCorpus;
import com.act.workflow.tool_manager.workflow.workflow_mixins.mongo.ReactionKeywords;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.jacob.com.NotImplementedException;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONObject;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Represents a metabolism network, cataloging all possible predicted chemical transformations that could be happening
 * in a given sample.
 *
 * <p>The network keeps a list of nodes and a list of edges (both serialized to JSON), plus two lookup indices that
 * are rebuilt as nodes are added: one keyed by node UID and one keyed by inchi.
 */
public class MetabolismNetwork implements ImmutableNetwork {

  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
  private static final Logger LOGGER = LogManager.getFormatterLogger(MetabolismNetwork.class);

  private static final String ORG_FIELD = ReactionKeywords.ORGANISM$.MODULE$.toString();

  /** Number of reactions between two progress log lines in {@link #loadAllEdgesFromDb(MongoDB)}. */
  private static final int PROGRESS_LOG_INTERVAL = 1000;

  // TODO: generalize to case when we no longer exclusively use inchis
  @JsonProperty("nodes")
  List<NetworkNode> nodes;

  @JsonProperty("edges")
  List<NetworkEdge> edges;

  // Map from node UIDs to nodes.
  @JsonIgnore
  Map<Integer, NetworkNode> UIDIndex;

  // Map from inchis to nodes.
  @JsonIgnore
  Map<String, NetworkNode> inchiIndex;

  /**
   * JSON deserialization entry point. Re-adds every node and then every edge through {@link #addNode(NetworkNode)}
   * and {@link #addEdge(NetworkEdge)} so that the lookup indices and the per-node edge registrations are rebuilt.
   *
   * @param nodes The deserialized nodes; a missing (null) list is treated as empty.
   * @param edges The deserialized edges; a missing (null) list is treated as empty.
   */
  @JsonCreator
  private MetabolismNetwork(@JsonProperty("nodes") List<NetworkNode> nodes,
                            @JsonProperty("edges") List<NetworkEdge> edges) {
    this();
    // Nodes must go in first: addEdge looks up every endpoint by UID.
    if (nodes != null) {
      nodes.forEach(this::addNode);
    }
    if (edges != null) {
      edges.forEach(this::addEdge);
    }
  }

  /**
   * Creates an empty network.
   */
  public MetabolismNetwork() {
    nodes = new ArrayList<>();
    edges = new ArrayList<>();
    UIDIndex = new HashMap<>();
    inchiIndex = new HashMap<>();
  }

  /**
   * Looks up a node by UID.
   *
   * @param uid The UID to look up.
   * @return The node registered under that UID.
   * @throws IllegalArgumentException If no node with that UID is in the network.
   */
  @Override
  public NetworkNode getNodeByUID(Integer uid) {
    NetworkNode result = UIDIndex.get(uid);
    if (result == null) {
      throw new IllegalArgumentException("Node with given UID not found!");
    }
    return result;
  }

  /**
   * Looks up a node by UID without throwing when it is absent.
   *
   * @param uid The UID to look up.
   * @return The node registered under that UID, or an empty Optional.
   */
  @Override
  public Optional<NetworkNode> getNodeOptionByUID(Integer uid) {
    return Optional.ofNullable(UIDIndex.get(uid));
  }

  /**
   * Looks up a node by inchi.
   *
   * @param inchi The inchi to look up.
   * @return The node registered under that inchi.
   * @throws IllegalArgumentException If no node with that inchi is in the network.
   */
  @Override
  public NetworkNode getNodeByInchi(String inchi) {
    NetworkNode result = inchiIndex.get(inchi);
    if (result == null) {
      throw new IllegalArgumentException("Didn't find node with inchi " + inchi);
    }
    return result;
  }

  /**
   * Looks up a node by inchi without throwing when it is absent.
   *
   * @param inchi The inchi to look up.
   * @return The node registered under that inchi, or an empty Optional.
   */
  @Override
  public Optional<NetworkNode> getNodeOptionByInchi(String inchi) {
    return Optional.ofNullable(inchiIndex.get(inchi));
  }

  /**
   * Not implemented: this network has no mass index.
   *
   * @throws NotImplementedException Always.
   */
  @Override
  public List<NetworkNode> getNodesByMass(Double mass, Double massTolerance) {
    throw new NotImplementedException("Mass indexing not yet implemented.");
  }

  /**
   * Get all nodes from the graph.
   *
   * @return An unmodifiable view of the graph's nodes.
   */
  @JsonIgnore
  @Override
  public Collection<NetworkNode> getNodes() {
    return Collections.unmodifiableCollection(nodes);
  }

  /**
   * Get all edges from the graph.
   *
   * @return An unmodifiable collection of the graph's edges.
   */
  public Collection<NetworkEdge> getEdges() {
    return Collections.unmodifiableCollection(edges);
  }

  /**
   * Resolves the substrate UIDs of an edge to nodes of this network.
   *
   * @param edge The edge whose substrates are wanted.
   * @return The set of substrate nodes.
   * @throws IllegalArgumentException If one of the edge's substrate UIDs is not in this network.
   */
  @Override
  public Set<NetworkNode> getSubstrates(NetworkEdge edge) {
    return edge.getSubstrates().stream().map(this::getNodeByUID).collect(Collectors.toSet());
  }

  /**
   * Resolves the product UIDs of an edge to nodes of this network.
   *
   * @param edge The edge whose products are wanted.
   * @return The set of product nodes.
   * @throws IllegalArgumentException If one of the edge's product UIDs is not in this network.
   */
  @Override
  public Set<NetworkNode> getProducts(NetworkEdge edge) {
    return edge.getProducts().stream().map(this::getNodeByUID).collect(Collectors.toSet());
  }

  /**
   * Get all nodes that are one step forward from this node. These are predicted products of reactions that have this
   * node as a substrate.
   *
   * @param node The starting node.
   * @return The list of potential product nodes. A node appears once per (out-edge, product entry) pair, so the
   * list may contain repeats.
   */
  @Override
  public List<NetworkNode> getDerivatives(NetworkNode node) {
    List<NetworkNode> derivatives = new ArrayList<>();
    for (NetworkEdge edge : node.getOutEdges()) {
      for (Integer productUid : edge.getProducts()) {
        derivatives.add(getNodeByUID(productUid));
      }
    }
    return derivatives;
  }

  /**
   * Get all nodes that are one step before this node. These are substrates of reactions that are predicted to produce
   * this node as a product.
   *
   * @param node The starting node.
   * @return The list of potential substrate nodes. A node appears once per (in-edge, substrate entry) pair, so the
   * list may contain repeats.
   */
  @Override
  public List<NetworkNode> getPrecursors(NetworkNode node) {
    List<NetworkNode> precursors = new ArrayList<>();
    for (NetworkEdge edge : node.getInEdges()) {
      for (Integer substrateUid : edge.getSubstrates()) {
        precursors.add(getNodeByUID(substrateUid));
      }
    }
    return precursors;
  }

  /**
   * Trace the pathway back from the given startNode for up to numSteps steps, and return the subgraph of all
   * precursors found. This is intended to supply explanatory pathways for the input node.
   *
   * <p>The start node is labeled with level 0; every other node is labeled with the first step at which it was
   * reached as a substrate. Each edge is added to the subgraph at most once.
   *
   * @param startNode The node to explain.
   * @param numSteps The number of steps back from the node to search.
   * @return A report representing the precursors of the given starting metabolite.
   * @throws IllegalArgumentException If numSteps is not positive.
   */
  public PrecursorReport getPrecursorReport(NetworkNode startNode, int numSteps) {
    if (numSteps <= 0) {
      throw new IllegalArgumentException("Precursor graph is only well-defined for numSteps > 0");
    }

    // NOTE(review): the subgraph stores the very same NetworkNode instances as this network, and
    // subgraph.addEdge() calls addOutEdge/addInEdge on them again. Whether that re-registration is harmless
    // depends on NetworkNode's edge collections, which are not visible here -- confirm.
    MetabolismNetwork subgraph = new MetabolismNetwork();
    Map<NetworkNode, Integer> levelMap = new HashMap<>();
    Set<NetworkEdge> visitedEdges = new HashSet<>();

    Set<NetworkNode> frontier = new HashSet<>();
    frontier.add(startNode);
    levelMap.put(startNode, 0);

    for (int step = 1; step <= numSteps && !frontier.isEmpty(); step++) {
      final int level = step;

      // Get edges leading into the frontier. An edge with several products can be reached from more than one
      // frontier node (or again at a later level), so skip edges that were already handled.
      List<NetworkEdge> inEdges = new ArrayList<>();
      for (NetworkNode frontierNode : frontier) {
        for (NetworkEdge inEdge : frontierNode.getInEdges()) {
          if (visitedEdges.add(inEdge)) {
            inEdges.add(inEdge);
          }
        }
      }

      // Add all of the nodes adjacent to the edges, and the edges themselves, to the subgraph.
      // Nodes go in before edges because addEdge requires its endpoints to exist.
      inEdges.forEach(e -> this.getSubstrates(e).forEach(subgraph::addNode));
      inEdges.forEach(e -> this.getProducts(e).forEach(subgraph::addNode));
      inEdges.forEach(subgraph::addEdge);

      // Calculate new frontier, excluding already-labeled nodes to avoid cycles
      frontier = inEdges.stream().flatMap(e -> this.getSubstrates(e).stream()).collect(Collectors.toSet());
      frontier.removeIf(levelMap::containsKey);

      // Label remaining nodes with appropriate level.
      frontier.forEach(n -> levelMap.put(n, level));
    }

    return new PrecursorReport(startNode.getMetabolite(), subgraph, levelMap);
  }

  /**
   * Load all reactions from a given DB into the network. Logs progress every
   * {@value #PROGRESS_LOG_INTERVAL} reactions.
   *
   * @param db The DB.
   */
  public void loadAllEdgesFromDb(MongoDB db) {
    DBIterator iterator = db.getIteratorOverReactions();
    Reaction reaction;
    int count = 0;
    while ((reaction = db.getNextReaction(iterator)) != null) {
      this.addEdgeFromReaction(db, reaction);
      // Count first, then report, so the logged number is the number of reactions actually processed.
      count++;
      if (count % PROGRESS_LOG_INTERVAL == 0) {
        LOGGER.info("Processed %d reactions.", count);
      }
    }
  }

  /**
   * Loads an edge from a DB reaction. Each chemical's inchi is repeated according to its coefficient in the
   * reaction; a null coefficient counts as 1.
   * TODO: optimize number of DB calls made so that this will run faster.
   *
   * @param db The DB used to resolve chemical ids to inchis and organism ids to names.
   * @param reaction The reaction to load.
   * @return The added edge if any, or null if the reaction's substrates or products were empty.
   */
  private NetworkEdge addEdgeFromReaction(MongoDB db, Reaction reaction) {
    List<Long> substrateIds = Arrays.asList(reaction.getSubstrates());
    List<String> substrates = new ArrayList<>();
    for (Long s : substrateIds) {
      String inchi = db.getChemicalFromChemicalUUID(s).getInChI();
      int copies = denullCoeff(reaction.getSubstrateCoefficient(s));
      for (int i = 0; i < copies; i++) {
        substrates.add(inchi);
      }
    }

    List<Long> productIds = Arrays.asList(reaction.getProducts());
    List<String> products = new ArrayList<>();
    for (Long p : productIds) {
      String inchi = db.getChemicalFromChemicalUUID(p).getInChI();
      int copies = denullCoeff(reaction.getProductCoefficient(p));
      for (int i = 0; i < copies; i++) {
        products.add(inchi);
      }
    }

    if (substrates.isEmpty() || products.isEmpty()) {
      return null;
    }

    NetworkEdge edge = addEdgeFromInchis(substrates, products);
    edge.addReactionId(reaction.getUUID());
    for (JSONObject protein : reaction.getProteinData()) {
      if (protein.has(ORG_FIELD)) {
        edge.addOrg(db.getOrganismNameFromId(protein.getLong(ORG_FIELD)));
      }
    }
    return edge;
  }

  /**
   * Assumes any coefficient which is null should be 1. Null coefficients were given NullPointerExceptions previously.
   *
   * @param coeffOrNull The Integer value directly from the DB.
   * @return The input value if not null; otherwise 1.
   */
  private Integer denullCoeff(Integer coeffOrNull) {
    if (coeffOrNull == null) {
      return 1;
    }
    return coeffOrNull;
  }

  /**
   * Loads all predictions from a prediction corpus into the network as edges.
   *
   * @param predictionCorpus The corpus whose predictions should be loaded.
   */
  public void loadPredictions(L2PredictionCorpus predictionCorpus) {
    predictionCorpus.getCorpus().forEach(this::loadEdgeFromPrediction);
  }

  /**
   * Loads a single prediction into the graph as an edge or edges, and records the prediction's projector name on
   * the resulting edge.
   *
   * @param prediction The prediction to load.
   * @throws IllegalArgumentException If the prediction has no substrate inchis.
   */
  public void loadEdgeFromPrediction(L2Prediction prediction) {
    List<String> substrates = prediction.getSubstrateInchis();
    List<String> products = prediction.getProductInchis();
    NetworkEdge edge = addEdgeFromInchis(substrates, products);
    edge.addProjectorName(prediction.getProjectorName());
  }

  /**
   * Adds an edge; assumes all nodes pointed to by the edge exist. The edge is registered as an out-edge on each
   * substrate node and as an in-edge on each product node.
   *
   * @param edge The edge to add.
   * @return The edge that was passed in.
   * @throws IllegalArgumentException If a substrate or product UID of the edge is not in this network.
   */
  public NetworkEdge addEdge(NetworkEdge edge) {
    edge.getSubstrates().forEach(s -> getNodeByUID(s).addOutEdge(edge));
    edge.getProducts().forEach(p -> getNodeByUID(p).addInEdge(edge));
    edges.add(edge);
    return edge;
  }

  /**
   * Adds a given edge to the graph. Creates new nodes from inchis where there aren't already existing nodes.
   * First, adds the substrate and product nodes to the graph, if they don't already exist.
   * Then, checks for an already existing edge with the same substrate and product; if such an edge exists, this edge's
   * auxiliary data is merged into the already existing edge. If no such edge exists, a new edge is added.
   *
   * @param substrates The substrate inchis; must contain at least one entry.
   * @param products The product inchis.
   * @return The added edge, or the result of merging into the already existing equivalent edge.
   * @throws IllegalArgumentException If substrates is empty.
   * @throws IllegalStateException If the graph already holds more than one equivalent edge.
   */
  public NetworkEdge addEdgeFromInchis(List<String> substrates, List<String> products) {
    if (substrates.isEmpty()) {
      // The equivalent-edge lookup below is anchored on the first substrate, so there must be one.
      throw new IllegalArgumentException("Cannot add an edge without any substrates.");
    }

    List<Integer> sNodes = substrates.stream()
        .map(this::createOrGetNodeFromInchi)
        .map(NetworkNode::getUID)
        .collect(Collectors.toList());
    List<Integer> pNodes = products.stream()
        .map(this::createOrGetNodeFromInchi)
        .map(NetworkNode::getUID)
        .collect(Collectors.toList());

    NetworkEdge edge = new NetworkEdge(sNodes, pNodes);

    // Any equivalent edge must leave the first substrate, so only that node's out-edges need checking.
    List<NetworkEdge> equivalentEdges = getNodeByUID(sNodes.get(0)).getOutEdges().stream()
        .filter(e -> e.hasSameSubstratesAndProducts(edge))
        .collect(Collectors.toList());

    if (equivalentEdges.size() > 1) {
      // Should be at most one edge with a given substrate, product pair
      throw new IllegalStateException("Two edges with same substrates and products found in the same graph");
    }

    if (equivalentEdges.isEmpty()) {
      // If no equivalent edge exists, add the new edge
      return addEdge(edge);
    }
    // If there is an equivalent edge, merge the data into that edge.
    return equivalentEdges.get(0).merge(edge);
  }

  /**
   * Checks if a node with a given inchi is already in the map. If so, returns the node. If not, creates a new node
   * with that inchi and returns it.
   * TODO: generalize this to handle metabolites rather than just inchis
   *
   * @param inchi The inchi.
   * @return The node.
   */
  private NetworkNode createOrGetNodeFromInchi(String inchi) {
    NetworkNode node = inchiIndex.get(inchi);
    if (node == null) {
      return addNode(new NetworkNode(new InchiMetabolite(inchi)));
    }
    return node;
  }

  /**
   * Adds a node to network if its UID is unique. If a node already exists with this UID, returns the existing node
   * without modifying the graph. A newly added node is also indexed by inchi when its metabolite has a structure.
   *
   * @param node The node to add.
   * @return The node added, or the existing node.
   */
  public NetworkNode addNode(NetworkNode node) {
    NetworkNode existing = UIDIndex.get(node.getUID());
    if (existing != null) {
      return existing;
    }
    nodes.add(node);
    UIDIndex.put(node.getUID(), node);
    node.getMetabolite().getStructure().ifPresent(s -> inchiIndex.put(s.getInchi(), node));
    return node;
  }

  /**
   * Serializes this network as JSON into the given file.
   *
   * <p>The file is written through Jackson's File-based writer rather than a {@code FileWriter}, so the output
   * encoding does not depend on the platform default charset.
   *
   * @param outputFile The file to write to.
   * @throws IOException If the file cannot be written or serialization fails.
   */
  public void writeToJsonFile(File outputFile) throws IOException {
    OBJECT_MAPPER.writeValue(outputFile, this);
  }

  /**
   * Reads a network from a JSON file.
   *
   * @param inputFile The file to read from.
   * @return The deserialized network.
   * @throws IOException If the file cannot be read or parsed.
   */
  public static MetabolismNetwork getNetworkFromJsonFile(File inputFile) throws IOException {
    return OBJECT_MAPPER.readValue(inputFile, MetabolismNetwork.class);
  }
}