// Java tutorial
/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package act.server; import act.installer.bing.NamesOfMolecule; import act.installer.bing.UsageTermUrlSet; import act.installer.brenda.BrendaChebiOntology; import act.shared.Chemical; import act.shared.Chemical.REFS; import act.shared.Cofactor; import act.shared.ConsistentInChI; import act.shared.Organism; import act.shared.Reaction; import act.shared.Seq; import act.shared.helpers.MongoDBToJSON; import act.shared.helpers.P; import com.act.workflow.tool_manager.workflow.workflow_mixins.mongo.ChemicalKeywords; import com.act.workflow.tool_manager.workflow.workflow_mixins.mongo.MongoKeywords; import com.fasterxml.jackson.databind.ObjectMapper; import com.ggasoftware.indigo.Indigo; import com.ggasoftware.indigo.IndigoException; import com.ggasoftware.indigo.IndigoInchi; import com.ggasoftware.indigo.IndigoObject; import com.mongodb.AggregationOutput; import com.mongodb.BasicDBList; import com.mongodb.BasicDBObject; import com.mongodb.Bytes; import com.mongodb.DB; 
import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.DBObject; import com.mongodb.Mongo; import com.mongodb.MongoException; import com.mongodb.WriteConcern; import com.mongodb.WriteResult; import com.mongodb.util.JSON; import org.apache.commons.lang3.StringUtils; import org.biopax.paxtools.model.level3.ConversionDirectionType; import org.biopax.paxtools.model.level3.StepDirection; import org.json.JSONArray; import org.json.JSONObject; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; // TODO Change all default notimeouts to true. public class MongoDB { public static final long ORG_ID_BASE = 5000000000L; private static ObjectMapper mapper = new ObjectMapper(); private String hostname; private String database; private int port; private DBCollection dbReactions; private DBCollection dbChemicals; private DBCollection dbCofactors; private DBCollection dbOrganisms; private DBCollection dbOrganismNames; private DBCollection dbCascades; private DBCollection dbWaterfalls; private DBCollection dbSeq; private DBCollection dbPubmed; // the pubmed collection is separate from actv01 db private DB mongoDB; private Mongo mongo; public MongoDB(String mongoActHost, int port, String dbs) { this.hostname = mongoActHost; this.port = port; this.database = dbs; initDB(); } public static void dropDB(String mongoActHost, int port, String dbs) { dropDB(mongoActHost, port, dbs, false); } public static void dropDB(String mongoActHost, int port, String dbs, boolean force) { try { DB toDropDB = new Mongo(mongoActHost, port).getDB(dbs); if (!force) { // Require explicit confirmation from the user before dropping an existing DB. System.out.format( "Going to drop: %s:%d/%s. 
Type \"DROP\" (without quotes) and press enter to proceed.\n", mongoActHost, port, dbs); try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) { String readLine = reader.readLine(); if (!"DROP".equals(readLine)) { System.out.format("Invalid input \"%s\", not dropping DB\n", readLine); } else { System.out.format("Dropping DB\n"); // drop DB! toDropDB.dropDatabase(); } } } else { System.out.format("[Force] Dropping DB %s\n", dbs); toDropDB.dropDatabase(); } } catch (UnknownHostException e) { throw new IllegalArgumentException("Invalid host for Mongo Act server."); } catch (MongoException e) { throw new IllegalArgumentException("Could not initialize Mongo driver."); } catch (IOException e) { throw new RuntimeException("Unable to read from stdin"); } } public MongoDB(String host) { this.hostname = host; this.port = 27017; this.database = "actv01"; // default act database; this constructor is rarely, if ever called. initDB(); } public MongoDB() { this.hostname = "localhost"; this.port = 27017; this.database = "actv01"; // default act database; this constructor is rarely, if ever called. initDB(); } public String toString() { return this.hostname + " " + this.port; } public void close() { this.mongo.close(); } private void initDB() { try { mongo = new Mongo(this.hostname, this.port); mongoDB = mongo.getDB(this.database); // in case the db is protected then we would do the following: // boolean auth = db.authenticate(myUserName, myPassword); // but right now we do not care. 
this.dbReactions = mongoDB.getCollection("reactions"); this.dbChemicals = mongoDB.getCollection("chemicals"); this.dbCofactors = mongoDB.getCollection("cofactors"); this.dbOrganisms = mongoDB.getCollection("organisms"); this.dbOrganismNames = mongoDB.getCollection("organismnames"); this.dbSeq = mongoDB.getCollection("seq"); this.dbCascades = mongoDB.getCollection("cascades"); this.dbWaterfalls = mongoDB.getCollection("waterfalls"); this.dbPubmed = mongoDB.getCollection("pubmed"); initIndices(); } catch (UnknownHostException e) { throw new IllegalArgumentException("Invalid host for Mongo Act server."); } catch (MongoException e) { throw new IllegalArgumentException( String.format("Could not initialize Mongo driver: %s", e.getMessage())); } } private void initIndices() { this.createChemicalsIndex("InChI", true); // create a hashed index this.createChemicalsIndex("InChIKey"); // create a normal index this.createChemicalsIndex("names.brenda"); // create a normal index this.createChemicalsIndex("names.pubchem.values"); // normal index this.createChemicalsIndex("names.synonyms"); // create a normal index this.createCofactorsIndex("InChI", true); // create a hashed index this.createOrganismNamesIndex("name"); this.createOrganismNamesIndex("org_id"); this.createSeqIndex("metadata.accession", false); this.createSeqIndex("seq", true); this.createSeqIndex("rxn_refs", false); } public int port() { return this.port; } public String host() { return this.hostname; } public String dbs() { return this.database; } public String location() { return this.hostname + "." + this.port + "." 
+ this.database; } private String getReactantFromMongoDocument(BasicDBObject family, String which, int i) { BasicDBList o = (BasicDBList) ((DBObject) family.get("enz_summary")).get(which); if (i >= o.size()) return ""; return "" + (Long) ((DBObject) o.get(i)).get("pubchem"); } /* Sanity checks against a ref DB, returns: - P<List, List>: pair(added, deleted) in this over ref DB, list of ids (Object) - Map<Object, DBObject>: id->object map of changed docs */ public static P<P<List, List>, Map<Object, Object>> compare(String coll, String id_key, int thisport, int refport, boolean listsAreSet) throws UnknownHostException { String host = "localhost"; String dbs = "actv01"; List<Object> add = new ArrayList<Object>(); List<Object> del = new ArrayList<Object>(); Set<Object> seen = new HashSet<Object>(); Map<Object, Object> upd = new HashMap<Object, Object>(); DBCollection c = new Mongo(host, thisport).getDB(dbs).getCollection(coll); DBCollection cref = new Mongo(host, refport).getDB(dbs).getCollection(coll); // yes, we indeed need to iterate over the entire collection! so unrestricted find() ok here. DBCursor cur = c.find(); while (cur.hasNext()) { DBObject doc = cur.next(); Object id = doc.get(id_key); DBObject docref = findOneDoc(cref, id_key, id); if (docref == null) { // reference collection does not have doc, log as newly created add.add(id); } else { // reference collection has doc: // compare the differences between these two docs and log it as updated if they differ Object diff = compare(doc, docref, listsAreSet); if (diff != null) { // the docs differ. Log it as updated, and note the diff upd.put(id, diff); } } seen.add(id); } // now iterate over ref db and see if there are any docs deleted (i.e., not in notDeleted) DBCursor curref = c.find(); while (curref.hasNext()) { DBObject doc = curref.next(); Object id = doc.get(id_key); if (!seen.contains(id)) { // this doc was not seen in the updated collection, so deleted. 
log that del.add(id); } } return new P<P<List, List>, Map<Object, Object>>(new P<List, List>(add, del), upd); } private static DBObject findOneDoc(DBCollection c, String id_key, Object id) { BasicDBObject query = new BasicDBObject(); query.put(id_key, id); DBObject res = c.findOne(query); return res; } private static Object compare(Object d, Object dref, boolean listsAreSet) { if (d == null && dref == null) return null; // identical; return null which indicates identicalicity else if (d == null && dref != null) return "+" + dref; else if (d != null && dref == null) return "-" + d; if ((d instanceof Long && dref instanceof Long) || (d instanceof Double && dref instanceof Double) || (d instanceof Integer && dref instanceof Integer) || (d instanceof Boolean && dref instanceof Boolean) || (d instanceof String && dref instanceof String)) return compare_primitive(d, dref); else if (d instanceof BasicDBList && dref instanceof BasicDBList) return compare((BasicDBList) d, (BasicDBList) dref, listsAreSet); else if (d instanceof DBObject && dref instanceof DBObject) return compare((DBObject) d, (DBObject) dref, listsAreSet); else { System.out.println("+" + d); System.out.println("-" + dref); System.out.println(); return "TYPEDIFF: +" + d.getClass().getName() + " vs -" + dref.getClass().getName(); } } private static Object compare_primitive(Object p, Object pref) { return p.equals(pref) ? null : "+" + p + " vs -" + pref; } private static DBObject compare(DBObject doc, DBObject docref, boolean listsAreSet) { boolean different = false; BasicDBObject diff = new BasicDBObject(); Set<String> refKeys = new HashSet<String>(); refKeys.addAll(docref.keySet()); for (String k : doc.keySet()) { // as numerical calculations are improved, some computed fields are // bound to change: e.g., rarity and estimateEnergy // so make a special exception for those and ignore its val field... // but compare any other key recursively for differences... 
if (k.equals("rarity") || k.equals("estimateEnergy") || k.equals("coefficient")) continue; Object val = doc.get(k); if (!docref.containsKey(k)) { // this field is new diff.put("+" + k, val); different = true; } else { // field exists in old doc, recursively compare Object refval = docref.get(k); refKeys.remove(k); Object d; if ((d = compare(val, refval, listsAreSet)) != null) { // keys identical but values differ, add without the + or - to key different = true; diff.put(k, d); } else { // values identical and keys same too, do not put in diff. } } } // all remaining fields were deleted from old doc for (String kref : refKeys) { if (kref.equals("rarity") || kref.equals("estimateEnergy") || kref.equals("coefficient")) // see why in loop above continue; diff.put("-" + kref, docref.get(kref)); different = true; } return different ? diff : null; // the following is not order invariant and therefore problematic: // return org.apache.commons.lang.StringUtils.difference(doc.toString(), docref.toString()); } private static BasicDBList compare(BasicDBList l, BasicDBList refl, boolean listsAreSet) { boolean different = false; BasicDBList diff = new BasicDBList(); if (!listsAreSet) { // lists are to be treated as ordered sets and so we can compare element by element for (int i = 0; i < l.size(); i++) { Object val = l.get(i); Object refv = refl.get(i); Object d; if ((d = compare(val, refv, listsAreSet)) != null) { different = true; diff.add(d); } else { // elements at this index are identical, but we don't want to muck up the order // in case future elements are not identical... 
so add a null to the diff, // BUT IMP: do not set the flag that the list is different diff.add(null); } } } else { // lists are to be treated as unordered sets: we try to match each element best // effort to any one of the list elements, and if it does proceed greedily // we keep this as a list as opposed to a true set because the original (ref) // and the current (new) might have (identical) replicates, and so should not // be flagged different because of that. List<Object> refset = new ArrayList<Object>(); refset.addAll(refl); for (Object e : l) { boolean matches_some = false; for (Object eref : refset) { if (compare(e, eref, listsAreSet) == null) { // this object matches something, great, lets move to the next object // also remove the matched object from the ref list, so that we have // a 1-1 mapping between this and the ref list object matches_some = true; refset.remove(eref); break; } } if (!matches_some) { // if this object in new list could not be matched against something, // the lists are different different = true; diff.add(e); } } if (refset.size() != 0) { // still some elements remain in the ref list, i.e., sets different different = true; diff.addAll(refset); } } return different ? diff : null; } /* * * * Below is the list of functions required for populating MongoAct * * */ public Long getNextAvailableChemicalDBid() { return this.dbChemicals.count(); } public Long getNextAvailableCofactorDBid() { // TODO: do something more robust than this hack. return this.dbCofactors.count(); } public void submitToActWaterfallDB(Long ID, DBObject waterfall) { // insert a new doc to the collection waterfall.put("_id", ID); this.dbWaterfalls.insert(waterfall); } public void submitToActCascadeDB(Long ID, DBObject cascade) { // insert a new doc to the collection cascade.put("_id", ID); this.dbCascades.insert(cascade); } public void submitToActCofactorsDB(Cofactor c, Long ID) { // check if this is already in the DB. 
long alreadyid = alreadyEntered(c); if (alreadyid != -1) { // cofactor already in DB; what sorcery is this? // hard abort. We do not expect to repeatedly see cofactors throw new RuntimeException("Duplicate entry for cofactor seen! Install abort."); } BasicDBObject doc = createCofactorDoc(c, ID); // insert a new doc to the collection this.dbCofactors.insert(doc); } public BasicDBObject createCofactorDoc(Cofactor c, Long ID) { BasicDBObject doc = new BasicDBObject(); doc.put("_id", ID); doc.put("InChI", c.getInChI()); BasicDBList names = new BasicDBList(); names.addAll(c.getNames()); doc.put("names", names); return doc; } /** * Inserts or updates a chemical document in the DB, returning the id of the chemical document that represents the * specified Chemical object. * @param c The chemical to update in the DB. * @param ID The ID to use if the chemical is new. * @return That actual ID of the chemical document, either ID if the chemical was new or the existing ID if the * chemical was found in the DB. */ public long submitToActChemicalDB(Chemical c, Long ID) { // check if this is already in the DB. long alreadyid = alreadyEntered(c); if (alreadyid != -1) { mergeIntoDB(alreadyid, c); // chemical already exists: merge return alreadyid; } BasicDBObject doc = createChemicalDoc(c, ID); // insert a new doc to the collection this.dbChemicals.insert(doc); return ID; } public void updateActChemical(Chemical c, Long id) { // See comment in updateActReaction about // db.collection.update, and $set BasicDBObject doc = createChemicalDoc(c, id); DBObject query = new BasicDBObject(); query.put("_id", id); this.dbChemicals.update(query, doc); } /** * Appends XRef data for the chemical with the specified inchi. Might only apply to Metacyc for now. Does not crash * if idPath or metaPath are null. 
* * This uses Mongo's query mechanism to add new ids to a set of xref ids only if they don't already exist, and to * append (without comparison) new xref metadata to an existing list without having to read/de-serialize/add/serialize * the object ourselves. This results in a significant performance improvement, especially towards the end of the * Metacyc installation process. * * TODO: this API is awful. Fix it up to be less Metacyc-specific and more explicit in its behavior. * * @param inchi The inchi of the chemical to update in Mongo. * @param idPath The path to the field where ids should be added, like xref.METACYC.id. * @param id The id for this chemical reference to write. * @param metaPath The path to the field where metadata blobs should be stored, like xref.METACYC.meta. * @param metaObjects A list of metadata objects to append to the metadata list in Mongo. */ public void appendChemicalXRefMetadata(String inchi, String idPath, String id, String metaPath, BasicDBList metaObjects) { if (idPath == null && metaPath == null) { return; } // Get chemical by InChI. BasicDBObject query = new BasicDBObject("InChI", inchi); BasicDBObject update = new BasicDBObject(); if (idPath != null) { // Add to set will add an id to the array of xref ids only if it doesn't already exist in the array. update.put("$addToSet", new BasicDBObject(idPath, id)); } if (metaPath != null) { /* Add all metadata objects to the xref list containing metadata for this source. * Note: $push + $each applied to an array of objects is like $pushAll, which is now deprecated. */ update.put("$push", new BasicDBObject(metaPath, new BasicDBObject("$each", metaObjects))); } // Run exactly one query to update, which should save a lot of time over the course of the installation. 
this.dbChemicals.update(query, update); } public static String chemicalAsString(Chemical c, Long ID) { // called by cytoscape plugin to serialize the entire chemical as a fulltxt string return createChemicalDoc(c, ID).toString(); } public static BasicDBObject createChemicalDoc(Chemical c, Long ID) { BasicDBObject doc = new BasicDBObject(); doc.put("_id", ID); doc.put("canonical", c.getCanon()); doc.put("SMILES", c.getSmiles()); doc.put("InChI", c.getInChI()); doc.put("InChIKey", c.getInChIKey()); doc.put("isCofactor", c.isCofactor()); doc.put("isNative", c.isNative()); BasicDBObject names = new BasicDBObject(); BasicDBList synonyms = new BasicDBList(); synonyms.addAll(c.getSynonyms()); names.put("synonyms", synonyms); BasicDBList pubchemNames = new BasicDBList(); for (String type : c.getPubchemNameTypes()) { String[] temp = c.getPubchemNames(type); BasicDBList dbNames = new BasicDBList(); for (String t : temp) { dbNames.add(t); } BasicDBObject dbNameObj = new BasicDBObject(); dbNameObj.put("type", type); dbNameObj.put("values", dbNames); pubchemNames.add(dbNameObj); } names.put("pubchem", pubchemNames); BasicDBList brendaNames = new BasicDBList(); // will really get its fields later if initial install brendaNames.addAll(c.getBrendaNames()); // but for cases where we call it post install, we construct full chem entry names.put("brenda", brendaNames); doc.put("names", names); BasicDBObject xrefs = new BasicDBObject(); xrefs.put("pubchem", c.getPubchemID()); int cnt = 0; for (REFS xrefTyp : Chemical.REFS.values()) { if (c.getRef(xrefTyp) != null) { xrefs.put(xrefTyp.name(), MongoDBToJSON.conv((JSONObject) c.getRef(xrefTyp))); cnt++; } } doc.put("xref", xrefs); doc.put("estimateEnergy", c.getEstimatedEnergy()); doc.put("keywords", c.getKeywords()); doc.put("keywords_case_insensitive", c.getCaseInsensitiveKeywords()); doc.put("csid", c.getChemSpiderID()); doc.put("num_vendors", c.getChemSpiderNumUniqueVendors()); doc.put("vendors", 
MongoDBToJSON.conv(c.getChemSpiderVendorXrefs())); return doc; } private void mergeIntoDB(long id, Chemical c) { Chemical oldc = getChemicalFromChemicalUUID(id); Chemical mergedc = c.createNewByMerge(oldc); if (mergedc == null) { // whoa! inconsistent values on unmergables, so recover System.err.println("\n\n\n\n\n\n\n\n\n\n"); System.err.println("---- Conflicting uuid or name or smiles or inchi or inchikey or pubchem_id:"); System.err.println("---- NEW\t " + c); System.err.println("---- OLD\t " + oldc); System.err.println("---- Keeping OLD entry"); System.err.println("\n\n\n\n\n\n\n\n\n\n"); return; } BasicDBObject withID = new BasicDBObject(); withID.put("_id", id); this.dbChemicals.remove(withID, WriteConcern.SAFE); // remove the old entry oldc from the collection submitToActChemicalDB(mergedc, id); // now that the old entry is removed, we can simply add } public void updateChemicalWithRoBinningInformation(long id, List<Integer> matchedROs) { BasicDBObject query = new BasicDBObject("_id", id); BasicDBObject createDerivedDataContainer = new BasicDBObject("$set", new BasicDBObject("derived_data", new BasicDBObject())); this.dbChemicals.update(query, createDerivedDataContainer); BasicDBList listOfRos = new BasicDBList(); listOfRos.addAll(matchedROs); BasicDBObject updateDerivedDataContainerWithMatchedRos = new BasicDBObject("$set", new BasicDBObject("derived_data.matched_ros", listOfRos)); this.dbChemicals.update(query, updateDerivedDataContainerWithMatchedRos); } public void updateChemicalWithBrenda(Chemical c, String brendaName) { long id = alreadyEntered(c); if (id < 0) { System.err.println("Update chemical with brenda: " + brendaName + " can't find matching inchi"); return; } BasicDBObject query = new BasicDBObject(); query.put("_id", id); BasicDBObject update = new BasicDBObject(); update.put("$push", new BasicDBObject("names.brenda", brendaName.toLowerCase())); this.dbChemicals.update(query, update); } public void updateChemicalAsNative(String inchi) { 
Chemical c = this.getChemicalFromInChI(inchi); if (c == null) { System.err.println("Can't find native in DB: " + inchi); return; } long id = c.getUuid(); BasicDBObject query = new BasicDBObject(); query.put("_id", id); BasicDBObject update = new BasicDBObject(); update.put("$set", new BasicDBObject("isNative", true)); this.dbChemicals.update(query, update); } // 1. update the chemical entry to point to all these patents // 2. update the patents collection with the (patent_id, scores, patent_text) public void updateChemicalWithPatents(String inchi, Integer num_patents, DBObject patents) { Chemical c = this.getChemicalFromInChI(inchi); if (c == null) { System.err.println("Attempting to add patent. Can't find chem in DB: " + inchi); return; } long id = c.getUuid(); BasicDBObject query = new BasicDBObject(); query.put("_id", id); BasicDBObject update = new BasicDBObject(); BasicDBObject set = new BasicDBObject(); // TODO!!!!!!! // patents is Array of { patent_num: Int, patent_txt: String, patent_score: Int } // ie { patent ID, full text of patent, relevance to biosynthesis } // // put the patents DBObject (all elements of Array) in db.patents. // put the references to the entries within it in db.chemicals // i.e., only an array { patent ID } // TODO!!!!!!! // TODO!!!!!!! // // Need to update functions that serialize and deserialize from the db : // createChemicalDoc and convertDBObjectToChemical // to recreate vendors, patents etc fields.... // // TODO!!!!!!! System.out.println("Installing patents needs to go into separate collections.. see code."); System.exit(-1); set.put("patents", patents); set.put("num_patents", num_patents); update.put("$set", set); this.dbChemicals.update(query, update); } public void updateChemicalWithVendors(String inchi, Integer csid, Integer num_vendors, JSONArray vendors) { Chemical c = this.getChemicalFromInChI(inchi); if (c == null) { System.err.println("Attempting to add vendor. 
Can't find chem in DB: " + inchi); return; } long id = c.getUuid(); BasicDBObject query = new BasicDBObject(); query.put("_id", id); BasicDBObject update = new BasicDBObject(); BasicDBObject set = new BasicDBObject(); DBObject vendors_dbobject = MongoDBToJSON.conv(vendors); set.put("vendors", vendors_dbobject); set.put("csid", csid); set.put("num_vendors", num_vendors); update.put("$set", set); this.dbChemicals.update(query, update); } static boolean jeff_cleanup_quiet = true; // retrieve the entry with InChI = @inchi (or create if one does not exist) // set one of its synonyms to @synonym public long updateOrCreateWithSynonym(String inchi, String synonym) { Chemical c = this.getChemicalFromInChI(inchi); if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] Synonym: " + synonym); if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] InChI : " + inchi); long id = -1; if (c != null) { id = c.getUuid(); if (c.getSynonyms().contains(synonym)) { if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] Already in synonyms. This move gets a -1 count."); } else { c.addSynonym(synonym); BasicDBObject update = createChemicalDoc(c, id); this.dbChemicals.save(update); if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] MOVED to id=" + id); } } else { id = getNextAvailableChemicalDBid(); c = new Chemical(id); c.setInchi(inchi); c.addSynonym(synonym); submitToActChemicalDB(c, id); if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] NEW ENTRY id=" + id); } return id; } // lookup the entry corresponding to @inchi and remove this @synonym from its list of synonyms. public long removeSynonym(String inchi, String synonym) { Chemical c = this.getChemicalFromInChI(inchi); if (c == null) { System.err.println("[Jeff cleanup] ERROR? 
Can't find chemical entry to remove synonym from: " + inchi); return -1; } long id = c.getUuid(); // the synonym can be either under: // canon:String (shortestName) // brendaNames:List<String> // synonyms:List<String> // names:Map<String,String[]> (pubchem names type->names) if (c.getCanon() != null && c.getCanon().trim().equals(synonym)) { c.setCanon(null); if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] Removed from Canonical"); } if (c.getBrendaNames() != null) { // the trim is the important bit in all of this. else we could have just .remove(synonym)'ed List<String> toRemove = new ArrayList<String>(); for (String s : c.getBrendaNames()) { if (s.trim().equals(synonym)) toRemove.add(s); } if (toRemove.size() > 0) { if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] Removed from Brenda"); c.getBrendaNames().removeAll(toRemove); } } if (c.getSynonyms() != null) { // the trim is the important bit in all of this. else we could have just c.getSynonyms().remove(synonym); List<String> toRemove = new ArrayList<String>(); for (String s : c.getSynonyms()) { if (s.trim().equals(synonym)) toRemove.add(s); } if (toRemove.size() > 0) { if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] Removed from Synonyms"); c.getSynonyms().removeAll(toRemove); } } for (String type : c.getPubchemNameTypes()) { List<String> names = new ArrayList<String>(); for (String s : c.getPubchemNames(type)) { if (s.trim().equals(synonym)) { if (!jeff_cleanup_quiet) System.err.println("[Jeff cleanup] Removed from Pubchem"); continue; } names.add(s); } c.getPubchemNames().put(type, names.toArray(new String[0])); } BasicDBObject update = createChemicalDoc(c, id); this.dbChemicals.save(update); return id; } public void updateStoichiometry(Reaction r) { BasicDBObject query = new BasicDBObject().append("_id", r.getUUID()); DBObject obj = this.dbReactions.findOne(query); DBObject enz_summary = (DBObject) obj.get("enz_summary"); BasicDBList substrates = (BasicDBList) 
enz_summary.get("substrates"); BasicDBList newSubstrates = new BasicDBList(); Set<Long> originalSubstrateIDs = new HashSet<Long>(); for (int i = 0; i < substrates.size(); i++) { DBObject substrate = (DBObject) substrates.get(i); Long substrateID = (Long) substrate.get("pubchem"); Boolean isForBalance = (Boolean) substrate.get("balance"); if (isForBalance != null && isForBalance) continue; originalSubstrateIDs.add(substrateID); substrate.put("coefficient", r.getSubstrateCoefficient(substrateID)); newSubstrates.add(substrate); } Set<Long> substratesNew = r.getSubstrateIdsOfSubstrateCoefficients(); for (Long s : substratesNew) { if (originalSubstrateIDs.contains(s)) continue; if (r.getSubstrateCoefficient(s) == null) continue; DBObject substrate = new BasicDBObject(); substrate.put("pubchem", s); substrate.put("coefficient", r.getSubstrateCoefficient(s)); substrate.put("balance", true); newSubstrates.add(substrate); } BasicDBList products = (BasicDBList) enz_summary.get("products"); BasicDBList newProducts = new BasicDBList(); Set<Long> originalProductIDs = new HashSet<Long>(); for (int i = 0; i < products.size(); i++) { DBObject product = (DBObject) products.get(i); Long productID = (Long) product.get("pubchem"); Boolean isForBalance = (Boolean) product.get("balance"); if (isForBalance != null && isForBalance) continue; originalProductIDs.add(productID); product.put("coefficient", r.getProductCoefficient(productID)); newProducts.add(product); } Set<Long> productsNew = r.getProductIdsOfProductCoefficients(); for (Long p : productsNew) { if (originalProductIDs.contains(p)) continue; if (r.getProductCoefficient(p) == null) continue; DBObject product = new BasicDBObject(); product.put("pubchem", p); product.put("coefficient", r.getProductCoefficient(p)); product.put("balance", true); newProducts.add(product); } enz_summary.put("substrates", newSubstrates); enz_summary.put("products", newProducts); this.dbReactions.update(query, obj); } public void 
updateEstimatedEnergy(Chemical chemical) {
    // NOTE(review): the modifiers/return type of this declaration lie before the visible chunk.
    BasicDBObject query = new BasicDBObject().append("_id", chemical.getUuid());
    DBObject obj = this.dbChemicals.findOne(query);
    obj.put("estimateEnergy", chemical.getEstimatedEnergy());
    this.dbChemicals.update(query, obj);
  }

  /**
   * Overwrites the {@code estimateEnergy} field of the reaction document whose {@code _id}
   * equals {@code reaction.getUUID()}. The document is read, modified and written back whole.
   */
  public void updateEstimatedEnergy(Reaction reaction) {
    BasicDBObject query = new BasicDBObject().append("_id", reaction.getUUID());
    DBObject obj = this.dbReactions.findOne(query);
    obj.put("estimateEnergy", reaction.getEstimatedEnergy());
    this.dbReactions.update(query, obj);
  }

  /**
   * Replaces the {@code rxn_refs} list of the sequence document identified by
   * {@code seq.getUUID()} with the ids returned by {@code seq.getReactionsCatalyzed()}.
   */
  public void updateReactionRefsOf(Seq seq) {
    BasicDBObject query = new BasicDBObject().append("_id", seq.getUUID());
    DBObject obj = this.dbSeq.findOne(query);
    BasicDBList refs = new BasicDBList();
    for (Long r : seq.getReactionsCatalyzed()) {
      refs.add(r);
    }
    obj.put("rxn_refs", refs);
    this.dbSeq.update(query, obj);
  }

  /**
   * Sets {@code keywords} and {@code keywords_case_insensitive} on the cascade document with the given id.
   */
  public void updateKeywordsCascade(Long id, Set<String> kwrds, Set<String> ciKwrds) {
    BasicDBObject query = new BasicDBObject().append("_id", id);
    DBObject obj = this.dbCascades.findOne(query);
    obj.put("keywords", kwrds);
    obj.put("keywords_case_insensitive", ciKwrds);
    this.dbCascades.update(query, obj);
  }

  /**
   * Sets {@code keywords} and {@code keywords_case_insensitive} on the waterfall document with the given id.
   */
  public void updateKeywordsWaterfall(Long id, Set<String> kwrds, Set<String> ciKwrds) {
    BasicDBObject query = new BasicDBObject().append("_id", id);
    DBObject obj = this.dbWaterfalls.findOne(query);
    obj.put("keywords", kwrds);
    obj.put("keywords_case_insensitive", ciKwrds);
    this.dbWaterfalls.update(query, obj);
  }

  /**
   * Sets {@code keywords} and {@code keywords_case_insensitive} on the reaction document
   * identified by {@code reaction.getUUID()}, using the reaction's own keyword sets.
   */
  public void updateKeywords(Reaction reaction) {
    BasicDBObject query = new BasicDBObject().append("_id", reaction.getUUID());
    DBObject obj = this.dbReactions.findOne(query);
    obj.put("keywords", reaction.getKeywords());
    obj.put("keywords_case_insensitive", reaction.getCaseInsensitiveKeywords());
    this.dbReactions.update(query, obj);
  }

  /**
   * Appends a new reaction to the reactions collection.
   *
   * @param r reaction to insert; its UUID must be -1 (i.e. not yet assigned)
   * @return the id given to the new document (the collection size before the insert),
   *         or -1 when a document with {@code r.getUUID()} already exists
   * @throws RuntimeException if {@code r} carries an id other than -1
   */
  public int submitToActReactionDB(Reaction r) {
    // if reaction already present in Act, then ignore.
    if (alreadyEntered(r)) {
      System.out.println("___ Duplicate reaction? : " + r.getUUID());
      return -1;
    }

    if (r.getUUID() != -1) {
      // this function is designed to only submit a new entry
      // if you need to update an existing entry, use updateActReaction
      String msg = StringUtils.join(new String[] {
          "FATAL Error: Aborting in MongoDB.submitToActReactionDB.",
          "Reaction asked to add has a populated ID field,",
          "i.e., != -1, while this function strictly appends",
          "to the DB and so will not honor the id field.",
          r.toString() }, "\n");
      System.err.println(msg);
      throw new RuntimeException(msg);
    }

    // Same narrowing as the former `new Long(count).intValue()`, without the deprecated boxing.
    int id = (int) this.dbReactions.count(); // O(1)
    BasicDBObject doc = createReactionDoc(r, id);

    // writing to MongoDB collection act
    this.dbReactions.insert(doc);
    return id;
  }

  /**
   * Replaces the whole reaction document stored under {@code id} with one freshly built from {@code r}.
   */
  public void updateActReaction(Reaction r, int id) {
    // db.collection.update(query, update, options)
    // updates document(s) that match query with the update doc
    // Ref: http://docs.mongodb.org/manual/reference/method/db.collection.update/
    //
    // Update doc: Can be { $set : { <field> : <val> } }
    // in case you need to keep the old document, but just update
    // some fields inside of it.
    // Ref: http://docs.mongodb.org/manual/reference/operator/update/set/
    //
    // But here (and in updateActChemical) we want to overwrite
    // the entire document with a new one, and so
    // a simple update call with the new document is what we need.
    BasicDBObject doc = createReactionDoc(r, id);
    DBObject query = new BasicDBObject();
    query.put("_id", id);
    this.dbReactions.update(query, doc);
  }

  /**
   * Builds the MongoDB document for a reaction.
   *
   * @param r  reaction to serialize
   * @param id value stored as the document's {@code _id}
   * @return a new document holding the reaction's EC number, name, substrate/product/cofactor/coenzyme
   *         lists (under {@code enz_summary}), references, protein data and direction fields
   */
  public static BasicDBObject createReactionDoc(Reaction r, int id) {
    BasicDBObject doc = new BasicDBObject();
    doc.put("_id", id);
    doc.put("ecnum", r.getECNum());
    doc.put("easy_desc", r.getReactionName());

    // Substrates and products additionally carry their stoichiometric coefficient.
    BasicDBList substr = new BasicDBList();
    for (Long s : r.getSubstrates()) {
      DBObject o = getObject("pubchem", s);
      o.put("coefficient", r.getSubstrateCoefficient(s));
      substr.add(o);
    }

    BasicDBList prods = new BasicDBList();
    for (Long p : r.getProducts()) {
      DBObject o = getObject("pubchem", p);
      o.put("coefficient", r.getProductCoefficient(p));
      prods.add(o);
    }

    BasicDBObject enz = new BasicDBObject();
    enz.put("products", prods);
    enz.put("substrates", substr);
    enz.put("product_cofactors", buildPubchemRefList(r.getProductCofactors()));
    enz.put("substrate_cofactors", buildPubchemRefList(r.getSubstrateCofactors()));
    enz.put("coenzymes", buildPubchemRefList(r.getCoenzymes()));
    doc.put("enz_summary", enz);

    doc.put("is_abstract", r.getRxnDetailType().name());

    if (r.getDataSource() != null) {
      doc.put("datasource", r.getDataSource().name());
    }

    if (r.getMechanisticValidatorResult() != null) {
      doc.put("mechanistic_validator_result", MongoDBToJSON.conv(r.getMechanisticValidatorResult()));
    }

    BasicDBList refs = new BasicDBList();
    for (P<Reaction.RefDataSource, String> ref : r.getReferences()) {
      BasicDBObject refEntry = new BasicDBObject();
      refEntry.put("src", ref.fst().toString());
      refEntry.put("val", ref.snd());
      refs.add(refEntry);
    }
    doc.put("references", refs);

    BasicDBList proteins = new BasicDBList();
    for (JSONObject proteinData : r.getProteinData()) {
      proteins.add(MongoDBToJSON.conv(proteinData));
    }
    doc.put("proteins", proteins);

    ConversionDirectionType cd = r.getConversionDirection();
    doc.put("conversion_direction", cd == null ? null : cd.toString());

    StepDirection psd = r.getPathwayStepDirection();
    doc.put("pathway_step_direction", psd == null ? null : psd.toString());

    return doc;
  }

  /** Wraps each id as a {@code {pubchem: id}} object, preserving array order. */
  private static BasicDBList buildPubchemRefList(Long[] ids) {
    BasicDBList list = new BasicDBList();
    for (Long chemId : ids) {
      list.add(getObject("pubchem", chemId));
    }
    return list;
  }

  /**
   * Inserts an organism's id, parent id and rank into the organisms collection;
   * when that collection is not set, prints the organism to stdout instead.
   */
  public void submitToActOrganismDB(Organism o) {
    BasicDBObject doc = new BasicDBObject();
    doc.put("_id", o.getUUID());
    doc.put("parent_id", o.getParent());
    doc.put("rank", o.getRank());
    if (this.dbOrganisms == null) {
      System.out.print("Organism: " + o);
    } else {
      this.dbOrganisms.insert(doc);
    }
  }

  /**
   * Inserts an organism's id and name into the organism-names collection;
   * when that collection is not set, prints the organism to stdout instead.
   */
  public void submitToActOrganismNameDB(Organism o) {
    BasicDBObject doc = new BasicDBObject();
    doc.put("org_id", o.getUUID());
    doc.put("name", o.getName());
    // TODO: support NCBI ids too.
    if (this.dbOrganismNames == null) {
      System.out.print("Organism: " + o);
    } else {
      this.dbOrganismNames.insert(doc);
    }
  }

  /**
   * establishes new numbering system for organisms not already in our database
   * @param name the name of the organism to be added to the database
   * @return the id of the new organism added to the database, or null when the
   *         organism-names collection is not set (the name is then only printed)
   */
  public Long submitToActOrganismNameDB(String name) {
    // The null check must precede count(): previously count() was called first, so a missing
    // collection raised a NullPointerException and the print-only branch could never run.
    if (this.dbOrganismNames == null) {
      System.out.print("Organism: " + name);
      return null;
    }

    Long id = this.dbOrganismNames.count() + ORG_ID_BASE;
    BasicDBObject doc = new BasicDBObject();
    doc.put("org_id", id);
    doc.put("name", name);
    // TODO: support NCBI ids too.
    this.dbOrganismNames.insert(doc);
    return id;
  }

  /**
   * Stores a Pubmed entry keyed by its PMID (read from MedlineCitation/PMID) unless a document
   * with that id is already present. When the pubmed collection is not set, the entry is logged instead.
   */
  public void submitToPubmedDB(PubmedEntry entry) {
    List<String> xPath = new ArrayList<String>();
    xPath.add("MedlineCitation");
    xPath.add("PMID");
    int pmid = Integer.parseInt(entry.getXPathString(xPath));
    if (this.dbPubmed != null) {
      if (alreadyEntered(entry, pmid)) {
        return;
      }
      DBObject doc = (DBObject) JSON.parse(entry.toJSON());
      doc.put("_id", pmid);
      this.dbPubmed.insert(doc);
    } else {
      Logger.printf(0, "Pubmed Entry [%d]: %s\n", pmid, entry); // human readable...
    }
  }

  /** Returns a new single-field document {@code {field: val}}. */
  private static BasicDBObject getObject(String field, Long val) {
    BasicDBObject singularObj = new BasicDBObject();
    singularObj.put(field, val);
    return singularObj;
  }

  /** Returns a new two-field document {@code {f1: v1, f2: v2}}. */
  private BasicDBObject getObject(String f1, Long v1, String f2, Float v2) {
    BasicDBObject o = new BasicDBObject();
    o.put(f1, v1);
    o.put(f2, v2);
    return o;
  }

  /*
   * Return -1 if the chemical doesn't exist in the database yet.
   * Else return the id.
   */
  private long alreadyEntered(Chemical c) {
    String inchi = c.getInChI();
    long retId = -1;

    if (inchi != null) {
      BasicDBObject query = new BasicDBObject();
      query.put("InChI", inchi);
      DBObject o = this.dbChemicals.findOne(query);
      if (o != null) {
        retId = (Long) o.get("_id"); // checked: db type IS long
      }
    }
    return retId;
  }

  /**
   * @return true when at least one chemical document has the given InChI;
   *         false when none does or the chemicals collection is not set
   */
  public boolean alreadyEnteredChemical(String inchi) {
    if (this.dbChemicals == null) {
      return false; // TODO: should this throw an exception instead?
    }

    BasicDBObject query = new BasicDBObject("InChI", inchi);
    long c = this.dbChemicals.count(query);
    return c > 0;
  }

  /**
   * @return the {@code _id} of a chemical document with the given InChI, or null when
   *         there is none or the chemicals collection is not set
   */
  public Long getExistingDBIdForInChI(String inchi) { // TODO: should this return some UUID type instead of Long?
    if (this.dbChemicals == null) {
      return null; // TODO: should this throw an exception instead?
    }

    BasicDBObject query = new BasicDBObject("InChI", inchi);
    BasicDBObject fields = new BasicDBObject("_id", true);
    DBObject o = this.dbChemicals.findOne(query, fields);
    if (o == null) {
      return null;
    }
    // TODO: does this need to be checked?
    return (Long) o.get("_id");
  }

  /** Returns the id of the cofactor document with this cofactor's InChI, or -1 if absent or InChI is null. */
  private long alreadyEntered(Cofactor cof) {
    String inchi = cof.getInChI();
    long retId = -1;

    if (inchi != null) {
      BasicDBObject query = new BasicDBObject();
      query.put("InChI", inchi);
      DBObject o = this.dbCofactors.findOne(query);
      if (o != null) {
        retId = (Long) o.get("_id"); // checked: db type IS long
      }
    }
    return retId;
  }

  /** Returns true when a reaction document with {@code _id == r.getUUID()} exists. */
  private boolean alreadyEntered(Reaction r) {
    BasicDBObject query = new BasicDBObject();
    query.put("_id", r.getUUID());

    DBObject o = this.dbReactions.findOne(query);
    return o != null; // meaning there is at least one document that matches
  }

  /** Returns true when a pubmed document with {@code _id == pmid} exists. */
  private boolean alreadyEntered(PubmedEntry entry, int pmid) {
    BasicDBObject query = new BasicDBObject();
    query.put("_id", pmid);

    DBObject o = this.dbPubmed.findOne(query);
    return o != null;
  }

  /*
   *
   *
   * End of functions required for populating MongoAct
   *
   *
   */

  /** Drains a reactions cursor into a list of ids, closing the cursor even if iteration fails. */
  private static List<Long> collectReactionIds(DBCursor cur) {
    List<Long> reactions = new ArrayList<Long>();
    try {
      while (cur.hasNext()) {
        long id = (Integer) cur.next().get("_id"); // checked: db type IS int
        reactions.add(id);
      }
    } finally {
      cur.close();
    }
    return reactions;
  }

  /**
   * @return ids of reactions listing {@code reactant} among their substrates and
   *         {@code product} among their products
   */
  public List<Long> getRxnsWith(Long reactant, Long product) {
    BasicDBObject query = new BasicDBObject();
    query.put("enz_summary.products.pubchem", product);
    query.put("enz_summary.substrates.pubchem", reactant);
    return collectReactionIds(this.dbReactions.find(query));
  }

  /**
   * Finds reactions whose substrates contain all of {@code reactants} and whose products
   * contain all of {@code products} (an empty list places no constraint on that side).
   *
   * @throws IllegalArgumentException when both lists are empty
   */
  public List<Reaction> getRxnsWithAll(List<Long> reactants, List<Long> products) {
    if (reactants.isEmpty() && products.isEmpty()) {
      throw new IllegalArgumentException("Reactants and products both empty! Query would return entire DB.");
    }
    BasicDBObject query = new BasicDBObject();

    if (!reactants.isEmpty()) {
      BasicDBList substrateIds = new BasicDBList();
      substrateIds.addAll(reactants);
      query.put("enz_summary.substrates.pubchem", new BasicDBObject("$all", substrateIds));
    }

    if (!products.isEmpty()) {
      BasicDBList productIds = new BasicDBList();
      productIds.addAll(products);
      query.put("enz_summary.products.pubchem", new BasicDBObject("$all", productIds));
    }

    DBCursor cur = this.dbReactions.find(query);
    List<Reaction> reactions = new ArrayList<Reaction>();

    try {
      while (cur.hasNext()) {
        DBObject o = cur.next();
        reactions.add(convertDBObjectToReaction(o));
      }
    } finally {
      cur.close();
    }

    return reactions;
  }

  /**
   * Returns ids of reactions with the given EC number and organism id, filtered by a
   * "substrates != s OR products != s" clause built from {@code substrates}.
   */
  public List<Long> getRxnsWithEnzyme(String enzyme, Long org, List<Long> substrates) {
    BasicDBObject query = new BasicDBObject();
    query.put("ecnum", enzyme);
    query.put("organisms.id", org);
    // NOTE(review): each iteration overwrites the "$or" key, so only the last substrate
    // constrains the query. Kept as found because the intended any/all semantics is unclear.
    for (Long substrate : substrates) {
      BasicDBObject mainQuery = new BasicDBObject();
      mainQuery.put("$ne", substrate);
      BasicDBList queryList = new BasicDBList();
      BasicDBObject productQuery = new BasicDBObject();
      productQuery.put("enz_summary.products.pubchem", mainQuery);
      BasicDBObject substrateQuery = new BasicDBObject();
      substrateQuery.put("enz_summary.substrates.pubchem", mainQuery);
      queryList.add(substrateQuery);
      queryList.add(productQuery);
      query.put("$or", queryList);
    }
    return collectReactionIds(this.dbReactions.find(query));
  }

  /**
   * Returns ids of reactions in organism {@code org} whose EC number differs from
   * {@code enzyme} and that list a given substrate among their substrates or products.
   */
  public List<Long> getRxnsWithSubstrate(String enzyme, Long org, List<Long> substrates) {
    BasicDBObject query = new BasicDBObject();
    query.put("organisms.id", org);
    // "$ne" is a field-level operator: it has to be nested under the field it constrains.
    // The previous top-level {"$ne": {"ecnum": enzyme}} is not a valid query document.
    query.put("ecnum", new BasicDBObject("$ne", enzyme));
    // NOTE(review): each iteration overwrites the "$or" key, so only the last substrate
    // constrains the query. Kept as found because the intended any/all semantics is unclear.
    for (Long substrate : substrates) {
      BasicDBList queryList = new BasicDBList();
      DBObject querySubstrate = new BasicDBObject();
      querySubstrate.put("enz_summary.substrates.pubchem", substrate);
      DBObject queryProduct = new BasicDBObject();
      queryProduct.put("enz_summary.products.pubchem", substrate);
      queryList.add(querySubstrate);
      queryList.add(queryProduct);
      query.put("$or", queryList);
    }
    return collectReactionIds(this.dbReactions.find(query));
  }

  /**
   * @return the chemical's shortest BRENDA name, else its shortest name, else "no_name";
   *         "unknown_chemical" when no chemical has this id
   */
  public String getShortestName(Long id) {
    Chemical chem = this.getChemicalFromChemicalUUID(id);
    if (chem == null) {
      return "unknown_chemical";
    }
    String name = chem.getShortestBRENDAName();
    if (name == null) {
      name = chem.getShortestName();
    }
    if (name == null) {
      name = "no_name";
    }
    return name;
  }

  /** Returns all chemicals whose {@code isNative} field is true. */
  public List<Chemical> getNativeMetaboliteChems() {
    return constructAllChemicalsFromActData("isNative", true);
  }

  // Populated on the first call to getCofactorChemicals().
  private List<Long> _cofactor_ids_cache = null;
  private List<Chemical> _cofactor_chemicals_cache = null;

  /**
   * Returns the chemicals flagged {@code isCofactor} in the DB, plus any chemical whose InChI
   * matches a {@link SomeCofactorNames} entry not already among them. As a side effect it records
   * the DB id on each matched {@link SomeCofactorNames} constant and, on the first call, fills the
   * cofactor caches.
   */
  public List<Chemical> getCofactorChemicals() {
    List<Chemical> cof = constructAllChemicalsFromActData("isCofactor", true);

    // before we return this set, we need to make sure some
    // cases that for some reason are not in the db as cofactors
    // are marked as such.
    HashMap<String, Chemical> inchis = new HashMap<String, Chemical>();
    for (Chemical c : cof) {
      if (c.getInChI() != null) {
        inchis.put(c.getInChI(), c);
      } else {
        // The uuid is a format ARGUMENT; it used to be concatenated onto the format string,
        // which left %d bound to the synonyms and made String.format throw.
        Logger.print(1, String.format(
            "[MongoDB.getCofactorChemicals] No inchi for cofactor(id:%d): %s\n ",
            c.getUuid(), c.getSynonyms()));
      }
    }

    for (SomeCofactorNames cofactor : SomeCofactorNames.values()) {
      String shouldbethere = cofactor.getInChI();
      if (!inchis.containsKey(shouldbethere)) {
        List<Chemical> toAdd = constructAllChemicalsFromActData("InChI", shouldbethere);
        cof.addAll(toAdd);
        for (Chemical c : toAdd) {
          addToDefiniteCofactorsMaps(cofactor, c);
        }
      } else {
        addToDefiniteCofactorsMaps(cofactor, inchis.get(shouldbethere));
      }
    }

    // on first call, install the cofactors read from db into cache
    if (_cofactor_ids_cache == null) {
      _cofactor_chemicals_cache = cof;
      _cofactor_ids_cache = new ArrayList<Long>();
      for (Chemical c : cof) {
        _cofactor_ids_cache.add(c.getUuid());
      }
    }
    return cof;
  }

  /** Returns true when the id is in the cofactor id cache, loading the cache first if needed. */
  private boolean isCofactor(Long c) {
    if (_cofactor_ids_cache == null) {
      // getCofactorChemicals inits cache as a side-effect
      getCofactorChemicals();
    }

    return _cofactor_ids_cache.contains(c);
  }

  /**
   * Records the DB id of chemical {@code c} on the given cofactor constant.
   * This replaces a switch that listed every constant and called {@code setMongoDBId}
   * on that same constant in each case.
   */
  private void addToDefiniteCofactorsMaps(SomeCofactorNames cofactor, Chemical c) {
    Long id = c.getUuid();
    cofactor.setMongoDBId(id);
  }

  // These should all be by default in the DB, but if not we augment the DB cofactors tags with these chemicals
  // It is ok for this list to not be exhaustive....
this is just for parent assignment in visualization public enum SomeCofactorNames { Water(0), ATP(1), Acceptor(2), AcceptorH2(3), ReducedAcceptor(4), OxidizedFerredoxin(5), ReducedFerredoxin( 6), CO2(7), BicarbonateHCO3(8), CoA(9), H(10), NH3(11), HCl(12), Cl(13), O2(14), CTP(15), dATP( 16), H2S(17), dGTP(18), PhosphoricAcid(19), I(20), MolI(21), AMP( 22), Phosphoadenylylsulfate(23), H2SO3(24), adenylylsulfate(25), GTP(26), NADPH( 27), dADP(28), NADP(29), UMP(30), dCDP(31), ADP(32), ADPm(33), UDP(34); int internalId; Long mongodbId; private SomeCofactorNames(int id) { this.internalId = id; this.mongodbId = null; } public String getInChI() { return this._definiteCofactors[internalId]; } public void setMongoDBId(Long id) { this.mongodbId = id; } public Long getMongoDBId() { return this.mongodbId; } private static final String[] raw_definiteCofactors = { // 0 Water: "InChI=1S/H2O/h1H2", // [H2o, H2O, h2O][water, Dihydrogen oxide, Water vapor, Distilled water, oxidane, Deionized water, Purified water, Water, purified, Dihydrogen Monoxide, DHMO, oxygen, OH-, monohydrate, aqua, hydrate, o-] // 1 ATP: "InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6+,7?,10-/m1/s1", // [L-ATP, D-ATP, araATP, alphaATP, adenosyl-ribose triphosphate, adenosine 5'-triphosphate, 5'-ATP, ATP, adenosine triphosphate][Adenosine triphosphate, Striadyne, Myotriphos, Triadenyl, Triphosphaden, Atriphos, Glucobasin, Adephos, Adetol, Triphosaden, AC1NSUB1, [[(2S,5S)-5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate, Adenosine 5'-(tetrahydrogen triphosphate)] // 2 Acceptor: "InChI=1S/R", // [acceptor, oxidized adrenal ferredoxin, oxidized adrenodoxin][] // 3 AcceptorH2: "InChI=1S/RH2/h1H2", // [reduced adrenal ferredoxin, reduced adrenodoxin, acceptor-H2, acceptorH2][] // 4 ReducedAcceptor: "InChI=1S/RH3/h1H3", // 
[reduced acceptor, AH2, putidaredoxin, donor][] // 5 OxidizedFerredoxin: "InChI=1S/4RS.2Fe.2S/c4*1-2;;;;/q4*-1;2*+5;;", // [oxidized ferredoxin][] // 6 ReducedFerredoxin: "InChI=1S/4RS.2Fe.2S/c4*1-2;;;;/q4*-1;2*+4;;", // [reduced ferredoxin][] // 7 CO2: "InChI=1S/CO2/c2-1-3", // [carbon dioxide, carbon dioxide, carbonic acid gas] // 8 BicarbonateHCO3: "InChI=1S/CH2O3/c2-1(3)4/h(H2,2,3,4)/p-1", // [HCO3-, bicarbonate, bicarbonate] // 9 CoA "InChI=1S/C21H36N7O16P3S/c1-21(2,16(31)19(32)24-4-3-12(29)23-5-6-48)8-41-47(38,39)44-46(36,37)40-7-11-15(43-45(33,34)35)14(30)20(42-11)28-10-27-13-17(22)25-9-26-18(13)28/h9-11,14-16,20,30-31,48H,3-8H2,1-2H3,(H,23,29)(H,24,32)(H,36,37)(H,38,39)(H2,22,25,26)(H2,33,34,35)/t11-,14-,15-,16+,20-/m1/s1", // [coenzyme A, CoA-SH, CoASH] // 10 H "InChI=1S/p+1", // [H+/out, H+/in, H+out] // 11 NH3 "InChI=1S/H3N/h1H3", // Ammonia Gas // 12 HCl, Cl- "InChI=1S/ClH/h1H", // hydrochloric acid, hydrogen chloride, Muriatic acid // 13 Cl- "InChI=1S/ClH/h1H/p-1", // [Cl-/out, Cl-/in, chloride] // 14 O2 "InChI=1S/O2/c1-2", // oxygen molecule, Molecular oxygen, Dioxygen // 15 CTP "InChI=1S/C9H16N3O14P3/c10-5-1-2-12(9(15)11-5)8-7(14)6(13)4(24-8)3-23-28(19,20)26-29(21,22)25-27(16,17)18/h1-2,4,6-8,13-14H,3H2,(H,19,20)(H,21,22)(H2,10,11,15)(H2,16,17,18)/t4-,6-,7+,8-/m1/s1", // L-CTP, D-CTP, cytosine arabinoside 5'-triphosphate // 16 dATP "InChI=1S/C10H16N5O12P3/c11-9-8-10(13-3-12-9)15(4-14-8)7-1-5(16)6(25-7)2-24-29(20,21)27-30(22,23)26-28(17,18)19/h3-7,16H,1-2H2,(H,20,21)(H,22,23)(H2,11,12,13)(H2,17,18,19)/t5-,6+,7+/m0/s1", // deoxyATP, L-dATP, L-2'-dATP // 17 hydrogen sulfide "InChI=1S/H2S/h1H2", // hydrogensulfide, hydrogen sulfide, hydrogen sulfide // 18 dGTP "InChI=1S/C10H16N5O13P3/c11-10-13-8-7(9(17)14-10)12-3-15(8)6-1-4(16)5(26-6)2-25-30(21,22)28-31(23,24)27-29(18,19)20/h3-6,16H,1-2H2,(H,21,22)(H,23,24)(H2,18,19,20)(H3,11,13,14,17)/t4-,5+,6+/m0/s1", // 2'-dGTP, D-GTP, deoxyGTP // 19 Phosphoric acid "InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)", // 
phosphate/out, phosphate/in, Phosphoric acid // 20 Iodide ion "InChI=1S/HI/h1H/p-1", // [iodide, Iodide, Iodide ion] // 21 Molecular iodine "InChI=1S/I2/c1-2", // [Molecular iodine, Iodine solution, Tincture iodine] // 22 AMP "InChI=1S/C10H14N5O7P/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(22-10)1-21-23(18,19)20/h2-4,6-7,10,16-17H,1H2,(H2,11,12,13)(H2,18,19,20)/t4-,6-,7+,10-/m1/s1", // 5'AMP, arabinosyl adenine 5'-phosphate, arabinosyl adenine 5'-monophosphate // 23 3-phosphoadenylylsulfate "InChI=1S/C10H15N5O13P2S/c11-8-5-9(13-2-12-8)15(3-14-5)10-6(16)7(27-29(17,18)19)4(26-10)1-25-30(20,21)28-31(22,23)24/h2-4,6-7,10,16H,1H2,(H,20,21)(H2,11,12,13)(H2,17,18,19)(H,22,23,24)/t4-,6-,7-,10-/m1/s1", // [3'-phosphoadenylylsulfate, 3'-phosphoadenylyl 5'-phosphosulfate, 3-phosphoadenylylsulfate] // 24 Sulfur dioxide solution "InChI=1S/H2O3S/c1-4(2)3/h(H2,1,2,3)", // [Sulfurous acid, Sulphurous acid, Sulfur dioxide solution] // 25 adenylylsulfate "InChI=1S/C10H14N5O10PS/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-26(18,19)25-27(20,21)22/h2-4,6-7,10,16-17H,1H2,(H,18,19)(H2,11,12,13)(H,20,21,22)/t4-,6-,7-,10-/m1/s1", // adenosine 5-phosphosulfate, adenylylsulfate, adenosine 5'-phosphate 5'-sulfate // 26 GTP "InChI=1S/C10H16N5O14P3/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(27-9)1-26-31(22,23)29-32(24,25)28-30(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H,24,25)(H2,19,20,21)(H3,11,13,14,18)/t3-,5-,6-,9-/m1/s1", // guanosine 5'-triphosphate, GUANOSINE TRIPHOSPHATE, 5'-GTP // 27 NADPH "InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1", // NAD(P)H, 2'-NADPH, NADPH // 28 dADP 
"InChI=1S/C10H15N5O9P2/c11-9-8-10(13-3-12-9)15(4-14-8)7-1-5(16)6(23-7)2-22-26(20,21)24-25(17,18)19/h3-7,16H,1-2H2,(H,20,21)(H2,11,12,13)(H2,17,18,19)/t5-,6+,7+/m0/s1", // 2'-dADP, 2'-deoxy-ADP, deoxyADP // 29 NADP+ "InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1-4,7-8,10-11,13-16,20-21,29-31H,5-6H2,(H7-,22,23,24,25,32,33,34,35,36,37,38,39)/p+1/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1", // NAD(P)+, beta-NADP+, 2'-NADP+ // 30 UMP "InChI=1S/C9H13N2O9P/c12-5-1-2-11(9(15)10-5)8-7(14)6(13)4(20-8)3-19-21(16,17)18/h1-2,4,6-8,13-14H,3H2,(H,10,12,15)(H2,16,17,18)/t4-,6+,7?,8-/m1/s1", // D-UMP, deazauridine 5'-phosphate, ara-UMP // 31 dCDP "InChI=1S/C9H15N3O10P2/c10-7-1-2-12(9(14)11-7)8-3-5(13)6(21-8)4-20-24(18,19)22-23(15,16)17/h1-2,5-6,8,13H,3-4H2,(H,18,19)(H2,10,11,14)(H2,15,16,17)/t5-,6+,8+/m0/s1", // L-dCDP, D-dCDP, 2'-deoxy-CDP // 32 ADP "InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7+,10-/m1/s1", // L-ADP, D-ADP, araADP // 33 ADP from metacyc "InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)/p-3", // ADP // 34 UDP from metacyc "InChI=1S/C9H14N2O12P2/c12-5-1-2-11(9(15)10-5)8-7(14)6(13)4(22-8)3-21-25(19,20)23-24(16,17)18/h1-2,4,6-8,13-14H,3H2,(H,19,20)(H,10,12,15)(H2,16,17,18)", // UDP }; private static String[] _definiteCofactors = convertToConsistent(raw_definiteCofactors, "Installed cofactors"); }; private static String[] convertToConsistent(String[] raw, String debug_tag) { String[] consistent = new String[raw.length]; for (int i = 0; i < raw.length; i++) { consistent[i] = ConsistentInChI.consistentInChI(raw[i], debug_tag); } return consistent; } public Set<Long> getNativeIDs() { List<Chemical> 
cofactorChemicals = getCofactorChemicals(); List<Chemical> nativeChemicals = getNativeMetaboliteChems(); Set<Long> ids = new HashSet<Long>(); for (Chemical c : cofactorChemicals) ids.add(c.getUuid()); for (Chemical c : nativeChemicals) ids.add(c.getUuid()); return ids; } public Chemical getChemicalFromSMILES(String smile) { return convertDBObjectToChemicalFromActData("SMILES", smile); } /** * Transform inchis into chemical ids. * @param inchis A list of inchis to transform. * @return The corresponding chemical ids. */ public Map<String, Long> getIdsFromInChIs(List<String> inchis) { Map<String, Long> results = new HashMap<>(); for (String inchi : inchis) { Chemical chemical = getChemicalFromInChI(inchi); if (chemical != null) { results.put(inchi, chemical.getUuid()); } } return results; } public Chemical getChemicalFromInChI(String inchi) { return convertDBObjectToChemicalFromActData("InChI", inchi); } public Chemical getChemicalFromInChIKey(String inchiKey) { return convertDBObjectToChemicalFromActData("InChIKey", inchiKey); } public Chemical getChemicalFromChemicalUUID(Long cuuid) { return convertDBObjectToChemicalFromActData("_id", cuuid); } public Chemical getChemicalFromCanonName(String chemName) { return convertDBObjectToChemicalFromActData("canonical", chemName); } /** * Retrieves a Chemical from its CHEBI ID. A ChEBI ID maps to at most one chemical in the installer database. * @param chebiId String representation of the ChEBI ID * @return Chemical object corresponding to this ChEBI ID if it exists, otherwise null. 
*/ public Chemical getChemicalFromChebiId(String chebiId) { return convertDBObjectToChemicalFromActData("xref.CHEBI.dbid", chebiId); } public long getChemicalIDFromName(String chemName) { return getChemicalIDFromName(chemName, false); } public long getChemicalIDFromName(String chemName, boolean caseInsensitive) { BasicDBObject query = new BasicDBObject(); DBObject brenda = new BasicDBObject(); DBObject pubchem = new BasicDBObject(); DBObject synonyms = new BasicDBObject(); if (caseInsensitive) { String escapedName = Pattern.quote(chemName); Pattern regex = Pattern.compile("^" + escapedName + "$", Pattern.CASE_INSENSITIVE); brenda.put("names.brenda", regex); pubchem.put("names.pubchem.values", regex); synonyms.put("names.synonyms", regex); } else { brenda.put("names.brenda", chemName); pubchem.put("names.pubchem.values", chemName); synonyms.put("names.synonyms", chemName); } BasicDBList ors = new BasicDBList(); ors.add(brenda); ors.add(pubchem); ors.add(synonyms); query.put("$or", ors); Long id; DBObject o = this.dbChemicals.findOne(query); if (o != null) id = (Long) o.get("_id"); // checked: db type IS Long else id = -1L; return id; } public long getChemicalIDFromExactBrendaName(String chemName) { BasicDBObject query = new BasicDBObject(); query.put("names.brenda", chemName.toLowerCase()); Long id; DBObject o = this.dbChemicals.findOne(query); if (o != null) id = (Long) o.get("_id"); // checked: db type IS Long else id = -1L; return id; } public String getChemicalDBJSON(Long uuid) { BasicDBObject query = new BasicDBObject(); query.put("_id", uuid); DBObject o = this.dbChemicals.findOne(query); if (o == null) return null; Set<String> keys = o.keySet(); String json = "{\n"; for (String key : keys) { json += "\t" + key + " : " + o.get(key) + ",\n"; } json += "}"; return json; } public List<Chemical> getChemicalsThatHaveField(String field) { DBObject val = new BasicDBObject(); val.put("$exists", "true"); return constructAllChemicalsFromActData(field, val); } public 
List<Chemical> getDrugbankChemicals() { DBObject val = new BasicDBObject(); val.put("$ne", null); String field = "xref.DRUGBANK"; return constructAllChemicalsFromActData(field, val); } public List<Chemical> getSigmaChemicals() { DBObject val = new BasicDBObject(); val.put("$ne", null); String field = "xref.SIGMA"; return constructAllChemicalsFromActData(field, val); } public List<Chemical> constructAllChemicalsFromActData(String field, Object val) { return constructAllChemicalsFromActData(field, val, new BasicDBObject()); } public List<Chemical> constructAllChemicalsFromActData(String field, Object val, BasicDBObject keys) { DBCursor cur = constructCursorForMatchingChemicals(field, val, keys); List<Chemical> chems = new ArrayList<Chemical>(); while (cur.hasNext()) chems.add(convertDBObjectToChemical(cur.next())); cur.close(); return chems; } public DBIterator getIdCursorForFakeChemicals() { DBObject fakeRegex = new BasicDBObject(); DBObject abstractInchi = new BasicDBObject(); fakeRegex.put(ChemicalKeywords.INCHI$.MODULE$.toString(), new BasicDBObject(MongoKeywords.REGEX$.MODULE$.toString(), "^InChI=/FAKE")); abstractInchi.put(ChemicalKeywords.INCHI$.MODULE$.toString(), new BasicDBObject(MongoKeywords.REGEX$.MODULE$.toString(), "^InChI=.*R.*")); BasicDBList conditionList = new BasicDBList(); conditionList.add(fakeRegex); conditionList.add(abstractInchi); BasicDBObject conditions = new BasicDBObject(MongoKeywords.OR$.MODULE$.toString(), conditionList); return getIteratorOverChemicals(conditions, new BasicDBObject(ChemicalKeywords.ID$.MODULE$.toString(), true)); } private DBCursor constructCursorForAllChemicals() { return constructCursorForMatchingChemicals(null, null, null); } private static final BasicDBObject DEFAULT_CURSOR_ORDER_BY_ID = new BasicDBObject("$query", new BasicDBObject()) .append("$orderby", new BasicDBObject("_id", 1)); private DBCursor constructCursorForMatchingChemicals(String field, Object val, BasicDBObject keys) { DBCursor cur; if (field != 
null) { BasicDBObject query; query = new BasicDBObject(); query.put(field, val); if (keys == null) { cur = this.dbChemicals.find(query); } else { cur = this.dbChemicals.find(query, keys); } } else if (keys != null) { cur = this.dbChemicals.find(new BasicDBObject(), keys); } else { /* Ensure a default ordering when iterating over a whole collection. * This helps maintain result stability and should have minimal performance cost since we're iterating over * the primary keys in their natural order. */ cur = this.dbChemicals.find(DEFAULT_CURSOR_ORDER_BY_ID); } return cur; } private DBCursor constructCursorForAllCofactors() { return this.dbCofactors.find(); } public Map<String, Long> constructAllInChIs() { Map<String, Long> chems = new HashMap<String, Long>(); BasicDBObject keys = new BasicDBObject(); keys.append("_id", true); keys.append("InChI", true); DBCursor cur = constructCursorForMatchingChemicals(null, null, keys); while (cur.hasNext()) { DBObject o = cur.next(); long uuid = (Long) o.get("_id"); // checked: db type IS long String inchi = (String) o.get("InChI"); chems.put(inchi, uuid); } cur.close(); return chems; } public void smartsMatchAllChemicals(String target) { Indigo indigo = new Indigo(); IndigoInchi inchi = new IndigoInchi(indigo); IndigoObject query = indigo.loadSmarts(target); query.optimize(); DBCursor cur = constructCursorForAllChemicals(); IndigoObject mol = null, matcher; int cnt; while (cur.hasNext()) { Chemical c = convertDBObjectToChemical(cur.next()); try { mol = inchi.loadMolecule(c.getInChI()); } catch (IndigoException e) { if (e.getMessage().startsWith("core: Indigo-InChI: InChI loading failed:")) continue; // could not load } matcher = indigo.substructureMatcher(mol); if ((cnt = matcher.countMatches(query)) > 0) { // matches.add(c); memout's System.out.format("%d\t%s\n", c.getUuid(), c.getInChI()); } } cur.close(); } private Chemical convertDBObjectToChemicalFromActData(String field, Object val) { BasicDBObject query = new 
BasicDBObject();
    query.put(field, val);

    // project out the synonyms field, even though we don't have anything in it right now.
    BasicDBObject keys = new BasicDBObject();
    // keys.put("names", 0); // 0 means exclude, rest are included
    DBObject o = this.dbChemicals.findOne(query, keys);
    if (o == null)
      return null;
    return convertDBObjectToChemical(o);
  }

  /**
   * Converts a document from the chemicals collection into a {@link Chemical}.
   *
   * Optional parts of the document (xref, derived_data, names and each of its sub-lists,
   * isCofactor, isNative, estimateEnergy, keywords, vendors) may be absent; each one is read
   * independently of the others.
   *
   * @param o the chemical document; must contain an "_id"
   * @return the populated chemical
   */
  public Chemical convertDBObjectToChemical(DBObject o) {
    // The id is expected to be a Long. Integer ids are still accepted, with a warning,
    // without using ClassCastException for control flow.
    Object rawId = o.get("_id");
    long uuid;
    if (rawId instanceof Long) {
      uuid = (Long) rawId; // checked: db type IS long
    } else {
      System.err.println("WARNING: MongoDB.convertDBObjectToChemical ClassCast db.chemicals.id is not Long?");
      uuid = ((Integer) rawId).longValue(); // this should be dead code
    }

    String chemName = (String) o.get("canonical");
    DBObject xrefs = (DBObject) o.get("xref");

    // As before, only a Long-typed pubchem xref is used; a missing or differently typed
    // value maps to -1.
    Long pcid = -1L;
    Object rawPcid = (xrefs == null) ? null : xrefs.get("pubchem");
    if (rawPcid instanceof Long) {
      pcid = (Long) rawPcid;
    }

    String inchi = (String) o.get("InChI");
    String smiles = (String) o.get("SMILES");

    Chemical c = new Chemical(uuid, pcid, chemName, smiles);
    c.setInchi(inchi);
    c.setCanon(chemName);

    if (xrefs != null) {
      for (String typ : xrefs.keySet()) {
        if (typ.equals("pubchem"))
          continue;
        try {
          c.putRef(Chemical.REFS.valueOf(typ), MongoDBToJSON.conv((DBObject) xrefs.get(typ)));
        } catch (Exception ignored) {
          // Best effort: an unknown xref type or a malformed entry is skipped. Handling this
          // per entry means one bad xref no longer drops all the xrefs that follow it.
        }
      }
    }

    DBObject derivedData = (DBObject) o.get("derived_data");
    if (derivedData != null) {
      BasicDBList matchedRos = (BasicDBList) derivedData.get("matched_ros");
      if (matchedRos != null) {
        for (Object roId : matchedRos) {
          c.addSubstructureRoId((Integer) roId);
        }
      }
    }

    // Each name list is optional on its own. Previously the synonyms and pubchem lists were
    // only read when the brenda list existed, and were then iterated without a null check.
    DBObject namesDoc = (DBObject) o.get("names");
    if (namesDoc != null) {
      BasicDBList brendaNames = (BasicDBList) namesDoc.get("brenda");
      if (brendaNames != null) {
        for (Object n : brendaNames) {
          c.addBrendaNames((String) n);
        }
      }

      BasicDBList synonyms = (BasicDBList) namesDoc.get("synonyms");
      if (synonyms != null) {
        for (Object n : synonyms) {
          c.addSynonym((String) n);
        }
      }

      BasicDBList pubchemNames = (BasicDBList) namesDoc.get("pubchem");
      if (pubchemNames != null) {
        for (Object n : pubchemNames) {
          DBObject entry = (DBObject) n;
          String typ = (String) entry.get("type");
          BasicDBList pnames = (BasicDBList) entry.get("values");
          List<String> s = new ArrayList<String>();
          if (pnames != null) {
            for (Object os : pnames)
              s.add((String) os);
          }
          c.addNames(typ, s.toArray(new String[0]));
        }
      }
    }

    // Boolean.TRUE.equals() treats a missing flag as false instead of failing on unboxing.
    if (Boolean.TRUE.equals(o.get("isCofactor")))
      c.setAsCofactor();
    if (Boolean.TRUE.equals(o.get("isNative")))
      c.setAsNative();

    Double estimatedEnergy = (Double) o.get("estimateEnergy");
    if (estimatedEnergy != null)
      c.setEstimatedEnergy(estimatedEnergy);

    BasicDBList keywords = (BasicDBList) o.get("keywords");
    if (keywords != null)
      for (Object k : keywords)
        c.addKeyword((String) k);

    BasicDBList cikeywords = (BasicDBList) o.get("keywords_case_insensitive");
    if (cikeywords != null)
      for (Object k : cikeywords)
        c.addCaseInsensitiveKeyword((String) k);

    BasicDBList vendors = (BasicDBList) o.get("vendors");
    Integer num_vendors = (Integer) o.get("num_vendors");
    Integer chemspiderid = (Integer) o.get("csid");
    c.setChemSpiderVendorXrefs(vendors == null ? null : MongoDBToJSON.conv(vendors));
    c.setChemSpiderNumUniqueVendors(num_vendors);
    c.setChemSpiderID(chemspiderid);

    // TODO: a "shortest name" used to be computed here from the canonical name, BRENDA names
    // and synonyms, but the result was never stored or returned, so the computation was dropped.

    return c;
  }

  /** Returns an iterator over every document in the seq collection. */
  public DBIterator getDbIteratorOverSeq() {
    DBCursor cursor = this.dbSeq.find();
    return new DBIterator(cursor);
  }

  /**
   * Returns a no-timeout iterator over the seq documents matching {@code matchCriterion}.
   *
   * @param matchCriterion the query document
   * @param keys the projection; {@code null} means an empty projection document
   */
  public DBIterator getDbIteratorOverSeq(BasicDBObject matchCriterion, BasicDBObject keys) {
    if (keys == null) {
      keys = new BasicDBObject();
    }
    DBCursor cursor = this.dbSeq.find(matchCriterion, keys);
    cursor = cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    return new DBIterator(cursor);
  }

  /** Returns the next Seq, or closes the iterator and returns null when it is exhausted. */
  public Seq getNextSeq(DBIterator iterator) {
    if (!iterator.hasNext()) {
      iterator.close();
      return null;
    }
    DBObject o = iterator.next();
    return convertDBObjectToSeq(o);
  }

  /** Returns an iterator over every document in the waterfalls collection. */
  public DBIterator getIteratorOverWaterfalls() {
    DBCursor cursor = this.dbWaterfalls.find();
    return new DBIterator(cursor);
  }

  /** Returns the next waterfall, or closes the iterator and returns null when it is exhausted. */
  public DBObject getNextWaterfall(DBIterator iterator) {
    if (!iterator.hasNext()) {
      iterator.close();
      return null;
    }
    DBObject o = iterator.next();
    return convertDBObjectToWaterfall(o);
  }

  /** Returns an iterator over every document in the cascades collection. */
  public DBIterator getIteratorOverCascades() {
    DBCursor cursor = this.dbCascades.find();
    return new DBIterator(cursor);
  }

  /** Returns the next cascade, or closes the iterator and returns null when it is exhausted. */
  public DBObject getNextCascade(DBIterator iterator) {
    if (!iterator.hasNext()) {
      iterator.close();
      return null;
    }
    DBObject o = iterator.next();
    return convertDBObjectToCascade(o);
  }

  /** Returns an iterator over the cursor built by constructCursorForAllChemicals(). */
  public DBIterator getIteratorOverChemicals() {
    DBCursor cursor = constructCursorForAllChemicals();
    return new DBIterator(cursor);
  }

  /**
   * Returns a no-timeout iterator over the chemical documents matching {@code matchCriterion}.
   *
   * @param matchCriterion the query document
   * @param keys the projection; {@code null} means an empty projection document
   */
  public DBIterator getIteratorOverChemicals(BasicDBObject matchCriterion, BasicDBObject keys) {
    if (keys == null) {
      keys = new BasicDBObject();
    }
    DBCursor cursor = this.dbChemicals.find(matchCriterion, keys);
    cursor = cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
    return new DBIterator(cursor); // DBIterator is just a wrapper class
  }

  public Iterator<Chemical>
getJavaIteratorOverChemicals(BasicDBObject matchCriterion) { final DBIterator iter = getIteratorOverChemicals(matchCriterion, null); return new Iterator<Chemical>() { @Override public boolean hasNext() { boolean hasNext = iter.hasNext(); if (!hasNext) iter.close(); return hasNext; } @Override public Chemical next() { DBObject o = iter.next(); return convertDBObjectToChemical(o); } }; } public Iterator<String> getIteratorOverInchis(BasicDBObject matchCriterion) { BasicDBObject keys = new BasicDBObject(ChemicalKeywords.INCHI$.MODULE$.toString(), true); final DBIterator iter = getIteratorOverChemicals(matchCriterion, keys); return new Iterator<String>() { @Override public boolean hasNext() { boolean hasNext = iter.hasNext(); if (!hasNext) iter.close(); return hasNext; } @Override public String next() { DBObject o = iter.next(); return (String) o.get("InChI"); } }; } public Iterator<Chemical> getChemicalsbyIds(List<Long> ids, boolean notimeout) { BasicDBList queryList = new BasicDBList(); for (Long id : ids) { queryList.add(new BasicDBObject(ChemicalKeywords.ID$.MODULE$.toString(), id)); } return getJavaIteratorOverChemicals(new BasicDBObject(MongoKeywords.OR$.MODULE$.toString(), queryList)); } public DBIterator getIteratorOverReactions() { return getIteratorOverReactions(DEFAULT_CURSOR_ORDER_BY_ID, null); } private DBIterator getIteratorOverReactions(Long low, Long high) { return getIteratorOverReactions(getRangeUUIDRestriction(low, high), null); } public DBIterator getIteratorOverReactions(BasicDBObject matchCriterion, BasicDBObject keys) { if (keys == null) { keys = new BasicDBObject(); } DBCursor cursor = this.dbReactions.find(matchCriterion, keys); cursor = cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); return new DBIterator(cursor); // DBIterator is just a wrapper classs } public Reaction getNextReaction(DBIterator iterator) { if (!iterator.hasNext()) { iterator.close(); return null; } DBObject o = iterator.next(); return convertDBObjectToReaction(o); } public 
Chemical getNextChemical(DBIterator iterator) { if (!iterator.hasNext()) { iterator.close(); return null; } DBObject o = iterator.next(); return convertDBObjectToChemical(o); } public Cofactor getNextCofactor(DBIterator iterator) { if (!iterator.hasNext()) { iterator.close(); return null; } DBObject o = iterator.next(); return convertDBObjectToCofactor(o); } public Organism getNextOrganism(DBIterator iterator) { if (!iterator.hasNext()) { iterator.close(); return null; } DBObject o = iterator.next(); return convertDBObjectToOrg(o); } public DBIterator getIteratorOverCofactors() { DBCursor cursor = constructCursorForAllCofactors(); return new DBIterator(cursor); } public Reaction convertDBObjectToReaction(DBObject o) { long uuid = (Integer) o.get("_id"); // checked: db type IS int String ecnum = (String) o.get("ecnum"); String name_field = (String) o.get("easy_desc"); Reaction.RxnDetailType type = Reaction.RxnDetailType.valueOf((String) o.get("is_abstract")); BasicDBList substrates = (BasicDBList) ((DBObject) o.get("enz_summary")).get("substrates"); BasicDBList products = (BasicDBList) ((DBObject) o.get("enz_summary")).get("products"); BasicDBList substrateCofactors = (BasicDBList) ((DBObject) o.get("enz_summary")).get("substrate_cofactors"); BasicDBList productCofactors = (BasicDBList) ((DBObject) o.get("enz_summary")).get("product_cofactors"); BasicDBList coenzymes = (BasicDBList) ((DBObject) o.get("enz_summary")).get("coenzymes"); BasicDBList refs = (BasicDBList) (o.get("references")); BasicDBList proteins = (BasicDBList) (o.get("proteins")); DBObject mechanisticValidatorResults = (DBObject) (o.get("mechanistic_validator_result")); BasicDBList keywords = (BasicDBList) (o.get("keywords")); BasicDBList cikeywords = (BasicDBList) (o.get("keywords_case_insensitive")); List<Long> substr = new ArrayList<Long>(); List<Long> prod = new ArrayList<Long>(); List<Long> substrCofact = new ArrayList<Long>(); List<Long> prodCofact = new ArrayList<Long>(); List<Long> coenz = new 
ArrayList<Long>(); String conversionDirectionString = (String) o.get("conversion_direction"); ConversionDirectionType conversionDirection = conversionDirectionString == null ? null : ConversionDirectionType.valueOf(conversionDirectionString); String pathwayStepDirectionString = (String) o.get("pathway_step_direction"); StepDirection pathwayStepDirection = pathwayStepDirectionString == null ? null : StepDirection.valueOf(pathwayStepDirectionString); for (int i = 0; i < substrates.size(); i++) { Boolean forBalance = (Boolean) ((DBObject) substrates.get(i)).get("balance"); if (forBalance != null && forBalance) continue; substr.add(getEnzSummaryIDAsLong(substrates, i)); } for (int i = 0; i < products.size(); i++) { Boolean forBalance = (Boolean) ((DBObject) products.get(i)).get("balance"); if (forBalance != null && forBalance) continue; prod.add(getEnzSummaryIDAsLong(products, i)); } for (int i = 0; i < substrateCofactors.size(); i++) { substrCofact.add(getEnzSummaryIDAsLong(substrateCofactors, i)); } for (int i = 0; i < productCofactors.size(); i++) { prodCofact.add(getEnzSummaryIDAsLong(productCofactors, i)); } for (int i = 0; i < coenzymes.size(); i++) { coenz.add(getEnzSummaryIDAsLong(coenzymes, i)); } Reaction result = new Reaction(uuid, (Long[]) substr.toArray(new Long[0]), (Long[]) prod.toArray(new Long[0]), (Long[]) substrCofact.toArray(new Long[0]), (Long[]) prodCofact.toArray(new Long[0]), (Long[]) coenz.toArray(new Long[0]), ecnum, conversionDirection, pathwayStepDirection, name_field, type); for (int i = 0; i < substrates.size(); i++) { Integer c = (Integer) ((DBObject) substrates.get(i)).get("coefficient"); if (c != null) result.setSubstrateCoefficient(getEnzSummaryIDAsLong(substrates, i), c); } for (int i = 0; i < products.size(); i++) { Integer c = (Integer) ((DBObject) products.get(i)).get("coefficient"); if (c != null) result.setProductCoefficient(getEnzSummaryIDAsLong(products, i), c); } Double estimatedEnergy = (Double) o.get("estimateEnergy"); 
result.setEstimatedEnergy(estimatedEnergy); String datasrc = (String) o.get("datasource"); if (datasrc != null && !datasrc.equals("")) result.setDataSource(Reaction.RxnDataSource.valueOf(datasrc)); if (mechanisticValidatorResults != null) { result.setMechanisticValidatorResult(MongoDBToJSON.conv(mechanisticValidatorResults)); } if (refs != null) { for (Object oo : refs) { DBObject ref = (DBObject) oo; Reaction.RefDataSource src = Reaction.RefDataSource.valueOf((String) ref.get("src")); String val = (String) ref.get("val"); result.addReference(src, val); } } if (proteins != null) { for (Object oo : proteins) { result.addProteinData(MongoDBToJSON.conv((DBObject) oo)); } } if (keywords != null) for (Object k : keywords) result.addKeyword((String) k); if (cikeywords != null) for (Object k : cikeywords) result.addCaseInsensitiveKeyword((String) k); return result; } private Long getEnzSummaryIDAsLong(BasicDBList reactant, int i) { try { return (Long) ((DBObject) reactant.get(i)).get("pubchem"); } catch (ClassCastException e) { return ((Integer) ((DBObject) reactant.get(i)).get("pubchem")).longValue(); } } public Set<Reaction> getReactionsConstrained(Map<String, Object> equalityCriteria) { BasicDBList andList = new BasicDBList(); for (String k : equalityCriteria.keySet()) { BasicDBObject query = new BasicDBObject(); query.put(k, equalityCriteria.get(k)); andList.add(query); } BasicDBObject query = new BasicDBObject(); query.put("$and", andList); DBCursor cur = this.dbReactions.find(query); Set<Reaction> results = new HashSet<Reaction>(); while (cur.hasNext()) { results.add(convertDBObjectToReaction(cur.next())); } return results; } public List<Chemical> keywordInChemicals(String keyword) { return keywordInChemicals("keywords", keyword); } public List<Chemical> keywordInChemicalsCaseInsensitive(String keyword) { return keywordInChemicals("keywords_case_insensitive", keyword); } private List<Chemical> keywordInChemicals(String in_field, String keyword) { List<Chemical> 
chemicals = new ArrayList<Chemical>(); DBCursor cur = constructCursorForMatchingChemicals(in_field, keyword, null); while (cur.hasNext()) { DBObject o = cur.next(); chemicals.add(convertDBObjectToChemical(o)); } cur.close(); return chemicals; } public List<Seq> keywordInSequence(String keyword) { return keywordInSequence("keywords", keyword); } public List<Seq> keywordInSequenceCaseInsensitive(String keyword) { return keywordInSequence("keywords_case_insensitive", keyword); } private List<Seq> keywordInSequence(String in_field, String keyword) { List<Seq> seqs = new ArrayList<Seq>(); BasicDBObject query = new BasicDBObject(); query.put(in_field, keyword); BasicDBObject keys = new BasicDBObject(); DBCursor cur = this.dbSeq.find(query, keys); while (cur.hasNext()) { DBObject o = cur.next(); seqs.add(convertDBObjectToSeq(o)); } cur.close(); return seqs; } public List<DBObject> keywordInCascade(String keyword) { return keywordInCascade("keywords", keyword); } public List<DBObject> keywordInCascadeCaseInsensitive(String keyword) { return keywordInCascade("keywords_case_insensitive", keyword); } private List<DBObject> keywordInCascade(String in_field, String keyword) { List<DBObject> cascades = new ArrayList<DBObject>(); BasicDBObject query = new BasicDBObject(); query.put(in_field, keyword); BasicDBObject keys = new BasicDBObject(); DBCursor cur = this.dbCascades.find(query, keys); while (cur.hasNext()) { DBObject o = cur.next(); cascades.add(convertDBObjectToCascade(o)); } cur.close(); return cascades; } DBObject convertDBObjectToCascade(DBObject o) { // TODO: later on, we will have a cascade object that is // more descriptive object of cascades rather than just a DBObject return o; } public List<DBObject> keywordInWaterfall(String keyword) { return keywordInWaterfall("keywords", keyword); } public List<DBObject> keywordInWaterfallCaseInsensitive(String keyword) { return keywordInWaterfall("keywords_case_insensitive", keyword); } private List<DBObject> 
keywordInWaterfall(String in_field, String keyword) { List<DBObject> waterfalls = new ArrayList<DBObject>(); BasicDBObject query = new BasicDBObject(); query.put(in_field, keyword); BasicDBObject keys = new BasicDBObject(); DBCursor cur = this.dbWaterfalls.find(query, keys); while (cur.hasNext()) { DBObject o = cur.next(); waterfalls.add(convertDBObjectToWaterfall(o)); } cur.close(); return waterfalls; } DBObject convertDBObjectToWaterfall(DBObject o) { // TODO: later on, we will have a waterfall object that is // more descriptive object of cascades rather than just a DBObject return o; } public List<Reaction> keywordInReaction(String keyword) { return keywordInReaction("keywords", keyword); } public List<Reaction> keywordInReactionCaseInsensitive(String keyword) { return keywordInReaction("keywords_case_insensitive", keyword); } private List<Reaction> keywordInReaction(String in_field, String keyword) { List<Reaction> rxns = new ArrayList<Reaction>(); BasicDBObject query = new BasicDBObject(); query.put(in_field, keyword); BasicDBObject keys = new BasicDBObject(); DBCursor cur = this.dbReactions.find(query, keys); while (cur.hasNext()) { DBObject o = cur.next(); rxns.add(convertDBObjectToReaction(o)); } cur.close(); return rxns; } public Cofactor getCofactorFromUUID(Long cofactorUUID) { return getCofactorFromDB("_id", cofactorUUID); } public Cofactor getCofactorFromInChI(String inchi) { return getCofactorFromDB("InChI", inchi); } private Cofactor getCofactorFromDB(String field, Object val) { BasicDBObject query = new BasicDBObject(); query.put(field, val); BasicDBObject keys = new BasicDBObject(); DBObject o = this.dbCofactors.findOne(query, keys); if (o == null) return null; return convertDBObjectToCofactor(o); } public Cofactor convertDBObjectToCofactor(DBObject o) { long uuid = (Long) o.get("_id"); String inchi = (String) o.get("InChI"); BasicDBList ns = (BasicDBList) o.get("names"); List<String> names = new ArrayList<>(); if (ns != null) { for (Object n : ns) 
{ names.add((String) n); } } Cofactor cofactor = new Cofactor(uuid, inchi, names); return cofactor; } public Reaction getReactionFromUUID(Long reactionUUID) { if (reactionUUID < 0) { throw new RuntimeException(String.format( "getReactionFromUUID called with a negaive number (%d). It used to reverse the reaction.", reactionUUID)); } BasicDBObject query = new BasicDBObject(); query.put("_id", reactionUUID); BasicDBObject keys = new BasicDBObject(); DBObject o = this.dbReactions.findOne(query, keys); if (o == null) return null; return convertDBObjectToReaction(o); } public Iterator<Reaction> getReactionsIteratorById(List<Long> ids, boolean notimeout) { BasicDBList reactionList = new BasicDBList(); for (Long id : ids) { reactionList.add(new BasicDBObject(ChemicalKeywords.ID$.MODULE$.toString(), id)); } BasicDBObject query = new BasicDBObject(MongoKeywords.OR$.MODULE$.toString(), reactionList); final DBIterator iter = getIteratorOverReactions(query, null); return new Iterator<Reaction>() { @Override public boolean hasNext() { boolean hasNext = iter.hasNext(); if (!hasNext) iter.close(); return hasNext; } @Override public Reaction next() { DBObject o = iter.next(); return convertDBObjectToReaction(o); } }; } public Iterator<Reaction> getReactionsIterator() { final DBIterator iter = getIteratorOverReactions(); return new Iterator<Reaction>() { @Override public boolean hasNext() { boolean hasNext = iter.hasNext(); if (!hasNext) iter.close(); return hasNext; } @Override public Reaction next() { DBObject o = iter.next(); return convertDBObjectToReaction(o); } }; } public Iterator<Seq> getSeqIterator(BasicDBObject query) { final DBIterator iter = getDbIteratorOverSeq(query, new BasicDBObject()); return new Iterator<Seq>() { @Override public boolean hasNext() { boolean hasNext = iter.hasNext(); if (!hasNext) iter.close(); return hasNext; } @Override public Seq next() { DBObject o = iter.next(); return convertDBObjectToSeq(o); } }; } public BasicDBObject 
getRangeUUIDRestriction(Long lowUUID, Long highUUID) { BasicDBObject restrictTo = new BasicDBObject(); // need to encode { "_id" : { $gte : lowUUID, $lte : highUUID } } BasicDBObject range = new BasicDBObject(); if (lowUUID != null) range.put("$gte", lowUUID); if (highUUID != null) range.put("$lte", highUUID); restrictTo.put("_id", range); return restrictTo; } public List<Long> getAllReactionUUIDs() { return getAllCollectionUUIDs(this.dbReactions); } public List<Long> getAllSeqUUIDs() { return getAllCollectionUUIDs(this.dbSeq); } public List<Long> getAllCollectionUUIDs(DBCollection collection) { List<Long> ids = new ArrayList<Long>(); BasicDBObject query = new BasicDBObject(); BasicDBObject keys = new BasicDBObject(); keys.put("_id", 1); // 0 means exclude, rest are included DBCursor cur = collection.find(query, keys); while (cur.hasNext()) { DBObject o = cur.next(); long uuid = (Integer) o.get("_id"); // checked: db type IS int ids.add(uuid); } cur.close(); return ids; } public Seq getSeqFromID(Long seqID) { BasicDBObject query = new BasicDBObject(); query.put("_id", seqID); BasicDBObject keys = new BasicDBObject(); DBObject o = this.dbSeq.findOne(query, keys); if (o == null) return null; return convertDBObjectToSeq(o); } public Seq getSeqFromSequence(String seq) { DBObject o = this.dbSeq.findOne(new BasicDBObject("seq", seq), new BasicDBObject()); if (o == null) return null; return convertDBObjectToSeq(o); } public List<Seq> getSeqFromSeqEcOrg(String seq, String ec, String organism) { List<Seq> seqs = new ArrayList<>(); BasicDBObject query = new BasicDBObject(); query.put("seq", seq); query.put("ecnum", ec); query.put("org", organism); DBCursor cur = this.dbSeq.find(query, new BasicDBObject()); try { while (cur.hasNext()) { DBObject o = cur.next(); seqs.add(convertDBObjectToSeq(o)); } } finally { if (cur != null) { cur.close(); } } return seqs; } public List<Seq> getSeqFromGenbankProtAccession(String accession) { List<Seq> seqs = new ArrayList<>(); BasicDBObject 
query = new BasicDBObject(); query.put("metadata.accession.genbank_protein", new BasicDBObject("$elemMatch", new BasicDBObject("$eq", accession))); DBCursor cur = this.dbSeq.find(query, new BasicDBObject()); try { while (cur.hasNext()) { DBObject o = cur.next(); seqs.add(convertDBObjectToSeq(o)); } } finally { if (cur != null) { cur.close(); } } return seqs; } public List<Seq> getSeqFromGenbankNucAccessionSeq(String accession, String seq) { List<Seq> seqs = new ArrayList<>(); BasicDBObject query = new BasicDBObject(); query.put("seq", seq); query.put("metadata.accession.genbank_nucleotide", new BasicDBObject("$elemMatch", new BasicDBObject("$eq", accession))); DBCursor cur = this.dbSeq.find(query, new BasicDBObject()); try { while (cur.hasNext()) { DBObject o = cur.next(); seqs.add(convertDBObjectToSeq(o)); } } finally { if (cur != null) { cur.close(); } } return seqs; } public List<Seq> getSeqWithSARConstraints() { List<Seq> seqs = new ArrayList<Seq>(); BasicDBObject query = new BasicDBObject(); query.put("sar_constraints", new BasicDBObject("$exists", true)); BasicDBObject keys = new BasicDBObject(); DBCursor cur = this.dbSeq.find(query, keys); while (cur.hasNext()) { DBObject o = cur.next(); seqs.add(convertDBObjectToSeq(o)); } cur.close(); return seqs; } public List<Seq> getSeqWithRxnRef(Long rxnId) { List<Seq> seqs = new ArrayList<>(); BasicDBObject query = new BasicDBObject(); query.put("rxn_refs", rxnId); DBCursor cur = this.dbSeq.find(query, new BasicDBObject()); try { while (cur.hasNext()) { DBObject o = cur.next(); seqs.add(convertDBObjectToSeq(o)); } } finally { if (cur != null) { cur.close(); } } return seqs; } public Iterator<Seq> getSeqIterator() { final DBIterator iter = getDbIteratorOverSeq(); return new Iterator<Seq>() { @Override public boolean hasNext() { boolean hasNext = iter.hasNext(); if (!hasNext) iter.close(); return hasNext; } @Override public Seq next() { DBObject o = iter.next(); return convertDBObjectToSeq(o); } }; } public Seq 
convertDBObjectToSeq(DBObject o) { long id = (Integer) o.get("_id"); // checked: db type IS int String ecnum = (String) o.get("ecnum"); String org_name = (String) o.get("org"); Long org_id = (Long) o.get("org_id"); String aa_seq = (String) o.get("seq"); String srcdb = (String) o.get("src"); BasicDBList refs = (BasicDBList) o.get("references"); DBObject meta = (DBObject) o.get("metadata"); BasicDBList rxn_refs = (BasicDBList) (o.get("rxn_refs")); if (srcdb == null) srcdb = Seq.AccDB.swissprot.name(); Seq.AccDB src = Seq.AccDB.valueOf(srcdb); // genbank | uniprot | trembl | embl | swissprot List<JSONObject> references = new ArrayList<>(); if (refs != null) for (Object r : refs) references.add(MongoDBToJSON.conv((DBObject) r)); String dummyString = ""; // for type differentiation in overloaded method Long dummyLong = 0L; // for type differentiation in overloaded method Set<Long> rxns_catalyzed = from_dblist(rxn_refs, dummyLong); return Seq.rawInit(id, ecnum, org_id, org_name, aa_seq, references, meta, src, // the rest of the params are the ones that are typically // "constructed". 
But since we are reading from the DB, we manually init rxns_catalyzed); } public Organism convertDBObjectToOrg(DBObject o) { Long id = (long) o.get("org_id"); String name = (String) o.get("name"); return new Organism(id, name); } public DBIterator getDbIteratorOverOrgs() { DBCursor cursor = this.dbOrganismNames.find(); return new DBIterator(cursor); } public DBIterator getDbIteratorOverOrgs(BasicDBObject matchCriterion, BasicDBObject keys) { if (keys == null) { keys = new BasicDBObject(); } DBCursor cursor = this.dbOrganismNames.find(matchCriterion, keys); cursor = cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT); return new DBIterator(cursor); } public String getOrganismNameFromId(Long id) { BasicDBObject query = new BasicDBObject(); query.put("org_id", id); BasicDBObject keys = new BasicDBObject(); keys.put("name", 1); if (this.dbOrganismNames != null) { DBObject cur = this.dbOrganismNames.findOne(query, keys); if (cur == null) { //System.out.println("Did not find in organismnames: " + name); return null; } return (String) cur.get("name"); } else { //System.out.println("no organism names collection"); } return null; } public long getOrganismId(String name) { BasicDBObject query = new BasicDBObject(); query.put("name", name); BasicDBObject keys = new BasicDBObject(); keys.put("org_id", 1); if (this.dbOrganismNames != null) { DBObject cur = this.dbOrganismNames.findOne(query, keys); if (cur == null) { //System.out.println("Did not find in organismnames: " + name); return -1; } return (Long) cur.get("org_id"); // checked: db type IS long } else { //System.out.println("no organism names collection"); } return -1; } /* * Returns set of all organism ids involved in reactions */ public Set<Long> getOrganismIDs() { DBIterator iterator = getIteratorOverReactions(new BasicDBObject(), null); Set<Long> ids = new HashSet<Long>(); while (iterator.hasNext()) { DBObject r = iterator.next(); BasicDBList orgs = (BasicDBList) r.get("organisms"); for (Object o : orgs) { ids.add((Long) 
((DBObject) o).get("id")); // checked: db type IS Long } } return ids; } public Set<Long> getOrganismIDs(Long reactionID) { if (reactionID < 0) { reactionID = Reaction.reverseID(reactionID); } DBObject query = new BasicDBObject(); query.put("_id", reactionID); Set<Long> ids = new HashSet<Long>(); DBObject reaction = this.dbReactions.findOne(query); if (reaction != null) { BasicDBList orgs = (BasicDBList) reaction.get("organisms"); for (Object o : orgs) { ids.add((Long) ((DBObject) o).get("id")); // checked: db type IS long } } return ids; } public List<P<Reaction.RefDataSource, String>> getReferences(Long reactionID) { if (reactionID < 0) { reactionID = Reaction.reverseID(reactionID); } DBObject query = new BasicDBObject(); query.put("_id", reactionID); List<P<Reaction.RefDataSource, String>> refs = new ArrayList<>(); DBObject reaction = this.dbReactions.findOne(query); if (reaction != null) { BasicDBList dbrefs = (BasicDBList) reaction.get("references"); if (dbrefs != null) for (Object oo : dbrefs) { DBObject ref = (DBObject) oo; Reaction.RefDataSource src = Reaction.RefDataSource.valueOf((String) ref.get("src")); String val = (String) ref.get("val"); refs.add(new P<Reaction.RefDataSource, String>(src, val)); } } return refs; } public Set<String> getKMValues(Long reactionID) { DBObject query = new BasicDBObject(); query.put("_id", reactionID); Set<String> kmSet = new HashSet<String>(); DBObject reaction = this.dbReactions.findOne(query); if (reaction != null) { BasicDBList kms = (BasicDBList) reaction.get("km_values"); if (kms != null) { for (Object km : kms) { kmSet.add((String) km); } } } return kmSet; } public Set<String> getTurnoverNumbers(Long reactionID) { DBObject query = new BasicDBObject(); query.put("_id", reactionID); Set<String> turnoverSet = new HashSet<String>(); DBObject reaction = this.dbReactions.findOne(query); if (reaction != null) { BasicDBList turnovers = (BasicDBList) reaction.get("turnover_numbers"); if (turnovers != null) { for (Object 
turnover : turnovers) { turnoverSet.add((String) turnover); } } } return turnoverSet; } private void createCofactorsIndex(String field) { createCofactorsIndex(field, false); // create normal/non-hashed index } private void createCofactorsIndex(String field, boolean hashedIndex) { if (hashedIndex) { this.dbCofactors.createIndex(new BasicDBObject(field, "hashed")); } else { this.dbCofactors.createIndex(new BasicDBObject(field, 1)); } } private void createChemicalsIndex(String field) { createChemicalsIndex(field, false); // create normal/non-hashed index } private void createChemicalsIndex(String field, boolean hashedIndex) { if (hashedIndex) { this.dbChemicals.createIndex(new BasicDBObject(field, "hashed")); } else { this.dbChemicals.createIndex(new BasicDBObject(field, 1)); } } private void createSeqIndex(String field, boolean hashedIndex) { if (hashedIndex) { this.dbSeq.createIndex(new BasicDBObject(field, "hashed")); } else { this.dbSeq.createIndex(new BasicDBObject(field, 1)); } } private void createOrganismNamesIndex(String field) { this.dbOrganismNames.createIndex(new BasicDBObject(field, 1)); } public int submitToActSeqDB(Seq.AccDB src, String ec, String org, Long org_id, String seq, List<JSONObject> references, Set<Long> rxns, DBObject meta) { BasicDBObject doc = new BasicDBObject(); int id = new Long(this.dbSeq.count()).intValue(); doc.put("_id", id); doc.put("src", src.name()); // genbank, uniprot, swissprot, trembl, embl doc.put("ecnum", ec); doc.put("org", org); doc.put("org_id", org_id); // this is the NCBI Taxonomy id, should correlate with db.organismnames{org_id} and db.organisms.{id} doc.put("seq", seq); BasicDBList refs = new BasicDBList(); for (JSONObject ref : references) { refs.add(MongoDBToJSON.conv(ref)); } doc.put("references", refs); doc.put("metadata", meta); // the metadata contains the uniprot acc#, name, uniprot catalytic activity, Object accession = meta.get("accession"); doc.put("rxn_refs", to_dblist(rxns)); this.dbSeq.insert(doc); if 
(org != null && seq != null) System.out.format("Inserted %s = [%s, %s] = %s %s\n", accession, ec, org.substring(0, Math.min(10, org.length())), seq.substring(0, Math.min(20, seq.length())), refs); return id; } <X> BasicDBList to_dblist(Set<X> set) { BasicDBList dblist = new BasicDBList(); if (set != null) dblist.addAll(set); return dblist; } <X> Set<X> from_dblist(BasicDBList dblist, X dummy) { Set<X> set = new HashSet<X>(); if (dblist != null) for (Object o : dblist) set.add((X) o); return set; } public void updateMetadata(Seq seq) { BasicDBObject query = new BasicDBObject().append("_id", seq.getUUID()); DBObject obj = this.dbSeq.findOne(query); obj.put("metadata", MongoDBToJSON.conv(seq.getMetadata())); this.dbSeq.update(query, obj); } public void updateReferences(Seq seq) { BasicDBObject query = new BasicDBObject().append("_id", seq.getUUID()); DBObject obj = this.dbSeq.findOne(query); BasicDBList refs = new BasicDBList(); List<DBObject> newReferences = new ArrayList<>(); for (JSONObject ref : seq.getReferences()) { newReferences.add(MongoDBToJSON.conv(ref)); } refs.addAll(newReferences); obj.put("references", refs); this.dbSeq.update(query, obj); } public void updateRxnRefs(Seq seq) { BasicDBObject query = new BasicDBObject().append("_id", seq.getUUID()); DBObject obj = this.dbSeq.findOne(query); obj.put("rxn_refs", seq.getReactionsCatalyzed()); this.dbSeq.update(query, obj); } /* * * * End of other helper functions * * */ /** * The following functions are for performing organism specific retrievals. */ /** * Retrieve all reaction ids observed in given species * @param speciesID * @return */ public Set<Long> getReactionsBySpecies(Long speciesID) { Map<Long, Set<Long>> speciesIDs = getOrganisms(); Set<Long> relevantIDs = speciesIDs.get(speciesID); Set<Long> result = new HashSet<Long>(); for (Long id : relevantIDs) { result.addAll(graphByOrganism(id)); } return result; } /** * graphByOrganism() returns a list of all reactionIDs containing the given organismID. 
* * @param organismID * @return List<Long> List of reaction IDs for given organismID */ public List<Long> graphByOrganism(Long organismID) { DBObject query = new BasicDBObject(); if (organismID == null || organismID > -1) query.put("organisms.id", organismID); List<Long> graphList = new ArrayList<Long>(); DBCursor reactionCursor = this.dbReactions.find(query); for (DBObject i : reactionCursor) { graphList.add(((Integer) i.get("_id")).longValue()); // checked: db type IS int } return graphList; } /** * getOrganisms() returns a list of all unique species IDs in database * mapped to itself, its parents, and descendants * * @return */ @SuppressWarnings("unchecked") public Map<Long, Set<Long>> getOrganisms() { List<Long> ids = (List<Long>) this.dbReactions.distinct("organisms.id"); //map species id to all ids associated with it Map<Long, Set<Long>> speciesIDs = new HashMap<Long, Set<Long>>(); for (Long organismID : ids) { //check if organism id on species level List<Long> idsToAdd = new ArrayList<Long>(); Long speciesID; DBObject orgQuery = new BasicDBObject(); orgQuery.put("_id", organismID); DBObject org = dbOrganisms.findOne(orgQuery); String rank = (String) org.get("rank"); Long parent = (Long) org.get("parent_id"); // checked: db type IS long speciesID = null; while (organismID != 1) { idsToAdd.add(organismID); if (rank.equals("species")) { speciesID = organismID; //break; } orgQuery.put("_id", parent); org = dbOrganisms.findOne(orgQuery); organismID = parent; rank = (String) org.get("rank"); parent = (Long) org.get("parent_id"); // checked: db type IS long } if (speciesID == null) continue; if (!speciesIDs.containsKey(speciesID)) { speciesIDs.put(speciesID, new HashSet<Long>()); } speciesIDs.get(speciesID).addAll(idsToAdd); } return speciesIDs; } /** * End of organism queries. 
*/ /** * Getting KEGG data */ private Map<String, Long> keggID_ActID; public Map<String, Long> getKeggID_ActID(boolean useCached) { if (keggID_ActID == null || !useCached) keggID_ActID = new HashMap<String, Long>(); else return keggID_ActID; DBIterator it = getIteratorOverChemicals(); while (it.hasNext()) { Chemical c = getNextChemical(it); DBObject o = (DBObject) c.getRef(Chemical.REFS.KEGG); if (o == null) continue; BasicDBList list = (BasicDBList) o.get("id"); for (Object s : list) { keggID_ActID.put((String) s, c.getUuid()); } } return keggID_ActID; } /** * Following methods are related to Bing cross-references installation in the Installer DB along with various * queries to obtain names (aka synonyms) */ public BasicDBObject createBingMetadataDoc(Set<UsageTermUrlSet> usageTerms, Long totalCountSearchResults, String bestName) { BasicDBObject metadata = new BasicDBObject(); if (usageTerms != null) { BasicDBList usageTermsDBObject = new BasicDBList(); for (UsageTermUrlSet usageTerm : usageTerms) { // What happens if you don't translate to basic db obj in the next line? 
usageTermsDBObject.add(usageTerm.getBasicDBObject()); } metadata.put("usage_terms", usageTermsDBObject); } if (totalCountSearchResults >= 0) { metadata.put("total_count_search_results", totalCountSearchResults); } if (!bestName.equals("")) { metadata.put("best_name", bestName); } return metadata; } public void updateChemicalWithBingSearchResults(String inchi, String bestName, BasicDBObject metadata) { Chemical c = this.getChemicalFromInChI(inchi); if (c != null) { long id = c.getUuid(); BasicDBObject set = new BasicDBObject("xref.BING.metadata", metadata); set.put("xref.BING.dbid", bestName); BasicDBObject query = new BasicDBObject("_id", id); BasicDBObject update = new BasicDBObject("$set", set); this.dbChemicals.update(query, update); } } public NamesOfMolecule getNamesFromBasicDBObject(BasicDBObject c) { String inchi = (String) c.get("InChI"); NamesOfMolecule moleculeNames = new NamesOfMolecule(inchi); BasicDBObject names = (BasicDBObject) c.get("names"); if (names != null) { BasicDBList brendaNamesList = (BasicDBList) names.get("brenda"); if (brendaNamesList != null) { Set<String> brendaNames = new HashSet<>(); for (Object brendaName : brendaNamesList) { brendaNames.add((String) brendaName); } moleculeNames.setBrendaNames(brendaNames); } } // XREF BasicDBObject xref = (BasicDBObject) c.get("xref"); if (xref != null) { // CHEBI BasicDBObject chebi = (BasicDBObject) xref.get("CHEBI"); if (chebi != null) { Set<String> chebiNames = new HashSet<>(); BasicDBObject chebiMetadata = (BasicDBObject) chebi.get("metadata"); BasicDBList chebiSynonymsList = (BasicDBList) chebiMetadata.get("Synonym"); if (chebiSynonymsList != null) { for (Object chebiName : chebiSynonymsList) { chebiNames.add((String) chebiName); } moleculeNames.setChebiNames(chebiNames); } } // METACYC BasicDBObject metacyc = (BasicDBObject) xref.get("METACYC"); if (metacyc != null) { Set<String> metacycNames = new HashSet<>(); BasicDBList metacycMetadata = (BasicDBList) metacyc.get("meta"); if 
(metacycMetadata != null) { for (Object metaCycMeta : metacycMetadata) { BasicDBObject metaCycMetaDBObject = (BasicDBObject) metaCycMeta; String metaCycName = (String) metaCycMetaDBObject.get("sname"); if (metaCycName == null) { continue; } metacycNames.add(metaCycName); } moleculeNames.setMetacycNames(metacycNames); } } // DRUGBANK BasicDBObject drugbank = (BasicDBObject) xref.get("DRUGBANK"); if (drugbank != null) { Set<String> drugbankNames = new HashSet<>(); BasicDBObject drugbankMetadata = (BasicDBObject) drugbank.get("metadata"); drugbankNames.add((String) drugbankMetadata.get("name")); BasicDBObject drugbankSynonyms = (BasicDBObject) drugbankMetadata.get("synonyms"); if (drugbankSynonyms != null) { if (drugbankSynonyms.get("synonym") instanceof String) { drugbankNames.add((String) drugbankSynonyms.get("synonym")); moleculeNames.setDrugbankNames(drugbankNames); } else { BasicDBList drugbankSynonymsList = (BasicDBList) drugbankSynonyms.get("synonym"); if (drugbankSynonymsList != null) { for (Object drugbankSynonym : drugbankSynonymsList) { drugbankNames.add((String) drugbankSynonym); } moleculeNames.setDrugbankNames(drugbankNames); } } } Set<String> drugbankBrands = new HashSet<>(); BasicDBObject drugbankBrandsObject = (BasicDBObject) drugbankMetadata.get("brands"); if (drugbankBrandsObject != null) { if (drugbankBrandsObject.get("brand") instanceof String) { drugbankBrands.add((String) drugbankBrandsObject.get("brand")); moleculeNames.setDrugbankBrands(drugbankBrands); } else { BasicDBList drugbankBrandsList = (BasicDBList) drugbankBrandsObject.get("brand"); if (drugbankBrandsList != null) { for (Object drugbankBrand : drugbankBrandsList) { drugbankBrands.add((String) drugbankBrand); } moleculeNames.setDrugbankBrands(drugbankBrands); } } } } // WIKIPEDIA BasicDBObject wikipedia = (BasicDBObject) xref.get("WIKIPEDIA"); if (wikipedia != null) { BasicDBObject wikipediaMetadata = (BasicDBObject) wikipedia.get("metadata"); if (wikipediaMetadata != null) { String 
wikipediaName = (String) wikipediaMetadata.get("article"); moleculeNames.setWikipediaName(wikipediaName); } } } return moleculeNames; } public DBCursor fetchNamesAndUsageForInchis(Set<String> inchis) { BasicDBList inchiList = new BasicDBList(); inchiList.addAll(inchis); BasicDBObject inClause = new BasicDBObject("$in", inchiList); BasicDBObject whereQuery = new BasicDBObject("InChI", inClause); whereQuery.put("xref.BING", new BasicDBObject("$exists", true)); BasicDBObject fields = new BasicDBObject(); fields.put("InChI", true); fields.put("names.brenda", true); fields.put("xref", true); DBCursor cursor = dbChemicals.find(whereQuery, fields); return cursor; } public NamesOfMolecule fetchNamesFromInchi(String inchi) { BasicDBObject whereQuery = new BasicDBObject("InChI", inchi); BasicDBObject fields = new BasicDBObject(); fields.put("InChI", true); fields.put("names.brenda", true); fields.put("xref.CHEBI.metadata.Synonym", true); fields.put("xref.DRUGBANK.metadata", true); fields.put("xref.METACYC.meta", true); fields.put("xref.WIKIPEDIA.metadata.article", true); BasicDBObject c = (BasicDBObject) dbChemicals.findOne(whereQuery, fields); if (c == null) { return null; } NamesOfMolecule moleculeNames = getNamesFromBasicDBObject(c); return moleculeNames; } public boolean hasBingSearchResultsFromInchi(String inchi) { BasicDBObject whereQuery = new BasicDBObject("InChI", inchi); BasicDBObject existsQuery = new BasicDBObject("$exists", true); whereQuery.put("xref.BING", existsQuery); BasicDBObject fields = new BasicDBObject(); BasicDBObject c = (BasicDBObject) dbChemicals.findOne(whereQuery, fields); return (c != null); } /** * The following methods are related to ChEBI cross-references installation in the Installer DB. */ /** * This function retrieves the ChEBI ID corresponding to an InChI. In the (frequent) case where no ChEBI xref is * present, null is returned. 
* @param inchi input InChI representation of the chemical * @return String: the ChEBI ID corresponding to the InChI representation provided */ public String getChebiIDFromInchi(String inchi) { BasicDBObject whereQuery = new BasicDBObject("InChI", inchi); BasicDBObject existsQuery = new BasicDBObject("$exists", true); whereQuery.put("xref.CHEBI", existsQuery); BasicDBObject c = (BasicDBObject) dbChemicals.findOne(whereQuery, new BasicDBObject()); if (c == null) { return null; } else { BasicDBObject xref = (BasicDBObject) c.get("xref"); BasicDBObject chebi = (BasicDBObject) xref.get("CHEBI"); return (String) chebi.get("dbid"); } } /** * This function retrieves the chemical corresponding to a ChEBI ID and update its metadata with the ChEBI * applications provided * @param chebiId ChEBI ID for the chemical to update * @param applicationSet Set of main and direct ChEBI applications, represented in a ChebiApplicationSet */ public void updateChemicalWithChebiApplications(String chebiId, BrendaChebiOntology.ChebiApplicationSet applicationSet) { Chemical c = this.getChemicalFromChebiId(chebiId); if (c != null && applicationSet != null) { long id = c.getUuid(); BasicDBObject query = new BasicDBObject("_id", id); BasicDBObject update = new BasicDBObject("$set", new BasicDBObject("xref.CHEBI.metadata.applications", applicationSet.toBasicDBObject())); this.dbChemicals.update(query, update); } } /** * Setup the ability to use MongoDB's aggregation framework. * This greatly greatly simplifies pulling out highly nested and unstructured data from the db. * * This method performs the query over the sequence database. * * References: https://docs.mongodb.com/manual/aggregation/ * @param pipeline A list of DBObjects that will be sequentially applied via aggregate. * @return An iterator over all the matching objects. 
*/
  public Iterator<DBObject> applyPipelineOverSequences(List<DBObject> pipeline) {
    AggregationOutput aggregated = this.dbSeq.aggregate(pipeline);
    Iterable<DBObject> matches = aggregated.results();
    return matches.iterator();
  }

  /**
   * Runs a MongoDB aggregation pipeline against the reactions collection ({@code dbReactions}).
   * Aggregation makes it much easier to pull highly nested and unstructured data out of the db.
   *
   * References: https://docs.mongodb.com/manual/aggregation/
   * @param pipeline A list of DBObjects that will be sequentially applied via aggregate.
   * @return An iterator over all the matching objects.
   */
  public Iterator<DBObject> applyPipelineOverReactions(List<DBObject> pipeline) {
    AggregationOutput aggregated = this.dbReactions.aggregate(pipeline);
    Iterable<DBObject> matches = aggregated.results();
    return matches.iterator();
  }
}