act.server.MongoDB.java Source code

Java tutorial

Introduction

Here is the source code for act.server.MongoDB.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.server;

import act.installer.bing.NamesOfMolecule;
import act.installer.bing.UsageTermUrlSet;
import act.installer.brenda.BrendaChebiOntology;
import act.shared.Chemical;
import act.shared.Chemical.REFS;
import act.shared.Cofactor;
import act.shared.ConsistentInChI;
import act.shared.Organism;
import act.shared.Reaction;
import act.shared.Seq;
import act.shared.helpers.MongoDBToJSON;
import act.shared.helpers.P;
import com.act.workflow.tool_manager.workflow.workflow_mixins.mongo.ChemicalKeywords;
import com.act.workflow.tool_manager.workflow.workflow_mixins.mongo.MongoKeywords;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ggasoftware.indigo.Indigo;
import com.ggasoftware.indigo.IndigoException;
import com.ggasoftware.indigo.IndigoInchi;
import com.ggasoftware.indigo.IndigoObject;
import com.mongodb.AggregationOutput;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.Bytes;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
import com.mongodb.WriteConcern;
import com.mongodb.WriteResult;
import com.mongodb.util.JSON;
import org.apache.commons.lang3.StringUtils;
import org.biopax.paxtools.model.level3.ConversionDirectionType;
import org.biopax.paxtools.model.level3.StepDirection;
import org.json.JSONArray;
import org.json.JSONObject;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

// TODO Change all default notimeouts to true.
public class MongoDB {

    // Presumably the numeric offset at which organism ids start -- TODO confirm; its use is not visible here.
    public static final long ORG_ID_BASE = 5000000000L;

    // Jackson mapper shared by all instances of this class.
    private static ObjectMapper mapper = new ObjectMapper();

    // Connection coordinates; set by the constructors and exposed through host(), port() and dbs().
    private String hostname;
    private String database;
    private int port;

    // Collection handles; every one of them is assigned in initDB() from the database named by `database`.
    private DBCollection dbReactions;
    private DBCollection dbChemicals;
    private DBCollection dbCofactors;
    private DBCollection dbOrganisms;
    private DBCollection dbOrganismNames;
    private DBCollection dbCascades;
    private DBCollection dbWaterfalls;
    private DBCollection dbSeq;
    // Original remark: "the pubmed collection is separate from actv01 db".
    // NOTE(review): initDB() obtains it from the same DB object as the other collections -- confirm which is right.
    private DBCollection dbPubmed;

    // Driver-level handles created in initDB(); `mongo` is what close() shuts down.
    private DB mongoDB;
    private Mongo mongo;

    /**
     * Creates a handle on the given database of the given Mongo server and immediately
     * connects via {@code initDB()}.
     *
     * @param mongoActHost hostname of the Mongo server
     * @param port         port of the Mongo server
     * @param dbs          name of the database to bind to
     */
    public MongoDB(String mongoActHost, int port, String dbs) {
        hostname = mongoActHost;
        database = dbs;
        this.port = port;

        initDB();
    }

    /**
     * Drops the named database, asking for confirmation first.
     * Delegates to the four-argument overload with {@code force} set to {@code false}.
     *
     * @param mongoActHost hostname of the Mongo server
     * @param port         port of the Mongo server
     * @param dbs          name of the database to drop
     */
    public static void dropDB(String mongoActHost, int port, String dbs) {
        final boolean skipConfirmation = false;
        dropDB(mongoActHost, port, dbs, skipConfirmation);
    }

    /**
     * Drops the named database on the given Mongo server.
     *
     * When {@code force} is false the user must type {@code DROP} on stdin to confirm; any other
     * input (including end-of-stream) leaves the database untouched. When {@code force} is true the
     * database is dropped without asking.
     *
     * The temporary Mongo client opened for the drop is always closed before returning.
     *
     * @param mongoActHost hostname of the Mongo server
     * @param port         port of the Mongo server
     * @param dbs          name of the database to drop
     * @param force        true to skip the interactive confirmation
     * @throws IllegalArgumentException if the host is unknown or the Mongo driver reports an error
     * @throws RuntimeException         if the confirmation cannot be read from stdin
     */
    public static void dropDB(String mongoActHost, int port, String dbs, boolean force) {
        Mongo client = null;
        try {
            client = new Mongo(mongoActHost, port);
            DB toDropDB = client.getDB(dbs);

            if (!force) {
                // Require explicit confirmation from the user before dropping an existing DB.
                System.out.format(
                        "Going to drop: %s:%d/%s. Type \"DROP\" (without quotes) and press enter to proceed.\n",
                        mongoActHost, port, dbs);
                // Deliberately NOT wrapped in try-with-resources: closing this reader would close
                // System.in itself and break every later read from stdin in the same JVM.
                BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
                String readLine = reader.readLine();
                if (!"DROP".equals(readLine)) {
                    System.out.format("Invalid input \"%s\", not dropping DB\n", readLine);
                } else {
                    System.out.format("Dropping DB\n");
                    // drop DB!
                    toDropDB.dropDatabase();
                }
            } else {
                System.out.format("[Force] Dropping DB %s\n", dbs);
                toDropDB.dropDatabase();
            }
        } catch (UnknownHostException e) {
            throw new IllegalArgumentException("Invalid host for Mongo Act server.", e);
        } catch (MongoException e) {
            throw new IllegalArgumentException("Could not initialize Mongo driver.", e);
        } catch (IOException e) {
            throw new RuntimeException("Unable to read from stdin", e);
        } finally {
            // The client exists only for this call; release its connections.
            if (client != null) {
                client.close();
            }
        }
    }

    /**
     * Connects to {@code host} on port 27017 using the default act database "actv01".
     * This constructor is rarely, if ever called.
     *
     * @param host hostname of the Mongo server
     */
    public MongoDB(String host) {
        this(host, 27017, "actv01");
    }

    /**
     * Connects to localhost on port 27017 using the default act database "actv01".
     * This constructor is rarely, if ever called.
     */
    public MongoDB() {
        this("localhost", 27017, "actv01");
    }

    /**
     * @return the hostname and the port separated by a single space
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(hostname).append(" ").append(port);
        return text.toString();
    }

    /**
     * Closes the underlying Mongo client created by {@code initDB()}.
     */
    public void close() {
        mongo.close();
    }

    /**
     * Opens the Mongo client for {@code hostname:port}, selects {@code database}, binds all
     * collection handles and makes sure the expected indices exist.
     *
     * @throws IllegalArgumentException if the host is unknown or the Mongo driver reports an error;
     *         the original exception is attached as the cause
     */
    private void initDB() {
        try {
            mongo = new Mongo(this.hostname, this.port);
            mongoDB = mongo.getDB(this.database);

            // in case the db is protected then we would do the following:
            // boolean auth = db.authenticate(myUserName, myPassword);
            // but right now we do not care.

            this.dbReactions = mongoDB.getCollection("reactions");
            this.dbChemicals = mongoDB.getCollection("chemicals");
            this.dbCofactors = mongoDB.getCollection("cofactors");
            this.dbOrganisms = mongoDB.getCollection("organisms");
            this.dbOrganismNames = mongoDB.getCollection("organismnames");
            this.dbSeq = mongoDB.getCollection("seq");
            this.dbCascades = mongoDB.getCollection("cascades");
            this.dbWaterfalls = mongoDB.getCollection("waterfalls");
            this.dbPubmed = mongoDB.getCollection("pubmed");

            initIndices();
        } catch (UnknownHostException e) {
            throw new IllegalArgumentException("Invalid host for Mongo Act server.", e);
        } catch (MongoException e) {
            // This runs from the constructors: when it fails nobody receives an instance on which
            // close() could be called, so release the client here instead of leaking it.
            if (mongo != null) {
                mongo.close();
            }
            throw new IllegalArgumentException(
                    String.format("Could not initialize Mongo driver: %s", e.getMessage()), e);
        }
    }

    /**
     * Requests every index this class relies on, in a fixed order: chemicals, cofactors,
     * organism names, then sequences. The boolean flag is passed through to the create*Index
     * helpers; the original comments describe {@code true} as asking for a hashed index and the
     * single-argument form as a normal index.
     */
    private void initIndices() {
        // chemicals: InChI with the flag set, the remaining fields as normal indices
        createChemicalsIndex("InChI", true);
        String[] plainChemicalFields = {"InChIKey", "names.brenda", "names.pubchem.values", "names.synonyms"};
        for (String field : plainChemicalFields) {
            createChemicalsIndex(field);
        }

        // cofactors
        createCofactorsIndex("InChI", true);

        // organism names
        String[] organismNameFields = {"name", "org_id"};
        for (String field : organismNameFields) {
            createOrganismNamesIndex(field);
        }

        // sequences
        createSeqIndex("metadata.accession", false);
        createSeqIndex("seq", true);
        createSeqIndex("rxn_refs", false);
    }

    /**
     * @return the port of the Mongo server this instance was created for
     */
    public int port() {
        return port;
    }

    /**
     * @return the hostname of the Mongo server this instance was created for
     */
    public String host() {
        return hostname;
    }

    /**
     * @return the name of the database this instance is bound to
     */
    public String dbs() {
        return database;
    }

    /**
     * @return hostname, port and database name joined by dots, e.g. {@code localhost.27017.actv01}
     */
    public String location() {
        StringBuilder where = new StringBuilder();
        where.append(hostname).append(".");
        where.append(port).append(".");
        where.append(database);
        return where.toString();
    }

    /**
     * Reads the "pubchem" value of the i-th entry of the list stored under
     * {@code enz_summary.<which>} in the given document.
     *
     * @param family document containing an "enz_summary" sub-document
     * @param which  key of the list inside "enz_summary"
     * @param i      index into that list
     * @return the pubchem value as a string ("null" if the entry has none), or the empty string
     *         when {@code i} is at or beyond the end of the list
     */
    private String getReactantFromMongoDocument(BasicDBObject family, String which, int i) {
        DBObject summary = (DBObject) family.get("enz_summary");
        BasicDBList entries = (BasicDBList) summary.get(which);
        if (i < entries.size()) {
            DBObject entry = (DBObject) entries.get(i);
            Long pubchemId = (Long) entry.get("pubchem");
            return String.valueOf((Object) pubchemId);
        }
        return "";
    }

    /**
     * Sanity-checks a collection against the same collection in a reference DB. Both servers are
     * reached on localhost and the database is always "actv01"; only the ports differ.
     *
     * @param coll        name of the collection to compare
     * @param id_key      field that identifies a document in both collections
     * @param thisport    port of the server holding the current collection
     * @param refport     port of the server holding the reference collection
     * @param listsAreSet true to compare list-valued fields as unordered multisets, false to
     *                    compare them position by position
     * @return pair of (pair(added, deleted), changed): ids present only in this collection, ids
     *         present only in the reference collection, and an id-to-diff map for documents that
     *         exist in both but differ
     * @throws UnknownHostException if the host cannot be resolved
     */
    public static P<P<List, List>, Map<Object, Object>> compare(String coll, String id_key, int thisport,
            int refport, boolean listsAreSet) throws UnknownHostException {
        String host = "localhost";
        String dbs = "actv01";

        List<Object> add = new ArrayList<Object>();
        List<Object> del = new ArrayList<Object>();
        Set<Object> seen = new HashSet<Object>();
        Map<Object, Object> upd = new HashMap<Object, Object>();

        Mongo thisMongo = null;
        Mongo refMongo = null;
        try {
            thisMongo = new Mongo(host, thisport);
            refMongo = new Mongo(host, refport);
            DBCollection c = thisMongo.getDB(dbs).getCollection(coll);
            DBCollection cref = refMongo.getDB(dbs).getCollection(coll);

            // yes, we indeed need to iterate over the entire collection! so unrestricted find() ok here.
            DBCursor cur = c.find();
            try {
                while (cur.hasNext()) {
                    DBObject doc = cur.next();
                    Object id = doc.get(id_key);

                    DBObject docref = findOneDoc(cref, id_key, id);
                    if (docref == null) {
                        // reference collection does not have doc, log as newly created
                        add.add(id);
                    } else {
                        // reference collection has doc: record a diff if the two versions differ
                        Object diff = compare(doc, docref, listsAreSet);
                        if (diff != null) {
                            upd.put(id, diff);
                        }
                    }
                    seen.add(id);
                }
            } finally {
                cur.close();
            }

            // Now walk the REFERENCE collection: any id never seen above was deleted.
            // (The cursor must come from cref; iterating c again can never find a deletion.)
            DBCursor curref = cref.find();
            try {
                while (curref.hasNext()) {
                    DBObject doc = curref.next();
                    Object id = doc.get(id_key);

                    if (!seen.contains(id)) {
                        // this doc was not seen in the updated collection, so deleted. log that
                        del.add(id);
                    }
                }
            } finally {
                curref.close();
            }
        } finally {
            // Both clients exist only for this comparison; release their connections.
            if (refMongo != null) {
                refMongo.close();
            }
            if (thisMongo != null) {
                thisMongo.close();
            }
        }

        return new P<P<List, List>, Map<Object, Object>>(new P<List, List>(add, del), upd);
    }

    /**
     * Looks up a single document whose {@code id_key} field equals {@code id}.
     *
     * @param c      collection to search
     * @param id_key field name to match on
     * @param id     value the field must have
     * @return whatever {@code DBCollection.findOne} yields for that query
     */
    private static DBObject findOneDoc(DBCollection c, String id_key, Object id) {
        return c.findOne(new BasicDBObject(id_key, id));
    }

    /**
     * Compares two arbitrary values taken from Mongo documents and dispatches on their type.
     *
     * @param d           value from the current document (may be null)
     * @param dref        value from the reference document (may be null)
     * @param listsAreSet forwarded to the list/document comparisons
     * @return null when the values are identical, otherwise a description of the difference:
     *         a string for null/scalar/type mismatches, or the nested diff for lists and documents
     */
    private static Object compare(Object d, Object dref, boolean listsAreSet) {
        // null handling first: both null means identical
        if (d == null) {
            return dref == null ? null : "+" + dref;
        }
        if (dref == null) {
            return "-" + d;
        }

        // scalars: both sides must be of the very same boxed/String type
        Class<?>[] scalarTypes = {Long.class, Double.class, Integer.class, Boolean.class, String.class};
        for (Class<?> scalarType : scalarTypes) {
            if (scalarType.isInstance(d) && scalarType.isInstance(dref)) {
                return compare_primitive(d, dref);
            }
        }

        // lists must be tested before documents, because a BasicDBList is also a DBObject
        if (d instanceof BasicDBList && dref instanceof BasicDBList) {
            return compare((BasicDBList) d, (BasicDBList) dref, listsAreSet);
        }
        if (d instanceof DBObject && dref instanceof DBObject) {
            return compare((DBObject) d, (DBObject) dref, listsAreSet);
        }

        // anything else is a type mismatch: dump both sides and report the class names
        System.out.println("+" + d);
        System.out.println("-" + dref);
        System.out.println();
        return "TYPEDIFF: +" + d.getClass().getName() + " vs -" + dref.getClass().getName();
    }

    /**
     * Compares two non-null scalar values with {@code equals}.
     *
     * @return null when equal, otherwise the string {@code "+<p> vs -<pref>"}
     */
    private static Object compare_primitive(Object p, Object pref) {
        if (p.equals(pref)) {
            return null;
        }
        return "+" + p + " vs -" + pref;
    }

    /**
     * Computes a field-by-field diff of two documents.
     *
     * Keys only in {@code doc} appear in the result prefixed with "+", keys only in {@code docref}
     * prefixed with "-", and keys present in both whose values differ appear unprefixed with the
     * nested diff as value. The computed fields "rarity", "estimateEnergy" and "coefficient" are
     * never compared, since improved numerical calculations are expected to change them.
     *
     * @return the diff document, or null when the two documents are identical
     */
    private static DBObject compare(DBObject doc, DBObject docref, boolean listsAreSet) {
        BasicDBObject delta = new BasicDBObject();

        // reference keys that have not (yet) been matched by a key of doc
        Set<String> unvisitedRefKeys = new HashSet<String>();
        unvisitedRefKeys.addAll(docref.keySet());

        for (String key : doc.keySet()) {
            boolean ignored = key.equals("rarity") || key.equals("estimateEnergy") || key.equals("coefficient");
            if (ignored) {
                continue;
            }

            Object current = doc.get(key);
            if (docref.containsKey(key)) {
                // present on both sides: recurse, and only record the key if the values differ
                unvisitedRefKeys.remove(key);
                Object nested = compare(current, docref.get(key), listsAreSet);
                if (nested != null) {
                    delta.put(key, nested);
                }
            } else {
                // this field is new
                delta.put("+" + key, current);
            }
        }

        // whatever is left on the reference side was deleted from the old doc
        for (String refKey : unvisitedRefKeys) {
            boolean ignored = refKey.equals("rarity") || refKey.equals("estimateEnergy")
                    || refKey.equals("coefficient");
            if (ignored) {
                continue;
            }
            delta.put("-" + refKey, docref.get(refKey));
        }

        // every recorded difference adds an entry, so an empty delta means "identical"
        return delta.isEmpty() ? null : delta;

        // a plain string difference of doc.toString() vs docref.toString() is not order
        // invariant and therefore not used here.
    }

    /**
     * Computes the diff of two lists.
     *
     * Ordered mode ({@code listsAreSet == false}): elements are compared position by position.
     * The diff has one slot per position, null where the elements are identical. Positions that
     * exist in only one list always count as a difference: an extra element of {@code l} is
     * reported as {@code "-<elem>"} and an extra element of {@code refl} as {@code "+<elem>"},
     * matching what the Object overload reports for a value paired with null.
     *
     * Unordered mode ({@code listsAreSet == true}): each element of {@code l} is greedily matched
     * against one not-yet-matched element of {@code refl}; unmatched elements from either side
     * make up the diff.
     *
     * @return the diff list, or null when the lists are considered identical
     */
    private static BasicDBList compare(BasicDBList l, BasicDBList refl, boolean listsAreSet) {
        boolean different = false;
        BasicDBList diff = new BasicDBList();

        if (!listsAreSet) {
            // Only index into the range both lists share; the original indexed refl by l's
            // length, which threw when refl was shorter and ignored refl's tail when longer.
            int shared = Math.min(l.size(), refl.size());
            for (int i = 0; i < shared; i++) {
                Object d = compare(l.get(i), refl.get(i), listsAreSet);
                if (d != null) {
                    different = true;
                }
                // identical positions contribute a null placeholder so later slots keep their
                // index; a placeholder alone does NOT mark the list as different
                diff.add(d);
            }
            for (int i = shared; i < l.size(); i++) {
                different = true;
                diff.add("-" + l.get(i));
            }
            for (int i = shared; i < refl.size(); i++) {
                different = true;
                diff.add("+" + refl.get(i));
            }
        } else {
            // Kept as a list rather than a true set: both sides may legitimately contain
            // (identical) replicates, which must be matched one-to-one.
            List<Object> unmatchedRef = new ArrayList<Object>(refl);

            for (Object e : l) {
                boolean matchesSome = false;
                for (Iterator<Object> it = unmatchedRef.iterator(); it.hasNext();) {
                    if (compare(e, it.next(), listsAreSet) == null) {
                        // consume exactly the matched reference element to keep the mapping 1-1
                        it.remove();
                        matchesSome = true;
                        break;
                    }
                }
                if (!matchesSome) {
                    // element of the new list has no counterpart: the lists differ
                    different = true;
                    diff.add(e);
                }
            }

            if (!unmatchedRef.isEmpty()) {
                // reference elements without a counterpart: the lists differ
                different = true;
                diff.addAll(unmatchedRef);
            }
        }

        return different ? diff : null;
    }

    /*
       *
       *
       * Below is the list of functions required for populating MongoAct
       *
       *
       */
    /**
     * @return the current number of documents in the chemicals collection, used as the next id
     */
    public Long getNextAvailableChemicalDBid() {
        long documentCount = dbChemicals.count();
        return documentCount;
    }

    /**
     * @return the current number of documents in the cofactors collection, used as the next id
     */
    public Long getNextAvailableCofactorDBid() {
        // TODO: do something more robust than this hack.
        long documentCount = dbCofactors.count();
        return documentCount;
    }

    /**
     * Stores a waterfall document under the given id.
     *
     * @param ID        value written to the document's "_id" field
     * @param waterfall document to insert; it is modified in place to carry the id
     */
    public void submitToActWaterfallDB(Long ID, DBObject waterfall) {
        waterfall.put("_id", ID);
        dbWaterfalls.insert(waterfall);
    }

    /**
     * Stores a cascade document under the given id.
     *
     * @param ID      value written to the document's "_id" field
     * @param cascade document to insert; it is modified in place to carry the id
     */
    public void submitToActCascadeDB(Long ID, DBObject cascade) {
        cascade.put("_id", ID);
        dbCascades.insert(cascade);
    }

    /**
     * Inserts a new cofactor document.
     *
     * @param c  cofactor to store
     * @param ID id for the new document
     * @throws RuntimeException if {@code alreadyEntered(c)} reports that the cofactor is already
     *         in the DB; cofactors are not expected to be seen repeatedly, so this is a hard abort
     */
    public void submitToActCofactorsDB(Cofactor c, Long ID) {
        boolean isDuplicate = alreadyEntered(c) != -1;
        if (isDuplicate) {
            throw new RuntimeException("Duplicate entry for cofactor seen! Install abort.");
        }

        dbCofactors.insert(createCofactorDoc(c, ID));
    }

    /**
     * Serializes a cofactor into its Mongo document form with the fields "_id", "InChI" and
     * "names" (in that order).
     *
     * @param c  cofactor to serialize
     * @param ID value for the "_id" field
     * @return the freshly built document
     */
    public BasicDBObject createCofactorDoc(Cofactor c, Long ID) {
        BasicDBObject cofactorDoc = new BasicDBObject("_id", ID);
        cofactorDoc.put("InChI", c.getInChI());

        BasicDBList nameList = new BasicDBList();
        nameList.addAll(c.getNames());
        cofactorDoc.put("names", nameList);

        return cofactorDoc;
    }

    /**
     * Stores a chemical: merges it into the existing document when {@code alreadyEntered(c)}
     * finds one, otherwise inserts a new document.
     *
     * @param c  The chemical to insert or merge.
     * @param ID The id to use when the chemical turns out to be new.
     * @return The id of the document now representing the chemical: the pre-existing id when a
     *         merge happened, {@code ID} when a new document was inserted.
     */
    public long submitToActChemicalDB(Chemical c, Long ID) {
        long existingId = alreadyEntered(c);
        boolean isNew = existingId == -1;

        if (isNew) {
            dbChemicals.insert(createChemicalDoc(c, ID));
            return ID;
        }

        // chemical already exists: merge
        mergeIntoDB(existingId, c);
        return existingId;
    }

    /**
     * Replaces the stored document with the given id by a fresh serialization of {@code c}.
     * See the comment in updateActReaction about db.collection.update and $set.
     *
     * @param c  chemical whose serialized form becomes the new document
     * @param id "_id" of the document to replace
     */
    public void updateActChemical(Chemical c, Long id) {
        BasicDBObject replacement = createChemicalDoc(c, id);
        DBObject selector = new BasicDBObject("_id", id);
        dbChemicals.update(selector, replacement);
    }

    /**
     * Appends XRef data to the chemical with the specified inchi. Might only apply to Metacyc for now.
     * Returns without touching the DB when both idPath and metaPath are null; a single null path is
     * simply skipped.
     *
     * The work is delegated to Mongo's update operators: the id is added to the xref id set only if
     * absent, and the metadata objects are appended (without comparison) to the existing list. This
     * avoids reading, de-serializing, modifying and re-serializing the object ourselves, which is a
     * significant performance improvement, especially towards the end of the Metacyc installation.
     *
     * TODO: this API is awful.  Fix it up to be less Metacyc-specific and more explicit in its behavior.
     *
     * @param inchi The inchi of the chemical to update in Mongo.
     * @param idPath The path to the field where ids should be added, like xref.METACYC.id.
     * @param id The id for this chemical reference to write.
     * @param metaPath The path to the field where metadata blobs should be stored, like xref.METACYC.meta.
     * @param metaObjects A list of metadata objects to append to the metadata list in Mongo.
     */
    public void appendChemicalXRefMetadata(String inchi, String idPath, String id, String metaPath,
            BasicDBList metaObjects) {
        boolean hasIdPath = idPath != null;
        boolean hasMetaPath = metaPath != null;
        if (!hasIdPath && !hasMetaPath) {
            return;
        }

        BasicDBObject changes = new BasicDBObject();
        if (hasIdPath) {
            // $addToSet only inserts the id if the array does not contain it yet.
            changes.put("$addToSet", new BasicDBObject(idPath, id));
        }
        if (hasMetaPath) {
            /* $push + $each on an array of objects behaves like the now-deprecated $pushAll:
             * every metadata object is appended to the list for this source. */
            BasicDBObject eachMeta = new BasicDBObject("$each", metaObjects);
            changes.put("$push", new BasicDBObject(metaPath, eachMeta));
        }

        // The chemical is selected by InChI, and everything goes out as exactly one update.
        BasicDBObject byInchi = new BasicDBObject("InChI", inchi);
        dbChemicals.update(byInchi, changes);
    }

    /**
     * Serializes the entire chemical as a full-text string; called by the cytoscape plugin.
     *
     * @return the string form of the document built by {@code createChemicalDoc(c, ID)}
     */
    public static String chemicalAsString(Chemical c, Long ID) {
        BasicDBObject serialized = createChemicalDoc(c, ID);
        return serialized.toString();
    }

    /**
     * Serializes a chemical into its Mongo document form.
     *
     * The document carries, in this order: "_id", "canonical", "SMILES", "InChI", "InChIKey",
     * "isCofactor", "isNative", "names" (with "synonyms", "pubchem" and "brenda"), "xref",
     * "estimateEnergy", "keywords", "keywords_case_insensitive", "csid", "num_vendors" and
     * "vendors".
     *
     * @param c  chemical to serialize
     * @param ID value for the "_id" field
     * @return the freshly built document
     */
    public static BasicDBObject createChemicalDoc(Chemical c, Long ID) {
        BasicDBObject doc = new BasicDBObject();

        doc.put("_id", ID);

        doc.put("canonical", c.getCanon());

        doc.put("SMILES", c.getSmiles());
        doc.put("InChI", c.getInChI());
        doc.put("InChIKey", c.getInChIKey());

        doc.put("isCofactor", c.isCofactor());
        doc.put("isNative", c.isNative());

        BasicDBObject names = new BasicDBObject();
        BasicDBList synonyms = new BasicDBList();
        synonyms.addAll(c.getSynonyms());
        names.put("synonyms", synonyms);

        // pubchem names are stored as a list of {type, values} objects, one per name type
        BasicDBList pubchemNames = new BasicDBList();
        for (String type : c.getPubchemNameTypes()) {
            BasicDBList dbNames = new BasicDBList();
            for (String name : c.getPubchemNames(type)) {
                dbNames.add(name);
            }
            BasicDBObject dbNameObj = new BasicDBObject();
            dbNameObj.put("type", type);
            dbNameObj.put("values", dbNames);
            pubchemNames.add(dbNameObj);
        }
        names.put("pubchem", pubchemNames);

        // will really get its fields later if initial install, but for cases where we call it
        // post install, we construct the full chem entry
        BasicDBList brendaNames = new BasicDBList();
        brendaNames.addAll(c.getBrendaNames());
        names.put("brenda", brendaNames);

        doc.put("names", names);

        BasicDBObject xrefs = new BasicDBObject();
        xrefs.put("pubchem", c.getPubchemID());
        for (REFS xrefTyp : Chemical.REFS.values()) {
            // look the reference up once; types without a reference are left out of the document
            Object ref = c.getRef(xrefTyp);
            if (ref != null) {
                xrefs.put(xrefTyp.name(), MongoDBToJSON.conv((JSONObject) ref));
            }
        }
        doc.put("xref", xrefs);

        doc.put("estimateEnergy", c.getEstimatedEnergy());

        doc.put("keywords", c.getKeywords());
        doc.put("keywords_case_insensitive", c.getCaseInsensitiveKeywords());

        doc.put("csid", c.getChemSpiderID());
        doc.put("num_vendors", c.getChemSpiderNumUniqueVendors());
        doc.put("vendors", MongoDBToJSON.conv(c.getChemSpiderVendorXrefs()));

        return doc;
    }

    /**
     * Merges {@code c} into the chemical already stored under {@code id}.
     *
     * The stored chemical is loaded and combined with {@code c} via {@code createNewByMerge}. If
     * that yields null the conflict is reported on stderr and the stored entry is kept unchanged.
     * Otherwise the stored entry is removed and the merged chemical is submitted under the same id.
     *
     * @param id id of the existing document
     * @param c  chemical carrying the new information
     */
    private void mergeIntoDB(long id, Chemical c) {
        Chemical stored = getChemicalFromChemicalUUID(id);
        Chemical merged = c.createNewByMerge(stored);

        if (merged != null) {
            // remove the old entry from the collection; once it is gone we can simply add
            BasicDBObject byId = new BasicDBObject();
            byId.put("_id", id);
            this.dbChemicals.remove(byId, WriteConcern.SAFE);
            submitToActChemicalDB(merged, id);
            return;
        }

        // whoa! inconsistent values on unmergables, so recover by keeping what we have
        System.err.println("\n\n\n\n\n\n\n\n\n\n");
        System.err.println("---- Conflicting uuid or name or smiles or inchi or inchikey or pubchem_id:");
        System.err.println("---- NEW\t " + c);
        System.err.println("---- OLD\t " + stored);
        System.err.println("---- Keeping OLD entry");
        System.err.println("\n\n\n\n\n\n\n\n\n\n");
    }

    /**
     * Records the matched ROs of a chemical under "derived_data.matched_ros".
     *
     * Two updates are issued: the first sets "derived_data" to an empty sub-document (replacing
     * whatever was stored there), the second sets "derived_data.matched_ros" to the given list.
     *
     * @param id         "_id" of the chemical document
     * @param matchedROs values to store as the matched_ros list
     */
    public void updateChemicalWithRoBinningInformation(long id, List<Integer> matchedROs) {
        BasicDBObject byId = new BasicDBObject("_id", id);

        // step 1: (re)create the derived_data container
        BasicDBObject emptyContainer = new BasicDBObject("derived_data", new BasicDBObject());
        dbChemicals.update(byId, new BasicDBObject("$set", emptyContainer));

        // step 2: fill in the matched ROs
        BasicDBList roList = new BasicDBList();
        roList.addAll(matchedROs);
        BasicDBObject matchedRosField = new BasicDBObject("derived_data.matched_ros", roList);
        dbChemicals.update(byId, new BasicDBObject("$set", matchedRosField));
    }

    /**
     * Appends a lower-cased brenda name to "names.brenda" of the stored chemical matching {@code c}.
     * Logs to stderr and does nothing when {@code alreadyEntered(c)} returns a negative id.
     *
     * @param c          chemical used to locate the stored document
     * @param brendaName name to append
     */
    public void updateChemicalWithBrenda(Chemical c, String brendaName) {
        long existingId = alreadyEntered(c);
        if (existingId < 0) {
            System.err.println("Update chemical with brenda: " + brendaName + " can't find matching inchi");
            return;
        }

        BasicDBObject selector = new BasicDBObject("_id", existingId);
        BasicDBObject pushName = new BasicDBObject("names.brenda", brendaName.toLowerCase());
        dbChemicals.update(selector, new BasicDBObject("$push", pushName));
    }

    /**
     * Sets "isNative" to true on the chemical with the given inchi.
     * Logs to stderr and does nothing when no such chemical is found.
     *
     * @param inchi inchi of the chemical to flag
     */
    public void updateChemicalAsNative(String inchi) {
        Chemical found = this.getChemicalFromInChI(inchi);
        if (found == null) {
            System.err.println("Can't find native in DB: " + inchi);
            return;
        }

        long chemicalId = found.getUuid();
        BasicDBObject selector = new BasicDBObject("_id", chemicalId);
        BasicDBObject markNative = new BasicDBObject("$set", new BasicDBObject("isNative", true));
        dbChemicals.update(selector, markNative);
    }

    /**
     * Intended to attach patent information to the chemical with the given inchi:
     * 1. update the chemical entry to point to all these patents
     * 2. update the patents collection with the (patent_id, scores, patent_text)
     *
     * WARNING: this method is unfinished. If the chemical is found it prints a message and
     * terminates the JVM via System.exit(-1); the update statements after the exit call are
     * never executed. If the chemical is not found it only logs to stderr and returns.
     *
     * @param inchi       inchi of the chemical to update
     * @param num_patents number of patents, meant for the "num_patents" field
     * @param patents     patent data, meant for the "patents" field
     */
    public void updateChemicalWithPatents(String inchi, Integer num_patents, DBObject patents) {
        Chemical c = this.getChemicalFromInChI(inchi);
        if (c == null) {
            System.err.println("Attempting to add patent. Can't find chem in DB: " + inchi);
            return;
        }
        long id = c.getUuid();

        BasicDBObject query = new BasicDBObject();
        query.put("_id", id);
        BasicDBObject update = new BasicDBObject();
        BasicDBObject set = new BasicDBObject();

        // TODO!!!!!!!
        //     patents is Array of {  patent_num: Int, patent_txt: String, patent_score: Int }
        //                     ie  {   patent ID, full text of patent, relevance to biosynthesis }
        //
        // put the patents DBObject (all elements of Array) in db.patents.
        // put the references to the entries within it in db.chemicals
        //     i.e., only an array { patent ID }
        // TODO!!!!!!!

        // TODO!!!!!!!
        //
        // Need to update functions that serialize and deserialize from the db :
        //          createChemicalDoc and convertDBObjectToChemical
        // to recreate vendors, patents etc fields....
        //
        // TODO!!!!!!!

        // Hard abort until the TODOs above are resolved: the whole process exits here.
        System.out.println("Installing patents needs to go into separate collections.. see code.");
        System.exit(-1);

        // Never reached while the exit above is in place.
        set.put("patents", patents);
        set.put("num_patents", num_patents);
        update.put("$set", set);
        this.dbChemicals.update(query, update);
    }

    /**
     * Sets the "vendors", "csid" and "num_vendors" fields of the chemical with the given inchi.
     * Logs to stderr and does nothing when no such chemical is found.
     *
     * @param inchi       inchi of the chemical to update
     * @param csid        value for the "csid" field
     * @param num_vendors value for the "num_vendors" field
     * @param vendors     vendor data, converted with {@code MongoDBToJSON.conv} before storing
     */
    public void updateChemicalWithVendors(String inchi, Integer csid, Integer num_vendors, JSONArray vendors) {
        Chemical found = this.getChemicalFromInChI(inchi);
        if (found == null) {
            System.err.println("Attempting to add vendor. Can't find chem in DB: " + inchi);
            return;
        }
        long chemicalId = found.getUuid();

        DBObject vendorsAsDbObject = MongoDBToJSON.conv(vendors);
        BasicDBObject fields = new BasicDBObject();
        fields.put("vendors", vendorsAsDbObject);
        fields.put("csid", csid);
        fields.put("num_vendors", num_vendors);

        BasicDBObject selector = new BasicDBObject("_id", chemicalId);
        dbChemicals.update(selector, new BasicDBObject("$set", fields));
    }

    // When true, the "[Jeff cleanup]" progress messages that the synonym maintenance methods
    // guard with this flag are not written to stderr.
    static boolean jeff_cleanup_quiet = true;

    /**
     * Makes sure the chemical with InChI {@code inchi} lists {@code synonym} among its synonyms,
     * creating the chemical if no entry exists yet.
     *
     * @param inchi   InChI identifying the chemical
     * @param synonym synonym to attach
     * @return the id of the existing entry, or the id assigned to the newly created one
     */
    public long updateOrCreateWithSynonym(String inchi, String synonym) {
        Chemical existing = this.getChemicalFromInChI(inchi);
        if (!jeff_cleanup_quiet) {
            System.err.println("[Jeff cleanup] Synonym: " + synonym);
            System.err.println("[Jeff cleanup] InChI  : " + inchi);
        }

        if (existing == null) {
            // no entry yet: create one that carries just the inchi and the synonym
            long newId = getNextAvailableChemicalDBid();
            Chemical created = new Chemical(newId);
            created.setInchi(inchi);
            created.addSynonym(synonym);

            submitToActChemicalDB(created, newId);
            if (!jeff_cleanup_quiet) {
                System.err.println("[Jeff cleanup] NEW ENTRY id=" + newId);
            }
            return newId;
        }

        long existingId = existing.getUuid();
        boolean alreadyListed = existing.getSynonyms().contains(synonym);
        if (!alreadyListed) {
            // add the synonym and write the whole re-serialized chemical back
            existing.addSynonym(synonym);
            BasicDBObject reserialized = createChemicalDoc(existing, existingId);
            this.dbChemicals.save(reserialized);
            if (!jeff_cleanup_quiet) {
                System.err.println("[Jeff cleanup] MOVED to id=" + existingId);
            }
        } else if (!jeff_cleanup_quiet) {
            System.err.println("[Jeff cleanup] Already in synonyms. This move gets a -1 count.");
        }
        return existingId;
    }

    // Finds the entry for @inchi and strips @synonym from every place a name is kept:
    //      canon:String (shortestName)
    //      brendaNames:List<String>
    //      synonyms:List<String>
    //      names:Map<String,String[]> (pubchem names type->names)
    // Stored names are trimmed before being compared with @synonym; that trim is why a
    // plain .remove(synonym) is not enough. The rebuilt document is then saved.
    // Returns the entry's id, or -1 when no entry exists for @inchi.
    public long removeSynonym(String inchi, String synonym) {
        Chemical chem = this.getChemicalFromInChI(inchi);
        if (chem == null) {
            System.err.println("[Jeff cleanup] ERROR? Can't find chemical entry to remove synonym from: " + inchi);
            return -1;
        }
        long chemId = chem.getUuid();

        // Canonical name.
        if (chem.getCanon() != null && chem.getCanon().trim().equals(synonym)) {
            chem.setCanon(null);
            if (!jeff_cleanup_quiet) {
                System.err.println("[Jeff cleanup] Removed from Canonical");
            }
        }

        // BRENDA names: collect the untrimmed originals that match, then drop them.
        if (chem.getBrendaNames() != null) {
            List<String> brendaMatches = new ArrayList<String>();
            for (String brendaName : chem.getBrendaNames()) {
                if (brendaName.trim().equals(synonym)) {
                    brendaMatches.add(brendaName);
                }
            }
            if (!brendaMatches.isEmpty()) {
                if (!jeff_cleanup_quiet) {
                    System.err.println("[Jeff cleanup] Removed from Brenda");
                }
                chem.getBrendaNames().removeAll(brendaMatches);
            }
        }

        // Synonyms: same collect-then-remove approach.
        if (chem.getSynonyms() != null) {
            List<String> synonymMatches = new ArrayList<String>();
            for (String existingSynonym : chem.getSynonyms()) {
                if (existingSynonym.trim().equals(synonym)) {
                    synonymMatches.add(existingSynonym);
                }
            }
            if (!synonymMatches.isEmpty()) {
                if (!jeff_cleanup_quiet) {
                    System.err.println("[Jeff cleanup] Removed from Synonyms");
                }
                chem.getSynonyms().removeAll(synonymMatches);
            }
        }

        // Pubchem names: rebuild each type's array without the matching names.
        for (String nameType : chem.getPubchemNameTypes()) {
            List<String> survivors = new ArrayList<String>();
            for (String pubchemName : chem.getPubchemNames(nameType)) {
                if (!pubchemName.trim().equals(synonym)) {
                    survivors.add(pubchemName);
                } else if (!jeff_cleanup_quiet) {
                    System.err.println("[Jeff cleanup] Removed from Pubchem");
                }
            }
            chem.getPubchemNames().put(nameType, survivors.toArray(new String[0]));
        }

        this.dbChemicals.save(createChemicalDoc(chem, chemId));
        return chemId;
    }

    /**
     * Rewrites the substrate and product lists under {@code enz_summary} of the stored
     * reaction document so that they carry the coefficients held by {@code r}.
     * <p>
     * For each side: stored entries flagged {@code balance == true} are dropped; the
     * remaining stored entries are kept and get their {@code coefficient} set from
     * {@code r}; finally every id in {@code r}'s coefficient id set that was not among
     * the kept entries and has a non-null coefficient is appended as a new entry with
     * {@code balance = true}. The modified document is then written back.
     *
     * @param r reaction whose uuid selects the document and whose coefficients are stored
     */
    public void updateStoichiometry(Reaction r) {
        BasicDBObject byId = new BasicDBObject("_id", r.getUUID());
        DBObject reactionDoc = this.dbReactions.findOne(byId);
        DBObject summary = (DBObject) reactionDoc.get("enz_summary");

        // ---- substrates ----
        BasicDBList storedSubstrates = (BasicDBList) summary.get("substrates");
        BasicDBList rebuiltSubstrates = new BasicDBList();
        Set<Long> keptSubstrateIds = new HashSet<Long>();
        for (Object raw : storedSubstrates) {
            DBObject entry = (DBObject) raw;
            Long chemId = (Long) entry.get("pubchem");
            Boolean balanceFlag = (Boolean) entry.get("balance");
            if (!Boolean.TRUE.equals(balanceFlag)) {
                keptSubstrateIds.add(chemId);
                entry.put("coefficient", r.getSubstrateCoefficient(chemId));
                rebuiltSubstrates.add(entry);
            }
        }
        for (Long chemId : r.getSubstrateIdsOfSubstrateCoefficients()) {
            if (keptSubstrateIds.contains(chemId) || r.getSubstrateCoefficient(chemId) == null) {
                continue;
            }
            // Not part of the stored (non-balance) entries: add it as a balancing entry.
            rebuiltSubstrates.add(new BasicDBObject("pubchem", chemId)
                    .append("coefficient", r.getSubstrateCoefficient(chemId))
                    .append("balance", true));
        }

        // ---- products ----
        BasicDBList storedProducts = (BasicDBList) summary.get("products");
        BasicDBList rebuiltProducts = new BasicDBList();
        Set<Long> keptProductIds = new HashSet<Long>();
        for (Object raw : storedProducts) {
            DBObject entry = (DBObject) raw;
            Long chemId = (Long) entry.get("pubchem");
            Boolean balanceFlag = (Boolean) entry.get("balance");
            if (!Boolean.TRUE.equals(balanceFlag)) {
                keptProductIds.add(chemId);
                entry.put("coefficient", r.getProductCoefficient(chemId));
                rebuiltProducts.add(entry);
            }
        }
        for (Long chemId : r.getProductIdsOfProductCoefficients()) {
            if (keptProductIds.contains(chemId) || r.getProductCoefficient(chemId) == null) {
                continue;
            }
            rebuiltProducts.add(new BasicDBObject("pubchem", chemId)
                    .append("coefficient", r.getProductCoefficient(chemId))
                    .append("balance", true));
        }

        summary.put("substrates", rebuiltSubstrates);
        summary.put("products", rebuiltProducts);
        this.dbReactions.update(byId, reactionDoc);
    }

    /**
     * Copies {@code chemical}'s estimated energy into the {@code estimateEnergy} field
     * of its stored document (selected by {@code _id}) and writes the document back.
     *
     * @param chemical chemical whose uuid selects the document and whose energy is stored
     */
    public void updateEstimatedEnergy(Chemical chemical) {
        BasicDBObject byId = new BasicDBObject("_id", chemical.getUuid());
        DBObject stored = this.dbChemicals.findOne(byId);
        stored.put("estimateEnergy", chemical.getEstimatedEnergy());
        this.dbChemicals.update(byId, stored);
    }

    /**
     * Copies {@code reaction}'s estimated energy into the {@code estimateEnergy} field
     * of its stored document (selected by {@code _id}) and writes the document back.
     *
     * @param reaction reaction whose uuid selects the document and whose energy is stored
     */
    public void updateEstimatedEnergy(Reaction reaction) {
        BasicDBObject byId = new BasicDBObject("_id", reaction.getUUID());
        DBObject stored = this.dbReactions.findOne(byId);
        stored.put("estimateEnergy", reaction.getEstimatedEnergy());
        this.dbReactions.update(byId, stored);
    }

    /**
     * Replaces the {@code rxn_refs} field of the stored sequence document with the
     * ids returned by {@code seq.getReactionsCatalyzed()} and writes the document back.
     *
     * @param seq sequence whose uuid selects the document
     */
    public void updateReactionRefsOf(Seq seq) {
        BasicDBObject byId = new BasicDBObject("_id", seq.getUUID());
        DBObject stored = this.dbSeq.findOne(byId);

        BasicDBList reactionIds = new BasicDBList();
        for (Long reactionId : seq.getReactionsCatalyzed()) {
            reactionIds.add(reactionId);
        }
        stored.put("rxn_refs", reactionIds);

        this.dbSeq.update(byId, stored);
    }

    /**
     * Sets the {@code keywords} and {@code keywords_case_insensitive} fields of the
     * cascade document with the given {@code _id} and writes the document back.
     *
     * @param id      {@code _id} of the cascade document
     * @param kwrds   value stored under {@code keywords}
     * @param ciKwrds value stored under {@code keywords_case_insensitive}
     */
    public void updateKeywordsCascade(Long id, Set<String> kwrds, Set<String> ciKwrds) {
        BasicDBObject byId = new BasicDBObject("_id", id);
        DBObject cascadeDoc = this.dbCascades.findOne(byId);
        cascadeDoc.put("keywords", kwrds);
        cascadeDoc.put("keywords_case_insensitive", ciKwrds);
        this.dbCascades.update(byId, cascadeDoc);
    }

    /**
     * Sets the {@code keywords} and {@code keywords_case_insensitive} fields of the
     * waterfall document with the given {@code _id} and writes the document back.
     *
     * @param id      {@code _id} of the waterfall document
     * @param kwrds   value stored under {@code keywords}
     * @param ciKwrds value stored under {@code keywords_case_insensitive}
     */
    public void updateKeywordsWaterfall(Long id, Set<String> kwrds, Set<String> ciKwrds) {
        BasicDBObject byId = new BasicDBObject("_id", id);
        DBObject waterfallDoc = this.dbWaterfalls.findOne(byId);
        waterfallDoc.put("keywords", kwrds);
        waterfallDoc.put("keywords_case_insensitive", ciKwrds);
        this.dbWaterfalls.update(byId, waterfallDoc);
    }

    /**
     * Copies {@code reaction}'s keywords and case-insensitive keywords into the
     * {@code keywords} and {@code keywords_case_insensitive} fields of its stored
     * document and writes the document back.
     *
     * @param reaction reaction whose uuid selects the document
     */
    public void updateKeywords(Reaction reaction) {
        BasicDBObject byId = new BasicDBObject("_id", reaction.getUUID());
        DBObject reactionDoc = this.dbReactions.findOne(byId);
        reactionDoc.put("keywords", reaction.getKeywords());
        reactionDoc.put("keywords_case_insensitive", reaction.getCaseInsensitiveKeywords());
        this.dbReactions.update(byId, reactionDoc);
    }

    /**
     * Appends a new reaction document to the reactions collection.
     * <p>
     * This method only ever inserts: the id of the new document is the current
     * document count of the collection, not the id carried by {@code r}. To change
     * an existing entry use {@code updateActReaction}.
     *
     * @param r reaction to store; its uuid must be -1
     * @return the id assigned to the inserted document, or -1 when a document whose
     *         {@code _id} equals {@code r.getUUID()} already exists
     * @throws RuntimeException if {@code r.getUUID()} is not -1
     */
    public int submitToActReactionDB(Reaction r) {
        // if reaction already present in Act, then ignore.
        if (alreadyEntered(r)) {
            System.out.println("___ Duplicate reaction? : " + r.getUUID());
            return -1;
        }

        if (r.getUUID() != -1) {
            // this function is designed to only submit a new entry
            // if you need to update an existing entry, use updateActReaction
            String msg = StringUtils.join(new String[] { "FATAL Error: Aborting in MongoDB.submitToActReactionDB.",
                    "Reaction asked to add has a populated ID field,",
                    "i.e., != -1, while this function strictly appends",
                    "to the DB and so will not honor the id field.", r.toString() }, "\n");
            System.err.println(msg);
            throw new RuntimeException(msg);
        }

        // Plain narrowing cast instead of the deprecated new Long(..).intValue();
        // both yield the same int for any count.
        int id = (int) this.dbReactions.count(); // O(1)
        BasicDBObject doc = createReactionDoc(r, id);

        // writing to MongoDB collection act
        this.dbReactions.insert(doc);

        return id;
    }

    /**
     * Overwrites the reaction document whose {@code _id} is {@code id} with a fresh
     * document built from {@code r}.
     * <p>
     * db.collection.update(query, update, options) updates the document(s) matching
     * query with the update doc
     * (http://docs.mongodb.org/manual/reference/method/db.collection.update/).
     * An update doc of the form { $set : { field : val } } would keep the old
     * document and change only some fields
     * (http://docs.mongodb.org/manual/reference/operator/update/set/); here, as in
     * updateActChemical, the whole document is to be replaced, so the new document
     * itself is passed as the update.
     *
     * @param r  reaction providing the new contents
     * @param id {@code _id} of the document to overwrite
     */
    public void updateActReaction(Reaction r, int id) {
        BasicDBObject replacement = createReactionDoc(r, id);
        DBObject byId = new BasicDBObject("_id", id);
        this.dbReactions.update(byId, replacement);
    }

    /**
     * Builds the MongoDB document for a reaction.
     * <p>
     * Top-level fields, in order: {@code _id}, {@code ecnum}, {@code easy_desc},
     * {@code enz_summary}, {@code is_abstract}, {@code datasource} (only when the
     * reaction has a data source), {@code mechanistic_validator_result} (only when
     * present), {@code references}, {@code proteins}, {@code conversion_direction}
     * and {@code pathway_step_direction}. {@code enz_summary} holds the products,
     * substrates, product cofactors, substrate cofactors and coenzymes, each as a
     * list of {@code {pubchem: id}} entries; substrate and product entries also carry
     * a {@code coefficient}.
     *
     * @param r  reaction to serialise
     * @param id value stored under {@code _id}
     * @return the newly built document
     */
    public static BasicDBObject createReactionDoc(Reaction r, int id) {
        BasicDBObject reactionDoc = new BasicDBObject();
        reactionDoc.put("_id", id);
        reactionDoc.put("ecnum", r.getECNum());
        reactionDoc.put("easy_desc", r.getReactionName());

        // Substrates and products carry their stoichiometric coefficient.
        BasicDBList substrateEntries = new BasicDBList();
        for (Long substrateId : r.getSubstrates()) {
            DBObject entry = getObject("pubchem", substrateId);
            entry.put("coefficient", r.getSubstrateCoefficient(substrateId));
            substrateEntries.add(entry);
        }

        BasicDBList productEntries = new BasicDBList();
        for (Long productId : r.getProducts()) {
            DBObject entry = getObject("pubchem", productId);
            entry.put("coefficient", r.getProductCoefficient(productId));
            productEntries.add(entry);
        }

        // Cofactors and coenzymes are stored as bare ids.
        BasicDBList productCofactorEntries = new BasicDBList();
        for (Long cofactorId : r.getProductCofactors()) {
            productCofactorEntries.add(getObject("pubchem", cofactorId));
        }

        BasicDBList substrateCofactorEntries = new BasicDBList();
        for (Long cofactorId : r.getSubstrateCofactors()) {
            substrateCofactorEntries.add(getObject("pubchem", cofactorId));
        }

        BasicDBList coenzymeEntries = new BasicDBList();
        for (Long coenzymeId : r.getCoenzymes()) {
            coenzymeEntries.add(getObject("pubchem", coenzymeId));
        }

        BasicDBObject summary = new BasicDBObject();
        summary.put("products", productEntries);
        summary.put("substrates", substrateEntries);
        summary.put("product_cofactors", productCofactorEntries);
        summary.put("substrate_cofactors", substrateCofactorEntries);
        summary.put("coenzymes", coenzymeEntries);
        reactionDoc.put("enz_summary", summary);

        reactionDoc.put("is_abstract", r.getRxnDetailType().name());

        if (r.getDataSource() != null) {
            reactionDoc.put("datasource", r.getDataSource().name());
        }

        if (r.getMechanisticValidatorResult() != null) {
            reactionDoc.put("mechanistic_validator_result",
                    MongoDBToJSON.conv(r.getMechanisticValidatorResult()));
        }

        BasicDBList referenceEntries = new BasicDBList();
        for (P<Reaction.RefDataSource, String> reference : r.getReferences()) {
            BasicDBObject referenceDoc = new BasicDBObject();
            referenceDoc.put("src", reference.fst().toString());
            referenceDoc.put("val", reference.snd());
            referenceEntries.add(referenceDoc);
        }
        reactionDoc.put("references", referenceEntries);

        BasicDBList proteinEntries = new BasicDBList();
        for (JSONObject protein : r.getProteinData()) {
            proteinEntries.add(MongoDBToJSON.conv(protein));
        }
        reactionDoc.put("proteins", proteinEntries);

        // Direction fields are stored as their string form, or null when unset.
        ConversionDirectionType conversionDirection = r.getConversionDirection();
        String conversionText = null;
        if (conversionDirection != null) {
            conversionText = conversionDirection.toString();
        }
        reactionDoc.put("conversion_direction", conversionText);

        StepDirection stepDirection = r.getPathwayStepDirection();
        String stepText = null;
        if (stepDirection != null) {
            stepText = stepDirection.toString();
        }
        reactionDoc.put("pathway_step_direction", stepText);

        return reactionDoc;
    }

    /**
     * Inserts a document with the organism's {@code _id}, {@code parent_id} and
     * {@code rank} into the organisms collection. When that collection is not
     * available ({@code null}) the organism is printed to stdout instead.
     *
     * @param o organism to store
     */
    public void submitToActOrganismDB(Organism o) {
        BasicDBObject organismDoc = new BasicDBObject("_id", o.getUUID())
                .append("parent_id", o.getParent())
                .append("rank", o.getRank());

        if (this.dbOrganisms != null) {
            this.dbOrganisms.insert(organismDoc);
            return;
        }
        System.out.print("Organism: " + o);
    }

    /**
     * Inserts a document with the organism's {@code org_id} and {@code name} into the
     * organism-names collection. When that collection is not available ({@code null})
     * the organism is printed to stdout instead.
     *
     * @param o organism whose uuid and name are stored
     */
    public void submitToActOrganismNameDB(Organism o) {
        BasicDBObject nameDoc = new BasicDBObject("org_id", o.getUUID())
                .append("name", o.getName());
        // TODO: support NCBI ids too.
        if (this.dbOrganismNames != null) {
            this.dbOrganismNames.insert(nameDoc);
            return;
        }
        System.out.print("Organism: " + o);
    }

    /**
     * Establishes a new numbering system for organisms not already in our database.
     * <p>
     * The new id is the current document count of the organism-names collection plus
     * {@code ORG_ID_BASE}. When the collection is not available ({@code null}) the
     * name is printed to stdout and nothing is stored.
     *
     * @param name the name of the organism to be added to the database
     * @return the id of the new organism added to the database, or {@code null} when
     *         the organism-names collection is not available
     */
    public Long submitToActOrganismNameDB(String name) {
        // TODO: support NCBI ids too.
        // The collection must be checked before count() is called on it; checking
        // afterwards (as before) could never reach the fallback branch.
        if (this.dbOrganismNames == null) {
            System.out.print("Organism: " + name);
            return null;
        }

        Long id = this.dbOrganismNames.count() + ORG_ID_BASE;
        BasicDBObject doc = new BasicDBObject();
        doc.put("org_id", id);
        doc.put("name", name);
        this.dbOrganismNames.insert(doc);
        return id;
    }

    /**
     * Stores a pubmed entry, keyed by its PMID.
     * <p>
     * The PMID is read from the entry at the path MedlineCitation/PMID and parsed as
     * an int. When the pubmed collection is available the entry's JSON form is
     * inserted with {@code _id} set to the PMID, unless a document with that
     * {@code _id} already exists. When the collection is not available the entry is
     * logged instead.
     *
     * @param entry pubmed entry to store
     */
    public void submitToPubmedDB(PubmedEntry entry) {
        List<String> xPath = new ArrayList<String>();
        xPath.add("MedlineCitation");
        xPath.add("PMID");
        int pmid = Integer.parseInt(entry.getXPathString(xPath));

        if (this.dbPubmed == null) {
            Logger.printf(0, "Pubmed Entry [%d]: %s\n", pmid, entry); // human readable...
            return;
        }

        if (alreadyEntered(entry, pmid)) {
            return;
        }
        DBObject doc = (DBObject) JSON.parse(entry.toJSON());
        doc.put("_id", pmid);
        this.dbPubmed.insert(doc);
    }

    // Builds a one-field document { field : val }.
    private static BasicDBObject getObject(String field, Long val) {
        return new BasicDBObject(field, val);
    }

    // Builds a two-field document { f1 : v1, f2 : v2 }, in that order.
    private BasicDBObject getObject(String f1, Long v1, String f2, Float v2) {
        return new BasicDBObject(f1, v1).append(f2, v2);
    }

    /*
     * Looks the chemical up by its InChI.
     * Returns -1 if the chemical has no InChI or doesn't exist in the database yet.
     * Else returns the id of the matching document.
     */
    private long alreadyEntered(Chemical c) {
        String inchi = c.getInChI();
        if (inchi == null) {
            return -1;
        }

        DBObject match = this.dbChemicals.findOne(new BasicDBObject("InChI", inchi));
        if (match == null) {
            return -1;
        }
        return (Long) match.get("_id"); // checked: db type IS long
    }

    /**
     * Tells whether the chemicals collection holds at least one document whose
     * {@code InChI} field equals {@code inchi}.
     *
     * @param inchi InChI to look for
     * @return {@code true} when at least one document matches; {@code false} when none
     *         does or when the chemicals collection is not available
     */
    public boolean alreadyEnteredChemical(String inchi) {
        if (this.dbChemicals == null) {
            return false; // TODO: should this throw an exception instead?
        }
        long matches = this.dbChemicals.count(new BasicDBObject("InChI", inchi));
        return matches > 0;
    }

    /**
     * Fetches the {@code _id} of a chemical document whose {@code InChI} field equals
     * {@code inchi}; only the {@code _id} field is requested from the database.
     *
     * @param inchi InChI to look for
     * @return the document's {@code _id}, or {@code null} when no document matches or
     *         the chemicals collection is not available
     */
    public Long getExistingDBIdForInChI(String inchi) { // TODO: should this return some UUID type instead of Long?
        if (this.dbChemicals == null) {
            return null; // TODO: should this throw an exception instead?
        }

        BasicDBObject byInchi = new BasicDBObject("InChI", inchi);
        BasicDBObject idOnly = new BasicDBObject("_id", true);
        DBObject match = this.dbChemicals.findOne(byInchi, idOnly);
        // TODO: does this need to be checked?
        return match == null ? null : (Long) match.get("_id");
    }

    // Looks the cofactor up by its InChI in the cofactors collection.
    // Returns the matching document's id, or -1 when the cofactor has no InChI
    // or no document matches.
    private long alreadyEntered(Cofactor cof) {
        String inchi = cof.getInChI();
        if (inchi == null) {
            return -1;
        }

        DBObject match = this.dbCofactors.findOne(new BasicDBObject("InChI", inchi));
        if (match == null) {
            return -1;
        }
        return (Long) match.get("_id"); // checked: db type IS long
    }

    // True when the reactions collection has a document whose _id equals r's uuid.
    private boolean alreadyEntered(Reaction r) {
        BasicDBObject byId = new BasicDBObject("_id", r.getUUID());
        // a non-null result means there is at least one document that matches
        return this.dbReactions.findOne(byId) != null;
    }

    // True when the pubmed collection has a document whose _id equals pmid.
    // The entry itself is not consulted.
    private boolean alreadyEntered(PubmedEntry entry, int pmid) {
        BasicDBObject byPmid = new BasicDBObject("_id", pmid);
        return this.dbPubmed.findOne(byPmid) != null;
    }

    /*
     *
     *
     * End of functions required for populating MongoAct
     *
     *
     */

    /**
     * Finds the reactions that list {@code reactant} among their substrates and
     * {@code product} among their products.
     *
     * @param reactant chemical id matched against {@code enz_summary.substrates.pubchem}
     * @param product  chemical id matched against {@code enz_summary.products.pubchem}
     * @return the ids of the matching reactions (empty when none match)
     */
    public List<Long> getRxnsWith(Long reactant, Long product) {

        BasicDBObject query = new BasicDBObject();
        query.put("enz_summary.products.pubchem", product);
        query.put("enz_summary.substrates.pubchem", reactant);
        DBCursor cur = this.dbReactions.find(query);

        List<Long> reactions = new ArrayList<Long>();
        // Close the cursor even if iteration or the _id cast throws.
        try {
            while (cur.hasNext()) {
                DBObject o = cur.next();
                long id = (Integer) o.get("_id"); // checked: db type IS int
                reactions.add(id);
            }
        } finally {
            cur.close();
        }
        return reactions;
    }

    /**
     * Finds the reactions whose substrates include every id in {@code reactants} and
     * whose products include every id in {@code products} (MongoDB {@code $all}).
     * An empty list places no constraint on its side.
     *
     * @param reactants chemical ids required among the substrates
     * @param products  chemical ids required among the products
     * @return the matching reactions, converted with {@code convertDBObjectToReaction}
     * @throws IllegalArgumentException if both lists are empty
     */
    public List<Reaction> getRxnsWithAll(List<Long> reactants, List<Long> products) {
        if (reactants.isEmpty() && products.isEmpty()) {
            throw new IllegalArgumentException("Reactants and products both empty! Query would return entire DB.");
        }

        BasicDBObject criteria = new BasicDBObject();

        if (!reactants.isEmpty()) {
            BasicDBList requiredSubstrates = new BasicDBList();
            for (Long reactantId : reactants) {
                requiredSubstrates.add(reactantId);
            }
            criteria.put("enz_summary.substrates.pubchem", new BasicDBObject("$all", requiredSubstrates));
        }

        if (!products.isEmpty()) {
            BasicDBList requiredProducts = new BasicDBList();
            for (Long productId : products) {
                requiredProducts.add(productId);
            }
            criteria.put("enz_summary.products.pubchem", new BasicDBObject("$all", requiredProducts));
        }

        DBCursor cursor = this.dbReactions.find(criteria);
        List<Reaction> matches = new ArrayList<Reaction>();
        try {
            while (cursor.hasNext()) {
                matches.add(convertDBObjectToReaction(cursor.next()));
            }
        } finally {
            // always release the cursor, even when conversion fails
            cursor.close();
        }

        return matches;
    }

    /**
     * Finds reactions with the given EC number and organism id, constrained by an
     * {@code $or} clause built from {@code substrates}: for a substrate id, either
     * {@code enz_summary.substrates.pubchem} or {@code enz_summary.products.pubchem}
     * must satisfy {@code $ne} against that id.
     * <p>
     * NOTE(review): the {@code $or} clause is stored under the same key on every loop
     * iteration, so only the clause for the last element of {@code substrates} ends
     * up in the query. Confirm whether a constraint per substrate was intended.
     *
     * @param enzyme     value matched against {@code ecnum}
     * @param org        value matched against {@code organisms.id}
     * @param substrates chemical ids used to build the {@code $or} clause
     * @return the ids of the matching reactions (empty when none match)
     */
    public List<Long> getRxnsWithEnzyme(String enzyme, Long org, List<Long> substrates) {
        BasicDBObject query = new BasicDBObject();
        query.put("ecnum", enzyme);
        query.put("organisms.id", org);
        for (Long substrate : substrates) {
            BasicDBObject mainQuery = new BasicDBObject();
            mainQuery.put("$ne", substrate);
            BasicDBList queryList = new BasicDBList();
            BasicDBObject productQuery = new BasicDBObject();
            productQuery.put("enz_summary.products.pubchem", mainQuery);
            BasicDBObject substrateQuery = new BasicDBObject();
            substrateQuery.put("enz_summary.substrates.pubchem", mainQuery);
            queryList.add(substrateQuery);
            queryList.add(productQuery);
            query.put("$or", queryList);
        }
        DBCursor cur = this.dbReactions.find(query);

        List<Long> reactions = new ArrayList<Long>();
        // Close the cursor even if iteration or the _id cast throws.
        try {
            while (cur.hasNext()) {
                DBObject o = cur.next();
                long id = (Integer) o.get("_id"); // checked: db type IS int
                reactions.add(id);
            }
        } finally {
            cur.close();
        }
        return reactions;
    }

    /**
     * Finds reactions for the given organism id that involve a substrate id either
     * as a substrate or as a product, with a {@code $ne} clause built from the EC
     * number {@code enzyme}.
     * <p>
     * NOTE(review): {@code $ne} is placed at the top level of the query with
     * {@code {ecnum: enzyme}} as its value rather than under the {@code ecnum} field.
     * Confirm the server accepts this form and that it excludes the enzyme as intended.
     * <p>
     * NOTE(review): the {@code $or} clause is stored under the same key on every loop
     * iteration, so only the clause for the last element of {@code substrates} ends
     * up in the query. Confirm whether a constraint per substrate was intended.
     *
     * @param enzyme     EC number used in the {@code $ne} clause
     * @param org        value matched against {@code organisms.id}
     * @param substrates chemical ids used to build the {@code $or} clause
     * @return the ids of the matching reactions (empty when none match)
     */
    public List<Long> getRxnsWithSubstrate(String enzyme, Long org, List<Long> substrates) {
        BasicDBObject query = new BasicDBObject();
        query.put("organisms.id", org);
        BasicDBObject enzymeQuery = new BasicDBObject();
        enzymeQuery.put("ecnum", enzyme);
        query.put("$ne", enzymeQuery);
        for (Long substrate : substrates) {
            BasicDBList queryList = new BasicDBList();
            DBObject querySubstrate = new BasicDBObject();
            querySubstrate.put("enz_summary.substrates.pubchem", substrate);
            DBObject queryProduct = new BasicDBObject();
            queryProduct.put("enz_summary.products.pubchem", substrate);
            queryList.add(querySubstrate);
            queryList.add(queryProduct);
            query.put("$or", queryList);
        }

        DBCursor cur = this.dbReactions.find(query);
        List<Long> reactions = new ArrayList<Long>();
        // Close the cursor even if iteration or the _id cast throws.
        try {
            while (cur.hasNext()) {
                DBObject o = cur.next();
                long id = (Integer) o.get("_id"); // checked: db type IS int
                reactions.add(id);
            }
        } finally {
            cur.close();
        }
        return reactions;
    }

    /**
     * Picks a display name for the chemical with the given id: its shortest BRENDA
     * name when it has one, otherwise its shortest name.
     *
     * @param id uuid of the chemical
     * @return the chosen name; {@code "unknown_chemical"} when no chemical has that
     *         id, {@code "no_name"} when the chemical has neither kind of name
     */
    public String getShortestName(Long id) {
        Chemical chemical = this.getChemicalFromChemicalUUID(id);
        if (chemical == null) {
            return "unknown_chemical";
        }

        String brendaName = chemical.getShortestBRENDAName();
        if (brendaName != null) {
            return brendaName;
        }

        String shortest = chemical.getShortestName();
        return shortest != null ? shortest : "no_name";
    }

    /**
     * Returns the chemicals that {@code constructAllChemicalsFromActData} builds for
     * the field {@code isNative} with value {@code true}.
     */
    public List<Chemical> getNativeMetaboliteChems() {
        List<Chemical> natives = constructAllChemicalsFromActData("isNative", true);
        return natives;
    }

    // Caches installed by the first call to getCofactorChemicals(): the chemicals
    // that call returned and, in parallel, their uuids. null means "not loaded yet".
    // isCofactor() answers membership queries from the id list.
    private List<Long> _cofactor_ids_cache = null;
    private List<Chemical> _cofactor_chemicals_cache = null;

    /**
     * Returns the cofactor chemicals: every chemical flagged {@code isCofactor} in the
     * DB, plus any chemical whose InChI is one of the {@code SomeCofactorNames} InChIs
     * but that was not among the flagged ones.
     * <p>
     * Side effects: each {@code SomeCofactorNames} constant whose InChI is found gets
     * the matching chemical's uuid recorded on it, and the first call installs the
     * result into the cofactor caches used by {@code isCofactor}.
     *
     * @return the list of cofactor chemicals (the same list object that is cached on
     *         the first call)
     */
    public List<Chemical> getCofactorChemicals() {
        List<Chemical> cof = constructAllChemicalsFromActData("isCofactor", true);

        // before we return this set, we need to make sure some
        // cases that for some reason are not in the db as cofactors
        // are marked as such.
        HashMap<String, Chemical> inchis = new HashMap<String, Chemical>();
        for (Chemical c : cof) {
            if (c.getInChI() != null) {
                inchis.put(c.getInChI(), c);
            } else {
                // The id and synonyms are passed as format arguments. Concatenating the
                // id onto the format string (as before) left %d to be fed the synonyms,
                // which makes String.format throw.
                Logger.print(1,
                        String.format("[MongoDB.getCofactorChemicals] No inchi for cofactor(id:%d): %s\n",
                                c.getUuid(), c.getSynonyms()));
            }
        }

        for (SomeCofactorNames cofactor : SomeCofactorNames.values()) {
            String shouldbethere = cofactor.getInChI();
            if (!inchis.containsKey(shouldbethere)) {
                // Not flagged as a cofactor in the DB: look it up by InChI and add it.
                List<Chemical> toAdd = constructAllChemicalsFromActData("InChI", shouldbethere);
                cof.addAll(toAdd);
                for (Chemical c : toAdd) {
                    addToDefiniteCofactorsMaps(cofactor, c);
                }
            } else {
                addToDefiniteCofactorsMaps(cofactor, inchis.get(shouldbethere));
            }
        }

        // on first call, install the cofactors read from db into cache
        if (_cofactor_ids_cache == null) {
            _cofactor_chemicals_cache = cof;
            _cofactor_ids_cache = new ArrayList<Long>();
            for (Chemical c : cof) {
                _cofactor_ids_cache.add(c.getUuid());
            }
        }

        return cof;
    }

    // True when the chemical id @c is in the cached list of cofactor ids.
    // The cache is loaded on demand: getCofactorChemicals fills it as a side-effect.
    private boolean isCofactor(Long c) {
        boolean cacheMissing = (_cofactor_ids_cache == null);
        if (cacheMissing) {
            getCofactorChemicals();
        }
        return _cofactor_ids_cache.contains(c);
    }

    /**
     * Records on the given {@code SomeCofactorNames} constant the MongoDB id of the
     * chemical that represents it.
     * <p>
     * This used to be a switch with one case per constant, each calling
     * {@code setMongoDBId} on the same constant that was being switched on; calling
     * it on {@code cofactor} directly has the same effect for every listed constant.
     *
     * @param cofactor the cofactor constant to update
     * @param c        the chemical whose uuid is recorded
     */
    private void addToDefiniteCofactorsMaps(SomeCofactorNames cofactor, Chemical c) {
        Long id = c.getUuid();
        cofactor.setMongoDBId(id);
    }

    // These should all be by default in the DB, but if not we augment the DB cofactors tags with these chemicals
    // It is ok for this list to not be exhaustive.... this is just for parent assignment in visualization
    public enum SomeCofactorNames {
        Water(0), ATP(1), Acceptor(2), AcceptorH2(3), ReducedAcceptor(4), OxidizedFerredoxin(5), ReducedFerredoxin(
                6), CO2(7), BicarbonateHCO3(8), CoA(9), H(10), NH3(11), HCl(12), Cl(13), O2(14), CTP(15), dATP(
                        16), H2S(17), dGTP(18), PhosphoricAcid(19), I(20), MolI(21), AMP(
                                22), Phosphoadenylylsulfate(23), H2SO3(24), adenylylsulfate(25), GTP(26), NADPH(
                                        27), dADP(28), NADP(29), UMP(30), dCDP(31), ADP(32), ADPm(33), UDP(34);

        int internalId;
        Long mongodbId;

        private SomeCofactorNames(int id) {
            this.internalId = id;
            this.mongodbId = null;
        }

        public String getInChI() {
            return this._definiteCofactors[internalId];
        }

        public void setMongoDBId(Long id) {
            this.mongodbId = id;
        }

        public Long getMongoDBId() {
            return this.mongodbId;
        }

        private static final String[] raw_definiteCofactors = {
                // 0 Water:
                "InChI=1S/H2O/h1H2", // [H2o, H2O, h2O][water, Dihydrogen oxide, Water vapor, Distilled water, oxidane, Deionized water, Purified water, Water, purified, Dihydrogen Monoxide, DHMO, oxygen, OH-, monohydrate, aqua, hydrate, o-]
                // 1 ATP:
                "InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6+,7?,10-/m1/s1", // [L-ATP, D-ATP, araATP, alphaATP, adenosyl-ribose triphosphate, adenosine 5'-triphosphate, 5'-ATP, ATP, adenosine triphosphate][Adenosine triphosphate, Striadyne, Myotriphos, Triadenyl, Triphosphaden, Atriphos, Glucobasin, Adephos, Adetol, Triphosaden, AC1NSUB1, [[(2S,5S)-5-(6-aminopurin-9-yl)-3,4-dihydroxyoxolan-2-yl]methoxy-hydroxyphosphoryl] phosphono hydrogen phosphate, Adenosine 5'-(tetrahydrogen triphosphate)]
                // 2 Acceptor:
                "InChI=1S/R", // [acceptor, oxidized adrenal ferredoxin, oxidized adrenodoxin][]
                // 3 AcceptorH2:
                "InChI=1S/RH2/h1H2", // [reduced adrenal ferredoxin, reduced adrenodoxin, acceptor-H2, acceptorH2][]
                // 4 ReducedAcceptor:
                "InChI=1S/RH3/h1H3", // [reduced acceptor, AH2, putidaredoxin, donor][]
                // 5 OxidizedFerredoxin:
                "InChI=1S/4RS.2Fe.2S/c4*1-2;;;;/q4*-1;2*+5;;", // [oxidized ferredoxin][]
                // 6 ReducedFerredoxin:
                "InChI=1S/4RS.2Fe.2S/c4*1-2;;;;/q4*-1;2*+4;;", // [reduced ferredoxin][]
                // 7 CO2:
                "InChI=1S/CO2/c2-1-3", // [carbon dioxide, carbon dioxide, carbonic acid gas]
                // 8 BicarbonateHCO3:
                "InChI=1S/CH2O3/c2-1(3)4/h(H2,2,3,4)/p-1", // [HCO3-, bicarbonate, bicarbonate]
                // 9 CoA
                "InChI=1S/C21H36N7O16P3S/c1-21(2,16(31)19(32)24-4-3-12(29)23-5-6-48)8-41-47(38,39)44-46(36,37)40-7-11-15(43-45(33,34)35)14(30)20(42-11)28-10-27-13-17(22)25-9-26-18(13)28/h9-11,14-16,20,30-31,48H,3-8H2,1-2H3,(H,23,29)(H,24,32)(H,36,37)(H,38,39)(H2,22,25,26)(H2,33,34,35)/t11-,14-,15-,16+,20-/m1/s1", // [coenzyme A, CoA-SH, CoASH]
                // 10 H
                "InChI=1S/p+1", // [H+/out, H+/in, H+out]
                // 11 NH3
                "InChI=1S/H3N/h1H3", // Ammonia Gas
                // 12 HCl, Cl-
                "InChI=1S/ClH/h1H", // hydrochloric acid, hydrogen chloride, Muriatic acid
                // 13 Cl-
                "InChI=1S/ClH/h1H/p-1", // [Cl-/out, Cl-/in, chloride]
                // 14 O2
                "InChI=1S/O2/c1-2", // oxygen molecule, Molecular oxygen, Dioxygen
                // 15 CTP
                "InChI=1S/C9H16N3O14P3/c10-5-1-2-12(9(15)11-5)8-7(14)6(13)4(24-8)3-23-28(19,20)26-29(21,22)25-27(16,17)18/h1-2,4,6-8,13-14H,3H2,(H,19,20)(H,21,22)(H2,10,11,15)(H2,16,17,18)/t4-,6-,7+,8-/m1/s1", // L-CTP, D-CTP, cytosine arabinoside 5'-triphosphate
                // 16 dATP
                "InChI=1S/C10H16N5O12P3/c11-9-8-10(13-3-12-9)15(4-14-8)7-1-5(16)6(25-7)2-24-29(20,21)27-30(22,23)26-28(17,18)19/h3-7,16H,1-2H2,(H,20,21)(H,22,23)(H2,11,12,13)(H2,17,18,19)/t5-,6+,7+/m0/s1", // deoxyATP, L-dATP, L-2'-dATP
                // 17 hydrogen sulfide
                "InChI=1S/H2S/h1H2", // hydrogensulfide, hydrogen sulfide, hydrogen sulfide
                // 18 dGTP
                "InChI=1S/C10H16N5O13P3/c11-10-13-8-7(9(17)14-10)12-3-15(8)6-1-4(16)5(26-6)2-25-30(21,22)28-31(23,24)27-29(18,19)20/h3-6,16H,1-2H2,(H,21,22)(H,23,24)(H2,18,19,20)(H3,11,13,14,17)/t4-,5+,6+/m0/s1", // 2'-dGTP, D-GTP, deoxyGTP
                // 19 Phosphoric acid
                "InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)", // phosphate/out, phosphate/in, Phosphoric acid
                // 20 Iodide ion
                "InChI=1S/HI/h1H/p-1", // [iodide, Iodide, Iodide ion]
                // 21 Molecular iodine
                "InChI=1S/I2/c1-2", // [Molecular iodine, Iodine solution, Tincture iodine]
                // 22 AMP
                "InChI=1S/C10H14N5O7P/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(22-10)1-21-23(18,19)20/h2-4,6-7,10,16-17H,1H2,(H2,11,12,13)(H2,18,19,20)/t4-,6-,7+,10-/m1/s1", // 5'AMP, arabinosyl adenine 5'-phosphate, arabinosyl adenine 5'-monophosphate
                // 23 3-phosphoadenylylsulfate
                "InChI=1S/C10H15N5O13P2S/c11-8-5-9(13-2-12-8)15(3-14-5)10-6(16)7(27-29(17,18)19)4(26-10)1-25-30(20,21)28-31(22,23)24/h2-4,6-7,10,16H,1H2,(H,20,21)(H2,11,12,13)(H2,17,18,19)(H,22,23,24)/t4-,6-,7-,10-/m1/s1", // [3'-phosphoadenylylsulfate, 3'-phosphoadenylyl 5'-phosphosulfate, 3-phosphoadenylylsulfate]
                // 24 Sulfur dioxide solution
                "InChI=1S/H2O3S/c1-4(2)3/h(H2,1,2,3)", // [Sulfurous acid, Sulphurous acid, Sulfur dioxide solution]
                // 25 adenylylsulfate
                "InChI=1S/C10H14N5O10PS/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-26(18,19)25-27(20,21)22/h2-4,6-7,10,16-17H,1H2,(H,18,19)(H2,11,12,13)(H,20,21,22)/t4-,6-,7-,10-/m1/s1", // adenosine 5-phosphosulfate, adenylylsulfate, adenosine 5'-phosphate 5'-sulfate
                // 26 GTP
                "InChI=1S/C10H16N5O14P3/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(27-9)1-26-31(22,23)29-32(24,25)28-30(19,20)21/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H,24,25)(H2,19,20,21)(H3,11,13,14,18)/t3-,5-,6-,9-/m1/s1", // guanosine 5'-triphosphate, GUANOSINE TRIPHOSPHATE, 5'-GTP
                // 27 NADPH
                "InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1,3-4,7-8,10-11,13-16,20-21,29-31H,2,5-6H2,(H2,23,32)(H,36,37)(H,38,39)(H2,22,24,25)(H2,33,34,35)/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1", // NAD(P)H, 2'-NADPH, NADPH
                // 28 dADP
                "InChI=1S/C10H15N5O9P2/c11-9-8-10(13-3-12-9)15(4-14-8)7-1-5(16)6(23-7)2-22-26(20,21)24-25(17,18)19/h3-7,16H,1-2H2,(H,20,21)(H2,11,12,13)(H2,17,18,19)/t5-,6+,7+/m0/s1", // 2'-dADP, 2'-deoxy-ADP, deoxyADP
                // 29 NADP+
                "InChI=1S/C21H28N7O17P3/c22-17-12-19(25-7-24-17)28(8-26-12)21-16(44-46(33,34)35)14(30)11(43-21)6-41-48(38,39)45-47(36,37)40-5-10-13(29)15(31)20(42-10)27-3-1-2-9(4-27)18(23)32/h1-4,7-8,10-11,13-16,20-21,29-31H,5-6H2,(H7-,22,23,24,25,32,33,34,35,36,37,38,39)/p+1/t10-,11-,13-,14-,15-,16-,20-,21-/m1/s1", // NAD(P)+, beta-NADP+, 2'-NADP+
                // 30 UMP
                "InChI=1S/C9H13N2O9P/c12-5-1-2-11(9(15)10-5)8-7(14)6(13)4(20-8)3-19-21(16,17)18/h1-2,4,6-8,13-14H,3H2,(H,10,12,15)(H2,16,17,18)/t4-,6+,7?,8-/m1/s1", // D-UMP, deazauridine 5'-phosphate, ara-UMP
                // 31 dCDP
                "InChI=1S/C9H15N3O10P2/c10-7-1-2-12(9(14)11-7)8-3-5(13)6(21-8)4-20-24(18,19)22-23(15,16)17/h1-2,5-6,8,13H,3-4H2,(H,18,19)(H2,10,11,14)(H2,15,16,17)/t5-,6+,8+/m0/s1", // L-dCDP, D-dCDP, 2'-deoxy-CDP
                // 32 ADP
                "InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7+,10-/m1/s1", // L-ADP, D-ADP, araADP
                // 33 ADP from metacyc
                "InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)/p-3", // ADP
                // 34 UDP from metacyc
                "InChI=1S/C9H14N2O12P2/c12-5-1-2-11(9(15)10-5)8-7(14)6(13)4(22-8)3-21-25(19,20)23-24(16,17)18/h1-2,4,6-8,13-14H,3H2,(H,19,20)(H,10,12,15)(H2,16,17,18)", // UDP
        };

        private static String[] _definiteCofactors = convertToConsistent(raw_definiteCofactors,
                "Installed cofactors");
    };

    /**
     * Passes every entry of {@code raw} through {@code ConsistentInChI.consistentInChI} and returns the
     * results in a new array of the same length and order.
     *
     * @param raw InChI strings to convert
     * @param debug_tag tag handed unchanged to each conversion call
     * @return a new array holding the converted strings
     */
    private static String[] convertToConsistent(String[] raw, String debug_tag) {
        String[] converted = new String[raw.length];
        int slot = 0;
        for (String inchi : raw) {
            converted[slot] = ConsistentInChI.consistentInChI(inchi, debug_tag);
            slot++;
        }
        return converted;
    }

    /**
     * Collects the uuids of all cofactor chemicals and all native metabolite chemicals.
     *
     * @return a set containing the uuid of every chemical returned by {@code getCofactorChemicals()} and
     *         {@code getNativeMetaboliteChems()}
     */
    public Set<Long> getNativeIDs() {
        List<Chemical> cofactors = getCofactorChemicals();
        List<Chemical> natives = getNativeMetaboliteChems();

        Set<Long> nativeIds = new HashSet<Long>();
        for (Chemical cofactor : cofactors) {
            nativeIds.add(cofactor.getUuid());
        }
        for (Chemical nativeChem : natives) {
            nativeIds.add(nativeChem.getUuid());
        }
        return nativeIds;
    }

    /**
     * Looks up one chemical whose {@code SMILES} field equals the given string.
     *
     * @param smile SMILES string to match
     * @return the matching chemical, or null if the lookup finds no document
     */
    public Chemical getChemicalFromSMILES(String smile) {
        Chemical match = convertDBObjectToChemicalFromActData("SMILES", smile);
        return match;
    }

    /**
     * Maps each InChI to the id of the chemical it identifies.
     * InChIs for which no chemical is found are left out of the result.
     *
     * @param inchis A list of inchis to transform.
     * @return A map from each resolvable inchi to the corresponding chemical id.
     */
    public Map<String, Long> getIdsFromInChIs(List<String> inchis) {
        Map<String, Long> idsByInchi = new HashMap<>();
        for (String inchi : inchis) {
            Chemical found = getChemicalFromInChI(inchi);
            if (found == null) {
                continue;
            }
            idsByInchi.put(inchi, found.getUuid());
        }
        return idsByInchi;
    }

    /**
     * Looks up one chemical whose {@code InChI} field equals the given string.
     *
     * @param inchi InChI string to match
     * @return the matching chemical, or null if the lookup finds no document
     */
    public Chemical getChemicalFromInChI(String inchi) {
        Chemical match = convertDBObjectToChemicalFromActData("InChI", inchi);
        return match;
    }

    /**
     * Looks up one chemical whose {@code InChIKey} field equals the given string.
     *
     * @param inchiKey InChIKey to match
     * @return the matching chemical, or null if the lookup finds no document
     */
    public Chemical getChemicalFromInChIKey(String inchiKey) {
        Chemical match = convertDBObjectToChemicalFromActData("InChIKey", inchiKey);
        return match;
    }

    /**
     * Looks up one chemical by its {@code _id}.
     *
     * @param cuuid chemical id to match
     * @return the matching chemical, or null if the lookup finds no document
     */
    public Chemical getChemicalFromChemicalUUID(Long cuuid) {
        Chemical match = convertDBObjectToChemicalFromActData("_id", cuuid);
        return match;
    }

    /**
     * Looks up one chemical whose {@code canonical} field equals the given name.
     *
     * @param chemName canonical name to match
     * @return the matching chemical, or null if the lookup finds no document
     */
    public Chemical getChemicalFromCanonName(String chemName) {
        Chemical match = convertDBObjectToChemicalFromActData("canonical", chemName);
        return match;
    }

    /**
     * Fetches the chemical carrying the given ChEBI ID, matched against {@code xref.CHEBI.dbid}.
     * A ChEBI ID maps to at most one chemical in the installer database.
     *
     * @param chebiId String representation of the ChEBI ID
     * @return Chemical object corresponding to this ChEBI ID if it exists, otherwise null.
     */
    public Chemical getChemicalFromChebiId(String chebiId) {
        Chemical match = convertDBObjectToChemicalFromActData("xref.CHEBI.dbid", chebiId);
        return match;
    }

    /**
     * Case-sensitive variant of {@link #getChemicalIDFromName(String, boolean)}.
     *
     * @param chemName name to look up
     * @return the id of a chemical carrying that name, or -1 if none is found
     */
    public long getChemicalIDFromName(String chemName) {
        final boolean caseInsensitive = false;
        return getChemicalIDFromName(chemName, caseInsensitive);
    }

    /**
     * Finds a chemical that lists {@code chemName} under {@code names.brenda}, {@code names.pubchem.values}
     * or {@code names.synonyms}.
     *
     * @param chemName name to look up
     * @param caseInsensitive when true, the name is matched as a quoted, fully anchored, case-insensitive
     *                        regex; when false, it is matched by equality
     * @return the {@code _id} of the first document found, or -1 if there is none
     */
    public long getChemicalIDFromName(String chemName, boolean caseInsensitive) {
        // The same matcher (literal string or compiled pattern) is applied to all three name fields.
        Object nameMatcher;
        if (caseInsensitive) {
            nameMatcher = Pattern.compile("^" + Pattern.quote(chemName) + "$", Pattern.CASE_INSENSITIVE);
        } else {
            nameMatcher = chemName;
        }

        BasicDBList alternatives = new BasicDBList();
        alternatives.add(new BasicDBObject("names.brenda", nameMatcher));
        alternatives.add(new BasicDBObject("names.pubchem.values", nameMatcher));
        alternatives.add(new BasicDBObject("names.synonyms", nameMatcher));

        BasicDBObject query = new BasicDBObject("$or", alternatives);
        DBObject found = this.dbChemicals.findOne(query);
        if (found == null) {
            return -1L;
        }
        return (Long) found.get("_id"); // checked: db type IS Long
    }

    /**
     * Finds a chemical whose {@code names.brenda} contains the lower-cased form of {@code chemName}.
     *
     * @param chemName BRENDA name to look up; it is lower-cased before matching
     * @return the {@code _id} of the first document found, or -1 if there is none
     */
    public long getChemicalIDFromExactBrendaName(String chemName) {
        BasicDBObject query = new BasicDBObject("names.brenda", chemName.toLowerCase());
        DBObject found = this.dbChemicals.findOne(query);
        if (found == null) {
            return -1L;
        }
        return (Long) found.get("_id"); // checked: db type IS Long
    }

    /**
     * Renders the chemical document with the given id as a brace-delimited listing, one
     * tab-indented {@code key : value,} line per top-level field.
     *
     * @param uuid chemical id to look up
     * @return the rendered text, or null if no document has that id
     */
    public String getChemicalDBJSON(Long uuid) {
        DBObject doc = this.dbChemicals.findOne(new BasicDBObject("_id", uuid));
        if (doc == null) {
            return null;
        }

        StringBuilder rendered = new StringBuilder("{\n");
        for (String field : doc.keySet()) {
            rendered.append("\t").append(field).append(" : ").append(doc.get(field)).append(",\n");
        }
        rendered.append("}");
        return rendered.toString();
    }

    /**
     * Returns every chemical whose document matches {@code {field: {$exists: true}}}.
     *
     * @param field name of the field that must exist
     * @return the matching chemicals
     */
    public List<Chemical> getChemicalsThatHaveField(String field) {
        // $exists is documented to take a boolean; the string "true" only worked through truthiness coercion.
        DBObject exists = new BasicDBObject("$exists", true);
        return constructAllChemicalsFromActData(field, exists);
    }

    /**
     * Returns every chemical matched by the query {@code {"xref.DRUGBANK": {$ne: null}}}.
     *
     * @return the matching chemicals
     */
    public List<Chemical> getDrugbankChemicals() {
        DBObject notNull = new BasicDBObject("$ne", null);
        return constructAllChemicalsFromActData("xref.DRUGBANK", notNull);
    }

    /**
     * Returns every chemical matched by the query {@code {"xref.SIGMA": {$ne: null}}}.
     *
     * @return the matching chemicals
     */
    public List<Chemical> getSigmaChemicals() {
        DBObject notNull = new BasicDBObject("$ne", null);
        return constructAllChemicalsFromActData("xref.SIGMA", notNull);
    }

    /**
     * Loads every chemical whose {@code field} matches {@code val}, passing an empty keys object
     * to the three-argument overload.
     *
     * @param field document field to match on
     * @param val value or query operator object the field must match
     * @return the matching chemicals
     */
    public List<Chemical> constructAllChemicalsFromActData(String field, Object val) {
        BasicDBObject emptyKeys = new BasicDBObject();
        return constructAllChemicalsFromActData(field, val, emptyKeys);
    }

    /**
     * Loads every chemical whose {@code field} matches {@code val} and converts each document to a
     * {@link Chemical}.
     *
     * @param field document field to match on; when null the cursor is built without a field restriction
     * @param val value or query operator object the field must match
     * @param keys keys object passed to the underlying find call; may be null
     * @return the matching chemicals, in cursor order
     */
    public List<Chemical> constructAllChemicalsFromActData(String field, Object val, BasicDBObject keys) {
        DBCursor cur = constructCursorForMatchingChemicals(field, val, keys);

        List<Chemical> chems = new ArrayList<Chemical>();
        try {
            while (cur.hasNext()) {
                chems.add(convertDBObjectToChemical(cur.next()));
            }
        } finally {
            // Release the cursor even when a document fails to convert.
            cur.close();
        }
        return chems;
    }

    /**
     * Builds an iterator over chemicals whose InChI matches either {@code ^InChI=/FAKE} or
     * {@code ^InChI=.*R.*}, requesting only the id field.
     *
     * @return an iterator over the matching documents
     */
    public DBIterator getIdCursorForFakeChemicals() {
        String inchiField = ChemicalKeywords.INCHI$.MODULE$.toString();
        String regexOperator = MongoKeywords.REGEX$.MODULE$.toString();

        DBObject fakeInchi = new BasicDBObject();
        fakeInchi.put(inchiField, new BasicDBObject(regexOperator, "^InChI=/FAKE"));

        DBObject abstractInchi = new BasicDBObject();
        abstractInchi.put(inchiField, new BasicDBObject(regexOperator, "^InChI=.*R.*"));

        BasicDBList eitherOf = new BasicDBList();
        eitherOf.add(fakeInchi);
        eitherOf.add(abstractInchi);

        BasicDBObject match = new BasicDBObject(MongoKeywords.OR$.MODULE$.toString(), eitherOf);
        BasicDBObject idOnly = new BasicDBObject(ChemicalKeywords.ID$.MODULE$.toString(), true);
        return getIteratorOverChemicals(match, idOnly);
    }

    /**
     * Opens a cursor over the whole chemicals collection (no field restriction, no keys).
     *
     * @return the cursor
     */
    private DBCursor constructCursorForAllChemicals() {
        DBCursor everything = constructCursorForMatchingChemicals(null, null, null);
        return everything;
    }

    /* Query wrapper made of an empty "$query" plus an "$orderby" on "_id" ascending. It is passed to find()
     * when iterating over a whole collection so that results come back ordered by id. */
    private static final BasicDBObject DEFAULT_CURSOR_ORDER_BY_ID = new BasicDBObject("$query", new BasicDBObject())
            .append("$orderby", new BasicDBObject("_id", 1));

    /**
     * Opens a cursor on the chemicals collection.
     *
     * @param field field to match on, or null for no field restriction
     * @param val value the field must match; only used when {@code field} is non-null
     * @param keys keys object passed as the second argument of find, or null to omit it
     * @return the cursor
     */
    private DBCursor constructCursorForMatchingChemicals(String field, Object val, BasicDBObject keys) {
        if (field == null) {
            if (keys == null) {
                /* Ensure a default ordering when iterating over a whole collection.
                 * This helps maintain result stability and should have minimal performance cost since we're
                 * iterating over the primary keys in their natural order. */
                return this.dbChemicals.find(DEFAULT_CURSOR_ORDER_BY_ID);
            }
            return this.dbChemicals.find(new BasicDBObject(), keys);
        }

        BasicDBObject query = new BasicDBObject();
        query.put(field, val);
        return keys == null ? this.dbChemicals.find(query) : this.dbChemicals.find(query, keys);
    }

    /**
     * Opens a cursor over the whole cofactors collection.
     *
     * @return the cursor
     */
    private DBCursor constructCursorForAllCofactors() {
        DBCursor everything = this.dbCofactors.find();
        return everything;
    }

    /**
     * Reads the {@code _id} and {@code InChI} fields of every chemical and returns them as a map.
     *
     * @return a map from InChI to chemical id; if two documents share an InChI the later one wins
     */
    public Map<String, Long> constructAllInChIs() {
        Map<String, Long> chems = new HashMap<String, Long>();
        BasicDBObject keys = new BasicDBObject();
        keys.append("_id", true);
        keys.append("InChI", true);

        DBCursor cur = constructCursorForMatchingChemicals(null, null, keys);
        try {
            while (cur.hasNext()) {
                DBObject o = cur.next();
                long uuid = (Long) o.get("_id"); // checked: db type IS long
                String inchi = (String) o.get("InChI");
                chems.put(inchi, uuid);
            }
        } finally {
            // Release the cursor even when a document has an unexpected shape.
            cur.close();
        }
        return chems;
    }

    /**
     * Runs a SMARTS substructure query against every chemical and prints a tab-separated
     * {@code uuid} / {@code InChI} line to stdout for each chemical with at least one match.
     * Chemicals whose InChI cannot be loaded by Indigo are skipped.
     *
     * @param target SMARTS pattern to search for
     */
    public void smartsMatchAllChemicals(String target) {
        Indigo indigo = new Indigo();
        IndigoInchi inchi = new IndigoInchi(indigo);
        IndigoObject query = indigo.loadSmarts(target);
        query.optimize();

        DBCursor cur = constructCursorForAllChemicals();
        try {
            while (cur.hasNext()) {
                Chemical c = convertDBObjectToChemical(cur.next());
                IndigoObject mol;
                try {
                    mol = inchi.loadMolecule(c.getInChI());
                } catch (IndigoException e) {
                    // Never fall through with a stale or null molecule: skip this chemical whatever the cause,
                    // but report failures other than the expected "could not load" one.
                    String message = e.getMessage();
                    if (message == null || !message.startsWith("core: Indigo-InChI: InChI loading failed:")) {
                        System.err.println("WARNING: MongoDB.smartsMatchAllChemicals skipping chemical "
                                + c.getUuid() + ": " + message);
                    }
                    continue;
                }
                IndigoObject matcher = indigo.substructureMatcher(mol);
                if (matcher.countMatches(query) > 0) {
                    // matches.add(c); memout's
                    System.out.format("%d\t%s\n", c.getUuid(), c.getInChI());
                }
            }
        } finally {
            cur.close();
        }
    }

    /**
     * Fetches one chemical document whose {@code field} equals {@code val} and converts it.
     *
     * @param field document field to match on
     * @param val value the field must equal
     * @return the converted chemical, or null if no document matches
     */
    private Chemical convertDBObjectToChemicalFromActData(String field, Object val) {
        BasicDBObject query = new BasicDBObject();
        query.put(field, val);

        // The keys object is left empty, so no field is projected out
        // (excluding "names" was considered: keys.put("names", 0)).
        BasicDBObject keys = new BasicDBObject();
        DBObject found = this.dbChemicals.findOne(query, keys);
        return found == null ? null : convertDBObjectToChemical(found);
    }

    /**
     * Converts a document from the chemicals collection into a {@link Chemical}.
     *
     * <p>Reads {@code _id}, {@code canonical}, {@code InChI}, {@code SMILES}, {@code xref},
     * {@code derived_data.matched_ros}, {@code names} (brenda, synonyms, pubchem), {@code isCofactor},
     * {@code isNative}, {@code estimateEnergy}, {@code keywords}, {@code keywords_case_insensitive},
     * {@code vendors}, {@code num_vendors} and {@code csid}. Missing optional sections are skipped.
     *
     * @param o the chemical document
     * @return the populated chemical
     */
    public Chemical convertDBObjectToChemical(DBObject o) {
        long uuid;
        // WTF!? Are some chemicals ids int and some long?
        // The Integer branch should not be needed, unless our db is mucked up.
        Object rawId = o.get("_id");
        if (rawId instanceof Integer) {
            System.err.println("WARNING: MongoDB.convertDBObjectToChemical ClassCast db.chemicals.id is not Long?");
            uuid = ((Integer) rawId).longValue(); // this should be dead code
        } else {
            uuid = (Long) rawId; // checked: db type IS long
        }

        String chemName = (String) o.get("canonical");
        DBObject xrefs = (DBObject) o.get("xref");

        // The pubchem id defaults to -1 when the xref section or the id is absent or not a Long.
        Long pcid = null;
        if (xrefs != null) {
            Object rawPcid = xrefs.get("pubchem");
            if (rawPcid instanceof Long) {
                pcid = (Long) rawPcid;
            }
        }
        if (pcid == null) {
            pcid = (long) -1;
        }

        String inchi = (String) o.get("InChI");
        String smiles = (String) o.get("SMILES");
        // NOTE(review): the document's "InChIKey" field was read here but never passed on to the Chemical.
        Chemical c = new Chemical(uuid, pcid, chemName, smiles);
        c.setInchi(inchi);
        c.setCanon(chemName);

        if (xrefs != null) {
            for (String typ : xrefs.keySet()) {
                if (typ.equals("pubchem")) {
                    continue;
                }
                try {
                    c.putRef(Chemical.REFS.valueOf(typ), MongoDBToJSON.conv((DBObject) xrefs.get(typ)));
                } catch (RuntimeException ignored) {
                    // Best effort: an unknown xref type or a malformed entry is skipped on its own,
                    // without discarding the remaining xrefs.
                }
            }
        }

        if (o.get("derived_data") != null) {
            BasicDBList matchedRos = (BasicDBList) ((DBObject) o.get("derived_data")).get("matched_ros");
            if (matchedRos != null) {
                for (Object roId : matchedRos) {
                    c.addSubstructureRoId((Integer) roId);
                }
            }
        }

        // Each name list is checked on its own, so a missing list no longer hides or breaks the others.
        DBObject allNames = (DBObject) o.get("names");
        if (allNames != null) {
            BasicDBList brendaNames = (BasicDBList) allNames.get("brenda");
            if (brendaNames != null) {
                for (Object n : brendaNames) {
                    c.addBrendaNames((String) n);
                }
            }

            BasicDBList synonyms = (BasicDBList) allNames.get("synonyms");
            if (synonyms != null) {
                for (Object n : synonyms) {
                    c.addSynonym((String) n);
                }
            }

            BasicDBList pubchemNames = (BasicDBList) allNames.get("pubchem");
            if (pubchemNames != null) {
                for (Object n : pubchemNames) {
                    String typ = (String) ((DBObject) n).get("type");
                    BasicDBList pnames = (BasicDBList) ((DBObject) n).get("values");
                    List<String> s = new ArrayList<String>();
                    for (Object os : pnames) {
                        s.add((String) os);
                    }
                    c.addNames(typ, s.toArray(new String[0]));
                }
            }
        }

        // A missing flag is treated as false instead of failing on unboxing.
        if (Boolean.TRUE.equals(o.get("isCofactor"))) {
            c.setAsCofactor();
        }
        if (Boolean.TRUE.equals(o.get("isNative"))) {
            c.setAsNative();
        }

        Double estimateEnergy = (Double) o.get("estimateEnergy");
        if (estimateEnergy != null) {
            c.setEstimatedEnergy(estimateEnergy);
        }

        BasicDBList keywords = (BasicDBList) o.get("keywords");
        if (keywords != null) {
            for (Object k : keywords) {
                c.addKeyword((String) k);
            }
        }
        BasicDBList cikeywords = (BasicDBList) o.get("keywords_case_insensitive");
        if (cikeywords != null) {
            for (Object k : cikeywords) {
                c.addCaseInsensitiveKeyword((String) k);
            }
        }

        BasicDBList vendors = (BasicDBList) o.get("vendors");
        Integer num_vendors = (Integer) o.get("num_vendors");
        Integer chemspiderid = (Integer) o.get("csid");

        c.setChemSpiderVendorXrefs(vendors == null ? null : MongoDBToJSON.conv(vendors));
        c.setChemSpiderNumUniqueVendors(num_vendors);
        c.setChemSpiderID(chemspiderid);

        //TODO: a "shortest name" (over canonical, brenda and synonym names) used to be computed here but
        // was never stored or returned; decide what to do with it before reintroducing the computation.

        return c;
    }

    /**
     * Opens an iterator over every document in the seq collection.
     *
     * @return the iterator
     */
    public DBIterator getDbIteratorOverSeq() {
        return new DBIterator(this.dbSeq.find());
    }

    /**
     * Opens an iterator over the seq documents matching {@code matchCriterion}, with the
     * no-timeout option set on the cursor.
     *
     * @param matchCriterion query to run
     * @param keys keys object passed to find; null is replaced by an empty object
     * @return the iterator
     */
    public DBIterator getDbIteratorOverSeq(BasicDBObject matchCriterion, BasicDBObject keys) {
        BasicDBObject projection = (keys != null) ? keys : new BasicDBObject();
        DBCursor matches = this.dbSeq.find(matchCriterion, projection).addOption(Bytes.QUERYOPTION_NOTIMEOUT);
        return new DBIterator(matches);
    }

    /**
     * Converts the next document of the iterator into a {@link Seq}.
     * When the iterator is exhausted it is closed.
     *
     * @param iterator iterator to advance
     * @return the next sequence, or null once the iterator has no more elements
     */
    public Seq getNextSeq(DBIterator iterator) {
        if (iterator.hasNext()) {
            return convertDBObjectToSeq(iterator.next());
        }
        iterator.close();
        return null;
    }

    /**
     * Opens an iterator over every document in the waterfalls collection.
     *
     * @return the iterator
     */
    public DBIterator getIteratorOverWaterfalls() {
        return new DBIterator(this.dbWaterfalls.find());
    }

    /**
     * Passes the next document of the iterator through {@code convertDBObjectToWaterfall}.
     * When the iterator is exhausted it is closed.
     *
     * @param iterator iterator to advance
     * @return the converted document, or null once the iterator has no more elements
     */
    public DBObject getNextWaterfall(DBIterator iterator) {
        if (iterator.hasNext()) {
            return convertDBObjectToWaterfall(iterator.next());
        }
        iterator.close();
        return null;
    }

    /**
     * Opens an iterator over every document in the cascades collection.
     *
     * @return the iterator
     */
    public DBIterator getIteratorOverCascades() {
        return new DBIterator(this.dbCascades.find());
    }

    /**
     * Passes the next document of the iterator through {@code convertDBObjectToCascade}.
     * When the iterator is exhausted it is closed.
     *
     * @param iterator iterator to advance
     * @return the converted document, or null once the iterator has no more elements
     */
    public DBObject getNextCascade(DBIterator iterator) {
        if (iterator.hasNext()) {
            return convertDBObjectToCascade(iterator.next());
        }
        iterator.close();
        return null;
    }

    /**
     * Opens an iterator over every document in the chemicals collection.
     *
     * @return the iterator
     */
    public DBIterator getIteratorOverChemicals() {
        return new DBIterator(constructCursorForAllChemicals());
    }

    /**
     * Opens an iterator over the chemical documents matching {@code matchCriterion}, with the
     * no-timeout option set on the cursor.
     *
     * @param matchCriterion query to run
     * @param keys keys object passed to find; null is replaced by an empty object
     * @return the iterator (a thin wrapper around the cursor)
     */
    public DBIterator getIteratorOverChemicals(BasicDBObject matchCriterion, BasicDBObject keys) {
        BasicDBObject projection = (keys != null) ? keys : new BasicDBObject();
        DBCursor matches = this.dbChemicals.find(matchCriterion, projection)
                .addOption(Bytes.QUERYOPTION_NOTIMEOUT);
        return new DBIterator(matches);
    }

    /**
     * Wraps a chemicals query in a plain {@link Iterator} that converts each document to a
     * {@link Chemical}. The underlying iterator is closed when {@code hasNext()} reports false.
     *
     * @param matchCriterion query to run
     * @return an iterator over the matching chemicals
     */
    public Iterator<Chemical> getJavaIteratorOverChemicals(BasicDBObject matchCriterion) {
        final DBIterator documents = getIteratorOverChemicals(matchCriterion, null);

        return new Iterator<Chemical>() {
            @Override
            public boolean hasNext() {
                if (documents.hasNext()) {
                    return true;
                }
                documents.close();
                return false;
            }

            @Override
            public Chemical next() {
                return convertDBObjectToChemical(documents.next());
            }
        };
    }

    /**
     * Wraps a chemicals query in a plain {@link Iterator} that yields each document's {@code InChI}
     * value. Only the InChI key is requested from the database. The underlying iterator is closed when
     * {@code hasNext()} reports false.
     *
     * @param matchCriterion query to run
     * @return an iterator over the InChIs of the matching chemicals
     */
    public Iterator<String> getIteratorOverInchis(BasicDBObject matchCriterion) {
        BasicDBObject inchiOnly = new BasicDBObject(ChemicalKeywords.INCHI$.MODULE$.toString(), true);
        final DBIterator documents = getIteratorOverChemicals(matchCriterion, inchiOnly);

        return new Iterator<String>() {
            @Override
            public boolean hasNext() {
                if (documents.hasNext()) {
                    return true;
                }
                documents.close();
                return false;
            }

            @Override
            public String next() {
                return (String) documents.next().get("InChI");
            }
        };
    }

    /**
     * Returns an iterator over the chemicals whose id is in {@code ids}, built as an OR over one
     * id-equality clause per entry.
     *
     * @param ids chemical ids to fetch; a null or empty list yields an empty iterator
     * @param notimeout currently unused: the underlying chemicals iterator always sets the
     *                  no-timeout cursor option
     * @return an iterator over the matching chemicals
     */
    public Iterator<Chemical> getChemicalsbyIds(List<Long> ids, boolean notimeout) {
        if (ids == null || ids.isEmpty()) {
            // An OR over an empty clause list is not a valid query, so answer without touching the database.
            return new ArrayList<Chemical>().iterator();
        }

        BasicDBList queryList = new BasicDBList();
        for (Long id : ids) {
            queryList.add(new BasicDBObject(ChemicalKeywords.ID$.MODULE$.toString(), id));
        }

        return getJavaIteratorOverChemicals(new BasicDBObject(MongoKeywords.OR$.MODULE$.toString(), queryList));
    }

    /**
     * Opens an iterator over the reactions collection using {@code DEFAULT_CURSOR_ORDER_BY_ID} as the
     * query and no explicit keys.
     *
     * @return the iterator
     */
    public DBIterator getIteratorOverReactions() {
        BasicDBObject orderedById = DEFAULT_CURSOR_ORDER_BY_ID;
        return getIteratorOverReactions(orderedById, null);
    }

    /**
     * Opens an iterator over the reactions selected by {@code getRangeUUIDRestriction(low, high)}.
     *
     * @param low lower bound handed to the range restriction
     * @param high upper bound handed to the range restriction
     * @return the iterator
     */
    private DBIterator getIteratorOverReactions(Long low, Long high) {
        BasicDBObject idRange = getRangeUUIDRestriction(low, high);
        return getIteratorOverReactions(idRange, null);
    }

    /**
     * Opens an iterator over the reaction documents matching {@code matchCriterion}, with the
     * no-timeout option set on the cursor.
     *
     * @param matchCriterion query to run
     * @param keys keys object passed to find; null is replaced by an empty object
     * @return the iterator (a thin wrapper around the cursor)
     */
    public DBIterator getIteratorOverReactions(BasicDBObject matchCriterion, BasicDBObject keys) {
        BasicDBObject projection = (keys != null) ? keys : new BasicDBObject();
        DBCursor matches = this.dbReactions.find(matchCriterion, projection)
                .addOption(Bytes.QUERYOPTION_NOTIMEOUT);
        return new DBIterator(matches);
    }

    /**
     * Converts the next document of the iterator into a {@link Reaction}.
     * When the iterator is exhausted it is closed.
     *
     * @param iterator iterator to advance
     * @return the next reaction, or null once the iterator has no more elements
     */
    public Reaction getNextReaction(DBIterator iterator) {
        if (iterator.hasNext()) {
            return convertDBObjectToReaction(iterator.next());
        }
        iterator.close();
        return null;
    }

    /**
     * Converts the next document of the iterator into a {@link Chemical}.
     * When the iterator is exhausted it is closed.
     *
     * @param iterator iterator to advance
     * @return the next chemical, or null once the iterator has no more elements
     */
    public Chemical getNextChemical(DBIterator iterator) {
        if (iterator.hasNext()) {
            return convertDBObjectToChemical(iterator.next());
        }
        iterator.close();
        return null;
    }

    /**
     * Converts the next document of the iterator into a {@link Cofactor}.
     * When the iterator is exhausted it is closed.
     *
     * @param iterator iterator to advance
     * @return the next cofactor, or null once the iterator has no more elements
     */
    public Cofactor getNextCofactor(DBIterator iterator) {
        if (iterator.hasNext()) {
            return convertDBObjectToCofactor(iterator.next());
        }
        iterator.close();
        return null;
    }

    /**
     * Converts the next document of the iterator into an {@link Organism}.
     * When the iterator is exhausted it is closed.
     *
     * @param iterator iterator to advance
     * @return the next organism, or null once the iterator has no more elements
     */
    public Organism getNextOrganism(DBIterator iterator) {
        if (iterator.hasNext()) {
            return convertDBObjectToOrg(iterator.next());
        }
        iterator.close();
        return null;
    }

    /**
     * Opens an iterator over every document in the cofactors collection.
     *
     * @return the iterator
     */
    public DBIterator getIteratorOverCofactors() {
        return new DBIterator(constructCursorForAllCofactors());
    }

    /**
     * Converts a document from the reactions collection into a {@link Reaction}.
     *
     * <p>Substrates and products flagged with {@code balance == true} are left out of the id arrays
     * given to the constructor, but their coefficients are still recorded when present.
     *
     * @param o the reaction document
     * @return the populated reaction
     */
    public Reaction convertDBObjectToReaction(DBObject o) {
        long uuid = (Integer) o.get("_id"); // checked: db type IS int
        String ecnum = (String) o.get("ecnum");
        String name_field = (String) o.get("easy_desc");
        Reaction.RxnDetailType type = Reaction.RxnDetailType.valueOf((String) o.get("is_abstract"));

        // All reactant lists live under the "enz_summary" sub-document.
        DBObject enzSummary = (DBObject) o.get("enz_summary");
        BasicDBList substrates = (BasicDBList) enzSummary.get("substrates");
        BasicDBList products = (BasicDBList) enzSummary.get("products");
        BasicDBList substrateCofactors = (BasicDBList) enzSummary.get("substrate_cofactors");
        BasicDBList productCofactors = (BasicDBList) enzSummary.get("product_cofactors");
        BasicDBList coenzymes = (BasicDBList) enzSummary.get("coenzymes");

        BasicDBList refs = (BasicDBList) o.get("references");
        BasicDBList proteins = (BasicDBList) o.get("proteins");
        DBObject mechanisticValidatorResults = (DBObject) o.get("mechanistic_validator_result");
        BasicDBList keywords = (BasicDBList) o.get("keywords");
        BasicDBList cikeywords = (BasicDBList) o.get("keywords_case_insensitive");

        String conversionDirectionString = (String) o.get("conversion_direction");
        ConversionDirectionType conversionDirection = null;
        if (conversionDirectionString != null) {
            conversionDirection = ConversionDirectionType.valueOf(conversionDirectionString);
        }

        String pathwayStepDirectionString = (String) o.get("pathway_step_direction");
        StepDirection pathwayStepDirection = null;
        if (pathwayStepDirectionString != null) {
            pathwayStepDirection = StepDirection.valueOf(pathwayStepDirectionString);
        }

        // Entries present only for balancing are not reported as substrates/products.
        List<Long> substrateIds = new ArrayList<Long>();
        for (int i = 0; i < substrates.size(); i++) {
            Boolean forBalance = (Boolean) ((DBObject) substrates.get(i)).get("balance");
            if (forBalance == null || !forBalance) {
                substrateIds.add(getEnzSummaryIDAsLong(substrates, i));
            }
        }
        List<Long> productIds = new ArrayList<Long>();
        for (int i = 0; i < products.size(); i++) {
            Boolean forBalance = (Boolean) ((DBObject) products.get(i)).get("balance");
            if (forBalance == null || !forBalance) {
                productIds.add(getEnzSummaryIDAsLong(products, i));
            }
        }
        List<Long> substrateCofactorIds = new ArrayList<Long>();
        for (int i = 0; i < substrateCofactors.size(); i++) {
            substrateCofactorIds.add(getEnzSummaryIDAsLong(substrateCofactors, i));
        }
        List<Long> productCofactorIds = new ArrayList<Long>();
        for (int i = 0; i < productCofactors.size(); i++) {
            productCofactorIds.add(getEnzSummaryIDAsLong(productCofactors, i));
        }
        List<Long> coenzymeIds = new ArrayList<Long>();
        for (int i = 0; i < coenzymes.size(); i++) {
            coenzymeIds.add(getEnzSummaryIDAsLong(coenzymes, i));
        }

        Reaction result = new Reaction(uuid,
                substrateIds.toArray(new Long[0]),
                productIds.toArray(new Long[0]),
                substrateCofactorIds.toArray(new Long[0]),
                productCofactorIds.toArray(new Long[0]),
                coenzymeIds.toArray(new Long[0]),
                ecnum, conversionDirection, pathwayStepDirection, name_field, type);

        for (int i = 0; i < substrates.size(); i++) {
            Integer coefficient = (Integer) ((DBObject) substrates.get(i)).get("coefficient");
            if (coefficient != null) {
                result.setSubstrateCoefficient(getEnzSummaryIDAsLong(substrates, i), coefficient);
            }
        }
        for (int i = 0; i < products.size(); i++) {
            Integer coefficient = (Integer) ((DBObject) products.get(i)).get("coefficient");
            if (coefficient != null) {
                result.setProductCoefficient(getEnzSummaryIDAsLong(products, i), coefficient);
            }
        }

        Double estimatedEnergy = (Double) o.get("estimateEnergy");
        result.setEstimatedEnergy(estimatedEnergy);

        String datasrc = (String) o.get("datasource");
        if (datasrc != null && !datasrc.isEmpty()) {
            result.setDataSource(Reaction.RxnDataSource.valueOf(datasrc));
        }

        if (mechanisticValidatorResults != null) {
            result.setMechanisticValidatorResult(MongoDBToJSON.conv(mechanisticValidatorResults));
        }

        if (refs != null) {
            for (Object entry : refs) {
                DBObject ref = (DBObject) entry;
                Reaction.RefDataSource src = Reaction.RefDataSource.valueOf((String) ref.get("src"));
                result.addReference(src, (String) ref.get("val"));
            }
        }

        if (proteins != null) {
            for (Object entry : proteins) {
                result.addProteinData(MongoDBToJSON.conv((DBObject) entry));
            }
        }

        if (keywords != null) {
            for (Object k : keywords) {
                result.addKeyword((String) k);
            }
        }

        if (cikeywords != null) {
            for (Object k : cikeywords) {
                result.addCaseInsensitiveKeyword((String) k);
            }
        }

        return result;
    }

    /**
     * Reads the {@code pubchem} value of the i-th entry of a reactant list as a Long.
     * The stored value may be a Long or an Integer; an Integer is widened.
     *
     * @param reactant list of reactant entries
     * @param i index of the entry to read
     * @return the id as a Long, or null if the entry has no {@code pubchem} value
     */
    private Long getEnzSummaryIDAsLong(BasicDBList reactant, int i) {
        Object rawId = ((DBObject) reactant.get(i)).get("pubchem");
        if (rawId == null || rawId instanceof Long) {
            return (Long) rawId;
        }
        // Anything else must be an Integer; other types fail the cast.
        return ((Integer) rawId).longValue();
    }

    /**
     * Returns every reaction that satisfies all of the given field-equals-value criteria, combined
     * with {@code $and}.
     *
     * <p>NOTE(review): an empty criteria map produces an empty {@code $and} list; confirm how the server
     * treats that before relying on it.
     *
     * @param equalityCriteria map from document field to the value it must match
     * @return the matching reactions
     */
    public Set<Reaction> getReactionsConstrained(Map<String, Object> equalityCriteria) {
        BasicDBList andList = new BasicDBList();
        for (Map.Entry<String, Object> criterion : equalityCriteria.entrySet()) {
            andList.add(new BasicDBObject(criterion.getKey(), criterion.getValue()));
        }
        BasicDBObject query = new BasicDBObject();
        query.put("$and", andList);

        Set<Reaction> results = new HashSet<Reaction>();
        DBCursor cur = this.dbReactions.find(query);
        try {
            while (cur.hasNext()) {
                results.add(convertDBObjectToReaction(cur.next()));
            }
        } finally {
            // The cursor was previously never closed.
            cur.close();
        }
        return results;
    }

    /**
     * Finds the chemicals whose {@code keywords} field matches the given keyword.
     *
     * @param keyword keyword to look for
     * @return the matching chemicals
     */
    public List<Chemical> keywordInChemicals(String keyword) {
        final String field = "keywords";
        return keywordInChemicals(field, keyword);
    }

    /**
     * Finds the chemicals whose {@code keywords_case_insensitive} field matches the given keyword.
     *
     * @param keyword keyword to look for
     * @return the matching chemicals
     */
    public List<Chemical> keywordInChemicalsCaseInsensitive(String keyword) {
        final String field = "keywords_case_insensitive";
        return keywordInChemicals(field, keyword);
    }

    /**
     * Loads every chemical whose {@code in_field} matches {@code keyword}.
     *
     * @param in_field document field to match on
     * @param keyword value the field must match
     * @return the matching chemicals, in cursor order
     */
    private List<Chemical> keywordInChemicals(String in_field, String keyword) {
        List<Chemical> chemicals = new ArrayList<Chemical>();

        DBCursor cur = constructCursorForMatchingChemicals(in_field, keyword, null);
        try {
            while (cur.hasNext()) {
                chemicals.add(convertDBObjectToChemical(cur.next()));
            }
        } finally {
            // Release the cursor even when a document fails to convert.
            cur.close();
        }

        return chemicals;
    }

    /**
     * Finds sequences matching {@code keyword} in the "keywords" field.
     * Delegates to the two-argument overload with the field name fixed to "keywords".
     *
     * @param keyword keyword to look up
     * @return the sequences returned by the two-argument overload
     */
    public List<Seq> keywordInSequence(String keyword) {
        return keywordInSequence("keywords", keyword);
    }

    /**
     * Finds sequences matching {@code keyword} in the "keywords_case_insensitive" field.
     * Delegates to the two-argument overload; the keyword is passed through unchanged.
     * NOTE(review): presumably callers must normalize the keyword's case themselves - confirm.
     *
     * @param keyword keyword to look up
     * @return the sequences returned by the two-argument overload
     */
    public List<Seq> keywordInSequenceCaseInsensitive(String keyword) {
        return keywordInSequence("keywords_case_insensitive", keyword);
    }

    /**
     * Queries the sequence collection for documents whose {@code in_field} equals {@code keyword}
     * and converts each hit to a {@link Seq}.
     *
     * The cursor is closed in a {@code finally} block so it is released even when converting a
     * document throws.
     *
     * @param in_field name of the document field to match against
     * @param keyword value to match
     * @return the converted sequences, in cursor order (possibly empty)
     */
    private List<Seq> keywordInSequence(String in_field, String keyword) {
        List<Seq> seqs = new ArrayList<Seq>();
        BasicDBObject query = new BasicDBObject();
        query.put(in_field, keyword);

        // An empty key document requests no field restriction.
        BasicDBObject keys = new BasicDBObject();

        DBCursor cur = this.dbSeq.find(query, keys);
        try {
            while (cur.hasNext()) {
                seqs.add(convertDBObjectToSeq(cur.next()));
            }
        } finally {
            cur.close();
        }

        return seqs;
    }

    /**
     * Finds cascade documents matching {@code keyword} in the "keywords" field.
     * Delegates to the two-argument overload with the field name fixed to "keywords".
     *
     * @param keyword keyword to look up
     * @return the cascade documents returned by the two-argument overload
     */
    public List<DBObject> keywordInCascade(String keyword) {
        return keywordInCascade("keywords", keyword);
    }

    /**
     * Finds cascade documents matching {@code keyword} in the "keywords_case_insensitive" field.
     * Delegates to the two-argument overload; the keyword is passed through unchanged.
     * NOTE(review): presumably callers must normalize the keyword's case themselves - confirm.
     *
     * @param keyword keyword to look up
     * @return the cascade documents returned by the two-argument overload
     */
    public List<DBObject> keywordInCascadeCaseInsensitive(String keyword) {
        return keywordInCascade("keywords_case_insensitive", keyword);
    }

    /**
     * Queries the cascades collection for documents whose {@code in_field} equals {@code keyword}
     * and passes each hit through {@link #convertDBObjectToCascade(DBObject)}.
     *
     * The cursor is closed in a {@code finally} block so it is released even when a step in the
     * loop throws.
     *
     * @param in_field name of the document field to match against
     * @param keyword value to match
     * @return the matching cascade documents, in cursor order (possibly empty)
     */
    private List<DBObject> keywordInCascade(String in_field, String keyword) {
        List<DBObject> cascades = new ArrayList<DBObject>();
        BasicDBObject query = new BasicDBObject();
        query.put(in_field, keyword);

        // An empty key document requests no field restriction.
        BasicDBObject keys = new BasicDBObject();

        DBCursor cur = this.dbCascades.find(query, keys);
        try {
            while (cur.hasNext()) {
                cascades.add(convertDBObjectToCascade(cur.next()));
            }
        } finally {
            cur.close();
        }

        return cascades;
    }

    /**
     * Conversion hook for cascade documents; currently returns its argument unchanged.
     *
     * @param o raw cascade document
     * @return the same object {@code o}
     */
    DBObject convertDBObjectToCascade(DBObject o) {
        // TODO: later on, we will have a dedicated cascade type that is
        // more descriptive than a raw DBObject
        return o;
    }

    /**
     * Finds waterfall documents matching {@code keyword} in the "keywords" field.
     * Delegates to the two-argument overload with the field name fixed to "keywords".
     *
     * @param keyword keyword to look up
     * @return the waterfall documents returned by the two-argument overload
     */
    public List<DBObject> keywordInWaterfall(String keyword) {
        return keywordInWaterfall("keywords", keyword);
    }

    /**
     * Finds waterfall documents matching {@code keyword} in the "keywords_case_insensitive" field.
     * Delegates to the two-argument overload; the keyword is passed through unchanged.
     * NOTE(review): presumably callers must normalize the keyword's case themselves - confirm.
     *
     * @param keyword keyword to look up
     * @return the waterfall documents returned by the two-argument overload
     */
    public List<DBObject> keywordInWaterfallCaseInsensitive(String keyword) {
        return keywordInWaterfall("keywords_case_insensitive", keyword);
    }

    /**
     * Queries the waterfalls collection for documents whose {@code in_field} equals
     * {@code keyword} and passes each hit through {@link #convertDBObjectToWaterfall(DBObject)}.
     *
     * The cursor is closed in a {@code finally} block so it is released even when a step in the
     * loop throws.
     *
     * @param in_field name of the document field to match against
     * @param keyword value to match
     * @return the matching waterfall documents, in cursor order (possibly empty)
     */
    private List<DBObject> keywordInWaterfall(String in_field, String keyword) {
        List<DBObject> waterfalls = new ArrayList<DBObject>();
        BasicDBObject query = new BasicDBObject();
        query.put(in_field, keyword);

        // An empty key document requests no field restriction.
        BasicDBObject keys = new BasicDBObject();

        DBCursor cur = this.dbWaterfalls.find(query, keys);
        try {
            while (cur.hasNext()) {
                waterfalls.add(convertDBObjectToWaterfall(cur.next()));
            }
        } finally {
            cur.close();
        }

        return waterfalls;
    }

    /**
     * Conversion hook for waterfall documents; currently returns its argument unchanged.
     *
     * @param o raw waterfall document
     * @return the same object {@code o}
     */
    DBObject convertDBObjectToWaterfall(DBObject o) {
        // TODO: later on, we will have a dedicated waterfall type that is
        // more descriptive than a raw DBObject
        return o;
    }

    /**
     * Finds reactions matching {@code keyword} in the "keywords" field.
     * Delegates to the two-argument overload with the field name fixed to "keywords".
     *
     * @param keyword keyword to look up
     * @return the reactions returned by the two-argument overload
     */
    public List<Reaction> keywordInReaction(String keyword) {
        return keywordInReaction("keywords", keyword);
    }

    /**
     * Finds reactions matching {@code keyword} in the "keywords_case_insensitive" field.
     * Delegates to the two-argument overload; the keyword is passed through unchanged.
     * NOTE(review): presumably callers must normalize the keyword's case themselves - confirm.
     *
     * @param keyword keyword to look up
     * @return the reactions returned by the two-argument overload
     */
    public List<Reaction> keywordInReactionCaseInsensitive(String keyword) {
        return keywordInReaction("keywords_case_insensitive", keyword);
    }

    /**
     * Queries the reactions collection for documents whose {@code in_field} equals
     * {@code keyword} and converts each hit to a {@link Reaction}.
     *
     * The cursor is closed in a {@code finally} block so it is released even when converting a
     * document throws.
     *
     * @param in_field name of the document field to match against
     * @param keyword value to match
     * @return the converted reactions, in cursor order (possibly empty)
     */
    private List<Reaction> keywordInReaction(String in_field, String keyword) {
        List<Reaction> rxns = new ArrayList<Reaction>();
        BasicDBObject query = new BasicDBObject();
        query.put(in_field, keyword);

        // An empty key document requests no field restriction.
        BasicDBObject keys = new BasicDBObject();

        DBCursor cur = this.dbReactions.find(query, keys);
        try {
            while (cur.hasNext()) {
                rxns.add(convertDBObjectToReaction(cur.next()));
            }
        } finally {
            cur.close();
        }

        return rxns;
    }

    /**
     * Looks up a cofactor by its "_id" field.
     *
     * @param cofactorUUID value to match against "_id"
     * @return the result of {@code getCofactorFromDB("_id", cofactorUUID)}; null when no document matches
     */
    public Cofactor getCofactorFromUUID(Long cofactorUUID) {
        return getCofactorFromDB("_id", cofactorUUID);
    }

    /**
     * Looks up a cofactor by its "InChI" field.
     *
     * @param inchi value to match against "InChI"
     * @return the result of {@code getCofactorFromDB("InChI", inchi)}; null when no document matches
     */
    public Cofactor getCofactorFromInChI(String inchi) {
        return getCofactorFromDB("InChI", inchi);
    }

    /**
     * Fetches the first cofactor document whose {@code field} equals {@code val}.
     *
     * @param field document field to match on
     * @param val required value of that field
     * @return the converted cofactor, or null when no document matches
     */
    private Cofactor getCofactorFromDB(String field, Object val) {
        DBObject match = this.dbCofactors.findOne(new BasicDBObject(field, val), new BasicDBObject());
        return (match != null) ? convertDBObjectToCofactor(match) : null;
    }

    /**
     * Builds a {@link Cofactor} from a stored document.
     *
     * Reads "_id" (cast to Long), "InChI" (String) and the optional "names" list, whose
     * entries are cast to String. A missing "names" list yields an empty name list.
     *
     * @param o cofactor document
     * @return the cofactor described by the document
     */
    public Cofactor convertDBObjectToCofactor(DBObject o) {
        final long id = (Long) o.get("_id");
        final String structure = (String) o.get("InChI");
        final BasicDBList storedNames = (BasicDBList) o.get("names");

        final List<String> names = new ArrayList<>();
        if (storedNames != null) {
            for (int idx = 0; idx < storedNames.size(); idx++) {
                names.add((String) storedNames.get(idx));
            }
        }
        return new Cofactor(id, structure, names);
    }

    /**
     * Fetches the reaction whose "_id" equals {@code reactionUUID}.
     *
     * @param reactionUUID non-negative reaction id
     * @return the converted reaction, or null when no document has that id
     * @throws RuntimeException if {@code reactionUUID} is negative
     */
    public Reaction getReactionFromUUID(Long reactionUUID) {
        if (reactionUUID < 0) {
            // Negative ids are rejected outright; the message records that they once meant "reversed".
            throw new RuntimeException(String.format(
                    "getReactionFromUUID called with a negative number (%d).  It used to reverse the reaction.",
                    reactionUUID));
        }
        BasicDBObject query = new BasicDBObject();
        query.put("_id", reactionUUID);

        // An empty key document requests no field restriction.
        BasicDBObject keys = new BasicDBObject();
        DBObject o = this.dbReactions.findOne(query, keys);
        if (o == null) {
            return null;
        }
        return convertDBObjectToReaction(o);
    }

    /**
     * Returns an iterator over the reactions whose id is one of {@code ids}.
     *
     * One clause per id, keyed by {@code ChemicalKeywords.ID$}, is combined under the
     * {@code MongoKeywords.OR$} operator. The underlying DB iterator is closed the first time
     * {@code hasNext()} reports exhaustion.
     *
     * NOTE(review): {@code notimeout} is not consulted anywhere in this method - confirm whether
     * it should be.
     * NOTE(review): an empty {@code ids} list yields an empty clause list - confirm callers never
     * pass one.
     *
     * @param ids reaction ids to fetch
     * @param notimeout currently unused
     * @return a lazily-evaluated iterator of converted reactions
     */
    public Iterator<Reaction> getReactionsIteratorById(List<Long> ids, boolean notimeout) {
        BasicDBList alternatives = new BasicDBList();
        for (Long wanted : ids) {
            alternatives.add(new BasicDBObject(ChemicalKeywords.ID$.MODULE$.toString(), wanted));
        }

        final DBIterator reactions = getIteratorOverReactions(
                new BasicDBObject(MongoKeywords.OR$.MODULE$.toString(), alternatives), null);

        return new Iterator<Reaction>() {
            @Override
            public boolean hasNext() {
                if (reactions.hasNext()) {
                    return true;
                }
                reactions.close();
                return false;
            }

            @Override
            public Reaction next() {
                return convertDBObjectToReaction(reactions.next());
            }
        };
    }

    /**
     * Returns an iterator over the reactions supplied by {@code getIteratorOverReactions()}.
     * The underlying DB iterator is closed the first time {@code hasNext()} reports exhaustion.
     *
     * @return a lazily-evaluated iterator of converted reactions
     */
    public Iterator<Reaction> getReactionsIterator() {
        final DBIterator reactions = getIteratorOverReactions();

        return new Iterator<Reaction>() {
            @Override
            public boolean hasNext() {
                if (reactions.hasNext()) {
                    return true;
                }
                reactions.close();
                return false;
            }

            @Override
            public Reaction next() {
                return convertDBObjectToReaction(reactions.next());
            }
        };
    }

    /**
     * Returns an iterator over the sequences matching {@code query}.
     * The underlying DB iterator is closed the first time {@code hasNext()} reports exhaustion.
     *
     * @param query match criterion handed to {@code getDbIteratorOverSeq}
     * @return a lazily-evaluated iterator of converted sequences
     */
    public Iterator<Seq> getSeqIterator(BasicDBObject query) {
        final DBIterator sequences = getDbIteratorOverSeq(query, new BasicDBObject());

        return new Iterator<Seq>() {
            @Override
            public boolean hasNext() {
                if (sequences.hasNext()) {
                    return true;
                }
                sequences.close();
                return false;
            }

            @Override
            public Seq next() {
                return convertDBObjectToSeq(sequences.next());
            }
        };
    }

    /**
     * Builds the restriction {@code { "_id" : { $gte : lowUUID, $lte : highUUID } }}.
     * A null bound is simply left out of the range document.
     *
     * NOTE(review): when both bounds are null the result is {@code { "_id" : { } }} - confirm
     * that callers handle that case as intended.
     *
     * @param lowUUID inclusive lower bound, or null for none
     * @param highUUID inclusive upper bound, or null for none
     * @return the restriction document
     */
    public BasicDBObject getRangeUUIDRestriction(Long lowUUID, Long highUUID) {
        BasicDBObject bounds = new BasicDBObject();
        if (lowUUID != null) {
            bounds.put("$gte", lowUUID);
        }
        if (highUUID != null) {
            bounds.put("$lte", highUUID);
        }
        return new BasicDBObject("_id", bounds);
    }

    /**
     * Returns the "_id" of every document in the reactions collection.
     *
     * @return the ids collected by {@link #getAllCollectionUUIDs(DBCollection)}
     */
    public List<Long> getAllReactionUUIDs() {
        return getAllCollectionUUIDs(this.dbReactions);
    }

    /**
     * Returns the "_id" of every document in the sequence collection.
     *
     * @return the ids collected by {@link #getAllCollectionUUIDs(DBCollection)}
     */
    public List<Long> getAllSeqUUIDs() {
        return getAllCollectionUUIDs(this.dbSeq);
    }

    /**
     * Collects the "_id" of every document in {@code collection}.
     *
     * Only the "_id" field is requested. Each id is cast to {@code Integer} and widened to
     * {@code long}, so a collection with non-Integer ids fails with a ClassCastException.
     * The cursor is closed in a {@code finally} block so it is released on that failure path too.
     *
     * @param collection collection to scan
     * @return all ids, in cursor order (possibly empty)
     */
    public List<Long> getAllCollectionUUIDs(DBCollection collection) {

        List<Long> ids = new ArrayList<Long>();

        BasicDBObject query = new BasicDBObject();
        BasicDBObject keys = new BasicDBObject();
        keys.put("_id", 1); // 0 means exclude, rest are included
        DBCursor cur = collection.find(query, keys);

        try {
            while (cur.hasNext()) {
                DBObject o = cur.next();
                long uuid = (Integer) o.get("_id"); // checked: db type IS int
                ids.add(uuid);
            }
        } finally {
            cur.close();
        }

        return ids;
    }

    /**
     * Fetches the sequence whose "_id" equals {@code seqID}.
     *
     * @param seqID id to look up
     * @return the converted sequence, or null when no document has that id
     */
    public Seq getSeqFromID(Long seqID) {
        DBObject found = this.dbSeq.findOne(new BasicDBObject("_id", seqID), new BasicDBObject());
        return (found != null) ? convertDBObjectToSeq(found) : null;
    }

    /**
     * Fetches the first sequence document whose "seq" field equals {@code seq}.
     *
     * @param seq sequence string to match
     * @return the converted sequence, or null when no document matches
     */
    public Seq getSeqFromSequence(String seq) {
        BasicDBObject criteria = new BasicDBObject("seq", seq);
        BasicDBObject allFields = new BasicDBObject();
        DBObject found = this.dbSeq.findOne(criteria, allFields);
        if (found != null) {
            return convertDBObjectToSeq(found);
        }
        return null;
    }

    /**
     * Finds all sequence documents whose "seq", "ecnum" and "org" fields equal the given values.
     *
     * @param seq required value of "seq"
     * @param ec required value of "ecnum"
     * @param organism required value of "org"
     * @return the converted sequences, in cursor order (possibly empty)
     */
    public List<Seq> getSeqFromSeqEcOrg(String seq, String ec, String organism) {
        BasicDBObject criteria = new BasicDBObject("seq", seq)
                .append("ecnum", ec)
                .append("org", organism);

        List<Seq> matches = new ArrayList<>();
        DBCursor cursor = this.dbSeq.find(criteria, new BasicDBObject());
        try {
            while (cursor.hasNext()) {
                matches.add(convertDBObjectToSeq(cursor.next()));
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
        return matches;
    }

    /**
     * Finds all sequence documents whose "metadata.accession.genbank_protein" array contains an
     * element equal to {@code accession} (via "$elemMatch" / "$eq").
     *
     * @param accession accession string to look for
     * @return the converted sequences, in cursor order (possibly empty)
     */
    public List<Seq> getSeqFromGenbankProtAccession(String accession) {
        BasicDBObject elementEquals = new BasicDBObject("$eq", accession);
        BasicDBObject criteria = new BasicDBObject("metadata.accession.genbank_protein",
                new BasicDBObject("$elemMatch", elementEquals));

        List<Seq> matches = new ArrayList<>();
        DBCursor cursor = this.dbSeq.find(criteria, new BasicDBObject());
        try {
            while (cursor.hasNext()) {
                matches.add(convertDBObjectToSeq(cursor.next()));
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
        return matches;
    }

    /**
     * Finds all sequence documents whose "seq" equals {@code seq} and whose
     * "metadata.accession.genbank_nucleotide" array contains an element equal to
     * {@code accession} (via "$elemMatch" / "$eq").
     *
     * @param accession accession string to look for
     * @param seq required value of "seq"
     * @return the converted sequences, in cursor order (possibly empty)
     */
    public List<Seq> getSeqFromGenbankNucAccessionSeq(String accession, String seq) {
        BasicDBObject elementEquals = new BasicDBObject("$eq", accession);
        BasicDBObject criteria = new BasicDBObject("seq", seq)
                .append("metadata.accession.genbank_nucleotide",
                        new BasicDBObject("$elemMatch", elementEquals));

        List<Seq> matches = new ArrayList<>();
        DBCursor cursor = this.dbSeq.find(criteria, new BasicDBObject());
        try {
            while (cursor.hasNext()) {
                matches.add(convertDBObjectToSeq(cursor.next()));
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
        return matches;
    }

    /**
     * Finds all sequence documents that have a "sar_constraints" field.
     *
     * The cursor is closed in a {@code finally} block, matching the other sequence queries in
     * this class, so it is released even when converting a document throws.
     *
     * @return the converted sequences, in cursor order (possibly empty)
     */
    public List<Seq> getSeqWithSARConstraints() {
        List<Seq> seqs = new ArrayList<Seq>();
        BasicDBObject query = new BasicDBObject();
        query.put("sar_constraints", new BasicDBObject("$exists", true));

        // An empty key document requests no field restriction.
        BasicDBObject keys = new BasicDBObject();

        DBCursor cur = this.dbSeq.find(query, keys);
        try {
            while (cur.hasNext()) {
                seqs.add(convertDBObjectToSeq(cur.next()));
            }
        } finally {
            cur.close();
        }

        return seqs;
    }

    /**
     * Finds all sequence documents matched by the query {@code { "rxn_refs" : rxnId }}.
     *
     * @param rxnId reaction id to look for in "rxn_refs"
     * @return the converted sequences, in cursor order (possibly empty)
     */
    public List<Seq> getSeqWithRxnRef(Long rxnId) {
        BasicDBObject criteria = new BasicDBObject("rxn_refs", rxnId);

        List<Seq> matches = new ArrayList<>();
        DBCursor cursor = this.dbSeq.find(criteria, new BasicDBObject());
        try {
            while (cursor.hasNext()) {
                matches.add(convertDBObjectToSeq(cursor.next()));
            }
        } finally {
            if (cursor != null) {
                cursor.close();
            }
        }
        return matches;
    }

    /**
     * Returns an iterator over the sequences supplied by {@code getDbIteratorOverSeq()}.
     * The underlying DB iterator is closed the first time {@code hasNext()} reports exhaustion.
     *
     * @return a lazily-evaluated iterator of converted sequences
     */
    public Iterator<Seq> getSeqIterator() {
        final DBIterator sequences = getDbIteratorOverSeq();

        return new Iterator<Seq>() {
            @Override
            public boolean hasNext() {
                if (sequences.hasNext()) {
                    return true;
                }
                sequences.close();
                return false;
            }

            @Override
            public Seq next() {
                return convertDBObjectToSeq(sequences.next());
            }
        };
    }

    /**
     * Builds a {@link Seq} from a stored sequence document.
     *
     * Fields read: "_id" (Integer), "ecnum", "org", "org_id" (Long), "seq", "src",
     * "references" (list of documents), "metadata" (document) and "rxn_refs" (list).
     * A missing "src" defaults to {@code Seq.AccDB.swissprot}; missing "references" or
     * "rxn_refs" yield empty collections.
     *
     * @param o sequence document
     * @return the sequence initialised from the document's fields
     */
    public Seq convertDBObjectToSeq(DBObject o) {
        long id = (Integer) o.get("_id"); // checked: db type IS int
        String ecnum = (String) o.get("ecnum");
        String org_name = (String) o.get("org");
        Long org_id = (Long) o.get("org_id");
        String aa_seq = (String) o.get("seq");
        String srcdb = (String) o.get("src");

        BasicDBList refs = (BasicDBList) o.get("references");
        DBObject meta = (DBObject) o.get("metadata");
        BasicDBList rxn_refs = (BasicDBList) (o.get("rxn_refs"));

        if (srcdb == null) {
            srcdb = Seq.AccDB.swissprot.name();
        }
        Seq.AccDB src = Seq.AccDB.valueOf(srcdb); // genbank | uniprot | trembl | embl | swissprot

        List<JSONObject> references = new ArrayList<>();
        if (refs != null) {
            for (Object r : refs) {
                references.add(MongoDBToJSON.conv((DBObject) r));
            }
        }

        // The second argument only pins from_dblist's element type to Long; its value is unused here.
        Long dummyLong = 0L;
        Set<Long> rxns_catalyzed = from_dblist(rxn_refs, dummyLong);

        return Seq.rawInit(id, ecnum, org_id, org_name, aa_seq, references, meta, src,
                // the rest of the params are the ones that are typically
                // "constructed". But since we are reading from the DB, we manually init
                rxns_catalyzed);
    }

    /**
     * Builds an {@link Organism} from a document's "org_id" and "name" fields.
     *
     * @param o organism-name document
     * @return the organism described by the document
     */
    public Organism convertDBObjectToOrg(DBObject o) {
        // The primitive cast unboxes, so a missing "org_id" fails here rather than downstream.
        final Long orgId = (long) o.get("org_id");
        final String orgName = (String) o.get("name");
        return new Organism(orgId, orgName);
    }

    /**
     * Wraps an unfiltered {@code find()} on the organism-names collection in a {@link DBIterator}.
     *
     * @return iterator over the organism-names collection
     */
    public DBIterator getDbIteratorOverOrgs() {
        return new DBIterator(this.dbOrganismNames.find());
    }

    /**
     * Wraps a filtered query on the organism-names collection in a {@link DBIterator}.
     * The cursor gets the {@code Bytes.QUERYOPTION_NOTIMEOUT} option.
     *
     * @param matchCriterion query document
     * @param keys fields to return; null is replaced by an empty document
     * @return iterator over the matching documents
     */
    public DBIterator getDbIteratorOverOrgs(BasicDBObject matchCriterion, BasicDBObject keys) {
        BasicDBObject projection = (keys != null) ? keys : new BasicDBObject();
        DBCursor matches = this.dbOrganismNames.find(matchCriterion, projection);
        return new DBIterator(matches.addOption(Bytes.QUERYOPTION_NOTIMEOUT));
    }

    /**
     * Looks up the "name" of the organism-names document whose "org_id" equals {@code id}.
     *
     * @param id organism id to look up
     * @return the name, or null when the collection is unavailable or no document matches
     */
    public String getOrganismNameFromId(Long id) {
        if (this.dbOrganismNames == null) {
            return null;
        }
        DBObject match = this.dbOrganismNames.findOne(
                new BasicDBObject("org_id", id), new BasicDBObject("name", 1));
        return (match == null) ? null : (String) match.get("name");
    }

    /**
     * Looks up the "org_id" of the organism-names document whose "name" equals {@code name}.
     *
     * @param name organism name to look up
     * @return the organism id, or -1 when the collection is unavailable or no document matches
     */
    public long getOrganismId(String name) {
        if (this.dbOrganismNames == null) {
            return -1;
        }
        DBObject match = this.dbOrganismNames.findOne(
                new BasicDBObject("name", name), new BasicDBObject("org_id", 1));
        if (match == null) {
            return -1;
        }
        return (Long) match.get("org_id"); // checked: db type IS long
    }

    /**
     * Returns the set of all organism ids involved in reactions.
     *
     * Walks every reaction document and collects the "id" of each entry in its "organisms"
     * list. Reactions without an "organisms" list are skipped, and the DB iterator is closed
     * when the scan ends, whether normally or through an exception.
     *
     * @return the distinct organism ids (possibly empty)
     */
    public Set<Long> getOrganismIDs() {
        Set<Long> ids = new HashSet<Long>();
        DBIterator iterator = getIteratorOverReactions(new BasicDBObject(), null);
        try {
            while (iterator.hasNext()) {
                DBObject r = iterator.next();
                BasicDBList orgs = (BasicDBList) r.get("organisms");
                if (orgs == null) {
                    continue;
                }
                for (Object o : orgs) {
                    ids.add((Long) ((DBObject) o).get("id")); // checked: db type IS Long
                }
            }
        } finally {
            iterator.close();
        }
        return ids;
    }

    /**
     * Returns the organism ids listed in one reaction's "organisms" field.
     *
     * A negative {@code reactionID} is first mapped through {@code Reaction.reverseID}.
     * An unknown reaction, or one without an "organisms" list, yields an empty set.
     *
     * @param reactionID id of the reaction to inspect
     * @return the organism ids of that reaction (possibly empty)
     */
    public Set<Long> getOrganismIDs(Long reactionID) {
        if (reactionID < 0) {
            reactionID = Reaction.reverseID(reactionID);
        }
        DBObject query = new BasicDBObject();
        query.put("_id", reactionID);
        Set<Long> ids = new HashSet<Long>();
        DBObject reaction = this.dbReactions.findOne(query);
        if (reaction != null) {
            BasicDBList orgs = (BasicDBList) reaction.get("organisms");
            // Guard against a missing list, as the sibling getters do for their own fields.
            if (orgs != null) {
                for (Object o : orgs) {
                    ids.add((Long) ((DBObject) o).get("id")); // checked: db type IS long
                }
            }
        }
        return ids;
    }

    /**
     * Returns the (source, value) pairs stored in one reaction's "references" list.
     *
     * A negative {@code reactionID} is first mapped through {@code Reaction.reverseID}.
     * An unknown reaction, or one without a "references" list, yields an empty list.
     *
     * @param reactionID id of the reaction to inspect
     * @return the references of that reaction, in stored order (possibly empty)
     */
    public List<P<Reaction.RefDataSource, String>> getReferences(Long reactionID) {
        if (reactionID < 0) {
            reactionID = Reaction.reverseID(reactionID);
        }
        DBObject byId = new BasicDBObject();
        byId.put("_id", reactionID);

        List<P<Reaction.RefDataSource, String>> collected = new ArrayList<>();
        DBObject reaction = this.dbReactions.findOne(byId);
        if (reaction == null) {
            return collected;
        }
        BasicDBList stored = (BasicDBList) reaction.get("references");
        if (stored == null) {
            return collected;
        }
        for (Object entry : stored) {
            DBObject ref = (DBObject) entry;
            Reaction.RefDataSource source = Reaction.RefDataSource.valueOf((String) ref.get("src"));
            String value = (String) ref.get("val");
            collected.add(new P<Reaction.RefDataSource, String>(source, value));
        }
        return collected;
    }

    /**
     * Returns the strings stored in one reaction's "km_values" list.
     * An unknown reaction, or one without that list, yields an empty set.
     *
     * @param reactionID id of the reaction to inspect
     * @return the distinct "km_values" entries (possibly empty)
     */
    public Set<String> getKMValues(Long reactionID) {
        Set<String> values = new HashSet<String>();
        DBObject byId = new BasicDBObject();
        byId.put("_id", reactionID);

        DBObject reaction = this.dbReactions.findOne(byId);
        if (reaction == null) {
            return values;
        }
        BasicDBList stored = (BasicDBList) reaction.get("km_values");
        if (stored == null) {
            return values;
        }
        for (Object entry : stored) {
            values.add((String) entry);
        }
        return values;
    }

    /**
     * Returns the strings stored in one reaction's "turnover_numbers" list.
     * An unknown reaction, or one without that list, yields an empty set.
     *
     * @param reactionID id of the reaction to inspect
     * @return the distinct "turnover_numbers" entries (possibly empty)
     */
    public Set<String> getTurnoverNumbers(Long reactionID) {
        Set<String> values = new HashSet<String>();
        DBObject byId = new BasicDBObject();
        byId.put("_id", reactionID);

        DBObject reaction = this.dbReactions.findOne(byId);
        if (reaction == null) {
            return values;
        }
        BasicDBList stored = (BasicDBList) reaction.get("turnover_numbers");
        if (stored == null) {
            return values;
        }
        for (Object entry : stored) {
            values.add((String) entry);
        }
        return values;
    }

    /**
     * Creates a regular (non-hashed) index on {@code field} of the cofactors collection.
     *
     * @param field name of the field to index
     */
    private void createCofactorsIndex(String field) {
        createCofactorsIndex(field, false); // create normal/non-hashed index
    }

    /**
     * Creates an index on {@code field} of the cofactors collection.
     *
     * @param field name of the field to index
     * @param hashedIndex true for a "hashed" index spec, false for the spec {@code {field: 1}}
     */
    private void createCofactorsIndex(String field, boolean hashedIndex) {
        BasicDBObject indexSpec = hashedIndex
                ? new BasicDBObject(field, "hashed")
                : new BasicDBObject(field, 1);
        this.dbCofactors.createIndex(indexSpec);
    }

    /**
     * Creates a regular (non-hashed) index on {@code field} of the chemicals collection.
     *
     * @param field name of the field to index
     */
    private void createChemicalsIndex(String field) {
        createChemicalsIndex(field, false); // create normal/non-hashed index
    }

    /**
     * Creates an index on {@code field} of the chemicals collection.
     *
     * @param field name of the field to index
     * @param hashedIndex true for a "hashed" index spec, false for the spec {@code {field: 1}}
     */
    private void createChemicalsIndex(String field, boolean hashedIndex) {
        BasicDBObject indexSpec = hashedIndex
                ? new BasicDBObject(field, "hashed")
                : new BasicDBObject(field, 1);
        this.dbChemicals.createIndex(indexSpec);
    }

    /**
     * Creates an index on {@code field} of the sequence collection.
     *
     * @param field name of the field to index
     * @param hashedIndex true for a "hashed" index spec, false for the spec {@code {field: 1}}
     */
    private void createSeqIndex(String field, boolean hashedIndex) {
        BasicDBObject indexSpec = hashedIndex
                ? new BasicDBObject(field, "hashed")
                : new BasicDBObject(field, 1);
        this.dbSeq.createIndex(indexSpec);
    }

    /**
     * Creates an index with the spec {@code {field: 1}} on the organism-names collection.
     *
     * @param field name of the field to index
     */
    private void createOrganismNamesIndex(String field) {
        this.dbOrganismNames.createIndex(new BasicDBObject(field, 1));
    }

    /**
     * Inserts a new sequence document and returns the id it was stored under.
     *
     * The id is the collection's document count at the time of the call, narrowed to
     * {@code int}. When both {@code org} and {@code seq} are non-null, a one-line summary of
     * the insert is printed to standard output.
     *
     * NOTE(review): deriving the id from {@code count()} looks unsafe under concurrent inserts
     * or after deletions - confirm this method is only used single-threaded on append-only data.
     *
     * @param src source database of the sequence
     * @param ec value stored as "ecnum"
     * @param org value stored as "org"
     * @param org_id value stored as "org_id"
     * @param seq value stored as "seq"
     * @param references reference objects, each converted and stored under "references"; must not be null
     * @param rxns reaction ids stored under "rxn_refs"; null is stored as an empty list
     * @param meta value stored as "metadata"; must not be null
     * @return the "_id" of the inserted document
     */
    public int submitToActSeqDB(Seq.AccDB src, String ec, String org, Long org_id, String seq,
            List<JSONObject> references, Set<Long> rxns, DBObject meta) {
        BasicDBObject doc = new BasicDBObject();
        // Plain narrowing cast; same value the deprecated new Long(..).intValue() produced.
        int id = (int) this.dbSeq.count();
        doc.put("_id", id);
        doc.put("src", src.name()); // genbank, uniprot, swissprot, trembl, embl
        doc.put("ecnum", ec);
        doc.put("org", org);
        doc.put("org_id", org_id); // this is the NCBI Taxonomy id, should correlate with db.organismnames{org_id} and db.organisms.{id}
        doc.put("seq", seq);

        BasicDBList refs = new BasicDBList();
        for (JSONObject ref : references) {
            refs.add(MongoDBToJSON.conv(ref));
        }
        doc.put("references", refs);

        doc.put("metadata", meta); // the metadata contains the uniprot acc#, name, uniprot catalytic activity,
        Object accession = meta.get("accession");

        doc.put("rxn_refs", to_dblist(rxns));

        this.dbSeq.insert(doc);

        if (org != null && seq != null) {
            // Organism name and sequence are truncated to keep the log line short.
            System.out.format("Inserted %s = [%s, %s] = %s %s\n", accession, ec,
                    org.substring(0, Math.min(10, org.length())), seq.substring(0, Math.min(20, seq.length())),
                    refs);
        }

        return id;
    }

    /**
     * Copies a set into a new {@link BasicDBList}.
     *
     * @param set elements to copy; null yields an empty list
     * @return a new list holding the set's elements
     */
    <X> BasicDBList to_dblist(Set<X> set) {
        BasicDBList converted = new BasicDBList();
        if (set == null) {
            return converted;
        }
        converted.addAll(set);
        return converted;
    }

    /**
     * Copies a {@link BasicDBList} into a new set, casting each element to {@code X}.
     * The cast is unchecked, so a wrongly-typed element is not detected here.
     *
     * @param dblist elements to copy; null yields an empty set
     * @param dummy never read; it only lets the caller fix the element type {@code X}
     * @return a new set holding the list's elements
     */
    <X> Set<X> from_dblist(BasicDBList dblist, X dummy) {
        Set<X> converted = new HashSet<X>();
        if (dblist == null) {
            return converted;
        }
        for (Object element : dblist) {
            converted.add((X) element);
        }
        return converted;
    }

    /**
     * Overwrites the "metadata" field of the stored document for {@code seq}.
     *
     * A targeted "$set" update is issued instead of fetching the whole document and writing it
     * back. This avoids a NullPointerException when no document has the sequence's id (the
     * update then matches nothing) and does not overwrite other fields changed in between.
     *
     * @param seq sequence whose id selects the document and whose metadata is stored
     */
    public void updateMetadata(Seq seq) {
        BasicDBObject query = new BasicDBObject().append("_id", seq.getUUID());
        BasicDBObject fields = new BasicDBObject("metadata", MongoDBToJSON.conv(seq.getMetadata()));
        this.dbSeq.update(query, new BasicDBObject("$set", fields));
    }

    /**
     * Overwrites the "references" field of the stored document for {@code seq}.
     *
     * Each reference is converted with {@code MongoDBToJSON.conv} and the resulting list is
     * written with a targeted "$set" update. This avoids a NullPointerException when no
     * document has the sequence's id (the update then matches nothing) and does not overwrite
     * other fields changed in between.
     *
     * @param seq sequence whose id selects the document and whose references are stored
     */
    public void updateReferences(Seq seq) {
        BasicDBObject query = new BasicDBObject().append("_id", seq.getUUID());

        BasicDBList refs = new BasicDBList();
        for (JSONObject ref : seq.getReferences()) {
            refs.add(MongoDBToJSON.conv(ref));
        }

        BasicDBObject fields = new BasicDBObject("references", refs);
        this.dbSeq.update(query, new BasicDBObject("$set", fields));
    }

    /**
     * Overwrites the "rxn_refs" field of the stored document for {@code seq}.
     *
     * A targeted "$set" update is issued instead of fetching the whole document and writing it
     * back. This avoids a NullPointerException when no document has the sequence's id (the
     * update then matches nothing) and does not overwrite other fields changed in between.
     *
     * @param seq sequence whose id selects the document and whose catalyzed reactions are stored
     */
    public void updateRxnRefs(Seq seq) {
        BasicDBObject query = new BasicDBObject().append("_id", seq.getUUID());
        BasicDBObject fields = new BasicDBObject("rxn_refs", seq.getReactionsCatalyzed());
        this.dbSeq.update(query, new BasicDBObject("$set", fields));
    }

    /*
     *
     *
     * End of other helper functions
     *
     *
     */

    /**
     * The following functions are for performing organism specific retrievals.
     */

    /**
     * Retrieves all reaction ids observed in the given species.
     *
     * The species is looked up in the map returned by {@link #getOrganisms()}; the reactions of
     * every organism id associated with it are then gathered via
     * {@link #graphByOrganism(Long)}. A species that is not a key of that map yields an empty
     * set rather than a NullPointerException.
     *
     * @param speciesID species-level organism id
     * @return the distinct reaction ids (possibly empty)
     */
    public Set<Long> getReactionsBySpecies(Long speciesID) {
        Set<Long> result = new HashSet<Long>();
        Set<Long> relevantIDs = getOrganisms().get(speciesID);
        if (relevantIDs == null) {
            return result;
        }
        for (Long id : relevantIDs) {
            result.addAll(graphByOrganism(id));
        }
        return result;
    }

    /**
     * Returns the ids of all reactions matching the given organism id.
     *
     * The filter on "organisms.id" is applied when {@code organismID} is null or greater than
     * -1; for any other value the query document is left empty, so no organism filter is used.
     * The cursor, which was previously never closed, is now closed in a {@code finally} block.
     *
     * @param organismID organism id to filter on
     * @return reaction ids in cursor order (possibly empty)
     */
    public List<Long> graphByOrganism(Long organismID) {

        DBObject query = new BasicDBObject();
        if (organismID == null || organismID > -1) {
            query.put("organisms.id", organismID);
        }
        List<Long> graphList = new ArrayList<Long>();

        DBCursor reactionCursor = this.dbReactions.find(query);
        try {
            while (reactionCursor.hasNext()) {
                DBObject reaction = reactionCursor.next();
                graphList.add(((Integer) reaction.get("_id")).longValue()); // checked: db type IS int
            }
        } finally {
            reactionCursor.close();
        }
        return graphList;
    }

    /**
     * Maps each species id to the organism ids found on lineages that pass through it.
     *
     * For every distinct "organisms.id" in the reactions collection, the organisms collection
     * is walked upwards through "parent_id" until id 1 is reached. Every id visited is recorded,
     * and the last visited id whose "rank" is "species" becomes the key. Lineages without a
     * species-rank node are dropped.
     *
     * @return species id to the set of ids on the lineages through that species
     */
    @SuppressWarnings("unchecked")
    public Map<Long, Set<Long>> getOrganisms() {
        List<Long> distinctIds = (List<Long>) this.dbReactions.distinct("organisms.id");
        Map<Long, Set<Long>> idsBySpecies = new HashMap<Long, Set<Long>>();

        for (Long startID : distinctIds) {
            List<Long> lineage = new ArrayList<Long>();

            // Load the starting node before the walk, even when it is already the root.
            DBObject lookup = new BasicDBObject();
            lookup.put("_id", startID);
            DBObject node = dbOrganisms.findOne(lookup);
            String rank = (String) node.get("rank");
            Long parent = (Long) node.get("parent_id"); // checked: db type IS long

            Long speciesID = null;
            Long current = startID;
            while (current != 1) {
                lineage.add(current);
                if (rank.equals("species")) {
                    // No break: a later species-rank ancestor overrides an earlier one.
                    speciesID = current;
                }
                // Step up to the parent and reload rank/parent from its document.
                lookup.put("_id", parent);
                node = dbOrganisms.findOne(lookup);
                current = parent;
                rank = (String) node.get("rank");
                parent = (Long) node.get("parent_id"); // checked: db type IS long
            }

            if (speciesID == null) {
                continue;
            }
            Set<Long> bucket = idsBySpecies.get(speciesID);
            if (bucket == null) {
                bucket = new HashSet<Long>();
                idsBySpecies.put(speciesID, bucket);
            }
            bucket.addAll(lineage);
        }
        return idsBySpecies;
    }

    /**
     * End of organism queries.
     */

    /**
     * Getting KEGG data
     */

    /** Cache of KEGG id to chemical uuid, filled by {@link #getKeggID_ActID(boolean)}. */
    private Map<String, Long> keggID_ActID;

    /**
     * Returns a map from KEGG id to the uuid of the chemical carrying that id.
     *
     * The map is built by iterating all chemicals and reading the "id" list of each chemical's
     * KEGG reference. It is assembled in a local variable and stored in the cache field only
     * once complete, so a failure part-way through cannot leave a partial map behind for later
     * cached calls.
     *
     * @param useCached when true and a cached map exists, return it without touching the DB
     * @return the KEGG id to chemical uuid map
     */
    public Map<String, Long> getKeggID_ActID(boolean useCached) {
        if (useCached && keggID_ActID != null) {
            return keggID_ActID;
        }
        Map<String, Long> rebuilt = new HashMap<String, Long>();
        DBIterator it = getIteratorOverChemicals();
        while (it.hasNext()) {
            Chemical c = getNextChemical(it);
            DBObject o = (DBObject) c.getRef(Chemical.REFS.KEGG);
            if (o == null) {
                continue;
            }
            BasicDBList list = (BasicDBList) o.get("id");
            for (Object s : list) {
                rebuilt.put((String) s, c.getUuid());
            }
        }
        keggID_ActID = rebuilt;
        return rebuilt;
    }

    /**
     * Following methods are related to Bing cross-references installation in the Installer DB along with various
     * queries to obtain names (aka synonyms)
     */

    /**
     * Assembles the Bing metadata document for a chemical.
     *
     * Each field is written only when its input is usable: "usage_terms" when
     * {@code usageTerms} is non-null, "total_count_search_results" when the count is non-null
     * and not negative, and "best_name" when the name is non-null and non-empty. Null count or
     * name values are skipped instead of causing a NullPointerException.
     *
     * @param usageTerms usage terms to store, or null for none
     * @param totalCountSearchResults total search-result count, or null / negative for none
     * @param bestName best name, or null / empty for none
     * @return the metadata document (possibly empty)
     */
    public BasicDBObject createBingMetadataDoc(Set<UsageTermUrlSet> usageTerms, Long totalCountSearchResults,
            String bestName) {
        BasicDBObject metadata = new BasicDBObject();
        if (usageTerms != null) {
            BasicDBList usageTermsDBObject = new BasicDBList();
            for (UsageTermUrlSet usageTerm : usageTerms) {
                // What happens if you don't translate to basic db obj in the next line?
                usageTermsDBObject.add(usageTerm.getBasicDBObject());
            }
            metadata.put("usage_terms", usageTermsDBObject);
        }
        if (totalCountSearchResults != null && totalCountSearchResults >= 0) {
            metadata.put("total_count_search_results", totalCountSearchResults);
        }
        if (bestName != null && !bestName.equals("")) {
            metadata.put("best_name", bestName);
        }
        return metadata;
    }

    /**
     * Stores Bing search results on the chemical identified by {@code inchi}.
     *
     * Sets "xref.BING.metadata" and "xref.BING.dbid" with a "$set" update keyed on the
     * chemical's uuid. Nothing is written when no chemical is found for the InChI.
     *
     * @param inchi InChI used to find the chemical
     * @param bestName value stored as "xref.BING.dbid"
     * @param metadata value stored as "xref.BING.metadata"
     */
    public void updateChemicalWithBingSearchResults(String inchi, String bestName, BasicDBObject metadata) {
        Chemical chemical = this.getChemicalFromInChI(inchi);
        if (chemical == null) {
            return;
        }
        long uuid = chemical.getUuid();
        BasicDBObject fields = new BasicDBObject("xref.BING.metadata", metadata)
                .append("xref.BING.dbid", bestName);
        this.dbChemicals.update(new BasicDBObject("_id", uuid), new BasicDBObject("$set", fields));
    }

    public NamesOfMolecule getNamesFromBasicDBObject(BasicDBObject c) {

        String inchi = (String) c.get("InChI");

        NamesOfMolecule moleculeNames = new NamesOfMolecule(inchi);
        BasicDBObject names = (BasicDBObject) c.get("names");
        if (names != null) {
            BasicDBList brendaNamesList = (BasicDBList) names.get("brenda");
            if (brendaNamesList != null) {
                Set<String> brendaNames = new HashSet<>();
                for (Object brendaName : brendaNamesList) {
                    brendaNames.add((String) brendaName);
                }
                moleculeNames.setBrendaNames(brendaNames);
            }
        }
        // XREF
        BasicDBObject xref = (BasicDBObject) c.get("xref");
        if (xref != null) {
            // CHEBI
            BasicDBObject chebi = (BasicDBObject) xref.get("CHEBI");
            if (chebi != null) {
                Set<String> chebiNames = new HashSet<>();
                BasicDBObject chebiMetadata = (BasicDBObject) chebi.get("metadata");
                BasicDBList chebiSynonymsList = (BasicDBList) chebiMetadata.get("Synonym");
                if (chebiSynonymsList != null) {
                    for (Object chebiName : chebiSynonymsList) {
                        chebiNames.add((String) chebiName);
                    }
                    moleculeNames.setChebiNames(chebiNames);
                }
            }
            // METACYC
            BasicDBObject metacyc = (BasicDBObject) xref.get("METACYC");
            if (metacyc != null) {
                Set<String> metacycNames = new HashSet<>();
                BasicDBList metacycMetadata = (BasicDBList) metacyc.get("meta");
                if (metacycMetadata != null) {
                    for (Object metaCycMeta : metacycMetadata) {
                        BasicDBObject metaCycMetaDBObject = (BasicDBObject) metaCycMeta;
                        String metaCycName = (String) metaCycMetaDBObject.get("sname");
                        if (metaCycName == null) {
                            continue;
                        }
                        metacycNames.add(metaCycName);
                    }
                    moleculeNames.setMetacycNames(metacycNames);
                }
            }
            // DRUGBANK
            BasicDBObject drugbank = (BasicDBObject) xref.get("DRUGBANK");
            if (drugbank != null) {
                Set<String> drugbankNames = new HashSet<>();
                BasicDBObject drugbankMetadata = (BasicDBObject) drugbank.get("metadata");
                drugbankNames.add((String) drugbankMetadata.get("name"));
                BasicDBObject drugbankSynonyms = (BasicDBObject) drugbankMetadata.get("synonyms");
                if (drugbankSynonyms != null) {
                    if (drugbankSynonyms.get("synonym") instanceof String) {
                        drugbankNames.add((String) drugbankSynonyms.get("synonym"));
                        moleculeNames.setDrugbankNames(drugbankNames);
                    } else {
                        BasicDBList drugbankSynonymsList = (BasicDBList) drugbankSynonyms.get("synonym");
                        if (drugbankSynonymsList != null) {
                            for (Object drugbankSynonym : drugbankSynonymsList) {
                                drugbankNames.add((String) drugbankSynonym);
                            }
                            moleculeNames.setDrugbankNames(drugbankNames);
                        }
                    }
                }
                Set<String> drugbankBrands = new HashSet<>();
                BasicDBObject drugbankBrandsObject = (BasicDBObject) drugbankMetadata.get("brands");
                if (drugbankBrandsObject != null) {
                    if (drugbankBrandsObject.get("brand") instanceof String) {
                        drugbankBrands.add((String) drugbankBrandsObject.get("brand"));
                        moleculeNames.setDrugbankBrands(drugbankBrands);
                    } else {
                        BasicDBList drugbankBrandsList = (BasicDBList) drugbankBrandsObject.get("brand");
                        if (drugbankBrandsList != null) {
                            for (Object drugbankBrand : drugbankBrandsList) {
                                drugbankBrands.add((String) drugbankBrand);
                            }
                            moleculeNames.setDrugbankBrands(drugbankBrands);
                        }
                    }
                }
            }
            // WIKIPEDIA
            BasicDBObject wikipedia = (BasicDBObject) xref.get("WIKIPEDIA");
            if (wikipedia != null) {
                BasicDBObject wikipediaMetadata = (BasicDBObject) wikipedia.get("metadata");
                if (wikipediaMetadata != null) {
                    String wikipediaName = (String) wikipediaMetadata.get("article");
                    moleculeNames.setWikipediaName(wikipediaName);
                }
            }
        }
        return moleculeNames;
    }

    /**
     * Queries the chemicals collection for the documents whose "InChI" field is one of the provided InChIs and
     * which carry an "xref.BING" field. The projection requests the "InChI", "names.brenda" and "xref" fields.
     * @param inchis set of InChI strings to look up
     * @return a cursor over the matching chemical documents
     */
    public DBCursor fetchNamesAndUsageForInchis(Set<String> inchis) {
        // Selection: the InChI must be one of the candidates and a Bing cross-reference must be present.
        BasicDBList candidates = new BasicDBList();
        candidates.addAll(inchis);
        BasicDBObject selector = new BasicDBObject("InChI", new BasicDBObject("$in", candidates))
                .append("xref.BING", new BasicDBObject("$exists", true));

        // Projection: restrict the returned documents to the fields listed here.
        BasicDBObject projection = new BasicDBObject("InChI", true)
                .append("names.brenda", true)
                .append("xref", true);

        return dbChemicals.find(selector, projection);
    }

    /**
     * Looks up one chemical document by its "InChI" field and converts it into a NamesOfMolecule through
     * getNamesFromBasicDBObject.
     * @param inchi InChI string of the chemical to look up
     * @return the names extracted from the matching document, or null when no document matches
     */
    public NamesOfMolecule fetchNamesFromInchi(String inchi) {
        // Restrict the returned document to the name-bearing fields listed here.
        BasicDBObject projection = new BasicDBObject("InChI", true)
                .append("names.brenda", true)
                .append("xref.CHEBI.metadata.Synonym", true)
                .append("xref.DRUGBANK.metadata", true)
                .append("xref.METACYC.meta", true)
                .append("xref.WIKIPEDIA.metadata.article", true);

        BasicDBObject match = (BasicDBObject) dbChemicals.findOne(new BasicDBObject("InChI", inchi), projection);
        return match == null ? null : getNamesFromBasicDBObject(match);
    }

    /**
     * Tells whether the chemicals collection holds a document with the given InChI that has an "xref.BING" field.
     * @param inchi InChI string of the chemical to check
     * @return true when such a document exists, false otherwise
     */
    public boolean hasBingSearchResultsFromInchi(String inchi) {
        BasicDBObject whereQuery = new BasicDBObject("InChI", inchi);
        whereQuery.put("xref.BING", new BasicDBObject("$exists", true));
        // Only the existence of a match matters, so ask for the "_id" alone instead of the whole document.
        BasicDBObject fields = new BasicDBObject("_id", true);
        return dbChemicals.findOne(whereQuery, fields) != null;
    }

    /**
     * The following methods are related to ChEBI cross-references installation in the Installer DB.
     */

    /**
     * This function retrieves the ChEBI ID corresponding to an InChI. In the (frequent) case where no ChEBI xref is
     * present, null is returned.
     * @param inchi input InChI representation of the chemical
     * @return String: the ChEBI ID corresponding to the InChI representation provided
     */
    public String getChebiIDFromInchi(String inchi) {
        // Match on the InChI and require the ChEBI cross-reference field to be present.
        BasicDBObject whereQuery = new BasicDBObject("InChI", inchi);
        whereQuery.put("xref.CHEBI", new BasicDBObject("$exists", true));
        // Only the ChEBI ID is read below, so there is no need to fetch the rest of the document.
        BasicDBObject fields = new BasicDBObject("xref.CHEBI.dbid", true);

        BasicDBObject c = (BasicDBObject) dbChemicals.findOne(whereQuery, fields);
        if (c == null) {
            return null;
        }
        BasicDBObject xref = (BasicDBObject) c.get("xref");
        if (xref == null) {
            return null;
        }
        // "$exists: true" also matches a field that is stored with a null value, so guard before dereferencing.
        BasicDBObject chebi = (BasicDBObject) xref.get("CHEBI");
        if (chebi == null) {
            return null;
        }
        return (String) chebi.get("dbid");
    }

    /**
     * This function retrieves the chemical corresponding to a ChEBI ID and updates its metadata with the ChEBI
     * applications provided
     * @param chebiId ChEBI ID for the chemical to update
     * @param applicationSet Set of main and direct ChEBI applications, represented in a ChebiApplicationSet
     */
    public void updateChemicalWithChebiApplications(String chebiId,
            BrendaChebiOntology.ChebiApplicationSet applicationSet) {
        Chemical chemical = this.getChemicalFromChebiId(chebiId);
        // Nothing to write when the chemical is unknown or no application set was supplied.
        if (chemical == null || applicationSet == null) {
            return;
        }

        long uuid = chemical.getUuid();
        BasicDBObject selector = new BasicDBObject("_id", uuid);
        // "$set" touches only the applications entry of the ChEBI metadata.
        BasicDBObject applications =
                new BasicDBObject("xref.CHEBI.metadata.applications", applicationSet.toBasicDBObject());
        BasicDBObject modification = new BasicDBObject("$set", applications);
        this.dbChemicals.update(selector, modification);
    }

    /**
     * Setup the ability to use MongoDB's aggregation framework.
     * This greatly simplifies pulling out highly nested and unstructured data from the db.
     *
     * This method performs the query over the sequence database.
     *
     * References: https://docs.mongodb.com/manual/aggregation/
     * @param pipeline A list of DBObjects that will be sequentially applied via aggregate.
     * @return An iterator over all the matching objects.
     */
    public Iterator<DBObject> applyPipelineOverSequences(List<DBObject> pipeline) {
        // Run the pipeline against dbSeq and hand back an iterator over the aggregation results.
        return this.dbSeq.aggregate(pipeline).results().iterator();
    }

    /**
     * Setup the ability to use MongoDB's aggregation framework.
     * This greatly simplifies pulling out highly nested and unstructured data from the db.
     *
     * This method performs the query over the reactions collection.
     *
     * References: https://docs.mongodb.com/manual/aggregation/
     * @param pipeline A list of DBObjects that will be sequentially applied via aggregate.
     * @return An iterator over all the matching objects.
     */
    public Iterator<DBObject> applyPipelineOverReactions(List<DBObject> pipeline) {
        // Run the pipeline against dbReactions and hand back an iterator over the aggregation results.
        return this.dbReactions.aggregate(pipeline).results().iterator();
    }
}