act.installer.brenda.SQLConnection.java Source code

Java tutorial

Introduction

Here is the source code for act.installer.brenda.SQLConnection.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package act.installer.brenda;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.DBOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class SQLConnection {

    public static final String QUERY_SUBSTRATES_PRODUCTS = StringUtils.join(new String[] { "select", "  EC_Number,", // 1
            "  Substrates,", // 2
            "  Commentary_Substrates,", // 3
            "  Literature_Substrates,", // 4
            "  Organism_Substrates,", // 5
            "  Products,", // 6
            "  Reversibility,", // 7
            "  id", // 8
            "from Substrates_Products", }, " ");

    public static final String QUERY_NATURAL_SUBSTRATES_PRODUCTS = StringUtils.join(
            new String[] { "select", "  EC_Number,", "  Natural_Substrates,", "  Commentary_Natural_Substrates,",
                    "  Literature_Natural_Substrates,", "  Organism_Natural_Substrates,", "  Natural_Products,",
                    "  Reversibility,", "  id", "from Natural_Substrates_Products", },
            " ");

    public static final String QUERY_GET_SYNONYMS = StringUtils
            .join(new String[] { "select", "  lm.Ligand", "from ligand_molfiles lm1",
                    "join ligand_molfiles lm on lm1.groupID = lm.groupID", "where lm1.Ligand = ?", }, " ");

    private Connection brendaConn;
    private Connection brendaLigandConn;

    public SQLConnection() {
    }

    public void connect(String host, Integer port, String username, String password) throws SQLException {
        String brendaConnectionUrl = String.format("jdbc:mysql://%s:%s/brenda", host,
                port == null ? "3306" : port.toString());
        String brendaLigandConnectionUrl = String.format("jdbc:mysql://%s:%s/brenda_ligand", host,
                port == null ? "3306" : port.toString());
        brendaConn = DriverManager.getConnection(brendaConnectionUrl, username, password);
        brendaLigandConn = DriverManager.getConnection(brendaLigandConnectionUrl, username, password);
    }

    public void disconnect() throws SQLException {
        if (!brendaConn.isClosed()) {
            brendaConn.close();
        }
        if (!brendaLigandConn.isClosed()) {
            brendaLigandConn.close();
        }
    }

    /**
     * A handy function that closes a result set when an iterator has hit the end.  Does some ugly stuff with exceptions
     * but needs to be used inside an iterator.
     * @param results The result set to check for another row.
     * @param stmt A statement to close when we're out of results.
     * @return True if the result set has more rows, false otherwise (after closing).
     */
    private static boolean hasNextHelper(ResultSet results, Statement stmt) {
        try {
            // TODO: is there a better way to do this?
            if (results.isLast()) {
                results.close(); // Tidy up if we find we're at the end.
                stmt.close();
                return false;
            } else {
                return true;
            }
        } catch (SQLException e) {
            /* Note: this is usually not a great thing to do.  In this circumstance we don't expect the
             * calling code to do anything but crash anyway, so... */
            throw new RuntimeException(e);
        }
    }

    private Iterator<BrendaRxnEntry> runSPQuery(final boolean isNatural) throws SQLException {
        String query = isNatural ? QUERY_NATURAL_SUBSTRATES_PRODUCTS : QUERY_SUBSTRATES_PRODUCTS;
        final PreparedStatement stmt = brendaConn.prepareStatement(query);
        final ResultSet results = stmt.executeQuery();

        return new Iterator<BrendaRxnEntry>() {
            @Override
            public boolean hasNext() {
                return hasNextHelper(results, stmt);
            }

            @Override
            public BrendaRxnEntry next() {
                try {
                    results.next();
                    Integer literatureSubstrates = results.getInt(4);

                    if (results.wasNull()) {
                        literatureSubstrates = null;
                    }
                    BrendaRxnEntry sp = new BrendaRxnEntry(results.getString(1), results.getString(2),
                            results.getString(3),
                            literatureSubstrates == null ? null : literatureSubstrates.toString(),
                            results.getString(5), results.getString(6), results.getString(7), results.getInt(8),
                            isNatural);
                    return sp;
                } catch (SQLException e) {
                    throw new RuntimeException(e);
                }

            }
        };
    }

    /**
     * Query the DB's Substrates_Products table, producing an iterator of results rows.
     * @return An iterator that returns one Substrates_Products object at a time.
     * @throws SQLException
     */
    public Iterator<BrendaRxnEntry> getRxns() throws SQLException {
        return runSPQuery(false);
    }

    /**
     * Query the DB's Natural_Substrates_Products table, producing an iterator of results rows.
     * @return An iterator that returns one Substrates_Products object at a time.
     * @throws SQLException
     */
    public Iterator<BrendaRxnEntry> getNaturalRxns() throws SQLException {
        return runSPQuery(true);
    }

    /**
     * Look up all BRENDA synonyms for a particular chemical name.
     * @param name The name for which to search.
     * @return A list of synonyms.
     * @throws SQLException
     */
    public List<String> getSynonymsForChemicalName(String name) throws SQLException {
        try (PreparedStatement stmt = brendaLigandConn.prepareStatement(QUERY_GET_SYNONYMS)) {
            stmt.setString(1, name);
            try (ResultSet resultSet = stmt.executeQuery()) {
                List<String> synonyms = new ArrayList<>();
                while (resultSet.next()) {
                    synonyms.add(resultSet.getString(1));
                }
                return synonyms;
            }
        }
    }

    /**
     * Iterate over all BRENDA ligands (from the ligands_molfiles table).
     * @return An iterator over all BRENDA ligands.
     * @throws SQLException
     */
    public Iterator<BrendaSupportingEntries.Ligand> getLigands() throws SQLException {
        final PreparedStatement stmt = brendaLigandConn.prepareStatement(BrendaSupportingEntries.Ligand.QUERY);
        final ResultSet results = stmt.executeQuery();

        return new Iterator<BrendaSupportingEntries.Ligand>() {
            @Override
            public boolean hasNext() {
                return hasNextHelper(results, stmt);
            }

            @Override
            public BrendaSupportingEntries.Ligand next() {
                try {
                    results.next();
                    return BrendaSupportingEntries.Ligand.fromResultSet(results);
                } catch (SQLException e) {
                    throw new RuntimeException(e);
                }

            }
        };
    }

    /**
     * Iterate over all BRENDA organisms.
     * @return An iterator over all BRENDA organisms.
     * @throws SQLException
     */
    public Iterator<BrendaSupportingEntries.Organism> getOrganisms() throws SQLException {
        final PreparedStatement stmt = brendaConn.prepareStatement(BrendaSupportingEntries.Organism.QUERY);
        final ResultSet results = stmt.executeQuery();

        return new Iterator<BrendaSupportingEntries.Organism>() {
            @Override
            public boolean hasNext() {
                return hasNextHelper(results, stmt);
            }

            @Override
            public BrendaSupportingEntries.Organism next() {
                try {
                    results.next();
                    return BrendaSupportingEntries.Organism.fromResultSet(results);
                } catch (SQLException e) {
                    throw new RuntimeException(e);
                }
            }
        };
    }

    /**
     * Iterate over all ChEBI ontologies.
     * @return An iterator over all ChEBI ontologies.
     * @throws SQLException
     */
    public Iterator<BrendaChebiOntology.ChebiOntology> getChebiOntologies() throws SQLException {
        final PreparedStatement stmt = brendaConn.prepareStatement(BrendaChebiOntology.ChebiOntology.QUERY);
        final ResultSet results = stmt.executeQuery();

        return new Iterator<BrendaChebiOntology.ChebiOntology>() {
            @Override
            public boolean hasNext() {
                return SQLConnection.hasNextHelper(results, stmt);
            }

            @Override
            public BrendaChebiOntology.ChebiOntology next() {
                try {
                    results.next();
                    return BrendaChebiOntology.ChebiOntology.fromResultSet(results);
                } catch (SQLException e) {
                    throw new RuntimeException(e);
                }

            }
        };
    }

    /**
     * Iterate over all ChEBI relationships of a certain type.
     * @return An iterator over all ChEBI relationships of a certain type.
     * @throws SQLException
     */
    public Iterator<BrendaChebiOntology.ChebiRelationship> getChebiRelationships(int relationshipType)
            throws SQLException {
        PreparedStatement stmt = brendaConn.prepareStatement(BrendaChebiOntology.ChebiRelationship.QUERY);
        BrendaChebiOntology.ChebiRelationship.bindType(stmt, relationshipType);
        stmt.setInt(1, relationshipType);
        final ResultSet results = stmt.executeQuery();

        return new Iterator<BrendaChebiOntology.ChebiRelationship>() {
            @Override
            public boolean hasNext() {
                return SQLConnection.hasNextHelper(results, stmt);
            }

            @Override
            public BrendaChebiOntology.ChebiRelationship next() {
                try {
                    results.next();
                    return BrendaChebiOntology.ChebiRelationship.fromResultSet(results);
                } catch (SQLException e) {
                    throw new RuntimeException(e);
                }

            }
        };
    }

    /**
     * Fetch all sequences corresponding to the specified reaction.
     * @param rxnEntry A reaction whose sequences to search for.
     * @return A list of all matching BRENDA sequence entries.
     * @throws SQLException
     */
    public List<BrendaSupportingEntries.Sequence> getSequencesForReaction(BrendaRxnEntry rxnEntry)
            throws SQLException {
        List<BrendaSupportingEntries.Sequence> results = new ArrayList<>();

        boolean requireExactMatch = true;
        try (PreparedStatement stmt = BrendaSupportingEntries.Sequence.prepareStatementVague(brendaConn, rxnEntry,
                requireExactMatch); ResultSet resultSet = stmt.executeQuery();) {
            while (resultSet.next()) {
                results.add(BrendaSupportingEntries.Sequence.sequenceFromResultSet(resultSet, requireExactMatch));
            }
        }

        /* Fall back to vague match if there are no sequences that are precisely attributable to this reaction.
         *
         * If we find a perfect association, there is little utility in considering BRENDA sequences for the same EC number
         * and organism (in which we'll have much less confidence), so we just terminate if we get a precise match.
         */
        if (results.size() == 0) {
            requireExactMatch = false;
            try (PreparedStatement stmt = BrendaSupportingEntries.Sequence.prepareStatementVague(brendaConn,
                    rxnEntry, requireExactMatch); ResultSet resultSet = stmt.executeQuery();) {
                while (resultSet.next()) {
                    results.add(
                            BrendaSupportingEntries.Sequence.sequenceFromResultSet(resultSet, requireExactMatch));
                }
            }
        }

        return results;
    }

    // Helpers for reaction-associated data sets.

    /**
     * Get all values of a particular BRENDA DB type.
     * @param instance An instance to use when reading the table rows.
     * @param query The query to run against the BRENDA MySQL DB.
     * @param ecNumber The EC number to use when querying the DB (should come from a reaction).
     * @param literatureId The literature id to use when querying the DB (should come from a reaction).
     * @param organism The organism name to use when querying the DB (should come from a reaction).
     * @param <T> The type of data to retrieve; corresponds to a BRENDA DB table.
     * @return A list of all instances of the secified type that share the EC number, literature id, and organism name.
     * @throws SQLException
     */
    private <T extends FromBrendaDB<T> & Serializable> List<T> getRSValues(T instance, String query,
            String ecNumber, String literatureId, String organism) throws SQLException {
        try (PreparedStatement st = brendaConn.prepareStatement(query)) {
            st.setString(1, ecNumber);
            st.setString(2, "%" + literatureId + "%");
            st.setString(3, organism);
            try (ResultSet resultSet = st.executeQuery()) {
                List<T> results = new ArrayList<>();
                while (resultSet.next()) {
                    if (BrendaSupportingEntries.findIdInList(resultSet.getString(instance.getLiteratureField()),
                            literatureId)) {
                        results.add(instance.fromResultSet(resultSet));
                    }
                    // TODO: log when we can't find the exact literature ID in the query results.
                }
                return results;
            }
        }
    }

    // TODO: these could probably be consolidated via a single polymorphic method.
    public List<BrendaSupportingEntries.KMValue> getKMValue(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.KMValue.INSTANCE, BrendaSupportingEntries.KMValue.QUERY,
                reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.Cloned> getClonedValue(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.Cloned.INSTANCE, BrendaSupportingEntries.Cloned.QUERY,
                reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.PosttranslationalModification> getPosttranslationalModification(
            BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.PosttranslationalModification.INSTANCE,
                BrendaSupportingEntries.PosttranslationalModification.QUERY, reaction.getEC(),
                reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.SpecificActivity> getSpecificActivity(BrendaRxnEntry reaction)
            throws SQLException {
        return getRSValues(BrendaSupportingEntries.SpecificActivity.INSTANCE,
                BrendaSupportingEntries.SpecificActivity.QUERY, reaction.getEC(), reaction.getLiteratureRef(),
                reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.OrganismCommentary> getOrganismCommentary(BrendaRxnEntry reaction)
            throws SQLException {
        return getRSValues(BrendaSupportingEntries.OrganismCommentary.INSTANCE,
                BrendaSupportingEntries.OrganismCommentary.QUERY, reaction.getEC(), reaction.getLiteratureRef(),
                reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.GeneralInformation> getGeneralInformation(BrendaRxnEntry reaction)
            throws SQLException {
        return getRSValues(BrendaSupportingEntries.GeneralInformation.INSTANCE,
                BrendaSupportingEntries.GeneralInformation.QUERY, reaction.getEC(), reaction.getLiteratureRef(),
                reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.Cofactor> getCofactors(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.Cofactor.INSTANCE, BrendaSupportingEntries.Cofactor.QUERY,
                reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.Inhibitors> getInhibitors(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.Inhibitors.INSTANCE, BrendaSupportingEntries.Inhibitors.QUERY,
                reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.ActivatingCompound> getActivatingCompounds(BrendaRxnEntry reaction)
            throws SQLException {
        return getRSValues(BrendaSupportingEntries.ActivatingCompound.INSTANCE,
                BrendaSupportingEntries.ActivatingCompound.QUERY, reaction.getEC(), reaction.getLiteratureRef(),
                reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.KCatKMValue> getKCatKMValues(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.KCatKMValue.INSTANCE, BrendaSupportingEntries.KCatKMValue.QUERY,
                reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.Expression> getExpression(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.Expression.INSTANCE, BrendaSupportingEntries.Expression.QUERY,
                reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.Subunits> getSubunits(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.Subunits.INSTANCE, BrendaSupportingEntries.Subunits.QUERY,
                reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism());
    }

    public List<BrendaSupportingEntries.Localization> getLocalization(BrendaRxnEntry reaction) throws SQLException {
        return getRSValues(BrendaSupportingEntries.Localization.INSTANCE,
                BrendaSupportingEntries.Localization.QUERY, reaction.getEC(), reaction.getLiteratureRef(),
                reaction.getOrganism());
    }

    public BrendaSupportingEntries.RecommendNameTable fetchRecommendNameTable() throws SQLException {
        return BrendaSupportingEntries.RecommendNameTable.fetchRecommendedNameTable(brendaConn);
    }

    /**
     * Create an on-disk index of BRENDA data that supports reactions at the specified path.
     * @param path A path where the index will be stored.
     * @throws IOException
     * @throws ClassNotFoundException
     * @throws RocksDBException
     * @throws SQLException
     */
    public void createSupportingIndex(File path)
            throws IOException, ClassNotFoundException, RocksDBException, SQLException {
        new BrendaSupportingEntries().constructOnDiskBRENDAIndex(path, this.brendaConn);
    }

    public void deleteSupportingIndex(File path) throws IOException {
        // With help from http://stackoverflow.com/questions/779519/delete-files-recursively-in-java.
        FileUtils.deleteDirectory(path);
    }

    public Pair<RocksDB, Map<String, ColumnFamilyHandle>> openSupportingIndex(File supportingIndex)
            throws RocksDBException {
        List<FromBrendaDB> instances = BrendaSupportingEntries.allFromBrendaDBInstances();
        List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>(instances.size() + 1);
        columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes()));
        for (FromBrendaDB instance : instances) {
            columnFamilyDescriptors.add(new ColumnFamilyDescriptor(instance.getColumnFamilyName().getBytes()));
        }
        List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>(columnFamilyDescriptors.size());

        DBOptions dbOptions = new DBOptions();
        dbOptions.setCreateIfMissing(false);
        RocksDB rocksDB = RocksDB.open(dbOptions, supportingIndex.getAbsolutePath(), columnFamilyDescriptors,
                columnFamilyHandles);
        Map<String, ColumnFamilyHandle> columnFamilyHandleMap = new HashMap<>(columnFamilyHandles.size());
        // TODO: can we zip these together more easily w/ Java 8?

        for (int i = 0; i < columnFamilyDescriptors.size(); i++) {
            ColumnFamilyDescriptor cfd = columnFamilyDescriptors.get(i);
            ColumnFamilyHandle cfh = columnFamilyHandles.get(i);
            columnFamilyHandleMap.put(new String(cfd.columnFamilyName(), BrendaSupportingEntries.UTF8), cfh);
        }

        return Pair.of(rocksDB, columnFamilyHandleMap);
    }
}