Java tutorial
/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package act.installer.brenda; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.DBOptions; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import java.io.File; import java.io.IOException; import java.io.Serializable; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; public class SQLConnection { public static final String QUERY_SUBSTRATES_PRODUCTS = StringUtils.join(new String[] { "select", " EC_Number,", // 1 " Substrates,", // 2 " Commentary_Substrates,", // 3 " Literature_Substrates,", // 4 " Organism_Substrates,", // 5 " Products,", // 6 " Reversibility,", // 7 " id", // 8 "from Substrates_Products", }, " "); public static final String QUERY_NATURAL_SUBSTRATES_PRODUCTS = StringUtils.join( new String[] { "select", " EC_Number,", " Natural_Substrates,", " Commentary_Natural_Substrates,", " Literature_Natural_Substrates,", " Organism_Natural_Substrates,", " Natural_Products,", " Reversibility,", " id", "from Natural_Substrates_Products", }, " "); public static final String QUERY_GET_SYNONYMS = StringUtils .join(new String[] { "select", " lm.Ligand", "from ligand_molfiles lm1", "join ligand_molfiles lm on lm1.groupID = lm.groupID", "where lm1.Ligand = ?", }, " "); private Connection brendaConn; private Connection brendaLigandConn; public SQLConnection() { } public void connect(String host, Integer port, String username, String password) throws SQLException { String brendaConnectionUrl = String.format("jdbc:mysql://%s:%s/brenda", host, port == null ? "3306" : port.toString()); String brendaLigandConnectionUrl = String.format("jdbc:mysql://%s:%s/brenda_ligand", host, port == null ? "3306" : port.toString()); brendaConn = DriverManager.getConnection(brendaConnectionUrl, username, password); brendaLigandConn = DriverManager.getConnection(brendaLigandConnectionUrl, username, password); } public void disconnect() throws SQLException { if (!brendaConn.isClosed()) { brendaConn.close(); } if (!brendaLigandConn.isClosed()) { brendaLigandConn.close(); } } /** * A handy function that closes a result set when an iterator has hit the end. Does some ugly stuff with exceptions * but needs to be used inside an iterator. * @param results The result set to check for another row. * @param stmt A statement to close when we're out of results. * @return True if the result set has more rows, false otherwise (after closing). */ private static boolean hasNextHelper(ResultSet results, Statement stmt) { try { // TODO: is there a better way to do this? if (results.isLast()) { results.close(); // Tidy up if we find we're at the end. stmt.close(); return false; } else { return true; } } catch (SQLException e) { /* Note: this is usually not a great thing to do. In this circumstance we don't expect the * calling code to do anything but crash anyway, so... */ throw new RuntimeException(e); } } private Iterator<BrendaRxnEntry> runSPQuery(final boolean isNatural) throws SQLException { String query = isNatural ? QUERY_NATURAL_SUBSTRATES_PRODUCTS : QUERY_SUBSTRATES_PRODUCTS; final PreparedStatement stmt = brendaConn.prepareStatement(query); final ResultSet results = stmt.executeQuery(); return new Iterator<BrendaRxnEntry>() { @Override public boolean hasNext() { return hasNextHelper(results, stmt); } @Override public BrendaRxnEntry next() { try { results.next(); Integer literatureSubstrates = results.getInt(4); if (results.wasNull()) { literatureSubstrates = null; } BrendaRxnEntry sp = new BrendaRxnEntry(results.getString(1), results.getString(2), results.getString(3), literatureSubstrates == null ? null : literatureSubstrates.toString(), results.getString(5), results.getString(6), results.getString(7), results.getInt(8), isNatural); return sp; } catch (SQLException e) { throw new RuntimeException(e); } } }; } /** * Query the DB's Substrates_Products table, producing an iterator of results rows. * @return An iterator that returns one Substrates_Products object at a time. * @throws SQLException */ public Iterator<BrendaRxnEntry> getRxns() throws SQLException { return runSPQuery(false); } /** * Query the DB's Natural_Substrates_Products table, producing an iterator of results rows. * @return An iterator that returns one Substrates_Products object at a time. * @throws SQLException */ public Iterator<BrendaRxnEntry> getNaturalRxns() throws SQLException { return runSPQuery(true); } /** * Look up all BRENDA synonyms for a particular chemical name. * @param name The name for which to search. * @return A list of synonyms. * @throws SQLException */ public List<String> getSynonymsForChemicalName(String name) throws SQLException { try (PreparedStatement stmt = brendaLigandConn.prepareStatement(QUERY_GET_SYNONYMS)) { stmt.setString(1, name); try (ResultSet resultSet = stmt.executeQuery()) { List<String> synonyms = new ArrayList<>(); while (resultSet.next()) { synonyms.add(resultSet.getString(1)); } return synonyms; } } } /** * Iterate over all BRENDA ligands (from the ligands_molfiles table). * @return An iterator over all BRENDA ligands. * @throws SQLException */ public Iterator<BrendaSupportingEntries.Ligand> getLigands() throws SQLException { final PreparedStatement stmt = brendaLigandConn.prepareStatement(BrendaSupportingEntries.Ligand.QUERY); final ResultSet results = stmt.executeQuery(); return new Iterator<BrendaSupportingEntries.Ligand>() { @Override public boolean hasNext() { return hasNextHelper(results, stmt); } @Override public BrendaSupportingEntries.Ligand next() { try { results.next(); return BrendaSupportingEntries.Ligand.fromResultSet(results); } catch (SQLException e) { throw new RuntimeException(e); } } }; } /** * Iterate over all BRENDA organisms. * @return An iterator over all BRENDA organisms. * @throws SQLException */ public Iterator<BrendaSupportingEntries.Organism> getOrganisms() throws SQLException { final PreparedStatement stmt = brendaConn.prepareStatement(BrendaSupportingEntries.Organism.QUERY); final ResultSet results = stmt.executeQuery(); return new Iterator<BrendaSupportingEntries.Organism>() { @Override public boolean hasNext() { return hasNextHelper(results, stmt); } @Override public BrendaSupportingEntries.Organism next() { try { results.next(); return BrendaSupportingEntries.Organism.fromResultSet(results); } catch (SQLException e) { throw new RuntimeException(e); } } }; } /** * Iterate over all ChEBI ontologies. * @return An iterator over all ChEBI ontologies. * @throws SQLException */ public Iterator<BrendaChebiOntology.ChebiOntology> getChebiOntologies() throws SQLException { final PreparedStatement stmt = brendaConn.prepareStatement(BrendaChebiOntology.ChebiOntology.QUERY); final ResultSet results = stmt.executeQuery(); return new Iterator<BrendaChebiOntology.ChebiOntology>() { @Override public boolean hasNext() { return SQLConnection.hasNextHelper(results, stmt); } @Override public BrendaChebiOntology.ChebiOntology next() { try { results.next(); return BrendaChebiOntology.ChebiOntology.fromResultSet(results); } catch (SQLException e) { throw new RuntimeException(e); } } }; } /** * Iterate over all ChEBI relationships of a certain type. * @return An iterator over all ChEBI relationships of a certain type. * @throws SQLException */ public Iterator<BrendaChebiOntology.ChebiRelationship> getChebiRelationships(int relationshipType) throws SQLException { PreparedStatement stmt = brendaConn.prepareStatement(BrendaChebiOntology.ChebiRelationship.QUERY); BrendaChebiOntology.ChebiRelationship.bindType(stmt, relationshipType); stmt.setInt(1, relationshipType); final ResultSet results = stmt.executeQuery(); return new Iterator<BrendaChebiOntology.ChebiRelationship>() { @Override public boolean hasNext() { return SQLConnection.hasNextHelper(results, stmt); } @Override public BrendaChebiOntology.ChebiRelationship next() { try { results.next(); return BrendaChebiOntology.ChebiRelationship.fromResultSet(results); } catch (SQLException e) { throw new RuntimeException(e); } } }; } /** * Fetch all sequences corresponding to the specified reaction. * @param rxnEntry A reaction whose sequences to search for. * @return A list of all matching BRENDA sequence entries. * @throws SQLException */ public List<BrendaSupportingEntries.Sequence> getSequencesForReaction(BrendaRxnEntry rxnEntry) throws SQLException { List<BrendaSupportingEntries.Sequence> results = new ArrayList<>(); boolean requireExactMatch = true; try (PreparedStatement stmt = BrendaSupportingEntries.Sequence.prepareStatementVague(brendaConn, rxnEntry, requireExactMatch); ResultSet resultSet = stmt.executeQuery();) { while (resultSet.next()) { results.add(BrendaSupportingEntries.Sequence.sequenceFromResultSet(resultSet, requireExactMatch)); } } /* Fall back to vague match if there are no sequences that are precisely attributable to this reaction. * * If we find a perfect association, there is little utility in considering BRENDA sequences for the same EC number * and organism (in which we'll have much less confidence), so we just terminate if we get a precise match. */ if (results.size() == 0) { requireExactMatch = false; try (PreparedStatement stmt = BrendaSupportingEntries.Sequence.prepareStatementVague(brendaConn, rxnEntry, requireExactMatch); ResultSet resultSet = stmt.executeQuery();) { while (resultSet.next()) { results.add( BrendaSupportingEntries.Sequence.sequenceFromResultSet(resultSet, requireExactMatch)); } } } return results; } // Helpers for reaction-associated data sets. /** * Get all values of a particular BRENDA DB type. * @param instance An instance to use when reading the table rows. * @param query The query to run against the BRENDA MySQL DB. * @param ecNumber The EC number to use when querying the DB (should come from a reaction). * @param literatureId The literature id to use when querying the DB (should come from a reaction). * @param organism The organism name to use when querying the DB (should come from a reaction). * @param <T> The type of data to retrieve; corresponds to a BRENDA DB table. * @return A list of all instances of the secified type that share the EC number, literature id, and organism name. * @throws SQLException */ private <T extends FromBrendaDB<T> & Serializable> List<T> getRSValues(T instance, String query, String ecNumber, String literatureId, String organism) throws SQLException { try (PreparedStatement st = brendaConn.prepareStatement(query)) { st.setString(1, ecNumber); st.setString(2, "%" + literatureId + "%"); st.setString(3, organism); try (ResultSet resultSet = st.executeQuery()) { List<T> results = new ArrayList<>(); while (resultSet.next()) { if (BrendaSupportingEntries.findIdInList(resultSet.getString(instance.getLiteratureField()), literatureId)) { results.add(instance.fromResultSet(resultSet)); } // TODO: log when we can't find the exact literature ID in the query results. } return results; } } } // TODO: these could probably be consolidated via a single polymorphic method. public List<BrendaSupportingEntries.KMValue> getKMValue(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.KMValue.INSTANCE, BrendaSupportingEntries.KMValue.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.Cloned> getClonedValue(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.Cloned.INSTANCE, BrendaSupportingEntries.Cloned.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.PosttranslationalModification> getPosttranslationalModification( BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.PosttranslationalModification.INSTANCE, BrendaSupportingEntries.PosttranslationalModification.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.SpecificActivity> getSpecificActivity(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.SpecificActivity.INSTANCE, BrendaSupportingEntries.SpecificActivity.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.OrganismCommentary> getOrganismCommentary(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.OrganismCommentary.INSTANCE, BrendaSupportingEntries.OrganismCommentary.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.GeneralInformation> getGeneralInformation(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.GeneralInformation.INSTANCE, BrendaSupportingEntries.GeneralInformation.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.Cofactor> getCofactors(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.Cofactor.INSTANCE, BrendaSupportingEntries.Cofactor.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.Inhibitors> getInhibitors(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.Inhibitors.INSTANCE, BrendaSupportingEntries.Inhibitors.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.ActivatingCompound> getActivatingCompounds(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.ActivatingCompound.INSTANCE, BrendaSupportingEntries.ActivatingCompound.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.KCatKMValue> getKCatKMValues(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.KCatKMValue.INSTANCE, BrendaSupportingEntries.KCatKMValue.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.Expression> getExpression(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.Expression.INSTANCE, BrendaSupportingEntries.Expression.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.Subunits> getSubunits(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.Subunits.INSTANCE, BrendaSupportingEntries.Subunits.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public List<BrendaSupportingEntries.Localization> getLocalization(BrendaRxnEntry reaction) throws SQLException { return getRSValues(BrendaSupportingEntries.Localization.INSTANCE, BrendaSupportingEntries.Localization.QUERY, reaction.getEC(), reaction.getLiteratureRef(), reaction.getOrganism()); } public BrendaSupportingEntries.RecommendNameTable fetchRecommendNameTable() throws SQLException { return BrendaSupportingEntries.RecommendNameTable.fetchRecommendedNameTable(brendaConn); } /** * Create an on-disk index of BRENDA data that supports reactions at the specified path. * @param path A path where the index will be stored. * @throws IOException * @throws ClassNotFoundException * @throws RocksDBException * @throws SQLException */ public void createSupportingIndex(File path) throws IOException, ClassNotFoundException, RocksDBException, SQLException { new BrendaSupportingEntries().constructOnDiskBRENDAIndex(path, this.brendaConn); } public void deleteSupportingIndex(File path) throws IOException { // With help from http://stackoverflow.com/questions/779519/delete-files-recursively-in-java. FileUtils.deleteDirectory(path); } public Pair<RocksDB, Map<String, ColumnFamilyHandle>> openSupportingIndex(File supportingIndex) throws RocksDBException { List<FromBrendaDB> instances = BrendaSupportingEntries.allFromBrendaDBInstances(); List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>(instances.size() + 1); columnFamilyDescriptors.add(new ColumnFamilyDescriptor("default".getBytes())); for (FromBrendaDB instance : instances) { columnFamilyDescriptors.add(new ColumnFamilyDescriptor(instance.getColumnFamilyName().getBytes())); } List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>(columnFamilyDescriptors.size()); DBOptions dbOptions = new DBOptions(); dbOptions.setCreateIfMissing(false); RocksDB rocksDB = RocksDB.open(dbOptions, supportingIndex.getAbsolutePath(), columnFamilyDescriptors, columnFamilyHandles); Map<String, ColumnFamilyHandle> columnFamilyHandleMap = new HashMap<>(columnFamilyHandles.size()); // TODO: can we zip these together more easily w/ Java 8? for (int i = 0; i < columnFamilyDescriptors.size(); i++) { ColumnFamilyDescriptor cfd = columnFamilyDescriptors.get(i); ColumnFamilyHandle cfh = columnFamilyHandles.get(i); columnFamilyHandleMap.put(new String(cfd.columnFamilyName(), BrendaSupportingEntries.UTF8), cfh); } return Pair.of(rocksDB, columnFamilyHandleMap); } }