// Java tutorial
/** * Copyright 2010 CMBI (contact: <Gerrit.Vriend@radboudumc.nl>) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package nl.ru.cmbi.pisa.wrapper; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import lombok.Cleanup; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.Setter; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Wrapper around EBI's PISA webservice that caches retrieved results locally. * <p> * This class supplies a java interface to the PISA RESTful interface described at the <a * href="https://www.ebi.ac.uk/msd-srv/prot_int/pi_download.html">PISA download page</a>, adding some level of * abstraction and error handling. * <p> * To avoid overloading the EBI-service, the returned result pages are stored locally for future reference. This cache * currently does not expire automatically (yet), it should be maintained manually; the commands listed here may be * useful. 
* <p> * <h1>useful (linux) commands to maintain the cache:</h1> * - Clean up all error-message cache-files, and empty directories resulting from that clean-up.<br> * # find ./ -size -90c | xargs rm ; find -empty | xargs rmdir; find -empty | xargs rmdir<br> * <p> * - find all assembly files that have an error message other than "unavailable (out of range)"<br> * # find ./ -name *.pdb -size -200c | xargs cat | grep -v ^$ | grep -v "unavailable (out of range)"<br> * * @author jkerssem */ //@Service @EqualsAndHashCode(of = { "cacheDir", "pisaBaseUrl" }) public class PisaCachedWebDao { /** the obligatory logger */ private static final Logger log = LoggerFactory.getLogger(PisaCachedWebDao.class); /** * Convenience constant: the URL the PISA REST-service run at at the EBI:<br/> * currently: {@value} */ public static final String defaultPisaBaseUrl = "http://www.ebi.ac.uk/pdbe/pisa/cgi-bin/"; /** * Convenience constant: a default local directory where the PISA cache is maintained<br> * currently: {@value} */ public static final String defaultPisaCacheDir = "/data/PisaCache/"; // Cache-specific variables; /** * disk-location of the cache, must be a directory. * <p> * It should not be changed after initialisation, or you 'lose' the existing cache. * <p> * It is used together with the {@link #multimerFileSuffix}, {@link #assemblyFileSuffix} and * {@link #interfaceFileSuffix} to generate the actual file paths for the cache. */ @Getter private File cacheDir; private static final String multimerFileSuffix = "_multimer.xml"; private static final String interfaceFileSuffix = "_interface.xml"; private static final String assemblyFileSuffix = "_assembly.pdb"; // PISA-web URLs /** * The base URL where the pisa scripts are hosted. * <p> * From this are derived the {@link #multimerBaseUrl}, {@link #interfaceBaseUrl} and {@link #assemblyBaseUrl}. Set * it to <code>null</code> to disable webfetching altogether. 
*/ @Getter private String pisaBaseUrl; @Getter private String multimerBaseUrl; @Getter private String interfaceBaseUrl; @Getter private String assemblyBaseUrl; /** * Amount of pdbIDs to get in one webrequest, currently {@value} . * <p> * PISA suggests between 20 to 50 to avoid overloading the server. */ @Getter @Setter private int batchSize = 50; /** * Amount of milliseconds to wait between sending out web-requests. * <p> * This is the throttle delay to ensure PISA servers are not overloaded. Setting this too low/fast will cause PISA * to silently drop further requests made to it. A good indication is 1000 milliseconds. */ @Getter @Setter private int webFetchThrottleMillis = 1000; /** * Internal tracker for time of last web-request. * * @see #webFetchThrottleMillis * @see #throttledOpenStream(URL) */ private long lastWebRequestTime = 0; /** parser for PISA XML files, stored here and re-used to avoid instantiation overhead */ private SAXReader saxReader; /* * Constructors & other housekeeping =============================================================================== */ /** Constructor that loads from the default "pisa-wrapper.properties" file */ public PisaCachedWebDao() { try { // Load properties specified in pisa-wrapper.properties final Properties props = new Properties(); final InputStream inStream = this.getClass().getResourceAsStream("/pisa-wrapper.properties"); props.load(inStream); initURLs(props.getProperty("pisaurl").trim()); initCache(new File(props.getProperty("cachedir").trim())); String webdelayString = null; try { webdelayString = props.getProperty("webfetchdelay", "1000").trim(); setWebFetchThrottleMillis(Integer.valueOf(webdelayString)); } catch (final NumberFormatException nfe) { setWebFetchThrottleMillis(1000); log.warn("Couldn't parse webfetchdelay parameter from config file: \"{}\"", webdelayString); } String webbatchString = null; try { webbatchString = props.getProperty("webfetchbatchsize", "50").trim(); 
setBatchSize(Integer.valueOf(webbatchString)); } catch (final NumberFormatException nfe) { log.warn("Couldn't parse webfetchbatchsize parameter from config file: \"{}\"", webbatchString); setBatchSize(50); } } catch (final IOException e) { throw new IllegalStateException("Couldn't instantiate pisa-wrapper", e); } log.info("Created caching PISA DAO from properties; cache @ \"{}\", PISA @ \"{}\"", cacheDir.getAbsolutePath(), pisaBaseUrl); } /** * pass-through constructor for cache-location-as-string, see {@link #PisaCachedWebDao(String, File)} */ public PisaCachedWebDao(final String pisaBaseUrl, final String cacheDirPath) throws FileNotFoundException { this(pisaBaseUrl, new File(cacheDirPath)); } /** * Constructor, builds the cache-dir and PISA URL's. * * @param pisaBaseUrl * the base URL where PISA is listening. This DAO will append the necessary parameters to * get multimers, interfaces and assemblies. * @param cacheDir * the location of a readable/writable directory where this DAO will write it's cache files. * Set this to <code>null</code> to disable caching entirely. * @throws FileNotFoundException * in case the provided cache-directory is unwritable or not a directory * @see #defaultPisaBaseUrl */ public PisaCachedWebDao(final String pisaBaseUrl, final File cacheDir) throws FileNotFoundException { initURLs(pisaBaseUrl); initCache(cacheDir); log.info("Created caching PISA DAO; cache @ \"{}\", PISA @ \"{}\"", cacheDir.getAbsolutePath(), pisaBaseUrl); } /** * Initialises the cache directory, if desired. * * @param newCacheDir * File where the cachedir should be, can be a: * <ul> * <li>existing directory, empty * <li>existing directory, containing an existing cache * <li>non-existing directory, it will be created (including parent directories, if needed) * </ul> * Of course, the location needs to be writeable. 
* @throws FileNotFoundException * if the cache directory cannot be created or written too */ private void initCache(final File newCacheDir) throws FileNotFoundException { if (newCacheDir == null) { cacheDir = null; } else { if (!newCacheDir.exists()) { // 1) dir not existing, create it log.info("Creating PISA cache directory at \"{}\"", newCacheDir.getAbsolutePath()); if (!newCacheDir.mkdirs()) { // 1a) Can't create directory log.error("Could not create PISA cache directory at {}", newCacheDir.getAbsolutePath()); throw new FileNotFoundException("Could not create PISA cache directory"); } } else if (!newCacheDir.isDirectory()) { // 2) not a dir, error out log.error("Suggested PISA cache location {} exists, but is not a directory", newCacheDir.getAbsolutePath()); throw new FileNotFoundException("Suggested cache location exists, but is no directory"); } else if (!newCacheDir.canWrite()) { // 3) dir, but not writable, error out log.error("Suggested cache directory {} exists, but is not writable", newCacheDir.getAbsolutePath()); throw new FileNotFoundException("Suggested cachedirectory exists, but is not writable"); } // All is well, set cache directory cacheDir = newCacheDir; } } /** Initialise the URLs, pass in <code>null</code> to disable webfetching */ private void initURLs(final String newPisaBaseUrl) { if (null == newPisaBaseUrl) { pisaBaseUrl = null; multimerBaseUrl = null; interfaceBaseUrl = null; assemblyBaseUrl = null; } else { pisaBaseUrl = newPisaBaseUrl; multimerBaseUrl = newPisaBaseUrl + "multimers.pisa?"; interfaceBaseUrl = newPisaBaseUrl + "interfaces.pisa?"; assemblyBaseUrl = newPisaBaseUrl + "multimer.pdb?"; } } @Override public String toString() { return "PisaCachedWebDao{pisa@\"" + pisaBaseUrl + "\", cache@\"" + cacheDir.getAbsolutePath() + "\"}"; } /* * END Constructors ================================================================================================ */ /* * Data Methods 
==================================================================================================== */ /** * Gets a map, keyed by PDB-id, containing the associated PISA multimer xml file * <p> * This method first checks the cache to see if the desired files are already present, and adds these to the result. * If some or all items are not present, the next step depends on if webfetching is enabled.<br> * If enabled, a batched web-request is sent to PISA for all ID's not found in cache. Depending on how many items * need to be fetched and how the throttling parameters {@link #webFetchThrottleMillis} and {@link #batchSize} are * set, this may take some time.<br> * If webfetching is disabled, indicated by {@link #pisaBaseUrl} being <code>null</code>, a best-effort, incomplete * result is returned from the entries in the cache. This may mean the returned {@link Map} is completely empty! * <p> * Entries in the map are in the same order as they were given in <code>pdbIDs</code>. * * @param pdbIDs * the PDB id's for which to get the multimer info. * @return A map, sorted in the same order as <code>pdbIDs</code>, containing the XML files keyed to their pdbIDs. * @throws IOException * in case web-fetching fails. */ public LinkedHashMap<String, String> getRawMultimerInfoMap(final String... pdbIDs) throws IOException { final LinkedHashMap<String, String> outputBuffer = new LinkedHashMap<String, String>(); final List<String> webQueue = new ArrayList<String>(); // Loop over the requested ID's a first time to see what can be gotten from cache. for (final String pdbId : pdbIDs) { final String cacheContent = getFromCache(fetchType.Multimer, pdbId); if (cacheContent != null) { outputBuffer.put(pdbId, cacheContent); } else { // Not in cache.. outputBuffer.put(pdbId, null); // reserve a spot to preserve output order equal to input order webQueue.add(pdbId); // add to get-later queue } } // Now that we know all entries to get from web, get them, overriding the null's we just put in. 
if (pisaBaseUrl != null) { // (except if webfetching is disabled) outputBuffer.putAll(getMultipleFromWeb(fetchType.Multimer, webQueue)); } return outputBuffer; } /** * Obtains the raw Multimer XML file for a single PDB ID. * This is a convenience wrapper around {@link #getRawMultimerInfoMap(String...)} * * @param pdbID * A pdb code * @return a single XML-formatted string containing the PISA result. * @throws IOException */ public String getRawMultimerInfoSingle(final String pdbID) throws IOException { final LinkedHashMap<String, String> rawMultimerInfo = getRawMultimerInfoMap(pdbID); final String multimerXml = rawMultimerInfo.get(pdbID); if (multimerXml == null) { log.warn("No multimer results for {}", pdbID); } return multimerXml; } /** * Gets a map, keyed by PDB-id, containing the associated PISA interface xml file * <p> * This method first checks the cache to see if the desired files are already present, and adds these to the result. * If some or all items are not present, the next step depends on if webfetching is enabled.<br> * If enabled, a batched web-request is sent to PISA for all ID's not found in cache. Depending on how many items * need to be fetched and how the throttling parameters {@link #webFetchThrottleMillis} and {@link #batchSize} are * set, this may take some time.<br> * If webfetching is disabled, indicated by {@link #pisaBaseUrl} being <code>null</code>, a best-effort, incomplete * result is returned from the entries in the cache. This may mean the returned {@link Map} is completely empty! * <p> * Entries in the map are in the same order as they were given in <code>pdbIDs</code>. * * @param pdbIDs * the PDB id's for which to get the interface info. * @return A map, sorted in the same order as <code>pdbIDs</code>, containing the XML files. * @throws IOException */ public LinkedHashMap<String, String> getRawInterfaceInfoMap(final String... 
pdbIDs) throws IOException { final LinkedHashMap<String, String> outputBuffer = new LinkedHashMap<String, String>(); final List<String> webQueue = new ArrayList<String>(); // Loop over the requested ID's a first time to see what can be gotten from cache. for (final String pdbId : pdbIDs) { final String cacheContent = getFromCache(fetchType.Interface, pdbId); if (cacheContent != null) { outputBuffer.put(pdbId, cacheContent); } else { // Not in cache.. outputBuffer.put(pdbId, null); // reserve a spot to preserve output order equal to input order webQueue.add(pdbId); // add to get-later queue } } // Now that we know all entries to get from web, get them, overriding the null's we just put in. if (null != pisaBaseUrl) { // (except if webfetching is disabled) outputBuffer.putAll(getMultipleFromWeb(fetchType.Interface, webQueue)); } return outputBuffer; } /** * Obtains the raw Interface XML file for a single PDB ID. * This is a convenience wrapper around {@link #getRawInterfaceInfoMap(String...)} * * @param pdbID * A pdb code * @return a single XML-formatted string containing the PISA result. * @throws IOException */ public String getRawInterfaceInfoSingle(final String pdbID) throws IOException { final LinkedHashMap<String, String> rawInterfaceInfo = getRawInterfaceInfoMap(pdbID); final String interfaceXml = rawInterfaceInfo.get(pdbID); if (interfaceXml == null) { log.warn("No interface results for {}", pdbID); } return interfaceXml; } /** * Fetches the pdb-format coordinates file for the specified assembly. * <p> * First, the entry is looked up in cache, if present, the cached version is served, if not present and webfetching * is enabled, the file is fetched from web and returned (and put to cache too). If webfetching is disabled and the * entry was not in cache, <code>null</code> is returned. 
* * @param rawPdbId * The pdb id (case insensitive) * @param setNr * the pisa major number indicating the assembly set * @param assemblyNr * the pisa minor number indicating the assembly subgroup * @return pdb-style ATOM-coordinates, or <code>null</code> if the assembly does not exist/could not be fetched * from web * @throws IOException * when something goes awry reading from cache or from web. */ public String getAssembly(final String rawPdbId, final int setNr, final int assemblyNr) throws IOException { final String pdbId = rawPdbId.trim().toLowerCase(); String pisaResult = getFromCacheAssembly(pdbId, setNr, assemblyNr); if (null == pisaResult) { // not in cache.. if (null == pisaBaseUrl) { // web-fetching disabled return null; } else { // get from web. try { pisaResult = getAssemblyFromWeb(pdbId, setNr, assemblyNr); } catch (final IOException ioex) { log.warn("Problem getting PISA assembly for {}", pdbId); throw new IOException("Problem getting PISA assembly", ioex); } } } // Check if output indicates errors (meaning either "no such assembly", or actual error) if (pisaResult.startsWith(" *** ")) { if (pisaResult.trim().endsWith("unavailable (out of range)") || pisaResult.trim().endsWith("not found in PISA database")) { // not an error, assembly doesn't exist. return null; } // actual error, probably URL formatting.. log.warn("No PISA assembly found, PISA says: \"{}\"", pisaResult.substring(5).trim()); return null; } return pisaResult; } /** * Gets the 3D rotation-translation biomolecule matrices from PISA. * <p> * These matrices detail how chains in the specified PDB entry should be manipulated to obtain desired the PISA * assembly. * * @param rawPdbId * the 4-letter PDB code we want the assembly of (e.g. "1crn") * @param setNr * the PISA assembly set. "1" is most stable in solution, higher numbers increasingly unstable. * @param assemblyNr * the substructure within an assembly. Note that structurally identical substructures get the * same assemblyNr. 
This DAO ignores all but the first substructure. Ignored chains (those chains not * needed to make this assembly) are listed in the {@link MatricesResult#ignoredChains} property of the * returned result. * @return A compound result of the transformation matrices to apply and the ignored chains (I.E. those * chains whose transformation lead to an identical substructure to the one returned). This result may be * empty. * @throws IOException * when web-fetching fails. */ @SuppressWarnings("unchecked") public MatricesResult getMatrices(final String rawPdbId, final int setNr, final int assemblyNr) throws IOException { final String pdbId = rawPdbId.trim().toLowerCase(); Document mmrInfo; try { mmrInfo = parseDocumentOf(getRawMultimerInfoSingle(pdbId)); } catch (final DocumentException docex) { log.error("Problem parsing Pisa raw multimer info for {}, see debug level for details", pdbId); log.debug("Exception: ", docex); throw new IOException("Problem parsing multimer XML document", docex); } final List<ChainTransform> output = new ArrayList<ChainTransform>(); final Set<String> ignoredChainIds = new HashSet<String>(); try { // Unfortunately, multiple assemblies share the same <ID> subnode if they are structurally identical // So, we need indexing ("[1]") to get only the first of an identical set of assemblies // ('identical' meaning "structurally identical except for ChainIDs") // Sigh... reusing IDs for different things! 
final String matricesXpath = String.format( "(/pisa_multimers/pdb_entry[ pdb_code='%s' ]/asm_set[ ser_no=%d ]/assembly[ id=%d ])[1]/molecule", pdbId, setNr, assemblyNr); final List<Node> matrices = mmrInfo.selectNodes(matricesXpath); for (final Node node : matrices) { final ChainTransform ct = new ChainTransform(node.valueOf("chain_id"), // X-row (Double) node.numberValueOf("rxx"), (Double) node.numberValueOf("rxy"), (Double) node.numberValueOf("rxz"), (Double) node.numberValueOf("tx"), // Y-row (Double) node.numberValueOf("ryx"), (Double) node.numberValueOf("ryy"), (Double) node.numberValueOf("ryz"), (Double) node.numberValueOf("ty"), // Z-row (Double) node.numberValueOf("rzx"), (Double) node.numberValueOf("rzy"), (Double) node.numberValueOf("rzz"), (Double) node.numberValueOf("tz")); if (ct.isUnity() && ct.isStationary()) { ct.setDuplication(false); } else { ct.setDuplication(true); } output.add(ct); } final String ignoredMatricesXpath = String.format( "(/pisa_multimers/pdb_entry[ pdb_code='%s' ]/asm_set[ ser_no=%d ]/assembly[ id=%d ])[position() > 1]/molecule/chain_id", pdbId, setNr, assemblyNr); final List<Node> ignoredChains = mmrInfo.selectNodes(ignoredMatricesXpath); for (final Node ignoredChain : ignoredChains) { ignoredChainIds.add(ignoredChain.getText()); } } catch (final ClassCastException ccex) { log.error( "malformed XML response from PISA when getting matrices for assembly {}:{}.{}: expected list of matrix nodes, but obtained something else", new Object[] { pdbId, setNr, assemblyNr }); throw new IOException("malformed XML response from PISA when getting matrices for assembly of " + pdbId + ": expected list of matrix nodes, but obtained something else", ccex); } return new MatricesResult(output, ignoredChainIds); } /** * @param rawXml * A string containing an XML file * @return the XML file parsed as {@link Document}, parsed by {@link #saxReader}, or <code>null</code> if rawXml is * <code>null</code>. 
* @throws DocumentException * whenever parsing goes wrong. */ private Document parseDocumentOf(final String rawXml) throws DocumentException { if (rawXml == null) { log.debug("Attempt to read 'null' xml"); return null; } if (saxReader == null) { saxReader = new SAXReader(); } final Document parsedDoc = saxReader.read(new StringReader(rawXml)); return parsedDoc; } private enum fetchType { Multimer, Interface } private String getBaseUrlFor(final fetchType type) { switch (type) { case Multimer: return multimerBaseUrl; case Interface: return interfaceBaseUrl; default: return null; // only used if type==null is entered, in which case you deserve pain! } } /** * Retrieves multiple Pisa entries, in batches with delays to not-stress the PISA servers. * * @param type * To indicate if you want multimer info or interface info. * @param pdbIds * the list of PDB id's for which you want info, the order is maintained in the output. * @return a map, keyed by pdb-id, containing the PISA XML file for that ID. * (iteration order is the same as that of <code>pdbIds</code> through use of {@link LinkedHashMap}) * or an empty {@link Map} if web-fetch is disabled ({@link #pisaBaseUrl} set to <code>null</code>) * @throws IOException * whenever webstream reading goes wrong. 
* @see #webFetchThrottleMillis * @see #batchSize */ private LinkedHashMap<String, String> getMultipleFromWeb(final fetchType type, final List<String> pdbIds) throws IOException { if (null == pisaBaseUrl) { return new LinkedHashMap<String, String>(0); } // linked map to preserve iteration-order of provided pdbIds final LinkedHashMap<String, String> output = new LinkedHashMap<String, String>(); final String baseUrl = getBaseUrlFor(type); // loop through PDB-ids, processing them in <batchsize>-sized blocks final int size = pdbIds.size(); int i = 0; while (i < size) { // make a url for a subset of the pdb-IDs final List<String> subBlock = pdbIds.subList(i, Math.min(size, i + batchSize)); final URL fetchUrl = new URL(baseUrl + Util.join(subBlock, ",")); // read the subset XML results log.debug("Getting {}-block from web: {}", type, subBlock); @Cleanup final BufferedReader pisaResult = new BufferedReader(throttledOpenStream(fetchUrl)); // Get xml header and footer. String header = pisaResult.readLine() + "\n"; final String footer = header.replaceAll("<", "</"); header += pisaResult.readLine() + "\n"; // <status>-line // get XML-result contents, split them per "<pdb_entry>"-block String line; String pdbAc = null; StringBuilder buf = new StringBuilder(); while ((line = pisaResult.readLine()) != null) { // encountered start of entry, start a new buffer with header if (" <pdb_entry>".equals(line)) { buf = new StringBuilder(); pdbAc = null; buf.append(header); } // trap accession codes we encounter if (line.startsWith(" <pdb_code>")) { pdbAc = line.substring(14, 18); } // store line buf.append(line); buf.append('\n'); // at end of an entry, process it if (" </pdb_entry>".equals(line)) { buf.append(footer); final String fullXml = buf.toString(); putToCache(fullXml, getCacheFileFor(type, pdbAc)); output.put(pdbAc, fullXml); } } i += batchSize; } return output; } /** * Retrieves the specified assembly from the PISA website. 
* * @param pdbId * PDB accession to get assembly for * @param setNr * Assembly set identifier * @param assemblyNr * Assembly sub-structure identifier * @return the raw pisa assembly file, * or <code>null</code> if webfetching is disabled ({@link #pisaBaseUrl} set to <code>null</code>) * @throws IOException * whenever something goes wrong in the making of the * web-connection, or reading from it. */ private String getAssemblyFromWeb(final String pdbId, final int setNr, final int assemblyNr) throws IOException { if (null == pisaBaseUrl) { return null; } String pisaResult; log.debug("Fetching assembly {}:{},{} from web.", new Object[] { pdbId, setNr, assemblyNr }); URL asmUrl; try { asmUrl = new URL( assemblyBaseUrl + pdbId + ":" + String.valueOf(setNr) + "," + String.valueOf(assemblyNr)); log.debug("Reading assembly from {}", asmUrl); pisaResult = Util.readAllFrom(throttledOpenStream(asmUrl)); putToCache(pisaResult, getCacheFileForAssembly(pdbId, setNr, assemblyNr)); return pisaResult; } catch (final MalformedURLException muex) { throw new IOException("Couldn't read PISA Assembly from web: URL invalid", muex); } catch (final IOException ioex) { throw new IOException("Couldn't read PISA Assembly from web: Problem reading stream", ioex); } } /** * @param pdbId * for which accession should info be gotten from cache? * @param type * what type of info should be gotten from cache: interface or multimer information? * @return The cache contents, or null if there was no cache. 
* (Either because caching was disabled, or because this specific entry wasn't present (yet)) * @throws IOException */ private String getFromCache(final fetchType type, final String pdbId) { if (cacheDir == null) { return null; } String cacheContent = null; final File cacheFile = getCacheFileFor(type, pdbId); if (cacheFile.exists()) { log.trace("{} info for {} found in cache", type, pdbId); try { cacheContent = Util.readAllFrom(new FileReader(cacheFile)); } catch (final IOException ioex) { // problem reading from cache log.warn("Couldn't read {}-cachefile for {}, see debug level for details", type, pdbId); log.debug("Exception details:", ioex); return null; } } return cacheContent; } /** * @param pdbId * @return The cache contents, or null if there was no cache. * (Either because caching was disabled, or because this specific entry wasn't present) * @throws IOException */ private String getFromCacheAssembly(final String pdbId, final int setNr, final int assemblyNr) { if (cacheDir == null) { return null; } String cacheContent = null; final File cacheFile = getCacheFileForAssembly(pdbId, setNr, assemblyNr); if (cacheFile.exists()) { log.debug("Assembly for {} found in cache", pdbId); try { cacheContent = Util.readAllFrom(new FileReader(cacheFile)); } catch (final IOException ioex) { // problem reading from cache log.warn("Couldn't read assembly cachefile for {}, see debug level for details", pdbId); log.debug("Exception details:", ioex); return null; } } return cacheContent; } /** * Puts the raw PISA info to the specified cache file. * <p> * NB: this swallows any {@link IOException} thrown during the writing of said cachefile.<br> * Reasoning:<br> * A cache is non-essential, so exceptions aren't that bad. Log statements ARE emitted at Warn-level when this * happens. 
The full exception stack-trace is available at Debug log-level * * @param pisaContent * The pisa information to store in cache * @param cacheFile * The cache location, should be obtained from {@link #getInterfaceFile(String)}, * {@link #getMultimerFile(String)} or {@link #getCacheFileForAssembly(String, int, int)} */ private void putToCache(final String pisaContent, final File cacheFile) { // Store web-result in cache try { cacheFile.getParentFile().mkdirs(); log.trace("Writing cache file {}", cacheFile.getAbsolutePath()); @Cleanup final FileWriter writer = new FileWriter(cacheFile); writer.write(pisaContent); writer.close(); } catch (final IOException ioex) { // not writing to cache is inconvenient, but not lethal. // Log it, but don't let it fail the entire get-operation log.warn("Couldn't write cache file {}", cacheFile.getAbsolutePath()); log.debug("Exception: ", ioex); } } /** * Gets the base directory for this PDB-id the entry, directory may not exist yet */ private File getCacheBaseDirFor(final String pdbId) { final File file = new File(cacheDir, pdbId.substring(1, 3) + File.separator + pdbId + File.separator); return file; } /** * Gets the file-path for an entry's cache file. File may not exist yet. * * <p> * This gets the the file reference to what may be the cache-file for the specified interface or multimer * (determined by type parameter). If the file.exists(), then the entry is in cache, otherwise it doesn't exist in * cache, but should be stored to this location after fetching. */ private File getCacheFileFor(final fetchType type, final String rawPdbId) { final String pdbId = rawPdbId.trim().toLowerCase(); switch (type) { case Multimer: return new File(getCacheBaseDirFor(pdbId), pdbId + multimerFileSuffix); case Interface: return new File(getCacheBaseDirFor(pdbId), pdbId + interfaceFileSuffix); default: return null; // only used if type==null is entered, in which case you deserve pain! } } /** * Gets the file-path for an entry's assembly file. 
File may not exist yet. * * <p> * This gets the the file reference to what may be the cache-file for the specified assembly. If the file.exists(), * then the entry is in cache, otherwise it doesn't exist in cache, but should be stored to this location after * fetching. * * @param rawPdbId * as for {@link #getAssembly(String, int, int)} * @param setNr * as for {@link #getAssembly(String, int, int)} * @param assemblyNr * as for {@link #getAssembly(String, int, int)} * @return the file reference, whether it exists or not. * @see #getAssembly(String, int, int) * @see #getCacheFileFor(fetchType, String) */ private File getCacheFileForAssembly(final String rawPdbId, final int setNr, final int assemblyNr) { final String pdbId = rawPdbId.trim().toLowerCase(); return new File(getCacheBaseDirFor(pdbId), pdbId + "_" + Integer.toString(setNr) + "_" + Integer.toString(assemblyNr) + assemblyFileSuffix); } /** * Opens a stream to the URL, delaying so that only one request * is made per {@link #webFetchThrottleMillis} milliseconds. * * @see #throttleDelay() * @see #webFetchThrottleMillis * @see #lastWebRequestTime */ private InputStreamReader throttledOpenStream(final URL fetchUrl) throws IOException { throttleDelay(); lastWebRequestTime = System.currentTimeMillis(); log.trace("Opening URL stream: {}", fetchUrl); return new InputStreamReader(fetchUrl.openStream()); } /** * Delays execution if last web request was less than {@link #webFetchThrottleMillis} milliseconds ago. */ private void throttleDelay() { final long now = System.currentTimeMillis(); final long nextOpening = lastWebRequestTime + webFetchThrottleMillis; if (now < nextOpening) { // log.trace("Throttling: delaying thread {} ms", nextOpening - now); Util.delay(nextOpening - now); } } }