// Java tutorial
/** * Copyright (c) 2011 William Greenwood and Jose Cruz-Toledo * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ package org.semanticscience.narf.structures.factories.tertiary; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.zip.GZIPInputStream; import org.apache.commons.io.FileUtils; import org.semanticscience.narf.structures.factories.ExtractedStructureFactory; import org.semanticscience.narf.structures.interactions.NucleotideInteraction; import org.semanticscience.narf.structures.lib.PdbHelper; import org.semanticscience.narf.structures.lib.exceptions.InvalidResidueException; import org.semanticscience.narf.structures.parts.Sequence; import org.semanticscience.narf.structures.tertiary.ExtractedTertiaryStructure; /** * An abstract class for nucleic acid tertiary structure annotators to minimize * the effort required for their implementation by sharing common methods. * * @author Jose Cruz-Toledo * @author William Greenwood * @since 1.6 */ public abstract class ExtractedTertiaryStructureFactory extends ExtractedStructureFactory { /** * A temporary directory where the output of running a tertiary structure * extractor will be stored */ private File tempDir = null; /** * Construct a annotated tertiary structure factory for annotated tertiary * nucleic acid structures. * * @param aPredictorName * the name of the program that annotated the nucleic acid * tertiary structure * @param aPredictorVersion * the version of the program that annotated the nucleic acid * tertiary structure */ protected ExtractedTertiaryStructureFactory(String aPredictorName, String aPredictorVersion) { super(aPredictorName, aPredictorVersion); } /** * Get all nucleic acid extracted structures produced using information * generated by the tertiary structure annotator. 
* * @param aFile * a PDB structure file * @return a set of annotated nucleic acid tertiary structures * @throws FileNotFoundException * if the PDB structure file does not exist * @throws IOException * if any IO error occur reading the output of the tertiary * structure annotator or writing the output of the tertiary * structure annotator * @throws InvalidResidueException * if any of the residues created are invalid */ public Set<ExtractedTertiaryStructure> getStructures(File aFile) throws FileNotFoundException, IOException, InvalidResidueException { return this.getStructures(aFile, new String[0]); } /** * Get all nucleic acid extracted structures found in the input directory. * Store the annotator's output files in the output directory * * @param anInputDir * the input directory containing pdb files * @param anOutputDir * the directory where the output of the annotator will be stored * @return a map of a set of extracted tertiary structures, where the key is * a PDBId and the value is the set of extracted tertiary structures * @throws IOException * if either the input or output directory are not valid */ protected abstract Map<String, Set<ExtractedTertiaryStructure>> getStructures(File anInputDir, File anOutputDir) throws IOException; /** * Get all nucleic acid tertiary structures produced using information * generated by the tertiary structure predictor. 
* * @param aPdbFile * a PDB structure file * @param commands * set of commands to modify the execution of the annoator * @return a set of annotated nucleic acid tertiary structures * @throws FileNotFoundException * if the PDB structure file does not exist * @throws IOException * if any IO error occur reading the output of the tertiary * structure annotator or writing the output of the tertiary * structure annotator * @throws InvalidResidueException * if any of the residues are invalid */ public Set<ExtractedTertiaryStructure> getStructures(File aPdbFile, String[] commands) throws FileNotFoundException, IOException, InvalidResidueException { if (!aPdbFile.exists() || aPdbFile.isDirectory()) { throw new FileNotFoundException("There is no PDB file with the name specified."); } String extension = PdbHelper.getFileExtension(aPdbFile); File directory = new File(FileUtils.getTempDirectoryPath() + "/pdb/"); File pdbFile = new File(directory.getAbsolutePath() + "/" + aPdbFile.getName()); FileUtils.copyFile(aPdbFile, pdbFile); // check if the file is compressed if (extension.equals("gz")) { GZIPInputStream gzipInputStream = new GZIPInputStream(new FileInputStream(pdbFile)); File gunzippedFile = new File(directory.getAbsolutePath() + pdbFile.getName().replace(".gz", "")); OutputStream out = new FileOutputStream(gunzippedFile); byte[] buf = new byte[1024]; int len; while ((len = gzipInputStream.read(buf)) > 0) out.write(buf, 0, len); gzipInputStream.close(); out.close(); pdbFile.delete(); pdbFile = gunzippedFile; } String pdbId = PdbHelper.findPdbId(pdbFile); int numberOfModels = PdbHelper.findNumberOfModels(pdbFile); Set<ExtractedTertiaryStructure> tertiaryAnnotatedStructures = new HashSet<ExtractedTertiaryStructure>(); if (numberOfModels == 1) { File annotatedStructure = this.execute(pdbFile, commands); Map<String, Sequence> sequenceMap = this.parseSequences(pdbFile, annotatedStructure); Set<NucleotideInteraction> interactions = this.parseInteractions(sequenceMap, 
annotatedStructure); tertiaryAnnotatedStructures .add(new ExtractedTertiaryStructure(this, pdbFile, pdbId, 1, sequenceMap, interactions)); } else { for (int modelNumber = 1; modelNumber <= numberOfModels; modelNumber++) { File modelFile = PdbHelper.extractModelFromPDB(pdbFile, new File(FileUtils.getTempDirectoryPath() + "/pdb/"), pdbId, modelNumber); File outputFile = this.execute(modelFile, commands); Map<String, Sequence> sequenceMap = this.parseSequences(modelFile, outputFile); Set<NucleotideInteraction> interactions = this.parseInteractions(sequenceMap, outputFile); tertiaryAnnotatedStructures.add(new ExtractedTertiaryStructure(this, pdbFile, pdbId, modelNumber, sequenceMap, interactions)); } } return tertiaryAnnotatedStructures; } /** * Parse the sequence data out of the PDB structure file and the raw output * file of the tertiary structure annotator. Depending on the annotator, * there might be discrepancies between PDB's annotation of residue * positions. Use this method to normalize both files. Usually trust what * the PDB file gives you * * @param aPdbFile * a PDB structure file * @param anOutputFile * the raw output file of the tertiary structure annotator * @return a map of the chain identifiers of a nucleic acid to the sequence * of the chain * @throws IOException * if any IO error occurs reading the raw output of the tertiary * structure annotator */ protected abstract Map<String, Sequence> parseSequences(File aPdbFile, File anOutputFile) throws IOException, InvalidResidueException; /** * Parse all interactions from the raw output of the tertiary structure * annotator. 
* * @param aSequenceMap * a mapping of the chain identifiers of a nucleic acid to the * sequence of the chain * @param annotatorOutputFile * the raw output file of the tertiary structure annotator * @return a set of interactions * @throws IOException * if any IO error occurs reading the raw output of the tertiary * structure annotator */ protected abstract Set<NucleotideInteraction> parseInteractions(Map<String, Sequence> aSequenceMap, File annotatorOutputFile) throws IOException; }