Java tutorial
/** * Title: Force Field X. * * Description: Force Field X - Software for Molecular Biophysics. * * Copyright: Copyright (c) Michael J. Schnieders 2001-2015. * * This file is part of Force Field X. * * Force Field X is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 3 as published by * the Free Software Foundation. * * Force Field X is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License along with * Force Field X; if not, write to the Free Software Foundation, Inc., 59 Temple * Place, Suite 330, Boston, MA 02111-1307 USA * * Linking this library statically or dynamically with other modules is making a * combined work based on this library. Thus, the terms and conditions of the * GNU General Public License cover the whole combination. * * As a special exception, the copyright holders of this library give you * permission to link this library with independent modules to produce an * executable, regardless of the license terms of these independent modules, and * to copy and distribute the resulting executable under terms of your choice, * provided that you also meet, for each linked independent module, the terms * and conditions of the license of that module. An independent module is a * module which is not derived from or based on this library. If you modify this * library, you may extend this exception to your version of the library, but * you are not obligated to do so. If you do not wish to do so, delete this * exception statement from your version. */ // Copyright license for hierarchical-clustering-java /** * ***************************************************************************** * Copyright 2013 Lars Behnke * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * **************************************************************************** */ // Copyright license for BioJava /* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * * Created on 7/8/2014 * */ package ffx.xray; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import com.apporiented.algorithm.clustering.Cluster; import org.apache.commons.configuration.CompositeConfiguration; import org.apache.commons.io.FilenameUtils; import org.biojava.bio.structure.Structure; import org.biojava.bio.structure.StructureException; import org.biojava.bio.structure.align.StructurePairAligner; import org.biojava.bio.structure.align.pairwise.AlternativeAlignment; import org.biojava.bio.structure.io.PDBFileReader; import ffx.algorithms.AlgorithmFunctions; import ffx.potential.MolecularAssembly; import ffx.utilities.DoubleIndexPair; import ffx.utilities.Keyword; import ffx.xray.CrystalReciprocalSpace.SolventModel; import ffx.xray.RefinementMinimize.RefinementMode; import static ffx.xray.RescoreAndCluster.ClustAlg.NO_CLUSTERS; import static ffx.xray.RescoreAndCluster.RescoreStrategy.ENERGY_EVAL; import static ffx.xray.RescoreAndCluster.RescoreStrategy.MINIMIZE; import static ffx.xray.RescoreAndCluster.RescoreStrategy.NO_RESCORE; import static ffx.xray.RescoreAndCluster.RescoreStrategy.RS_MIN; import static ffx.xray.RescoreAndCluster.RescoreStrategy.XRAY_MIN; /** * This class performs rescoring and clustering on a provided list of structure * files. Rescoring can be based on energy evaluations, minimization, x-ray * minimization, or real-space minimization. Clustering is based on all-atom * RMSD (using BioJava libraries) and hierarchical clustering from Lars Behnke's * Java clustering package. * * @author Michael J. Schnieders * @since 1.0 * */ public class RescoreAndCluster { private static final Logger logger = Logger.getLogger(RescoreAndCluster.class.getName()); private final RefinementMode refinementMode = RefinementMode.COORDINATES; private final AlgorithmFunctions utils; private RescoreStrategy rscType = NO_RESCORE; private ClustAlg clusterAlg = NO_CLUSTERS; private List<DiffractionFile> diffractionFiles; private List<RealSpaceFile> mapFiles; private boolean doRescore = false; // private boolean doCluster = false; // private double eps = -1.0; private int maxiter = 1000; private double acceptThreshold = 0.0; private int numToAccept = 10; private boolean includeRejected = true; private String fileSuffix = "_rsc"; private final Path pwdPath; private File resultsFile; private File resultDir; private Path resultPath; private boolean printModels = false; public RescoreAndCluster(AlgorithmFunctions utils) { this.pwdPath = generatePath(new File("")); diffractionFiles = new ArrayList<>(); mapFiles = new ArrayList<>(); this.utils = utils; } public void setRefineEps(double eps) { this.eps = eps; } public void setMaxIter(int maxiter) { this.maxiter = maxiter; } public void setResultDirectory(File directory) throws IOException { if (directory == null) { resultDir = null; } else if (directory.exists() && !directory.isDirectory()) { throw new IOException(" Results directory could not be recognized as folder or directory."); } else { directory.mkdirs(); resultDir = directory; resultPath = pwdPath.relativize(generatePath(resultDir)); } } public void setAcceptThreshold(double acceptThreshold) { this.acceptThreshold = acceptThreshold; } public void setNumToAccept(int numToAccept) { this.numToAccept = numToAccept; } public void setIncludeRejected(boolean includeRejected) { this.includeRejected = includeRejected; } public void setFileSuffix(String fileSuffix) { this.fileSuffix = fileSuffix; } public void setResultsFile(File resultsFile) { if (resultsFile != null) { this.resultsFile = resultsFile; } } public void setPrintModels(boolean printModels) { this.printModels = printModels; } public void setRescoreStrategy(RescoreStrategy rscType) { this.rscType = rscType; if (rscType != NO_RESCORE) { //energies = new DoubleIndexPair[numFiles]; } else { //rescoredFiles = modelFiles; } } public void setClusterAlg(ClustAlg clusterAlg) { this.clusterAlg = clusterAlg; } public void setDiffractionFiles(List<DiffractionFile> diffractionFiles) { this.diffractionFiles.addAll(diffractionFiles); } public void setMapFiles(List<RealSpaceFile> mapFiles) { this.mapFiles.addAll(mapFiles); } /** * Utility method which attempts to generate a file Path using the canonical * path string, else uses the absolute path. * * @param file To find path of * @return Canonical or absolute path. */ private Path generatePath(File file) { Path path; try { path = Paths.get(file.getCanonicalPath()); } catch (IOException ex) { path = Paths.get(file.getAbsolutePath()); } return path; } public Cluster cluster(File[] clustFiles) { int numFiles = clustFiles.length; double[][] distanceMatrix = new double[numFiles][numFiles]; PDBFileReader reader = new PDBFileReader(); StructurePairAligner aligner = new StructurePairAligner(); try { for (int i = 0; i < numFiles; i++) { Structure strucI = reader.getStructure(clustFiles[i]); for (int j = i + 1; j < numFiles; j++) { Structure strucJ = reader.getStructure(clustFiles[j]); aligner.align(strucI, strucJ); AlternativeAlignment[] alignments = aligner.getAlignments(); double bestRMSD = alignments[0].getRmsd(); for (int k = 1; k < alignments.length; k++) { double alignRMSD = alignments[k].getRmsd(); bestRMSD = alignRMSD < bestRMSD ? alignRMSD : bestRMSD; } distanceMatrix[i][j] = bestRMSD; } } } catch (StructureException ex) { } catch (IOException ex) { Logger.getLogger(RescoreAndCluster.class.getName()).log(Level.SEVERE, null, ex); } return null; } /** * Launch the rescoring/clustering algorithms on provided files. Assumes it * has been given valid files to be run on; use * CoordinateFileFilter.acceptDeep(File file) before sending files to this * method. * * @param modelFiles Files to rescore and/or cluster. */ public void runRsc(File[] modelFiles) { int numFiles = modelFiles.length; if (rscType != NO_RESCORE) { if (clusterAlg != NO_CLUSTERS) { logger.info(String.format(" Rescoring and clustering %d files", numFiles)); logger.info(String.format(" Rescore algorithm: %s, clustering algorithm: %s", rscType.toString(), clusterAlg.toString())); cluster(rescore(modelFiles)); // Cluster the files returned by rescore. } else { logger.info(String.format(" Rescoring %d files", numFiles)); logger.info(String.format(" Rescore algorithm: %s", rscType.toString())); rescore(modelFiles); } } else if (clusterAlg != NO_CLUSTERS) { logger.info(String.format(" Clustering %d files", numFiles)); logger.info(String.format(" Clustering algorithm: %s", clusterAlg.toString())); cluster(modelFiles); } else { logger.info(" No rescoring or clustering algorithm selected."); } } private File rescoreSingle(File modelFile, RescoreStrategy rscType, DoubleIndexPair[] energies, int i) { Path filepath = generatePath(modelFile); if (filepath == null) { logger.warning(String.format(" Could not generate path to file %s", modelFile.toPath())); return null; } String filename = pwdPath.relativize(filepath).toString(); File retFile = modelFile; try { MolecularAssembly[] openedAssemblies = utils.open(filename); MolecularAssembly assembly = openedAssemblies[0]; switch (rscType) { case NO_RESCORE: logger.warning(" Rescore is being called with rscType = NO_RESCORE"); break; case ENERGY_EVAL: break; case MINIMIZE: logger.info(String.format("\n Running minimize on %s", filename)); logger.info(String.format(" RMS gradient convergence criteria: %f", eps)); utils.energy(assembly); utils.minimize(assembly, eps); String ext = FilenameUtils.getExtension(filename); ext = ".".concat(ext); if (resultDir != null) { filename = FilenameUtils.getBaseName(filename); filename = FilenameUtils.concat(resultPath.toString(), filename); } else { filename = FilenameUtils.removeExtension(filename); } filename = filename.concat(fileSuffix).concat(ext); retFile = new File(filename); if (ext.toUpperCase().contains("XYZ")) { utils.saveAsXYZ(assembly, retFile); } else { utils.saveAsPDB(assembly, retFile); } break; case XRAY_MIN: logger.info(String.format("\n Running x-ray minimize on %s", filename)); DiffractionFile diffractionFile = null; if (diffractionFiles.isEmpty()) { diffractionFile = new DiffractionFile(assembly, 1.0, false); diffractionFiles.add(diffractionFile); } CompositeConfiguration properties = Keyword.loadProperties(modelFile); DiffractionData diffractionData = new DiffractionData(assembly, properties, SolventModel.POLYNOMIAL, diffractionFiles.toArray(new DiffractionFile[diffractionFiles.size()])); diffractionData.scaleBulkFit(); diffractionData.printStats(); utils.energy(assembly); RefinementMinimize refinementMinimize = new RefinementMinimize(diffractionData, refinementMode); if (eps < 0.0) { eps = refinementMinimize.getEps(); } logger.info(String.format("\n RMS gradient convergence criteria: %8.5f max number of iterations %d", eps, maxiter)); refinementMinimize.minimize(eps, maxiter); diffractionData.scaleBulkFit(); diffractionData.printStats(); ext = FilenameUtils.getExtension(filename); ext = ".".concat(ext); if (resultDir != null) { filename = FilenameUtils.getBaseName(filename); filename = FilenameUtils.concat(resultPath.toString(), filename); } else { filename = FilenameUtils.removeExtension(filename); } filename = filename.concat(fileSuffix); diffractionData.writeData(filename + ".mtz"); filename = filename.concat(ext); diffractionData.writeModel(filename); retFile = new File(filename); if (diffractionFile != null) { try { diffractionFiles.remove(diffractionFile); } catch (UnsupportedOperationException ex) { // This should never occur, because diffractionFiles should be of a List type supporting remove(object). diffractionFiles = new ArrayList<>(); } } break; case RS_MIN: logger.info(String.format("\n Running real-space minimize on %s", filename)); RealSpaceFile realspaceFile = null; if (mapFiles.isEmpty()) { realspaceFile = new RealSpaceFile(assembly); mapFiles.add(realspaceFile); } properties = Keyword.loadProperties(modelFile); RealSpaceData realspaceData = new RealSpaceData(assembly, properties, mapFiles.toArray(new RealSpaceFile[mapFiles.size()])); utils.energy(assembly); refinementMinimize = new RefinementMinimize(realspaceData, refinementMode); if (eps < 0.0) { eps = 1.0; } logger.info(String.format("\n RMS gradient convergence criteria: %8.5f max number of iterations %d", eps, maxiter)); refinementMinimize.minimize(eps, maxiter); ext = FilenameUtils.getExtension(filename); ext = ".".concat(ext); if (resultDir != null) { filename = FilenameUtils.getBaseName(filename); filename = FilenameUtils.concat(resultPath.toString(), filename); } else { filename = FilenameUtils.removeExtension(filename); } filename = filename.concat(fileSuffix).concat(ext); retFile = new File(filename); if (ext.toUpperCase().contains("XYZ")) { utils.saveAsXYZ(assembly, retFile); } else { utils.saveAsPDB(assembly, retFile); } if (realspaceFile != null) { try { mapFiles.remove(realspaceFile); } catch (UnsupportedOperationException ex) { // This should never occur, because diffractionFiles should be of a List type supporting remove(object). mapFiles = new ArrayList<>(); } } break; default: logger.severe(" No valid rescore type: FFX will not continue."); } double e = utils.returnEnergy(assembly); energies[i] = new DoubleIndexPair(i, e); utils.closeAll(openedAssemblies); } catch (Exception ex) { logger.warning(String.format(" Exception rescoring on file %s", filename)); logger.info(ex.toString()); } return retFile; } public File[] rescore(File[] modelFiles) { int numFiles = modelFiles.length; DoubleIndexPair[] energies = new DoubleIndexPair[numFiles]; File[] rescoredFiles = new File[numFiles]; for (int i = 0; i < numFiles; i++) { rescoredFiles[i] = rescoreSingle(modelFiles[i], rscType, energies, i); } Arrays.sort(energies); ArrayList<Integer> acceptedFileIndices = new ArrayList<>(); int numAccepted = 1; acceptedFileIndices.add(energies[0].getIndex()); double minEnergy = energies[0].getDoubleValue(); if (acceptThreshold > 0.0) { for (int i = 1; i < numFiles; i++) { DoubleIndexPair currentPair = energies[i]; double e = currentPair.getDoubleValue(); if (e - minEnergy <= acceptThreshold) { acceptedFileIndices.add(currentPair.getIndex()); ++numAccepted; } else { break; } } } else { numAccepted = numToAccept < numFiles ? numToAccept : numFiles; for (int i = 1; i < numAccepted; i++) { acceptedFileIndices.add(energies[i].getIndex()); } } for (int i = 0; i < numAccepted; i++) { File filei = rescoredFiles[energies[i].getIndex()]; Path pathi; pathi = generatePath(filei); String relPath = pwdPath.relativize(pathi).toString(); logger.info(String.format(" Accepted: %s at %10.6f kcal/mol", relPath, energies[i].getDoubleValue())); } for (int i = numAccepted; i < numFiles; i++) { File filei = rescoredFiles[energies[i].getIndex()]; Path pathi; pathi = generatePath(filei); String relPath = pwdPath.relativize(pathi).toString(); logger.info(String.format(" Rejected: %s at %10.6f kcal/mol", relPath, energies[i].getDoubleValue())); } try (BufferedWriter bw = new BufferedWriter(new FileWriter(resultsFile, true))) { Path rscFilePath = generatePath(resultsFile); String rscFileName = pwdPath.relativize(rscFilePath).toString(); logger.info(String.format(" Printing accepted files to rescore file %s", rscFileName)); if (acceptThreshold > 0.0) { String message = String.format("Minimum potential energy: %f, threshold = %6.4f", minEnergy, acceptThreshold); bw.write(message); message = " " + message + "\n"; logger.info(message); } else { double maxEnergy = energies[numAccepted - 1].getDoubleValue(); String message = String.format("Minimum potential energy: %f, maximum accepted energy %f", minEnergy, maxEnergy); bw.write(message); message = " " + message + "\n"; logger.info(message); } bw.newLine(); bw.newLine(); String message = String.format("Number of files accepted: %d", numAccepted); bw.write(message); bw.newLine(); logger.info(String.format(" %s", message)); for (int i = 0; i < numAccepted; i++) { int fileIndex = energies[i].getIndex(); File pointedFile = rescoredFiles[fileIndex]; Path pointedPath = generatePath(pointedFile); String relPath = pwdPath.relativize(pointedPath).toString(); double thisEnergy = energies[i].getDoubleValue(); logger.info(String.format(" Accepted file %d energy %9.3f < %9.3f kcal/mol", (i + 1), thisEnergy, minEnergy + acceptThreshold)); logger.info(String.format(" %s", relPath)); try { bw.write(String.format("Accepted file: %s rank %d energy %f\n", relPath, (i + 1), thisEnergy)); if (printModels) { bw.newLine(); try (BufferedReader br = new BufferedReader(new FileReader(pointedFile))) { String line = br.readLine(); while (line != null) { bw.write(line); bw.newLine(); line = br.readLine(); } } bw.newLine(); } } catch (IOException ex) { logger.warning(String.format(" File %s had exception printing to rescore file %s", relPath, ex.toString())); } } message = String.format("\n Number of files not accepted: %d", numFiles - numAccepted); logger.info(String.format(" %s", message)); bw.newLine(); bw.write(message); bw.newLine(); if (includeRejected) { for (int i = numAccepted; i < numFiles; i++) { int fileIndex = energies[i].getIndex(); File pointedFile = rescoredFiles[fileIndex]; Path pointedPath = generatePath(pointedFile); String relPath = pwdPath.relativize(pointedPath).toString(); double thisEnergy = energies[i].getDoubleValue(); message = String.format("Non-accepted file: %s rank %d energy %f", relPath, (i + 1), thisEnergy); logger.info(String.format(" %s", message)); try { bw.write(message); bw.newLine(); } catch (IOException ex) { logger.warning(String.format(" File %s had exception printing to rescore file %s", relPath, ex.toString())); } } } } catch (IOException ex) { logger.warning(String.format(" Exception in writing rescore file: %s", ex.toString())); } return rescoredFiles; } public enum RescoreStrategy { NO_RESCORE { @Override public String toString() { return "none"; } }, ENERGY_EVAL { @Override public String toString() { return "potential energy"; } }, MINIMIZE { @Override public String toString() { return "force field minimization"; } }, XRAY_MIN { @Override public String toString() { return "x-ray hybrid target minimization"; } }, RS_MIN { @Override public String toString() { return "real-space hybrid target minimization"; } }; } public enum ClustAlg { /** * All algorithms start with each point a cluster, and then join the * closest clusters together until everything is one cluster. SLINK is * Single Linkage; cluster-cluster distance is defined by the nearest * two points. This is vulnerable to chaining; two clusters might be * joined by a handful of intermediate points. CLINK is Complete * Linkage; CLINK uses the greatest distance between points in two * clusters. AV_LINK (average link) is the UPGMA (Unweighted Pair Group * Method with Arithmetic Mean) function, which takes the mean distance * between points in a cluster. * * Makes me wonder if there's a WPGMA algorithm which does weight one * way or the other, or perhaps a RPGMA RMSD-like algorithm. */ NO_CLUSTERS { @Override public String toString() { return "none"; } }, SLINK { @Override public String toString() { return "single linkage"; } }, AV_LINK { @Override public String toString() { return "average linkage (UPGMA)"; } }, CLINK { @Override public String toString() { return "complete linkage"; } }; } }