Java tutorial
/* * * * Copyright (C) 2009-2015 Syed Asad Rahman <asad@ebi.ac.uk> * * Contact: cdk-devel@lists.sourceforge.net * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * All we ask is that proper credit is given for our work, which includes * - but is not limited to - adding the above copyright notice to the beginning * of your source code files, and to any copyright notice that you may distribute * with programs based on this work. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* */
package cmd;

import java.io.IOException;
import java.util.*;
import org.apache.commons.cli.MissingOptionException;
import org.apache.commons.cli.ParseException;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.AtomContainer;
import org.openscience.cdk.AtomContainerSet;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.ConnectivityChecker;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomContainerSet;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.smsd.AtomAtomMapping;
import org.openscience.smsd.BaseMapping;
import org.openscience.smsd.Isomorphism;
import org.openscience.smsd.Substructure;
import org.openscience.smsd.interfaces.Algorithm;
import org.openscience.smsd.mcss.JobType;
import org.openscience.smsd.mcss.MCSS;
import org.openscience.smsd.tools.AtomContainerComparator;
import org.openscience.smsd.tools.ExtAtomContainerManipulator;

/**
 * Command-line driver: parses the arguments, validates the input and
 * dispatches to one of three jobs (single query vs. single target, single
 * query vs. multiple targets, or an N-molecule common substructure search),
 * writing the results through an {@code OutputHandler}.
 *
 * @author Syed Asad Rahman <asad@ebi.ac.uk>
 */
public class SMSDcmd {

    // Logger is bound to this class (it was previously bound to InputHandler by mistake).
    private final static ILoggingTool logger = LoggingToolFactory.createLoggingTool(SMSDcmd.class);

    /**
     * Program entry point. Prints one of the help screens when requested,
     * otherwise runs the job described by the arguments.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        ArgumentHandler argumentHandler = new ArgumentHandler();
        try {
            argumentHandler.parseCommandLineOptions(args);
            InputHandler inputHandler = new InputHandler(argumentHandler);
            if (argumentHandler.isHelp()) {
                argumentHandler.printHelp();
                inputHandler.printDataTypeHelp();
            } else if (argumentHandler.isImageOptionHelp()) {
                OutputHandler outputHandler = new OutputHandler(argumentHandler);
                outputHandler.printImageOptionsHelp();
            } else {
                run(argumentHandler, inputHandler);
            }
        } catch (ParseException pe) {
            System.err.println("Problem with the arguments : " + pe.getMessage());
        }
    }

    /**
     * Runs the job described by already-parsed arguments, creating the
     * {@code InputHandler} from them.
     *
     * @param argumentHandler parsed command line options
     */
    public static void run(ArgumentHandler argumentHandler) {
        run(argumentHandler, new InputHandler(argumentHandler));
    }

    /**
     * Validates the input and dispatches to the matching job. Failures are
     * logged through the class logger and are not rethrown.
     *
     * @param argumentHandler parsed command line options
     * @param inputHandler source of the query/target molecules
     */
    public static void run(ArgumentHandler argumentHandler, InputHandler inputHandler) {
        OutputHandler outputHandler = new OutputHandler(argumentHandler);
        try {
            InputHandler.MatchType matchType = inputHandler.validateInput();
            switch (matchType) {
                case SINGLE_QUERY_SINGLE_TARGET:
                    runSingleQuerySingleTarget(inputHandler, outputHandler, argumentHandler);
                    break;
                case SINGLE_QUERY_MULTIPLE_TARGET:
                    runSingleQueryMultipleTarget(inputHandler, outputHandler, argumentHandler);
                    break;
                case NMCS:
                    runNMCS(inputHandler, outputHandler, argumentHandler);
                    break;
                case UNKNOWN:
                default:
                    throw new IOException("Unknown types " + argumentHandler.getQueryType()
                            + " " + argumentHandler.getTargetType());
            }
        } catch (IOException ioe) {
            logger.error("IO Problem : " + ioe.getMessage());
        } catch (CDKException e) {
            logger.error("CDK Problem : " + e.getMessage());
        } catch (CloneNotSupportedException e) {
            logger.error(e.toString());
        } catch (MissingOptionException e) {
            logger.error("Missing argument : " + e.getMessage());
        }
    }

    /**
     * Computes the common substructure(s) of all targets via {@code MCSS},
     * optionally writes them out, and optionally re-maps each solution onto
     * every target to draw a circle image.
     *
     * @param inputHandler source of the target molecules
     * @param outputHandler sink for molecules and images
     * @param argumentHandler parsed command line options
     * @throws IOException if the targets cannot be read (unknown input type) or output fails
     * @throws CDKException
     * @throws CloneNotSupportedException
     */
    public static void runNMCS(InputHandler inputHandler, OutputHandler outputHandler,
            ArgumentHandler argumentHandler) throws IOException, CDKException, CloneNotSupportedException {
        List<IAtomContainer> atomContainerSet = inputHandler.getAllTargets();
        String targetType = argumentHandler.getTargetType();
        if (atomContainerSet == null) {
            throw new IOException("Unknown input type " + targetType);
        }
        Comparator<IAtomContainer> comparator = new AtomContainerComparator();
        Collections.sort(atomContainerSet, comparator);

        boolean matchBonds = argumentHandler.isMatchBondType();
        boolean matchRings = argumentHandler.isMatchRingType();
        boolean matchAtomTypes = argumentHandler.isMatchAtomType();
        int filter = argumentHandler.getChemFilter();

        /*
         * Configure the targets
         */
        for (IAtomContainer target : atomContainerSet) {
            inputHandler.configure(target, targetType);
        }

        /*
         * Run N MULTIPLE on targets
         */
        MCSS mcss = new MCSS(atomContainerSet, JobType.MULTIPLE, 0, matchBonds, matchRings, matchAtomTypes);
        Collection<IAtomContainer> calculatedMCSS = mcss.getCalculateMCSS();
        IAtomContainerSet solutions = new AtomContainerSet();
        for (IAtomContainer mcsAtomContainer : calculatedMCSS) {
            if (mcsAtomContainer == null || mcsAtomContainer.getAtomCount() <= 0) {
                continue;
            }
            Object title = mcsAtomContainer.getProperty(CDKConstants.TITLE);
            if (!ConnectivityChecker.isConnected(mcsAtomContainer)) {
                System.err.println("WARNING : Skipping file " + title + " not connected ");
                // Skip only this fragment; returning here used to discard every
                // solution (and all output) because of one disconnected fragment.
                continue;
            }
            String mcsFileName;
            if (title != null) {
                mcsFileName = "untitled".equals(title) ? "mcs" : (String) title;
            } else {
                mcsFileName = "Fragment";
            }
            mcsAtomContainer.setID(mcsFileName);
            argumentHandler.setQueryMolOutName(mcsAtomContainer.getID());
            inputHandler.configure(mcsAtomContainer, targetType);
            solutions.addAtomContainer(mcsAtomContainer);
        }

        if (argumentHandler.shouldOutputSubgraph()) {
            String outpath = argumentHandler.getOutputFilepath();
            String outtype = argumentHandler.getOutputFiletype();
            outputHandler.writeMol(outtype, solutions, outpath);
        }

        /*
         * For image generation RE-RUN the MULTIPLE with the common fragment
         */
        if (argumentHandler.isImage() && !solutions.isEmpty()) {
            IChemObjectBuilder builder = DefaultChemObjectBuilder.getInstance();
            int index = 1;
            for (IAtomContainer ac : solutions.atomContainers()) {
                if (ac != null && ac.getAtomCount() > 0) {
                    IAtomContainer mcsAtomContainer = ac.clone();
                    // now that we have the N-MULTIPLE, remap it onto every target
                    List<Map<Integer, Integer>> mappings = new ArrayList<>();
                    List<IAtomContainer> secondRoundTargets = new ArrayList<>();
                    for (IAtomContainer target : atomContainerSet) {
                        // The last flag is the atom-type flag; matchBonds was passed here twice before.
                        BaseMapping smsd = run(mcsAtomContainer, target, filter,
                                matchBonds, matchRings, matchAtomTypes);
                        mappings.add(getIndexMapping(smsd.getFirstAtomMapping()));
                        secondRoundTargets.add(builder.newInstance(IAtomContainer.class,
                                smsd.getFirstAtomMapping().getTarget()));
                    }
                    String name = inputHandler.getTargetName() + "_" + String.valueOf(index);
                    outputHandler.writeCircleImage(mcsAtomContainer, secondRoundTargets, name, mappings);
                }
                // index advances for every solution, including ones skipped above
                index++;
            }
        }
    }

    /**
     * Matches one query against every target returned by the input handler
     * and reports each result. A disconnected query aborts the job; a
     * disconnected target is skipped.
     *
     * @param inputHandler source of the query and target molecules
     * @param outputHandler sink for molecules, mappings, results and images
     * @param argumentHandler parsed command line options
     * @throws IOException if the targets cannot be read (unknown input type) or output fails
     * @throws CDKException
     * @throws CloneNotSupportedException
     */
    public static void runSingleQueryMultipleTarget(InputHandler inputHandler, OutputHandler outputHandler,
            ArgumentHandler argumentHandler) throws IOException, CDKException, CloneNotSupportedException {
        IAtomContainer query = inputHandler.getQuery();
        String name = (String) query.getProperty(CDKConstants.TITLE);
        boolean removeHydrogens = argumentHandler.isApplyHRemoval();

        /*
         * check connectivity
         */
        if (!ConnectivityChecker.isConnected(query)) {
            System.err.println("WARNING : Skipping file " + inputHandler.getQueryName() + " not connected ");
            return;
        }
        if (removeHydrogens) {
            query = new AtomContainer(AtomContainerManipulator.removeHydrogens(query));
            query.setProperty(CDKConstants.TITLE, name);
            query.setID(name);
        }

        outputHandler.writeQueryMol(query);
        String out = ".out";
        outputHandler.startAppending(out);
        // From here on the output files are open: always close them, even on failure.
        try {
            // NOTE(review): the clock starts once, before the loop, so the time reported
            // for each target is cumulative since the first target - confirm this is intended.
            long startTime = System.currentTimeMillis();
            boolean matchBonds = argumentHandler.isMatchBondType();
            boolean matchRings = argumentHandler.isMatchRingType();
            boolean matchAtomTypes = argumentHandler.isMatchAtomType();

            int targetNumber = 0;
            List<IAtomContainer> allTargets = inputHandler.getAllTargets();
            String targetType = argumentHandler.getTargetType();
            if (allTargets == null) {
                throw new IOException("Unknown input type " + targetType);
            }

            for (IAtomContainer target : allTargets) {
                if (!ConnectivityChecker.isConnected(target)) {
                    logger.error("WARNING : Skipping target AtomContainer "
                            + target.getProperty(CDKConstants.TITLE) + " as it is not connected.");
                    continue;
                }
                inputHandler.configure(target, targetType);

                BaseMapping smsd;
                if (argumentHandler.isSubstructureMode()) {
                    smsd = runSubstructure(query, target, argumentHandler.getChemFilter(),
                            matchBonds, matchRings, matchAtomTypes);
                } else {
                    smsd = run(query, target, argumentHandler.getChemFilter(),
                            matchBonds, matchRings, matchAtomTypes);
                }
                long executionTime = System.currentTimeMillis() - startTime;

                outputHandler.writeTargetMol(smsd.getTarget());

                IAtomContainer queryLocal = query.getBuilder().newInstance(IAtomContainer.class,
                        smsd.getFirstAtomMapping().getQuery());
                IAtomContainer targetLocal = target.getBuilder().newInstance(IAtomContainer.class,
                        smsd.getFirstAtomMapping().getTarget());

                reportMapping(smsd, queryLocal, targetLocal, "_" + targetNumber, executionTime,
                        inputHandler, outputHandler, argumentHandler);
                // only targets that were actually processed are counted
                targetNumber++;
            }
        } finally {
            outputHandler.closeFiles();
        }
    }

    /**
     * Matches one query against one target and reports the result; optionally
     * also writes the common fragment. When images are requested, both
     * molecules must be connected or the job is skipped.
     *
     * @param inputHandler source of the query and target molecules
     * @param outputHandler sink for molecules, mappings, results and images
     * @param argumentHandler parsed command line options
     * @throws IOException
     * @throws CDKException
     * @throws CloneNotSupportedException
     */
    public static void runSingleQuerySingleTarget(InputHandler inputHandler, OutputHandler outputHandler,
            ArgumentHandler argumentHandler) throws IOException, CDKException, CloneNotSupportedException {
        IAtomContainer query = inputHandler.getQuery();
        IAtomContainer target = inputHandler.getTarget();
        boolean removeHydrogens = argumentHandler.isApplyHRemoval();

        /*
         * check connectivity
         */
        if (argumentHandler.isImage()) {
            if (!ConnectivityChecker.isConnected(query)) {
                logger.error("WARNING : Skipping file " + inputHandler.getQueryName() + " not connectted ");
                return;
            }
            if (!ConnectivityChecker.isConnected(target)) {
                logger.error("WARNING : Skipping target AtomContainer " + inputHandler.getTargetName()
                        + " as it is not connected.");
                return;
            }
        }

        // Each molecule is named after its own title, falling back to a fixed default.
        // (The two names used to be crossed over, so the IDs were swapped after
        // hydrogen removal.)
        String queryId = "Query";
        String targetId = "Target";
        if (target.getProperty(CDKConstants.TITLE) != null) {
            targetId = (String) target.getProperty(CDKConstants.TITLE);
            target.setID(targetId);
            argumentHandler.setTargetMolOutName(target.getID());
        }
        if (query.getProperty(CDKConstants.TITLE) != null) {
            queryId = (String) query.getProperty(CDKConstants.TITLE);
            query.setID(queryId);
            argumentHandler.setQueryMolOutName(query.getID());
        }

        /*
         * remove hydrogens
         */
        if (removeHydrogens) {
            query = new AtomContainer(AtomContainerManipulator.removeHydrogens(query));
            query.setID(queryId);
            target = new AtomContainer(AtomContainerManipulator.removeHydrogens(target));
            target.setID(targetId);
        }

        String out = ".out";
        // NOTE(review): this branch looks inverted (append mode -> startNew); it is kept
        // as found because OutputHandler is not visible here - confirm against OutputHandler.
        if (!argumentHandler.isAppendMode()) {
            outputHandler.startAppending(out);
        } else {
            outputHandler.startNew(out);
        }
        // From here on the output files are open: always close them, even on failure.
        try {
            ExtAtomContainerManipulator.aromatizeDayLight(query);
            ExtAtomContainerManipulator.aromatizeDayLight(target);

            if (argumentHandler.isApplyHAdding()) {
                AtomContainerManipulator.convertImplicitToExplicitHydrogens(query);
                AtomContainerManipulator.convertImplicitToExplicitHydrogens(target);
            }

            long startTime = System.currentTimeMillis();
            boolean matchBonds = argumentHandler.isMatchBondType();
            boolean matchRings = argumentHandler.isMatchRingType();
            boolean matchAtomTypes = argumentHandler.isMatchAtomType();

            BaseMapping smsd;
            if (argumentHandler.isSubstructureMode()) {
                smsd = runSubstructure(query, target, argumentHandler.getChemFilter(),
                        matchBonds, matchRings, matchAtomTypes);
            } else {
                smsd = run(query, target, argumentHandler.getChemFilter(),
                        matchBonds, matchRings, matchAtomTypes);
            }

            query = query.getBuilder().newInstance(IAtomContainer.class,
                    smsd.getFirstAtomMapping().getQuery());
            target = target.getBuilder().newInstance(IAtomContainer.class,
                    smsd.getFirstAtomMapping().getTarget());

            long executionTime = System.currentTimeMillis() - startTime;

            // write out the input AtomContainers to files
            outputHandler.writeQueryMol(smsd.getFirstAtomMapping().getQuery());
            outputHandler.writeTargetMol(smsd.getFirstAtomMapping().getTarget());

            reportMapping(smsd, query, target, null, executionTime,
                    inputHandler, outputHandler, argumentHandler);

            if (argumentHandler.shouldOutputSubgraph()) {
                IAtomContainer subgraph = smsd.getFirstAtomMapping().getCommonFragment();
                String outpath = argumentHandler.getOutputFilepath();
                String outtype = argumentHandler.getOutputFiletype();
                outputHandler.writeMol(outtype, subgraph, outpath);
            }
        } finally {
            outputHandler.closeFiles();
        }
    }

    /**
     * Prints the mapping(s) of a finished comparison, writes the result line
     * and, when images are requested, draws and writes the image. Shared by
     * both single-query jobs so they report identically.
     *
     * @param smsd the finished comparison
     * @param queryMol query molecule passed to the image and result writers
     * @param targetMol target molecule passed to the image and result writers
     * @param imageTargetSuffix text appended to the target name for the image
     *        file, or {@code null} to use the target name unchanged
     * @param executionTime elapsed time in milliseconds to report
     */
    private static void reportMapping(BaseMapping smsd, IAtomContainer queryMol, IAtomContainer targetMol,
            String imageTargetSuffix, long executionTime, InputHandler inputHandler,
            OutputHandler outputHandler, ArgumentHandler argumentHandler)
            throws IOException, CDKException, CloneNotSupportedException {
        String queryPath = argumentHandler.getQueryFilepath();
        String targetPath = argumentHandler.getTargetFilepath();

        Map<IAtom, IAtom> mcs = smsd.getFirstAtomMapping().getMappingsByAtoms();
        boolean hasMapping = mcs != null && !mcs.isEmpty();
        int nAtomsMatched = (mcs == null) ? 0 : mcs.size();
        double tanimotoSimilarity = smsd.getTanimotoSimilarity();

        if (hasMapping && argumentHandler.isAllMapping()) {
            // print out all mappings
            outputHandler.printHeader(queryPath, targetPath, nAtomsMatched);
            int counter = 0;
            for (AtomAtomMapping aam : smsd.getAllAtomMapping()) {
                Map<Integer, Integer> mapping = aam.getMappingsByIndex();
                if (argumentHandler.isImage() && !mapping.isEmpty()) {
                    double stereoScore = smsd.getStereoScore(counter);
                    String label = outputHandler.makeLabel(tanimotoSimilarity, stereoScore);
                    outputHandler.addImage(queryMol, targetMol, label, mapping);
                }
                outputHandler.printMapping((counter + 1), mapping);
                counter += 1;
            }
        } else if (hasMapping) {
            // print out top one
            Map<Integer, Integer> mcsNumber = smsd.getFirstAtomMapping().getMappingsByIndex();
            double stereoScore = smsd.getStereoScore(0);
            outputHandler.printHeader(queryPath, targetPath, nAtomsMatched);
            String qrefName = inputHandler.getQRefName();
            String trefName = inputHandler.getTRefName();
            outputHandler.printTopMapping(nAtomsMatched, mcs, mcsNumber, qrefName, trefName);
            // Guard on the index map, since that is the map the image is drawn from.
            if (argumentHandler.isImage() && !mcsNumber.isEmpty()) {
                String label = outputHandler.makeLabel(tanimotoSimilarity, stereoScore);
                outputHandler.makeImage(queryMol, targetMol, label, mcsNumber);
            }
        }

        double euclidianGraph = smsd.getEuclideanDistance();
        outputHandler.writeResults(queryMol, targetMol, tanimotoSimilarity, euclidianGraph,
                nAtomsMatched, executionTime);

        if (hasMapping && argumentHandler.isImage()) {
            String qName = inputHandler.getQueryName();
            String tName = inputHandler.getTargetName();
            if (imageTargetSuffix != null) {
                tName = tName + imageTargetSuffix;
            }
            outputHandler.writeImage(qName, tName);
        }
    }

    /**
     * Builds an {@code Isomorphism} comparison of the two molecules with the
     * default algorithm and applies the requested chemical filters.
     *
     * @param filter chemical filter level, see {@link #applyChemFilters}
     */
    private static BaseMapping run(IAtomContainer query, IAtomContainer target, int filter,
            boolean matchBonds, boolean matchRings, boolean matchAtomType) throws CDKException {
        // XXX - if clean and configure is 'true', is that not duplicate configuring?
        BaseMapping smsd = new Isomorphism(query, target, Algorithm.DEFAULT,
                matchBonds, matchRings, matchAtomType);
        applyChemFilters(smsd, filter);
        return smsd;
    }

    /**
     * Builds a {@code Substructure} comparison of the two molecules; the
     * chemical filters are applied only when the query is a subgraph.
     *
     * @param filter chemical filter level, see {@link #applyChemFilters}
     */
    private static BaseMapping runSubstructure(IAtomContainer query, IAtomContainer target, int filter,
            boolean matchBonds, boolean matchRings, boolean matchAtomTypes) throws CDKException {
        // XXX - if clean and configure is 'true', is that not duplicate configuring?
        BaseMapping smsd = new Substructure(query, target, matchBonds, matchRings, matchAtomTypes, true);
        if (smsd.isSubgraph()) {
            applyChemFilters(smsd, filter);
        }
        return smsd;
    }

    /**
     * Translates the numeric filter level into the three
     * {@code setChemFilters} flags: level 0 turns all three off and each level
     * up to 3 turns on one more, in argument order. Any other value leaves the
     * mapping untouched.
     */
    private static void applyChemFilters(BaseMapping smsd, int filter) throws CDKException {
        switch (filter) {
            case 0:
                smsd.setChemFilters(false, false, false);
                break;
            case 1:
                smsd.setChemFilters(true, false, false);
                break;
            case 2:
                smsd.setChemFilters(true, true, false);
                break;
            case 3:
                smsd.setChemFilters(true, true, true);
                break;
            default:
                break;
        }
    }

    /**
     * Returns the index-based mapping of {@code aam}, or a fresh empty map
     * when the mapping is empty.
     */
    private static Map<Integer, Integer> getIndexMapping(AtomAtomMapping aam) {
        return aam.isEmpty() ? new TreeMap<Integer, Integer>() : aam.getMappingsByIndex();
    }
}