cmd.InputHandler.java Source code

Java tutorial

Introduction

Here is the source code for cmd.InputHandler.java

Source

/*
 *
 *
 * Copyright (C) 2009-2015  Syed Asad Rahman <asad@ebi.ac.uk>
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may distribute
 * with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 * 
 */
package cmd;

import cmd.pdb.LigandHelper;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.cli.MissingOptionException;
import org.openscience.cdk.AtomContainer;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.ChemFile;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.ConnectivityChecker;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.io.CMLReader;
import org.openscience.cdk.io.IChemObjectReader;
import org.openscience.cdk.io.ISimpleChemObjectReader;
import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.io.Mol2Reader;
import org.openscience.cdk.io.PDBReader;
import org.openscience.cdk.io.ReaderFactory;
import org.openscience.cdk.io.SMILESReader;
import org.openscience.cdk.io.iterator.IteratingSDFReader;
import org.openscience.cdk.layout.StructureDiagramGenerator;
import org.openscience.cdk.signature.MoleculeSignature;
import org.openscience.cdk.smiles.DeduceBondSystemTool;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.CDKHydrogenAdder;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;
import org.openscience.smsd.tools.ExtAtomContainerManipulator;

/**
 * Resolves the query/target inputs described by an {@link ArgumentHandler}
 * into CDK atom containers.
 *
 * <p>
 * Typical use: call {@link #validateInput()} first (it sets the internal flags
 * that {@link #getQuery()} and {@link #getTarget()} consult), then fetch the
 * molecules with {@link #getQuery()}, {@link #getTarget()} or
 * {@link #getAllTargets()}.
 *
 * <p>
 * Type codes (e.g. {@code MOL}, {@code SDF}, {@code SMI}) are matched
 * case-insensitively.
 *
 * @author Syed Asad Rahman &lt;asad@ebi.ac.uk&gt;
 */
public class InputHandler {

    private static final ILoggingTool logger = LoggingToolFactory.createLoggingTool(InputHandler.class);
    private final ArgumentHandler argumentHandler;
    private final StructureDiagramGenerator sdg;
    private final Map<String, String> singularDataTypes;
    private final Map<String, String> multipleDataTypes;
    private final Map<String, String> stringDataTypes;
    private boolean isSingleFileQuery;
    private boolean isSingleFileTarget;
    private boolean isMultipleTarget;
    private boolean isStringQuery;
    private boolean isStringTarget;

    /**
     * Kind of comparison implied by the query/target type combination.
     */
    public enum MatchType {

        SINGLE_QUERY_SINGLE_TARGET, SINGLE_QUERY_MULTIPLE_TARGET, NMCS, UNKNOWN
    }

    private MatchType matchType;

    /**
     * Creates a handler bound to the given arguments and registers the
     * supported type codes together with their human-readable descriptions.
     *
     * @param argumentHandler source of file paths, type codes and options
     */
    public InputHandler(ArgumentHandler argumentHandler) {
        this.argumentHandler = argumentHandler;
        sdg = new StructureDiagramGenerator();

        singularDataTypes = new HashMap<>();
        singularDataTypes.put("CML", "Chemical Markup Language");
        singularDataTypes.put("MOL", "MDL V2000 format");
        singularDataTypes.put("ML2", "MOL2 Tripos format");
        singularDataTypes.put("PDB", "Protein Databank Format");

        multipleDataTypes = new HashMap<>();
        multipleDataTypes.put("SDF", "SD file format");
        multipleDataTypes.put("SMIF", "SMILES file format");

        stringDataTypes = new HashMap<>();
        stringDataTypes.put("SMI", "SMILES string format");
        stringDataTypes.put("SIG", "Signature string format");
    }

    /**
     * Prints the supported type codes and their descriptions to standard
     * output: single-molecule file and string types first, then the
     * multi-molecule types.
     */
    public void printDataTypeHelp() {
        System.out.println("Allowed types for single-molecules (query or target):");
        printTypes(singularDataTypes);
        printTypes(stringDataTypes);
        System.out.println();
        System.out.println("Allowed types for multiple-molecules (targets only):");
        printTypes(multipleDataTypes);
    }

    /** Prints one tab-separated "code, description" line per map entry. */
    private static void printTypes(Map<String, String> types) {
        for (Map.Entry<String, String> entry : types.entrySet()) {
            System.out.println(String.format("%s\t%s", entry.getKey(), entry.getValue()));
        }
    }

    /**
     * @return read-only view of the string-based type codes and descriptions
     */
    public Map<String, String> getStringDataTypes() {
        return Collections.unmodifiableMap(stringDataTypes);
    }

    /**
     * @return read-only view of the single-molecule file type codes and
     * descriptions
     */
    public Map<String, String> getSingularDataTypes() {
        return Collections.unmodifiableMap(singularDataTypes);
    }

    /**
     * @return read-only view of the multi-molecule file type codes and
     * descriptions
     */
    public Map<String, String> getMultipleDataTypes() {
        return Collections.unmodifiableMap(multipleDataTypes);
    }

    /**
     * Builds the output file name for the query molecule.
     *
     * @return the configured query output name (or "Query" when it is unset
     * or "untitled"), followed by the configured suffix and ".mol"
     */
    public String getQRefName() {
        return refName(argumentHandler.getQueryMolOutName(), "Query", argumentHandler.getSuffix());
    }

    /**
     * Builds the output file name for the target molecule.
     *
     * @return the configured target output name (or "Target" when it is unset
     * or "untitled"), followed by the configured suffix and ".mol"
     */
    public String getTRefName() {
        return refName(argumentHandler.getTargetMolOutName(), "Target", argumentHandler.getSuffix());
    }

    /** Substitutes {@code fallback} for a null/"untitled" name and appends suffix + ".mol". */
    private static String refName(String outName, String fallback, String suffix) {
        String base = (outName == null || outName.equals("untitled")) ? fallback : outName;
        return base + suffix + ".mol";
    }

    /**
     * Classifies the configured query/target types and records the result.
     * The internal single/multiple/string flags are recomputed on every call.
     *
     * @return the match type implied by the type combination;
     * {@link MatchType#UNKNOWN} when the combination is not recognised
     * @throws MissingOptionException if no target type is configured, or if a
     * multi-molecule target is given without a single query and N-MCS is not
     * enabled
     */
    public MatchType validateInput() throws MissingOptionException {
        validateQueryType();
        validateTargetType();
        boolean singleQuery = isSingleFileQuery || isStringQuery;
        boolean singleTarget = isSingleFileTarget || isStringTarget;
        if (singleQuery && singleTarget) {
            matchType = MatchType.SINGLE_QUERY_SINGLE_TARGET;
        } else if (singleQuery && isMultipleTarget) {
            matchType = MatchType.SINGLE_QUERY_MULTIPLE_TARGET;
        } else if (isMultipleTarget) {
            // No usable single query: only an N-MCS run over the targets makes sense.
            if (argumentHandler.isNMCS()) {
                matchType = MatchType.NMCS;
            } else {
                throw new MissingOptionException("Set N-MCS to true");
            }
        } else {
            matchType = MatchType.UNKNOWN;
        }
        return matchType;
    }

    /**
     * Sets the query flags from the configured query type. A missing,
     * unknown or multi-molecule query type leaves both flags false.
     */
    private void validateQueryType() {
        // Reset first so a repeated validation never sees stale flags.
        isSingleFileQuery = false;
        isStringQuery = false;

        String rawType = argumentHandler.getQueryType();
        if (rawType == null) {
            return;
        }
        String queryType = normalizeType(rawType);
        if (singularDataTypes.containsKey(queryType)) {
            isSingleFileQuery = true;
        } else if (stringDataTypes.containsKey(queryType)) {
            isStringQuery = true;
        }
        // TODO : a multi-molecule query type is silently ignored here.
    }

    /**
     * Sets the target flags from the configured target type. An unknown type
     * leaves all flags false (which validateInput reports as UNKNOWN).
     *
     * @throws MissingOptionException if no target type is configured
     */
    private void validateTargetType() throws MissingOptionException {
        // Reset first so a repeated validation never sees stale flags.
        isSingleFileTarget = false;
        isMultipleTarget = false;
        isStringTarget = false;

        String rawType = argumentHandler.getTargetType();
        if (rawType == null) {
            throw new MissingOptionException("Target type is missing");
        }
        String targetType = normalizeType(rawType);
        if (singularDataTypes.containsKey(targetType)) {
            isSingleFileTarget = true;
        } else if (multipleDataTypes.containsKey(targetType)) {
            isMultipleTarget = true;
        } else if (stringDataTypes.containsKey(targetType)) {
            isStringTarget = true;
        }
        // TODO : an unrecognised target type is not reported as an error yet.
    }

    /**
     * Upper-cases a type code independently of the default locale so that it
     * matches the registered keys; {@code null} becomes the empty string.
     */
    private static String normalizeType(String type) {
        return type == null ? "" : type.toUpperCase(java.util.Locale.ROOT);
    }

    /**
     * Not implemented yet.
     *
     * @return always {@code false}
     */
    public boolean isNMCSInput() {
        return false; // TODO
    }

    /**
     * @return true if the last validation found a single-molecule file query
     */
    public boolean isSingleFileQueryInput() {
        return isSingleFileQuery;
    }

    /**
     * @return true if the last validation found a single-molecule file target
     */
    public boolean isSingleFileTargetInput() {
        return isSingleFileTarget;
    }

    /**
     * @return true if the last validation found a multi-molecule target
     */
    public boolean isMultiTargetInput() {
        return isMultipleTarget;
    }

    /**
     * Opens a reader for a single-molecule file.
     *
     * @param type upper-case type code (MOL, CML, ML2 or PDB)
     * @param filename path of the file to read
     * @return a reader over the file; the caller is responsible for closing it
     * @throws IOException if the path is a directory or the file cannot be
     * opened
     * @throws CDKException if the type code has no matching reader
     */
    private ISimpleChemObjectReader getReader(String type, String filename) throws IOException, CDKException {
        File input = new File(filename);
        if (input.isDirectory()) {
            throw new IOException("Input path " + filename + " is a directory, not a file");
        }
        switch (type) {
        case "MOL":
            return new MDLV2000Reader(new FileReader(input), IChemObjectReader.Mode.RELAXED);
        case "CML":
            return new CMLReader(new FileInputStream(input));
        case "ML2":
            return new Mol2Reader(new FileReader(input));
        case "PDB":
            return new PDBReader(new FileReader(input));
        default:
            throw new CDKException("Unsupported single-molecule file type: " + type);
        }
    }

    /**
     * Prepares a freshly read molecule in place: adds missing bond orders for
     * PDB input, applies the SD title as the molecule ID for SDF input,
     * aromatizes, generates 2D coordinates when image output is requested, and
     * numbers the atoms from 1.
     *
     * @param molecule molecule to configure (modified in place)
     * @param type type code of the source the molecule came from
     * @throws CDKException if one of the CDK processing steps fails
     */
    public void configure(IAtomContainer molecule, String type) throws CDKException {
        switch (normalizeType(type)) {
        case "PDB":
            LigandHelper.addMissingBondOrders(molecule);
            break;
        case "SDF":
            // Apply the title to the caller's molecule; previously it was set
            // on a temporary copy and therefore lost.
            String title = (String) molecule.getProperty(CDKConstants.TITLE);
            if (title != null) {
                molecule.setID(title);
            }
            break;
        default:
            break;
        }
        ExtAtomContainerManipulator.aromatizeMolecule(molecule);

        if (argumentHandler.isImage()) {
            sdg.setMolecule(molecule, false);
            sdg.generateCoordinates();
        }
        setAtomID(molecule);
    }

    /**
     * Parses a molecule from inline string data.
     *
     * @param stringData the SMILES or signature string
     * @param type upper-case type code (SMI or SIG)
     * @return the parsed molecule, or {@code null} for any other type code
     * @throws CDKException if parsing or configuring fails
     */
    private IAtomContainer getMolFromString(String stringData, String type) throws CDKException {
        switch (type) {
        case "SMI":
            return getMolFromSmiles(stringData);
        case "SIG":
            return getMolFromSignature(stringData);
        default:
            return null;
        }
    }

    /** Parses a SMILES string and configures the resulting molecule. */
    private IAtomContainer getMolFromSmiles(String smiles) throws CDKException {
        SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
        IAtomContainer atomContainer = sp.parseSmiles(smiles);
        IAtomContainer mol = new AtomContainer(atomContainer);
        configure(mol, "SMI");
        return mol;
    }

    /** Builds a molecule from a signature string and configures it. */
    private IAtomContainer getMolFromSignature(String signatureString) throws CDKException {
        IAtomContainer atomContainer = MoleculeSignature.fromSignatureString(signatureString,
                DefaultChemObjectBuilder.getInstance());
        IAtomContainer mol = new AtomContainer(atomContainer);
        configure(mol, "SIG");
        return mol;
    }

    /**
     * @return the query file name up to (not including) its first '.'
     */
    public String getQueryName() {
        return baseName(argumentHandler.getQueryFilepath());
    }

    /**
     * @return the target file name up to (not including) its first '.'
     */
    public String getTargetName() {
        return baseName(argumentHandler.getTargetFilepath());
    }

    /**
     * Returns the last path element truncated at its first '.'; a name that
     * starts with '.' yields the empty string.
     */
    private static String baseName(String path) {
        String name = new File(path).getName();
        int dot = name.indexOf('.');
        return dot < 0 ? name : name.substring(0, dot);
    }

    /**
     * Returns the query molecule. For a single-molecule file query the
     * fragment with the most atoms is returned; otherwise the query "path" is
     * interpreted as inline string data.
     *
     * @return the configured query molecule, or {@code null} if the query is
     * string data of an unsupported type
     * @throws IOException if the file cannot be read
     * @throws CDKException if parsing fails or the file contains no molecule
     */
    public IAtomContainer getQuery() throws IOException, CDKException {
        return loadMolecule(isSingleFileQuery, argumentHandler.getQueryFilepath(),
                argumentHandler.getQueryType());
    }

    /**
     * Returns the target molecule. For a single-molecule file target the
     * fragment with the most atoms is returned; otherwise the target "path"
     * is interpreted as inline string data.
     *
     * @return the configured target molecule, or {@code null} if the target is
     * string data of an unsupported type
     * @throws IOException if the file cannot be read
     * @throws CDKException if parsing fails or the file contains no molecule
     */
    public IAtomContainer getTarget() throws IOException, CDKException {
        return loadMolecule(isSingleFileTarget, argumentHandler.getTargetFilepath(),
                argumentHandler.getTargetType());
    }

    /**
     * Shared implementation of getQuery/getTarget: reads the largest fragment
     * from a file, or parses inline string data.
     */
    private IAtomContainer loadMolecule(boolean fromFile, String filenameOrData, String rawType)
            throws IOException, CDKException {
        String type = normalizeType(rawType);
        if (!fromFile) {
            return getMolFromString(filenameOrData, type);
        }

        IAtomContainer largest = null;
        // try-with-resources closes the reader (and its file stream) on every path.
        try (ISimpleChemObjectReader reader = getReader(type, filenameOrData)) {
            IChemFile chemFile = reader.read(new ChemFile());
            for (IAtomContainer fragment : ChemFileManipulator.getAllAtomContainers(chemFile)) {
                if (largest == null || fragment.getAtomCount() > largest.getAtomCount()) {
                    largest = fragment;
                }
            }
        }
        if (largest == null) {
            throw new CDKException("No molecule found in input file " + filenameOrData);
        }
        configure(largest, type);
        return largest;
    }

    /**
     * Reads every molecule from the target file and prepares it for matching:
     * explicit hydrogens are removed, atom types perceived, aromatic bond
     * orders deduced (SMILES files only) and implicit hydrogens added.
     * Disconnected molecules are skipped with a warning on standard error.
     *
     * <p>
     * Each kept molecule gets its original title (or, when it has none, its
     * 1-based position in the file) as ID, and its title property is set to
     * that position. The target output name of the argument handler is
     * updated to the ID of each kept molecule in turn.
     *
     * @return the prepared molecules, or {@code null} if the file contained
     * no molecules at all
     * @throws FileNotFoundException if the target path is not an existing file
     * @throws IOException if reading fails
     * @throws CDKException if parsing or preparing a molecule fails
     */
    public List<IAtomContainer> getAllTargets() throws FileNotFoundException, IOException, CDKException {
        String type = normalizeType(argumentHandler.getTargetType());
        String infileName = argumentHandler.getTargetFilepath();
        File inputFile = new File(infileName);

        if (!inputFile.isFile()) {
            throw new FileNotFoundException("ERROR: Input File Not Found " + infileName);
        }

        List<IAtomContainer> rawContainers = readTargetContainers(type, inputFile);
        if (rawContainers.isEmpty()) {
            return null;
        }

        // Only SMILES input needs its aromatic bond orders deduced.
        boolean deduceBonds = "SMIF".equals(type);
        List<IAtomContainer> prepared = new ArrayList<>(rawContainers.size());
        CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(DefaultChemObjectBuilder.getInstance());

        int position = 0;
        for (IAtomContainer rawContainer : rawContainers) {
            // Advance unconditionally: skipping a molecule must not stall the loop.
            position++;

            IAtomContainer hFree = ExtAtomContainerManipulator.removeHydrogens(rawContainer);
            ExtAtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(hFree);
            if (deduceBonds) {
                DeduceBondSystemTool dbst = new DeduceBondSystemTool();
                hFree = dbst.fixAromaticBondOrders(hFree);
            }
            adder.addImplicitHydrogens(hFree);

            String index = String.valueOf(position);
            Object existingTitle = hFree.getProperty(CDKConstants.TITLE);
            String title = existingTitle != null ? (String) existingTitle : index;
            hFree.setProperty(CDKConstants.TITLE, index);

            if (!ConnectivityChecker.isConnected(hFree)) {
                System.err.println(
                        "WARNING : Skipping target AtomContainer " + title + " as it is not connected.");
                continue;
            }

            hFree.setID(title);
            argumentHandler.setTargetMolOutName(hFree.getID());
            prepared.add(hFree);
        }
        return prepared;
    }

    /**
     * Reads all atom containers from a multi-molecule file. SDF input is read
     * record by record (each molecule's ID is set to its title); SMIF input
     * uses the SMILES reader; any other type is handed to the CDK reader
     * factory.
     */
    private List<IAtomContainer> readTargetContainers(String type, File inputFile)
            throws IOException, CDKException {
        IChemObjectBuilder builder = DefaultChemObjectBuilder.getInstance();

        if ("SDF".equals(type)) {
            List<IAtomContainer> molecules = new ArrayList<>();
            try (IteratingSDFReader sdfReader = new IteratingSDFReader(new FileReader(inputFile), builder)) {
                while (sdfReader.hasNext()) {
                    IAtomContainer mol = sdfReader.next();
                    String id = (String) mol.getProperty(CDKConstants.TITLE);
                    mol.setID(id);
                    molecules.add(mol);
                }
            }
            return molecules;
        }

        FileReader fileReader = new FileReader(inputFile);
        ISimpleChemObjectReader candidate;
        if ("SMIF".equals(type)) {
            candidate = new SMILESReader(fileReader);
        } else {
            candidate = new ReaderFactory().createReader(fileReader);
        }
        if (candidate == null) {
            // The factory could not recognise the format; release the file before failing.
            fileReader.close();
            throw new CDKException("Could not determine the file format of " + inputFile.getPath());
        }
        try (ISimpleChemObjectReader reader = candidate) {
            IChemFile chemFile = reader.read(builder.newInstance(IChemFile.class));
            return ChemFileManipulator.getAllAtomContainers(chemFile);
        }
    }

    /** Assigns consecutive IDs "1", "2", ... to the atoms in iteration order. */
    private static void setAtomID(IAtomContainer mol) {
        int index = 1;
        for (IAtom atom : mol.atoms()) {
            atom.setID(String.valueOf(index));
            index++;
        }
    }
}