Java tutorial
/** * ***************************************************************************** * Copyright C 2015, The Pistoia Alliance * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. ***************************************************************************** */ package org.helm.notation2.tools; import java.io.ByteArrayInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.helm.chemtoolkit.AbstractMolecule; import org.helm.chemtoolkit.CTKException; import org.helm.chemtoolkit.IAtomBase; import org.helm.chemtoolkit.IBondBase; import org.helm.notation2.Attachment; import org.helm.notation2.Chemistry; import org.helm.notation2.Monomer; import org.helm.notation2.MonomerFactory; import org.helm.notation2.exception.ChemistryException; import org.helm.notation2.exception.EncoderException; import org.helm.notation2.exception.MonomerException; import org.jdom2.Document; import org.jdom2.Element; import org.jdom2.JDOMException; import org.jdom2.Namespace; import org.jdom2.input.SAXBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * MonomerParser * * @author hecht */ public class MonomerParser { /** The Logger for this class */ private static final Logger LOG = LoggerFactory.getLogger(MonomerParser.class); public static final String MONOMER_ELEMENT = "Monomer"; public static final String MONOMER_ID_ELEMENT = "MonomerID"; public static final String MONOMER_SMILES_ELEMENT = "MonomerSmiles"; public static final String MONOMER_MOL_FILE_ELEMENT = "MonomerMolFile"; public static final String MONOMER_TYPE_ELEMENT = "MonomerType"; public static final String POLYMER_TYPE_ELEMENT = "PolymerType"; public static final String NATURAL_ANALOG_ELEMENT = "NaturalAnalog"; public static final String MONOMER_NAME_ELEMENT = "MonomerName"; public static final String ATTACHEMENTS_LIST_ELEMENT = "AttachmentList"; public static final String ATTACHEMENTS_ELEMENT = "Attachments"; public static final String ATTACHEMENT_ELEMENT = "Attachment"; public static final String ATTACHEMENT_ID_ELEMENT = "AttachmentID"; public static final String ATTACHEMENT_LABEL_ELEMENT = "AttachmentLabel"; public static final String CAP_GROUP_NAME_ELEMENT = "CapGroupName"; public static final String CAP_GROUP_SMILES_ELEMENT = "CapGroupSmiles"; private static List<String> polymerTypes = new ArrayList<String>(); static { polymerTypes = Arrays.asList(Monomer.SUPPORTED_POLYMER_TYPES); } /** * Convert ATTACHMENT element to Attachment object * * @param attachment element * @return Attachment */ public static Attachment getAttachment(Element attachment) { Namespace ns = attachment.getNamespace(); Attachment att = new Attachment(); att.setAlternateId(attachment.getChildText(ATTACHEMENT_ID_ELEMENT, ns)); att.setLabel(attachment.getChildText(ATTACHEMENT_LABEL_ELEMENT, ns)); att.setCapGroupName(attachment.getChildText(CAP_GROUP_NAME_ELEMENT, ns)); att.setCapGroupSMILES(attachment.getChildText(CAP_GROUP_SMILES_ELEMENT, ns)); return att; } /** * This method converts Attachment to ATTACHMENT XML element * * @param att -- Attachment * @return Element */ public static Element getAttachementElement(Attachment att) { Element attachment = new Element(ATTACHEMENT_ELEMENT); if (null != att.getAlternateId() && att.getAlternateId().length() > 0) { Element e = new Element(ATTACHEMENT_ID_ELEMENT); e.setText(att.getAlternateId()); attachment.getChildren().add(e); } if (null != att.getLabel() && att.getLabel().length() > 0) { Element e = new Element(ATTACHEMENT_LABEL_ELEMENT); e.setText(att.getLabel()); attachment.getChildren().add(e); } if (null != att.getCapGroupName() && att.getCapGroupName().length() > 0) { Element e = new Element(CAP_GROUP_NAME_ELEMENT); e.setText(att.getCapGroupName()); attachment.getChildren().add(e); } if (null != att.getCapGroupSMILES() && att.getCapGroupSMILES().length() > 0) { Element e = new Element(CAP_GROUP_SMILES_ELEMENT); e.setText(att.getCapGroupSMILES()); attachment.getChildren().add(e); } return attachment; } /** * This method validates Attachment by the following rules<br> <li>Attachment * must have unique ID<br> <li>cap group SMILES must be valid <br> <li>cap * group SMILES must contain one R group<br> <li>R group in SMILES must match * R group label<br> * * @param attachment * @return true or false * @throws org.helm.notation2.exception.MonomerException * @throws java.io.IOException * @throws ChemistryException */ public static boolean validateAttachement(Attachment attachment) throws MonomerException, IOException, ChemistryException { String alternateId = attachment.getAlternateId(); if (null == alternateId) { throw new MonomerException("Attachment must have unique ID"); } String smiles = attachment.getCapGroupSMILES(); if (null != smiles) { if (!Chemistry.getInstance().getManipulator().validateSMILES(smiles)) { throw new MonomerException("Attachment cap group SMILES is invalid"); } List<String> labels = getAttachmentLabels(smiles); if (null == labels || labels.size() != 1) { throw new MonomerException("Attachment must have one R group in SMILES"); } if (!(labels.get(0).equals(attachment.getLabel()))) { throw new MonomerException("R group in monomer SMILES and R group label must match"); } } return true; } /** * Convert monomer element to Monomer object * * @param monomer element * @return Monomer */ public static Monomer getMonomer(Element monomer) throws MonomerException { Monomer m = new Monomer(); Namespace ns = monomer.getNamespace(); m.setAlternateId(monomer.getChildText(MONOMER_ID_ELEMENT, ns)); m.setCanSMILES(monomer.getChildText(MONOMER_SMILES_ELEMENT, ns)); String encodedMolfile = monomer.getChildText(MONOMER_MOL_FILE_ELEMENT, ns); String molfile = null; try { molfile = MolfileEncoder.decode(encodedMolfile); } catch (EncoderException ex) { throw new MonomerException("Invalid monomer molfile"); } m.setMolfile(molfile); m.setMonomerType(monomer.getChildText(MONOMER_TYPE_ELEMENT, ns)); m.setPolymerType(monomer.getChildText(POLYMER_TYPE_ELEMENT, ns)); m.setNaturalAnalog(monomer.getChildText(NATURAL_ANALOG_ELEMENT, ns)); m.setName(monomer.getChildText(MONOMER_NAME_ELEMENT, ns)); Element attachmentElement = monomer.getChild(ATTACHEMENTS_ELEMENT, ns); if (null != attachmentElement) { List attachments = attachmentElement.getChildren(ATTACHEMENT_ELEMENT, ns); List<Attachment> l = new ArrayList<Attachment>(); Iterator i = attachments.iterator(); while (i.hasNext()) { Element attachment = (Element) i.next(); Attachment att = getAttachment(attachment); l.add(att); } m.setAttachmentList(l); } return m; } /** * This method converts Monomer to MONOMER XML element * * @param monomer * @return Element */ public static Element getMonomerElement(Monomer monomer) throws MonomerException { Element element = new Element(MONOMER_ELEMENT); if (null != monomer.getAlternateId()) { Element e = new Element(MONOMER_ID_ELEMENT); e.setText(monomer.getAlternateId()); element.getChildren().add(e); } if (null != monomer.getCanSMILES()) { Element e = new Element(MONOMER_SMILES_ELEMENT); e.setText(monomer.getCanSMILES()); element.getChildren().add(e); } if (null != monomer.getMolfile()) { Element e = new Element(MONOMER_MOL_FILE_ELEMENT); String encodedMolfile = null; try { encodedMolfile = MolfileEncoder.encode(monomer.getMolfile()); } catch (EncoderException ex) { throw new MonomerException("Invalid monomer molfile"); } // CDATA cdata = new CDATA(monomer.getMolfile()); // e.setContent(cdata); e.setText(encodedMolfile); element.getChildren().add(e); } if (null != monomer.getMonomerType()) { Element e = new Element(MONOMER_TYPE_ELEMENT); e.setText(monomer.getMonomerType()); element.getChildren().add(e); } if (null != monomer.getPolymerType()) { Element e = new Element(POLYMER_TYPE_ELEMENT); e.setText(monomer.getPolymerType()); element.getChildren().add(e); } if (null != monomer.getNaturalAnalog()) { Element e = new Element(NATURAL_ANALOG_ELEMENT); e.setText(monomer.getNaturalAnalog()); element.getChildren().add(e); } if (null != monomer.getName()) { Element e = new Element(MONOMER_NAME_ELEMENT); e.setText(monomer.getName()); element.getChildren().add(e); } List<Attachment> l = monomer.getAttachmentList(); if (null != l && l.size() > 0) { Element attachments = new Element(ATTACHEMENTS_ELEMENT); for (int i = 0; i < l.size(); i++) { Attachment att = l.get(i); Element attachment = getAttachementElement(att); attachments.getChildren().add(attachment); } element.getChildren().add(attachments); } return element; } public static List<Monomer> getMonomerList(String monomerXMLString) throws JDOMException, IOException, MonomerException, CTKException, ChemistryException { List<Monomer> l = new ArrayList<Monomer>(); if (null != monomerXMLString && monomerXMLString.length() > 0) { SAXBuilder builder = new SAXBuilder(); ByteArrayInputStream bais = new ByteArrayInputStream(monomerXMLString.getBytes()); Document doc = builder.build(bais); Element root = doc.getRootElement(); List monomers = root.getChildren(); Iterator it = monomers.iterator(); while (it.hasNext()) { Element monomer = (Element) it.next(); Monomer m = getMonomer(monomer); if (MonomerParser.validateMonomer(m)) { l.add(m); } } } return l; } public static Monomer getMonomer(String monomerXMLString) throws JDOMException, IOException, MonomerException { Monomer m = null; if (monomerXMLString != null && monomerXMLString.length() > 0) { SAXBuilder builder = new SAXBuilder(); ByteArrayInputStream bais = new ByteArrayInputStream(monomerXMLString.getBytes()); Document doc = builder.build(bais); Element root = doc.getRootElement(); m = getMonomer(root); } return m; } /** * This methods checks the validity of the monomer based on the following * rules<br> <li>monomer cannot be null<br> <li>polymer type cannot be null * and must be one of the defined polymer type<br> <li>monomer type cannot be * null and must be one of the defined monomer type for a given polymer * type<br> <li>Monomer ID cannot be null<br> <li>structure cannot be null for * non-chemical type monomer<br> <li>structure SMILES must be valid<br> <li> * attachment labels on monomer must be unique<br> <li>Attachment number on * SMILES must match attachment List size<br> <li>Each attachment in * attachment list must be valid (call validateAttachment())<br> <li> * Attachment labels on monomer must match atachment label on attachment * list<br> <li>For non-chemical type monomers, modified monomer (ID length * greater than 1) must have natural analog<br> <li>All monomers must have at * least one attachment * * @param monomer * @return true or false * @throws org.helm.notation2.exception.MonomerException * @throws java.io.IOException * @throws CTKException * @throws ChemistryException */ public static boolean validateMonomer(Monomer monomer) throws MonomerException, IOException, CTKException, ChemistryException { if (null == monomer) { throw new MonomerException("Monomer is null"); } else { String polymerType = monomer.getPolymerType(); if (null == polymerType) { throw new MonomerException("Monomer has no polymer type defined"); } else if (!polymerTypes.contains(polymerType)) { throw new MonomerException("Unknown polymer type '" + polymerType + "'"); } String monomerType = monomer.getMonomerType(); if (null == monomerType) { throw new MonomerException("Monomer has no monomer type defined"); } else { if (polymerType.equals(Monomer.CHEMICAL_POLYMER_TYPE)) { if (!monomerType.equals(Monomer.UNDEFINED_MOMONER_TYPE)) { throw new MonomerException("Valid monomer type for chemical structures can only be '" + Monomer.UNDEFINED_MOMONER_TYPE + "'"); } } else { if (!(monomerType.equals(Monomer.BACKBONE_MOMONER_TYPE) || monomerType.equals(Monomer.BRANCH_MOMONER_TYPE))) { throw new MonomerException("Valid monomer type for simple polymer can only be '" + Monomer.BACKBONE_MOMONER_TYPE + "' or '" + Monomer.BRANCH_MOMONER_TYPE + "'"); } } } String alternateId = monomer.getAlternateId(); if (null == alternateId || alternateId.length() == 0) { throw new MonomerException("Monomer has no monomerID defined"); } String smiles = monomer.getCanSMILES(); String molfile = monomer.getMolfile(); List<Attachment> attachments = monomer.getAttachmentList(); if (!polymerType.equals(Monomer.CHEMICAL_POLYMER_TYPE)) { if (null == smiles || null == molfile || null == attachments || attachments.size() == 0) { throw new MonomerException("Monomers for specific polymer type must have structure info"); } } String errorNote = alternateId + " (" + polymerType + ")"; if (null != smiles && smiles.length() > 0) { boolean validSmiles = Chemistry.getInstance().getManipulator().validateSMILES(smiles); if (!validSmiles) { throw new MonomerException("Monomer SMILES must be valid: " + errorNote); } List<String> attachmentLabels = getAttachmentLabels(smiles); boolean unique = areAttachmentLabelsUnique(attachmentLabels); if (!unique) { throw new MonomerException("Attachment labels on monomer must be unique: " + errorNote); } if (attachmentLabels.size() != attachments.size()) { throw new MonomerException( "Attachment label number on monomer must match attachment number: " + errorNote); } for (int i = 0; i < attachments.size(); i++) { Attachment att = attachments.get(i); validateAttachement(att); } for (int i = 0; i < attachmentLabels.size(); i++) { String label = attachmentLabels.get(i); boolean found = false; for (int j = 0; j < attachments.size(); j++) { Attachment att = attachments.get(j); if (att.getAlternateId().startsWith(label)) { found = true; break; } } if (!found) { throw new MonomerException( "Attachment label in SMILES is not found in attachment list: " + errorNote); } } } if (monomer.getAlternateId().length() > 0 && !(monomer.getPolymerType().equals(Monomer.CHEMICAL_POLYMER_TYPE))) { String naturalAnalog = monomer.getNaturalAnalog(); if (null == naturalAnalog) { throw new MonomerException("Modified monomer must have natural analog defined: " + errorNote); } else { if (naturalAnalog.length() != 1) { throw new MonomerException("Natural analog must be single letter: " + errorNote); } } } if (monomer.getAttachmentList() == null || monomer.getAttachmentList().size() == 0) { throw new MonomerException("Monomer must have at least one attachment: " + errorNote); } // make sure R group can only be connected to one atom via single // achiral bond // MolBond javadoc: getType()Gets the bond type. Possible values: 1 // (single), 2 (double), 3 (triple), coordinate, conjugated and // query bond types. if (null != smiles && smiles.length() > 0) { AbstractMolecule molecule = Chemistry.getInstance().getManipulator().getMolecule(smiles, null); List<String> attachmentLabels = getAttachmentLabels(smiles); for (int i = 0; i < attachmentLabels.size(); i++) { String rgroupId = attachmentLabels.get(i).substring(1); IAtomBase atom = null; atom = molecule.getRGroupAtom(Integer.parseInt(rgroupId), true); if (atom.getIBondCount() != 1) { throw new MonomerException( "R group can only connect with one atom in monomer: " + errorNote); } else { IBondBase bond = atom.getIBond(0); if (bond.getType() != 1) throw new MonomerException( "R group can only connect with another atom via single bond in monomer: " + errorNote); } } } } return true; } /** * This methods return the list of R groups in the extended SMILES string * * @param extendedSmiles * @return string list */ private static List<String> getAttachmentLabels(String extendedSmiles) { List<String> labels = new ArrayList<String>(); int start = 0; int rPos = extendedSmiles.indexOf("R"); StringBuffer sb = new StringBuffer(); while (rPos > 0) { rPos++; String nextLetter = extendedSmiles.substring(rPos, rPos + 1); if (nextLetter.matches("[0-9]")) { sb.append(nextLetter); } else { labels.add("R" + sb.toString()); sb = new StringBuffer(); start = rPos + 1; rPos = extendedSmiles.indexOf("R", start); } } return labels; } /** * This mehtod checks if strings in a list are unique * * @param labels * @return true or fals */ private static boolean areAttachmentLabelsUnique(List<String> labels) { Map<String, String> map = new HashMap<String, String>(); for (int i = 0; i < labels.size(); i++) { map.put(labels.get(i), labels.get(i)); } if (labels.size() == map.size()) { return true; } else { return false; } } /** * This method checks if attachment label is in the format of R#, where # is a * number * * @param label * @throws org.helm.notation2.exception.MonomerException */ public static void validateAttachmentLabel(String label) throws MonomerException { if (label.equalsIgnoreCase(Attachment.PAIR_ATTACHMENT)) { return; } char[] chars = label.toCharArray(); if (!(String.valueOf(chars[0])).equals("R")) { throw new MonomerException("Invalid Attachment Label format"); } for (int i = 1; i < chars.length; i++) { char c = chars[i]; if (!(String.valueOf(c)).matches("[0-9]")) { throw new MonomerException("Invalid Attachment Label format"); } } } public static void fillAttachmentInfo(Attachment att) throws MonomerException, IOException, JDOMException, ChemistryException, CTKException { Map<String, Attachment> attachmentMap = MonomerFactory.getInstance().getAttachmentDB(); Attachment attach = attachmentMap.get(att.getAlternateId()); att.setLabel(attach.getLabel()); att.setCapGroupSMILES(attach.getCapGroupSMILES()); att.setCapGroupName(attach.getCapGroupName()); } }