org.openscience.cdk.aromaticity.DaylightModel.java Source code

Java tutorial

Introduction

Here is the source code for org.openscience.cdk.aromaticity.DaylightModel.java

Source

/*
 * Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
 *                    John May <jwmay@users.sf.net>
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version. All we ask is that proper credit is given
 * for our work, which includes - but is not limited to - adding the above
 * copyright notice to the beginning of your source code files, and to any
 * copyright notice that you may distribute with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
 */

package org.openscience.cdk.aromaticity;

import com.google.common.collect.Maps;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.ringsearch.RingSearch;

import java.util.Arrays;
import java.util.Map;

import static com.google.common.base.Preconditions.checkNotNull;

/**
 * Electron donation model closely mirroring the Daylight model for use in
 * generating SMILES. The model was interpreted from various resources and as
 * such may not match exactly. If you find an inconsistency please add a request
 * for enhancement to the patch tracker. One known limitation is that this model
 * does not currently consider unknown/pseudo atoms '*'. <p/>
 *
 * The model makes a couple of assumptions which it will not correct for.
 * Checked assumptions cause the model to throw a runtime exception. <ul>
 * <li>there should be no valence errors (unchecked)</li> <li>every atom has a
 * set implicit hydrogen count (checked)</li> <li>every bond has defined order,
 * single, double etc (checked)</li> <li>atomic number of non-pseudo atoms is
 * set (checked)</li> </ul> <p/>
 *
 * The aromaticity model in SMILES was designed to simplify canonicalisation and
 * express symmetry in a molecule. The contributed electrons can be summarised
 * as follows (refer to code for exact specification): <ul> <li>carbon,
 * nitrogen, oxygen, phosphorus, sulphur, arsenic and selenium are allow to be
 * aromatic</li> <li>atoms should be Sp2 hybridised - not actually computed</li>
 * <li>atoms adjacent to a single cyclic pi bond contribute 1 electron</li>
 * <li>neutral or negatively charged atoms with a lone pair contribute 2
 * electrons</li> <li>exocyclic pi bonds are allowed but if the exocyclic atom
 * is more electronegative it consumes an electron. As an example ketone groups
 * contribute '0' electrons.</li></ul>
 *
 * @author John May
 * @cdk.module standard
 * @cdk.githash
 */
final class DaylightModel extends ElectronDonation {

    private static final int CARBON = 6;
    private static final int NITROGEN = 7;
    private static final int OXYGEN = 8;
    private static final int PHOSPHORUS = 15;
    private static final int SULPHUR = 16;
    private static final int ARSENIC = 33;
    private static final int SELENIUM = 34;

    /** @inheritDoc */
    @Override
    int[] contribution(IAtomContainer container, RingSearch ringSearch) {

        int n = container.getAtomCount();

        // we compute values we need for all atoms and then make the decisions
        // - this avoids costly operations such as looking up connected
        // bonds on each atom at the cost of memory
        int[] degree = new int[n];
        int[] bondOrderSum = new int[n];
        int[] nCyclicPiBonds = new int[n];
        int[] exocyclicPiBond = new int[n];
        int[] electrons = new int[n];

        Arrays.fill(exocyclicPiBond, -1);

        // index atoms and set the degree to the number of implicit hydrogens
        Map<IAtom, Integer> atomIndex = Maps.newHashMapWithExpectedSize(n);
        for (int i = 0; i < n; i++) {
            IAtom a = container.getAtom(i);
            atomIndex.put(a, i);
            degree[i] = checkNotNull(a.getImplicitHydrogenCount(),
                    "Aromaticity model requires implicit hydrogen count is set.");
        }

        // for each bond we increase the degree count and check for cyclic and
        // exocyclic pi bonds. if there is a cyclic pi bond the atom is marked.
        // if there is an exocyclic pi bond we store the adjacent atom for
        // lookup later.
        for (IBond bond : container.bonds()) {
            int u = atomIndex.get(bond.getAtom(0));
            int v = atomIndex.get(bond.getAtom(1));
            degree[u]++;
            degree[v]++;

            IBond.Order order = checkNotNull(bond.getOrder(),
                    "Aromaticity model requires that bond orders must be set");

            switch (order) {
            case UNSET:
                throw new IllegalArgumentException("Aromaticity model requires that bond orders must be set");
            case DOUBLE:
                if (ringSearch.cyclic(u, v)) {
                    nCyclicPiBonds[u]++;
                    nCyclicPiBonds[v]++;
                } else {
                    exocyclicPiBond[u] = v;
                    exocyclicPiBond[v] = u;
                }
                // note - fall through
            case SINGLE:
            case TRIPLE:
            case QUADRUPLE:
                bondOrderSum[u] += order.numeric();
                bondOrderSum[v] += order.numeric();
            }
        }

        // now make a decision on how many electrons each atom contributes
        for (int i = 0; i < n; i++) {

            int element = element(container.getAtom(i));
            int charge = charge(container.getAtom(i));

            // abnormal valence, usually indicated a radical. these cause problems
            // with kekulisations
            int bondedValence = bondOrderSum[i] + container.getAtom(i).getImplicitHydrogenCount();
            if (!normal(element, charge, bondedValence)) {
                electrons[i] = -1;
            }

            // non-aromatic element, acyclic atoms, atoms with more than three
            // neighbors and atoms with more than 1 cyclic pi bond are not
            // considered
            else if (!aromaticElement(element) || !ringSearch.cyclic(i) || degree[i] > 3 || nCyclicPiBonds[i] > 1) {
                electrons[i] = -1;
            }

            // exocyclic bond contributes 0 or 1 electrons depending on
            // preset electronegativity - check the exocyclicContribution method
            else if (exocyclicPiBond[i] >= 0) {
                electrons[i] = exocyclicContribution(element, element(container.getAtom(exocyclicPiBond[i])),
                        charge, nCyclicPiBonds[i]);
            }

            // any atom (except arsenic) with one cyclic pi bond contributes a
            // single electron
            else if (nCyclicPiBonds[i] == 1) {
                electrons[i] = element == ARSENIC ? -1 : 1;
            }

            // a anion with a lone pair contributes 2 electrons - simplification
            // here is we count the number free valence electrons but also
            // check if the bonded valence is okay (i.e. not a radical)
            else if (charge <= 0 && charge > -3) {
                if (valence(element, charge) - bondOrderSum[i] >= 2)
                    electrons[i] = 2;
                else
                    electrons[i] = -1;
            }

            else {
                // cation with no double bonds - single exception?
                if (element == CARBON && charge > 0)
                    electrons[i] = 0;
                else
                    electrons[i] = -1;
            }
        }

        return electrons;
    }

    /**
     * Defines the number of electrons contributed when a pi bond is exocyclic
     * (spouting). When an atom is connected to an more electronegative atom
     * then the electrons are 'pulled' from the ring. The preset conditions are
     * as follows:
     *
     * <ul> <li>A cyclic carbon with an exocyclic pi bond to anything but carbon
     * contributes 0 electrons. If the exocyclic atom is also a carbon then 1
     * electron is contributed.</li> <li>A cyclic 4 valent nitrogen or
     * phosphorus cation with an exocyclic pi bond will always contribute 1
     * electron. A 5 valent neutral nitrogen or phosphorus with an exocyclic
     * bond to an oxygen contributes 1 electron. </li> <li>A neutral sulphur
     * connected to an oxygen contributes 2 electrons</li><li>If none of the
     * previous conditions are met the atom is not considered as being able to
     * participate in an aromatic system and -1 is returned.</li> </ul>
     *
     * @param element      the element of the cyclic atom
     * @param otherElement the element of the exocyclic atom which is connected
     *                     to the cyclic atom by a pi bond
     * @param charge       the charge of the cyclic atom
     * @param nCyclic      the number of cyclic pi bonds adjacent to cyclic
     *                     atom
     * @return number of contributed electrons
     */
    private static int exocyclicContribution(int element, int otherElement, int charge, int nCyclic) {
        switch (element) {
        case CARBON:
            return otherElement != CARBON ? 0 : 1;
        case NITROGEN:
        case PHOSPHORUS:
            if (charge == 1)
                return 1;
            else if (charge == 0 && otherElement == OXYGEN && nCyclic == 1)
                return 1;
            return -1;
        case SULPHUR:
            // quirky but try - O=C1C=CS(=O)C=C1
            return charge == 0 && otherElement == OXYGEN ? 2 : -1;
        }
        return -1;
    }

    /**
     * Is the element specified by the atomic number, allowed to be aromatic by
     * the daylight specification. Allowed elements are C, N, O, P, S, As, Se
     * and *. This model allows all except for the unknown ('*') element.
     *
     * @param element atomic number of element
     * @return the element can be aromatic
     */
    private static boolean aromaticElement(int element) {
        switch (element) {
        case CARBON:
        case NITROGEN:
        case OXYGEN:
        case PHOSPHORUS:
        case SULPHUR:
        case ARSENIC:
        case SELENIUM:
            return true;
        }
        return false;
    }

    /**
     * The element has normal valence for the specified charge.
     *
     * @param element atomic number
     * @param charge  formal charge
     * @param valence bonded electrons
     * @return acceptable for this model
     */
    private static boolean normal(int element, int charge, int valence) {
        switch (element) {
        case CARBON:
            if (charge == -1 || charge == +1)
                return valence == 3;
            return charge == 0 && valence == 4;
        case NITROGEN:
        case PHOSPHORUS:
        case ARSENIC:
            if (charge == -1)
                return valence == 2;
            if (charge == +1)
                return valence == 4;
            return charge == 0 && (valence == 3 || (valence == 5 && element == NITROGEN));
        case OXYGEN:
        case SULPHUR:
        case SELENIUM:
            if (charge == +1)
                return valence == 3;
            return charge == 0 && valence == 2;
        }
        return false;
    }

    /**
     * Lookup of the number of valence electrons for the element at a given
     * charge.
     *
     * @param element the atomic number of an element
     * @param charge  the formal charge on the atom
     * @return the valence
     * @throws UnsupportedOperationException encountered an element which the
     *                                       valence was not encoded for
     */
    private int valence(int element, int charge) {
        return valence(element - charge);
    }

    /**
     * Lookup of the number of valence electrons for elements near those which
     * this model considers aromatic. As only the {@link #aromaticElement(int)}
     * are checked we need only consider elements within a charge range.
     *
     * @param element the atomic number of an element
     * @return the valence
     * @throws UnsupportedOperationException encountered an element which the
     *                                       valence was not encoded for
     */
    private int valence(int element) {
        switch (element) {
        case 5: // boron
        case 13: // aluminium
        case 31: // gallium
            return 3;
        case CARBON:
        case 14: // silicon
        case 32: // germanium
            return 4;
        case NITROGEN:
        case PHOSPHORUS:
        case ARSENIC:
            return 5;
        case OXYGEN:
        case SULPHUR:
        case SELENIUM:
            return 6;
        case 9: // fluorine
        case 17: // chlorine
        case 35: // bromine
            return 7;
        }
        throw new UnsupportedOperationException(
                "Valence not yet handled for element with atomic number " + element);
    }

    /**
     * Get the atomic number as an non-null integer value. Although pseudo atoms
     * are not considered by this model the pseudo atoms are intercepted to have
     * non-null atomic number (defaults to 0).
     *
     * @param atom atom to get the element from
     * @return the formal charge
     */
    private int element(IAtom atom) {
        Integer element = atom.getAtomicNumber();
        if (element != null)
            return element;
        if (atom instanceof IPseudoAtom)
            return 0;
        throw new IllegalArgumentException("Aromaiticty model requires atomic numbers to be set");
    }

    /**
     * Get the formal charge as an integer value - null defaults to 0.
     *
     * @param atom the atom to get the charge of
     * @return the formal charge
     */
    private int charge(IAtom atom) {
        return atom.getFormalCharge() != null ? atom.getFormalCharge() : 0;
    }
}