oxis.yologp.YOLogPDescriptor.java Source code

Java tutorial


Here is the source code for oxis.yologp.YOLogPDescriptor.java


/*
 * Copyright (C) 2014 EMBL - European Bioinformatics Institute
 * All rights reserved. This file is part of the YOLogP project.
 * author: oXis (Benjamin Roques)
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the Creative Commons Attribution-NonCommercial-ShareAlike 4.0
 * International License, please visit
 * http://creativecommons.org/licenses/by-nc-sa/4.0/. All we ask is that proper
 * credit is given for my work, which includes - but is not limited to - adding
 * the above copyright notice to the beginning of your source code files, and to
 * any copyright notice that you may distribute with programs based on this
 * work.
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.
 */
package oxis.yologp;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import weka.classifiers.trees.RandomForest;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class YOLogPDescriptor {

    // Thread count passed to Executors.newFixedThreadPool in compute(); defaults to twice the
    // number of available processors.
    private int poolSize = Runtime.getRuntime().availableProcessors() * 2;
    // Maximum wait, in seconds, for each worker result collected in compute().
    private int timeout = 80;

    // Prefix concatenated directly with a file name (path + name) when reading/writing models
    // and when creating the XML/CSV output files; no separator is inserted between the two.
    private String path = "./";

    // Drugs to process; compute() replaces this reference with listDrugComputed when it finishes.
    private List<DrugStruct> listDrug = new ArrayList<>();
    // Becomes the new listDrug at the end of compute() and is then set to null.
    // NOTE(review): the code that fills this list is not visible in this extract.
    private List<DrugStruct> listDrugComputed = new ArrayList<>();

    // Classifier used by predict(); assigned in calculate(...) (deserialized) or train(String) (new).
    private RandomForest model;

     * Add instantiated DrugStruct to the list.
     * @param drugs list of DrugStruct
    // Private constructor; instances are created through the static from* factory methods.
    // NOTE(review): the constructor body is not visible in this extract; presumably it stores
    // the given list in listDrug — confirm against the full source.
    private YOLogPDescriptor(List<DrugStruct> drugs) {


     * Instantiate a list of DrugStruct.
     * @param ListContainer List<IAtomContainer> list of IAtomContainer
    public static YOLogPDescriptor fromAtomContainer(List<IAtomContainer> ListContainer) throws CDKException {
        List<DrugStruct> setupContainers = new ArrayList<>();
        for (IAtomContainer IAdrug : ListContainer) {
            try {
                setupContainers.add(new DrugStruct(IAdrug));
            } catch (CDKException ex) {
                throw new CDKException("Cannot convert IAtomContainer to DrugStruct", ex);
        return new YOLogPDescriptor(setupContainers);

     * Instantiate a DrugStruct.
     * @param container IAtomContainer
    public static YOLogPDescriptor fromAtomContainer(IAtomContainer container) throws CDKException {
        try {
            return new YOLogPDescriptor(Collections.singletonList(new DrugStruct(container)));
        } catch (CDKException ex) {
            throw new CDKException("Cannot convert IAtomContainer to DrugStruct", ex);

     * Instantiate a list of DrugStruct.
     * @param listSmiles List<String>
    public static YOLogPDescriptor fromSmiles(List<String> listSmiles) throws CDKException {
        List<DrugStruct> setupContainers = new ArrayList<>();
        for (String smiles : listSmiles) {
            try {
                setupContainers.add(new DrugStruct(smiles));
            } catch (CDKException ex) {
                throw new CDKException("Cannot instanciate IAtomContainer with " + smiles, ex);
        return new YOLogPDescriptor(setupContainers);

     * Instantiate a DrugStruct.
     * @param smiles String
    public static YOLogPDescriptor fromSmiles(String smiles) throws CDKException {
        try {
            return new YOLogPDescriptor(Collections.singletonList(new DrugStruct(smiles)));
        } catch (CDKException ex) {
            throw new CDKException("Cannot instanciate IAtomContainer with " + smiles, ex);

     * Add instantiated DrugStruct to the list.
     * @param listDrugStruct list of DrugStruct
    /**
     * Builds a descriptor from already-instantiated {@code DrugStruct} objects.
     *
     * @param listDrugStruct the {@code DrugStruct} instances to use
     * @return a descriptor constructed from a newly created list
     */
    public static YOLogPDescriptor fromDrugStruct(List<DrugStruct> listDrugStruct) {
        List<DrugStruct> setupContainers = new ArrayList<>();
        for (DrugStruct drug : listDrugStruct) {
            // NOTE(review): the loop body is missing from this extract; presumably each drug is
            // added to setupContainers — confirm against the full source.
        return new YOLogPDescriptor(setupContainers);

    /**
     * Add an instantiated DrugStruct to the list.
     *
     * @param drugStruct a single DrugStruct
     */
    public static YOLogPDescriptor fromDrugStruct(DrugStruct drugStruct) {
        return new YOLogPDescriptor(Collections.singletonList(drugStruct));

    /**
     * Compute all properties and then predict the LogP.
     *
     * @return List<DrugStruct> list of DrugStruct
     */
    /**
     * Loads the default model from the classpath resource {@code /rf.model} and returns the
     * drug list.
     *
     * <p>NOTE(review): several lines of this method are missing from this extract (the second
     * {@code try} block is empty here); presumably it runs {@code compute()} and
     * {@code predict()} — confirm against the full source.
     *
     * @return the {@code listDrug} field
     */
    public List<DrugStruct> calculate() {

        try {
            model = (RandomForest) weka.core.SerializationHelper.read(getClass().getResourceAsStream("/rf.model"));
        } catch (Exception ex) {
            // NOTE(review): this logs under Main's logger while the catch below uses
            // YOLogPDescriptor's — likely an oversight; confirm which logger is intended.
            // The load failure is only logged; no rethrow or return is visible in this extract,
            // so execution appears to continue with whatever `model` held before.
            Logger.getLogger(Main.class.getName()).log(Level.SEVERE, "Cannot load default model", ex);


        try {
        } catch (Exception ex) {
            // Any exception from the (elided) guarded statements is logged and not propagated.
            Logger.getLogger(YOLogPDescriptor.class.getName()).log(Level.SEVERE, "Prediction error", ex);

        return listDrug;

    /**
     * Loads a serialized model from the file {@code path + name} and returns the drug list.
     *
     * <p>NOTE(review): several lines of this method are missing from this extract (the second
     * {@code try} block is empty here); presumably it runs {@code compute()} and
     * {@code predict()} — confirm against the full source.
     *
     * @param name model file name; it is appended to {@code path} without any separator
     * @return the {@code listDrug} field
     */
    public List<DrugStruct> calculate(String name) {

        try {
            model = (RandomForest) weka.core.SerializationHelper.read(path + name);
        } catch (Exception ex) {
            // NOTE(review): this logs under Main's logger while the catch below uses
            // YOLogPDescriptor's — likely an oversight; confirm which logger is intended.
            // The load failure is only logged; no rethrow or return is visible in this extract,
            // so execution appears to continue with whatever `model` held before.
            Logger.getLogger(Main.class.getName()).log(Level.SEVERE, "Could not load model " + name, ex);


        try {
        } catch (Exception ex) {
            // Any exception from the (elided) guarded statements is logged and not propagated.
            Logger.getLogger(YOLogPDescriptor.class.getName()).log(Level.SEVERE, "Prediction error", ex);

        return listDrug;

    /**
     * Compute all properties.
     */
    /**
     * Runs the per-drug computation on a fixed-size thread pool and collects the results,
     * waiting at most {@code timeout} seconds for each one.
     *
     * <p>NOTE(review): this extract is missing lines — the task-submission loop body, the
     * handling of each successful result, the start of the logging call in the catch block, and
     * any pool shutdown are not visible. Confirm against the full source before relying on the
     * notes below.
     */
    private void compute() {

        List<Future<DrugStruct>> worker = new ArrayList<>();
        // NOTE(review): no shutdown()/shutdownNow() on this pool is visible in this extract.
        ExecutorService pool = Executors.newFixedThreadPool(poolSize);

        for (DrugStruct ds : listDrug) {
            // NOTE(review): loop body missing here; presumably one task per drug is submitted to
            // the pool and its Future is added to `worker`.

        int i = 0;
        DrugStruct tmp;
        for (Future<DrugStruct> result : worker) {
            try {
                // Blocks for at most `timeout` seconds per future.
                tmp = result.get(timeout, TimeUnit.SECONDS);
            } catch (InterruptedException | ExecutionException | TimeoutException ex) {
                // All three failure kinds share the timeout-oriented message below, even though
                // only TimeoutException means the wait limit was exceeded.
                // NOTE(review): the thread's interrupt status is not visibly restored after an
                // InterruptedException.
                        "One worker killed, it took too much time to compute. Timeout = " + timeout + "s", ex);

        // Swap in the computed list and drop the extra reference so it can be garbage-collected.
        // NOTE(review): listDrugComputed stays null afterwards; if the elided code adds to it, a
        // second compute() on the same instance would fail — confirm.
        listDrug = listDrugComputed;
        listDrugComputed = null;

     * Predict the LogP.
    /**
     * Classifies every flagged drug with the current model and stores the result, rounded to two
     * decimals, in the drug's {@code "predicted"} property.
     *
     * <p>NOTE(review): the statements inside the inner {@code try} and the start of the logging
     * call in its catch block are missing from this extract.
     *
     * @throws Exception declared by the method; {@code model.classifyInstance} is called here
     */
    private void predict() throws Exception {

        Instances instances = buildDataset();

        Map<Object, Object> properties;
        for (DrugStruct drugStruct : listDrug) {

            // Only drugs whose "flag" property is true are predicted.
            // NOTE(review): a missing "flag" property would presumably fail on unboxing — confirm.
            if (drugStruct.drug.getProperty("flag")) {
                properties = drugStruct.drug.getProperties();
                Instance instance = new DenseInstance(instances.numAttributes()); // 28 named + 1024 "X" attributes + "logp" (see buildDataset)
                for (Object propKey : properties.keySet()) {
                    // "hash", "flag" and "smiles" are bookkeeping properties, not model inputs.
                    if (!(propKey.equals("hash") || propKey.equals("flag") || propKey.equals("smiles"))) {
                        try {
                            // NOTE(review): body missing; presumably copies the property value
                            // into the attribute of the same name.
                        } catch (NullPointerException ex) {
                            // Judging by the message, a property that the elided code cannot map
                            // is reported and skipped.
                                    "Property not used: {0}", propKey.toString());

                double predicted = model.classifyInstance(instance);
                // Round to two decimal places.
                predicted = Math.round(predicted * 100) / 100.0d;
                drugStruct.drug.setProperty("predicted", predicted);

    /**
     * Train a model; this replaces the current one.
     *
     * @param name name of the model file to save
     */
    /**
     * Replaces the current model with a new {@code RandomForest}, walks the flagged drugs, and
     * serializes the model to the file {@code path + name}.
     *
     * <p>NOTE(review): lines are missing from this extract — the statements inside the inner
     * {@code try}, the start of the logging call in its catch block, and any call that adds the
     * instances to the dataset or trains the classifier are not visible.
     *
     * @param name model file name; it is appended to {@code path} without any separator
     * @throws Exception declared by the method; {@code SerializationHelper.write} is called here
     */
    public void train(String name) throws Exception {


        Instances instances = buildDataset();

        // The previous model reference (if any) is discarded here.
        model = new RandomForest();

        Map<Object, Object> properties;
        for (DrugStruct drugStruct : listDrug) {

            // Only drugs whose "flag" property is true are considered.
            if (drugStruct.drug.getProperty("flag")) {
                properties = drugStruct.drug.getProperties();
                Instance instance = new DenseInstance(instances.numAttributes()); // 28 named + 1024 "X" attributes + "logp" (see buildDataset)
                for (Object propKey : properties.keySet()) {
                    // "hash", "flag" and "smiles" are bookkeeping properties, not model inputs.
                    if (!(propKey.equals("hash") || propKey.equals("flag") || propKey.equals("smiles"))) {
                        try {
                            // NOTE(review): body missing; presumably copies the property value
                            // into the attribute of the same name.
                        } catch (NullPointerException ex) {
                                    "Property not used: {0}", propKey.toString());

        weka.core.SerializationHelper.write(path + name, model);

     * Build the Dataset structure
     * @return Instances
    private Instances buildDataset() {

        ArrayList<Attribute> attInfo = new ArrayList<>();

        attInfo.add(new Attribute("AromaticBondsCountDescriptor"));
        attInfo.add(new Attribute("AromaticAtomsCountDescriptor"));
        attInfo.add(new Attribute("BondPartialSigmaChargeMax"));
        attInfo.add(new Attribute("AutocorrelationDescriptorMass"));
        attInfo.add(new Attribute("EffectiveAtomPolarizabilityMea"));
        attInfo.add(new Attribute("MDEDescriptor5"));
        attInfo.add(new Attribute("BasicGroupCountDescriptor"));
        attInfo.add(new Attribute("AutocorrelationDescriptorCharge2"));
        attInfo.add(new Attribute("APolDescriptor"));
        attInfo.add(new Attribute("BCUTDescriptor3"));
        attInfo.add(new Attribute("AutocorrelationDescriptorPolarizability2"));
        attInfo.add(new Attribute("HBondDonorCountDescriptor"));
        attInfo.add(new Attribute("PartialTChargePEOEMin"));
        attInfo.add(new Attribute("PartialTChargePEOEMed"));
        attInfo.add(new Attribute("PartialSigmaChargeMed"));
        attInfo.add(new Attribute("BondPartialSigmaChargeMea"));
        attInfo.add(new Attribute("PartialSigmaChargeMin"));
        attInfo.add(new Attribute("WeightedPathDescriptor5"));
        attInfo.add(new Attribute("TPSADescriptor"));
        attInfo.add(new Attribute("AutocorrelationDescriptorPolarizability"));
        attInfo.add(new Attribute("CarbonTypesDescriptor4"));
        attInfo.add(new Attribute("ALOGPDescriptor2"));
        attInfo.add(new Attribute("HBondAcceptorCountDescriptor"));
        attInfo.add(new Attribute("MannholdLogPDescriptor"));
        attInfo.add(new Attribute("FractionalPSADescriptor"));
        attInfo.add(new Attribute("ALOGPDescriptor3"));
        attInfo.add(new Attribute("ALOGPDescriptor"));
        attInfo.add(new Attribute("XLogPDescriptor"));
        for (int i = 0; i < 1024; i++) {
            attInfo.add(new Attribute("X" + i));
        attInfo.add(new Attribute("logp"));

        Instances instances = new Instances("logp", attInfo, 0);

        return instances;

    /**
     * Print all the compounds to an XML file.
     *
     * @param name String, name of the file
     */
    /**
     * Writes the flagged drugs to the file {@code path + name}.
     *
     * <p>NOTE(review): the statements that produce the XML output are missing from this extract.
     *
     * @param name output file name; it is appended to {@code path} without any separator
     * @throws Exception declared by the method; the file is opened for writing here
     */
    public void printXML(String name) throws Exception {

        File file = new File(path + name);
        // FileWriter is opened in append mode (second argument true), so existing content is kept.
        // No charset is given, so the platform default encoding applies.
        try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(file, true)))) {
            for (DrugStruct drug : listDrug) {
                // Only drugs whose "flag" property is true are written.
                if (drug.drug.getProperty("flag")) {

     * Print all predicted values.
    /**
     * Iterates over every drug in {@code listDrug}.
     *
     * <p>NOTE(review): the loop body is missing from this extract; going by the method name it
     * presumably prints each drug's predicted value — confirm against the full source.
     */
    public void printPredicted() {
        for (DrugStruct drug : listDrug) {

     * Get all predicted values.
     * @return List<Double>
    public List<Double> getPredicted() {
        List<Double> listPredicted = new ArrayList<>();

        for (DrugStruct drug : listDrug) {
            listPredicted.add((double) drug.drug.getProperty("logp"));

        return listPredicted;

    /**
     * Print all the compounds to a CSV file.
     *
     * @param name String, name of the file
     */
    /**
     * Writes the flagged drugs to the file {@code path + name}.
     *
     * <p>NOTE(review): the statements that produce the CSV output are missing from this extract.
     *
     * @param name output file name; it is appended to {@code path} without any separator
     * @throws Exception declared by the method; the file is opened for writing here
     */
    public void printCSV(String name) throws Exception {

        File file = new File(path + name);
        // FileWriter is opened in append mode (second argument true), so existing content is kept.
        // No charset is given, so the platform default encoding applies.
        try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(file, true)))) {
            for (DrugStruct drug : listDrug) {
                // Only drugs whose "flag" property is true are written.
                if (drug.drug.getProperty("flag")) {

    /**
     * Set the size of the pool, i.e. the number of threads.
     *
     * @param poolSize integer
     */
    public void setPoolSize(int poolSize) {
        this.poolSize = poolSize;

     * Get the size of the pool.
     * @return integer
    public int getPoolSize() {
        return poolSize;

    /**
     * Set the timeout value, in seconds. When a thread works longer than the
     * timeout, it is killed.
     *
     * @param timeout integer
     */
    public void setTimeout(int timeout) {
        this.timeout = timeout;

     * Get the timeout.
     * @return integer
    public int getTimeout() {
        return timeout;

    /**
     * Set the default path. For XML and CSV writing
     *
     * @param path String
     */
    public void setPath(String path) {
        this.path = path;

     * Get the default path.
     * @return String
    public String getPath() {
        return path;