org.opentox.qsar.processors.trainers.classification.NaiveBayesTrainer.java Source code

Java tutorial

Introduction

Here is the source code for org.opentox.qsar.processors.trainers.classification.NaiveBayesTrainer.java.

Source

/*
 *
 * YAQP - Yet Another QSAR Project:
 * Machine Learning algorithms designed for the prediction of toxicological
 * features of chemical compounds become available on the Web. Yaqp is developed
 * under OpenTox (http://opentox.org) which is an FP7-funded EU research project.
 * This project was developed at the Automatic Control Lab in the Chemical Engineering
 * School of the National Technical University of Athens. Please read README for more
 * information.
 *
 * Copyright (C) 2009-2010 Pantelis Sopasakis & Charalampos Chomenides
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contact:
 * Pantelis Sopasakis
 * chvng@mail.ntua.gr
 * Address: Iroon Politechniou St. 9, Zografou, Athens Greece
 * tel. +30 210 7723236
 */
package org.opentox.qsar.processors.trainers.classification;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.opentox.config.ServerFolders;
import org.opentox.core.exceptions.Cause;
import org.opentox.ontology.components.Feature;
import org.opentox.ontology.components.QSARModel;
import org.opentox.ontology.components.QSARModel.ModelStatus;
import org.opentox.ontology.util.AlgorithmParameter;
import org.opentox.ontology.util.YaqpAlgorithms;
import org.opentox.qsar.exceptions.QSARException;
import org.opentox.www.rest.components.YaqpForm;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;
import weka.core.converters.ArffSaver;

/**
 * Trainer that builds a Naive Bayes classification model with Weka and
 * describes the result as a {@link QSARModel}.
 *
 * <p>The training data are first written to a temporary ARFF file, Weka's
 * command-line style evaluation is then run on that file, and the trained
 * classifier is stored by Weka under {@code ServerFolders.models_weka}.</p>
 *
 * @author Pantelis Sopasakis
 * @author Charalampos Chomenides
 */
public class NaiveBayesTrainer extends WekaClassifier {

    /**
     * Creates a trainer configured from a form; delegates to the
     * corresponding {@link WekaClassifier} constructor.
     *
     * @param form the form passed on to the superclass
     * @throws QSARException if the superclass constructor rejects the form
     */
    public NaiveBayesTrainer(final YaqpForm form) throws QSARException {
        super(form);
    }

    /**
     * Creates a trainer configured from a map of algorithm parameters;
     * delegates to the corresponding {@link WekaClassifier} constructor.
     *
     * @param parameters the parameters passed on to the superclass
     * @throws QSARException if the superclass constructor rejects the parameters
     */
    public NaiveBayesTrainer(final Map<String, AlgorithmParameter> parameters) throws QSARException {
        super(parameters);
    }

    /**
     * Creates a trainer using the superclass defaults.
     */
    public NaiveBayesTrainer() {
        super();
    }

    /**
     * Trains a Naive Bayes classifier on the given data and returns the
     * description of the resulting model.
     *
     * <p>The data are written to a uniquely named ARFF file under
     * {@code ServerFolders.temp}; that file is always removed before this
     * method returns, whether training succeeds or fails. Weka is asked
     * (option {@code -d}) to save the trained classifier at
     * {@code ServerFolders.models_weka + "/" + uuid}.</p>
     *
     * @param data the training instances; its class index identifies the
     *             dependent feature, all other attributes are treated as
     *             independent features
     * @return a model marked {@code UNDER_DEVELOPMENT} that carries this
     *         trainer's parameters, code, dataset and features
     * @throws QSARException    (cause {@code XQReg350}) if Weka fails while
     *                          training or saving the classifier
     * @throws RuntimeException if the data cannot be written to the
     *                          temporary ARFF file
     */
    public QSARModel train(Instances data) throws QSARException {

        // A random UUID keeps concurrent training requests from sharing a file.
        final String rand = java.util.UUID.randomUUID().toString();
        final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
        final File tempFile = new File(temporaryFilePath);

        try {
            saveAsTemporaryArff(data, tempFile);

            NaiveBayes classifier = new NaiveBayes();

            String[] generalOptions = {
                // Weka's -c option is 1-based, classIndex() is 0-based.
                "-c", Integer.toString(data.classIndex() + 1),
                "-t", temporaryFilePath,
                // Save the model in the following location.
                "-d", ServerFolders.models_weka + "/" + uuid };

            try {
                Evaluation.evaluateModel(classifier, generalOptions);
            } catch (final Exception ex) {
                throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                        + "a Naive Bayes model. Possible explanation : {" + ex.getMessage() + "}", ex);
            }

            QSARModel model = new QSARModel();

            model.setParams(getParameters());
            model.setCode(uuid.toString());
            model.setAlgorithm(YaqpAlgorithms.NAIVE_BAYES);
            model.setDataset(datasetUri);
            model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

            // Every attribute except the class attribute is an independent feature.
            ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
            for (int i = 0; i < data.numAttributes(); i++) {
                if (data.classIndex() != i) {
                    independentFeatures.add(new Feature(data.attribute(i).name()));
                }
            }

            // The predicted feature is the same object as the dependent feature.
            Feature dependentFeature = new Feature(data.classAttribute().name());
            Feature predictedFeature = dependentFeature;
            model.setDependentFeature(dependentFeature);
            model.setIndependentFeatures(independentFeatures);
            model.setPredictionFeature(predictedFeature);
            return model;
        } finally {
            // Single cleanup point: covers success, I/O failure, training
            // failure and any unexpected unchecked exception.
            tempFile.delete();
        }
    }

    /**
     * Writes the given instances to the destination file in ARFF format.
     *
     * @param data        the instances to store
     * @param destination the file to write to
     * @throws RuntimeException wrapping the {@link IOException} if the data
     *                          cannot be written or the file does not exist
     *                          afterwards
     */
    private static void saveAsTemporaryArff(final Instances data, final File destination) {
        FileOutputStream out = null;
        try {
            ArffSaver dataSaver = new ArffSaver();
            dataSaver.setInstances(data);
            out = new FileOutputStream(destination);
            dataSaver.setDestination(out);
            dataSaver.writeBatch();
            if (!destination.exists()) {
                throw new IOException("Temporary File was not created");
            }
        } catch (final IOException ex) {
            throw new RuntimeException(
                    "Unexpected condition while trying to save the dataset in a temporary ARFF file", ex);
        } finally {
            // Make sure the handle is released even when the saver failed
            // before it could close the stream itself; otherwise the file
            // descriptor leaks and the temporary file may not be deletable.
            if (out != null) {
                try {
                    out.close();
                } catch (final IOException ignored) {
                    // Nothing useful can be done here: either the write
                    // already succeeded, or a more informative exception
                    // is already propagating.
                }
            }
        }
    }
}