Java tutorial
/* * * YAQP - Yet Another QSAR Project: * Machine Learning algorithms designed for the prediction of toxicological * features of chemical compounds become available on the Web. Yaqp is developed * under OpenTox (http://opentox.org) which is an FP7-funded EU research project. * This project was developed at the Automatic Control Lab in the Chemical Engineering * School of the National Technical University of Athens. Please read README for more * information. * * Copyright (C) 2009-2010 Pantelis Sopasakis & Charalampos Chomenides * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * Contact: * Pantelis Sopasakis * chvng@mail.ntua.gr * Address: Iroon Politechniou St. 9, Zografou, Athens Greece * tel. +30 210 7723236 */ package org.opentox.qsar.processors.trainers.classification; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Map; import org.opentox.config.ServerFolders; import org.opentox.core.exceptions.Cause; import org.opentox.ontology.components.Feature; import org.opentox.ontology.components.QSARModel; import org.opentox.ontology.components.QSARModel.ModelStatus; import org.opentox.ontology.util.AlgorithmParameter; import org.opentox.ontology.util.YaqpAlgorithms; import org.opentox.qsar.exceptions.QSARException; import org.opentox.www.rest.components.YaqpForm; import weka.classifiers.Evaluation; import weka.classifiers.bayes.NaiveBayes; import weka.core.Instances; import weka.core.converters.ArffSaver; /** * * @author Pantelis Sopasakis * @author Charalampos Chomenides */ public class NaiveBayesTrainer extends WekaClassifier { public NaiveBayesTrainer(final YaqpForm form) throws QSARException { super(form); } public NaiveBayesTrainer(final Map<String, AlgorithmParameter> parameters) throws QSARException { super(parameters); } public NaiveBayesTrainer() { super(); } public QSARModel train(Instances data) throws QSARException { // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING. final String rand = java.util.UUID.randomUUID().toString(); final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff"; final File tempFile = new File(temporaryFilePath); // SAVE THE DATA IN THE TEMPORARY FILE try { ArffSaver dataSaver = new ArffSaver(); dataSaver.setInstances(data); dataSaver.setDestination(new FileOutputStream(tempFile)); dataSaver.writeBatch(); if (!tempFile.exists()) { throw new IOException("Temporary File was not created"); } } catch (final IOException ex) {/* * The content of the dataset cannot be * written to the destination file due to * some communication issue. */ tempFile.delete(); throw new RuntimeException( "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex); } NaiveBayes classifier = new NaiveBayes(); String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath, /// Save the model in the following directory "-d", ServerFolders.models_weka + "/" + uuid }; try { Evaluation.evaluateModel(classifier, generalOptions); } catch (final Exception ex) { tempFile.delete(); throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train " + "an SVM model. Possible explanation : {" + ex.getMessage() + "}", ex); } QSARModel model = new QSARModel(); model.setParams(getParameters()); model.setCode(uuid.toString()); model.setAlgorithm(YaqpAlgorithms.NAIVE_BAYES); model.setDataset(datasetUri); model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT); ArrayList<Feature> independentFeatures = new ArrayList<Feature>(); for (int i = 0; i < data.numAttributes(); i++) { Feature f = new Feature(data.attribute(i).name()); if (data.classIndex() != i) { independentFeatures.add(f); } } Feature dependentFeature = new Feature(data.classAttribute().name()); Feature predictedFeature = dependentFeature; model.setDependentFeature(dependentFeature); model.setIndependentFeatures(independentFeatures); model.setPredictionFeature(predictedFeature); tempFile.delete(); return model; } }