// Java tutorial
/* * * YAQP - Yet Another QSAR Project: * Machine Learning algorithms designed for the prediction of toxicological * features of chemical compounds become available on the Web. Yaqp is developed * under OpenTox (http://opentox.org) which is an FP7-funded EU research project. * This project was developed at the Automatic Control Lab in the Chemical Engineering * School of the National Technical University of Athens. Please read README for more * information. * * Copyright (C) 2009-2010 Pantelis Sopasakis & Charalampos Chomenides * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * Contact: * Pantelis Sopasakis * chvng@mail.ntua.gr * Address: Iroon Politechniou St. 9, Zografou, Athens Greece * tel. 
+30 210 7723236 */ package org.opentox.qsar.processors.predictors; import java.util.Enumeration; import org.opentox.config.ServerFolders; import org.opentox.ontology.components.QSARModel; import org.opentox.qsar.exceptions.QSARException; import org.opentox.qsar.processors.filters.AttributeCleanup; import org.opentox.qsar.processors.filters.AttributeCleanup.ATTRIBUTE_TYPE; import weka.classifiers.Classifier; import weka.core.Attribute; import weka.core.FastVector; import weka.core.Instance; import weka.core.Instances; import weka.core.SerializationHelper; /** * * An implementation of {@link WekaPredictor } which uses the stored models on the server * to calculated the predicted values for the set of compounds provided in its input. * @author Pantelis Sopasakis * @author Charalampos Chomenides */ public final class SimplePredictor extends WekaPredictor { private String filePath = null; private SimplePredictor() { } public SimplePredictor(QSARModel model) throws QSARException { super(model); filePath = ServerFolders.models_weka + "/" + model.getCode(); } /** * Perform the prediction which is based on the serialized model file on the server. * @param data * Input data for with respect to which the predicitons are calculated * @return * A dataset containing the compounds submitted along with their predicted values. * @throws QSARException * In case the prediction (as a whole) is not feasible. If the prediction is not * feasible for a single instance, the prediction is set to <code>?</code> (unknown/undefined/missing). * If the prediction is not feasible for all instances, an exception (QSARException) is thrown. */ @Override public Instances predict(final Instances data) throws QSARException { Instances dataClone = new Instances(data); /** * IMPORTANT! 
* String attributes have to be removed from the dataset before * applying the prediciton */ dataClone = new AttributeCleanup(ATTRIBUTE_TYPE.string).filter(dataClone); /** * Set the class attribute of the incoming data to any arbitrary attribute * (Choose the last for instance). */ dataClone.setClass(dataClone.attribute(model.getDependentFeature().getURI())); /** * * Create the Instances that will host the predictions. This object contains * only two attributes: the compound_uri and the target feature of the model. */ Instances predictions = null; FastVector attributes = new FastVector(); final Attribute compoundAttribute = new Attribute("compound_uri", (FastVector) null); final Attribute targetAttribute = dataClone.classAttribute(); attributes.addElement(compoundAttribute); attributes.addElement(targetAttribute); predictions = new Instances("predictions", attributes, 0); predictions.setClassIndex(1); Instance predictionInstance = new Instance(2); try { final Classifier cls = (Classifier) SerializationHelper.read(filePath); for (int i = 0; i < data.numInstances(); i++) { try { String currentCompound = data.instance(i).stringValue(0); predictionInstance.setValue(compoundAttribute, currentCompound); if (targetAttribute.type() == Attribute.NUMERIC) { double clsLabel = cls.classifyInstance(dataClone.instance(i)); predictionInstance.setValue(targetAttribute, clsLabel); } else if (targetAttribute.type() == Attribute.NOMINAL) { double[] clsLable = cls.distributionForInstance(dataClone.instance(i)); int indexForNominalElement = maxInArray(clsLable).getPosition(); Enumeration nominalValues = targetAttribute.enumerateValues(); int counter = 0; String nomValue = ""; while (nominalValues.hasMoreElements()) { if (counter == indexForNominalElement) { nomValue = nominalValues.nextElement().toString(); break; } counter++; } predictionInstance.setValue(targetAttribute, nomValue); predictionInstance.setValue(targetAttribute, cls.classifyInstance(dataClone.instance(i))); } 
predictions.add(predictionInstance); } catch (Exception ex) { System.out.println(ex); } } } catch (Exception ex) { } return predictions; } /** * Auxiliary class used here only. An ArrayElement stands for an element of a * java array (e.g. String[]) along with its position (an integer). * * @param <E> data type for the element */ private class ArrayElement<E> { private final int position; private final E element; public ArrayElement(final int position, final E element) { this.position = position; this.element = element; } public E getElement() { return element; } public int getPosition() { return position; } } /** * Returns the element of an array where the maximum value occurs. * @param array * A double array * @return * The value and the position of the maximum. */ private ArrayElement<Double> maxInArray(double[] array) { if (array == null) throw new NullPointerException("You provided a null array - cannot proceed"); if (array.length == 0) return new ArrayElement<Double>(-1, Double.NaN); double max = array[0]; int position = 0; for (int i = 0; i < array.length; i++) { if (array[i] > max) { max = array[i]; position = i; } } return new ArrayElement<Double>(position, max); } }