Example usage for weka.core Instances classAttribute

List of usage examples for weka.core Instances classAttribute

Introduction

In this page you can find the example usage for weka.core Instances classAttribute.

Prototype


public Attribute classAttribute()

Source Link

Document

Returns the class attribute.

Usage

From source file:org.openml.webapplication.fantail.dc.statistical.AttributeEntropy.java

License:Open Source License

/**
 * Computes entropy-based qualities of a dataset.
 * <p>
 * For a nominal class attribute the values stored under {@code ids[0]} to {@code ids[14]} are,
 * in order: class entropy, mean attribute entropy, mean mutual information, class entropy
 * divided by mean mutual information, the noise-to-signal ratio, and then the minimum, maximum,
 * 25th, 50th and 75th percentile of the attribute entropies and of the mutual informations
 * (entropy first, mutual information second in each pair). {@code -1} is stored for the two
 * ratios when the mean mutual information is {@code <= 0}, and for the mean attribute entropy
 * when no nominal attribute exists besides the class. For any other class type every id in
 * {@code ids} is mapped to {@code -1.0}.
 *
 * @param data dataset to characterize; its class attribute is queried
 * @return a newly created map from quality id to value
 */
@Override
public Map<String, Double> characterize(Instances data) {
    // Count the nominal attributes, leaving the class attribute out.
    int nominalPredictors = 0;
    final int attributeTotal = data.numAttributes();
    for (int idx = 0; idx < attributeTotal; idx++) {
        if (idx != data.classIndex() && data.attribute(idx).isNominal()) {
            nominalPredictors++;
        }
    }

    final Map<String, Double> result = new HashMap<String, Double>();

    if (!data.classAttribute().isNominal()) {
        // numeric target: none of the qualities is defined
        for (String id : ids) {
            result.put(id, -1.0);
        }
        return result;
    }

    final double classEntropy = DCUntils.computeClassEntropy(data);
    final double[] entropies = DCUntils.computeAttributeEntropy(data);
    final double[] mutualInfos = DCUntils.computeMutualInformation(data);

    final double meanMutualInfo = StatUtils.mean(mutualInfos);
    final double meanEntropy = nominalPredictors > 0 ? StatUtils.mean(entropies) : -1;

    // Both ratios divide by the mean mutual information, so they are only
    // computed when the "<= 0" test fails.
    final boolean ratiosUndefined = meanMutualInfo <= 0;
    final double entropyRatio = ratiosUndefined ? -1 : classEntropy / meanMutualInfo;
    final double noiseSignalRatio = ratiosUndefined ? -1 : (meanEntropy - meanMutualInfo) / meanMutualInfo;

    result.put(ids[0], classEntropy);
    result.put(ids[1], meanEntropy);
    result.put(ids[2], meanMutualInfo);
    result.put(ids[3], entropyRatio);
    result.put(ids[4], noiseSignalRatio);

    result.put(ids[5], StatUtils.min(entropies));
    result.put(ids[6], StatUtils.min(mutualInfos));

    result.put(ids[7], StatUtils.max(entropies));
    result.put(ids[8], StatUtils.max(mutualInfos));

    // ids[9..14]: quartiles, entropy first and mutual information second.
    int slot = 9;
    for (int percentile = 25; percentile <= 75; percentile += 25) {
        result.put(ids[slot++], StatUtils.percentile(entropies, percentile));
        result.put(ids[slot++], StatUtils.percentile(mutualInfos, percentile));
    }
    return result;
}

From source file:org.openml.webapplication.fantail.dc.statistical.DefaultAccuracy.java

License:Open Source License

/**
 * Computes the fraction of instances whose class value equals the value returned by
 * {@code meanOrMode} for the class attribute.
 *
 * @param instances dataset to characterize; its class attribute is queried
 * @return a newly created map holding that fraction under {@code ids[0]}
 */
@Override
public Map<String, Double> characterize(Instances instances) {
    final Attribute target = instances.classAttribute();
    final double referenceValue = instances.meanOrMode(target);
    final int total = instances.numInstances();

    // Number of instances that a constant prediction of referenceValue gets right.
    int hits = 0;
    int row = 0;
    while (row < total) {
        if (instances.instance(row).value(target) == referenceValue) {
            hits++;
        }
        row++;
    }

    final double accuracy = (double) hits / total;
    final Map<String, Double> result = new HashMap<String, Double>();
    result.put(ids[0], accuracy);
    return result;
}

From source file:org.openml.webapplication.features.ExtractFeatures.java

License:Open Source License

/**
 * Describes every attribute of a dataset as a {@link Feature}.
 * <p>
 * As a side effect the class of {@code dataset} is set first: to the attribute named
 * {@code defaultClass} when that is non-null, otherwise to the last attribute.
 *
 * @param dataset the dataset to describe; its class index is modified
 * @param defaultClass name of the class attribute, or {@code null} to use the last attribute
 * @return one {@code Feature} per attribute, in attribute order
 */
public static List<Feature> getFeatures(Instances dataset, String defaultClass) {
    if (defaultClass != null) {
        dataset.setClass(dataset.attribute(defaultClass));
    } else {
        dataset.setClassIndex(dataset.numAttributes() - 1);
    }

    // The number of class values does not depend on the attribute being described, so it is
    // computed once. The classIndex() guard only matters for a dataset without attributes,
    // where the loop below does not run and the class attribute must not be dereferenced.
    final int numClassValues = dataset.classIndex() >= 0 && dataset.classAttribute().isNominal()
            ? dataset.classAttribute().numValues()
            : 0;
    final int numInstances = dataset.numInstances();

    final ArrayList<Feature> resultFeatures = new ArrayList<Feature>();

    for (int i = 0; i < dataset.numAttributes(); i++) {
        final Attribute att = dataset.attribute(i);
        final AttributeStatistics attributeStats = new AttributeStatistics(att, numClassValues);

        for (int j = 0; j < numInstances; ++j) {
            attributeStats.addValue(dataset.get(j).value(i), dataset.get(j).classValue());
        }

        // Counts provided by Weka's own per-attribute statistics.
        final AttributeStats as = dataset.attributeStats(i);
        final Integer numberOfDistinctValues = as.distinctCount;
        final Integer numberOfUniqueValues = as.uniqueCount;
        final Integer numberOfMissingValues = as.missingCount;
        final Integer numberOfIntegerValues = as.intCount;
        final Integer numberOfRealValues = as.realCount;

        // Only meaningful for nominal attributes; left null otherwise.
        final Integer numberOfNominalValues = att.isNominal() ? Integer.valueOf(att.numValues()) : null;
        final Integer numberOfValues = attributeStats.getTotalObservations();

        // Numeric summaries stay null for non-numeric attributes.
        Double maximumValue = null;
        Double minimumValue = null;
        Double meanValue = null;
        Double standardDeviation = null;
        if (att.isNumeric()) {
            maximumValue = attributeStats.getMaximum();
            minimumValue = attributeStats.getMinimum();
            meanValue = attributeStats.getMean();
            standardDeviation = 0.0; // reported when the computation below fails
            try {
                standardDeviation = attributeStats.getStandardDeviation();
            } catch (Exception e) {
                Conversion.log("WARNING", "StdDev", "Could not compute standard deviation of feature "
                        + att.name() + ": " + e.getMessage());
            }
        }

        final String data_type;
        switch (att.type()) {
        case Attribute.NUMERIC:
            data_type = "numeric";
            break;
        case Attribute.NOMINAL:
            data_type = "nominal";
            break;
        case Attribute.STRING:
            data_type = "string";
            break;
        default:
            data_type = "unknown";
            break;
        }

        resultFeatures.add(new Feature(att.index(), att.name(), data_type, att.index() == dataset.classIndex(),
                numberOfDistinctValues, numberOfUniqueValues, numberOfMissingValues, numberOfIntegerValues,
                numberOfRealValues, numberOfNominalValues, numberOfValues, maximumValue, minimumValue,
                meanValue, standardDeviation, attributeStats.getClassDistribution()));
    }
    return resultFeatures;
}

From source file:org.opentox.qsar.processors.predictors.SimplePredictor.java

License:Open Source License

/**
 * Perform the prediction which is based on the serialized model file on the server.
 * @param data/*from   w  w  w  . j  av  a  2s.com*/
 *      Input data for with respect to which the predicitons are calculated
 * @return
 *      A dataset containing the compounds submitted along with their predicted values.
 * @throws QSARException
 *      In case the prediction (as a whole) is not feasible. If the prediction is not
 *      feasible for a single instance, the prediction is set to <code>?</code> (unknown/undefined/missing).
 *      If the prediction is not feasible for all instances, an exception (QSARException) is thrown.
 */
@Override
public Instances predict(final Instances data) throws QSARException {

    Instances dataClone = new Instances(data);
    /**
     * IMPORTANT!
     * String attributes have to be removed from the dataset before
     * applying the prediciton
     */
    dataClone = new AttributeCleanup(ATTRIBUTE_TYPE.string).filter(dataClone);

    /**
     * Set the class attribute of the incoming data to any arbitrary attribute
     * (Choose the last for instance).
     */
    dataClone.setClass(dataClone.attribute(model.getDependentFeature().getURI()));

    /**
     *
     * Create the Instances that will host the predictions. This object contains
     * only two attributes: the compound_uri and the target feature of the model.
     */
    Instances predictions = null;
    FastVector attributes = new FastVector();
    final Attribute compoundAttribute = new Attribute("compound_uri", (FastVector) null);
    final Attribute targetAttribute = dataClone.classAttribute();
    attributes.addElement(compoundAttribute);
    attributes.addElement(targetAttribute);

    predictions = new Instances("predictions", attributes, 0);
    predictions.setClassIndex(1);

    Instance predictionInstance = new Instance(2);
    try {
        final Classifier cls = (Classifier) SerializationHelper.read(filePath);

        for (int i = 0; i < data.numInstances(); i++) {
            try {
                String currentCompound = data.instance(i).stringValue(0);
                predictionInstance.setValue(compoundAttribute, currentCompound);

                if (targetAttribute.type() == Attribute.NUMERIC) {
                    double clsLabel = cls.classifyInstance(dataClone.instance(i));
                    predictionInstance.setValue(targetAttribute, clsLabel);
                } else if (targetAttribute.type() == Attribute.NOMINAL) {
                    double[] clsLable = cls.distributionForInstance(dataClone.instance(i));
                    int indexForNominalElement = maxInArray(clsLable).getPosition();
                    Enumeration nominalValues = targetAttribute.enumerateValues();
                    int counter = 0;
                    String nomValue = "";
                    while (nominalValues.hasMoreElements()) {
                        if (counter == indexForNominalElement) {
                            nomValue = nominalValues.nextElement().toString();
                            break;
                        }
                        counter++;
                    }
                    predictionInstance.setValue(targetAttribute, nomValue);

                    predictionInstance.setValue(targetAttribute, cls.classifyInstance(dataClone.instance(i)));
                }

                predictions.add(predictionInstance);
            } catch (Exception ex) {
                System.out.println(ex);
            }
        }

    } catch (Exception ex) {
    }

    return predictions;
}

From source file:org.opentox.qsar.processors.trainers.classification.NaiveBayesTrainer.java

License:Open Source License

/**
 * Trains a Naive Bayes classifier on the given data and returns its model description.
 * <p>
 * The data are written to a temporary ARFF file which is handed to
 * {@code Evaluation.evaluateModel} together with the 1-based class index ({@code -c}) and the
 * destination of the serialized model ({@code -d}). The temporary file is removed before the
 * method returns, whether training succeeded or not.
 *
 * @param data the training data; its class index identifies the dependent feature
 * @return the description of the trained model, with status {@code UNDER_DEVELOPMENT}
 * @throws QSARException if the training itself fails (cause {@code XQReg350})
 * @throws RuntimeException if the temporary ARFF file cannot be written
 */
public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    try {
        // SAVE THE DATA IN THE TEMPORARY FILE
        try {
            ArffSaver dataSaver = new ArffSaver();
            dataSaver.setInstances(data);
            dataSaver.setDestination(new FileOutputStream(tempFile));
            dataSaver.writeBatch();
            if (!tempFile.exists()) {
                throw new IOException("Temporary File was not created");
            }
        } catch (final IOException ex) {
            // The content of the dataset cannot be written to the destination file.
            throw new RuntimeException(
                    "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex);
        }

        NaiveBayes classifier = new NaiveBayes();

        String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
                /// Save the model in the following directory
                "-d", ServerFolders.models_weka + "/" + uuid };

        try {
            Evaluation.evaluateModel(classifier, generalOptions);
        } catch (final Exception ex) {
            throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                    + "a Naive Bayes model. Possible explanation : {" + ex.getMessage() + "}", ex);
        }

        QSARModel model = new QSARModel();

        model.setParams(getParameters());
        model.setCode(uuid.toString());
        model.setAlgorithm(YaqpAlgorithms.NAIVE_BAYES);
        model.setDataset(datasetUri);
        model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

        // Every attribute except the class is an independent feature.
        ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
        for (int i = 0; i < data.numAttributes(); i++) {
            Feature f = new Feature(data.attribute(i).name());
            if (data.classIndex() != i) {
                independentFeatures.add(f);
            }
        }

        // The predicted feature is the dependent feature itself.
        Feature dependentFeature = new Feature(data.classAttribute().name());
        Feature predictedFeature = dependentFeature;
        model.setDependentFeature(dependentFeature);
        model.setIndependentFeatures(independentFeatures);
        model.setPredictionFeature(predictedFeature);
        return model;
    } finally {
        // The ARFF dump is only needed while training runs.
        tempFile.delete();
    }
}

From source file:org.opentox.qsar.processors.trainers.classification.SVCTrainer.java

License:Open Source License

/**
 * Trains a support vector classifier (Weka {@code SMO}) on the given data and returns its
 * model description.
 * <p>
 * The data are written to a temporary ARFF file, the kernel is built from the configured
 * kernel name (rbf, polynomial or linear), and {@code Evaluation.evaluateModel} is invoked
 * with the ARFF file ({@code -t}), the 1-based class index ({@code -c}) and the destination
 * of the serialized model ({@code -d}).
 *
 * @param data the training data; its class index identifies the dependent feature
 * @return the description of the trained model, with status {@code UNDER_DEVELOPMENT}
 * @throws QSARException if the training itself fails (cause {@code XQReg350})
 * @throws RuntimeException if the temporary ARFF file cannot be written
 */
public QSARModel train(Instances data) throws QSARException {

    // The training data are dumped to a uniquely named ARFF file in the temp folder.
    final String arffPath = ServerFolders.temp + "/" + java.util.UUID.randomUUID().toString() + ".arff";
    final File arffFile = new File(arffPath);

    try {
        final ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setDestination(new FileOutputStream(arffFile));
        saver.writeBatch();
        if (!arffFile.exists()) {
            throw new IOException("Temporary File was not created");
        }
    } catch (final IOException ioFailure) {
        // The dataset could not be written to the destination file.
        arffFile.delete();
        throw new RuntimeException(
                "Unexpected condition while trying to save the dataset in a temporary ARFF file", ioFailure);
    }

    // Classifier set-up.
    final SMO smo = new SMO();
    smo.setEpsilon(0.1);
    smo.setToleranceParameter(tolerance);

    // Kernel according to the posted parameters; stays null for an unrecognised name.
    Kernel selectedKernel = null;
    final String kernelName = this.kernel;
    if (kernelName.equalsIgnoreCase("rbf")) {
        final RBFKernel rbf = new RBFKernel();
        rbf.setGamma(gamma);
        rbf.setCacheSize(cacheSize);
        selectedKernel = rbf;
    } else {
        final boolean polynomial = kernelName.equalsIgnoreCase("polynomial");
        if (polynomial || kernelName.equalsIgnoreCase("linear")) {
            // A linear kernel is a polynomial kernel of exponent 1.
            final PolyKernel poly = new PolyKernel();
            if (polynomial) {
                poly.setExponent(degree);
            } else {
                poly.setExponent((double) 1.0);
            }
            poly.setCacheSize(cacheSize);
            poly.setUseLowerOrder(true);
            selectedKernel = poly;
        }
    }
    smo.setKernel(selectedKernel);

    final String serializedModelPath = ServerFolders.models_weka + "/" + uuid.toString();
    final String oneBasedClassIndex = Integer.toString(data.classIndex() + 1);
    final String[] evaluationOptions = { "-c", oneBasedClassIndex, "-t", arffPath, "-d", serializedModelPath };

    // Build the classification model and store it as a serialized Weka file.
    try {
        Evaluation.evaluateModel(smo, evaluationOptions);
    } catch (final Exception trainingFailure) {
        arffFile.delete();
        final String explanation = "Unexpected condition while trying to train "
                + "a support vector classification model. Possible explanation : {"
                + trainingFailure.getMessage() + "}";
        throw new QSARException(Cause.XQReg350, explanation, trainingFailure);
    }

    // Every attribute except the class is an independent feature.
    final ArrayList<Feature> inputs = new ArrayList<Feature>();
    final int attributeCount = data.numAttributes();
    for (int pos = 0; pos < attributeCount; pos++) {
        final Feature candidate = new Feature(data.attribute(pos).name());
        if (pos != data.classIndex()) {
            inputs.add(candidate);
        }
    }

    // The predicted feature is the dependent feature itself.
    final Feature dependent = new Feature(data.classAttribute().name());

    final QSARModel trained = new QSARModel();
    trained.setCode(uuid.toString());
    trained.setAlgorithm(YaqpAlgorithms.SVC);
    trained.setPredictionFeature(dependent);
    trained.setDependentFeature(dependent);
    trained.setIndependentFeatures(inputs);
    trained.setDataset(datasetUri);
    trained.setParams(getParameters());
    trained.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

    arffFile.delete();
    return trained;
}

From source file:org.opentox.qsar.processors.trainers.regression.MLRTrainer.java

License:Open Source License

/**
 * Trains the MLR model given an Instances object with the training data. The prediction
 * feature (class attribute) is specified in the constructor of the class.
 * <p>
 * The data are written to a temporary ARFF file, a linear regression is built to produce the
 * PMML representation, and {@code Evaluation.evaluateModel} is then invoked to store the
 * serialized Weka model ({@code -d}). The temporary file is removed before the method
 * returns, whether training succeeded or not.
 *
 * @param data The training data as <code>weka.core.Instances</code> object.
 * @return The QSARModel corresponding to the trained model.
 * @throws QSARException In case the model cannot be trained
 * <table>
 * <thead>
 * <tr>
 * <td><b>Code</b></td><td><b>Explanation</b></td>
 * </tr>
 * </thead>
 * <tbody>
 * <tr>
 * <td>XQReg1</td><td>Could not train the model</td>
 * </tr>
 * <tr>
 * <td>XQReg2</td><td>Could not generate PMML representation for the model</td>
 * </tr>
 * <tr>
 * <td>XQReg202</td><td>The prediction feature you provided is not a valid numeric attribute of the dataset</td>
 * </tr>
 * <tr>
 * <td>XQReg350</td><td>Unexpected condition while storing the serialized model</td>
 * </tr>
 * </tbody>
 * </table>
 * @throws NullPointerException
 *      In case the provided training data is null.
 * @throws RuntimeException
 *      In case the temporary ARFF file cannot be written.
 */
public QSARModel train(Instances data) throws QSARException {

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    try {
        // SAVE THE DATA IN THE TEMPORARY FILE
        try {
            ArffSaver dataSaver = new ArffSaver();
            dataSaver.setInstances(data);
            dataSaver.setDestination(new FileOutputStream(tempFile));
            dataSaver.writeBatch();
        } catch (final IOException ex) {
            throw new RuntimeException(
                    "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex);
        }

        LinearRegression linreg = new LinearRegression();
        String[] linRegOptions = { "-S", "1", "-C" };
        try {
            linreg.setOptions(linRegOptions);
            linreg.buildClassifier(data);
        } catch (final Exception ex) {// illegal options or could not build the classifier!
            String message = "MLR Model could not be trained";
            YaqpLogger.LOG.log(new Trace(getClass(), message + " :: " + ex));
            throw new QSARException(Cause.XQReg1, message, ex);
        }

        try {
            generatePMML(linreg, data);
        } catch (final YaqpIOException ex) {
            String message = "Could not generate PMML representation for MLR model :: " + ex;
            throw new QSARException(Cause.XQReg2, message, ex);
        }

        // PERFORM THE TRAINING
        String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
                /// Save the model in the following directory
                "-d", ServerFolders.models_weka + "/" + uuid };
        try {
            Evaluation.evaluateModel(linreg, generalOptions);
        } catch (final Exception ex) {
            throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                    + "an MLR model. Possible explanation : {" + ex.getMessage() + "}", ex);
        }

        // Every attribute except the class is an independent feature.
        ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
        for (int i = 0; i < data.numAttributes(); i++) {
            Feature f = new Feature(data.attribute(i).name());
            if (data.classIndex() != i) {
                independentFeatures.add(f);
            }
        }

        // The predicted feature is the dependent feature itself.
        Feature dependentFeature = new Feature(data.classAttribute().name());
        Feature predictedFeature = dependentFeature;

        QSARModel model = new QSARModel(uuid.toString(), predictedFeature, dependentFeature, independentFeatures,
                YaqpAlgorithms.MLR, new User(), null, datasetUri, ModelStatus.UNDER_DEVELOPMENT);
        model.setParams(new HashMap<String, AlgorithmParameter>());

        return model;
    } finally {
        // Previously the ARFF dump was left behind on success and on most failure paths.
        tempFile.delete();
    }
}

From source file:org.opentox.qsar.processors.trainers.regression.MLRTrainer.java

License:Open Source License

/**
 * Generates the PMML representation of the model and stores it on the hard disk, in
 * {@code ServerFolders.models_pmml} under the model's uuid.
 * <p>
 * Attribute names, the prediction feature and the dataset URI are XML-escaped before being
 * written into attribute values.
 *
 * @param wekaModel The trained linear regression whose coefficients are exported. The
 *      coefficient array is indexed by attribute position and its last element is written
 *      as the intercept.
 * @param data The training data, used for the attribute names and the class attribute.
 * @throws YaqpIOException If the PMML file cannot be written (cause {@code XQReg3}).
 * TODO: build the XML using some XML editor
 */
// <editor-fold defaultstate="collapsed" desc="PMML generation routine!">
private void generatePMML(final LinearRegression wekaModel, final Instances data) throws YaqpIOException {
    final double[] coefficients = wekaModel.coefficients();
    final int numAttributes = data.numAttributes();
    final String modelId = uuid.toString();
    final String target = escapeXmlAttribute(predictionFeature);

    final StringBuilder pmml = new StringBuilder();
    pmml.append("<?xml version=\"1.0\" ?>");
    pmml.append(PMMLIntro);
    pmml.append("<Model ID=\"").append(modelId).append("\" Name=\"MLR Model\">\n");
    pmml.append("<AlgorithmID href=\"").append(Configuration.BASE_URI).append("/algorithm/mlr\"/>\n");
    pmml.append("<DatasetID href=\"").append(escapeXmlAttribute(String.valueOf(datasetUri))).append("\"/>\n");
    pmml.append("<AlgorithmParameters />\n");
    pmml.append("<FeatureDefinitions>\n");
    for (int k = 0; k < numAttributes; k++) {
        pmml.append("<link href=\"").append(escapeXmlAttribute(data.attribute(k).name())).append("\"/>\n");
    }
    pmml.append("<target index=\"").append(data.attribute(predictionFeature).index()).append("\" name=\"")
            .append(target).append("\"/>\n");
    pmml.append("</FeatureDefinitions>\n");
    pmml.append("<Timestamp>").append(java.util.GregorianCalendar.getInstance().getTime())
            .append("</Timestamp>\n");
    pmml.append("</Model>\n");

    pmml.append("<DataDictionary numberOfFields=\"").append(numAttributes).append("\" >\n");
    for (int k = 0; k < numAttributes; k++) {
        pmml.append("<DataField name=\"").append(escapeXmlAttribute(data.attribute(k).name()))
                .append("\" optype=\"continuous\" dataType=\"double\" />\n");
    }
    pmml.append("</DataDictionary>\n");

    // RegressionModel
    pmml.append("<RegressionModel modelName=\"").append(modelId).append("\"")
            .append(" functionName=\"regression\"")
            .append(" modelType=\"linearRegression\"")
            .append(" algorithmName=\"linearRegression\"")
            .append(" targetFieldName=\"").append(escapeXmlAttribute(data.classAttribute().name())).append("\"")
            .append(">\n");

    // RegressionModel::MiningSchema - the predictors first, the predicted field last
    pmml.append("<MiningSchema>\n");
    for (int k = 0; k < numAttributes; k++) {
        if (k != data.classIndex()) {
            pmml.append("<MiningField name=\"").append(escapeXmlAttribute(data.attribute(k).name()))
                    .append("\" />\n");
        }
    }
    pmml.append("<MiningField name=\"").append(escapeXmlAttribute(data.attribute(data.classIndex()).name()))
            .append("\" ").append("usageType=\"predicted\"/>\n");
    pmml.append("</MiningSchema>\n");

    // RegressionModel::RegressionTable
    pmml.append("<RegressionTable intercept=\"").append(coefficients[coefficients.length - 1]).append("\">\n");
    for (int k = 0; k < numAttributes; k++) {
        final String attributeName = data.attribute(k).name();
        if (!(predictionFeature.equals(attributeName))) {
            pmml.append("<NumericPredictor name=\"").append(escapeXmlAttribute(attributeName)).append("\" ")
                    .append(" exponent=\"1\" ")
                    .append("coefficient=\"").append(coefficients[k]).append("\"/>\n");
        }
    }
    pmml.append("</RegressionTable>\n");
    pmml.append("</RegressionModel>\n");
    pmml.append("</PMML>\n\n");

    try {
        final BufferedWriter writer = new BufferedWriter(
                new FileWriter(ServerFolders.models_pmml + "/" + modelId));
        try {
            writer.write(pmml.toString());
            writer.flush();
        } finally {
            // Release the file handle even when writing fails.
            writer.close();
        }
    } catch (IOException ex) {
        throw new YaqpIOException(Cause.XQReg3, "Could not write data to PMML file :" + uuid.toString(), ex);
    }
}

/**
 * Escapes the characters that may not appear verbatim inside a double-quoted XML attribute
 * value ({@code &}, {@code <}, {@code >} and {@code "}).
 *
 * @param value the raw text
 * @return the text with those four characters replaced by their predefined entities
 */
private String escapeXmlAttribute(final String value) {
    final StringBuilder escaped = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
        final char c = value.charAt(i);
        switch (c) {
        case '&':
            escaped.append("&amp;");
            break;
        case '<':
            escaped.append("&lt;");
            break;
        case '>':
            escaped.append("&gt;");
            break;
        case '"':
            escaped.append("&quot;");
            break;
        default:
            escaped.append(c);
            break;
        }
    }
    return escaped.toString();
}

From source file:org.opentox.qsar.processors.trainers.regression.SVMTrainer.java

License:Open Source License

/**
 * Trains a support vector regression model (Weka {@code SVMreg}) on the given data and
 * returns its model description.
 * <p>
 * The data are written to a temporary ARFF file, the kernel is built from the configured
 * kernel name (rbf, polynomial or linear), and {@code Evaluation.evaluateModel} is invoked
 * with the ARFF file ({@code -t}), the 1-based class index ({@code -c}) and the destination
 * of the serialized model ({@code -d}). The temporary file is removed before the method
 * returns, whether training succeeded or not.
 *
 * @param data the training data; its class index identifies the dependent feature
 * @return the description of the trained model, with status {@code UNDER_DEVELOPMENT}
 * @throws QSARException if the training itself fails (cause {@code XQReg350})
 * @throws IllegalArgumentException if the regressor rejects the epsilon/tolerance options
 * @throws RuntimeException if the temporary ARFF file cannot be written
 */
public QSARModel train(Instances data) throws QSARException {

    // NOTE: The checks (check if data is null and if the prediction feature is
    //       acceptable are found in WekaRegressor. The method preprocessData(Instances)
    //       does this job.

    // GET A UUID AND DEFINE THE TEMPORARY FILE WHERE THE TRAINING DATA
    // ARE STORED IN ARFF FORMAT PRIOR TO TRAINING.
    final String rand = java.util.UUID.randomUUID().toString();
    final String temporaryFilePath = ServerFolders.temp + "/" + rand + ".arff";
    final File tempFile = new File(temporaryFilePath);

    try {
        // SAVE THE DATA IN THE TEMPORARY FILE
        try {
            ArffSaver dataSaver = new ArffSaver();
            dataSaver.setInstances(data);
            dataSaver.setDestination(new FileOutputStream(tempFile));
            dataSaver.writeBatch();
        } catch (final IOException ex) {
            throw new RuntimeException(
                    "Unexpected condition while trying to save the " + "dataset in a temporary ARFF file", ex);
        }

        // INITIALIZE THE REGRESSOR
        SVMreg regressor = new SVMreg();
        final String[] regressorOptions = { "-P", Double.toString(epsilon), "-T", Double.toString(tolerance) };

        // The numeric settings are passed straight to the kernel; the former round trip
        // through a String yielded exactly the same values.
        Kernel svm_kernel = null;
        if (kernel.equalsIgnoreCase("rbf")) {
            RBFKernel rbf_kernel = new RBFKernel();
            rbf_kernel.setGamma(gamma);
            rbf_kernel.setCacheSize(cacheSize);
            svm_kernel = rbf_kernel;
        } else if (kernel.equalsIgnoreCase("polynomial")) {
            PolyKernel poly_kernel = new PolyKernel();
            poly_kernel.setExponent(degree);
            poly_kernel.setCacheSize(cacheSize);
            poly_kernel.setUseLowerOrder(true);
            svm_kernel = poly_kernel;
        } else if (kernel.equalsIgnoreCase("linear")) {
            // A linear kernel is a polynomial kernel of exponent 1.
            PolyKernel poly_kernel = new PolyKernel();
            poly_kernel.setExponent(1.0);
            poly_kernel.setCacheSize(cacheSize);
            poly_kernel.setUseLowerOrder(true);
            svm_kernel = poly_kernel;
        }
        regressor.setKernel(svm_kernel);
        try {
            regressor.setOptions(regressorOptions);
        } catch (final Exception ex) {
            throw new IllegalArgumentException("Bad options in SVM trainer for epsilon = {" + epsilon + "} or "
                    + "tolerance = {" + tolerance + "}.", ex);
        }

        // PERFORM THE TRAINING
        String[] generalOptions = { "-c", Integer.toString(data.classIndex() + 1), "-t", temporaryFilePath,
                /// Save the model in the following directory
                "-d", ServerFolders.models_weka + "/" + uuid };
        try {
            Evaluation.evaluateModel(regressor, generalOptions);
        } catch (final Exception ex) {
            throw new QSARException(Cause.XQReg350, "Unexpected condition while trying to train "
                    + "an SVM model. Possible explanation : {" + ex.getMessage() + "}", ex);
        }

        QSARModel model = new QSARModel();

        model.setParams(getParameters());
        model.setCode(uuid.toString());
        model.setAlgorithm(YaqpAlgorithms.SVM);
        model.setDataset(datasetUri);
        model.setModelStatus(ModelStatus.UNDER_DEVELOPMENT);

        // Every attribute except the class is an independent feature.
        ArrayList<Feature> independentFeatures = new ArrayList<Feature>();
        for (int i = 0; i < data.numAttributes(); i++) {
            Feature f = new Feature(data.attribute(i).name());
            if (data.classIndex() != i) {
                independentFeatures.add(f);
            }
        }

        // The predicted feature is the dependent feature itself.
        Feature dependentFeature = new Feature(data.classAttribute().name());
        Feature predictedFeature = dependentFeature;
        model.setDependentFeature(dependentFeature);
        model.setIndependentFeatures(independentFeatures);
        model.setPredictionFeature(predictedFeature);
        return model;
    } finally {
        // The ARFF dump is only needed while training runs.
        tempFile.delete();
    }
}

From source file:org.pentaho.di.scoring.WekaScoringDialog.java

License:Open Source License

/**
 * Enables or disables the "output probabilities" checkbox according to the model.
 * <p>
 * A model that is not a supervised learner is treated as a clusterer and asked whether it
 * can produce probabilities. For a supervised model the header's class attribute decides:
 * a numeric class disables the checkbox, any other class enables it. When the header has no
 * class set, the checkbox is left as it is. Whenever the checkbox is disabled it is also
 * deselected.
 *
 * @param tempM the model to inspect
 */
private void checkAbilityToProduceProbabilities(WekaScoringModel tempM) {
    final boolean probabilitiesPossible;

    if (tempM.isSupervisedLearningModel()) {
        final Instances header = tempM.getHeader();
        if (header.classIndex() < 0) {
            // no class attribute set: nothing to decide
            return;
        }
        probabilitiesPossible = !header.classAttribute().isNumeric();
    } else {
        probabilitiesPossible = ((WekaScoringClusterer) tempM).canProduceProbabilities();
    }

    if (!probabilitiesPossible) {
        m_wOutputProbs.setSelection(false);
    }
    m_wOutputProbs.setEnabled(probabilitiesPossible);
}