org.openml.webapplication.io.Output.java Source code


Introduction

Here is the source code for org.openml.webapplication.io.Output.java. The Output class is a small utility of the OpenML web application: it converts Weka Evaluation results into a map of OpenML metric scores, formats JSON status and error messages, and writes Weka Instances to ARFF files.

Source

/*
 *  Webapplication - Java library that runs on OpenML servers
 *  Copyright (C) 2014 
 *  @author Jan N. van Rijn (j.n.van.rijn@liacs.leidenuniv.nl)
 *  
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *  
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *  
 */
package org.openml.webapplication.io;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.openml.apiconnector.models.Metric;
import org.openml.apiconnector.models.MetricScore;
import org.openml.webapplication.evaluate.TaskType;
import org.openml.webapplication.models.JsonItem;

import weka.classifiers.Evaluation;
import weka.core.Instances;
import weka.core.Utils;

public class Output {

    /*
     *  Transforms a Weka Evaluation object into a HashMap with scores.
     */
    public static Map<Metric, MetricScore> evaluatorToMap(Evaluation evaluator, int classes, TaskType task)
            throws Exception {
        Map<Metric, MetricScore> m = new HashMap<Metric, MetricScore>();

        if (task == TaskType.REGRESSION) {

            // here all measures for regression tasks
            m.put(new Metric("mean_absolute_error", "openml.evaluation.mean_absolute_error(1.0)"),
                    new MetricScore(evaluator.meanAbsoluteError(), (int) evaluator.numInstances()));
            m.put(new Metric("mean_prior_absolute_error", "openml.evaluation.mean_prior_absolute_error(1.0)"),
                    new MetricScore(evaluator.meanPriorAbsoluteError(), (int) evaluator.numInstances()));
            m.put(new Metric("root_mean_squared_error", "openml.evaluation.root_mean_squared_error(1.0)"),
                    new MetricScore(evaluator.rootMeanSquaredError(), (int) evaluator.numInstances()));
            m.put(new Metric("root_mean_prior_squared_error",
                    "openml.evaluation.root_mean_prior_squared_error(1.0)"),
                    new MetricScore(evaluator.rootMeanPriorSquaredError(), (int) evaluator.numInstances()));
            m.put(new Metric("relative_absolute_error", "openml.evaluation.relative_absolute_error(1.0)"),
                    new MetricScore(evaluator.relativeAbsoluteError() / 100, (int) evaluator.numInstances()));
            m.put(new Metric("root_relative_squared_error", "openml.evaluation.root_relative_squared_error(1.0)"),
                    new MetricScore(evaluator.rootRelativeSquaredError() / 100, (int) evaluator.numInstances()));

        } else if (task == TaskType.CLASSIFICATION || task == TaskType.LEARNINGCURVE
                || task == TaskType.TESTTHENTRAIN) {

            m.put(new Metric("average_cost", "openml.evaluation.average_cost(1.0)"),
                    new MetricScore(evaluator.avgCost(), (int) evaluator.numInstances()));
            m.put(new Metric("total_cost", "openml.evaluation.total_cost(1.0)"),
                    new MetricScore(evaluator.totalCost(), (int) evaluator.numInstances()));

            m.put(new Metric("mean_absolute_error", "openml.evaluation.mean_absolute_error(1.0)"),
                    new MetricScore(evaluator.meanAbsoluteError(), (int) evaluator.numInstances()));
            m.put(new Metric("mean_prior_absolute_error", "openml.evaluation.mean_prior_absolute_error(1.0)"),
                    new MetricScore(evaluator.meanPriorAbsoluteError(), (int) evaluator.numInstances()));
            m.put(new Metric("root_mean_squared_error", "openml.evaluation.root_mean_squared_error(1.0)"),
                    new MetricScore(evaluator.rootMeanSquaredError(), (int) evaluator.numInstances()));
            m.put(new Metric("root_mean_prior_squared_error",
                    "openml.evaluation.root_mean_prior_squared_error(1.0)"),
                    new MetricScore(evaluator.rootMeanPriorSquaredError(), (int) evaluator.numInstances()));
            m.put(new Metric("relative_absolute_error", "openml.evaluation.relative_absolute_error(1.0)"),
                    new MetricScore(evaluator.relativeAbsoluteError() / 100, (int) evaluator.numInstances()));
            m.put(new Metric("root_relative_squared_error", "openml.evaluation.root_relative_squared_error(1.0)"),
                    new MetricScore(evaluator.rootRelativeSquaredError() / 100, (int) evaluator.numInstances()));

            m.put(new Metric("prior_entropy", "openml.evaluation.prior_entropy(1.0)"),
                    new MetricScore(evaluator.priorEntropy(), (int) evaluator.numInstances()));
            m.put(new Metric("kb_relative_information_score",
                    "openml.evaluation.kb_relative_information_score(1.0)"),
                    new MetricScore(evaluator.KBRelativeInformation() / 100, (int) evaluator.numInstances()));

            Double[] precision = new Double[classes];
            Double[] recall = new Double[classes];
            Double[] auroc = new Double[classes];
            Double[] fMeasure = new Double[classes];
            Double[] instancesPerClass = new Double[classes];
            double[][] confusionMatrix = evaluator.confusionMatrix();
            for (int i = 0; i < classes; ++i) {
                precision[i] = evaluator.precision(i);
                recall[i] = evaluator.recall(i);
                auroc[i] = evaluator.areaUnderROC(i);
                fMeasure[i] = evaluator.fMeasure(i);
                instancesPerClass[i] = 0.0;
                for (int j = 0; j < classes; ++j) {
                    instancesPerClass[i] += confusionMatrix[i][j];
                }
            }

            m.put(new Metric("predictive_accuracy", "openml.evaluation.predictive_accuracy(1.0)"),
                    new MetricScore(evaluator.pctCorrect() / 100, (int) evaluator.numInstances()));
            m.put(new Metric("kappa", "openml.evaluation.kappa(1.0)"),
                    new MetricScore(evaluator.kappa(), (int) evaluator.numInstances()));

            m.put(new Metric("number_of_instances", "openml.evaluation.number_of_instances(1.0)"),
                    new MetricScore(evaluator.numInstances(), instancesPerClass, (int) evaluator.numInstances()));

            m.put(new Metric("precision", "openml.evaluation.precision(1.0)"),
                    new MetricScore(evaluator.weightedPrecision(), precision, (int) evaluator.numInstances()));
            m.put(new Metric("recall", "openml.evaluation.recall(1.0)"),
                    new MetricScore(evaluator.weightedRecall(), recall, (int) evaluator.numInstances()));
            m.put(new Metric("f_measure", "openml.evaluation.f_measure(1.0)"),
                    new MetricScore(evaluator.weightedFMeasure(), fMeasure, (int) evaluator.numInstances()));
            if (!Utils.isMissingValue(evaluator.weightedAreaUnderROC())) {
                m.put(new Metric("area_under_roc_curve", "openml.evaluation.area_under_roc_curve(1.0)"),
                        new MetricScore(evaluator.weightedAreaUnderROC(), auroc, (int) evaluator.numInstances()));
            }
            m.put(new Metric("confusion_matrix", "openml.evaluation.confusion_matrix(1.0)"),
                    new MetricScore(confussion_matrix));
        }
        return m;
    }

    /*
     *  Transforms an array of Weka Evaluation objects into a HashMap with scores.
     *  The array is assumed to contain either 2 elements (bootstrap) or 1 element
     *  (otherwise). evaluator[0] covers all unseen instances, evaluator[1] covers
     *  all instances that occurred in the training set.
     */
    public static Map<Metric, MetricScore> evaluatorToMap(Evaluation[] evaluator, int classes, TaskType task,
            boolean bootstrap) throws Exception {
        if (bootstrap && evaluator.length != 2) {
            throw new Exception(
                    "Output->evaluatorToMap problem: Can not perform bootstrap scores, evaluation array is of wrong length. ");
        } else if (bootstrap == false && evaluator.length != 1) {
            throw new Exception(
                    "Output->evaluatorToMap exception: Choosen for normal evaluation, but two evaluators provided in array. ");
        }

        if (!bootstrap) {
            return evaluatorToMap(evaluator[0], classes, task);
        } else {
            Map<Metric, MetricScore> e0 = evaluatorToMap(evaluator[0], classes, task); // pessimistic test score
            Map<Metric, MetricScore> el = evaluatorToMap(evaluator[0], classes, task); // training score

            Map<Metric, MetricScore> total = new HashMap<Metric, MetricScore>();

            // TODO: define a sum/combine operator for MetricScore, so that the test
            // scores (e0) and training scores (e1) can be merged, for example:
            /*for (Metric m : e0.keySet()) {
                MetricScore combined = combine(e0.get(m), e1.get(m)); // combine() is not defined yet
                total.put(m, combined);
            }*/

            return total;
        }

    }

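    /*
     *  Wraps an error message in a minimal JSON object. Note that the value is
     *  not JSON-escaped by this method.
     */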
    public static String styleToJsonError(String value) {
        return "{\"error\":\"" + value + "\"}" + "\n";
    }

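    /*
     *  Formats a status / message pair as a small, indented JSON object.
     */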
    public static String statusMessage(String status, String message) {
        StringBuilder sb = new StringBuilder();
        sb.append("{\n");
        sb.append("\t\"status\":\"" + status + "\",\n");
        sb.append("\t\"message\":\"" + message + "\"\n");
        sb.append("}");

        return sb.toString();
    }

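    /*
     *  Serializes a list of JsonItem key/value pairs into a single-line JSON
     *  object. The leading ", " of the first entry is stripped afterwards, so
     *  the list is assumed to be non-empty.
     */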
    public static String dataFeatureToJson(List<JsonItem> jsonItems) {
        StringBuilder sb = new StringBuilder();
        for (JsonItem item : jsonItems) {
            if (item.useQuotes()) {
                sb.append(", \"" + item.getKey() + "\":\"" + item.getValue() + "\"");
            } else {
                sb.append(", \"" + item.getKey() + "\":" + item.getValue());
            }
        }
        return "{" + sb.toString().substring(2) + "}";
    }

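    /*
     *  Streams an Instances object to the given Writer in ARFF format, one
     *  attribute and one instance at a time, optionally preceded by comment lines.
     */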
    public static void instanes2file(Instances instances, Writer out, String[] leadingComments) throws IOException {
        BufferedWriter bw = new BufferedWriter(out);

        if (leadingComments != null) {
            for (int i = 0; i < leadingComments.length; ++i) {
                bw.write("% " + leadingComments[i] + "\n");
            }
        }

        // Important: we cannot use the standard Instances.toString() approach, as instance files can grow large
        bw.write("@relation " + instances.relationName() + "\n\n");
        for (int i = 0; i < instances.numAttributes(); ++i) {
            bw.write(instances.attribute(i) + "\n");
        }
        bw.write("\n@data\n");
        for (int i = 0; i < instances.numInstances(); ++i) {
            if (i + 1 == instances.numInstances()) {
                bw.write(instances.instance(i) + ""); // last instance: no trailing newline
            } else {
                bw.write(instances.instance(i) + "\n");
            }
        }
        bw.close();
    }
}
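
Example usage

The sketch below shows one way evaluatorToMap could be called after a Weka cross-validation run. It is a minimal, hypothetical example: the OutputExample class, the iris.arff file path, and the choice of the J48 classifier are illustrative assumptions and not part of the class above.

import java.util.Map;
import java.util.Random;

import org.openml.apiconnector.models.Metric;
import org.openml.apiconnector.models.MetricScore;
import org.openml.webapplication.evaluate.TaskType;
import org.openml.webapplication.io.Output;

import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class OutputExample {

    public static void main(String[] args) throws Exception {
        // Load a dataset and use the last attribute as the class (illustrative path).
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Run a 10-fold cross-validation with a J48 decision tree.
        Evaluation evaluation = new Evaluation(data);
        evaluation.crossValidateModel(new J48(), data, 10, new Random(1));

        // Convert the Weka evaluation into OpenML metric scores.
        Map<Metric, MetricScore> scores =
                Output.evaluatorToMap(evaluation, data.numClasses(), TaskType.CLASSIFICATION);

        // Print the number of computed metrics; how individual Metric and
        // MetricScore objects render depends on their toString() implementations.
        System.out.println(scores.size() + " metrics computed");
        for (Map.Entry<Metric, MetricScore> entry : scores.entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
    }
}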