Source code

Java tutorial


Here is the source code for ReportGenerator.java



/*
 * Copyright 2014
 * Telecooperation (TK) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;


public class ReportGenerator {

    // Log4j 2 logger registered under the fixed name "Statistics" rather than the class name.
    private static final Logger logger = LogManager.getLogger("Statistics");

    /**
     * Pre-defined significance levels. Changes will affect what is deemed
     * high/medium/low/not significant in the reports.
     */
    private double significance_low = 1; // placeholder; overwritten in the constructor
    private double significance_medium = 1; // placeholder; overwritten in the constructor
    private double significance_high = 1; // placeholder; overwritten in the constructor
    // Evaluation results this generator reports on; assigned in the constructor.
    EvaluationResults evalResults = null;

    /**
     * Creates a report generator for the given evaluation results and adopts
     * the three significance levels stored in them.
     *
     * @param evalResults
     *            results of the statistical evaluation to report on; it is
     *            dereferenced immediately, so it must not be null
     */
    public ReportGenerator(EvaluationResults evalResults) {
        this.evalResults = evalResults;
        // Replace the placeholder field defaults with the configured levels.
        this.significance_low = evalResults.getSignificance_low();
        this.significance_medium = evalResults.getSignificance_medium();
        this.significance_high = evalResults.getSignificance_high();
    }

    /**
     * Creates a latex and a plain report of the evaluation results and writes
     * them into the specified directory.
     *
     * @param pathToDirectory directory in which the reports should be written
     * @param evalResults results of the statistical evaluation to be described in the reports
     * @param inputFilePath path of the input file to attach the filename to the output folder
     */
    public static void createEvaluationReport(String pathToDirectory, String inputFilePath,
            EvaluationResults evalResults) {
        // Builds both report strings through a fresh ReportGenerator and opens
        // the two target files inside a per-run "Report_..." sub-folder.

        try {
            File input = new File(inputFilePath);
            // Base name of the input file: everything before the first '.'.
            String inputFileName = input.getName().split("\\.")[0];

            File directory = new File(pathToDirectory);
            // NOTE(review): the body and closing brace of this if are missing
            // in this copy - restore them from the upstream source.
            if (!directory.isDirectory()) {
            // Each run gets its own folder: Report_<inputFileName>_<random UUID>.
            // NOTE(review): no mkdir/mkdirs call is visible before the files
            // are opened - confirm the folder is actually created.
            directory = new File(directory,
                    String.format("Report_%s_%s", inputFileName, UUID.randomUUID().toString()));

            File latexFile = new File(directory, "statisticalReport.tex");
            File plainFile = new File(directory, "statisticalReport.txt");

            ReportGenerator resultsWriter = new ReportGenerator(evalResults);
            // The LaTeX report receives the output folder as an argument.
            String latexReport = resultsWriter.createLatexReport(latexFile.getParentFile());
            String plainReport = resultsWriter.createPlainReport();

            // NOTE(review): OutputStreamWriter is created without a charset, so
            // the JVM default charset is used for both files.
            // NOTE(review): the statements that write latexReport, close the
            // writer and start the log call are missing here. The orphaned
            // argument below says "plain report" right after latexFile was
            // opened - the two log messages look swapped; verify.
            Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(latexFile)));
                    String.format("Wrote plain report of statistical evaluation to %s.", plainFile.toPath()));

            // NOTE(review): same gap as above for the plain-text file.
            out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(plainFile)));
                    String.format("Wrote Latex report of statistical evaluation to %s.", latexFile.toPath()));

        // NOTE(review): the handler body and the closing braces of the catch
        // block and of the method are missing in this copy.
        } catch (IOException e) {


    /**
     * Escape characters in a string which have to be escaped in latex. Add an
     * additional "\" in order to not sacrifice the escape sequence when writing
     * it to the file.
     *
     * @param s
     *            A string with potential characters that need to be escaped
     * @return A string with escaped characters ready to be written to a
     *         document and handled by latex
     */
    private String escapeLatexCharacters(String s) {

        s = s.replace("&", "\\&");
        s = s.replace("%", "\\%");
        s = s.replace("$", "\\$");
        s = s.replace("#", "\\#");
        s = s.replace("_", "\\_");
        s = s.replace("{", "\\{");
        s = s.replace("}", "\\}");
        s = s.replace("~", "\\~");
        s = s.replace("^", "\\^");

        return s;

    /**
     * Overloaded method to escape characters which have to be escaped in latex
     * in the strings of a string list. Add an additional "\" in order to not
     * sacrifice the escape sequence when writing it to the file.
     *
     * @param s
     *            a List with String elements
     * @return a List with escaped Strings
     */
    private List<String> escapeLatexCharacters(List<String> s) {
        List<String> escapedS = new ArrayList<String>();
        for (int i = 0; i < s.size(); i++) {
        return escapedS;

    /**
     * Creates a latex table without environment, captions etc., i.e. the
     * tabular-environment only.
     *
     * @param header
     *            The table header, i.e. the first row, as a String-array
     * @param formatting
     *            The table formatting, e.g. "|l|l|l|", indicating 3 columns,
     *            aligned to the left, separated by vertical lines
     * @param values
     *            The values to be represented in the table in a 2d-String-array
     *            (first rows, then columns)
     * @return A string representing a latex table
     */
    private String createLatexTabular(String[] header, String formatting, String[][] values) {

        StringBuilder tabular = new StringBuilder();

         * //If this should be a table with only one column, e.g. in case of a
         * baseline evaluation, overwrite the formatting if(twoColumnLayout){
         * formatting ="|l|l|";
         * }
        tabular.append(String.format("\\begin{tabular}{%s} \\hline\n", formatting));

        // Header line
        for (int i = 0; i < header.length; i++) {
            String s = header[i];
            if (i == 0) {
                tabular.append(String.format("%s ", s));
            } else {
                tabular.append(String.format("& %s", s));
        tabular.append("\\\\ \\hline\n");

        // Content
        for (int j = 0; j < values.length; j++) {
            String[] row = values[j];
            for (int i = 0; i < row.length; i++) {
                String v = values[j][i];
                if (i == 0) {
                    tabular.append(String.format("%s ", v));
                } else {
                    tabular.append(String.format("& %s ", v));
            tabular.append("\\\\ \\hline\n");


        return tabular.toString();

    /**
     * Create a latex table with captions, label etc., i.e. a table environment
     * comprising a tabular environment.
     *
     * @param caption
     *            The table's caption
     * @param label
     *            The table's label to be used for references
     * @param header
     *            The table header, i.e. the first row, as a String-array
     * @param formatting
     *            The table formatting, e.g. "|l|l|l|", indicating 3 columns,
     *            aligned to the left, separated by vertical lines
     * @param values
     *            The values to be represented in the table in a 2d-String-array
     *            (first rows, then columns)
     * @return A string representing a latex table in a table environment
     */
    private String createLatexTable(String caption, String label, String[] header, String formatting,
            String[][] values) {
        // Wraps the tabular produced by createLatexTabular in an adjustbox
        // group and appends the caption and label commands.
        // NOTE(review): no table environment is opened or closed in the
        // visible lines, and the method's closing brace is absent - these
        // lines appear to be missing from this copy; restore from upstream.
        StringBuilder table = new StringBuilder();

        // Limit the width of the tabular to the current line width.
        table.append("\\adjustbox{max width=\\linewidth}{\n");
        table.append(createLatexTabular(header, formatting, values));
        table.append("}");// Close adjustbox environment
        table.append(String.format("\\caption{%s}\n", caption));
        table.append(String.format("\\label{%s}\n", label));

        return table.toString();

    /**
     * Builds a latex longtable (multi-page table) from the given values,
     * followed by caption and label.
     *
     * @param caption
     *            The table's caption
     * @param label
     *            The table's label to be used for references
     * @param header
     *            The table header cells
     * @param formatting
     *            The column specification passed to the longtable environment
     * @param values
     *            The cell values (first rows, then columns); values[0] is
     *            read unconditionally, so an empty array throws
     * @return A string representing the longtable
     */
    private String createLatexLongTable(String caption, String label, String[] header, String formatting,
            String[][] values) {

        StringBuilder table = new StringBuilder();

        // Header line
        // NOTE(review): headerStr is assembled here but never appended to
        // table in the visible lines - the statements using it seem to be
        // missing from this copy.
        StringBuilder headerStr = new StringBuilder();
        for (int i = 0; i < header.length; i++) {
            String s = header[i];
            if (i == 0) {
                headerStr.append(String.format("%s ", s));
            } else {
                headerStr.append(String.format("& %s", s));
        headerStr.append("\\\\ \\hline\n");

        // Resizing not possible for multipage tables. Thus more columns,
        // smaller font
        // NOTE(review): both branches below are empty in this copy; the
        // font-size statements they guarded are missing.
        if (values[0].length > 11) {
        } else if (values[0].length > 6) {

        // Table head
        table.append(String.format("\\begin{longtable}{%s} \\hline\n", formatting));

        // Content
        for (int j = 0; j < values.length; j++) {
            String[] row = values[j];
            for (int i = 0; i < row.length; i++) {
                String v = values[j][i];
                if (i == 0) {
                    table.append(String.format("%s ", v));
                } else {
                    table.append(String.format("& %s ", v));
            table.append("\\\\ \\hline\n");

        // Table bottom
        // NOTE(review): the longtable opened above is never closed in the
        // visible lines, and the closing braces of the loops and of the
        // method are absent.
        table.append(String.format("\\caption{%s}\n", caption));
        table.append(String.format("\\label{%s}\n", label));

        return table.toString();

    /**
     * Create a latex table environment with more than one latex table inside
     * using subfigures.
     *
     * @param overallCaption
     *            The caption for all tables
     * @param subCaption
     *            The caption for individual tables specified as String-arrays
     * @param overallLabel
     *            The label for all tables, to be used for referencing
     * @param header
     *            The table header, i.e. the first row, as a String-array
     * @param formatting
     *            The table formatting, e.g. "|l|l|l|", indicating 3 columns,
     *            aligned to the left, separated by vertical lines
     * @param overallValues
     *            The values to be represented in the table in a 3d-String-array
     *            (subtable, rows, columns)
     * @return A string representing several latex tables comprised in a single
     *         table-environment
     */
    private String createLatexSubTable(String overallCaption, String[] subCaption, String overallLabel,
            String[] header, String formatting, String[][][] overallValues) {
        // Renders one subfigure per entry of overallValues, each holding a
        // width-limited tabular and its own caption, then appends the overall
        // caption and label.

        StringBuilder table = new StringBuilder();

        for (int i = 0; i < overallValues.length; i++) {
            StringBuilder tabular = new StringBuilder();
            String[][] values = overallValues[i];
            // Subfigure width in cm is the column count of the first row plus 2.
            tabular.append(String.format("\\begin{subfigure}[b]{%dcm}\n", values[0].length + 2));
            tabular.append("\\adjustbox{max width=\\linewidth}{\n");
            tabular.append(createLatexTabular(header, formatting, values));
            tabular.append("}");// Close adjustbox environment
            // subCaption is indexed in parallel with overallValues.
            tabular.append(String.format("\\caption{%s}\n", subCaption[i]));
            // NOTE(review): the subfigure is never closed and tabular is never
            // appended to table in the visible lines - those statements and
            // the loop's closing brace are missing from this copy.

        table.append(String.format("\\caption{%s}\n", overallCaption));
        table.append(String.format("\\label{%s}\n", overallLabel));

        return table.toString();

    /**
     * Create a Latex-conforming String-representation of the test results,
     * accounting for parameters and the test statistic (if available), p-Value
     * and the closest significance level.
     *
     * @param r
     *            the test result as {@link TestResult} object
     * @param pThreshold
     *            the significance level which is closest to the p-Value
     * @return A Latex-conforming String-representation of the test results
     */
    private String getTestResultRepresentation(TestResult r, double pThreshold) {

        StringBuilder parameters = new StringBuilder();
        Iterator<String> it = r.getParameter().keySet().iterator();
        while (it.hasNext()) {
            String parameter =;
            double value = r.getParameter().get(parameter);
            parameters.append(String.format("%s=%.3f, ", parameter, value));
        if (r.getStatisticType().isEmpty()) {
            return String.format("$%sp=%.3f, \\alpha=%.2f$", parameters, r.getpValue(), pThreshold);
        } else {
            return String.format("$%s%s=%.3f, p=%.3f, \\alpha=%.2f$", parameters, r.getStatisticType(),
                    r.getStatistic(), r.getpValue(), pThreshold);

    /**
     * Get a String representation of the models' order, inferred from their
     * pairwise p-values.
     *
     * @param ordering
     *            a HashMap mapping levels of topological order to sets of
     *            models
     * @return A string representing the models' order or alternatively a
     *         message that no order could be determined.
     */
    private String getModelOrderingRepresentation(HashMap<Integer, TreeSet<Integer>> ordering) {
        // Turns the level -> model-index sets into a textual ordering, or
        // returns a fixed "no strict ordering" message when the map is null,
        // has fewer than two levels, or contains an empty level.

        if (ordering != null && ordering.size() > 1) {
            StringBuilder orderSequence = new StringBuilder();
            // Levels are looked up by the loop counter, so the keys are
            // expected to be 0..size-1; a missing key makes s null and the
            // size() call below throw.
            for (int level = 0; level < ordering.keySet().size(); level++) {
                TreeSet<Integer> s = ordering.get(level);

                if (s.size() == 0)
                    return "These results do not allow for a strict ordering of all models.";

                // Each level is written as a parenthesised group "(M<a>,M<b>...".
                // NOTE(review): as visible here the first element is written
                // once as "(M<n>" and again by the loop over the full set -
                // a statement excluding it, the closing parenthesis and the
                // loop's closing brace seem to be missing; verify upstream.
                int n = s.first();
                orderSequence.append(String.format("(M%d", n));
                for (Integer node : ordering.get(level)) {
                    orderSequence.append(String.format(",M%d", node));

                // NOTE(review): the body of this if (presumably a separator
                // between levels) is missing from this copy.
                if (level < ordering.keySet().size() - 1) {
            // NOTE(review): the format argument for %s is missing below, and
            // "follwing" in the runtime message is a typo for "following"
            // (left unchanged here because it is program output).
            return String.format("These results allow for the follwing ordering of model performances: %s. ",
        } else {
            return "These results do not allow for a strict ordering of all models. ";

    /**
     * Creates a report of the statistical evaluation in the Latex-format.
     *
     * @param outputFolder
     *            the folder where the report will be written later to store
     *            related images etc. there
     * @param evalResults
     *            an object of type {@link EvaluationResults} comprising the
     *            results of the statistical evaluation
     * @return A String representing the report of the statistical evaluation in
     *         Latex-format
     */
    public String createLatexReport(File outputFolder) {
        // Set locale to English globally to make reports independent of the
        // machine thei're created on, e.g. use "." as decimal points on any
        // machine
        StringBuilder report = new StringBuilder();
        Statistics stats = Statistics.getInstance(true);
        HashMap<String, String> methodsSummary = new HashMap<String, String>();
        HashMap<String, HashMap<String, List<String>>> testSummary = new HashMap<String, HashMap<String, List<String>>>();
        ArrayList<String[]> figures = new ArrayList<String[]>();
        testSummary.put("Parametric", new HashMap<String, List<String>>());
        testSummary.put("Non-Parametric", new HashMap<String, List<String>>());
        String outputFolderPath = "";
        if (outputFolder != null) {
            outputFolderPath = outputFolder.getAbsolutePath();

        // Header
        // Packages
        // Title definition
        report.append("\\title{Statistical Evaluation Report}\n");

        // Evaluation Overview
        report.append("\\section{Evaluation Overview}");

        int nModels = evalResults.getSampleData().getModelMetadata().size();
        ArrayList<String> measures = evalResults.getMeasures();
        int nSamples = evalResults.getSampleData().getSamples().get(measures.get(0)).get(0).size();
        String ref = "tbl:models";

        // Separate training/testing datasets
        List<String> trainingDataList = new ArrayList<String>();
        List<String> testingDataList = new ArrayList<String>();
        List<Pair<String, String>> datasets = evalResults.getSampleData().getDatasetNames();
        Iterator<Pair<String, String>> itp = datasets.iterator();
        while (itp.hasNext()) {
            Pair<String, String> trainTest =;
            if (trainTest.getValue() != null) {
        Set<String> trainingDataSet = new HashSet<String>(trainingDataList);
        Set<String> testingDataSet = new HashSet<String>(testingDataList);

        String pipelineDescription = null;
        String sampleOrigin = "per CV";

        ReportTypes pipelineType = this.evalResults.getSampleData().getPipelineType();
        switch (pipelineType) {
        // One-domain n-fold CV (ReportData=per Fold)
        case CV:
            pipelineDescription = String.format("%d-fold cross validation",
            sampleOrigin = "per fold ";
        case MULTIPLE_CV:
            pipelineDescription = String.format("%dx%s repeated cross validation",
                    evalResults.getSampleData().getnRepetitions(), evalResults.getSampleData().getnFolds());
        case CV_DATASET_LVL:
            pipelineDescription = String.format("%d-fold cross validation over %d datasets",
                    evalResults.getSampleData().getnFolds(), trainingDataSet.size());
            pipelineDescription = String.format("%dx%s repeated cross validation over %d datasets",
                    evalResults.getSampleData().getnRepetitions(), evalResults.getSampleData().getnFolds(),
            sampleOrigin = "per dataset";
            // In the train/test scenario, the number of datasets only includes
            // distinct ones
            Set<String> allDataSets = new HashSet<String>(testingDataSet);
            pipelineDescription = String.format("Train/Test over %d datasets", allDataSets.size());
            sampleOrigin = "per dataset";
            pipelineDescription = "!unknown pipeline type!";
            sampleOrigin = "!unknown pipeline type!";

        boolean isBaselineEvaluation = evalResults.isBaselineEvaluation();
        report.append(String.format("The system performed a %s for the %d models in Tbl \\ref{%s}. ",
                pipelineDescription, nModels, ref));
        if (isBaselineEvaluation) {
            report.append(String.format("The models were compared against the first baseline model. \n",
                    pipelineDescription, nModels, ref));
        } else {
            report.append(String.format("The models were compared against each other. \n", pipelineDescription,
                    nModels, ref));

        String[][] values = new String[nModels][3];
        for (int r = 0; r < nModels; r++) {
            values[r][0] = String.format("M%d", r);
            // Remove package prefix for algorithms, e.g. shorten "trees.J48" to "J48".
            String[] algorithm = evalResults.getSampleData().getModelMetadata().get(r).getKey().split("\\.");
            values[r][1] = escapeLatexCharacters(algorithm[algorithm.length - 1]);
            values[r][2] = escapeLatexCharacters(evalResults.getSampleData().getModelMetadata().get(r).getValue());

        String table = createLatexTable("Evaluated models with classifier algorithm and feature sets", ref,
                new String[] { "Index", "Algorithm", "Feature Set" }, "|l|l|p{11cm}|", values);

        // List test/training datasets. Consider the case when these sets are
        // different.
        if (testingDataSet.isEmpty()) {
            if (trainingDataSet.size() == 1) {
                        String.format("The models were evaluated on the dataset %s. ", trainingDataList.get(0)));
            } else {
                report.append(String.format("The models were evaluated on the datasets %s. ",
        } else {
            if (trainingDataSet.size() == 1 && testingDataSet.size() == 1) {
                        String.format("The models were trained on the dataset %s and tested on the dataset %s. ",
                                trainingDataList.get(0), testingDataList.get(0)));
            } else if (trainingDataSet.size() > 1 && testingDataSet.size() == 1) {
                        "The models were trained on the datasets %s and tested on the dataset %s. ",
                        this.createEnumeration(new ArrayList<String>(trainingDataSet)), testingDataList.get(0)));
            } else if (trainingDataSet.size() == 1 && testingDataSet.size() > 1) {
                        "The models were trained on the dataset %s and tested on the datasets %s. ",
                        trainingDataList.get(0), this.createEnumeration(new ArrayList<String>(testingDataSet))));
            } else {
                        String.format("The models were trained on the datasets %s and tested on the datasets %s. ",
                                this.createEnumeration(new ArrayList<String>(trainingDataSet)),
                                this.createEnumeration(new ArrayList<String>(testingDataSet))));
        report.append(String.format("Their performance was assessed with the %s", createEnumeration(measures)));
                ". In the analysis, the models thus represent levels of the independent variable, while the performance measures are dependent variables.\n");

        // Results (for each measure separately)
        report.append("\\FloatBarrier\n"); // All previous floats must be placed
        // before this point
                "Throughout the report, p-values are annotated if they are significant. While {\\footnotesize *} indicates low significance ($p<\\alpha=%.2f$), the annotations {\\footnotesize **} and {\\footnotesize ***} represent medium ($p<\\alpha=%.2f$) and high significance ($p<\\alpha=%.2f$).",
                significance_low, significance_medium, significance_high));

        for (int i = 0; i < measures.size(); i++) {
             * Create table with samples for the current performance measure If
             * samples are drawn over multiple datasets, transpose table
            String measure = measures.get(i);
            if (!evalResults.getSampleData().getSamples().containsKey(measure)) {
            ArrayList<ArrayList<Double>> measureSamples = evalResults.getSampleData().getSamples().get(measure);
            ArrayList<Double> averageMeasureSamples = evalResults.getSampleData().getSamplesAverage().get(measure);

            report.append(String.format("\\subsection{%s}\n", measure));
            ref = String.format("tbl:%s", measure.replaceAll("\\s", ""));
                    "The %s samples drawn from the %s and the %d models are presented in Tbl. \\ref{%s}.\n",
                    measure, pipelineDescription, nModels, ref));

            // Plot Box-Whisker-Diagram of samples for the current measure and add the figure to the appendix
            // Use the min/max sample value as indicators for the box-plots limits
            String filename = String.format("boxPlot%s", measure.replaceAll("\\s", ""));
            String path = String.format("%s%s%s", outputFolderPath, File.separator, filename);
            String pathR = this.fixSlashes(path);
            String figRef = String.format("fig:boxPlot%s", measure.replaceAll("\\s", ""));
            String caption = String.format("Box-Whisker-Plot of %s samples. Red dots indicate means.", measure);
            double[][] samples = new double[nModels][];
            double minSample = Double.MAX_VALUE;
            double maxSample = Double.MIN_VALUE;
            for (int k = 0; k < nModels; k++) {
                ArrayList<Double> s = measureSamples.get(k);
                samples[k] = new double[s.size()];
                for (int j = 0; j < s.size(); j++) {
                    samples[k][j] = s.get(j);
                    if (minSample > s.get(j)) {
                        minSample = s.get(j);
                    if (maxSample < s.get(j)) {
                        maxSample = s.get(j);
            double sampleRange = maxSample - minSample;
            int lowerLimit = (int) Math.floor(minSample - sampleRange * 0.1);
            int upperLimit = (int) Math.ceil(maxSample + sampleRange * 0.1);
            boolean successful = stats.plotBoxWhisker(samples, lowerLimit, upperLimit, pathR, measure);
            if (successful) {
                figures.add(new String[] { figRef, caption, filename });
                        String.format("See Fig. \\ref{%s} for a Box-Whisker plot of these samples. ", figRef));

            caption = String.format("Samples of the %s drawn from the %s and the %d models", measure,
                    pipelineDescription, nModels);
            switch (pipelineType) {
            case CV:
            case MULTIPLE_CV:
                values = new String[nModels + 1][nSamples + 2];
                for (int r = 0; r <= nModels; r++) {
                    // First line of table = Fold indices
                    if (r == 0) {
                        values[r][0] = "";
                        values[r][nSamples + 1] = "";
                        for (int f = 1; f <= nSamples; f++) {
                            values[r][f] = Integer.toString(f);
                        // Next lines with model indices, samples per fold and
                        // average measure over all samples
                    } else {
                        values[r][0] = String.format("M%d", (r - 1));
                        //values[r][nSamples + 1] = String.format("%.2f", averageMeasureSamples.get(r - 1) * 100);
                        values[r][nSamples + 1] = String.format("%.2f", averageMeasureSamples.get(r - 1));
                        ArrayList<Double> s = measureSamples.get(r - 1);
                        for (int j = 0; j < s.size(); j++) {
                            //values[r][j + 1] = String.format("%.2f", s.get(j) * 100);
                            values[r][j + 1] = String.format("%.2f", s.get(j));
                if (values.length > 58) {
                    table = createLatexLongTable(caption, ref,
                            new String[] { "Classifier",
                                    String.format("\\multicolumn{%d}{|c|}{%s %s}", nSamples, measure, sampleOrigin),
                                    "Average" },
                            String.format("|%s", StringUtils.repeat("l|", nSamples + 2)), values);
                } else {
                    table = createLatexTable(caption, ref,
                            new String[] { "Classifier",
                                    String.format("\\multicolumn{%d}{|c|}{%s %s}", nSamples, measure, sampleOrigin),
                                    "Average" },
                            String.format("|%s", StringUtils.repeat("l|", nSamples + 2)), values);

            case CV_DATASET_LVL:
            case MULTIPLE_CV_DATASET_LVL:
            case TRAIN_TEST_DATASET_LVL:
                values = new String[nSamples + 2][nModels + 1];
                // double[][] valuesNumeric = new double[nSamples][nModels];
                for (int r = 0; r <= nSamples + 1; r++) {
                    // First line of table = Model indices
                    if (r == 0) {
                        values[r][0] = "";
                        for (int j = 0; j < nModels; j++) {
                            values[r][j + 1] = String.format("M%d", (j));
                        // Last line of table = average sums
                    } else if (r == nSamples + 1) {
                        values[r][0] = "Average";
                        for (int j = 0; j < nModels; j++) {
                            //values[r][j + 1] = String.format("%.2f", averageMeasureSamples.get(j) * 100);
                            values[r][j + 1] = String.format("%.2f", averageMeasureSamples.get(j));
                        // Next lines with model indices, samples per fold and
                        // average measure over all samples
                    } else {
                        // Only print both train- and test set if there is more
                        // than one training set
                        Pair<String, String> trainTest = evalResults.getSampleData().getDatasetNames().get(r - 1);
                        if (pipelineType == ReportTypes.TRAIN_TEST_DATASET_LVL) {
                            if (trainingDataSet.size() > 1) {
                                values[r][0] = String.format("%s-%s", trainTest.getKey(), trainTest.getValue());
                            } else {
                                values[r][0] = trainTest.getValue();
                        } else {
                            values[r][0] = trainTest.getKey();
                        for (int j = 0; j < nModels; j++) {
                            ArrayList<Double> s = measureSamples.get(j);
                            //values[r][j + 1] = String.format("%.2f", s.get(r - 1) * 100);
                            values[r][j + 1] = String.format("%.2f", s.get(r - 1));
                if (values.length > 58) {
                    table = createLatexLongTable(caption, ref,
                            new String[] { "Dataset",
                                    String.format("\\multicolumn{%d}{|c|}{%s %s}", nModels, measure,
                                            sampleOrigin) },
                            String.format("|%s", StringUtils.repeat("l|", nModels + 1)), values);
                } else {
                    table = createLatexTable(caption, ref,
                            new String[] { "Dataset",
                                    String.format("\\multicolumn{%d}{|c|}{%s %s}", nModels, measure,
                                            sampleOrigin) },
                            String.format("|%s", StringUtils.repeat("l|", nModels + 1)), values);

            // Results - First parametric tests, then non-parametric (2
            // iterations)
            // Print results for alls non-parametric tests except McNemar.
            // McNemar is not based on the same performance measures but on a
            // contingency matrix, which is
            // printed in a separate section.
            for (String testType : new String[] { "Parametric", "Non-Parametric" }) {
                report.append(String.format("\\subsubsection{%s Testing}", testType));

                Pair<String, AbstractTestResult> result = null;
                if (testType.equals("Parametric")) {
                    result = evalResults.getParametricTestResults().get(measure);
                } else {
                    result = evalResults.getNonParametricTestResults().get(measure);

                // Use pretty-print method descriptor if specified
                String method = result.getKey();
                if (StatsConfigConstants.PRETTY_PRINT_METHODS.containsKey(method)) {
                    method = StatsConfigConstants.PRETTY_PRINT_METHODS.get(method);
                methodsSummary.put(testType, method);

                TestResult r = (TestResult) result.getValue();
                        String.format("The system compared the %d models using the \\emph{%s}. ", nModels, method));

                if (r != null && !Double.isNaN(r.getpValue())) {

                    // A priori test: assumptions
                    boolean assumptionViolated = false;
                    Iterator<String> it = r.getAssumptions().keySet().iterator();
                    while (it.hasNext()) {
                        String assumption =;

                        TestResult at = (TestResult) r.getAssumptions().get(assumption);
                        if (at == null) {
                            report.append(String.format("Testing for %s failed. ", assumption));
                            assumptionViolated = true;
                        if (Double.isNaN(at.getpValue())) {
                                    String.format("Testing for %s using %s failed. ", assumption, at.getMethod()));
                            assumptionViolated = true;
                        double ap = at.getpValue();

                        if (ap <= this.significance_low) {
                            assumptionViolated = true;

                        // Verbalize result according to p value
                        Pair<String, Double> verbalizedP = verbalizeP(ap, true);

                        String testResultRepresentation = getTestResultRepresentation(at, verbalizedP.getValue());
                        report.append(String.format("%s %s violation of %s (%s). ", at.getMethod(),
                                verbalizedP.getKey(), assumption, testResultRepresentation));


                    // Create QQ-Normal diagram to support the analysis of a
                    // normality assumption
                    if (result.getKey().equals("DependentT") && samples.length == 2) {
                        filename = String.format("qqNormPlot%s", measure.replaceAll("\\s", ""));
                        path = String.format("%s%s%s", outputFolderPath, File.separator, filename);
                        pathR = this.fixSlashes(path);
                        figRef = String.format("fig:qqNormPlot%s", measure.replaceAll("\\s", ""));
                        caption = String.format("QQ-Normal plot of pairwise differences between %s samples.",
                        double[] differences = new double[samples[0].length];
                        for (int j = 0; j < samples[0].length; j++) {
                            differences[j] = samples[0][j] - samples[1][j];
                        successful = stats.plotQQNorm(differences, "M0-M1", measure, pathR);
                        if (successful) {
                            figures.add(new String[] { figRef, caption, filename });
                            report.append(String.format("See Fig. \\ref{%s} for a QQ-Normal plot of the samples. ",

                    if (assumptionViolated) {
                                "Given that the assumptions are violated, the following test may be corrupted. ");

                    // A Priori test results
                    // Verbalize result according to p value
                    Pair<String, Double> verbalizedP = verbalizeP(r.getpValue(), false);
                    String testResultRepresentation = getTestResultRepresentation(r, verbalizedP.getValue());
                            "The %s %s differences between the performances of the models (%s).\\\\ \n\n ", method,
                            verbalizedP.getKey(), testResultRepresentation));

                    // Store result for summary
                    if (testSummary.get(testType).containsKey(verbalizedP.getKey())) {
                    } else {
                        ArrayList<String> list = new ArrayList<String>();
                        testSummary.get(testType).put(verbalizedP.getKey(), list);

                    // Post-hoc test for >2 models (pairwise comparisons)
                    if (evalResults.getSampleData().getModelMetadata().size() > 2) {

                        Pair<String, AbstractTestResult> postHocResult = null;
                        if (testType.equals("Parametric")) {
                            postHocResult = evalResults.getParametricPostHocTestResults().get(measure);
                        } else {
                            postHocResult = evalResults.getNonParametricPostHocTestResults().get(measure);
                        method = postHocResult.getKey();
                        if (StatsConfigConstants.PRETTY_PRINT_METHODS.containsKey(method)) {
                            method = StatsConfigConstants.PRETTY_PRINT_METHODS.get(method);
                        methodsSummary.put(String.format("%sPostHoc", testType), method);

                        PairwiseTestResult rPostHoc = (PairwiseTestResult) postHocResult.getValue();
                        report.append(String.format("The system performed the \\emph{%s} post-hoc. ", method));

                        if (rPostHoc == null) {
                            report.append("The test failed. ");

                        // Assumptions
                        boolean assumptionsViolated = false;
                        it = rPostHoc.getAssumptions().keySet().iterator();
                        while (it.hasNext()) {
                            String assumption =;
                            PairwiseTestResult at = (PairwiseTestResult) rPostHoc.getAssumptions().get(assumption);
                            if (at == null) {
                                report.append(String.format("Testing for %s failed. ", assumption));
                                assumptionsViolated = true;

                            // Create table with pairwise p-values for
                            // assumption testing
                            double[][] ap = at.getpValue();
                            Pair<String[], String[][]> tableData = getPValueStringArray(ap, isBaselineEvaluation); // first
                            // element
                            // is
                            // header,
                            // second
                            // are
                            // values
                            caption = String.format("P-values from the %s for %s", at.getMethod(), measure);
                            ref = String.format("tbl:%s%s", at.getMethod().replaceAll("\\s", ""),
                                    measure.replaceAll("\\s", ""));
                            table = createLatexTable(caption, ref, tableData.getKey(),
                                    String.format("|%s", StringUtils.repeat("l|", nModels)), tableData.getValue());

                            double max = getMax(ap);
                            double min = getMin(ap);
                            verbalizedP = verbalizeP(min, true);
                            if ((max > significance_low && min <= significance_low)
                                    || (max > significance_medium && min <= significance_medium)
                                    || (max > significance_high && min <= significance_high)) {
                                // partly significant to degree as specified by
                                // verbalized p-value
                                        "%s partly %s violation of %s ($\\alpha=%.2f$, Tbl. \\ref{%s}).\n",
                                        at.getMethod(), verbalizedP.getKey(), assumption, verbalizedP.getValue(),
                            } else {
                                        "%s %s violation of %s ($\\alpha=%.2f$, Tbl. \\ref{%s}).\n", at.getMethod(),
                                        verbalizedP.getKey(), assumption, verbalizedP.getValue(), ref));

                            if (min <= this.significance_low) {
                                assumptionsViolated = true;


                        if (assumptionViolated) {
                                    "Given that the assumptions are violated, the following test may be corrupted. ");

                        // Result
                        double[][] ap = rPostHoc.getpValue();
                        Pair<String[], String[][]> tableData = getPValueStringArray(ap, isBaselineEvaluation); // first
                        // element
                        // is
                        // header,
                        // second
                        // are
                        // values
                        caption = String.format("P-values from the %s for %s", method, measure);
                        ref = String.format("tbl:%s%s", method.replaceAll("\\s", ""),
                                measure.replaceAll("\\s", ""));
                        String formatting = null;
                        if (!isBaselineEvaluation) {
                            formatting = String.format("|%s", StringUtils.repeat("l|", nModels));
                        } else {
                            formatting = String.format("|l|l|");
                        String tablePNonAdjusted = createLatexTable(caption, ref, tableData.getKey(), formatting,

                        // Already fetch pairwise adjustments here in order to
                        // determine choice of words
                        double max = getMax(ap);
                        double min = getMin(ap);
                        verbalizedP = verbalizeP(min, false);
                        ArrayList<StatsConfigConstants.CORRECTION_VALUES> adjustments = new ArrayList<StatsConfigConstants.CORRECTION_VALUES>(
                        String adjustWord = "";
                        if (adjustments.size() > 0) {
                            adjustWord = " for non-adjusted p-values";
                        if ((max > significance_low && min <= significance_low)
                                || (max > significance_medium && min <= significance_medium)
                                || (max > significance_high && min <= significance_high)) {
                            // partly significant to degree as specified by
                            // verbalized p-value
                                    "The %s partly %s differences between the performances of the models%s ($\\alpha=%.2f$, Tbl. \\ref{%s}). ",
                                    method, verbalizedP.getKey(), adjustWord, verbalizedP.getValue(), ref));
                        } else {
                                    "The %s %s differences between the performances of the models%s ($\\alpha=%.2f$, Tbl. \\ref{%s}). ",
                                    method, verbalizedP.getKey(), adjustWord, verbalizedP.getValue(), ref));

                        // Determine ordering of models
                        HashMap<Integer, TreeSet<Integer>> postHocOrdering = null;
                        int[][] orderingEdgeList = null;
                        if (testType.equals("Parametric")) {
                            postHocOrdering = evalResults.getParameticPostHocOrdering().get(measure);
                            orderingEdgeList = evalResults.getParameticPostHocEdgelist().get(measure);
                        } else {
                            postHocOrdering = evalResults.getNonParameticPostHocOrdering().get(measure);
                            orderingEdgeList = evalResults.getNonParameticPostHocEdgelist().get(measure);
                        String ordering = getModelOrderingRepresentation(postHocOrdering);

                        // Print graphs of ordering for the current measure and
                        // add the figure to the appendix
                        filename = String.format("graphOrdering%s%s", measure.replaceAll("\\s", ""), testType);
                        path = String.format("%s%s%s", outputFolderPath, File.separator, filename);
                        pathR = this.fixSlashes(path);
                        figRef = String.format("fig:graphOrdering%s%s", measure.replaceAll("\\s", ""), testType);
                        caption = String.format(
                                "Directed graph of significant differences for %s, as indicated by the %s post-hoc test.",
                                measure, testType.toLowerCase());
                        // int nodes[] = new int[nModels];
                        // for(int j=0; j<nModels;j++){nodes[j]=j;};
                        successful = stats.plotGraph(orderingEdgeList, nModels, pathR);
                        if (successful) {
                            figures.add(new String[] { figRef, caption, filename });
                            report.append(String.format("The ordering is visualized in Fig. \\ref{%s}. ", figRef));

                        // Pairwise adjustments
                        String tablePAdjusted = null;
                        if (adjustments.size() > 0) {
                            String[] subcaption = new String[adjustments.size()];
                            String[] header = null;
                            String[][][] overallValues = new String[adjustments.size()][][];
                            double[] minAdjustments = new double[adjustments.size()];
                            double[] maxAdjustments = new double[adjustments.size()];
                            for (int j = 0; j < adjustments.size(); j++) {
                                StatsConfigConstants.CORRECTION_VALUES adjustmentMethod = adjustments.get(j);
                                subcaption[j] =;
                                double[][] correctedP = rPostHoc.getpValueCorrections().get(adjustmentMethod);
                                if (StatsConfigConstants.PRETTY_PRINT_METHODS.containsKey(adjustmentMethod)) {
                                    subcaption[j] = StatsConfigConstants.PRETTY_PRINT_METHODS.get(adjustmentMethod);
                                tableData = getPValueStringArray(correctedP, isBaselineEvaluation);
                                header = tableData.getKey();
                                overallValues[j] = tableData.getValue();
                                minAdjustments[j] = getMin(correctedP);
                                maxAdjustments[j] = getMax(correctedP);

                            caption = String.format("Adjusted p-values from the %s for %s", method, measure);
                            ref = String.format("tbl:%s%sAdjusted", method.replaceAll("\\s", ""),
                                    measure.replaceAll("\\s", ""));
                            formatting = null;
                            if (!isBaselineEvaluation) {
                                formatting = String.format("|%s", StringUtils.repeat("l|", nModels));
                            } else {
                                formatting = String.format("|l|l|");
                            tablePAdjusted = createLatexSubTable(caption, subcaption, ref, header, formatting,

                            min = getMin(minAdjustments);
                            max = getMax(maxAdjustments);
                            verbalizedP = verbalizeP(min, false);

                            if ((max > significance_low && min <= significance_low)
                                    || (max > significance_medium && min <= significance_medium)
                                    || (max > significance_high && min <= significance_high)) {
                                // partly significant to degree as specified by
                                // verbalized p-value
                                        "It partly %s differences for adjusted p-values ($\\alpha=%.2f$, Tbl. \\ref{%s}).\n\n ",
                                        verbalizedP.getKey(), verbalizedP.getValue(), ref));
                            } else {
                                        "It %s differences for adjusted p-values ($\\alpha=%.2f$, Tbl. \\ref{%s}).\n\n ",
                                        verbalizedP.getKey(), verbalizedP.getValue(), ref));

                        if (tablePAdjusted != null) {

                } else {
                    report.append(String.format("The %s failed.", method));


        // Contingency table and McNemar results if this test was performed
        if (evalResults.getNonParametricTest().equals("McNemar")) {
            String measure = "Contingency Table";
            String testType = "Non-Parametric";
            report.append("\\subsection{Contingency Table}\n");

            String caption = String
                    .format("Contingency table with correctly and incorrectly classified folds for %s", measure);
            if (evalResults.getSampleData().getPipelineType() == ReportTypes.MULTIPLE_CV) {
                        "The contingency table drawn from the %s and the %d models is listed in Tbl. \\ref{%s}. The correctly and incorrectly classified instances per fold were averaged over all repetitions. \n",
                        pipelineDescription, nModels, ref));
                caption = String.format(
                        "Averaged contingency table with correctly and incorrectly classified folds for %s",
            } else {
                        "The contingency table drawn from the %s and the %d models is listed in Tbl. \\ref{%s}.\n",
                        pipelineDescription, nModels, ref));

            int[][] contingencyMatrix = evalResults.getSampleData().getContingencyMatrix();
            ref = "tbl:ContingencyMatrix";
            values = new String[][] { { "Wrong", "", "" }, { "Correct", "", "" } };
            values[0][1] = String.valueOf(contingencyMatrix[0][0]);
            values[0][2] = String.valueOf(contingencyMatrix[0][1]);
            values[1][1] = String.valueOf(contingencyMatrix[1][0]);
            values[1][2] = String.valueOf(contingencyMatrix[1][1]);

            table = createLatexTable(caption, ref, new String[] { "M0/M1", "Wrong", "Correct" }, "|l|l|l|", values);

            // Test results
            report.append(String.format("\\subsubsection{%s Testing}", testType));
                    String.format("The system compared the %d models using the \\emph{McNemar test}. ", nModels));
            Pair<String, AbstractTestResult> result = evalResults.getNonParametricTestResults().get(measure);

            // Use pretty-print method descriptor if specified
            String method = result.getKey();
            if (StatsConfigConstants.PRETTY_PRINT_METHODS.containsKey(method)) {
                method = StatsConfigConstants.PRETTY_PRINT_METHODS.get(method);
            methodsSummary.put(testType, method);

            TestResult r = (TestResult) result.getValue();
            if (r != null && !Double.isNaN(r.getpValue())) {
                StringBuilder parameters = new StringBuilder();
                Iterator<String> it = r.getParameter().keySet().iterator();
                while (it.hasNext()) {
                    String parameter =;
                    double value = r.getParameter().get(parameter);
                    parameters.append(String.format("%s=%.3f, ", parameter, value));

                // Verbalize result according to p value
                Pair<String, Double> verbalizedP = verbalizeP(r.getpValue(), false);
                        "The test %s differences between the performances of the models ($%sp=%.3f, \\alpha=%.2f$).\\\\ \n",
                        verbalizedP.getKey(), parameters.toString(), r.getpValue(), verbalizedP.getValue()));
                // Store result for summary
                if (testSummary.get(testType).containsKey(verbalizedP.getKey())) {
                } else {
                    ArrayList<String> list = new ArrayList<String>();
                    testSummary.get(testType).put(verbalizedP.getKey(), list);

            } else {
                report.append("The test failed.\\\\ \n");

        // Summary of results
        for (String testType : new String[] { "Parametric", "Non-Parametric" }) {
            String prefix = "";

            if (nModels == 2) {
                        String.format("The system performed %s testing of the %d models using a %s. The test ",
                                testType.toLowerCase(), nModels, methodsSummary.get(testType)));
                prefix = "It";
            } else {
                String postHocTesting = String.format("%sPostHoc", testType);
                        "The system performed %s testing of the %d models using a %s and a %s post-hoc. The tests ",
                        testType.toLowerCase(), nModels, methodsSummary.get(testType),
                prefix = "They";

            // If all tests failed, there are no results to summarize.
            HashMap<String, List<String>> summary = testSummary.get(testType);
            if (summary.keySet().size() == 0) {
                report.append("failed. ");

            Iterator<String> it = summary.keySet().iterator();
            boolean usePrefix = false;
            while (it.hasNext()) {
                String pVerbalization =;
                List<String> affectedMeasures = summary.get(pVerbalization);
                if (!usePrefix) {
                    report.append(String.format("%s differences in performance for the %s. ", pVerbalization,
                } else {
                    report.append(String.format("%s %s differences in performance for the %s. ", prefix,
                            pVerbalization, createEnumeration(affectedMeasures)));
                usePrefix = true;
            report.append("\\\\ \n\n");


        // Appendix
        // Add all figures
        for (int i = 0; i < figures.size(); i++) {
            ref = figures.get(i)[0];
            String caption = figures.get(i)[1];
            String filename = figures.get(i)[2];
            report.append(String.format("\\includegraphics[width=1\\linewidth]{%s}\n", filename));
            report.append(String.format("\\caption{%s}\n", caption));
            report.append(String.format("\\label{%s}\n", ref));

        // Close document
        return report.toString();


    public String createPlainReport() {
        // Set locale to English globally to make reports independent of the
        // machine they're created on, e.g. use "." as decimal points on any
        // machine

        StringBuilder report = new StringBuilder();

        // Evaluation Overview
        report.append("Evaluation Overview\n");

        int nModels = evalResults.getSampleData().getModelMetadata().size();
        ArrayList<String> measures = evalResults.getMeasures();
        String ref = "tbl:models";

        // Separate training/testing datasets
        List<String> trainingDataList = new ArrayList<String>();
        List<String> testingDataList = new ArrayList<String>();
        List<Pair<String, String>> datasets = evalResults.getSampleData().getDatasetNames();
        Iterator<Pair<String, String>> itp = datasets.iterator();
        while (itp.hasNext()) {
            Pair<String, String> trainTest =;
            if (trainTest.getValue() != null) {
        Set<String> trainingDataSet = new HashSet<String>(trainingDataList);
        Set<String> testingDataSet = new HashSet<String>(testingDataList);

        String pipelineDescription = null;
        String sampleOrigin = "per CV";

        ReportTypes pipelineType = this.evalResults.getSampleData().getPipelineType();
        switch (pipelineType) {
        // One-domain n-fold CV (ReportData=per Fold)
        case CV:
            pipelineDescription = String.format("%d-fold cross validation",
            sampleOrigin = "per fold ";
        case MULTIPLE_CV:
            pipelineDescription = String.format("%dx%s repeated cross validation",
                    evalResults.getSampleData().getnRepetitions(), evalResults.getSampleData().getnFolds());
        case CV_DATASET_LVL:
            pipelineDescription = String.format("%d-fold cross validation over %d datasets",
                    evalResults.getSampleData().getnFolds(), trainingDataSet.size());
            pipelineDescription = String.format("%dx%s repeated cross validation over %d datasets",
                    evalResults.getSampleData().getnRepetitions(), evalResults.getSampleData().getnFolds(),
            sampleOrigin = "per dataset";
            // In the train/test scenario, the number of datasets only includes
            // distinct ones
            Set<String> allDataSets = new HashSet<String>(testingDataSet);
            pipelineDescription = String.format("Train/Test over %d datasets", allDataSets.size());
            sampleOrigin = "per dataset";
            pipelineDescription = "!unknown pipeline type!";
            sampleOrigin = "!unknown pipeline type!";

        boolean isBaselineEvaluation = evalResults.isBaselineEvaluation();
        report.append(String.format("The system performed a %s for the following %d models. \n",
                pipelineDescription, nModels));
        if (isBaselineEvaluation) {
            report.append(String.format("The models were compared against the first baseline model. \n",
                    pipelineDescription, nModels));
        } else {
                    String.format("The models were compared against each other. \n", pipelineDescription, nModels));

        ArrayList<Pair<String, String>> modelMetadata = evalResults.getSampleData().getModelMetadata();
        for (int modelIndex = 0; modelIndex < modelMetadata.size(); modelIndex++) {
            String[] algorithm = modelMetadata.get(modelIndex).getKey().split("\\.");
            String modelAlgorithm = algorithm[algorithm.length - 1];
            String modelFeatureSet = modelMetadata.get(modelIndex).getValue();
            report.append(String.format("M%d: %s; %s\n", modelIndex, modelAlgorithm, modelFeatureSet));

        // List test/training datasets. Consider the case when these sets are
        // different.
        if (testingDataSet.isEmpty()) {
            if (trainingDataSet.size() == 1) {
                        String.format("\nThe models were evaluated on the dataset %s. ", trainingDataList.get(0)));
            } else {
                report.append(String.format("\nThe models were evaluated on the datasets %s. ",
        } else {
            if (trainingDataSet.size() == 1 && testingDataSet.size() == 1) {
                        String.format("\nThe models were trained on the dataset %s and tested on the dataset %s. ",
                                trainingDataList.get(0), testingDataList.get(0)));
            } else if (trainingDataSet.size() > 1 && testingDataSet.size() == 1) {
                        "\nThe models were trained on the datasets %s and tested on the dataset %s. ",
                        this.createEnumeration(new ArrayList<String>(trainingDataSet)), testingDataList.get(0)));
            } else if (trainingDataSet.size() == 1 && testingDataSet.size() > 1) {
                        "\nThe models were trained on the dataset %s and tested on the datasets %s. ",
                        trainingDataList.get(0), this.createEnumeration(new ArrayList<String>(testingDataSet))));
            } else {
                        "\nThe models were trained on the datasets %s and tested on the datasets %s. ",
                        this.createEnumeration(new ArrayList<String>(trainingDataSet)),
                        this.createEnumeration(new ArrayList<String>(testingDataSet))));
        report.append(String.format("Their performance was assessed with the %s", createEnumeration(measures)));

        // Results (for each measure separately)
        report.append("\n\n###\n"); // All previous floats must be placed before
        // this point

        for (int i = 0; i < measures.size(); i++) {

            // Continue for McNemar contingency matrix
            String measure = measures.get(i);
            if (!evalResults.getSampleData().getSamples().containsKey(measure)) {

            // Samples
            report.append(String.format("Evaluation for %s. \n", measure));

            report.append("Samples: \n");

            ArrayList<ArrayList<Double>> models = evalResults.getSampleData().getSamples().get(measure);
            for (int modelId = 0; i < models.size(); i++) {
                ArrayList<Double> samples = models.get(modelId);
                report.append(String.format("C%d: ", modelId));
                for (int j = 0; j < samples.size(); j++) {
                    report.append(String.format("%.3f;", samples.get(j)));

            // Test results
            for (String testType : new String[] { "Parametric", "Non-Parametric" }) {
                report.append(String.format("%s Testing\n", testType));

                Pair<String, AbstractTestResult> result = null;
                if (testType.equals("Parametric")) {
                    result = evalResults.getParametricTestResults().get(measure);
                } else {
                    result = evalResults.getNonParametricTestResults().get(measure);

                // Use pretty-print method descriptor if specified
                String method = result.getKey();
                if (StatsConfigConstants.PRETTY_PRINT_METHODS.containsKey(method)) {
                    method = StatsConfigConstants.PRETTY_PRINT_METHODS.get(method);

                TestResult r = (TestResult) result.getValue();
                report.append(String.format("The system compared the %d models using the %s. ", nModels, method));

                if (r != null && !Double.isNaN(r.getpValue())) {

                    // A priori test: assumptions
                    boolean assumptionViolated = false;
                    Iterator<String> it = r.getAssumptions().keySet().iterator();
                    while (it.hasNext()) {
                        String assumption =;
                        TestResult at = (TestResult) r.getAssumptions().get(assumption);
                        if (at == null) {
                            report.append(String.format("Testing for %s failed. ", assumption));
                            assumptionViolated = true;
                        if (Double.isNaN(at.getpValue())) {
                                    String.format("Testing for %s using %s failed. ", assumption, at.getMethod()));
                            assumptionViolated = true;
                        double ap = at.getpValue();

                        if (ap <= this.significance_low) {
                            assumptionViolated = true;

                        // Verbalize result according to p value
                        Pair<String, Double> verbalizedP = verbalizeP(ap, true);

                        report.append(String.format("%s %s violation of %s (p=%f, alpha=%f). ", at.getMethod(),
                                verbalizedP.getKey(), assumption, ap, verbalizedP.getValue()));


                    if (assumptionViolated) {
                                "Given that the assumptions are violated, the following test may be corrupted. ");

                    // A Priori test results
                    Pair<String, Double> verbalizedP = verbalizeP(r.getpValue(), false);
                            "The %s %s differences between the performances of the models (p=%f, alpha=%f).\n\n",
                            method, verbalizedP.getKey(), r.getpValue(), verbalizedP.getValue()));

                    // Post-hoc test for >2 models (pairwise comparisons)
                    if (evalResults.getSampleData().getModelMetadata().size() > 2) {

                        Pair<String, AbstractTestResult> postHocResult = null;
                        HashMap<Integer, TreeSet<Integer>> postHocOrdering = null;
                        if (testType.equals("Parametric")) {
                            postHocResult = evalResults.getParametricPostHocTestResults().get(measure);
                            postHocOrdering = evalResults.getParameticPostHocOrdering().get(measure);
                        } else {
                            postHocResult = evalResults.getNonParametricPostHocTestResults().get(measure);
                            postHocOrdering = evalResults.getNonParameticPostHocOrdering().get(measure);
                        method = postHocResult.getKey();
                        if (StatsConfigConstants.PRETTY_PRINT_METHODS.containsKey(method)) {
                            method = StatsConfigConstants.PRETTY_PRINT_METHODS.get(method);

                        PairwiseTestResult rPostHoc = (PairwiseTestResult) postHocResult.getValue();
                        report.append(String.format("The system performed the %s post-hoc. ", method));

                        if (rPostHoc == null) {
                            report.append("The test failed. ");

                        // Assumptions
                        boolean assumptionsViolated = false;
                        it = rPostHoc.getAssumptions().keySet().iterator();
                        while (it.hasNext()) {
                            String assumption =;
                            PairwiseTestResult at = (PairwiseTestResult) rPostHoc.getAssumptions().get(assumption);
                            if (at == null) {
                                report.append(String.format("Testing for %s failed. ", assumption));
                                assumptionsViolated = true;

                            report.append(String.format("\nTesting for %s using %s returned p-values:\n%s",
                                    assumption, at.getMethod(), this.pairwiseResultsToString(at.getpValue())));

                            // Create table with pairwise p-values for
                            // assumption testing
                            double[][] ap = at.getpValue();
                            double max = getMax(ap);
                            double min = getMin(ap);
                            verbalizedP = verbalizeP(min, true);
                            if ((max > significance_low && min <= significance_low)
                                    || (max > significance_medium && min <= significance_medium)
                                    || (max > significance_high && min <= significance_high)) {
                                // partly significant to degree as specified by
                                // verbalized p-value
                                report.append(String.format("%s partly %s violation of %s (alpha=%.2f).\n",
                                        at.getMethod(), verbalizedP.getKey(), assumption, verbalizedP.getValue()));
                            } else {
                                report.append(String.format("%s %s violation of %s (alpha=%.2f).\n", at.getMethod(),
                                        verbalizedP.getKey(), assumption, verbalizedP.getValue()));

                            if (min <= this.significance_low) {
                                assumptionsViolated = true;


                        if (assumptionViolated) {
                                    "Given that the assumptions are violated, the following test may be corrupted. ");

                        // Result
                        double[][] ap = rPostHoc.getpValue();
                                String.format("P-values:\n%s", this.pairwiseResultsToString(rPostHoc.getpValue())));

                        // Already fetch pairwise adjustments here in order to
                        // determine choice of words
                        double max = getMax(ap);
                        double min = getMin(ap);
                        verbalizedP = verbalizeP(min, false);
                        ArrayList<StatsConfigConstants.CORRECTION_VALUES> adjustments = new ArrayList<StatsConfigConstants.CORRECTION_VALUES>(
                        String adjustWord = "";
                        if (adjustments.size() > 0) {
                            adjustWord = " for non-adjusted p-values";
                        if ((max > significance_low && min <= significance_low)
                                || (max > significance_medium && min <= significance_medium)
                                || (max > significance_high && min <= significance_high)) {
                            // partly significant to degree as specified by
                            // verbalized p-value
                                    "The %s partly %s differences between the performances of the models%s ($\\alpha=%.2f$, Tbl. \\ref{%s}). ",
                                    method, verbalizedP.getKey(), adjustWord, verbalizedP.getValue(), ref));
                        } else {
                                    "The %s %s differences between the performances of the models%s ($\\alpha=%.2f$, Tbl. \\ref{%s}). ",
                                    method, verbalizedP.getKey(), adjustWord, verbalizedP.getValue(), ref));

                        // Determine ordering of models
                        String ordering = getModelOrderingRepresentation(postHocOrdering);

                        // Pairwise adjustments
                        if (adjustments.size() > 0) {
                            double[] minAdjustments = new double[adjustments.size()];
                            double[] maxAdjustments = new double[adjustments.size()];
                            for (int j = 0; j < adjustments.size(); j++) {
                                StatsConfigConstants.CORRECTION_VALUES adjustmentMethod = adjustments.get(j);
                                double[][] correctedP = rPostHoc.getpValueCorrections().get(adjustmentMethod);
                                String am =;
                                if (StatsConfigConstants.PRETTY_PRINT_METHODS.containsKey(am)) {
                                    am = StatsConfigConstants.PRETTY_PRINT_METHODS.get(am);
                                report.append(String.format("\nAdjusted p-values according to %s:\n%s", am,

                                minAdjustments[j] = getMin(correctedP);
                                maxAdjustments[j] = getMax(correctedP);

                            min = getMin(minAdjustments);
                            max = getMax(maxAdjustments);
                            verbalizedP = verbalizeP(min, false);

                            if ((max > significance_low && min <= significance_low)
                                    || (max > significance_medium && min <= significance_medium)
                                    || (max > significance_high && min <= significance_high)) {
                                // partly significant to degree as specified by
                                // verbalized p-value
                                        "It partly %s differences for adjusted p-values (alpha=%.2f$).\n\n ",
                                        verbalizedP.getKey(), verbalizedP.getValue(), ref));
                            } else {
                                        String.format("It %s differences for adjusted p-values (alpha=%.2f$).\n\n ",
                                                verbalizedP.getKey(), verbalizedP.getValue(), ref));
                } else {
                    report.append(String.format("The %s failed.", method));

        // Contingency table and McNemar results if this test was performed
        if (evalResults.getNonParametricTest().equals("McNemar")) {
            String measure = "Contingency Table";
            String testType = "Non-Parametric";
            report.append("Evaluation for Contingency Table\n");

            int[][] contingencyMatrix = evalResults.getSampleData().getContingencyMatrix();
            if (evalResults.getSampleData().getPipelineType() == ReportTypes.MULTIPLE_CV) {
                        "Contingency table drawn from the %s and the %d models. The correctly and incorrectly classified instances per fold were averaged over all repetitions:\n%s\n",
                        pipelineDescription, nModels, this.contingencyMatrixToString(contingencyMatrix)));
            } else {
                report.append(String.format("Contingency table drawn from the %s and the %d models:\n%s\n",
                        pipelineDescription, nModels, this.contingencyMatrixToString(contingencyMatrix)));

            // Test results
            report.append(String.format("%s Testing\n", testType));
            report.append(String.format("The system compared the %d models using the McNemar test. ", nModels));
            Pair<String, AbstractTestResult> result = evalResults.getNonParametricTestResults().get(measure);

            TestResult r = (TestResult) result.getValue();
            if (r != null && !Double.isNaN(r.getpValue())) {
                StringBuilder parameters = new StringBuilder();
                Iterator<String> it = r.getParameter().keySet().iterator();
                while (it.hasNext()) {
                    String parameter =;
                    double value = r.getParameter().get(parameter);
                    parameters.append(String.format("%s=%.3f, ", parameter, value));

                // Verbalize result according to p value
                Pair<String, Double> verbalizedP = verbalizeP(r.getpValue(), false);
                        "The test %s differences between the performances of the models (%sp=%.3f, alpha=%.2f).\\\\ \n",
                        verbalizedP.getKey(), parameters.toString(), r.getpValue(), verbalizedP.getValue()));

            } else {
                report.append("The test failed.\n");

        return report.toString();

    /**
     * Create a written enumeration of several items by adding commas/"and"
     * in between
     *
     * @param items
     *            the items to be enumerated as a list of Strings
     * @return a String representing the enumeration
     */
    private String createEnumeration(List<String> items) {
        if (items.size() < 1) {
            return "";
        } else if (items.size() == 1) {
            return items.get(0);
        } else {
            StringBuilder enumeration = new StringBuilder();
            String separator = "";
            for (int i = 0; i < items.size() - 1; i++) {
                String item = items.get(i);
                item = escapeLatexCharacters(item);
                enumeration.append(String.format("%s%s", separator, item));
                separator = ", ";
            enumeration.append(String.format(" and %s", items.get(items.size() - 1)));
            return enumeration.toString();

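    /**
     * Creates an array of p-values with assigned significance indicators and
     * headers
     *
     * @param ap
     *            a double-array of p-values
     * @param isBaselineEvaluation
     *            if true, only the first column of ap is included in the table
     * @return A Pair with the table header and table values as String-arrays
     */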
    private Pair<String[], String[][]> getPValueStringArray(double[][] ap, boolean isBaselineEvaluation) {

        // Create table with pairwise p-values for assumption testing

        int nCols = ap.length;
        int nRows = ap.length;
        if (isBaselineEvaluation) {
            nCols = 1;

        // Create header first
        String[] header = new String[nCols + 1];
        header[0] = "";
        for (int j = 0; j < nCols; j++) {
            header[j + 1] = String.format("M%d", j);

        // Create value array
        String[][] values = new String[nRows][];
        for (int j = 0; j < nRows; j++) {
            values[j] = new String[nCols + 1];
            values[j][0] = String.format("M%d", j + 1);
            for (int k = 0; k < nCols; k++) {
                if (Double.isNaN(ap[j][k])) {
                    values[j][k + 1] = "-";
                } else {
                    String significance = "";
                    if (ap[j][k] <= significance_high) {
                        significance = "{\\footnotesize ***}";
                    } else if (ap[j][k] <= significance_medium) {
                        significance = "{\\footnotesize **}";
                    } else if (ap[j][k] <= significance_low) {
                        significance = "{\\footnotesize *}";
                    values[j][k + 1] = String.format("%.3f%s", ap[j][k], significance);

        return Pair.of(header, values);

    private String contingencyMatrixToString(int[][] array) {

        StringBuilder str = new StringBuilder();
        if (array == null) {
            return "";
        if (array.length != 2 || array[0].length != 2) {
            return "";

        // Header
        str.append(String.format("            M1 Wrong  M1 Correct\n"));

        // Values
        for (int i = 0; i < array.length; i++) {
            int[] subarray = array[i];
            if (i == 0) {
                str.append("M0 Wrong  ");
            } else {
                str.append("M0 Correct");
            for (int p : subarray) {
                str.append(String.format("%10d ", p));
        return str.toString();

    private String pairwiseResultsToString(double[][] array) {

        StringBuilder str = new StringBuilder();
        if (array == null) {
            return "";

        // Header
        str.append(String.format("    "));
        for (int i = 0; i < array.length; i++) {
            str.append(String.format("M%d%4s", i, " "));

        // Values
        for (int i = 0; i < array.length; i++) {
            double[] subarray = array[i];
            str.append(String.format("M%d  ", i + 1));
            for (double p : subarray) {
                if (Double.isNaN(p)) {
                    str.append(String.format("%f%3s", p, " "));
                    // str.append(String.format("%f  ",p));
                } else {
                    str.append(String.format("%.3f ", p));
        return str.toString();

    /**
     * Get the largest double value in a two-dimensional double array
     *
     * @param twodArray
     *            a two-dimensional double array
     * @return The largest double value in the array
     */
    private double getMax(double[][] twodArray) {
        return getMax(flattenCubicArray(twodArray));

    /**
     * Get the smallest double value in a two-dimensional double array
     *
     * @param twodArray
     *            a two-dimensional double array
     * @return The smallest double value in the array
     */
    private double getMin(double[][] twodArray) {
        return getMin(flattenCubicArray(twodArray));

    /**
     * Get the largest double value in a double array
     *
     * @param array
     *            a double array
     * @return The largest double value in the array
     */
    private double getMax(double[] array) {
        double maxValue = 0;
        for (int i = 0; i < array.length; i++) {
            if (array[i] > maxValue) {
                maxValue = array[i];
        return maxValue;

    /**
     * Get the smallest double value in a double array
     *
     * @param array
     *            a double array
     * @return The smallest double value in the array
     */
    private double getMin(double[] array) {
        double minValue = Double.POSITIVE_INFINITY;
        for (int i = 0; i < array.length; i++) {
            if (array[i] < minValue) {
                minValue = array[i];
        return minValue;

    /**
     * Flattens a square two-dimensional array (called "cubic" here; both
     * dimensions are expected to be equal) to a one-dimensional array
     *
     * @param array
     *            the two-dimensional array to be flattened
     * @return the flattened, one-dimensional array
     */
    private double[] flattenCubicArray(double[][] array) {
        double[] flattened = new double[array.length * array.length];
        int i = 0;
        for (double[] subarray : array) {
            for (double d : subarray) {
                flattened[i++] = d;
        return flattened;

    /**
     * Verbalizes a p-value according to different significance levels (defined
     * globally) and use-cases
     *
     * @param p
     *            the p-value to be verbalized
     * @param isAssumption
     *            boolean to signify whether the p-value belongs to a test
     *            assumption or the test itself. The verbalizations will be
     *            different.
     * @return a verbalization of the p-value according to the different
     *         significance levels
     */
    private Pair<String, Double> verbalizeP(double p, boolean isAssumption) {
        Pair<String, Double> test = null;
        Pair<String, Double> assumption = null;
        if (p > significance_low) {
            if (isAssumption) {
                assumption = Pair.of("did not show a", significance_low);
            } else {
                test = Pair.of("did not show significant", significance_low);
        } else if (p <= significance_low && p > significance_medium) {
            if (isAssumption) {
                assumption = Pair.of("indicated a weak", significance_low);
            } else {
                test = Pair.of("showed weak significant", significance_low);
        } else if (p <= significance_medium && p > significance_high) {
            if (isAssumption) {
                assumption = Pair.of("indicated a medium", significance_medium);
            } else {
                test = Pair.of("showed medium significant", significance_medium);
        } else {
            if (isAssumption) {
                assumption = Pair.of("indicated a strong", significance_high);
            } else {
                test = Pair.of("showed strong significant", significance_high);

        if (isAssumption) {
            return assumption;
        return test;


    private String fixSlashes(String filename) {
        if (File.separator.equals("\\")) {
            return filename.replace("\\", "\\\\");
        } else {
            return filename.replace("/", "//");
