org.languagetool.rules.spelling.suggestions.SuggestionsChanges.java Source code

Java tutorial

Introduction

Here is the source code for org.languagetool.rules.spelling.suggestions.SuggestionsChanges.java

Source

/*
 *  LanguageTool, a natural language style checker
 *  * Copyright (C) 2018 Fabian Richter
 *  *
 *  * This library is free software; you can redistribute it and/or
 *  * modify it under the terms of the GNU Lesser General Public
 *  * License as published by the Free Software Foundation; either
 *  * version 2.1 of the License, or (at your option) any later version.
 *  *
 *  * This library is distributed in the hope that it will be useful,
 *  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  * Lesser General Public License for more details.
 *  *
 *  * You should have received a copy of the GNU Lesser General Public
 *  * License along with this library; if not, write to the Free Software
 *  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 *  * USA
 *
 */

package org.languagetool.rules.spelling.suggestions;

import org.apache.commons.lang3.tuple.Pair;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.io.BufferedWriter;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

/**
 * Helper class for SuggestionChangesTest; tracks experiment configuration and results.
 * <p>
 * Rules should check {@code getInstance() != null} and then call
 * {@code getInstance().getCurrentExperiment()} in their constructors to fetch relevant parameters.
 * Use {@link #isRunningExperiment(String)} if no parameters are needed.
 * <p>
 * Experiments are derived once from the configuration (including a grid search over parameter values);
 * results are accumulated per experiment and per (experiment, dataset) pair and written out by a
 * JVM shutdown hook when a report writer was supplied.
 */
public class SuggestionsChanges {
    // The two per-experiment line formats differ only by one space before "->"; both are kept
    // byte-identical to the historical report output.
    private static final String OVERALL_LINE_FORMAT =
            "Experiment #%d (%s): %d / %d correct suggestions -> %f%% accuracy;"
                    + " score (less = better): %d; not found: %d; processed %f chars/second.%n";
    private static final String DATASET_LINE_FORMAT =
            "Experiment #%d (%s): %d / %d correct suggestions-> %f%% accuracy;"
                    + " score (less = better): %d; not found: %d; processed %f chars/second.%n";
    private static final String BEST_LINE_FORMAT = "%nBest experiment: #%d (%s) @ %f%% accuracy%n";

    // volatile: the counters below are concurrent maps, so this class is evidently used from several
    // threads; the singleton reference and the current experiment need the same visibility guarantee.
    private static volatile SuggestionsChanges instance;
    private final SuggestionChangesTestConfig config;
    private final List<SuggestionChangesExperiment> experiments;

    // Totals per experiment.
    private final ConcurrentMap<SuggestionChangesExperiment, Integer> correctSuggestions = new ConcurrentHashMap<>();
    private final ConcurrentMap<SuggestionChangesExperiment, Integer> notFoundSuggestions = new ConcurrentHashMap<>();
    private final ConcurrentMap<SuggestionChangesExperiment, Integer> suggestionPosSum = new ConcurrentHashMap<>();
    private final ConcurrentMap<SuggestionChangesExperiment, Integer> textSize = new ConcurrentHashMap<>();
    private final ConcurrentMap<SuggestionChangesExperiment, Long> computationTime = new ConcurrentHashMap<>();
    private final ConcurrentMap<SuggestionChangesExperiment, Integer> numSamples = new ConcurrentHashMap<>();

    // The same totals, broken down per (experiment, dataset) pair.
    private final ConcurrentMap<Pair<SuggestionChangesExperiment, SuggestionChangesDataset>, Integer> datasetCorrectSuggestions = new ConcurrentHashMap<>();
    private final ConcurrentMap<Pair<SuggestionChangesExperiment, SuggestionChangesDataset>, Integer> datasetNotFoundSuggestions = new ConcurrentHashMap<>();
    private final ConcurrentMap<Pair<SuggestionChangesExperiment, SuggestionChangesDataset>, Integer> datasetSuggestionPosSum = new ConcurrentHashMap<>();
    private final ConcurrentMap<Pair<SuggestionChangesExperiment, SuggestionChangesDataset>, Integer> datasetNumSamples = new ConcurrentHashMap<>();
    private final ConcurrentMap<Pair<SuggestionChangesExperiment, SuggestionChangesDataset>, Integer> datasetTextSize = new ConcurrentHashMap<>();
    private final ConcurrentMap<Pair<SuggestionChangesExperiment, SuggestionChangesDataset>, Long> datasetComputationTime = new ConcurrentHashMap<>();

    private volatile SuggestionChangesExperiment currentExperiment = null;

    /**
     * Builds the experiment list from the configuration and, if a writer is given, registers a
     * shutdown hook that writes the final report to it.
     *
     * @param config       experiment and dataset configuration
     * @param reportWriter destination of the final report; {@code null} disables reporting
     */
    private SuggestionsChanges(SuggestionChangesTestConfig config, @Nullable BufferedWriter reportWriter) {
        this.config = Objects.requireNonNull(config, "config");
        experiments = generateExperiments(config.experiments);

        // Without a writer the report would be a no-op, so there is no point in registering a hook.
        if (reportWriter != null) {
            Runtime.getRuntime().addShutdownHook(new Thread(new Report(reportWriter)));
        }
    }

    /**
     * null if nothing is configured, i.e. in most normal use cases
     */
    @Nullable
    public static SuggestionsChanges getInstance() {
        return instance;
    }

    /**
     * Creates the singleton, replacing any previously initialized instance.
     *
     * @param config       experiment and dataset configuration
     * @param reportWriter destination of the final report; {@code null} disables reporting
     */
    static void init(@NotNull SuggestionChangesTestConfig config, @Nullable BufferedWriter reportWriter) {
        instance = new SuggestionsChanges(config, reportWriter);
    }

    /**
     * Expands a parameter grid into every combination of parameter values.
     * <p>
     * Parameters are processed from the last key to the first, so the last key varies slowest in the
     * returned list. A parameter whose value list is {@code null} or empty is skipped instead of
     * discarding the combinations built so far.
     *
     * @param grid    parameter name to candidate values
     * @param current partial combinations to extend; an empty list means "start from scratch"
     * @return all combinations; a single empty combination if there is nothing to combine
     */
    private List<Map<String, Object>> gridsearch(SortedMap<String, List<Object>> grid,
            List<Map<String, Object>> current) {
        List<Map<String, Object>> combinations = current;
        if (combinations.isEmpty()) {
            // Seed with one empty combination so every level can be handled uniformly.
            combinations = Collections.singletonList(Collections.<String, Object>emptyMap());
        }

        SortedMap<String, List<Object>> remaining = grid;
        while (!remaining.isEmpty()) {
            String name = remaining.lastKey();
            List<Object> values = remaining.get(name);

            if (values != null && !values.isEmpty()) {
                List<Map<String, Object>> expanded = new ArrayList<>();
                for (Map<String, Object> partial : combinations) {
                    for (Object value : values) {
                        Map<String, Object> extended = new HashMap<>(partial);
                        extended.put(name, value);
                        expanded.add(extended);
                    }
                }
                combinations = expanded;
            }

            // Continue with all keys strictly before the one just handled.
            remaining = remaining.headMap(name);
        }
        return combinations;
    }

    /**
     * Turns experiment specifications into concrete experiments: one experiment without settings for
     * a specification without parameters, otherwise one experiment per parameter combination.
     *
     * @param experimentSpecs configured experiment runs
     * @return experiments in specification order
     */
    private List<SuggestionChangesExperiment> generateExperiments(
            List<SuggestionChangesExperimentRuns> experimentSpecs) {
        List<SuggestionChangesExperiment> generated = new ArrayList<>();
        for (SuggestionChangesExperimentRuns spec : experimentSpecs) {
            if (spec.parameters == null) {
                generated.add(new SuggestionChangesExperiment(spec.name, Collections.emptyMap()));
                continue;
            }
            SortedMap<String, List<Object>> params = new TreeMap<>(spec.parameters);
            List<Map<String, Object>> combinations = gridsearch(params, Collections.emptyList());
            for (Map<String, Object> settings : combinations) {
                generated.add(new SuggestionChangesExperiment(spec.name, settings));
            }
        }
        return generated;
    }

    /**
     * @return the configuration this instance was initialized with
     */
    public SuggestionChangesTestConfig getConfig() {
        return config;
    }

    /**
     * @return the experiment set via {@link #setCurrentExperiment}, or {@code null} if none is set
     */
    @Nullable
    public SuggestionChangesExperiment getCurrentExperiment() {
        return currentExperiment;
    }

    /**
     * Selects the experiment that rules should pick up on construction.
     * <p>
     * For testing changes to suggestion ordering using a data corpus;
     * iterate over experiments (including grid search for parameters).
     * Original behavior (for A/B testing) can be modeled by an experiment without parameters and an arbitrary name.
     *
     * @param experiment experiment to activate, or {@code null} to clear it
     */
    public void setCurrentExperiment(@Nullable SuggestionChangesExperiment experiment) {
        currentExperiment = experiment;
    }

    /**
     * @param name experiment name to test for
     * @return {@code true} if an instance is configured and its current experiment has the given name
     */
    public static boolean isRunningExperiment(String name) {
        // Read the singleton once so a concurrent init() cannot change it between check and use.
        SuggestionsChanges changes = getInstance();
        if (changes == null) {
            return false;
        }
        SuggestionChangesExperiment experiment = changes.getCurrentExperiment();
        return experiment != null && experiment.name.equals(name);
    }

    /**
     * Records the outcome of one sample, both for the experiment as a whole and for the
     * (experiment, dataset) pair.
     *
     * @param source                experiment and dataset the sample belongs to
     * @param position              position of the expected suggestion; 0 counts as correct, -1 as not found,
     *                              every other value (including 0) is added to the position sum
     * @param resultTextSize        size of the processed text, added to the text size total
     * @param resultComputationTime time spent on the sample, added to the computation time total
     */
    public void trackExperimentResult(Pair<SuggestionChangesExperiment, SuggestionChangesDataset> source,
            int position, int resultTextSize, long resultComputationTime) {
        SuggestionChangesExperiment experiment = source.getKey();

        numSamples.merge(experiment, 1, Integer::sum);
        datasetNumSamples.merge(source, 1, Integer::sum);

        textSize.merge(experiment, resultTextSize, Integer::sum);
        datasetTextSize.merge(source, resultTextSize, Integer::sum);

        computationTime.merge(experiment, resultComputationTime, Long::sum);
        datasetComputationTime.merge(source, resultComputationTime, Long::sum);

        if (position == 0) {
            correctSuggestions.merge(experiment, 1, Integer::sum);
            datasetCorrectSuggestions.merge(source, 1, Integer::sum);
        }
        if (position == -1) {
            notFoundSuggestions.merge(experiment, 1, Integer::sum);
            datasetNotFoundSuggestions.merge(source, 1, Integer::sum);
        } else {
            suggestionPosSum.merge(experiment, position, Integer::sum);
            datasetSuggestionPosSum.merge(source, position, Integer::sum);
        }
    }

    /**
     * @return the experiments generated from the configuration, in generation order
     */
    public List<SuggestionChangesExperiment> getExperiments() {
        return experiments;
    }

    /** Snapshot of the totals tracked for one experiment. */
    private Tally overallTally(SuggestionChangesExperiment experiment) {
        return new Tally(
                correctSuggestions.getOrDefault(experiment, 0),
                suggestionPosSum.getOrDefault(experiment, 0),
                notFoundSuggestions.getOrDefault(experiment, 0),
                numSamples.getOrDefault(experiment, 0),
                textSize.getOrDefault(experiment, 0),
                computationTime.getOrDefault(experiment, 0L));
    }

    /** Snapshot of the totals tracked for one (experiment, dataset) pair. */
    private Tally datasetTally(Pair<SuggestionChangesExperiment, SuggestionChangesDataset> source) {
        return new Tally(
                datasetCorrectSuggestions.getOrDefault(source, 0),
                datasetSuggestionPosSum.getOrDefault(source, 0),
                datasetNotFoundSuggestions.getOrDefault(source, 0),
                datasetNumSamples.getOrDefault(source, 0),
                datasetTextSize.getOrDefault(source, 0),
                datasetComputationTime.getOrDefault(source, 0L));
    }

    /** Immutable bundle of the counters needed for one report line. */
    private static final class Tally {
        final int correct;
        final int positionSum;
        final int notFound;
        final int samples;
        final int chars;
        final long time;

        Tally(int correct, int positionSum, int notFound, int samples, int chars, long time) {
            this.correct = correct;
            this.positionSum = positionSum;
            this.notFound = notFound;
            this.samples = samples;
            this.chars = chars;
            this.time = time;
        }

        /** Percentage of correct suggestions; 0 when no samples were tracked (avoids NaN). */
        double accuracy() {
            return samples == 0 ? 0.0 : (double) correct / samples * 100.0;
        }

        /**
         * Processed characters per second; 0 when no time was recorded (avoids NaN/Infinity).
         * NOTE(review): the factor 1000 suggests the tracked time is in milliseconds - confirm with callers.
         */
        double charsPerSecond() {
            return time == 0L ? 0.0 : (double) chars / time * 1000.0;
        }
    }

    /**
     * Shutdown task that prints the collected results to standard output and writes them to the
     * report writer, closing the writer afterwards.
     */
    private class Report implements Runnable {

        private final BufferedWriter reportWriter;

        Report(BufferedWriter reportWriter) {
            this.reportWriter = reportWriter;
        }

        @Override
        public void run() {
            if (reportWriter == null) {
                return;
            }
            // try-with-resources: the writer is closed even if building or writing the report fails.
            try (BufferedWriter writer = reportWriter) {
                StringBuilder report = new StringBuilder();
                report.append("Overall report:\n\n");
                appendSection(report, OVERALL_LINE_FORMAT, SuggestionsChanges.this::overallTally);

                for (SuggestionChangesDataset dataset : config.datasets) {
                    report.append(String.format("%nReport for dataset: %s%n", dataset.name));
                    appendSection(report, DATASET_LINE_FORMAT,
                            experiment -> datasetTally(Pair.of(experiment, dataset)));
                }

                System.out.println(report);
                writer.write(report.toString());
            } catch (IOException e) {
                throw new RuntimeException("Could not write suggestion changes report", e);
            }
        }

        /**
         * Appends one line per experiment followed by the "best experiment" summary line.
         * Experiments are numbered from 1 in list order; the best one is the first with the
         * strictly highest accuracy above 0 (id -1 and {@code null} if there is none).
         */
        private void appendSection(StringBuilder report, String lineFormat,
                java.util.function.Function<SuggestionChangesExperiment, Tally> lookup) {
            SuggestionChangesExperiment best = null;
            int bestId = -1;
            double bestAccuracy = 0.0;

            int experimentId = 0;
            for (SuggestionChangesExperiment experiment : experiments) {
                experimentId++;
                Tally tally = lookup.apply(experiment);
                double accuracy = tally.accuracy();
                if (accuracy > bestAccuracy) {
                    best = experiment;
                    bestAccuracy = accuracy;
                    bestId = experimentId;
                }
                report.append(String.format(lineFormat, experimentId, experiment, tally.correct,
                        tally.samples, accuracy, tally.positionSum, tally.notFound, tally.charsPerSecond()));
            }

            report.append(String.format(BEST_LINE_FORMAT, bestId, best, bestAccuracy));
        }
    }

}