org.languagetool.dev.wikipedia.SuggestionReplacerTest.java Source code

Java tutorial

Introduction

Here is the source code for org.languagetool.dev.wikipedia.SuggestionReplacerTest.java

Source

/* LanguageTool, a natural language style checker
 * Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.dev.wikipedia;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.language.English;
import org.languagetool.language.GermanyGerman;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.de.GermanSpellerRule;
import org.languagetool.rules.patterns.AbstractPatternRule;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.List;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertTrue;

public class SuggestionReplacerTest {

    private final SwebleWikipediaTextFilter filter = new SwebleWikipediaTextFilter();
    private final GermanyGerman germanyGerman = new GermanyGerman();
    private final JLanguageTool langTool = getLanguageTool();
    private final JLanguageTool englishLangTool = getLanguageTool(new English());

    @Test
    public void testApplySuggestionToOriginalText() throws Exception {
        SwebleWikipediaTextFilter filter = new SwebleWikipediaTextFilter();
        applySuggestion(langTool, filter, "Die CD ROM.", "Die <s>CD-ROM.</s>");
        applySuggestion(langTool, filter, "Die [[verlinkte]] CD ROM.", "Die [[verlinkte]] <s>CD-ROM.</s>");
        applySuggestion(langTool, filter, "Die [[Link|verlinkte]] CD ROM.",
                "Die [[Link|verlinkte]] <s>CD-ROM.</s>");
        applySuggestion(langTool, filter, "Die [[CD ROM]].", "Die <s>[[CD-ROM]].</s>");
        applySuggestion(langTool, filter, "Der [[Abschied]].\n\n==berschrift==\n\nEin Ab schied.",
                "Der [[Abschied]].\n\n==berschrift==\n\nEin <s>Abschied.</s>");
        applySuggestion(langTool, filter, "Ein konomischer Gottesdienst.",
                "Ein <s>kumenischer</s> Gottesdienst.");
        applySuggestion(langTool, filter, "Ein konomischer Gottesdienst mit konomischer Planung.",
                "Ein <s>kumenischer</s> Gottesdienst mit konomischer Planung.");
        applySuggestion(langTool, filter, "\nEin konomischer Gottesdienst.\n",
                "\nEin <s>kumenischer</s> Gottesdienst.\n");
        applySuggestion(langTool, filter, "\n\nEin konomischer Gottesdienst.\n",
                "\n\nEin <s>kumenischer</s> Gottesdienst.\n");
    }

    @Test
    public void testNestedTemplates() throws Exception {
        String markup = "{{FNBox|\n" + "  {{FNZ|1|1979 und 1984}}\n" + "  {{FNZ|2|[[Rundungsfehler]]}}\n"
                + "}}\n\nEin konomischer Gottesdienst.\n";
        applySuggestion(langTool, filter, markup, markup.replace("konomischer", "<s>kumenischer</s>"));
    }

    @Test
    public void testReference1() throws Exception {
        String markup = "Hier <ref name=isfdb>\n" + "Retrieved 2012-07-31.</ref> steht,, das Haus.";
        applySuggestion(langTool, filter, markup, markup.replace("steht,, das Haus.", "<s>steht,</s> das Haus."));
    }

    @Test
    public void testReference2() throws Exception {
        String markup = "Hier <ref name=\"NPOVxxx\" /> steht,, das Haus.";
        applySuggestion(langTool, filter, markup, markup.replace("steht,, das Haus.", "<s>steht, das</s> Haus."));
    }

    @Test
    public void testErrorAtTextBeginning() throws Exception {
        String markup = "A hour ago\n";
        applySuggestion(englishLangTool, filter, markup, markup.replace("A", "<s>An</s>"));
    }

    @Test
    public void testErrorAtParagraphBeginning() throws Exception {
        String markup = "X\n\nA hour ago.\n";
        applySuggestion(englishLangTool, filter, markup, markup.replace("A", "<s>An</s>"));
    }

    @Test
    public void testKnownBug() throws Exception {
        String markup = "{{HdBG GKZ|9761000}}.";
        try {
            applySuggestion(langTool, filter, markup, markup);
        } catch (RuntimeException e) {
            // known problem - Sweble's location seems to be wrong?!
        }
    }

    @Test
    public void testComplexText() throws Exception {
        String markup = "{{Dieser Artikel|behandelt die freie Onlineenzyklopdie Wikipedia; zu dem gleichnamigen Asteroiden siehe [[(274301) Wikipedia]].}}\n"
                + "\n" + "{{Infobox Website\n" + "| Name = '''Wikipedia'''\n"
                + "| Logo = [[Datei:Wikipedia-logo-v2-de.svg|180px|Das Wikipedia-Logo]]\n"
                + "| url = [//de.wikipedia.org/ de.wikipedia.org] (deutschsprachige Version)<br />\n"
                + "[//www.wikipedia.org/ www.wikipedia.org] (bersicht aller Sprachen)\n"
                + "| Kommerziell = nein\n"
                + "| Beschreibung = [[Wiki]] einer freien kollektiv erstellten Online-Enzyklopdie\n" + "}}\n"
                + "\n"
                + "'''Wikipedia''' [{{IPA|vkipe?dia}}] (auch: ''die Wikipedia'') ist ein am [[15. Januar|15.&nbsp;Januar]] [[2001]] gegrndetes Projekt. Und und so.\n";
        applySuggestion(langTool, filter, markup, markup.replace("Und und so.", "<s>Und so.</s>"));
    }

    @Test
    public void testCompleteText() throws Exception {
        InputStream stream = SuggestionReplacerTest.class
                .getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia.txt");
        String origMarkup = IOUtils.toString(stream, "utf-8");
        JLanguageTool langTool = new JLanguageTool(new GermanyGerman() {
            @Override
            protected synchronized List<AbstractPatternRule> getPatternRules() {
                return Collections.emptyList();
            }
        });
        langTool.disableRule(GermanSpellerRule.RULE_ID);
        langTool.disableRule("DE_AGREEMENT");
        langTool.disableRule("GERMAN_WORD_REPEAT_BEGINNING_RULE");
        langTool.disableRule("COMMA_PARENTHESIS_WHITESPACE");
        langTool.disableRule("DE_CASE");
        langTool.disableRule("ABKUERZUNG_LEERZEICHEN");
        langTool.disableRule("TYPOGRAFISCHE_ANFUEHRUNGSZEICHEN");
        langTool.disableRule("OLD_SPELLING");
        langTool.disableRule("DE_TOO_LONG_SENTENCE_40");
        langTool.disableRule("PUNCTUATION_PARAGRAPH_END");
        PlainTextMapping mapping = filter.filter(origMarkup);
        List<RuleMatch> matches = langTool.check(mapping.getPlainText());
        assertThat("Expected 3 matches, got: " + matches, matches.size(), is(3));
        int oldPos = 0;
        for (RuleMatch match : matches) {
            SuggestionReplacer replacer = new SuggestionReplacer(mapping, origMarkup,
                    new ErrorMarker("<s>", "</s>"));
            List<RuleMatchApplication> ruleMatchApplications = replacer.applySuggestionsToOriginalText(match);
            assertThat(ruleMatchApplications.size(), is(1));
            RuleMatchApplication ruleMatchApplication = ruleMatchApplications.get(0);
            assertThat(StringUtils.countMatches(ruleMatchApplication.getTextWithCorrection(),
                    "absichtlicher absichtlicher"), is(2));
            int pos = ruleMatchApplication.getTextWithCorrection().indexOf("<s>absichtlicher</s> Fehler");
            if (pos == -1) {
                // markup area varies because our mapping is sometimes a bit off:
                pos = ruleMatchApplication.getTextWithCorrection().indexOf("<s>absichtlicher Fehler</s>");
            }
            assertTrue("Found correction at: " + pos, pos > oldPos);
            oldPos = pos;
        }
    }

    @Test
    public void testCompleteText2() throws Exception {
        InputStream stream = SuggestionReplacerTest.class
                .getResourceAsStream("/org/languagetool/dev/wikipedia/wikipedia2.txt");
        String origMarkup = IOUtils.toString(stream, "utf-8");
        JLanguageTool langTool = new JLanguageTool(germanyGerman);
        PlainTextMapping mapping = filter.filter(origMarkup);
        langTool.disableRule("PUNCTUATION_PARAGRAPH_END"); //  added to prevent crash; TODO: check if needed
        List<RuleMatch> matches = langTool.check(mapping.getPlainText());
        assertTrue("Expected >= 30 matches, got: " + matches, matches.size() >= 30);
        for (RuleMatch match : matches) {
            SuggestionReplacer replacer = new SuggestionReplacer(mapping, origMarkup,
                    new ErrorMarker("<s>", "</s>"));
            List<RuleMatchApplication> ruleMatchApplications = replacer.applySuggestionsToOriginalText(match);
            if (ruleMatchApplications.isEmpty()) {
                continue;
            }
            RuleMatchApplication ruleMatchApplication = ruleMatchApplications.get(0);
            assertThat(StringUtils.countMatches(ruleMatchApplication.getTextWithCorrection(), "<s>"), is(1));
        }
    }

    private JLanguageTool getLanguageTool() {
        JLanguageTool langTool = getLanguageTool(germanyGerman);
        langTool.disableRule("DE_CASE");
        return langTool;
    }

    private JLanguageTool getLanguageTool(Language language) {
        return new JLanguageTool(language);
    }

    private void applySuggestion(JLanguageTool langTool, SwebleWikipediaTextFilter filter, String text,
            String expected) throws IOException {
        PlainTextMapping mapping = filter.filter(text);
        List<RuleMatch> matches = langTool.check(mapping.getPlainText());
        assertThat("Expected 1 match, got: " + matches, matches.size(), is(1));
        SuggestionReplacer replacer = new SuggestionReplacer(mapping, text, new ErrorMarker("<s>", "</s>"));
        List<RuleMatchApplication> ruleMatchApplications = replacer.applySuggestionsToOriginalText(matches.get(0));
        assertThat(ruleMatchApplications.size(), is(1));
        assertThat(ruleMatchApplications.get(0).getTextWithCorrection(), is(expected));
    }

}