Java tutorial
/* LanguageTool, a natural language style checker
 * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.rules.de;

import java.util.*;

import org.apache.commons.lang3.StringUtils;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.rules.*;

/**
 * Checks that no space was inserted inside a hyphenated compound
 * (as in 'Di&auml;ten- Erh&ouml;hung').
 *
 * <p>A match is reported when a token ending in a hyphen is directly followed by a token
 * starting with an upper-case letter, unless that following token is one of the all-caps
 * conjunctions {@code UND}, {@code ODER} or {@code BZW}.
 *
 * <p>All non-ASCII characters in string literals are written as Unicode escapes so that the
 * rule keeps working when the file is read or compiled with the wrong source encoding.
 *
 * @author Daniel Naber
 */
public class DashRule extends Rule {

  /** En dash (U+2013) directly followed by a hyphen; a dash variant found in Wikipedia text. */
  private static final String EN_DASH_HYPHEN = "\u2013-";

  // "Moeglicherweise ... ueberfluessiges ... eingefuegt" with proper umlauts, see class comment.
  private static final String MESSAGE =
      "M\u00f6glicherweise fehlt ein 'und' oder ein Komma, oder es wurde nach dem Wort "
      + "ein \u00fcberfl\u00fcssiges Leerzeichen eingef\u00fcgt. Eventuell haben Sie auch versehentlich einen Bindestrich statt eines Punktes eingef\u00fcgt.";

  private static final String SHORT_MESSAGE =
      "Fehlendes 'und' oder Komma oder \u00fcberfl\u00fcssiges Leerzeichen?";

  /**
   * Creates the rule, assigns it to the compounding category and registers one
   * incorrect/corrected example pair.
   *
   * @param messages resource bundle used to look up the category
   */
  public DashRule(ResourceBundle messages) {
    super.setCategory(Categories.COMPOUNDING.getCategory(messages));
    addExamplePair(Example.wrong("Bundestag beschlie\u00dft <marker>Di\u00e4ten- Erh\u00f6hung</marker>"),
                   Example.fixed("Bundestag beschlie\u00dft <marker>Di\u00e4ten-Erh\u00f6hung</marker>"));
  }

  /** Returns the fixed identifier of this rule. */
  @Override
  public String getId() {
    return "DE_DASH";
  }

  /** Returns the (German) human-readable description of this rule. */
  @Override
  public String getDescription() {
    return "Keine Leerzeichen in Bindestrich-Komposita (wie z.B. in 'Di\u00e4ten- Erh\u00f6hung')";
  }

  /**
   * Scans the non-whitespace tokens of the sentence and reports every hyphen-terminated
   * token that is directly followed by a capitalised token.
   *
   * @param sentence the analyzed sentence to check
   * @return the matches found, converted with {@code toRuleMatchArray}
   */
  @Override
  public RuleMatch[] match(AnalyzedSentence sentence) {
    List<RuleMatch> ruleMatches = new ArrayList<>();
    AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    String prevToken = null;
    for (int i = 0; i < tokens.length; i++) {
      String token = tokens[i].getToken();
      // All-caps conjunctions are skipped: headlines such as "NORD- UND SUEDKOREA" are fine.
      if (endsWithCompoundHyphen(prevToken)
          && startsWithUpperCase(token)
          && !StringUtils.equalsAny(token, "UND", "ODER", "BZW")) {
        int fromPos = tokens[i - 1].getStartPos();
        // The marked span runs one character past the hyphen-terminated token
        // (presumably the superfluous space that follows it).
        int toPos = fromPos + prevToken.length() + 1;
        RuleMatch ruleMatch = new RuleMatch(this, sentence, fromPos, toPos, MESSAGE, SHORT_MESSAGE);
        // Either drop the extra character or replace it with a comma and a space.
        ruleMatch.setSuggestedReplacements(Arrays.asList(prevToken, prevToken + ", "));
        ruleMatches.add(ruleMatch);
      }
      prevToken = token;
    }
    return toRuleMatchArray(ruleMatches);
  }

  /**
   * Tells whether the token ends in a single plain hyphen: not a lone "-", not containing
   * "--", and not containing an en dash directly followed by a hyphen.
   */
  private static boolean endsWithCompoundHyphen(String token) {
    return token != null
        && token.endsWith("-")
        && !token.equals("-")
        && !token.contains("--")
        && !token.contains(EN_DASH_HYPHEN);
  }

  /** Tells whether the token is non-empty and its first character is upper case. */
  private static boolean startsWithUpperCase(String token) {
    return token != null && !token.isEmpty() && Character.isUpperCase(token.charAt(0));
  }

}