// Java tutorial
/* LanguageTool, a natural language style checker
 * Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.dev;

import org.apache.commons.lang3.StringUtils;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.LanguageMaintainedState;
import org.languagetool.Languages;
import org.languagetool.databroker.ResourceDataBroker;
import org.languagetool.language.Contributor;
import org.languagetool.rules.ConfusionSetLoader;
import org.languagetool.rules.Rule;
import org.languagetool.rules.spelling.hunspell.HunspellNoSuggestionRule;
import org.languagetool.tools.StringTools;
import org.languagetool.tools.Tools;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.*;

import static java.util.Comparator.comparing;

/**
 * Command line tool to list supported languages and their number of rules.
 *
 * <p>The overview is written to standard output as an HTML fragment: a short
 * heading with the LanguageTool version and the current date, followed by one
 * table row per non-variant language. The Java rule count is taken from the
 * source tree via a relative path, so the tool has to be started from the
 * {@code languagetool-standalone} directory (see the sanity check at the end
 * of {@link #run()}).
 *
 * @author Daniel Naber
 */
@SuppressWarnings("StringConcatenationInsideStringBufferAppend")
public final class RuleOverview {

  /** Short codes of the languages whose name is rendered as a link to {@code ../<code>/}. */
  private static final List<String> langSpecificWebsites = Arrays.asList(
          "br", "ca", "zh", "da", "nl", "eo", "fr", "gl", "de", "it", "pl", "pt", "ru", "es", "uk");

  /** Kind of dictionary based spell checking found for a language. */
  enum SpellcheckSupport {
    Full, NoSuggestion, None
  }

  /**
   * Prints the rule overview to standard output.
   *
   * @param args ignored
   * @throws IOException if a rule file cannot be read
   */
  public static void main(final String[] args) throws IOException {
    RuleOverview prg = new RuleOverview();
    prg.run();
  }

  private RuleOverview() {
    // no public constructor
  }

  /**
   * Prints the heading, the table header and one table row per non-variant language.
   *
   * @throws IOException if a rule file cannot be read
   * @throws RuntimeException if not a single Java rule source file was found, which
   *         indicates that the tool was started from the wrong working directory
   */
  private void run() throws IOException {
    printTableHeader();
    final List<Language> sortedLanguages = getSortedLanguages();
    // setup false friends counting; '/' instead of File.separator because the path is
    // handed to Tools.getStream() like the rule file names in countRulesForLanguage(),
    // which are also looked up via Class.getResource()
    final String falseFriendFile = JLanguageTool.getDataBroker().getRulesDir() + "/false-friends.xml";
    final String falseFriendRules = readXmlWithoutComments(falseFriendFile);
    int overallJavaCount = 0;
    RuleActivityOverview activity = new RuleActivityOverview();
    for (final Language lang : sortedLanguages) {
      if (lang.isVariant()) {
        continue;
      }
      System.out.print("<tr>");
      printLanguageCell(lang, sortedLanguages);
      printXmlRuleCells(lang);
      overallJavaCount += printJavaRuleCell(lang);
      System.out.print(rightCell(String.valueOf(countFalseFriendRules(falseFriendRules, lang))));
      printSpellcheckCell(spellcheckSupport(lang, sortedLanguages));
      System.out.print(rightCell(String.valueOf(countConfusionPairs(lang))));
      printActivityCell(activity, lang);
      printMaintainerCell(lang);
      System.out.println("</tr>");
    }
    if (overallJavaCount == 0) {
      throw new RuntimeException(
              "No Java rules found - start this script from the languagetool-standalone directory");
    }
    System.out.println("</tbody>");
    System.out.println("</table>");
  }

  /** Prints the heading paragraph, the opening table tag and the table header row. */
  private void printTableHeader() {
    System.out.println("<p><b>Rules in LanguageTool " + JLanguageTool.VERSION + "</b><br />");
    System.out.println("Date: " + new SimpleDateFormat("yyyy-MM-dd").format(new Date()) + "</p>\n");
    System.out.println("<table class=\"tablesorter sortable\" style=\"width: auto\">");
    System.out.println("<thead>");
    System.out.println("<tr>");
    System.out.println(" <th valign='bottom' width=\"200\">Language</th>");
    System.out.println(" <th valign='bottom' align=\"left\" width=\"60\">XML<br/>rules</th>");
    System.out.println(" <th></th>");
    System.out.println(" <th align=\"left\" width=\"60\">Java<br/>rules</th>");
    System.out.println(" <th align=\"left\" width=\"60\">False<br/>friends</th>");
    System.out.println(" <th align=\"left\" width=\"60\">Spell<br/>check*</th>");
    System.out.println(" <th align=\"left\" width=\"60\">Confusion<br/>pairs</th>");
    System.out.println(" <th valign='bottom' align=\"left\" width=\"90\">Activity</th>");
    System.out.println(" <th valign='bottom' align=\"left\">Rule Maintainers</th>");
    System.out.println("</tr>");
    System.out.println("</thead>");
    System.out.println("<tbody>");
  }

  /** Wraps {@code content} in a top/right aligned table cell. */
  private static String rightCell(String content) {
    return "<td valign=\"top\" align=\"right\">" + content + "</td>";
  }

  /** Prints the cell with the language name (linked if it has its own website) and its variants. */
  private void printLanguageCell(Language lang, List<Language> sortedLanguages) {
    final String langCode = lang.getShortCode();
    final List<String> variants = getVariantNames(sortedLanguages, lang);
    String variantsText = "";
    if (!variants.isEmpty()) {
      variantsText = "<br/><span class='langVariants'>Variants for: " + String.join(", ", variants) + "</span>";
    }
    if (langSpecificWebsites.contains(langCode)) {
      System.out.print("<td valign=\"top\"><a href=\"../" + langCode + "/\">" + lang.getName() + "</a>" + variantsText + "</td>");
    } else {
      System.out.print("<td valign=\"top\">" + lang.getName() + " " + variantsText + "</td>");
    }
  }

  /**
   * Prints the two cells that belong to the "XML rules" header and the unnamed
   * header next to it: the rule count and the browse/source links.
   */
  private void printXmlRuleCells(Language lang) throws IOException {
    final String langCode = lang.getShortCode();
    final int allRules = countRulesForLanguage(lang);
    if (allRules == 0) {
      System.out.print(rightCell("0"));
      // keep the row aligned with the header: the links column stays empty
      System.out.print(rightCell(""));
      return;
    }
    final String ruleBase = "https://github.com/languagetool-org/languagetool/blob/master/languagetool-language-modules/"
            + langCode + "/src/main/resources/org/languagetool/rules/";
    System.out.print(rightCell(String.valueOf(allRules)));
    System.out.print(rightCell(
            "<a href=\"http://community.languagetool.org/rule/list?lang=" + langCode + "\">Browse</a>, "
            + "<a href=\"" + ruleBase + langCode + "/grammar.xml\">XML</a>"));
  }

  /**
   * Prints the cell with the number of Java rule source files of the language.
   *
   * @return the number of Java rule source files found, {@code 0} if the source
   *         directory does not exist or cannot be listed
   */
  private int printJavaRuleCell(Language lang) {
    final String langCode = lang.getShortCode();
    final File dir = new File("../languagetool-language-modules/" + langCode + "/src/main/java"
            + JLanguageTool.getDataBroker().getRulesDir() + "/" + langCode);
    // listFiles() returns null if 'dir' is not a directory or cannot be read
    final File[] javaRules = dir.exists() ? dir.listFiles(new JavaFilter(lang.getName())) : null;
    final int javaCount = javaRules == null ? 0 : javaRules.length;
    if (javaCount > 0) {
      final String sourceCodeLink = "https://github.com/languagetool-org/languagetool/blob/master/languagetool-language-modules/"
              + langCode + "/src/main/java/org/languagetool/rules/" + langCode + "/";
      System.out.print(rightCell("<a href=\"" + sourceCodeLink + "\">" + javaCount + "</a>"));
    } else {
      System.out.print(rightCell(String.valueOf(javaCount)));
    }
    return javaCount;
  }

  /** Prints the spell check cell: a check mark, a bracketed check mark or nothing. */
  private void printSpellcheckCell(SpellcheckSupport spellcheckSupport) {
    // The mark is written as an HTML entity so the output does not depend on the
    // source or console encoding.
    final String spellSupportStr;
    switch (spellcheckSupport) {
      case Full:
        spellSupportStr = "&#10003;";
        break;
      case NoSuggestion:
        spellSupportStr = "<span title='spell check without suggestions'>(&#10003;)</span>";
        break;
      default:
        spellSupportStr = "";
        break;
    }
    System.out.print(rightCell(spellSupportStr));
  }

  /** Prints the activity cell: a hidden commit count (for sorting) and a bar image. */
  private void printActivityCell(RuleActivityOverview activity, Language lang) {
    final int commits = activity.getActivityFor(lang, 365 / 2);
    int width = (int) Math.max(commits * 0.5, 1);
    String images = "";
    if (width > 50) {
      // bar is capped at 50px, the extra image marks that it was cut off
      images += "<img title='" + commits + " commits in the last 6 months' src='../images/bar-end.png' width='22' height='10'/>";
      width = 50;
    }
    images += "<img title='" + commits + " commits in the last 6 months' src='../images/bar.png' width='" + width + "' height='10'/>";
    System.out.print(rightCell("<span style='display:none'>" + commits + "</span>" + images));
  }

  /** Prints the maintainer cell, marking languages that are not actively maintained. */
  private void printMaintainerCell(Language lang) {
    String maintainerInfo = getMaintainerInfo(lang);
    String maintainerText = "";
    if (lang.getMaintainedState() != LanguageMaintainedState.ActivelyMaintained) {
      maintainerText = "<span class='maintainerNeeded'><a href='http://wiki.languagetool.org/tasks-for-language-maintainers'>Looking for maintainer</a></span> - ";
      maintainerInfo = "<span class='previousMaintainer'><br>previous maintainer: " + maintainerInfo + "</span>";
    }
    System.out.print("<td valign=\"top\" align=\"left\">" + maintainerText + maintainerInfo + "</td>");
  }

  /**
   * Reads the given resource as UTF-8 and removes XML comments and the opening
   * {@code <rules ...>} tag, so that neither is picked up by the substring based counting.
   */
  private static String readXmlWithoutComments(String resourcePath) throws IOException {
    // try-with-resources: the stream is closed even if reading fails
    try (InputStream stream = Tools.getStream(resourcePath)) {
      return StringTools.readStream(stream, "utf-8")
              .replaceAll("(?s)<!--.*?-->", "")
              .replaceAll("(?s)<rules.*?>", "");
    }
  }

  /**
   * Counts the XML rules in all rule files of the language that can be found as a resource.
   *
   * @return number of {@code <rule ...>} plus {@code <rule>} elements outside of comments
   */
  private int countRulesForLanguage(Language lang) throws IOException {
    int count = 0;
    for (String ruleFileName : lang.getRuleFileNames()) {
      final URL url = this.getClass().getResource(ruleFileName);
      if (url == null) {
        continue;  // rule file is listed by the language but not available
      }
      final String xmlRules = readXmlWithoutComments(ruleFileName);
      count += countXmlRules(xmlRules);
      count += countXmlRuleGroupRules(xmlRules);
    }
    return count;
  }

  /** Returns the part in parentheses of each variant's name, e.g. the country. */
  private List<String> getVariantNames(List<Language> allLanguages, Language lang) {
    final List<String> result = new ArrayList<>();
    for (Language variant : getVariants(allLanguages, lang)) {
      result.add(variant.getName().replaceAll(".*\\((.*?)\\).*", "$1").trim());
    }
    return result;
  }

  /** Returns all variant languages that share the short code of {@code lang}. */
  private List<Language> getVariants(List<Language> allLanguages, Language lang) {
    final List<Language> variants = new ArrayList<>();
    for (Language candidate : allLanguages) {
      if (candidate.isVariant() && lang.getShortCode().equals(candidate.getShortCode())) {
        variants.add(candidate);
      }
    }
    return variants;
  }

  /** Returns a copy of all known languages, sorted by name. */
  private List<Language> getSortedLanguages() {
    final List<Language> sortedLanguages = new ArrayList<>(Languages.get());
    sortedLanguages.sort(comparing(Language::getName));
    return sortedLanguages;
  }

  private int countXmlRules(String xmlRules) {
    return StringUtils.countMatches(xmlRules, "<rule ");  // rules with IDs
  }

  private int countXmlRuleGroupRules(String xmlRules) {
    return StringUtils.countMatches(xmlRules, "<rule>");  // rules in rule groups have no ID
  }

  /**
   * Counts the false friend patterns for the language.
   *
   * @param falseFriendRules content of the false friends file
   * @return number of {@code <pattern lang="...} occurrences whose language
   *         attribute starts with the short code of {@code lang}
   */
  private int countFalseFriendRules(String falseFriendRules, Language lang) {
    // no closing quote on purpose: this is a prefix match on the attribute value,
    // as in the previous hand-written indexOf() loop
    return StringUtils.countMatches(falseFriendRules, "<pattern lang=\"" + lang.getShortCode());
  }

  /**
   * Determines the spell check support of the language, falling back to the
   * first of its variants that has any support.
   */
  private SpellcheckSupport spellcheckSupport(Language lang, List<Language> allLanguages) throws IOException {
    // each lookup collects the language's rules, so do it only once per language
    final SpellcheckSupport ownSupport = spellcheckSupport(lang);
    if (ownSupport != SpellcheckSupport.None) {
      return ownSupport;
    }
    for (Language variant : getVariants(allLanguages, lang)) {
      final SpellcheckSupport variantSupport = spellcheckSupport(variant);
      if (variantSupport != SpellcheckSupport.None) {
        return variantSupport;
      }
    }
    return SpellcheckSupport.None;
  }

  /**
   * Determines the spell check support from the first dictionary based spelling
   * rule among the language's relevant rules.
   */
  private SpellcheckSupport spellcheckSupport(Language lang) throws IOException {
    final List<Rule> rules = new ArrayList<>(
            lang.getRelevantRules(JLanguageTool.getMessageBundle(), null, null, Collections.emptyList()));
    rules.addAll(lang.getRelevantLanguageModelCapableRules(JLanguageTool.getMessageBundle(), null, null, null, Collections.emptyList()));
    for (Rule rule : rules) {
      if (rule.isDictionaryBasedSpellingRule()) {
        return rule instanceof HunspellNoSuggestionRule
                ? SpellcheckSupport.NoSuggestion
                : SpellcheckSupport.Full;
      }
    }
    return SpellcheckSupport.None;
  }

  /**
   * Counts the confusion pairs of the language.
   *
   * @return half the size of the loaded confusion pair map, or {@code 0} if the
   *         language has no {@code confusion_sets.txt}
   */
  private int countConfusionPairs(Language lang) {
    final String path = "/" + lang.getShortCode() + "/confusion_sets.txt";
    final ResourceDataBroker dataBroker = JLanguageTool.getDataBroker();
    if (!dataBroker.resourceExists(path)) {
      return 0;
    }
    try (InputStream confusionSetStream = dataBroker.getFromResourceDirAsStream(path)) {
      final ConfusionSetLoader confusionSetLoader = new ConfusionSetLoader();
      return confusionSetLoader.loadConfusionPairs(confusionSetStream).size() / 2;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** Returns the maintainers as a comma separated list, linked to their URL if they have one. */
  private String getMaintainerInfo(Language lang) {
    final StringBuilder maintainerInfo = new StringBuilder();
    if (lang.getMaintainers() != null) {
      for (Contributor contributor : lang.getMaintainers()) {
        if (maintainerInfo.length() > 0) {
          maintainerInfo.append(", ");
        }
        if (contributor.getUrl() != null) {
          maintainerInfo.append("<a href=\"");
          maintainerInfo.append(contributor.getUrl());
          maintainerInfo.append("\">");
        }
        maintainerInfo.append(contributor.getName());
        if (contributor.getUrl() != null) {
          maintainerInfo.append("</a>");
        }
      }
    }
    return maintainerInfo.toString();
  }

  /**
   * Accepts {@code .java} files, except those ending in {@code <language name>Rule.java}
   * or in {@code SpellerRule.java}.
   */
  private static class JavaFilter implements FileFilter {

    private final String langName;

    JavaFilter(String langName) {
      this.langName = langName;
    }

    @Override
    public boolean accept(final File f) {
      final String filename = f.getName();
      final boolean isAbstractTopClass = filename.endsWith(langName + "Rule.java");
      final boolean isSpellerClass = filename.endsWith("SpellerRule.java");
      return filename.endsWith(".java") && !isAbstractTopClass && !isSpellerClass;
    }

  }

}