lv.semti.Thesaurus.struct.Gram.java Source code

Java tutorial

Introduction

Here is the source code for lv.semti.Thesaurus.struct.Gram.java

Source

/*******************************************************************************
 * Copyright 2013, 2014 Institute of Mathematics and Computer Science, University of Latvia
 * Author: Lauma Pretkalniņa
 * 
 *     This program is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * 
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 * 
 *     You should have received a copy of the GNU General Public License
 *     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
package lv.semti.Thesaurus.struct;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import lv.semti.Thesaurus.utils.HasToJSON;
import lv.semti.Thesaurus.utils.MappingSet;
import lv.semti.Thesaurus.utils.Tuple;
import lv.semti.Thesaurus.utils.JSONUtils;

import org.json.simple.JSONObject;
import org.w3c.dom.Node;

/**
 * g (gramatika) field.
 */
public class Gram implements HasToJSON {
    /**
     * Raw grammar text exactly as received: the text content of the XML node
     * in the constructor, or the string passed to {@code set}.
     */
    public String orig;
    /**
     * Flags collected while parsing: de-abbreviated values looked up in
     * {@code knownAbbr} plus flags added directly by the pattern rules.
     */
    public HashSet<String> flags;
    /**
     * Grammar elements that were not recognised. {@code parseGram} adds one
     * inner list per semicolon-separated segment, holding that segment's
     * unrecognised comma-separated elements.
     */
    public LinkedList<LinkedList<String>> leftovers;
    /**
     * Paradigm numbers assigned by the parsing rules; more than one may be
     * added for a single grammar.
     */
    public HashSet<Integer> paradigm;
    /**
     * If grammar contains additional information about lemmas, it is
     * collected here. Mapping from paradigms to lemma-flagset tuples.
     * Flag set contains only flags for which alternate lemma differs from
     * general flags given in "flags" field in this grammar.
     */
    public MappingSet<Integer, Tuple<Lemma, HashSet<String>>> altLemmas;

    /**
     * Known abbreviations and their de-abbreviations. Built once, at class
     * initialisation, by {@code generateKnownAbbr()}. The same abbreviation
     * is put several times there and read back with {@code getAll}, so
     * presumably one key maps to a set of flags - confirm against MappingSet.
     */
    public static MappingSet<String, String> knownAbbr = generateKnownAbbr();

    /**
     * Builds the table of abbreviations recognised in grammar strings and the
     * flag text(s) each one expands to.
     *
     * Several abbreviations are put more than once with different values;
     * parseGram reads them back with getAll, so presumably MappingSet keeps
     * every value per key - confirm against MappingSet.
     *
     * NOTE(review): the '?' characters and the missing letters inside these
     * literals look like Latvian diacritics lost in an encoding conversion.
     * They are runtime data and are deliberately left untouched here; verify
     * against the original UTF-8 source before relying on them.
     *
     * @return newly created mapping from abbreviation to flag text.
     */
    private static MappingSet<String, String> generateKnownAbbr() {
        MappingSet<String, String> res = new MappingSet<String, String>();

        // TODO Sort out this mess.
        // Source: LLVV, data.

        // Parts of speech and word types.
        res.put("adj.", "pabas v?rds");
        res.put("adv.", "Apst?ka v?rds");
        res.put("apst.", "Apst?ka v?rds");
        res.put("divd.", "Divdabis");
        res.put("Divd.", "Divdabis");
        res.put("interj.", "Izsauksmes v?rds");
        res.put("p. v.", "pabas v?rds");
        res.put("p.", "pabas v?rds");
        res.put("izsauk.", "Izsauksmes v?rds");
        res.put("jaut.", "Jaut?jamais vietniekv?rds");
        res.put("lietv.", "Lietv?rds");
        res.put("noliedz.", "Noliedzamais vietniekv?rds");
        res.put("nor?d.", "Nor?d?mais vietniekv?rds");
        res.put("noteic.", "Noteicamais vietniekv?rds");
        res.put("part.", "Partikula");
        res.put("pieder.", "Piederbas vietniekv?rds");
        res.put("pried.", "Priedklis"); // Specific processing needed.
        res.put("priev.", "Priev?rds");
        res.put("skait.", "Skaita v?rds");
        res.put("vietn.", "Vietniekv?rds");
        res.put("vietniekv.", "Vietniekv?rds"); // ?
        res.put("visp?rin.", "Visp?rin?mais vietniekv?rds");
        res.put("sas.", "Sasin?jums");
        res.put("simb.", "Sasin?jums"); // ?
        res.put("salikteu pirm? daa.", "Salikteu daa");
        res.put("salikteu pirm? daa", "Salikteu daa");
        res.put("saliktea pirm? daa.", "Salikteu daa");
        res.put("saliktea pirm? daa", "Salikteu daa");
        res.put("salikteu daa.", "Salikteu daa");
        res.put("salikteu daa", "Salikteu daa");

        // Prepositions given together with the case they take; each key
        // gets two values (the part of speech and the case usage).
        res.put("priev. ar en.", "Priev?rds");
        res.put("priev. ar en.", "Lieto ar enetvu");
        res.put("ar en.", "Priev?rds"); // It seems that without additional comments this is used for prepositions only
        res.put("ar en.", "Lieto ar enetvu");
        res.put("priev. ar dat.", "Priev?rds");
        res.put("priev. ar dat.", "Lieto ar datvu");

        res.put("persv.", "Personv?rds");
        res.put("vietv.", "Vietv?rds");

        // Cases.
        res.put("akuz.", "Akuzatvs");
        res.put("dat.", "Datvs");
        res.put("en.", "enitvs");
        res.put("instr.", "Instrument?lis");
        res.put("lok.", "Lokatvs");
        res.put("nom.", "Nominatvs");

        // Number.
        res.put("divsk.", "Divskaitlis"); // Do we really still have one of these?!
        res.put("dsk.", "Daudzskaitlis");
        res.put("vsk.", "Vienskaitlis");

        // Tense.
        res.put("n?k.", "N?kotne");
        res.put("pag.", "Pag?tne");
        res.put("tag.", "Tagadne");

        // Definite / indefinite ending.
        res.put("nenot.", "Nenoteikt? galotne");
        res.put("not.", "Noteikt? galotne");

        // Gender.
        res.put("s.", "Sievieu dzimte");
        res.put("v.", "Vrieu dzimte");
        res.put("kopdz.", "Kopdzimte");

        // Transitivity.
        res.put("intrans.", "Nep?rejos");
        res.put("intr.", "Nep?rejos");
        res.put("trans.", "P?rejos");
        // TODO are these always verbs?

        res.put("konj.", "Konjug?cija");
        res.put("pers.", "Persona");

        //res.put("atgr.", "Atgriezensisks (vietniekv?rds?)"); //not present
        res.put("dem.", "Deminutvs");
        res.put("Dem.", "Deminutvs");
        res.put("imperf.", "Imperfektva forma"); //???
        res.put("nelok.", "Nelok?ms v?rds");
        res.put("Nol.", "Noliegums"); // Check with other sources!
        res.put("refl.", "Refleksvs");
        res.put("refl.", "Darbbas v?rds");
        res.put("Refl.", "Refleksvs");
        res.put("Refl.", "Darbbas v?rds");

        // Subject-field (domain) labels.
        res.put("aeron.", "Aeronautika"); // ?
        res.put("anat.", "Anatomija");
        res.put("arheol.", "Arheoloija");
        res.put("arhit.", "Arhitektra");
        res.put("arh.", "Arhitektra");
        res.put("astr.", "Astronomija");
        res.put("av.", "Avi?cija");
        res.put("biol.", "Bioloija");
        res.put("bikop.", "Bikopba");
        res.put("bot.", "Bot?nika");
        res.put("bvn.", "Bvniecba");
        res.put("ek.", "Ekonomika");
        res.put("ekol.", "Ekoloija"); // ?
        res.put("ekon.", "Ekonomika");
        res.put("el.", "Elektrotehnika");
        res.put("etn.", "Etnogr?fija");
        res.put("farm.", "Farmakoloija");
        res.put("filoz.", "Filozofija");
        res.put("fin.", "Finanses");
        res.put("fiz.", "Fizika");
        res.put("fiziol.", "Fizioloija");
        res.put("fizk.", "Fizisk? kultra un sports");
        res.put("folkl.", "Folklora");
        res.put("ent.", "entika"); // ?
        res.put("eod.", "eodzija");
        res.put("eogr.", "eogr?fija");
        res.put("eol.", "eoloija");
        res.put("eom.", "eometrija");
        res.put("gr?matv.", "Gr?matvedba");
        res.put("hidr.", "Hidroloija");
        res.put("hidrotehn.", "Hidrotehnika");
        res.put("inf.", "Inform?tika");
        res.put("jur.", "Jurisprudence");
        res.put("jrn.", "Jrniecba");
        res.put("kap.", "Attiecas uz kapit?listisko iek?rtu, kapit?listisko sabiedrbu");
        res.put("kardioloij?", "Kardioloija");
        res.put("kart.", "Kartogr?fija"); // ?
        res.put("kibern.", "Kiberntika");
        res.put("kino", "Kinematogr?fija");
        res.put("kokapstr.", "Kokapstr?de"); // ?
        res.put("kul.", "Kulin?rija");
        res.put("m.", "mija");
        res.put("lauks.", "Lauksaimniecba");
        res.put("lauks. tehn.", "Lauksaimniecbas tehnika"); // ?
        res.put("literat.", "Literatrzin?tne");
        res.put("lo.", "Loika");
        res.put("lopk.", "Lopkopba");
        res.put("mat.", "Matem?tika");
        res.put("matem.", "Matem?tika"); // ?
        res.put("med.", "Medicna");
        res.put("medn.", "Medniecba");
        res.put("met.", "Meteoroloija"); // ?
        res.put("metal.", "Metalurija");
        res.put("met?l.", "Met?lapstr?de"); // ?
        res.put("meteorol.", "Meteoroloija");
        res.put("me.", "Meniecba"); // ?
        res.put("mer.", "Merpniecba");
        res.put("mes.", "Mesaimniecba");
        res.put("mil.", "Milit?r?s zin?tnes");
        res.put("min.", "Mineraloija");
        res.put("mit.", "Mitoloija");
        res.put("mz.", "Mzika");
        res.put("oftalmoloij?", "Oftalmoloija");
        res.put("ornit.", "Ornitoloija");
        res.put("pol.", "Politika");
        res.put("poligr.", "Poligr?fija");
        res.put("psih.", "Psiholoija");
        res.put("rel.", "Reliija");
        res.put("social.", "Socioloija"); // ?
        res.put("sociol.", "Socioloija");
        res.put("tehn.", "Tehnika");
        res.put("tehnol.", "Tehnoloija");
        res.put("telek.", "Telekomunik?cijas"); // ?
        res.put("tekst.", "Tekstilrpniecba");
        res.put("tekstilr.", "Tekstilrpniecba"); // ?
        res.put("TV", "Televzija");
        res.put("val.", "Valodniecba");
        res.put("vet.", "Veterin?rija");
        res.put("zool.", "Zooloija");

        // Source-language labels; every key gets the language name plus a
        // second, shared value.
        res.put("ar?bu", "Ar?bu");
        res.put("ar?bu", "V?rds svevalod?");
        res.put("ar?bu val.", "Ar?bu");
        res.put("ar?bu val.", "V?rds svevalod?");
        res.put("v?cu val.", "V?cu");
        res.put("v?cu val.", "V?rds svevalod?");
        res.put("fr.", "Fran?u");
        res.put("fr.", "V?rds svevalod?");
        res.put("grieu", "Grieu");
        res.put("grieu", "V?rds svevalod?");
        res.put("gr.", "Grieu");
        res.put("gr.", "V?rds svevalod?");
        res.put("it.", "It?lieu"); //Muz
        res.put("it.", "V?rds svevalod?");
        res.put("lat.", "Latu");
        res.put("lat.", "V?rds svevalod?");
        res.put("liet.", "Lietuvieu");
        res.put("liet.", "V?rds svevalod?");
        res.put("sengr.", "Sengrieu");
        res.put("sengr.", "V?rds svevalod?");

        // Dialect labels; each key also gets the shared value "Dialekts".
        res.put("dial. (augzemnieku)", "Aguzemnieku"); // Unique.
        res.put("dial. (augzemnieku)", "Dialekts"); // Unique.
        res.put("latg.", "Latgalieu");
        res.put("latg.", "Dialekts");

        // Usage and style labels.
        res.put("apv.", "Apvidv?rds");
        res.put("vst.", "Vsturisks");
        res.put("novec.", "Novecojis");
        res.put("neakt.", "Neaktu?ls");
        res.put("pot.", "Potiska stilistisk? nokr?sa");
        res.put("niev.", "Nievga ekspresv? nokr?sa");
        res.put("iron.", "Ironiska ekspresv? nokr?sa");
        res.put("hum.", "Humoristiska ekspresv? nokr?sa");
        res.put("vienk.", "Vienk?rrunas stilistisk? nokr?sa");
        res.put("p?rn.", "P?rnest? nozm");
        res.put("nevl.", "Nevlams"); // TODO - merge the undesirable, non-literary and jargon labels?
        res.put("nelit.", "Neliter?rs");
        res.put("arg.", "argonv?rds");
        res.put("sar.", "Sarunvaloda");
        res.put("vulg.", "Vulg?risms"); // ?

        // TODO - these should rather go into separate comment field(s)
        res.put("ar vsk.", "Ar vienskaitl"); // aunums.
        res.put("parasti vsk.", "Parasti vienskaitl");
        res.put("parasti vsk", "Parasti vienskaitl");
        res.put("par. vsk.", "Parasti vienskaitl");
        res.put("tikai vsk.", "Tikai vienskaitl");
        res.put("parasti dsk.", "Parasti daudzskaitl");
        res.put("tikai dsk.", "Tikai daudzskaitl");
        res.put("parasti 3. pers.", "Parasti 3. person?");
        res.put("parasti saliktajos laikos", "Parasti saliktajos laikos");
        res.put("parasti saliktajos laikos.", "Parasti saliktajos laikos");
        res.put("parasti nenoteiksm", "Parasti nenoteiksm");
        res.put("parasti nenoteiksm", "Darbbas v?rds");
        res.put("parasti pavles form?", "Parasti pavles izteiksm");
        res.put("parasti pavles form?", "Darbbas v?rds");
        res.put("parasti pavles form?.", "Parasti pavles izteiksm");
        res.put("parasti pavles form?.", "Darbbas v?rds");
        // "nelok." is also put earlier in this method with a different value.
        res.put("nelok.", "Nelok?ms");
        res.put("subst. noz.", "Lietv?rda nozm");
        res.put("lietv. nozm.", "Lietv?rda nozm");
        res.put("p. nozm.", "pabas v?rda nozm");
        res.put("ar not. gal.", "Ar noteikto galotni");
        res.put("ar lielo s?kumburtu", "Ar lielo s?kumburtu");
        res.put("pareti.", "Pareti");
        res.put("pareti", "Pareti");
        res.put("reti.", "Reti");
        res.put("reti", "Reti");
        res.put("ret?k", "Ret?k");

        res.put("hip.", "Hipotze");

        return res;
    }

    /*/*
     * Patterns for identifying (true) grammatical information.
     */
    //   public static LinkedList<Pattern> knownPatterns = generateKnownPatterns();
    /*   private static LinkedList<Pattern> generateKnownPatterns()
       {
          LinkedList<Pattern> res = new LinkedList<Pattern>();
          res.add(Pattern.compile("^(.*)(vokatvs [^ ,;:]+)(.*)$"));
          res.add(Pattern.compile("^(.*)(biei lok\\.: [^ ,;:]+)(.*)$"));
          res.add(Pattern.compile("^(.*)(parasti lok\\.: [^ ,;:]+)(.*)$"));
          res.add(Pattern.compile("^(.*)(parasti vsk\\. lok\\.: [^ ,;:]+)(.*)$"));
          res.add(Pattern.compile("^(.*)(parasti en\\.: [^ ,;:]+)(.*)$"));
          res.add(Pattern.compile("^(.*)(pamata skait(\\.|a v?rds) lietv(\\.|?rda) nozm\\.?)(.*)$"));
          res.add(Pattern.compile("^(.*)(\\(?parasti folkl\\.(\\)\\.)?)(.*)$"));
          res.add(Pattern.compile("^(.*)(parasti saistt? valod?\\.)(.*)$"));
          res.add(Pattern.compile("^(.*)(apst\\. nozm)(.*)$"));
          res.add(Pattern.compile("^(.*)(\\(v?cu \"krava\"\\))(.*)$"));
          return res;
       }//*/

    /**
     * Creates an empty, unparsed grammar: every field is left as
     * {@code null} until {@code set} is called.
     */
    public Gram() {
        this.altLemmas = null;
        this.paradigm = null;
        this.leftovers = null;
        this.flags = null;
        this.orig = null;
    }

    /**
     * Creates a grammar from the text content of the given XML node and
     * parses it immediately.
     *
     * @param gramNode node whose text content is the raw grammar string.
     * @param lemma is used for grammar parsing.
     */
    public Gram(Node gramNode, String lemma) {
        this.orig = gramNode.getTextContent();

        // Fresh result containers; leftovers and altLemmas are created by
        // parseGram itself.
        this.flags = new HashSet<String>();
        this.paradigm = new HashSet<Integer>();
        this.leftovers = null;
        this.altLemmas = null;

        parseGram(lemma);
    }

    /**
     * Replaces the content of this grammar with a new grammar string and
     * parses it, discarding everything collected before.
     *
     * @param gramText raw grammar string to store and parse.
     * @param lemma is used for grammar parsing.
     */
    public void set(String gramText, String lemma) {
        // Drop all results of any earlier parse.
        this.altLemmas = null;
        this.leftovers = null;
        this.paradigm = new HashSet<Integer>();
        this.flags = new HashSet<String>();

        this.orig = gramText;
        parseGram(lemma);
    }

    /**
     * Tells whether at least one paradigm has been assigned to this grammar.
     *
     * @return {@code true} if the paradigm set exists and is not empty;
     *         {@code false} otherwise, including for an object created with
     *         the no-argument constructor, whose paradigm set is
     *         {@code null}.
     */
    public boolean hasParadigm() {
        // The no-argument constructor leaves paradigm as null; treat that as
        // "no paradigm" instead of failing with a NullPointerException.
        return paradigm != null && !paradigm.isEmpty();
    }

    /**
     * Tells whether any grammar segment is still recorded in
     * {@code leftovers}.
     * Only works correctly, if cleanupLeftovers is used, when needed.
     *
     * @return {@code true} if the leftover list exists and is not empty;
     *         {@code false} otherwise, including when nothing has been
     *         parsed yet and the list is still {@code null}.
     */
    public boolean hasUnparsedGram() {
        //cleanupLeftovers();      // What is better - unexpected side effects or not working, when used incorrectly?
        // leftovers stays null until parseGram has run (e.g. after the
        // no-argument constructor); treat that as "nothing unparsed" instead
        // of failing with a NullPointerException.
        return leftovers != null && !leftovers.isEmpty();
    }

    /**
     * Parses the grammar text stored in {@code orig}. Flags, paradigms and
     * alternate lemmas are collected into the corresponding fields, and
     * whatever could not be recognised ends up in {@code leftovers}.
     *
     * @param lemma is used for grammar parsing.
     */
    private void parseGram(String lemma) {
        String gramText = correctOCRErrors(orig);
        altLemmas = new MappingSet<Integer, Tuple<Lemma, HashSet<String>>>();

        // Ending patterns normally sit at the very start of the grammar
        // string, so they are consumed before the string is split.
        gramText = processBeginingWithPatterns(gramText, lemma);

        String[] segments = gramText.split("\\s*;\\s*");
        leftovers = new LinkedList<LinkedList<String>>();

        // Semicolons separate the top-level segments.
        for (String segment : segments) {
            String remaining = processWithNoSemicolonPatterns(segment, lemma);
            LinkedList<String> unparsed = new LinkedList<String>();

            // Commas separate the individual elements of a segment.
            for (String piece : remaining.split("\\s*,\\s*")) {
                String elem = piece.trim();

                // A known abbreviation contributes its flags and is done.
                if (knownAbbr.containsKey(elem)) {
                    flags.addAll(knownAbbr.getAll(elem));
                    continue;
                }

                // Otherwise try the regular-expression rules; anything they
                // leave behind is kept as an unprocessed leftover.
                String rest = processWithNoCommaPatterns(elem, lemma);
                if (!rest.isEmpty()) {
                    unparsed.add(rest);
                }
            }

            // TODO: magical patterns for processing endings.

            leftovers.add(unparsed);
        }

        // Try to deduce paradigm from flags.
        paradigmFromFlags(lemma);

        cleanupLeftovers();
        // TODO cleanup altLemmas;
    }

    /**
     * This method contains collection of ending patterns, found in data.
     * These patterns are meant for using on the beginning of the
     * unsegmented grammar string.
     * Thus,e.g., if there was no plural-only nouns with ending -as, then
     * there is no rule for processing such words (at least in most cases).
     * @param lemma is used for grammar parsing.
     */
    private String processBeginingWithPatterns(String gramText, String lemma) {
        gramText = gramText.trim();
        int newBegin = -1;

        // Blocks of rules.
        if (newBegin == -1)
            newBegin = firstConjDirVerb3PersRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = firstConjDirVerbAllPersRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = secondConjDirVerbRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = thirdConjDir3PersVerbRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = thirdConjDirAllPersVerbRules(gramText, lemma);

        if (newBegin == -1)
            newBegin = firstConjRef3PersVerbRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = firstConjRefAllPersVerbRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = secondConjRefVerbRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = thirdConjRef3PersVerbRules(gramText, lemma);
        if (newBegin == -1)
            newBegin = thirdConjRefAllPersVerbRules(gramText, lemma);

        if (newBegin == -1)
            newBegin = sixthDeclNounFullWordRules(gramText, lemma);

        // Complicated rules: grammar contains lemma variation spelled out.
        if (newBegin == -1) {
            // Super-complicated case: pronunciation included.         
            // Paradigm 1: Lietv?rds 1. deklin?cija -s
            // Changed in new version
            /*if (lemma.endsWith("di") &&
               gramText.matches("(-u, vsk\\. (\\Q"
              + lemma.substring(0, lemma.length() - 1)
              + "s\\E) \\[([^\\]]*?)\\] -a, v\\.)(.*)?")) // ?beziedi: -u, vsk. ?bezieds [a^be`zie^c] -a, v.
            {
               Pattern pattern = Pattern.compile("(-u, vsk\\. (\\Q"
              + lemma.substring(0, lemma.length() - 1)
              + "s\\E) \\[([^\\]]*?)\\] -a, v\\.)(.*)?");
               Matcher matcher = pattern.matcher(gramText);
               if (!matcher.matches()) 
                  System.err.printf("Problem matching \"%s\" with \"?bezieds\" rule\n", lemma);
               newBegin = matcher.group(1).length();
               Lemma altLemma = new Lemma(matcher.group(2));
               altLemma.pronunciation = matcher.group(3);
               HashSet<String> altParams = new HashSet<String> ();
               altParams.add("irkav?rds vienskaitl");
               altLemmas.put(1, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));
                   
               paradigm.add(1);
               flags.add("Vrieu dzimte");
               flags.add("Lietv?rds");
               flags.add("irkav?rds daudzskaitl");
            }//*/

            // Paradigm 2: Lietv?rds 1. deklin?cija -
            if (lemma.endsWith("i") && gramText
                    .startsWith("-u, vsk. " + lemma.substring(0, lemma.length() - 2) + ", -a, v.")) // ditaurii: -u, vsk. ditauri, -a, v.
            {
                newBegin = ("-u, vsk. " + lemma.substring(0, lemma.length() - 2) + ", -a, v.").length();
                Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 2) + "");
                HashSet<String> altParams = new HashSet<String>();
                altParams.add("irkav?rds vienskaitl");
                altLemmas.put(2, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));
                paradigm.add(2);
                flags.add("Vrieu dzimte");
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            }
            // Paradigm 3: Lietv?rds 2. deklin?cija -is
            else if (lemma.endsWith("i") && gramText
                    .startsWith("-u, vsk. " + lemma.substring(0, lemma.length() - 2) + "nis, -a, v.")) // aizvirti: -u, vsk. aizvirtnis, -a, v.
            {
                newBegin = ("-u, vsk. " + lemma.substring(0, lemma.length() - 2) + "nis, -a, v.").length();
                Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 2) + "nis");
                HashSet<String> altParams = new HashSet<String>();
                altParams.add("irkav?rds vienskaitl");
                altLemmas.put(3, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));
                paradigm.add(3);
                flags.add("Vrieu dzimte");
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            } else if (lemma.endsWith("i") && gramText
                    .startsWith("-u, vsk. " + lemma.substring(0, lemma.length() - 3) + "lnis, -a, v.")) // starpvii: -u, vsk. starpvilnis, -a, v.
            {
                newBegin = ("-u, vsk. " + lemma.substring(0, lemma.length() - 3) + "lnis, -a, v.").length();
                Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 3) + "lnis");
                HashSet<String> altParams = new HashSet<String>();
                altParams.add("irkav?rds vienskaitl");
                altLemmas.put(3, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));
                paradigm.add(3);
                flags.add("Vrieu dzimte");
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            } else if (lemma.endsWith("ji") && gramText.startsWith("-u, vsk. " + lemma + "s, -ja, v.")) // airk?ji: -u, vsk. airk?jis, -ja, v.
            {
                newBegin = ("-u, vsk. " + lemma + "s, -ja, v.").length();
                Lemma altLemma = new Lemma(lemma + "s");
                HashSet<String> altParams = new HashSet<String>();
                altParams.add("irkav?rds vienskaitl");
                altLemmas.put(3, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));
                paradigm.add(3);
                flags.add("Vrieu dzimte");
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            }

            // Paradigm 1: Lietv?rds 1. deklin?cija -s      
            else if (lemma.endsWith("i")
                    && gramText.startsWith("-u, vsk. " + lemma.substring(0, lemma.length() - 1) + "s, -a, v.")) // aizkari: -u, vsk. aizkars, -a, v.
            {
                newBegin = ("-u, vsk. " + lemma.substring(0, lemma.length() - 1) + "s, -a, v.").length();
                Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 1) + "s");
                HashSet<String> altParams = new HashSet<String>();
                altParams.add("irkav?rds vienskaitl");
                altLemmas.put(1, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));
                paradigm.add(1);
                flags.add("Vrieu dzimte");
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            }
        }

        // "-es, dsk. en. -??u, s."
        if (newBegin == -1)
            newBegin = esEndingPluralGenUEndingFemRules(gramText, lemma);

        // More rules
        if (newBegin == -1) {
            // Long, specific patterns.
            // Paradigm Unknown: Atgriezeniskie lietv?rdi -an?s
            if (gramText.startsWith("en. -?s, akuz. -os, instr. -os, dsk. -?s, en. -os, akuz. -?s, s.")) //aizbildin?an?s
            {
                newBegin = "en. -?s, akuz. -os, instr. -os, dsk. -?s, en. -os, akuz. -?s, s.".length();
                if (lemma.endsWith("an?s")) {
                    paradigm.add(0);
                    flags.add("Lietv?rds");
                    flags.add("Atgriezeniskais lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm -an?s\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            }

            // Paradigm 25: Vietniekv?rdi
            else if (gramText.matches("en\\. -k?, dat\\. -kam, akuz\\., instr\\. -ko([.,;].*)?")) //daudzkas
            {
                newBegin = "en. -k?, dat. -kam, akuz., instr. -ko".length();
                if (lemma.endsWith("kas")) {
                    paradigm.add(25);
                    flags.add("Vietniekv?rds");
                    flags.add("Loct k? \"kas\"");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 25\n", lemma);
                    newBegin = 0;
                }
            }
            // Paradigm 7: Lietv?rds 4. deklin?cija -a siev. dz.
            // Paradigm 8: Lietv?rds 4. deklin?cija -a vr. dz.
            else if (gramText.startsWith("en. -as, v. dat. -am, s. dat. -ai, kopdz.")) {
                newBegin = "en. -as, v. dat. -am, s. dat. -ai, kopdz.".length();
                if (lemma.endsWith("a")) {
                    paradigm.add(7);
                    paradigm.add(8);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 7, 8\n", lemma);
                    newBegin = 0;
                }
                flags.add("Kopdzimte");
            }

            // Paradigm 1: Lietv?rds 1. deklin?cija -s
            // Paradigm 2: Lietv?rds 1. deklin?cija -
            else if (gramText.startsWith("lietv. -a, v.")) // aerobs 
            {
                newBegin = "lietv. -a, v.".length();
                //if (lemma.matches(".*[jr]is")) paradigm.add(3);
                //else
                //{
                //if (lemma.matches(".*[aeiou??]s") || lemma.matches(".*[^s]"))
                //   System.err.printf("Problem matching \"%s\" with paradigms 1, 2, 3\n", lemma);

                if (lemma.endsWith(""))
                    paradigm.add(2);
                else if (lemma.matches(".*[^aeiou??]s"))
                    paradigm.add(1);
                else {
                    System.err.printf("Problem matching \"%s\" with paradigms 1, 2, 3\n", lemma);
                    newBegin = 0;
                }
                //}
                flags.add("Vrieu dzimte");
                flags.add("Lietv?rds");
            } else if (gramText.startsWith("vsk. -a, v.")) // acteks
            {
                newBegin = "vsk. -a, v.".length();

                if (lemma.endsWith("")) {
                    paradigm.add(2);
                    flags.add("Lietv?rds");
                } else if (lemma.matches(".*[^aeiou??]s")) {
                    paradigm.add(1);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 1, 2\n", lemma);
                    newBegin = 0;
                }
                flags.add("Vrieu dzimte");
                flags.add("Vienskaitlis");
            }

            // Paradigm 3: Lietv?rds 2. deklin?cija -is
            else if (gramText.startsWith("-a, dsk. en. -u, v.")) // bizmanis
            {
                newBegin = "-a, dsk. en. -u, v.".length();

                if (lemma.endsWith("nis")) {
                    paradigm.add(3);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 3\n", lemma);
                    newBegin = 0;
                }
                flags.add("Vrieu dzimte");
            } else if (gramText.matches("-a, dsk\\. en\\. -u([;,.].*)?")) // afroamerik?i
            {
                newBegin = "-a, dsk. en. -u".length();

                if (lemma.endsWith("i")) {
                    paradigm.add(3);
                    flags.add("irkav?rds daudzskaitl");
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 3\n", lemma);
                    newBegin = 0;
                }
                flags.add("Vrieu dzimte");
            }

            // Paradigm 9: Lietv?rds 5. deklin?cija -e siev. dz.
            else if (gramText.matches("-es, s\\., dsk\\. en\\. -bju([;,.].*)?")) //acetilsalicilsk?be
            {
                newBegin = "-es, s., dsk. en. -bju".length();
                if (lemma.endsWith("be")) {
                    paradigm.add(9);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            } else if (gramText.matches("-es, dsk\\. en\\. -ru([;,.].*)?")) //?dere
            {
                newBegin = "-es, dsk. en. -ru".length();
                if (lemma.endsWith("re")) {
                    paradigm.add(9);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            } else if (gramText.matches("-es, dsk\\. en\\. -u([;,.].*)?")) //aizkr?sne
            {
                newBegin = "-es, dsk. en. -u".length();
                if (lemma.matches(".*[s][n]e")) {
                    paradigm.add(9);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            } else if (gramText.startsWith("-es, s.")) //aizture
            {
                newBegin = "-es, s.".length();
                if (lemma.endsWith("e")) {
                    paradigm.add(9);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            }

            // Paradigm 11: Lietv?rds 6. deklin?cija -s
            // Ending rules
            else if (gramText.matches("-ts, -u([;,.].*)?")) //abonentpults
            {
                newBegin = "-ts, -u".length();
                if (lemma.endsWith("ts")) {
                    paradigm.add(11);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 11\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            } else if (gramText.matches("-vs, -vju([;,.].*)?")) //adatzivs
            {
                newBegin = "-vs, -vju".length();
                if (lemma.endsWith("vs")) {
                    paradigm.add(11);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 11\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            }

            // Paradigm 7: Lietv?rds 4. deklin?cija -a siev. dz.
            // Paradigm 11: Lietv?rds 6. deklin?cija -s siev. dz.
            else if (gramText.startsWith("-as, s.")) //aber?cija, milns, naj?das
            {
                newBegin = "-as, s.".length();
                if (lemma.matches(".*[^aeiou??]s")) {
                    paradigm.add(11);
                    flags.add("Lietv?rds");
                } else if (lemma.endsWith("a")) {
                    paradigm.add(7);
                    flags.add("Lietv?rds");
                } else if (lemma.matches(".*[^aeiou??]as")) {
                    paradigm.add(7);
                    flags.add("irkav?rds daudzskaitl");
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 7, 11\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            }

            // Paradigm 9: Lietv?rds 5. deklin?cija -e siev. dz.
            // Paradigm 11: Lietv?rds 6. deklin?cija -s
            else if (gramText.startsWith("dsk. en. -u, s.")) //?dmine, b?kuguns, b?rksaknes
            {
                newBegin = "dsk. en. -u, s.".length();
                if (lemma.endsWith("ns")) {
                    paradigm.add(11);
                    flags.add("Lietv?rds");
                } else if (lemma.endsWith("nes")) {
                    paradigm.add(9);
                    flags.add("Lietv?rds");
                    flags.add("irkav?rds daudzskaitl");
                } else if (lemma.endsWith("ne")) {
                    paradigm.add(9);
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 9, 11\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            }

            // Grammar includes endings for other lemma variants. 
            // Paradigm 1: Lietv?rds 1. deklin?cija -s
            // Paradigm 9: Lietv?rds 5. deklin?cija -e siev. dz.
            else if (gramText.matches("s\\. -te, -u([;.].*)?")) //abstinents
            {
                newBegin = "s. -te, -u".length();
                if (lemma.endsWith("ts")) {
                    Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 1) + "e");
                    HashSet<String> altParams = new HashSet<String>();
                    altParams.add("Sievieu dzimte");
                    altParams.add("Cita paradigma");
                    altLemmas.put(9, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    paradigm.add(1);
                    flags.add("Lietv?rds");
                    flags.add("Vrieu dzimte");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 1 & 5\n", lemma);
                    newBegin = 0;
                }
            }
            // Paradigm 3: Lietv?rds 2. deklin?cija -is
            // Paradigm 9: Lietv?rds 5. deklin?cija -e siev. dz.
            else if (gramText.matches("-a; s\\. -e -u([;.].*)?")) //agonistiis
            {
                newBegin = "-a; s. -e -u".length();
                if (lemma.endsWith("is")) {
                    Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 2) + "e");
                    HashSet<String> altParams = new HashSet<String>();
                    altParams.add("Sievieu dzimte");
                    altParams.add("Cita paradigma");
                    altLemmas.put(9, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    paradigm.add(2);
                    flags.add("Lietv?rds");
                    flags.add("Vrieu dzimte");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 3 & 5\n", lemma);
                    newBegin = 0;
                }
            } else if (gramText.matches("-a; s. -te, -u([;.].*)?")) //aiolietis
            {
                newBegin = "-a; s. -te, -u".length();
                if (lemma.endsWith("tis")) {
                    Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 2) + "e");
                    HashSet<String> altParams = new HashSet<String>();
                    altParams.add("Sievieu dzimte");
                    altParams.add("Cita paradigma");
                    altLemmas.put(9, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    paradigm.add(2);
                    flags.add("Lietv?rds");
                    flags.add("Vrieu dzimte");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 3 & 5\n", lemma);
                    newBegin = 0;
                }
            }
            // Paradigm 13: pabas v?rdi ar -s
            // Paradigm 14: pabas v?rdi ar -
            else if (gramText.matches("p\\. v\\. -ais; s\\. -a, -?([;,.].*)?")) //aerobs
            {
                newBegin = "p. v. -ais; s. -a, -?".length();
                if (lemma.matches(".*[^aeiou??]")) {
                    paradigm.add(14);
                    flags.add("pabas v?rds");
                } else if (lemma.matches(".*[^aeiou??]s")) {
                    paradigm.add(13);
                    flags.add("pabas v?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 13, 14\n", lemma);
                    newBegin = 0;
                }
            } else if (gramText.matches("-ais[;,] s\\. -a, -?([;,.].*)?")) //abj?ds, acains, ag?ms
            {
                newBegin = "-ais; s. -a, -?".length();
                if (lemma.matches(".*[^aeiou??]")) {
                    paradigm.add(14);
                    flags.add("pabas v?rds");
                } else if (lemma.matches(".*[^aeiou??]s")) {
                    paradigm.add(13);
                    flags.add("pabas v?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 13, 14\n", lemma);
                    newBegin = 0;
                }
            }

            // Paradigm 13-14: plural forms
            else if (gramText.startsWith("s. -as; adj.")) //abji 2
            {
                newBegin = "s. -as; adj.".length();
                if (lemma.endsWith("i")) {
                    paradigm.add(13);
                    paradigm.add(14);
                    flags.add("pabas v?rds");
                    flags.add("irkav?rds daudzskaitl");
                    flags.add("Neviennozmga paradigma");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 13-14\n", lemma);
                    newBegin = 0;
                }

            } else if (gramText.startsWith("s. -as; tikai dsk.")) //abji 1
            {
                // This exception is on purpose! this way "tikai dsk." is later
                // transformed to appropriate flag.
                newBegin = "s. -as;".length();
                if (lemma.endsWith("i")) {
                    paradigm.add(13);
                    paradigm.add(14);
                    flags.add("pabas v?rds");
                    flags.add("irkav?rds daudzskaitl");
                    flags.add("Neviennozmga paradigma");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 13-14\n", lemma);
                    newBegin = 0;
                }
            }
            // Paradigm 25: Vietniekv?rdi
            else if (gramText.startsWith("s. -as; vietniekv.")) // abi
            {
                newBegin = "s. -as; vietniekv.".length();
                if (lemma.endsWith("i")) {
                    paradigm.add(25);
                    flags.add("irkav?rds daudzskaitl");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 25\n", lemma);
                    newBegin = 0;
                }
                flags.add("Vietniekv?rds");
            }

            // Paradigm 30: jaundzimuais, pdjais
            else if (gramText.startsWith("-?, v. -?s, s.")) //iereibuais
            {
                newBegin = "-?, v. -?s, s.".length();
                if (lemma.endsWith("uais")) {
                    paradigm.add(30);
                    flags.add("pabas v?rds");
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 30\n", lemma);
                    newBegin = 0;
                }
            } else if (gramText.startsWith("-?, v.")) //pirmdzimtais
            {
                newBegin = "-?, v.".length();
                if (lemma.endsWith("ais")) {
                    paradigm.add(30);
                    flags.add("pabas v?rds");
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 30\n", lemma);
                    newBegin = 0;
                }
                flags.add("Vrieu dzimte");
            } else if (gramText.startsWith("-?s, s.")) //pirmdzimt?, -an?s
            {
                newBegin = "-?s, s.".length();
                if (lemma.endsWith("an?s")) {
                    paradigm.add(0);
                    flags.add("Atgriezeniskais lietv?rds");
                    flags.add("Lietv?rds");

                } else if (lemma.endsWith("?")) {
                    paradigm.add(30);
                    flags.add("pabas v?rds");
                    flags.add("Lietv?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigms 30, -an?s\n", lemma);
                    newBegin = 0;
                }
                flags.add("Sievieu dzimte");
            } else if (gramText.matches("s\\. -?([.;].*)?")) //agr?kais
            {
                newBegin = "s. -?".length();
                if (lemma.endsWith("ais")) {
                    paradigm.add(30);
                    flags.add("pabas v?rds");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 30\n", lemma);
                    newBegin = 0;
                }
            }

            // Paradigm Unknown: Divdabis
            // Grammar includes endings for other lemma variants. 
            else if (gramText.matches("-guais; s\\. -gusi, -gus([.;].*)?")) //aizdudzis
            {
                newBegin = "-guais; s. -gusi, -gus".length();
                if (lemma.endsWith("dzis")) {
                    Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 4) + "gusi");
                    HashSet<String> altParams = new HashSet<String>();
                    altParams.add("Sievieu dzimte");
                    altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    paradigm.add(0);
                    flags.add("Divdabis");
                    flags.add("Lok?mais dar?m?s k?rtas pag?tnes divdabis (-is, -usi, -ies, -usies)");
                    flags.add("Vrieu dzimte");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 0 (Divdabis)\n", lemma);
                    newBegin = 0;
                }
            } else if (gramText.matches("-uais; s. -usi\\, -us([.;].*)?")) //aizkpis
            {
                newBegin = "-uais; s. -usi, -us".length();
                if (lemma.matches(".*[cdjlmprstv]is")) {
                    Lemma altLemma = new Lemma(lemma.substring(0, lemma.length() - 3) + "usi");
                    HashSet<String> altParams = new HashSet<String>();
                    altParams.add("Sievieu dzimte");
                    altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    paradigm.add(0);
                    flags.add("Divdabis");
                    flags.add("Lok?mais dar?m?s k?rtas pag?tnes divdabis (-is, -usi, -ies, -usies)");
                    flags.add("Vrieu dzimte");
                } else {
                    System.err.printf("Problem matching \"%s\" with paradigm 0 (Divdabis)\n", lemma);
                    newBegin = 0;
                }
            }

        }

        // "-??a, v."
        if (newBegin == -1)
            newBegin = aEndingMascRules(gramText, lemma);
        // "-??u, v."
        if (newBegin == -1)
            newBegin = uEndingMascRules(gramText, lemma);
        // "-??u, s."
        if (newBegin == -1)
            newBegin = uEndingFemRules(gramText, lemma);

        // === Risky rules =================================================
        // These rules matches prefix of some other rule.
        if (newBegin == -1)
            newBegin = singleEndingOnlyRules(gramText, lemma);

        if (newBegin > 0 && newBegin <= gramText.length())
            gramText = gramText.substring(newBegin);
        else if (newBegin > gramText.length()) {
            System.err.printf("Problem with processing lemma \"%s\" and grammar \"%s\": obtained cut index \"%d\"",
                    lemma, gramText, newBegin);
        }
        if (gramText.matches("[.,;].*"))
            gramText = gramText.substring(1);
        return gramText;
    }

    /**
     * Applies the collection of patterns with no commas in them. Such
     * patterns can be applied to any segmented grammar substring, not only to
     * the beginning of the grammar. Only patterns found in data are given.
     * Thus, e.g., if there were no plural-only nouns with ending -as, then
     * there is no rule for processing such words (at least in most cases).
     * <p>
     * When a pattern is recognized, the method adds the corresponding entries
     * to {@code flags} (and, for participle forms, to {@code altLemmas}) and
     * cuts the recognized prefix off the returned text. If the grammar text is
     * recognized but the alternative lemma has an unexpected ending, a
     * diagnostic is printed to {@code System.err} and nothing is cut.
     * @param gramText grammar substring to process; it is trimmed first.
     * @param lemma entry lemma; here it is used only in the diagnostic message.
     * @return leftovers (unprocessed part of string)
     */
    private String processWithNoCommaPatterns(String gramText, String lemma) {
        gramText = gramText.trim();
        int newBegin = -1;

        // Alternative form processing.
        if (gramText.matches("parasti divd\\. form?: (\\w+)")) //aizdzert->aizdzerts
        {
            Matcher m = Pattern.compile("(parasti divd\\. form?: (\\w+))([.;].*)?").matcher(gramText);
            // A match attempt is required before any group can be read.
            m.matches();
            String newLemma = m.group(2);
            Lemma altLemma = new Lemma(newLemma);
            HashSet<String> altParams = new HashSet<String>();
            altParams.add("Divdabis");
            altParams.add("Cita paradigma");

            newBegin = m.group(1).length();
            if (newLemma.endsWith("ts")) // aizdzert->aizdzerts
            {
                altParams.add("Lok?mais cieam?s k?rtas pag?tnes divdabis (-ts, -ta)");
                altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                flags.add("Darbbas v?rds");
                flags.add("Parasti divdabja form?");
                flags.add("Parasti lok?m? cieam?s k?rtas pag?tnes divdabja form?");
            } else if (newLemma.endsWith("damies")) //aizvilkties->aizvilkdamies
            {
                // This test must come before the "is"/"ies" test below: every
                // word ending in "damies" also ends in "ies" and would
                // otherwise never reach this branch.
                altParams.add("Daji lok?mais divdabis (-dams, -dama, -damies, -dam?s)");
                altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                flags.add("Darbbas v?rds");
                flags.add("Parasti divdabja form?");
                flags.add("Parasti daji lok?m? divdabja form?");
            } else if (newLemma.endsWith("is") || newLemma.endsWith("ies")) // aizmakt->aizsmacis, pieriesties->pieriesies
            {
                altParams.add("Lok?mais dar?m?s k?rtas pag?tnes divdabis (-is, -usi, -ies, -usies)");
                altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                flags.add("Darbbas v?rds");
                flags.add("Parasti divdabja form?");
                flags.add("Parasti lok?m? dar?m?s k?rtas pag?tnes divdabja form?");
            } else {
                System.err.printf("Problem matching \"%s\" in entry \"%s\" with paradigm 0 (Divdabis)\n", newLemma,
                        lemma);
                // 0 = grammar text recognized, but nothing will be cut off.
                newBegin = 0;
            }
        } else if (gramText.matches("biei lok\\.: (\\w+)")) // agrums->agrum?
        {
            Matcher m = Pattern.compile("(biei lok\\.: (\\w+))([.;].*)?").matcher(gramText);
            // Matcher.group() throws IllegalStateException unless a match has
            // been attempted first, so run the match before reading group 1.
            if (m.matches()) {
                newBegin = m.group(1).length();
                flags.add("Biei lokatva form?");
            }
        }

        if (newBegin > 0)
            gramText = gramText.substring(newBegin);
        return gramText;
    }

    /**
     * Applies the collection of patterns with no semicolon in them. Such
     * patterns can be applied to grammar segmented on ';', but not segmented
     * on ','. Only patterns found in data are given. Thus, e.g., if there
     * were no plural-only nouns with ending -as, then there is no rule for
     * processing such words (at least in most cases).
     * <p>
     * The only pattern handled here lists two alternative participle forms
     * separated by a comma. Each form is classified by its ending and
     * registered in {@code altLemmas} under key 0, and the matching flags are
     * added to {@code flags}. If any of the two forms has an unexpected
     * ending, a diagnostic is printed to {@code System.err} and the cut index
     * is reset to 0; a later recognized form does not restore it.
     * @param gramText grammar substring to process; it is trimmed first.
     * @param lemma entry lemma; here it is used only in the diagnostic message.
     * @return leftovers (unprocessed part of string)
     */
    private String processWithNoSemicolonPatterns(String gramText, String lemma) {
        gramText = gramText.trim();
        int newBegin = -1;

        // Alternative form processing.
        if (gramText.matches("parasti divd\\. form?: (\\w+), (\\w+)")) //aizelsties->aizelsies, aizelsdamies
        {
            Matcher m = Pattern.compile("(parasti divd\\. form?: (\\w+), (\\w+))([.;].*)?").matcher(gramText);
            // A match attempt is required before any group can be read.
            m.matches();
            String[] newLemmas = { m.group(2), m.group(3) };
            newBegin = m.group(1).length();
            for (String newLemma : newLemmas) {
                Lemma altLemma = new Lemma(newLemma);
                HashSet<String> altParams = new HashSet<String>();
                altParams.add("Divdabis");
                altParams.add("Cita paradigma");

                if (newLemma.endsWith("ts")) // noliegt->noliegts
                {
                    altParams.add("Lok?mais cieam?s k?rtas pag?tnes divdabis (-ts, -ta)");
                    altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    flags.add("Darbbas v?rds");
                    flags.add("Parasti divdabja form?");
                    flags.add("Parasti lok?m? cieam?s k?rtas pag?tnes divdabja form?");
                } else if (newLemma.endsWith("damies")) //aizelsties->aizelsdamies
                {
                    // This test must come before the "is"/"ies" test below:
                    // every word ending in "damies" also ends in "ies" and
                    // would otherwise never reach this branch.
                    altParams.add("Daji lok?mais divdabis (-dams, -dama, -damies, -dam?s)");
                    altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    flags.add("Darbbas v?rds");
                    flags.add("Parasti divdabja form?");
                    flags.add("Parasti daji lok?m? divdabja form?");
                } else if (newLemma.endsWith("is") || newLemma.endsWith("ies")) // aizelsties->aizelsies
                {
                    altParams.add("Lok?mais dar?m?s k?rtas pag?tnes divdabis (-is, -usi, -ies, -usies)");
                    altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    flags.add("Darbbas v?rds");
                    flags.add("Parasti divdabja form?");
                    flags.add("Parasti lok?m? dar?m?s k?rtas pag?tnes divdabja form?");
                } else if (newLemma.endsWith("ams") || newLemma.endsWith("?ms")) // noliegt->noliedzams
                {
                    altParams.add("Lok?mais cieam?s k?rtas tagadnes divdabis (-ams, -ama, -?ms, -?ma)");
                    altLemmas.put(0, new Tuple<Lemma, HashSet<String>>(altLemma, altParams));

                    flags.add("Darbbas v?rds");
                    flags.add("Parasti divdabja form?");
                    flags.add("Parasti lok?m? cieam?s k?rtas tagadnes divdabja form?");
                } else {
                    System.err.printf("Problem matching \"%s\" in entry \"%s\" with paradigm 0 (Divdabis)\n",
                            newLemma, lemma);
                    // 0 = grammar text recognized, but nothing will be cut off.
                    newBegin = 0;
                }
            }
        }

        if (newBegin > 0)
            gramText = gramText.substring(newBegin);
        return gramText;
    }

    /**
     * Simple rule - tries to match grammar text to given string and lemma
     * ending. If matched, adds a single paradigm.
     * <p>
     * The grammar text matches when it is exactly {@code pattern}, or
     * {@code pattern} immediately followed by ';', ',' or '.' and any
     * remaining text.
     * @param pattern   Unescaped (literal) string the grammar text must begin
     *                with to apply this rule.
     * @param requiredEnding   Required ending for the lemma to apply this
     *                      rule.
     * @param paradigmId   Paradigm ID to set if rule matched.
     * @param positiveFlags   These flags are added if rule and lemma ending
     *                   matched; may be null.
     * @param alwaysFlags   These flags are added if rule matched, whether or
     *                   not the lemma ending matched; may be null.
     * @param gramText   Grammar string currently being processed.
     * @param lemma      Lemma string for this header.
     * @return New beginning for gram string (length of {@code pattern}) if the
     * rule and the lemma ending matched, 0 if the rule matched but the lemma
     * ending did not, -1 if the rule did not match.
     */
    private int simpleRule(String pattern, String requiredEnding, int paradigmId, String[] positiveFlags,
            String[] alwaysFlags, String gramText, String lemma) {
        int newBegin = -1;
        // Pattern.quote() is used instead of a hand-written \Q...\E wrapper so
        // that a pattern which itself contains "\E" is still taken literally.
        if (gramText.matches(Pattern.quote(pattern) + "([;,.].*)?")) {
            newBegin = pattern.length();
            if (lemma.endsWith(requiredEnding)) {
                paradigm.add(paradigmId);
                if (positiveFlags != null)
                    flags.addAll(Arrays.asList(positiveFlags));
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm %s\n", lemma, paradigmId);
                // 0 = grammar text recognized, but lemma ending is unexpected.
                newBegin = 0;
            }
            if (alwaysFlags != null)
                flags.addAll(Arrays.asList(alwaysFlags));
        }
        return newBegin;
    }

    /**
     * The same as simple rule, but hyphens are optional. It tries to match
     * grammar text to given pattern and lemma ending. If matched, adds a single
     * paradigm.
     * <p>
     * Every '-' in {@code pattern} may be present or absent in the grammar
     * text; all other characters are matched literally. The matched prefix
     * must be the whole grammar text or be immediately followed by ';', ','
     * or '.'.
     * @param pattern   Unescaped (literal) string the grammar text must begin
     *                with to apply this rule.
     * @param requiredEnding   Required ending for the lemma to apply this
     *                      rule.
     * @param paradigmId   Paradigm ID to set if rule matched.
     * @param positiveFlags   These flags are added if rule and lemma ending
     *                   matched; may be null.
     * @param alwaysFlags   These flags are added if rule matched, whether or
     *                   not the lemma ending matched; may be null.
     * @param gramText   Grammar string currently being processed.
     * @param lemma      Lemma string for this header.
     * @return New beginning for gram string (length of the prefix actually
     * matched) if the rule and the lemma ending matched, 0 if the rule matched
     * but the lemma ending did not, -1 if the rule did not match.
     */
    private int simpleRuleOptHyperns(String pattern, String requiredEnding, int paradigmId, String[] positiveFlags,
            String[] alwaysFlags, String gramText, String lemma) {
        int newBegin = -1;
        // Build the regex piecewise: each literal piece between hyphens is
        // quoted with Pattern.quote() (safe even if it contains "\E"), and the
        // hyphens between pieces become optional. Limit -1 keeps empty leading
        // and trailing pieces so that no hyphen is lost.
        String[] pieces = pattern.split("-", -1);
        StringBuilder regex = new StringBuilder("(");
        for (int i = 0; i < pieces.length; i++) {
            if (i > 0)
                regex.append("-?");
            regex.append(Pattern.quote(pieces[i]));
        }
        regex.append(")([;,.].*)?");

        Matcher m = Pattern.compile(regex.toString()).matcher(gramText);
        if (m.matches()) {
            // Length of what was really matched: optional hyphens may be missing.
            newBegin = m.group(1).length();
            if (lemma.endsWith(requiredEnding)) {
                paradigm.add(paradigmId);
                if (positiveFlags != null)
                    flags.addAll(Arrays.asList(positiveFlags));
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm %s\n", lemma, paradigmId);
                // 0 = grammar text recognized, but lemma ending is unexpected.
                newBegin = 0;
            }
            if (alwaysFlags != null)
                flags.addAll(Arrays.asList(alwaysFlags));
        }
        return newBegin;
    }

    /**
     * Paradigm 9: noun, 5th declension, -e, feminine.
     * Rules in form "-es, dsk. en. -?u, s.".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these
     * rules are long and highly specific and, thus, do not conflict
     * with other rules.
     * <p>
     * The branches are tried in order and the first grammar prefix that
     * matches decides the outcome. For a matched prefix the feminine gender
     * flag is always added; paradigm 9 and the noun flag are added only if the
     * lemma has the ending expected by that branch, otherwise a diagnostic is
     * printed to {@code System.err} and 0 is returned.
     * <p>
     * NOTE(review): as the literals read here, several branches test the very
     * same prefix "-es, dsk. en. -u, s."; only the first of them can ever
     * fire. Presumably the prefixes originally differed by a non-ASCII
     * consonant that was lost in an encoding conversion - confirm against the
     * original file before changing any of these literals.
     * @param gramText grammar string currently being processed.
     * @param lemma lemma string for this header.
     * @return new beginning for gram string if one of these rules matched and
     * the lemma ending fits, 0 if a rule matched but the lemma ending does
     * not fit, -1 otherwise.
     */
    private int esEndingPluralGenUEndingFemRules(String gramText, String lemma) {
        int newBegin = -1;
        // Paradigm 9: noun, 5th declension, -e, feminine.
        if (gramText.startsWith("-es, dsk. en. -?u, s.")) //?bece
        {
            newBegin = "-es, dsk. en. -?u, s.".length();
            if (lemma.matches(".*[c?]e")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                // 0 = grammar text recognized, but lemma ending is unexpected.
                newBegin = 0;
            }
            // Added whenever the grammar prefix matched, even on lemma mismatch.
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -u, s.")) //?bele
        {
            // First branch with this prefix text; see NOTE(review) in the
            // method comment about the later branches with the same text.
            newBegin = "-es, dsk. en. -u, s.".length();
            if (lemma.endsWith("le")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -u, s.")) //abate
        {
            newBegin = "-es, dsk. en. -u, s.".length();
            if (lemma.matches(".*[ts]e")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -u, s.")) //?bolaine
        {
            newBegin = "-es, dsk. en. -u, s.".length();
            if (lemma.endsWith("ne")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -u, s.")) //?bolmaize
        {
            newBegin = "-es, dsk. en. -u, s.".length();
            if (lemma.matches(".*[zd]e")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -ru, s.")) //administratore
        {
            newBegin = "-es, dsk. en. -ru, s.".length();
            if (lemma.endsWith("re")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -stu, s.")) //abolicioniste
        {
            newBegin = "-es, dsk. en. -stu, s.".length();
            if (lemma.endsWith("ste")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -u, s.")) //aeroloe
        {
            newBegin = "-es, dsk. en. -u, s.".length();
            if (lemma.endsWith("e")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -vju, s.")) //agave
        {
            newBegin = "-es, dsk. en. -vju, s.".length();
            if (lemma.endsWith("ve")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -u, s.")) //agnostie
        {
            newBegin = "-es, dsk. en. -u, s.".length();
            if (lemma.endsWith("e")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -mju, s.")) //agronome
        {
            newBegin = "-es, dsk. en. -mju, s.".length();
            if (lemma.endsWith("me")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-es, dsk. en. -pju, s.")) //aitkope, tsklapes
        {
            newBegin = "-es, dsk. en. -pju, s.".length();
            if (lemma.endsWith("pe")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("pes")) {
                // Lemma ending in "pes" additionally gets the extra flag below.
                paradigm.add(9);
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        }
        return newBegin;
    }

    /**
     * Paradigm 7: noun, 4th declension, -a, feminine.
     * Paradigm 9: noun, 5th declension, -e, feminine.
     * Paradigm 11: noun, 6th declension, -s.
     * Rules in form "-u, s." and "-u, s.".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for verbs are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched,
     * -1 otherwise.
     */
    private int uEndingFemRules(String gramText, String lemma) {
        // Checks gramText against feminine-noun prefixes and, depending on the
        // ending of lemma, adds paradigm numbers to the paradigm set and
        // descriptive flags to the flags set of this object.
        // Result: length of the matched prefix; 0 when the prefix matched but
        // the lemma had none of the expected endings (a warning is printed to
        // stderr and the gender flag is still added); -1 when nothing matched.
        //
        // NOTE(review): in this copy of the file all five prefix tests below
        // use the identical literal "-u, s.", so only the first branch can ever
        // be taken. The example lemmas next to each branch suggest that every
        // literal originally contained a different non-ASCII consonant that
        // was lost to an encoding problem -- TODO restore the literals (and the
        // flag strings) from a correctly encoded copy of the source.
        int newBegin = -1;
        // Paradigms: 7, 9, 11
        if (gramText.startsWith("-u, s.")) //ahajiete, aizkulises, bikses, klauas
        {
            newBegin = "-u, s.".length();
            if (lemma.endsWith("te")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("as")) {
                paradigm.add(7);
                flags.add("irkav?rds daudzskaitl");
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("tis")) {
                paradigm.add(11);
                flags.add("irkav?rds daudzskaitl");
                flags.add("Lietv?rds");
            } else if (lemma.matches(".*[st]es")) {
                paradigm.add(9);
                flags.add("irkav?rds daudzskaitl");
                flags.add("Lietv?rds");
            } else {
                // Prefix recognised, lemma ending not: report it and return 0.
                System.err.printf("Problem matching \"%s\" with paradigm 7, 9, 11\n", lemma);
                newBegin = 0;
            }
            // The gender flag is added even when the lemma ending was not recognised.
            flags.add("Sievieu dzimte");
        }
        // Paradigms: 7, 9
        else if (gramText.startsWith("-u, s.")) //mir?des, graizes, baas
        {
            newBegin = "-u, s.".length();
            if (lemma.endsWith("as")) {
                paradigm.add(7);
                flags.add("irkav?rds daudzskaitl");
                flags.add("Lietv?rds");
            } else if (lemma.matches(".*[dz]es")) {
                paradigm.add(9);
                flags.add("irkav?rds daudzskaitl");
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 7, 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-u, s.")) //acenes, iemaas
        {
            newBegin = "-u, s.".length();
            if (lemma.endsWith("as")) {
                paradigm.add(7);
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            } else if (lemma.endsWith("ne")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("nes")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 7, 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        } else if (gramText.startsWith("-u, s.")) // aijas, zees
        {
            newBegin = "-u, s.".length();
            if (lemma.endsWith("as")) {
                paradigm.add(7);
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            } else if (lemma.endsWith("a")) {
                paradigm.add(7);
                flags.add("Lietv?rds");
            // NOTE(review): "[]es" looks like a character class whose members
            // were lost; as written it appears to be an unclosed class, which
            // java.util.regex rejects with PatternSyntaxException -- confirm
            // and restore the intended characters.
            } else if (lemma.matches(".*[]es")) {
                paradigm.add(9);
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 7, 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        }
        // Paradigms: 9
        else if (gramText.startsWith("-u, s.")) //bailes
        {
            newBegin = "-u, s.".length();
            if (lemma.endsWith("les")) {
                paradigm.add(9);
                flags.add("irkav?rds daudzskaitl");
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                newBegin = 0;
            }
            flags.add("Sievieu dzimte");
        }
        return newBegin;
    }

    /**
     * Paradigm 1: noun, 1st declension, -s
     * Paradigm 2: noun, 1st declension, -š
     * Paradigm 3: noun, 2nd declension, -is
     * Paradigm 4: noun, 2nd declension, -s (nom. == gen.)
     * Paradigm 5: noun, 2nd declension, -suns
     * Rules in form "-a, v." (also "-pja, v.", "-a, vsk." and "-ra, v.").
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for nouns are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched
     * (0 if the rule matched but the lemma had an unexpected ending),
     * -1 otherwise.
     */
    private int aEndingMascRules(String gramText, String lemma) {
        // Checks gramText against masculine-noun prefixes ending in "-a" and,
        // depending on the ending of lemma, adds paradigm numbers to the
        // paradigm set and descriptive flags to the flags set of this object.
        // Result: length of the matched prefix; 0 when the prefix matched but
        // the lemma had none of the expected endings (a warning is printed to
        // stderr and the gender flag is still added); -1 when nothing matched.
        //
        // NOTE(review): several prefix literals below are the identical string
        // "-a, v." in this copy of the file, which makes the later branches
        // unreachable; likewise endsWith("") is always true and the "[]"
        // character classes are empty. These look like non-ASCII characters
        // lost to an encoding problem -- TODO restore the literals from a
        // correctly encoded copy of the source.
        int newBegin = -1;
        // Paradigms: 3, 5
        if (gramText.startsWith("-a, v.")) // acumirklis, durkls
        {
            newBegin = "-a, v.".length();
            if (lemma.endsWith("ls")) {
                paradigm.add(5);
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("lis")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else {
                // Prefix recognised, lemma ending not: report it and return 0.
                System.err.printf("Problem matching \"%s\" with paradigm 3, 5\n", lemma);
                newBegin = 0;
            }
            // The gender flag is added even when the lemma ending was not recognised.
            flags.add("Vrieu dzimte");
        } else if (gramText.startsWith("-a, v.")) // abrkasis, lemess
        {
            newBegin = "-a, v.".length();
            if (lemma.endsWith("ss")) {
                paradigm.add(5);
                flags.add("Lietv?rds");
            } else if (lemma.matches(".*[st]is")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 3, 5\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        }
        // Paradigm 3
        else if (gramText.startsWith("-a, v.")) // agnostiis
        {
            newBegin = "-a, v.".length();
            if (lemma.matches(".*[]is")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 3\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        } else if (gramText.startsWith("-pja, v.")) // aitkopis
        {
            newBegin = "-pja, v.".length();
            if (lemma.endsWith("pis")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 3\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        } else if (gramText.startsWith("-a, v.")) // aizb?znis
        {
            newBegin = "-a, v.".length();
            if (lemma.endsWith("znis")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 3\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        } else if (gramText.startsWith("-a, vsk.")) // ?dgrauzis
        {
            newBegin = "-a, vsk.".length();
            if (lemma.matches(".*[zd]is")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 3\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        }
        //Paradigms: 1, 3
        else if (gramText.matches("-ra[,;] v.(.*)?")) // airis, mrniekmeistars
        {
            // The pattern accepts "," or ";" after "-ra"; both variants have the
            // same length, so the "," form is used to compute the offset.
            newBegin = "-ra, v.".length();
            if (lemma.endsWith("ris")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("rs")) {
                paradigm.add(1);
                flags.add("Lietv?rds");
            } else {
                // This branch distinguishes paradigms 1 and 3, so the diagnostic names both.
                System.err.printf("Problem matching \"%s\" with paradigms 1, 3\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        }
        // Paradigms: 2, 3, 5
        else if (gramText.startsWith("-a, v.")) // abesnis
        {
            newBegin = "-a, v.".length();

            if (lemma.endsWith("suns")) {
                paradigm.add(5);
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("")) {
                paradigm.add(2);
                flags.add("Lietv?rds");
            } else if (lemma.endsWith("nis")) {
                paradigm.add(3);
                flags.add("Lietv?rds");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigms 2, 3, 5\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        }
        // Paradigms: 1, 2, 3 (if no sound changes), 1-5 (if plural)
        else if (gramText.startsWith("-a, v.")) // abats, akustiis, spargui, skostii
        {
            newBegin = "-a, v.".length();
            if (lemma.matches(".*[jr]is")) {
                paradigm.add(3);
                flags.add("Lietv?rds");

            } else if (lemma.endsWith("")) {
                paradigm.add(2);
                flags.add("Lietv?rds");
            } else if (lemma.matches(".*[^aeiou??]s")) {
                paradigm.add(1);
                flags.add("Lietv?rds");
            } else if (lemma.matches(".*[]i")) {
                // Plural lemma: the paradigm cannot be narrowed down, so all
                // of 1-5 are recorded together with an ambiguity flag.
                paradigm.add(1);
                paradigm.add(2);
                paradigm.add(3);
                paradigm.add(4);
                paradigm.add(5);
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
                flags.add("Neviennozmga paradigma");
            } else if (lemma.matches(".*[]i")) {
                paradigm.add(1);
                paradigm.add(2);
                paradigm.add(3);
                paradigm.add(5);
                flags.add("Lietv?rds");
                flags.add("irkav?rds daudzskaitl");
                flags.add("Neviennozmga paradigma");
            } else {
                System.err.printf("Problem matching \"%s\" with paradigms 1, 2, 3\n", lemma);
                newBegin = 0;
            }
            flags.add("Vrieu dzimte");
        }
        return newBegin;
    }

    /**
     * Paradigm 1: noun, 1st declension, -s
     * Paradigm 2: noun, 1st declension, -š
     * Paradigm 3: noun, 2nd declension, -is
     * Paradigm 4: noun, 2nd declension, -s (e.g., mēness) (sg. nom. = sg. gen.)
     * Paradigm 5: noun, 2nd declension, -suns
     * Paradigm 32: noun, 6th declension - ļaudis
     * Rules in form "-u, v." and "-u, v.".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for nouns are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched
     * (0 if the rule matched but the lemma had an unexpected ending),
     * -1 otherwise.
     */
    private int uEndingMascRules(String gramText, String lemma) {
        // Masculine-noun rules for grammars starting with "-u, v.". Each rule
        // inspects the lemma ending, fills the paradigm and flags sets of this
        // object and yields the prefix length, or 0 (with a warning on stderr)
        // when the lemma ending is not one the rule expects. -1 means that no
        // rule applied.
        //
        // NOTE(review): the three prefix tests are textually identical in this
        // copy of the file, so only the first rule can fire; the literals
        // appear to have lost non-ASCII characters -- TODO restore them.
        final String[] pluralNoun = { "Lietv?rds", "irkav?rds daudzskaitl" };
        final String[] ambiguousPluralNoun = { "Lietv?rds", "irkav?rds daudzskaitl",
                "Neviennozmga paradigma" };

        // Paradigm 32
        if (gramText.startsWith("-u, v.")) { // audis
            int consumed = "-u, v.".length();
            if (lemma.endsWith("audis")) {
                // NOTE(review): comment and message speak of paradigm 32, yet
                // 11 is what gets recorded -- confirm which one is intended.
                paradigm.add(11);
                flags.addAll(Arrays.asList(pluralNoun));
            } else {
                System.err.printf("Problem matching \"%s\" with paradigm 32\n", lemma);
                consumed = 0;
            }
            flags.add("Vrieu dzimte");
            // TODO Daudzskaitlinieks?
            return consumed;
        }

        // Paradigms: 1-5 (plural forms)
        if (gramText.startsWith("-u, v.")) { // bretoi
            int consumed = "-u, v.".length();
            if (lemma.endsWith("i")) {
                paradigm.addAll(Arrays.asList(1, 2, 3, 4, 5));
                flags.addAll(Arrays.asList(ambiguousPluralNoun));
            } else {
                System.err.printf("Problem matching \"%s\" with paradigms 1-5\n", lemma);
                consumed = 0;
            }
            flags.add("Vrieu dzimte");
            return consumed;
        }

        // abesi, abh?zi, ?dgraui, adigejiei, ad?ri, alimenti, angi, antinukloni, apakbrun?i
        if (gramText.startsWith("-u, v.")) {
            int consumed = "-u, v.".length();
            if (lemma.endsWith("nieki") || lemma.endsWith("umi") || lemma.endsWith("ot?ji")) {
                paradigm.add(1);
                flags.addAll(Arrays.asList(pluralNoun));
            } else if (lemma.endsWith("iei")) {
                paradigm.addAll(Arrays.asList(3, 5));
                flags.addAll(Arrays.asList(ambiguousPluralNoun));
            } else if (lemma.matches(".*[]i")) {
                // akmei, mnei etc.
                paradigm.addAll(Arrays.asList(1, 2, 3, 4, 5));
                flags.addAll(Arrays.asList(ambiguousPluralNoun));
            } else if (lemma.matches(".*[vpm]ji")) {
                // looks like these are predefined sound changes always
                paradigm.addAll(Arrays.asList(3, 5));
                flags.addAll(Arrays.asList(ambiguousPluralNoun));
            } else if (lemma.matches(".*[bgkhrst?]i") || lemma.matches(".*[aeiou??]ji")) {
                // can't determine if there is sound change (t - tti, s - viesi, j - airk?ji)
                paradigm.addAll(Arrays.asList(1, 2, 3, 5));
                flags.addAll(Arrays.asList(ambiguousPluralNoun));
            } else if (lemma.matches(".*[cdlmnpvz]i")) {
                // there is no sound change
                paradigm.addAll(Arrays.asList(1, 2));
                flags.addAll(Arrays.asList(ambiguousPluralNoun));
            } else {
                System.err.printf("Problem matching \"%s\" with paradigms 1-5\n", lemma);
                consumed = 0;
            }
            flags.add("Vrieu dzimte");
            return consumed;
        }

        return -1;
    }

    /**
     * Paradigm 3: noun, 2nd declension, -is
     * Paradigm 9: noun, 5th declension, -e, feminine
     * Rules containing single ending with no other information, e.g. "-u".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for nouns are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched
     * (0 if the rule matched but the lemma had an unexpected ending),
     * -1 otherwise.
     */
    private int singleEndingOnlyRules(String gramText, String lemma) {
        // Rules for grammars that consist of a bare ending ("-u" or "-a"),
        // optionally followed by punctuation and further text. A matching rule
        // fills the paradigm and flags sets of this object and yields the
        // length of the ending; if the lemma ending is unexpected, a warning is
        // printed and 0 is returned. -1 means that no rule applied.
        //
        // NOTE(review): the first two patterns are textually identical in this
        // copy of the file, so the second rule cannot fire; the literals appear
        // to have lost non-ASCII characters -- TODO restore them.

        // Paradigm 9
        if (gramText.matches("-u([;.].*)?")) { // abioenze, ablumozes, akol?de, nematodes
            final boolean pluralLemma = lemma.matches(".*[dz]es");
            if (!pluralLemma && !lemma.matches(".*[dz]e")) {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                return 0;
            }
            paradigm.add(9);
            flags.add("Lietv?rds");
            flags.add("Sievieu dzimte");
            if (pluralLemma) {
                flags.add("irkav?rds daudzskaitl");
            }
            return "-u".length();
        }

        if (gramText.matches("-u([;.].*)?")) { // agrene, aizlaidnes
            final boolean pluralLemma = lemma.endsWith("nes");
            if (!pluralLemma && !lemma.endsWith("ne")) {
                System.err.printf("Problem matching \"%s\" with paradigm 9\n", lemma);
                return 0;
            }
            paradigm.add(9);
            flags.add("Lietv?rds");
            flags.add("Sievieu dzimte");
            if (pluralLemma) {
                flags.add("irkav?rds daudzskaitl");
            }
            return "-u".length();
        }

        // Paradigm 3
        if (gramText.matches("-a([;,.].*)?")) { // ?bolainis
            if (!lemma.endsWith("nis")) {
                System.err.printf("Problem matching \"%s\" with paradigm 3\n", lemma);
                return 0;
            }
            paradigm.add(3);
            flags.add("Lietv?rds");
            flags.add("Vrieu dzimte");
            return "-a".length();
        }

        return -1;
    }

    /**
     * Paradigm 11: noun, 6th declension, -s
     * Rules in form "-valsts, dsk. en. -valstu, s.", i.e. containing full 6th
     * declension nouns.
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for nouns are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched,
     * -1 otherwise.
     */
    private int sixthDeclNounFullWordRules(String gramText, String lemma) {
        // Tries the full-word 6th declension rules one after another and
        // returns the result of the first one that does not yield -1; the
        // result of the last rule is returned as is.

        // uzacs, acs
        int consumed = simpleRuleOptHyperns("-acs, dsk. en. -acu, s.", "acs", 11,
                new String[] { "Lietv?rds" }, new String[] { "Sievieu dzimte" }, gramText, lemma);
        if (consumed != -1) {
            return consumed;
        }

        // aizkr?sns
        consumed = simpleRuleOptHyperns("-kr?sns, dsk. en. -kr?u, s.", "kr?sns", 11,
                new String[] { "Lietv?rds" }, new String[] { "Sievieu dzimte" }, gramText, lemma);
        if (consumed != -1) {
            return consumed;
        }

        // agr?rvalsts
        return simpleRuleOptHyperns("-valsts, dsk. en. -valstu, s.", "valsts", 11,
                new String[] { "Lietv?rds" }, new String[] { "Sievieu dzimte" }, gramText, lemma);
    }

    /**
     * Paradigm 15: verbs, 1st conjugation, direct
     * Rules in form "parasti 3. pers., -alc, pag. -alca".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for verbs are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched,
     * -1 otherwise.
     */
    private int firstConjDirVerb3PersRules(String gramText, String lemma) {
        // Tries the third-person-only verb rules one after another and returns
        // the result of the first rule that does not yield -1; the result of
        // the last rule is returned as is.
        // Rules ordered alphabetically by verb infinitive.
        int pos;

        // A
        pos = simpleRuleOptHyperns("parasti 3. pers., -aug, pag. -auga", "augt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"augt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizaugt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -aust, pag. -ausa", "aust", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"aust\" (k? gaisma)" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizaust 1
        if (pos != -1) { return pos; }

        // B
        pos = simpleRuleOptHyperns("parasti 3. pers., -birst, pag. -bira", "birt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"birt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizbirt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -brk, pag. -bruka", "brukt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"brukt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizbrukt
        if (pos != -1) { return pos; }

        // C
        // D
        pos = simpleRuleOptHyperns("parasti 3. pers., -deg, pag. -dega", "degt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"degt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdegt 2
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -dim, pag. -dima", "dimt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"dimt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdimt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -dip, pag. -dipa", "dipt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"dipt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdipt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -dc, pag. -dca", "dkt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"dkt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdkt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -dze, pag. -dzla", "dzelt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"dzelt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdzelt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -dzst, pag. -dzija", "dzt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"dzt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdzt 2
        if (pos != -1) { return pos; }

        // E, F
        // G
        pos = simpleRuleOptHyperns("parasti 3. pers., -grimst, pag. -grima", "grimt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"grimt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizgrimt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -grst, pag. -gruva", "grt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"grt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizgrt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("3. pers. -guldz, pag. -guldza", "gulgt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"gulgt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizgulgt
        if (pos != -1) { return pos; }

        // H
        // I
        pos = simpleRuleOptHyperns("parasti 3. pers., -irst, pag. -ira", "irt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"irt\" (k? audums)" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // irt 2
        if (pos != -1) { return pos; }

        // J
        // K
        pos = simpleRuleOptHyperns("parasti 3. pers., -kalst, pag. -kalta", "kalst", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"kalst\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizkalst
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -kauc, pag. -kauca", "kaukt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"kaukt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizkaukt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -kn?bj, pag. -kn?ba", "kn?bt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"kn?bt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizkn?bt
        if (pos != -1) { return pos; }
        // NOTE(review): this is the only rule here that goes through simpleRule
        // rather than simpleRuleOptHyperns -- confirm that this is intended.
        pos = simpleRule("parasti 3. pers., -kvpst, pag. -kvpa", "kvpt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"kvpt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizkvpt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -kviec, pag. -kvieca", "kviekt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"kviekt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizkviekt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -ep, pag. -epa", "ept", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"ept\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizept
        if (pos != -1) { return pos; }

        // L
        pos = simpleRuleOptHyperns("parasti 3. pers., -lkst, pag. -lka", "lkt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"lkt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizlkt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -lp, pag. -lipa", "lipt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"lipt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizlipt
        if (pos != -1) { return pos; }
        pos = simpleRuleOptHyperns("parasti 3. pers., -lst, pag. -lija", "lt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"lt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizlt
        if (pos != -1) { return pos; }

        // M, N, O, P, R
        // S, 
        pos = simpleRuleOptHyperns("parasti 3. pers., -alc, pag. -alca", "alkt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"alkt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizalkt
        if (pos != -1) { return pos; }

        // T
        // U, V, Z
        return simpleRuleOptHyperns("parasti 3. pers., -tkst, pag. -tka", "tkt", 15,
                new String[] { "Darbbas v?rds", "Loct k? \"tkt\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aiztkt
    }

    /**
     * Paradigm 15: verbs, 1st conjugation, direct
     * Rules in form "-tupstu, -tupsti, -tupst, pag. -tupu".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for verbs are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched,
     * -1 otherwise.
     */
    private int firstConjDirVerbAllPersRules(String gramText, String lemma) {
        int newBegin = -1;
        // Rules ordered alphabetically by verb infinitive.
        // A
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-aru, -ar, -ar, pag. -aru", "art", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"art\"" }, null, gramText, lemma); //aizart
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-auu, -aud, -au, pag. -audu", "aust", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"aust\" (k? zirneklis)" }, null, gramText,
                    lemma); //aizaust 2
        // B
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-b?u, -b?z, -b?, pag. -b?zu", "b?zt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"b?zt\"" }, null, gramText, lemma); //aizb?zt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-bgu, -bdz, -bg, pag. -bgu", "bgt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"bgt\"" }, null, gramText, lemma); //aizbgt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-beru, -ber, -ber, pag. -bru", "brt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"brt\"" }, null, gramText, lemma); //aizbrt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-bilstu, -bilsti, -bilst, pag. -bildu", "bilst", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"bilst\"" }, null, gramText, lemma); //aizbilst
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-braucu, -brauc, -brauc, pag. -braucu", "braukt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"braukt\"" }, null, gramText, lemma); //aizbraukt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-br?u, -br?z, -br?, pag. -br?zu", "br?zt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"br?zt\"" }, null, gramText, lemma); //aizbr?zt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-brienu, -brien, -brien, pag. -bridu", "brist", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"brist\"" }, null, gramText, lemma); //aizbrist
        // C
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-ceu, -cel, -ce, pag. -clu", "celt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"celt\"" }, null, gramText, lemma); //aizcelt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-crtu, -crt, -crt, pag. -cirtu", "cirst", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"cirst\"" }, null, gramText, lemma); //aizcirst
        // D
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-dedzu, -dedz, -dedz, pag. -dedzu", "degt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"degt\"" }, null, gramText, lemma); //aizdegt 1
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-diebju, -dieb, -diebj, pag. -diebu", "diebt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"diebt\"" }, null, gramText, lemma); //aizdiebt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-diedzu, -diedz, -diedz, pag. -diedzu", "diegt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"diegt\"" }, null, gramText, lemma); //aizdiegt 1
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-dodu, -dod, -dod, pag. -devu", "dot", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"dot\"" }, null, gramText, lemma); //aizdot
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-dr?u, -dr?z, -dr?, pag. -dr?zu", "dr?zt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"dr?zt\"" }, null, gramText, lemma); //aizdr?zt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-duru, -dur, -dur, pag. -dru", "durt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"durt\"" }, null, gramText, lemma); //aizdurt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-dzeru, -dzer, -dzer, pag. -dzru", "dzert", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"dzert\"" }, null, gramText, lemma); //aizdzert
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-dzenu, -dzen, -dzen, pag. -dzinu", "dzt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"dzt\"" }, null, gramText, lemma); //aizdzt 1
        // E
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-du, -d, -d, pag. -du", "st", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"st\"" }, null, gramText, lemma); //aizst
        // F
        // G
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-g?u, -g?z, -g?, pag. -g?zu", "g?zt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"g?zt\"" }, null, gramText, lemma); //aizg?zt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-glauu, -glaud, -glau, pag. -glaudu", "glaust", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"glaust\"" }, null, gramText, lemma); //aizglaust
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-gr?bju, -gr?b, -gr?bj, pag. -gr?bu", "gr?bt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"gr?bt\"" }, null, gramText, lemma); //aizgr?bt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-grauu, -grauz, -grau, pag. -grauzu", "grauzt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"grauzt\"" }, null, gramText, lemma); //aizgrauzt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-grieu, -griez, -grie, pag. -griezu", "griezt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"griezt\"" }, null, gramText, lemma); //aizgriezt 2
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-gru, -grd, -gr, pag. -grdu", "grst", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"grst\"" }, null, gramText, lemma); //aizgrst
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-gulstu, -gulsti, -gulst, pag. -glu, ar -gulu", "gult", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"gult\"", "Parall?s formas" }, null,
                    gramText, lemma); //aizgult
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-gstu, -gsti, -gst, pag. -guvu", "gt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"gt\"" }, null, gramText, lemma); //aizgt
        // 
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-iedu, -ied, -ied, pag. -gidu", "ist", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"ist\"" }, null, gramText, lemma); //apist
        // H
        // I
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-eju, -ej, -iet, pag. -g?ju", "iet", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"iet\"" }, null, gramText, lemma); //apiet
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-iru, -ir, -ir, pag. -ru", "irt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"irt\" (k? ar airiem)" }, null, gramText,
                    lemma); //aizirt 1
        // J
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-j?ju, -j?j, -j?j, pag. -j?ju", "j?t", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"j?t\"" }, null, gramText, lemma); //aizj?t
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-jou, -joz, -jo, pag. -jozu", "jozt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"jozt\"" }, null, gramText, lemma); //aizjozt 1, 2
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-jdzu, -jdz, -jdz, pag. -jdzu", "jgt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"jgt\"" }, null, gramText, lemma); //aizjgt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-jumju, -jum, -jumj, pag. -jmu, ar -jumu", "jumt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"jumt\"", "Parall?s formas" }, null,
                    gramText, lemma); //aizjumt
        // K
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-k?pju, -k?p, -k?pj, pag. -k?pu", "k?pt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"k?pt\"" }, null, gramText, lemma); //aizk?pt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-karu, -kar, -kar, pag. -k?ru", "k?rt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"k?rt\"" }, null, gramText, lemma); //aizk?rt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kauju, -kauj, -kauj, pag. -k?vu", "kaut", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kaut\"" }, null, gramText, lemma); //apkaut
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kl?ju, -kl?j, -kl?j, pag. -kl?ju", "kl?t", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kl?t\"" }, null, gramText, lemma); //apkl?t
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kliedzu, -kliedz, -kliedz, pag. -kliedzu", "kliegt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kliegt\"" }, null, gramText, lemma); //aizkliegt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-klimstu, -klimsti, -klimst, pag. -klimtu", "klimst", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"klimst\"" }, null, gramText, lemma); //aizklimst
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-klstu, -klsti, -klst, pag. -kldu", "klst", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"klst\"" }, null, gramText, lemma); //aizklst
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kstu, -ksti, -kst, pag. -kuvu", "kt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kt\"" }, null, gramText, lemma); //aizkt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kou, -kod, -ko, pag. -kodu", "kost", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kost\"" }, null, gramText, lemma); //aizkost
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kr?pju, -kr?p, -kr?pj, pag. -kr?pu", "kr?pt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kr?pt\"" }, null, gramText, lemma); //aizkr?pt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-krauju, -krauj, -krauj, pag. -kr?vu", "kraut", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kraut\"" }, null, gramText, lemma); //aizkraut
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-krtu, -krti, -krt, pag. -kritu", "krist", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"krist\"" }, null, gramText, lemma); //aizkrist
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kuru, -kur, -kur, pag. -kru", "kurt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kurt\"" }, null, gramText, lemma); //aizkurt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-kstu, -kusti, -kst, pag. -kusu", "kust", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"kust\"" }, null, gramText, lemma); //aizkust
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-eru, -er, -er, pag. -ru", "ert", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"ert\"" }, null, gramText, lemma); //aizert
        // L
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-laiu, -laid, -lai, pag. -laidu", "laist", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"laist\"" }, null, gramText, lemma); //aizlaist
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-lauu, -lauz, -lau, pag. -lauzu", "lauzt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"lauzt\"" }, null, gramText, lemma); //aizlauzt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-lecu, -lec, -lec, pag. -lcu", "lkt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"lkt\"" }, null, gramText, lemma); //aizlkt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-liedzu, -liedz, -liedz, pag. -liedzu", "liegt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"liegt\"" }, null, gramText, lemma); //aizliegt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-leju, -lej, -lej, pag. -lju", "liet", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"liet\"" }, null, gramText, lemma); //aizliet
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-lieku, -liec, -liek, pag. -liku", "likt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"likt\"" }, null, gramText, lemma); //aizlikt
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-lienu, -lien, -lien, pag. -ldu", "lst", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"lst\"" }, null, gramText, lemma); //aizlst
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-lobju, -lob, -lobj, pag. -lobu", "lobt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"lobt\"" }, null, gramText, lemma); //aizlobt

        // M, N, O, P, R, S
        // T
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-tupstu, -tupsti, -tupst, pag. -tupu", "tupt", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"tupt\"", "Parall?s formas" }, null,
                    gramText, lemma); //aiztupt
        // TODO tupu/tupstu
        if (newBegin == -1)
            newBegin = simpleRuleOptHyperns("-tveru, -tver, -tver, pag. -tvru", "tvert", 15,
                    new String[] { "Darbbas v?rds", "Loct k? \"tvert\"" }, null, gramText, lemma); //aiztvert
        // U, V, Z

        return newBegin;
    }

    /**
     * Paradigm 16: second-conjugation direct (non-reflexive) verbs.
     * Handles rules of the form "parasti 3. pers., -o, pag. -oja",
     * "-oju, -o, -o, -ojam, -ojat, pag. -oju; -oj?m, -oj?t; pav. -o, -ojiet"
     * and "-ju, -, -, pag. -ju".
     * <p>
     * Split out of {@link #processBeginingWithPatterns(String, String)} for
     * readability only: these verb rules are long and highly specific, so
     * they do not conflict with other rules. Rules are tried from top to
     * bottom; as soon as one helper call returns something other than -1,
     * no further rule is tried.
     *
     * @param gramText grammar text, passed unchanged to every rule helper
     * @param lemma lemma, passed unchanged to every rule helper
     * @return new beginning for the gram string if one of these rules
     * matched, -1 otherwise.
     */
    private int secondConjDirVerbRules(String gramText, String lemma) {
        final int noMatch = -1;

        // "Usually 3rd person" forms.
        int nextStart = simpleRuleOptHyperns("parasti 3. pers., -kko, pag. -kkoja", "kkot", 16,
                new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                gramText, lemma); // aizkkot
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -?, pag. -?ja", "?t", 16,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizk?b?t
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -, pag. -ja", "t", 16,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // adsorbt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -o, pag. -oja", "ot", 16,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizalkot, aizbangot
        }

        // Verb-specific rule.
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-dabju, -dab, -dab, pag. -dabju", "dabt", 16,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizdabt
        }

        // All-person forms; the longer patterns are tried before the shorter ones.
        if (nextStart == noMatch) {
            nextStart = simpleRule(
                    "-oju, -o, -o, -ojam, -ojat, pag. -oju; -oj?m, -oj?t; pav. -o, -ojiet", "ot", 16,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // acot
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule(
                    "-ju, -, -, -jam, -jat, pag. -ju, -j?m, -j?t; pav. -, -jiet", "t", 16,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // adverbializt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-?ju, -?, -?, pag. -?ju", "?t", 16,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aij?t
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-ju, -, -, pag. -ja", "t", 16,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizdelvert
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-ju, -, -, pag. -ju", "t", 16,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // absolutizt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-oju, -o, -o, pag. -oju", "ot", 16,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aiztuntuot
        }

        return nextStart;
    }

    /**
     * Paradigm 17: third-conjugation direct (non-reflexive) verbs, entries
     * marked as used mostly in the third person.
     * Handles rules of the form "parasti 3. pers., -bl?k, pag. -bl?kja".
     * <p>
     * Split out of {@link #processBeginingWithPatterns(String, String)} for
     * readability only: these verb rules are long and highly specific, so
     * they do not conflict with other rules. Verb-specific rules are tried
     * first, then the generic ending rules; as soon as one helper call
     * returns something other than -1, no further rule is tried.
     *
     * @param gramText grammar text, passed unchanged to every rule helper
     * @param lemma lemma, passed unchanged to every rule helper
     * @return new beginning for the gram string if one of these rules
     * matched, -1 otherwise.
     */
    private int thirdConjDir3PersVerbRules(String gramText, String lemma) {
        final int noMatch = -1;

        // Verb-specific rules, ordered alphabetically by verb infinitive.
        // A
        // B
        int nextStart = simpleRuleOptHyperns("parasti 3. pers., -bl?k, pag. -bl?kja", "bl?kt", 17,
                new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                gramText, lemma); // aizbl?kt
        // NOTE(review): in this copy of the file the next rule is textually
        // identical to the previous one; presumably the two differed in
        // characters lost to encoding - confirm against the original source.
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -bl?k, pag. -bl?kja", "bl?kt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizbl?kt
        }
        // C
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -?ab, pag. -?abja", "?abt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aiz?abt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -?aukst, pag. -?aukstja", "?aukstt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aiz?aukstt
        }
        // D
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -d?rd, pag. -d?rdja", "d?rdt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizd?rdt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -dimd, pag. -dimdja", "dimdt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizdimdt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -dip, pag. -dipja", "dipt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizdipt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -dun, pag. -dunja", "dunt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizdunt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -dinkst, pag. -dinkstja", "dinkstt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizdinkstt
        }
        // E, F
        // G
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -grab, pag. -grabja", "grabt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizgrabt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -gurkst, pag. -gurkstja", "gurkstt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizgurkstt
        }
        // H, I, J
        // K
        // NOTE(review): the pattern below reads "pag," (comma) where every
        // other rule has "pag." - confirm that it mirrors the dictionary text
        // and is not a typo.
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -klab, pag, -klabja", "klabt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizklabt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -klakst, pag. -klakstja", "klakstt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizklakstt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -klaudz, pag. -klaudzja", "klaudzt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizklaudzt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -kp, pag. -kpja", "kpt", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizkpt
        }
        // L, M, N, O, P, R, S, T, U, V, Z

        // Generic ending rules, tried only after every verb-specific rule failed.
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -, pag. -ja", "t", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizdzirkstt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -ina, pag. -in?ja", "in?t", 17,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // aizducin?t
        }

        return nextStart;
    }

    /**
     * Paradigm 17: third-conjugation direct (non-reflexive) verbs, entries
     * listing forms for all persons.
     * Handles rules of the form "-dziedu, -dziedi, -dzied, pag. -dzied?ju"
     * and "-u, -i, -a, pag. -ju".
     * <p>
     * Split out of {@link #processBeginingWithPatterns(String, String)} for
     * readability only: these verb rules are long and highly specific, so
     * they do not conflict with other rules. Verb-specific rules are tried
     * first, then the generic ending rules; as soon as one helper call
     * returns something other than -1, no further rule is tried.
     *
     * @param gramText grammar text, passed unchanged to every rule helper
     * @param lemma lemma, passed unchanged to every rule helper
     * @return new beginning for the gram string if one of these rules
     * matched, -1 otherwise.
     */
    private int thirdConjDirAllPersVerbRules(String gramText, String lemma) {
        final int noMatch = -1;

        // Verb-specific rules.
        int nextStart = simpleRuleOptHyperns("-dziedu, -dziedi, -dzied, pag. -dzied?ju", "dzied?t", 17,
                new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizdzied?t
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-guu, -guli, -gu, pag. -gulju", "gult", 17,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizgult
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-klimstu, -klimsti, -klimst, pag. -klimstju", "klimstt", 17,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizklimstt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-kustu, -kusti, -kust, pag. -kustju", "kustt", 17,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizkustt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-turu, -turi, -tur, pag. -turju", "turt", 17,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizturt
        }

        // Generic ending rules, tried only after every verb-specific rule failed.
        if (nextStart == noMatch) {
            nextStart = simpleRule("-u, -i, -a, pag. -ju", "t", 17,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizstt
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-inu, -ini, -ina, pag. -in?ju", "in?t", 17,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizsvilin?t
        }
        return nextStart;
    }

    /**
     * Paradigm 18: first-conjugation reflexive verbs, entries marked as used
     * mostly in the third person.
     * Handles rules of the form "parasti 3. pers., -alcas, pag. -alc?s".
     * <p>
     * Split out of {@link #processBeginingWithPatterns(String, String)} for
     * readability only: these verb rules are long and highly specific, so
     * they do not conflict with other rules. Rules are tried from top to
     * bottom; as soon as one helper call returns something other than -1,
     * no further rule is tried.
     *
     * @param gramText grammar text, passed unchanged to every rule helper
     * @param lemma lemma, passed unchanged to every rule helper
     * @return new beginning for the gram string if one of these rules
     * matched, -1 otherwise.
     */
    private int firstConjRef3PersVerbRules(String gramText, String lemma) {
        final int noMatch = -1;

        // Rules ordered alphabetically by verb infinitive.
        // A, B, C
        // D
        int nextStart = simpleRule("parasti 3. pers., -dcas, pag. -dc?s", "dkties", 18,
                new String[] { "Darbbas v?rds", "Loct k? \"dkties\"" },
                new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdkties
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -duras, pag. -dr?s", "durties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"durties\"" },
                    new String[] { "Parasti 3. person?" }, gramText, lemma); // aizdurties
        }
        // E, F
        // G
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -g?as, pag. -g?z?s", "g?zties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"g?zties\"" },
                    new String[] { "Parasti 3. person?" }, gramText, lemma); // aizg?zties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -grauas, pag. -grauz?s", "grauzties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"grauzties\"" },
                    new String[] { "Parasti 3. person?" }, gramText, lemma); // aizgrauzties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("parasti 3. pers., -grieas, pag. -griez?s", "griezties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"griezties\"" },
                    new String[] { "Parasti 3. person?" }, gramText, lemma); // aizgriezties 2
        }
        // H, I, J
        // K
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -kaucas, pag. -kauc?s", "kaukties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"kaukties\"" },
                    new String[] { "Parasti 3. person?" }, gramText, lemma); // aizkaukties
        }
        // L, M, N, O, P, R
        // S
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -alcas, pag. -alc?s", "alkties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"alkties\"" },
                    new String[] { "Parasti 3. person?" }, gramText, lemma); // aizalkties
        }
        // T, U, V, Z
        return nextStart;
    }

    /**
     * Paradigm 18: first-conjugation reflexive verbs, entries listing forms
     * for all persons.
     * Handles rules of the form "-tupstos, -tupsties, -tupstas, pag. -tupos".
     * <p>
     * Split out of {@link #processBeginingWithPatterns(String, String)} for
     * readability only: these verb rules are long and highly specific, so
     * they do not conflict with other rules. Rules are tried from top to
     * bottom; as soon as one helper call returns something other than -1,
     * no further rule is tried.
     *
     * @param gramText grammar text, passed unchanged to every rule helper
     * @param lemma lemma, passed unchanged to every rule helper
     * @return new beginning for the gram string if one of these rules
     * matched, -1 otherwise.
     */
    private int firstConjRefAllPersVerbRules(String gramText, String lemma) {
        final int noMatch = -1;

        // Rules ordered alphabetically by verb infinitive.
        // A
        // B
        int nextStart = simpleRuleOptHyperns("-br?os, -br?zies, -br?as, pag. -br?os", "br?zties", 18,
                new String[] { "Darbbas v?rds", "Loct k? \"br?zties\"" }, null,
                gramText, lemma); // aizbr?zties
        if (nextStart == noMatch) {
            nextStart = simpleRule("-brcos, -brcies, -brcas, pag. -brcos", "brkties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"brkties\"" }, null,
                    gramText, lemma); // aizbrkties
        }
        // C
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-cieos, -cieties, -cieas, pag. -cietos", "ciesties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"ciesties\"" }, null,
                    gramText, lemma); // aizciesties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-crtos, -crties, -crtas, pag. -cirtos", "cirsties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"cirsties\"" }, null,
                    gramText, lemma); // aizcirsties
        }
        // D
        if (nextStart == noMatch) {
            nextStart = simpleRule("-degos, -dedzies, -degas, pag. -degos", "degties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"degties\"" }, null,
                    gramText, lemma); // aizdegties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-dr?os, -dr?zies, -dr?as, pag. -dr?zos", "dr?zties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"dr?zties\"" }, null,
                    gramText, lemma); // aizdr?zties
        }
        // E
        if (nextStart == noMatch) {
            nextStart = simpleRule("-elos, -elsies, -elas, pag. -elsos", "elsties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"elsties\"" }, null,
                    gramText, lemma); // aizelsties
        }
        // F
        // G
        if (nextStart == noMatch) {
            nextStart = simpleRule("-g?rdzos, -g?rdzies, -g?rdzas, pag. -g?rdzos", "g?rgties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"g?rgties\"" }, null,
                    gramText, lemma); // aizg?rgties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-grieos, -griezies, -grieas, pag. -griezos", "griezties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"griezties\"" }, null,
                    gramText, lemma); // aizgriezties 1
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns(
                    "-gulstos, -gulsties, -gulstas, ar -guos, -gulies, -guas, pag. -glos, ar -gulos",
                    "gulties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"gulties\"", "Parall?s formas" }, null,
                    gramText, lemma); // aizgulties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-gstos, -gsties, -gstas, pag. -guvos", "gties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"gties\"" }, null,
                    gramText, lemma); // aizgties
        }
        // (section letter unreadable in this copy of the file)
        if (nextStart == noMatch) {
            nextStart = simpleRule("-iedos, -iedies, -iedas, pag. -gidos", "isties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"isties\"" }, null,
                    gramText, lemma); // apisties
        }
        // H
        // I
        if (nextStart == noMatch) {
            nextStart = simpleRule("-ejos, -ejos, -ietas, pag. -g?jos", "ieties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"ieties\"" }, null,
                    gramText, lemma); // apieties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-iros, -iries, -iras, pag. -ros", "irties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"irties\" (k? ar airiem)" }, null,
                    gramText, lemma); // aizirties
        }
        // J
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-jdzos, -jdzies, -jdzas, pag. -jdzos", "jgties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"jgties\"" }, null,
                    gramText, lemma); // aizjgties
        }
        // K
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-karos, -karies, -karas, pag. -k?ros", "k?rties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"k?rties\"" }, null,
                    gramText, lemma); // apk?rties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-kl?jos, -kl?jies, -kl?jas, pag. -kl?jos", "kl?ties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"kl?ties\"" }, null,
                    gramText, lemma); // apkl?ties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-kliedzos, -kliedzies, -kliedzas, pag. -kliedzos", "kliegties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"kliegties\"" }, null,
                    gramText, lemma); // aizkliegties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-kr?cos, -kr?cies, -kr?cas, pag. -kr?cos", "kr?kties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"kr?kties\"" }, null,
                    gramText, lemma); // aizkr?kties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-kuos, -kulies, -kuas, pag. -klos", "kulties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"kulties\"" }, null,
                    gramText, lemma); // aizkulties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-ros, -eries, -eras, pag. -ros", "erties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"erties\"" }, null,
                    gramText, lemma); // aizerties
        }
        // L
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-laios, -laidies, -laias, pag. -laidos", "laisties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"laisties\"" }, null,
                    gramText, lemma); // aizlaisties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-lauos, -lauzies, -lauas, pag. -lauz?s", "lauzties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"lauzties\"" }, null,
                    gramText, lemma); // aizlauzties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-liedzos, -liedzies, -liedzas, pag. -liedzos", "liegties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"liegties\"" }, null,
                    gramText, lemma); // aizliegties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-liecos, -liecies, -liecas, pag. -liecos", "liekties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"liekties\"" }, null,
                    gramText, lemma); // aizliekties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-liekos, -liecies, -liekas, pag. -likos", "likties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"likties\"" }, null,
                    gramText, lemma); // aizlikties
        }
        // M, N, O, P, R, S
        // T
        if (nextStart == noMatch) {
            nextStart = simpleRuleOptHyperns("-tupstos, -tupsties, -tupstas, pag. -tupos", "tupties", 18,
                    new String[] { "Darbbas v?rds", "Loct k? \"tupties\"", "Parall?s formas" }, null,
                    gramText, lemma); // aiztupties
        }
        // TODO check parallel forms.
        // U, V, Z
        return nextStart;
    }

    /**
     * Paradigm 19: second-conjugation reflexive verbs.
     * Handles rules of the form "parasti 3. pers., -jas, pag. -j?s",
     * "-jos, -jies, -jas, -jamies, -jaties, pag. -jos, -j?mies, -j?ties; pav. -jies, -jieties",
     * and "-ojos, -ojies, -ojas, pag. -ojos".
     * <p>
     * Split out of {@link #processBeginingWithPatterns(String, String)} for
     * readability only: these verb rules are long and highly specific, so
     * they do not conflict with other rules. Rules are tried from top to
     * bottom; as soon as one helper call returns something other than -1,
     * no further rule is tried.
     *
     * @param gramText grammar text, passed unchanged to every rule helper
     * @param lemma lemma, passed unchanged to every rule helper
     * @return new beginning for the gram string if one of these rules
     * matched, -1 otherwise.
     */
    private int secondConjRefVerbRules(String gramText, String lemma) {
        final int noMatch = -1;

        // "Usually 3rd person" forms.
        int nextStart = simpleRule("parasti 3. pers., -jas, pag. -j?s", "ties", 19,
                new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                gramText, lemma); // absorbties
        if (nextStart == noMatch) {
            nextStart = simpleRule("parasti 3. pers., -ojas, pag. -oj?s", "oties", 19,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" },
                    gramText, lemma); // daudzk?roties
        }

        // All-person forms; the longest pattern is tried first.
        if (nextStart == noMatch) {
            nextStart = simpleRule(
                    "-jos, -jies, -jas, -jamies, -jaties, pag. -jos, -j?mies, -j?ties; pav. -jies, -jieties",
                    "ties", 19,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // adverbities
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-ojos, -ojies, -ojas, pag. -ojos", "oties", 19,
                    new String[] { "Darbbas v?rds" }, null,
                    gramText, lemma); // aiztuntuoties, apgrkoties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-jos, -jies, -jas, pag. -jos", "ties", 19,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // abstrahties
        }
        if (nextStart == noMatch) {
            nextStart = simpleRule("-?jos, -?jies, -?jas, pag. -?jos", "?ties", 19,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma); // aizdom?ties
        }
        return nextStart;
    }

    /**
     * Paradigm 20: Darbbas v?rdi 3. konjug?cija atgriezeniski
     * Rules in form "parasti 3. pers., -?s, pag. -j?s" and
     * "-os, -ies, -?s, pag. -jos".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these rules
     * for verbs are long and highly specific and, thus, do not conflict
     * with other rules.
     * @return new beginning for gram string if one of these rules matched,
     * -1 otherwise.
     */
    private int thirdConjRef3PersVerbRules(String gramText, String lemma) {
        int newBegin = -1;
        // Verb-specific rules.
        // Rules ordered alphabetically by verb infinitive.
        // A
        // B
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -brikas, pag. -brikj?s", "brikties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizbrikties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -brikas, pag. -brikj?s", "brikties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizbrikties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -brkas, pag. -brkj?s", "brkties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizbrkties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -brkas, pag. -brkj?s", "brkties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizbrkties
        // C, 
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -?abas, pag. -?abj?s", "?abties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aiz?abties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -?aukstas, pag. -?aukstj?s", "?aukstties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aiz?aukstties
        // D
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -d?rdas, pag. -d?rdj?s", "d?rdties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizd?rdties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -drebas, pag. -drebj?s", "drebties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizdrebties
        // E, F
        // G
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -gr?b?s, pag. -gr?bj?s", "grabties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizgrabties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -gurkstas, pag. -gurkstj?s", "gurkstties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizgurkstties
        // H, I, J
        // K
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -klabas, pag. -klabj?s", "klabties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizklabties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -klaudzas, pag. -klaudzj?s", "klaudzties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizklaudzties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -klukstas, pag. -klukstj?s", "klukstties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizklukstties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -klunkas, pag. -klunkj?s", "klunkties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizklunkties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -klunkas, pag. -klunkj?s", "klunkties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizklunkties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -knakst?s, pag. -knakstj?s", "knakstties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizknakstties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -knakas, pag. -knakj?s", "knakties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizknakties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -knakas, pag. -knakj?s", "knakties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizknakties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -knaukas, pag. -knaukj?s", "knaukties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizknaukties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -knaukas, pag. -knaukj?s", "knaukties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizknaukties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -knikas, pag. -knikj?s", "knikties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizknikties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -knikas, pag. -knikj?s", "knikties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizknikties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -krakstas, pag. -krakstj?s", "krakstties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizkrakstties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -krakas, pag. -krakj?s", "krakties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizkrakties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -kurkstas, pag. -kurkstj?s", "kurkstties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizkurkstties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -kurkas, pag. -kurkj?s", "kurkties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizkurkties
        // L, M, N, O, P, R, S, T, U, V, Z

        // Generic ending rules.
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -as, pag. -j?s", "ties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aiz?iepstties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -in?s, pag. -in?j?s", "in?ties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizbubin?ties
        if (newBegin == -1)
            newBegin = simpleRule("parasti 3. pers., -?s, pag. -j?s", "ties", 20,
                    new String[] { "Darbbas v?rds" }, new String[] { "Parasti 3. person?" }, gramText, lemma); //aizbdties
        return newBegin;
    }

    /**
     * Paradigm 20: reflexive verbs of the 3rd conjugation that are given
     * with forms for all persons.
     * Covers rules in form "-os, -ies, -?s, pag. -jos".
     * This function is separated out for readability from
     * {@link #processBeginingWithPatterns(String, String)} as currently these
     * rules for verbs are long and highly specific and, thus, do not conflict
     * with other rules.
     * <p>
     * The rules are tried strictly in the listed order; the first result
     * different from -1 is returned and no further rule is evaluated. All
     * rules go through {@code simpleRule} except the second one, which uses
     * {@code simpleRuleOptHyperns}.
     *
     * @param gramText grammar text, forwarded unchanged to the rule helpers.
     * @param lemma lemma, forwarded unchanged to the rule helpers.
     * @return new beginning for gram string if one of these rules matched,
     * -1 otherwise.
     */
    private int thirdConjRefAllPersVerbRules(String gramText, String lemma) {
        // Verb-specific rules.
        int matchedBegin = simpleRule("-dziedos, -dziedies, -dziedas, pag. -dzied?jos", "dzied?ties", 20,
                new String[] { "Darbbas v?rds" }, null, gramText, lemma); //aizdzied?ties
        if (matchedBegin != -1) {
            return matchedBegin;
        }

        // The only rule here that goes through simpleRuleOptHyperns().
        matchedBegin = simpleRuleOptHyperns("-dzenos, -dzenies, -dzenas, pag. -dzinos", "dzties", 20,
                new String[] { "Darbbas v?rds" }, null, gramText, lemma); //aizdzties
        if (matchedBegin != -1) {
            return matchedBegin;
        }

        // Each row is { first argument, second argument } of simpleRule().
        final String[][] remainingRules = {
                // Verb-specific rules, continued.
                { "-guos, -gulies, -guas, pag. -guljos", "gulties" }, //aizgulties
                { "-kustos, -kusties, -kustas, pag. -kustjos", "kustties" }, //aizkustties

                // Generic ending rules.
                { "-os, -ies, -as, pag. -jos", "ties" }, //apkaunties
                { "-inos, -inies, -in?s, pag. -in?jos", "in?ties" }, //apklauin?ties
                { "-os, -ies, -?s, pag. -jos", "ties" }, //apklausties
        };

        for (String[] rule : remainingRules) {
            matchedBegin = simpleRule(rule[0], rule[1], 20,
                    new String[] { "Darbbas v?rds" }, null, gramText, lemma);
            if (matchedBegin != -1) {
                return matchedBegin;
            }
        }
        return -1;
    }

    /**
     * Adds paradigm numbers to {@code paradigm} based on which flags are
     * present in {@code flags}.
     *
     * @param lemma is used for paradigm detection in cases where endings
     * matter (the first two flag checks below).
     */
    private void paradigmFromFlags(String lemma) {
        // Flag whose paradigm depends on how the lemma ends; at most one of
        // 30 / 13 / 14 is added.
        if (flags.contains("pabas v?rds")) {
            final boolean endsLikeParadigm30 = lemma.endsWith("ais") || lemma.endsWith("?");
            if (endsLikeParadigm30) {
                paradigm.add(30);
            } else if (lemma.matches(".*[^aeiou??]s")) {
                paradigm.add(13);
            } else if (lemma.matches(".*[^aeiou??]")) {
                paradigm.add(14);
            }
        }

        // Verbs: the two ending checks are independent of each other.
        if (flags.contains("Darbbas v?rds")) {
            final boolean endsLikeParadigm17 = lemma.endsWith("t") || lemma.endsWith("in?t");
            final boolean endsLikeParadigm20 = lemma.endsWith("ties") || lemma.endsWith("in?ties");
            if (endsLikeParadigm17) {
                paradigm.add(17);
            }
            if (endsLikeParadigm20) {
                paradigm.add(20);
            }
        }

        // Flags that map to a fixed paradigm regardless of the lemma.
        // fixedFlags[i] corresponds to fixedParadigms[i].
        final String[] fixedFlags = {
                "Apst?ka v?rds",
                "Partikula",
                "Priev?rds",
                "Izsauksmes v?rds", // Hardcoded
                "Sasin?jums", // Hardcoded
                "V?rds svevalod?",
                "Vietniekv?rds",
                "Jaut?jamais vietniekv?rds",
                "Noliedzamais vietniekv?rds",
                "Nor?d?mais vietniekv?rds",
                "Noteicamais vietniekv?rds",
                "Piederbas vietniekv?rds",
                "Visp?rin?mais vietniekv?rds",
                "Priedklis", // Prefixes are not words.
                "Salikteu daa", // Prefixes are not words.
        };
        final int[] fixedParadigms = {
                21,
                28,
                26,
                29,
                29,
                29,
                25,
                25,
                25,
                25,
                25,
                25,
                25,
                0,
                0,
        };
        for (int i = 0; i < fixedFlags.length; i++) {
            if (flags.contains(fixedFlags[i])) {
                paradigm.add(fixedParadigms[i]);
            }
        }
    }

    /**
     * Removes every empty inner list from {@link #leftovers}.
     * This should be called after something is removed from leftovers.
     * Does nothing if {@code leftovers} is {@code null}.
     */
    public void cleanupLeftovers() {
        // toJSON() treats a null leftovers field as legal, so do the same here.
        if (leftovers == null) {
            return;
        }
        // Remove through the iterator: one pass over the linked list instead
        // of repeated positional get(i)/remove(i) walks.
        Iterator<LinkedList<String>> it = leftovers.iterator();
        while (it.hasNext()) {
            if (it.next().isEmpty()) {
                it.remove();
            }
        }
    }

    /**
     * Hook for correcting known OCR errors and inconsistencies in the grammar
     * text. Every correction listed in the body is currently commented out,
     * so the input is returned unchanged.
     * Hopefully, this method will be empty for final data ;)
     *
     * @param gramText grammar text to correct.
     * @return the given text; no replacement is currently applied.
     */
    private String correctOCRErrors(String gramText) {
        // Inconsistencies in data. The replacements below are disabled and
        // kept only as a record of what used to be corrected.

        //gramText = gramText.replaceAll("^m?t\\.", "mat\\.");
        //gramText = gramText.replace(" m?t.", " mat.");
        //gramText = gramText.replace("vsk..", "vsk.");
        //gramText = gramText.replace("vsk .", "vsk.");
        //gramText = gramText.replaceAll("^gen\\.", "en\\.");
        //gramText = gramText.replace(" gen.", " en.");
        //gramText = gramText.replaceAll("^trans;", "trans\\.;");
        //gramText = gramText.replace(" trans;", " trans.;");

        //gramText = gramText.replace("-ais; s. -a: -?;", "-ais; s. -a, -?;"); //apgrcgs

        return gramText;

    }

    /**
     * Serializes this grammar with the original grammar text included.
     * Shorthand for {@code toJSON(true)}.
     *
     * @return the result of {@code toJSON(true)}.
     */
    public String toJSON() {
        final boolean includeOriginal = true;
        return toJSON(includeOriginal);
    }

    /**
     * Serializes this grammar as a JSON fragment of the form
     * {@code "Gram":{...}}.
     * Members are written in the order Paradigm, AltLemmas, Flags, Leftovers,
     * Original; a member whose field is {@code null} or empty is left out.
     * Inside AltLemmas and Leftovers, empty entries are skipped. Separators
     * are written in front of each entry that is actually emitted, so a
     * skipped entry can never leave a dangling comma behind.
     * <p>
     * In case of speed problems StringBuilder can be returned.
     *
     * @param printOrig whether to include the "Original" member holding
     * {@code orig}.
     * @return the JSON fragment.
     */
    public String toJSON(boolean printOrig) {
        StringBuilder res = new StringBuilder();

        res.append("\"Gram\":{");
        // True once at least one member has been written into the object.
        boolean hasPrev = false;

        if (paradigm != null && !paradigm.isEmpty()) {
            res.append("\"Paradigm\":");
            res.append(JSONUtils.simplesToJSON(paradigm));
            hasPrev = true;
        }

        if (altLemmas != null && !altLemmas.isEmpty()) {
            if (hasPrev)
                res.append(", ");
            res.append("\"AltLemmas\":{");
            // True once at least one paradigm key has been written.
            boolean hasPrevKey = false;
            Iterator<Integer> it = altLemmas.keySet().iterator();
            while (it.hasNext()) {
                Integer next = it.next();
                if (altLemmas.getAll(next).isEmpty())
                    continue;
                // Separator goes before the entry, so trailing skipped keys
                // do not produce ", }".
                if (hasPrevKey)
                    res.append(", ");
                res.append("\"");
                res.append(JSONObject.escape(next.toString()));
                res.append("\":[");
                Iterator<Tuple<Lemma, HashSet<String>>> flagIt = altLemmas.getAll(next).iterator();
                while (flagIt.hasNext()) {
                    Tuple<Lemma, HashSet<String>> alt = flagIt.next();
                    res.append("{");
                    res.append(alt.first.toJSON());
                    if (alt.second != null && !alt.second.isEmpty()) {
                        res.append(", \"Flags\":");
                        res.append(JSONUtils.simplesToJSON(alt.second));
                    }
                    res.append("}");
                    if (flagIt.hasNext())
                        res.append(", ");
                }
                res.append("]");
                hasPrevKey = true;
            }
            res.append("}");
            hasPrev = true;
        }

        if (flags != null && !flags.isEmpty()) {
            if (hasPrev)
                res.append(", ");
            res.append("\"Flags\":");
            res.append(JSONUtils.simplesToJSON(flags));
            hasPrev = true;
        }

        if (leftovers != null && leftovers.size() > 0) {
            if (hasPrev)
                res.append(", ");
            res.append("\"Leftovers\":[");

            // True once at least one non-empty leftover list has been written.
            boolean hasPrevItem = false;
            for (LinkedList<String> next : leftovers) {
                if (next.isEmpty())
                    continue;
                // Separator goes before the item, so trailing empty lists do
                // not produce ", ]".
                if (hasPrevItem)
                    res.append(", ");
                res.append(JSONUtils.simplesToJSON(next));
                hasPrevItem = true;
            }
            res.append("]");
            hasPrev = true;
        }

        if (printOrig && orig != null && orig.length() > 0) {
            if (hasPrev)
                res.append(", ");
            res.append("\"Original\":\"");
            res.append(JSONObject.escape(orig));
            res.append("\"");
        }

        res.append("}");
        return res.toString();
    }
}