A stemmer for Brazilian Portuguese words - CSharp System.Globalization

CSharp examples for System.Globalization:Stemmer

Description

A stemmer for Brazilian Portuguese words

Demo Code

using System.Globalization;

namespace Lucene.Net.Analysis.Br
{
    /*/*from w  w  w .  j  a v  a2 s  .c  o m*/
    * Licensed to the Apache Software Foundation (ASF) under one or more
    * contributor license agreements.  See the NOTICE file distributed with
    * this work for additional information regarding copyright ownership.
    * The ASF licenses this file to You under the Apache License, Version 2.0
    * (the "License"); you may not use this file except in compliance with
    * the License.  You may obtain a copy of the License at
    *
    *     http://www.apache.org/licenses/LICENSE-2.0
    *
    * Unless required by applicable law or agreed to in writing, software
    * distributed under the License is distributed on an "AS IS" BASIS,
    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    * See the License for the specific language governing permissions and
    * limitations under the License.
    */

    /// <summary>
    /// A stemmer for Brazilian Portuguese words.
    /// </summary>
    public class BrazilianStemmer
    {
        private static readonly CultureInfo locale = new CultureInfo("pt-BR");

        /// <summary>
        /// Changed term
        /// </summary>
        private string TERM;
        private string CT;
        private string R1;
        private string R2;
        private string RV;


        public BrazilianStemmer()
        {
        }

        /// <summary>
        /// Stems the given term to an unique <c>discriminator</c>.
        /// </summary>
        /// <param name="term">  The term that should be stemmed. </param>
        /// <returns>Discriminator for <paramref name="term"/></returns>
        protected internal virtual string Stem(string term)
        {
            bool altered = false; // altered the term

            // creates CT
            CreateCT(term);

            if (!IsIndexable(CT))
            {
                return null;
            }
            if (!IsStemmable(CT))
            {
                return CT;
            }

            R1 = GetR1(CT);
            R2 = GetR1(R1);
            RV = GetRV(CT);
            TERM = term + ";" + CT;

            altered = Step1();
            if (!altered)
            {
                altered = Step2();
            }

            if (altered)
            {
                Step3();
            }
            else
            {
                Step4();
            }

            Step5();

            return CT;
        }

        /// <summary>
        /// Checks a term if it can be processed correctly.
        /// </summary>
        /// <returns>  true if, and only if, the given term consists in letters. </returns>
        private bool IsStemmable(string term)
        {
            for (int c = 0; c < term.Length; c++)
            {
                // Discard terms that contain non-letter characters.
                if (!char.IsLetter(term[c]))
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Checks a term if it can be processed indexed.
        /// </summary>
        /// <returns> true if it can be indexed </returns>
        private bool IsIndexable(string term)
        {
            return (term.Length < 30) && (term.Length > 2);
        }

        /// <summary>
        /// See if string is 'a','e','i','o','u'
        /// </summary>
        /// <returns> true if is vowel </returns>
        private bool IsVowel(char value)
        {
            return (value == 'a') || (value == 'e') || (value == 'i') || (value == 'o') || (value == 'u');
        }

        /// <summary>
        /// Gets R1
        /// 
        /// R1 - is the region after the first non-vowel following a vowel,
        ///      or is the null region at the end of the word if there is
        ///      no such non-vowel.
        /// </summary>
        /// <returns> null or a string representing R1 </returns>
        private string GetR1(string value)
        {
            int i;
            int j;

            // be-safe !!!
            if (value == null)
            {
                return null;
            }

            // find 1st vowel
            i = value.Length - 1;
            for (j = 0; j < i; j++)
            {
                if (IsVowel(value[j]))
                {
                    break;
                }
            }

            if (!(j < i))
            {
                return null;
            }

            // find 1st non-vowel
            for (; j < i; j++)
            {
                if (!(IsVowel(value[j])))
                {
                    break;
                }
            }

            if (!(j < i))
            {
                return null;
            }

            return value.Substring(j + 1);
        }

        /// <summary>
        /// Gets RV
        /// 
        /// RV - IF the second letter is a consonant, RV is the region after
        ///      the next following vowel,
        /// 
        ///      OR if the first two letters are vowels, RV is the region
        ///      after the next consonant,
        /// 
        ///      AND otherwise (consonant-vowel case) RV is the region after
        ///      the third letter.
        /// 
        ///      BUT RV is the end of the word if this positions cannot be
        ///      found.
        /// </summary>
        /// <returns> null or a string representing RV </returns>
        private string GetRV(string value)
        {
            int i;
            int j;

            // be-safe !!!
            if (value == null)
            {
                return null;
            }

            i = value.Length - 1;

            // RV - IF the second letter is a consonant, RV is the region after
            //      the next following vowel,
            if ((i > 0) && !IsVowel(value[1]))
            {
                // find 1st vowel
                for (j = 2; j < i; j++)
                {
                    if (IsVowel(value[j]))
                    {
                        break;
                    }
                }

                if (j < i)
                {
                    return value.Substring(j + 1);
                }
            }


            // RV - OR if the first two letters are vowels, RV is the region
            //      after the next consonant,
            if ((i > 1) && IsVowel(value[0]) && IsVowel(value[1]))
            {
                // find 1st consoant
                for (j = 2; j < i; j++)
                {
                    if (!IsVowel(value[j]))
                    {
                        break;
                    }
                }

                if (j < i)
                {
                    return value.Substring(j + 1);
                }
            }

            // RV - AND otherwise (consonant-vowel case) RV is the region after
            //      the third letter.
            if (i > 2)
            {
                return value.Substring(3);
            }

            return null;
        }

        /// <summary>
        /// 1) Turn to lowercase
        /// 2) Remove accents
        /// 3) ? -> a ; ? -> o
        /// 4) ? -> c
        /// </summary>
        /// <returns> null or a string transformed </returns>
        private string ChangeTerm(string value)
        {
            int j;
            string r = "";

            // be-safe !!!
            if (value == null)
            {
                return null;
            }

            value = locale.TextInfo.ToLower(value);
            for (j = 0; j < value.Length; j++)
            {
                if ((value[j] == '?') || (value[j] == '?') || (value[j] == '?'))
                {
                    r = r + "a";
                    continue;
                }
                if ((value[j] == '?') || (value[j] == '?'))
                {
                    r = r + "e";
                    continue;
                }
                if (value[j] == '?')
                {
                    r = r + "i";
                    continue;
                }
                if ((value[j] == '?') || (value[j] == '?') || (value[j] == '?'))
                {
                    r = r + "o";
                    continue;
                }
                if ((value[j] == '?') || (value[j] == '?'))
                {
                    r = r + "u";
                    continue;
                }
                if (value[j] == '?')
                {
                    r = r + "c";
                    continue;
                }
                if (value[j] == '?')
                {
                    r = r + "n";
                    continue;
                }

                r = r + value[j];
            }

            return r;
        }

        /// <summary>
        /// Check if a string ends with a suffix
        /// </summary>
        /// <returns> true if the string ends with the specified suffix </returns>
        private bool Suffix(string value, string suffix)
        {

            // be-safe !!!
            if ((value == null) || (suffix == null))
            {
                return false;
            }

            if (suffix.Length > value.Length)
            {
                return false;
            }

            return value.Substring(value.Length - suffix.Length).Equals(suffix);
        }

        /// <summary>
        /// Replace a <see cref="string"/> suffix by another
        /// </summary>
        /// <returns> the replaced <see cref="string"/> </returns>
        private string ReplaceSuffix(string value, string toReplace, string changeTo)
        {
            string vvalue;

            // be-safe !!!
            if ((value == null) || (toReplace == null) || (changeTo == null))
            {
                return value;
            }

            vvalue = RemoveSuffix(value, toReplace);

            if (value.Equals(vvalue))
            {
                return value;
            }
            else
            {
                return vvalue + changeTo;
            }
        }

        /// <summary>
        /// Remove a <see cref="string"/> suffix
        /// </summary>
        /// <returns> the <see cref="string"/> without the suffix </returns>
        private string RemoveSuffix(string value, string toRemove)
        {
            // be-safe !!!
            if ((value == null) || (toRemove == null) || !Suffix(value, toRemove))
            {
                return value;
            }

            return value.Substring(0, value.Length - toRemove.Length);
        }

        /// <summary>
        /// See if a suffix is preceded by a <see cref="string"/>
        /// </summary>
        /// <returns> true if the suffix is preceded </returns>
        private bool SuffixPreceded(string value, string suffix, string preceded)
        {
            // be-safe !!!
            if ((value == null) || (suffix == null) || (preceded == null) || !Suffix(value, suffix))
            {
                return false;
            }

            return Suffix(RemoveSuffix(value, suffix), preceded);
        }

        /// <summary>
        /// Creates CT (changed term) , substituting * '?' and '?' for 'a~' and 'o~'.
        /// </summary>
        private void CreateCT(string term)
        {
            CT = ChangeTerm(term);

            if (CT.Length < 2)
            {
                return;
            }

            // if the first character is ... , remove it
            if ((CT[0] == '"') || (CT[0] == '\'') || (CT[0] == '-') || (CT[0] == ',') || (CT[0] == ';') || (CT[0] == '.') || (CT[0] == '?') || (CT[0] == '!'))
            {
                CT = CT.Substring(1);
            }

            if (CT.Length < 2)
            {
                return;
            }

            // if the last character is ... , remove it
            if ((CT[CT.Length - 1] == '-') || (CT[CT.Length - 1] == ',') || (CT[CT.Length - 1] == ';') || (CT[CT.Length - 1] == '.') || (CT[CT.Length - 1] == '?') || (CT[CT.Length - 1] == '!') || (CT[CT.Length - 1] == '\'') || (CT[CT.Length - 1] == '"'))
            {
                CT = CT.Substring(0, CT.Length - 1);
            }
        }


        /// <summary>
        /// Standard suffix removal.
        /// Search for the longest among the following suffixes, and perform
        /// the following actions:
        /// </summary>
        /// <returns> false if no ending was removed </returns>
        private bool Step1()
        {
            if (CT == null)
            {
                return false;
            }

            // suffix length = 7
            if (Suffix(CT, "uciones") && Suffix(R2, "uciones"))
            {
                CT = ReplaceSuffix(CT, "uciones", "u");
                return true;
            }

            // suffix length = 6
            if (CT.Length >= 6)
            {
                if (Suffix(CT, "imentos") && Suffix(R2, "imentos"))
                {
                    CT = RemoveSuffix(CT, "imentos");
                    return true;
                }
                if (Suffix(CT, "amentos") && Suffix(R2, "amentos"))
                {
                    CT = RemoveSuffix(CT, "amentos");
                    return true;
                }
                if (Suffix(CT, "adores") && Suffix(R2, "adores"))
                {
                    CT = RemoveSuffix(CT, "adores");
                    return true;
                }
                if (Suffix(CT, "adoras") && Suffix(R2, "adoras"))
                {
                    CT = RemoveSuffix(CT, "adoras");
                    return true;
                }
                if (Suffix(CT, "logias") && Suffix(R2, "logias"))
                {
                    ReplaceSuffix(CT, "logias", "log");
                    return true;
                }
                if (Suffix(CT, "encias") && Suffix(R2, "encias"))
                {
                    CT = ReplaceSuffix(CT, "encias", "ente");
                    return true;
                }
                if (Suffix(CT, "amente") && Suffix(R1, "amente"))
                {
                    CT = RemoveSuffix(CT, "amente");
                    return true;
                }
                if (Suffix(CT, "idades") && Suffix(R2, "idades"))
                {
                    CT = RemoveSuffix(CT, "idades");
                    return true;
                }
            }

            // suffix length = 5
            if (CT.Length >= 5)
            {
                if (Suffix(CT, "acoes") && Suffix(R2, "acoes"))
                {
                    CT = RemoveSuffix(CT, "acoes");
                    return true;
                }
                if (Suffix(CT, "imento") && Suffix(R2, "imento"))
                {
                    CT = RemoveSuffix(CT, "imento");
                    return true;
                }
                if (Suffix(CT, "amento") && Suffix(R2, "amento"))
                {
                    CT = RemoveSuffix(CT, "amento");
                    return true;
                }
                if (Suffix(CT, "adora") && Suffix(R2, "adora"))
                {
                    CT = RemoveSuffix(CT, "adora");
                    return true;
                }
                if (Suffix(CT, "ismos") && Suffix(R2, "ismos"))
                {
                    CT = RemoveSuffix(CT, "ismos");
                    return true;
                }
                if (Suffix(CT, "istas") && Suffix(R2, "istas"))
                {
                    CT = RemoveSuffix(CT, "istas");
                    return true;
                }
                if (Suffix(CT, "logia") && Suffix(R2, "logia"))
                {
                    CT = ReplaceSuffix(CT, "logia", "log");
                    return true;
                }
                if (Suffix(CT, "ucion") && Suffix(R2, "ucion"))
                {
                    CT = ReplaceSuffix(CT, "ucion", "u");
                    return true;
                }
                if (Suffix(CT, "encia") && Suffix(R2, "encia"))
                {
                    CT = ReplaceSuffix(CT, "encia", "ente");
                    return true;
                }
                if (Suffix(CT, "mente") && Suffix(R2, "mente"))
                {
                    CT = RemoveSuffix(CT, "mente");
                    return true;
                }
                if (Suffix(CT, "idade") && Suffix(R2, "idade"))
                {
                    CT = RemoveSuffix(CT, "idade");
                    return true;
                }
            }

            // suffix length = 4
            if (CT.Length >= 4)
            {
                if (Suffix(CT, "acao") && Suffix(R2, "acao"))
                {
                    CT = RemoveSuffix(CT, "acao");
                    return true;
                }
                if (Suffix(CT, "ezas") && Suffix(R2, "ezas"))
                {
                    CT = RemoveSuffix(CT, "ezas");
                    return true;
                }
                if (Suffix(CT, "icos") && Suffix(R2, "icos"))
                {
                    CT = RemoveSuffix(CT, "icos");
                    return true;
                }
                if (Suffix(CT, "icas") && Suffix(R2, "icas"))
                {
                    CT = RemoveSuffix(CT, "icas");
                    return true;
                }
                if (Suffix(CT, "ismo") && Suffix(R2, "ismo"))
                {
                    CT = RemoveSuffix(CT, "ismo");
                    return true;
                }
                if (Suffix(CT, "avel") && Suffix(R2, "avel"))
                {
                    CT = RemoveSuffix(CT, "avel");
                    return true;
                }
                if (Suffix(CT, "ivel") && Suffix(R2, "ivel"))
                {
                    CT = RemoveSuffix(CT, "ivel");
                    return true;
                }
                if (Suffix(CT, "ista") && Suffix(R2, "ista"))
                {
                    CT = RemoveSuffix(CT, "ista");
                    return true;
                }
                if (Suffix(CT, "osos") && Suffix(R2, "osos"))
                {
                    CT = RemoveSuffix(CT, "osos");
                    return true;
                }
                if (Suffix(CT, "osas") && Suffix(R2, "osas"))
                {
                    CT = RemoveSuffix(CT, "osas");
                    return true;
                }
                if (Suffix(CT, "ador") && Suffix(R2, "ador"))
                {
                    CT = RemoveSuffix(CT, "ador");
                    return true;
                }
                if (Suffix(CT, "ivas") && Suffix(R2, "ivas"))
                {
                    CT = RemoveSuffix(CT, "ivas");
                    return true;
                }
                if (Suffix(CT, "ivos") && Suffix(R2, "ivos"))
                {
                    CT = RemoveSuffix(CT, "ivos");
                    return true;
                }
                if (Suffix(CT, "iras") && Suffix(RV, "iras") && SuffixPreceded(CT, "iras", "e"))
                {
                    CT = ReplaceSuffix(CT, "iras", "ir");
                    return true;
                }
            }

            // suffix length = 3
            if (CT.Length >= 3)
            {
                if (Suffix(CT, "eza") && Suffix(R2, "eza"))
                {
                    CT = RemoveSuffix(CT, "eza");
                    return true;
                }
                if (Suffix(CT, "ico") && Suffix(R2, "ico"))
                {
                    CT = RemoveSuffix(CT, "ico");
                    return true;
                }
                if (Suffix(CT, "ica") && Suffix(R2, "ica"))
                {
                    CT = RemoveSuffix(CT, "ica");
                    return true;
                }
                if (Suffix(CT, "oso") && Suffix(R2, "oso"))
                {
                    CT = RemoveSuffix(CT, "oso");
                    return true;
                }
                if (Suffix(CT, "osa") && Suffix(R2, "osa"))
                {
                    CT = RemoveSuffix(CT, "osa");
                    return true;
                }
                if (Suffix(CT, "iva") && Suffix(R2, "iva"))
                {
                    CT = RemoveSuffix(CT, "iva");
                    return true;
                }
                if (Suffix(CT, "ivo") && Suffix(R2, "ivo"))
                {
                    CT = RemoveSuffix(CT, "ivo");
                    return true;
                }
                if (Suffix(CT, "ira") && Suffix(RV, "ira") && SuffixPreceded(CT, "ira", "e"))
                {
                    CT = ReplaceSuffix(CT, "ira", "ir");
                    return true;
                }
            }

            // no ending was removed by step1
            return false;
        }


        /// <summary>
        /// Verb suffixes.
        /// 
        /// Search for the longest among the following suffixes in RV,
        /// and if found, delete.
        /// </summary>
        /// <returns> false if no ending was removed </returns>
        private bool Step2()
        {
            if (RV == null)
            {
                return false;
            }

            // suffix lenght = 7
            if (RV.Length >= 7)
            {
                if (Suffix(RV, "issemos"))
                {
                    CT = RemoveSuffix(CT, "issemos");
                    return true;
                }
                if (Suffix(RV, "essemos"))
                {
                    CT = RemoveSuffix(CT, "essemos");
                    return true;
                }
                if (Suffix(RV, "assemos"))
                {
                    CT = RemoveSuffix(CT, "assemos");
                    return true;
                }
                if (Suffix(RV, "ariamos"))
                {
                    CT = RemoveSuffix(CT, "ariamos");
                    return true;
                }
                if (Suffix(RV, "eriamos"))
                {
                    CT = RemoveSuffix(CT, "eriamos");
                    return true;
                }
                if (Suffix(RV, "iriamos"))
                {
                    CT = RemoveSuffix(CT, "iriamos");
                    return true;
                }
            }

            // suffix length = 6
            if (RV.Length >= 6)
            {
                if (Suffix(RV, "iremos"))
                {
                    CT = RemoveSuffix(CT, "iremos");
                    return true;
                }
                if (Suffix(RV, "eremos"))
                {
                    CT = RemoveSuffix(CT, "eremos");
                    return true;
                }
                if (Suffix(RV, "aremos"))
                {
                    CT = RemoveSuffix(CT, "aremos");
                    return true;
                }
                if (Suffix(RV, "avamos"))
                {
                    CT = RemoveSuffix(CT, "avamos");
                    return true;
                }
                if (Suffix(RV, "iramos"))
                {
                    CT = RemoveSuffix(CT, "iramos");
                    return true;
                }
                if (Suffix(RV, "eramos"))
                {
                    CT = RemoveSuffix(CT, "eramos");
                    return true;
                }
                if (Suffix(RV, "aramos"))
                {
                    CT = RemoveSuffix(CT, "aramos");
                    return true;
                }
                if (Suffix(RV, "asseis"))
                {
                    CT = RemoveSuffix(CT, "asseis");
                    return true;
                }
                if (Suffix(RV, "esseis"))
                {
                    CT = RemoveSuffix(CT, "esseis");
                    return true;
                }
                if (Suffix(RV, "isseis"))
                {
                    CT = RemoveSuffix(CT, "isseis");
                    return true;
                }
                if (Suffix(RV, "arieis"))
                {
                    CT = RemoveSuffix(CT, "arieis");
                    return true;
                }
                if (Suffix(RV, "erieis"))
                {
                    CT = RemoveSuffix(CT, "erieis");
                    return true;
                }
                if (Suffix(RV, "irieis"))
                {
                    CT = RemoveSuffix(CT, "irieis");
                    return true;
                }
            }


            // suffix length = 5
            if (RV.Length >= 5)
            {
                if (Suffix(RV, "irmos"))
                {
                    CT = RemoveSuffix(CT, "irmos");
                    return true;
                }
                if (Suffix(RV, "iamos"))
                {
                    CT = RemoveSuffix(CT, "iamos");
                    return true;
                }
                if (Suffix(RV, "armos"))
                {
                    CT = RemoveSuffix(CT, "armos");
                    return true;
                }
                if (Suffix(RV, "ermos"))
                {
                    CT = RemoveSuffix(CT, "ermos");
                    return true;
                }
                if (Suffix(RV, "areis"))
                {
                    CT = RemoveSuffix(CT, "areis");
                    return true;
                }
                if (Suffix(RV, "ereis"))
                {
                    CT = RemoveSuffix(CT, "ereis");
                    return true;
                }
                if (Suffix(RV, "ireis"))
                {
                    CT = RemoveSuffix(CT, "ireis");
                    return true;
                }
                if (Suffix(RV, "asses"))
                {
                    CT = RemoveSuffix(CT, "asses");
                    return true;
                }
                if (Suffix(RV, "esses"))
                {
                    CT = RemoveSuffix(CT, "esses");
                    return true;
                }
                if (Suffix(RV, "isses"))
                {
                    CT = RemoveSuffix(CT, "isses");
                    return true;
                }
                if (Suffix(RV, "astes"))
                {
                    CT = RemoveSuffix(CT, "astes");
                    return true;
                }
                if (Suffix(RV, "assem"))
                {
                    CT = RemoveSuffix(CT, "assem");
                    return true;
                }
                if (Suffix(RV, "essem"))
                {
                    CT = RemoveSuffix(CT, "essem");
                    return true;
                }
                if (Suffix(RV, "issem"))
                {
                    CT = RemoveSuffix(CT, "issem");
                    return true;
                }
                if (Suffix(RV, "ardes"))
                {
                    CT = RemoveSuffix(CT, "ardes");
                    return true;
                }
                if (Suffix(RV, "erdes"))
                {
                    CT = RemoveSuffix(CT, "erdes");
                    return true;
                }
                if (Suffix(RV, "irdes"))
                {
                    CT = RemoveSuffix(CT, "irdes");
                    return true;
                }
                if (Suffix(RV, "ariam"))
                {
                    CT = RemoveSuffix(CT, "ariam");
                    return true;
                }
                if (Suffix(RV, "eriam"))
                {
                    CT = RemoveSuffix(CT, "eriam");
                    return true;
                }
                if (Suffix(RV, "iriam"))
                {
                    CT = RemoveSuffix(CT, "iriam");
                    return true;
                }
                if (Suffix(RV, "arias"))
                {
                    CT = RemoveSuffix(CT, "arias");
                    return true;
                }
                if (Suffix(RV, "erias"))
                {
                    CT = RemoveSuffix(CT, "erias");
                    return true;
                }
                if (Suffix(RV, "irias"))
                {
                    CT = RemoveSuffix(CT, "irias");
                    return true;
                }
                if (Suffix(RV, "estes"))
                {
                    CT = RemoveSuffix(CT, "estes");
                    return true;
                }
                if (Suffix(RV, "istes"))
                {
                    CT = RemoveSuffix(CT, "istes");
                    return true;
                }
                if (Suffix(RV, "areis"))
                {
                    CT = RemoveSuffix(CT, "areis");
                    return true;
                }
                if (Suffix(RV, "aveis"))
                {
                    CT = RemoveSuffix(CT, "aveis");
                    return true;
                }
            }

            // suffix length = 4
            if (RV.Length >= 4)
            {
                if (Suffix(RV, "aria"))
                {
                    CT = RemoveSuffix(CT, "aria");
                    return true;
                }
                if (Suffix(RV, "eria"))
                {
                    CT = RemoveSuffix(CT, "eria");
                    return true;
                }
                if (Suffix(RV, "iria"))
                {
                    CT = RemoveSuffix(CT, "iria");
                    return true;
                }
                if (Suffix(RV, "asse"))
                {
                    CT = RemoveSuffix(CT, "asse");
                    return true;
                }
                if (Suffix(RV, "esse"))
                {
                    CT = RemoveSuffix(CT, "esse");
                    return true;
                }
                if (Suffix(RV, "isse"))
                {
                    CT = RemoveSuffix(CT, "isse");
                    return true;
                }
                if (Suffix(RV, "aste"))
                {
                    CT = RemoveSuffix(CT, "aste");
                    return true;
                }
                if (Suffix(RV, "este"))
                {
                    CT = RemoveSuffix(CT, "este");
                    return true;
                }
                if (Suffix(RV, "iste"))
                {
                    CT = RemoveSuffix(CT, "iste");
                    return true;
                }
                if (Suffix(RV, "arei"))
                {
                    CT = RemoveSuffix(CT, "arei");
                    return true;
                }
                if (Suffix(RV, "erei"))
                {
                    CT = RemoveSuffix(CT, "erei");
                    return true;
                }
                if (Suffix(RV, "irei"))
                {
                    CT = RemoveSuffix(CT, "irei");
                    return true;
                }
                if (Suffix(RV, "aram"))
                {
                    CT = RemoveSuffix(CT, "aram");
                    return true;
                }
                if (Suffix(RV, "eram"))
                {
                    CT = RemoveSuffix(CT, "eram");
                    return true;
                }
                if (Suffix(RV, "iram"))
                {
                    CT = RemoveSuffix(CT, "iram");
                    return true;
                }
                if (Suffix(RV, "avam"))
                {
                    CT = RemoveSuffix(CT, "avam");
                    return true;
                }
                if (Suffix(RV, "arem"))
                {
                    CT = RemoveSuffix(CT, "arem");
                    return true;
                }
                if (Suffix(RV, "erem"))
                {
                    CT = RemoveSuffix(CT, "erem");
                    return true;
                }
                if (Suffix(RV, "irem"))
                {
                    CT = RemoveSuffix(CT, "irem");
                    return true;
                }
                if (Suffix(RV, "ando"))
                {
                    CT = RemoveSuffix(CT, "ando");
                    return true;
                }
                if (Suffix(RV, "endo"))
                {
                    CT = RemoveSuffix(CT, "endo");
                    return true;
                }
                if (Suffix(RV, "indo"))
                {
                    CT = RemoveSuffix(CT, "indo");
                    return true;
                }
                if (Suffix(RV, "arao"))
                {
                    CT = RemoveSuffix(CT, "arao");
                    return true;
                }
                if (Suffix(RV, "erao"))
                {
                    CT = RemoveSuffix(CT, "erao");
                    return true;
                }
                if (Suffix(RV, "irao"))
                {
                    CT = RemoveSuffix(CT, "irao");
                    return true;
                }
                if (Suffix(RV, "adas"))
                {
                    CT = RemoveSuffix(CT, "adas");
                    return true;
                }
                if (Suffix(RV, "idas"))
                {
                    CT = RemoveSuffix(CT, "idas");
                    return true;
                }
                if (Suffix(RV, "aras"))
                {
                    CT = RemoveSuffix(CT, "aras");
                    return true;
                }
                if (Suffix(RV, "eras"))
                {
                    CT = RemoveSuffix(CT, "eras");
                    return true;
                }
                if (Suffix(RV, "iras"))
                {
                    CT = RemoveSuffix(CT, "iras");
                    return true;
                }
                if (Suffix(RV, "avas"))
                {
                    CT = RemoveSuffix(CT, "avas");
                    return true;
                }
                if (Suffix(RV, "ares"))
                {
                    CT = RemoveSuffix(CT, "ares");
                    return true;
                }
                if (Suffix(RV, "eres"))
                {
                    CT = RemoveSuffix(CT, "eres");
                    return true;
                }
                if (Suffix(RV, "ires"))
                {
                    CT = RemoveSuffix(CT, "ires");
                    return true;
                }
                if (Suffix(RV, "ados"))
                {
                    CT = RemoveSuffix(CT, "ados");
                    return true;
                }
                if (Suffix(RV, "idos"))
                {
                    CT = RemoveSuffix(CT, "idos");
                    return true;
                }
                if (Suffix(RV, "amos"))
                {
                    CT = RemoveSuffix(CT, "amos");
                    return true;
                }
                if (Suffix(RV, "emos"))
                {
                    CT = RemoveSuffix(CT, "emos");
                    return true;
                }
                if (Suffix(RV, "imos"))
                {
                    CT = RemoveSuffix(CT, "imos");
                    return true;
                }
                if (Suffix(RV, "iras"))
                {
                    CT = RemoveSuffix(CT, "iras");
                    return true;
                }
                if (Suffix(RV, "ieis"))
                {
                    CT = RemoveSuffix(CT, "ieis");
                    return true;
                }
            }

            // suffix length = 3
            if (RV.Length >= 3)
            {
                if (Suffix(RV, "ada"))
                {
                    CT = RemoveSuffix(CT, "ada");
                    return true;
                }
                if (Suffix(RV, "ida"))
                {
                    CT = RemoveSuffix(CT, "ida");
                    return true;
                }
                if (Suffix(RV, "ara"))
                {
                    CT = RemoveSuffix(CT, "ara");
                    return true;
                }
                if (Suffix(RV, "era"))
                {
                    CT = RemoveSuffix(CT, "era");
                    return true;
                }
                if (Suffix(RV, "ira"))
                {
                    CT = RemoveSuffix(CT, "ava");
                    return true;
                }
                if (Suffix(RV, "iam"))
                {
                    CT = RemoveSuffix(CT, "iam");
                    return true;
                }
                if (Suffix(RV, "ado"))
                {
                    CT = RemoveSuffix(CT, "ado");
                    return true;
                }
                if (Suffix(RV, "ido"))
                {
                    CT = RemoveSuffix(CT, "ido");
                    return true;
                }
                if (Suffix(RV, "ias"))
                {
                    CT = RemoveSuffix(CT, "ias");
                    return true;
                }
                if (Suffix(RV, "ais"))
                {
                    CT = RemoveSuffix(CT, "ais");
                    return true;
                }
                if (Suffix(RV, "eis"))
                {
                    CT = RemoveSuffix(CT, "eis");
                    return true;
                }
                if (Suffix(RV, "ira"))
                {
                    CT = RemoveSuffix(CT, "ira");
                    return true;
                }
                if (Suffix(RV, "ear"))
                {
                    CT = RemoveSuffix(CT, "ear");
                    return true;
                }
            }

            // suffix length = 2
            if (RV.Length >= 2)
            {
                if (Suffix(RV, "ia"))
                {
                    CT = RemoveSuffix(CT, "ia");
                    return true;
                }
                if (Suffix(RV, "ei"))
                {
                    CT = RemoveSuffix(CT, "ei");
                    return true;
                }
                if (Suffix(RV, "am"))
                {
                    CT = RemoveSuffix(CT, "am");
                    return true;
                }
                if (Suffix(RV, "em"))
                {
                    CT = RemoveSuffix(CT, "em");
                    return true;
                }
                if (Suffix(RV, "ar"))
                {
                    CT = RemoveSuffix(CT, "ar");
                    return true;
                }
                if (Suffix(RV, "er"))
                {
                    CT = RemoveSuffix(CT, "er");
                    return true;
                }
                if (Suffix(RV, "ir"))
                {
                    CT = RemoveSuffix(CT, "ir");
                    return true;
                }
                if (Suffix(RV, "as"))
                {
                    CT = RemoveSuffix(CT, "as");
                    return true;
                }
                if (Suffix(RV, "es"))
                {
                    CT = RemoveSuffix(CT, "es");
                    return true;
                }
                if (Suffix(RV, "is"))
                {
                    CT = RemoveSuffix(CT, "is");
                    return true;
                }
                if (Suffix(RV, "eu"))
                {
                    CT = RemoveSuffix(CT, "eu");
                    return true;
                }
                if (Suffix(RV, "iu"))
                {
                    CT = RemoveSuffix(CT, "iu");
                    return true;
                }
                if (Suffix(RV, "iu"))
                {
                    CT = RemoveSuffix(CT, "iu");
                    return true;
                }
                if (Suffix(RV, "ou"))
                {
                    CT = RemoveSuffix(CT, "ou");
                    return true;
                }
            }

            // no ending was removed by step2
            return false;
        }

        /// <summary>
        /// Delete suffix 'i' if in RV and preceded by 'c'
        /// </summary>
        private void Step3()
        {
            if (RV == null)
            {
                return;
            }

            if (Suffix(RV, "i") && SuffixPreceded(RV, "i", "c"))
            {
                CT = RemoveSuffix(CT, "i");
            }

        }

        /// <summary>
        /// Residual suffix
        /// 
        /// If the word ends with one of the suffixes (os a i o ? ? ?)
        /// in RV, delete it
        /// </summary>
        private void Step4()
        {
            if (RV == null)
            {
                return;
            }

            if (Suffix(RV, "os"))
            {
                CT = RemoveSuffix(CT, "os");
                return;
            }
            if (Suffix(RV, "a"))
            {
                CT = RemoveSuffix(CT, "a");
                return;
            }
            if (Suffix(RV, "i"))
            {
                CT = RemoveSuffix(CT, "i");
                return;
            }
            if (Suffix(RV, "o"))
            {
                CT = RemoveSuffix(CT, "o");
                return;
            }

        }

        /// <summary>
        /// If the word ends with one of ( e ? ?) in RV,delete it,
        /// and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
        /// delete the 'u' (or 'i')
        /// 
        /// Or if the word ends ? remove the cedilha
        /// </summary>
        private void Step5()
        {
            if (RV == null)
            {
                return;
            }

            if (Suffix(RV, "e"))
            {
                if (SuffixPreceded(RV, "e", "gu"))
                {
                    CT = RemoveSuffix(CT, "e");
                    CT = RemoveSuffix(CT, "u");
                    return;
                }

                if (SuffixPreceded(RV, "e", "ci"))
                {
                    CT = RemoveSuffix(CT, "e");
                    CT = RemoveSuffix(CT, "i");
                    return;
                }

                CT = RemoveSuffix(CT, "e");
                return;
            }
        }

        /// <summary>
        /// For log and debug purpose
        /// </summary>
        /// <returns> TERM, CT, RV, R1 and R2 </returns>
        public virtual string Log()
        {
            return " (TERM = " + TERM + ")" + " (CT = " + CT + ")" + " (RV = " + RV + ")" + " (R1 = " + R1 + ")" + " (R2 = " + R2 + ")";
        }
    }
}

Related Tutorials