com.joliciel.jochre.lexicon.DefaultLexiconWrapper.java Source code

Introduction

Here is the source code for com.joliciel.jochre.lexicon.DefaultLexiconWrapper.java
Source

 ///////////////////////////////////////////////////////////////////////////////
 //Copyright (C) 2013 Assaf Urieli
 //
 //This file is part of Jochre.
 //
 //Jochre is free software: you can redistribute it and/or modify
 //it under the terms of the GNU Affero General Public License as published by
 //the Free Software Foundation, either version 3 of the License, or
 //(at your option) any later version.
 //
 //Jochre is distributed in the hope that it will be useful,
 //but WITHOUT ANY WARRANTY; without even the implied warranty of
 //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 //GNU Affero General Public License for more details.
 //
 //You should have received a copy of the GNU Affero General Public License
 //along with Jochre.  If not, see <http://www.gnu.org/licenses/>.
 //////////////////////////////////////////////////////////////////////////////
 package com.joliciel.jochre.lexicon;

 import java.text.Normalizer;
 import java.text.Normalizer.Form;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 import java.util.regex.Pattern;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

 import com.joliciel.jochre.JochreSession;

 /**
  * For each word in the lexicon, adds variants with an initial uppercase and all upper-case.
  * @author Assaf Urieli
  *
  */
 public class DefaultLexiconWrapper implements Lexicon {
     @SuppressWarnings("unused")
     private static final Log LOG = LogFactory.getLog(DefaultLexiconWrapper.class);
     Lexicon baseLexicon;
     Set<String> upperCaseLexicon = new HashSet<String>();

     public DefaultLexiconWrapper(Lexicon baseLexicon) {
         super();
         this.baseLexicon = baseLexicon;
         Iterator<String> words = baseLexicon.getWords();
         while (words.hasNext()) {
             String word = words.next();
             if (word.length() > 0) {
                 String firstLetter = word.substring(0, 1);

                 if (word.length() == 1)
                     upperCaseLexicon.add(this.toUpperCaseNoAccents(firstLetter));
                 else
                     upperCaseLexicon.add(this.toUpperCaseNoAccents(firstLetter) + word.substring(1));

                 upperCaseLexicon.add(this.toUpperCaseNoAccents(word));
             }
         }
     }

     @Override
     public int getFrequency(String word) {
         int frequency = baseLexicon.getFrequency(word);
         if (frequency > 0)
             return frequency;

         if (upperCaseLexicon.contains(word))
             return 1;

         return 0;
     }

     String toUpperCaseNoAccents(String string) {
         // decompose accents
         String decomposed = Normalizer.normalize(string, Form.NFD);
         // removing diacritics
         String removed = decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

         String uppercase = removed.toUpperCase(JochreSession.getLocale());
         return uppercase;
     }

char getLetterWithoutAccents(char letter) {
   switch (letter) {
   case '': case '': case '': case '': case '': case '':
      return 'a';
   case '': case '': case '': case '?': case '':
      return 'c';
   case '?': case '':
      return 'd';
   case '': case '': case '': case '': case '': case '': case '': case '':
      return 'e';
   case '?': case '':
      return 'g';
   case '': case '':
      return 'h';
   case '': case 'i': case '': case '': case '': case '': case '':
      return 'i';
   case '':
      return 'j';
   case '':
      return 'l';
   case '': case '': case '':
      return 'n';
   case '': case '': case '': case '':
      return 'o';
   case '': case '':
      return 'r';
   case '': case '?': case '': case '':
      return 's';
   case '':
      return 't';
   case '': case '': case '': case '': case '': case '': case '':
      return 'u';
   case '':
      return 'y';
   case '': case '': case '': case '':
      return 'z';
   }
   return letter;
}

     @Override
     public Iterator<String> getWords() {
         return baseLexicon.getWords();
     }
 }