GIST.IzbirkomExtractor.Russian.OrdinalFactory.java Source code

Java tutorial

Introduction

Here is the source code for GIST.IzbirkomExtractor.Russian.OrdinalFactory.java

Source

/**
 * Copyright  2013 , UT-Battelle, LLC
 * All rights reserved
 *                                        
 * JavaParserOfAddresses, Version 1.0
 * http://github.com/sorokine/JavaParserOfAddresses
 * 
 * This program is freely distributed under UT-Battelle, LLC
 * open source license.  Read the file LICENSE.txt for details.
 */

package GIST.IzbirkomExtractor.Russian;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

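/**
 * Russian ordinals factory.
 *
 * TODO: code all the forms of ordinal numbers; specific endings
 * can be found by intersecting sets of endings by number, gender,
 * value, and case.
 *
 * NOTE(review): this comment originally held several tables of ordinal
 * endings written in Cyrillic. The Cyrillic cells were destroyed by an
 * encoding error (letters were dropped or replaced with '?'), so the
 * tables cannot be recovered from this copy and must be restored from the
 * upstream repository. Only the following ASCII fragments of the table
 * headings survive; they appear to name the numeral values that each table
 * applies to:
 *
 *   - 0 1 4 5 9 10-20
 *   - 2 6 7 8
 *   - 3
 *   - 5
 *   - (one further table with no surviving heading)
 *
 * The comment ended with two lists of ordinals. Reconstructed from the
 * surviving character pattern (presumably nominative masculine singular;
 * verify against upstream):
 *
 *   0-10:  нулевой первый второй третий четвертый пятый шестой седьмой
 *          восьмой девятый десятый
 *   11-19: одиннадцатый двенадцатый тринадцатый четырнадцатый пятнадцатый
 *          шестнадцатый семнадцатый восемнадцатый девятнадцатый
 *
 * @author Alex Sorokine <sorokine@gmail.com>
 *
 */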
public class OrdinalFactory {

    /*
     * NOTE(review): in the copy of this file under review every Cyrillic
     * character had been destroyed by an encoding round-trip (letters were
     * either dropped or turned into '?'). That left empty and duplicate
     * stems and a bare '?' inside the generated alternation, which makes
     * Pattern.compile throw PatternSyntaxException. The literals below were
     * reconstructed from the surviving character pattern and are spelled
     * with Unicode escapes so that this source stays pure ASCII and cannot
     * be damaged the same way again. Verify them against the upstream
     * repository before relying on them.
     */

    /**
     * Body of the regex character class used for inflectional endings: the
     * lower-case Cyrillic letters U+0430 through U+044F. The letter U+0451
     * (yo) lies outside this range and is therefore not accepted.
     */
    private static final String ENDING_LETTERS = "\u0430-\u044f";

    /** Flags shared by all patterns: case-insensitive, Unicode-aware matching. */
    private static final int PATTERN_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /**
     * Stems for ordinals from 0 to 10; the array index is the value.
     */
    private static final String[] STEMS_0_10 = {
            "\u043d\u0443\u043b\u0435\u0432",             // 0: nulev-
            "\u043f\u0435\u0440\u0432",                   // 1: perv-
            "\u0432\u0442\u043e\u0440",                   // 2: vtor-
            "\u0442\u0440\u0435\u0442",                   // 3: tret-
            "\u0447\u0435\u0442\u0432\u0435\u0440\u0442", // 4: chetvert-
            "\u043f\u044f\u0442",                         // 5: pyat-
            "\u0448\u0435\u0441\u0442",                   // 6: shest-
            "\u0441\u0435\u0434\u044c\u043c",             // 7: sed'm-
            "\u0432\u043e\u0441\u044c\u043c",             // 8: vos'm-
            "\u0434\u0435\u0432\u044f\u0442",             // 9: devyat-
            "\u0434\u0435\u0441\u044f\u0442"              // 10: desyat-
    };

    /**
     * Stems for ordinals from 11 to 19; the array index is (value - 11).
     */
    private static final String[] STEMS_11_19 = {
            "\u043e\u0434\u0438\u043d\u043d\u0430\u0434\u0446\u0430\u0442",       // 11: odinnadtsat-
            "\u0434\u0432\u0435\u043d\u0430\u0434\u0446\u0430\u0442",             // 12: dvenadtsat-
            "\u0442\u0440\u0438\u043d\u0430\u0434\u0446\u0430\u0442",             // 13: trinadtsat-
            "\u0447\u0435\u0442\u044b\u0440\u043d\u0430\u0434\u0446\u0430\u0442", // 14: chetyrnadtsat-
            "\u043f\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442",             // 15: pyatnadtsat-
            "\u0448\u0435\u0441\u0442\u043d\u0430\u0434\u0446\u0430\u0442",       // 16: shestnadtsat-
            "\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442",             // 17: semnadtsat-
            "\u0432\u043e\u0441\u0435\u043c\u043d\u0430\u0434\u0446\u0430\u0442", // 18: vosemnadtsat-
            "\u0434\u0435\u0432\u044f\u0442\u043d\u0430\u0434\u0446\u0430\u0442"  // 19: devyatnadtsat-
    };

    /** Reverse lookup: stem to its index in {@link #STEMS_0_10}. */
    private static final Map<String, Integer> STEMS_0_10_LOOKUP = indexByStem(STEMS_0_10);

    /** Reverse lookup: stem to its index in {@link #STEMS_11_19}. */
    private static final Map<String, Integer> STEMS_11_19_LOOKUP = indexByStem(STEMS_11_19);

    /**
     * Digit form: a one or two digit number, optionally followed by an
     * optional dash and a one or two letter ending. Group 1 is the number,
     * group 2 the dash (if any), group 3 the ending (if any).
     */
    private static final Pattern DIGITS_NUMERAL_PAT = Pattern.compile(
            "\\b(\\d{1,2})(?:(-)?([" + ENDING_LETTERS + "]{1,2}))?\\b", PATTERN_FLAGS);

    /** Word form for 0-10: one of the stems followed by a 1-3 letter ending. */
    private static final Pattern STEM_0_10_NUMERAL_PAT = stemPattern(STEMS_0_10, 3);

    /** Word form for 11-19: one of the stems followed by a 1-2 letter ending. */
    private static final Pattern STEM_11_19_NUMERAL_PAT = stemPattern(STEMS_11_19, 2);

    // TODO: patterns for 20+

    /**
     * Default constructor. All lookup tables and patterns are immutable and
     * shared between instances, so there is nothing to set up here.
     */
    public OrdinalFactory() {
    }

    /**
     * Builds the stem-to-index map for an array of stems.
     *
     * @param stems stems in value order
     * @return map from each stem to its array index
     */
    private static Map<String, Integer> indexByStem(String[] stems) {
        Map<String, Integer> index = new HashMap<String, Integer>(stems.length * 2);
        for (int i = 0; i < stems.length; i++) {
            index.put(stems[i], i);
        }
        return index;
    }

    /**
     * Builds the pattern "word boundary, any stem, ending, word boundary".
     * Group 1 of the resulting pattern is the stem, group 2 the ending.
     *
     * @param stems           alternative stems; they contain letters only,
     *                        so they are safe to join without quoting
     * @param maxEndingLength maximum number of ending letters accepted
     * @return the compiled pattern
     */
    private static Pattern stemPattern(String[] stems, int maxEndingLength) {
        String regex = "\\b(" + StringUtils.join(stems, '|') + ")(["
                + ENDING_LETTERS + "]{1," + maxEndingLength + "})\\b";
        return Pattern.compile(regex, PATTERN_FLAGS);
    }

    /**
     * Factory method to create a new ordinal
     * 
     * @param value numeric value of the ordinal
     * @return whatever {@code Ordinal.setValue} returns for a fresh ordinal
     *         bound to this factory
     */
    public Ordinal create(int value) {
        Ordinal o = new Ordinal(this);
        return o.setValue(value);
    }

    /**
     * Factory method to create a new ordinal with preset properties
     * 
     * @param value   numeric value of the ordinal
     * @param gcase   grammatical case
     * @param ggender grammatical gender
     * @param gnumber grammatical number
     * @return a new ordinal bound to this factory
     */
    public Ordinal create(int value, GCase gcase, GGender ggender, GNumber gnumber) {
        return new Ordinal(this, value, gcase, ggender, gnumber);
    }

    /**
     * Factory method to parse an ordinal from a string. Sets all ordinal
     * properties. The digit form is tried first, then the word forms for
     * 0-10 and for 11-19; the first pattern found anywhere in the string
     * wins.
     * 
     * @param s text to search; may be null
     * @return a new Ordinal with all properties set or null if the string
     *         is null or not parsable
     */
    public Ordinal parse(String s) {
        if (s == null) {
            return null;
        }

        /* digit form: a number with an optional dash and ending */
        Matcher digits = DIGITS_NUMERAL_PAT.matcher(s);
        if (digits.find()) {
            Ordinal o = new Ordinal(this).setOrigStr(s);
            // group(1) is one or two ASCII digits (\d without
            // UNICODE_CHARACTER_CLASS), so parseInt cannot fail here
            o.setValue(Integer.parseInt(digits.group(1)));
            o.setDash(digits.group(2) != null);
            o.setGrammaticalProperties(digits.group(3));
            return o;
        }

        /* word form for 0-10 */
        Matcher small = STEM_0_10_NUMERAL_PAT.matcher(s);
        if (small.find()) {
            return fromStemMatch(s, small, STEMS_0_10_LOOKUP, 0);
        }

        /* word form for 11-19 */
        Matcher teen = STEM_11_19_NUMERAL_PAT.matcher(s);
        if (teen.find()) {
            return fromStemMatch(s, teen, STEMS_11_19_LOOKUP, 11);
        }

        return null;
    }

    /**
     * Builds an ordinal from a successful stem-pattern match.
     *
     * @param s      the original input string
     * @param m      matcher positioned on a match; group 1 is the stem,
     *               group 2 the ending
     * @param lookup stem-to-index map that belongs to the pattern
     * @param base   value of the stem at index 0
     * @return the parsed ordinal, or null if the matched stem is not in the
     *         lookup table
     */
    private Ordinal fromStemMatch(String s, Matcher m, Map<String, Integer> lookup, int base) {
        // The match is case-insensitive, the table keys are lower case.
        Integer index = lookup.get(m.group(1).toLowerCase(Locale.ROOT));
        if (index == null) {
            return null;
        }
        Ordinal o = new Ordinal(this).setOrigStr(s);
        o.setValue(base + index);
        o.setGrammaticalProperties(m.group(2));
        return o;
    }

    /**
     * returns the stem for a given value
     * 
     * @param value numeric value of the ordinal
     * @return the stem for values 0 to 19, or an empty string for any
     *         other value
     */
    public String getStem(int value) {
        if (value >= 0 && value <= 10) {
            return STEMS_0_10[value];
        }
        if (value >= 11 && value <= 19) {
            return STEMS_11_19[value - 11];
        }
        return "";
    }

    /**
     * This is for testing only, will be removed
     * 
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // NOTE(review): these samples originally contained Cyrillic
            // words that were lost together with the rest of the Cyrillic
            // text in this file; only digits, dashes and '?' placeholders
            // remain. They are kept unchanged until they can be restored
            // from the upstream repository.
            String[] samples = { "5-? ?? ?",
                    " 26 ? ??", "1-? ?? ",
                    "3-  ", "2-? ?? ",
                    "1-? ?? ?", "9 ? ?",
                    "1 ? ? ", "3-? ?? ",
                    "? ??   ", "1-? ??  ",
                    "5 ? ?      ", "2 ? ?",
                    "? 3  ", "6-? ?? ?",
                    "? 5 ", "2-?  ", "4 ? ?",
                    "? ?? ?", "3-?  " };

            OrdinalFactory factory = new OrdinalFactory();
            for (String sample : samples) {
                System.out.println(sample + " => " + factory.parse(sample));
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}