Back to project page TaigIME-android.
The source code is released under:
GNU General Public License
If you think the Android project TaigIME-android listed on this page is inappropriate, for example because it contains malicious code/tools or violates copyright, please email info at java2s dot com. Thanks.
/* from www.java2s.com */
package fr.magistry.taigime;

import java.lang.Character.UnicodeBlock;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import android.content.res.Resources;
import android.util.Log;

/**
 * One syllable parsed from a romanised (TRS/TL, POJ, TY) or Bopomofo input
 * string, split into an initial, a vowel group ("mediane"), a final, an
 * entering-tone stop and a tone digit.
 *
 * <p>The static {@code parseXxx} methods run a script-specific regular
 * expression over the input and rewrite each matched part through a chain of
 * literal {@link String#replace(CharSequence, CharSequence)} calls.
 *
 * <p>NOTE(review): in this copy of the file many string literals contain
 * {@code ?} (or an empty string) where a non-ASCII character was presumably
 * present upstream. They are reproduced here byte-for-byte; restore them from
 * the original repository before relying on the conversions.
 *
 * @author pierre
 */
public class TaigiSyl {

    private static final int SCRIPT_TL = 0;
    private static final int SCRIPT_POJ = 1;
    private static final int SCRIPT_TY = 2;
    private static final int SCRIPT_BOPOMO = 3;

    // NOTE(review): as written, this pattern starts a group with "(?|", which
    // is not a construct documented for java.util.regex.Pattern, so compiling
    // it is expected to fail during class initialisation. The '?' characters
    // look like lost Bopomofo symbols -- confirm against upstream.
    private static final Pattern mPatternBopomoSyl = Pattern.compile(
            "(?:(?|?|?|??|?|??|?|?|?|?|?|??|?|?|?|?|?|?|?|?)?([???????]+?|?|?)(?:(?|?|?)|(?|?|??|??))?([1-9])?)|(?|?|?|??|?|??|?|?|?|?|?|??|?|?|?|?|?|?|?|?)-?");
    private static final Pattern mPatternTRSSyl = Pattern.compile(
            "(?:(ph|b|p|m|th|t|n|l|k|g|kh|ng|h|tsh|j|ts|s)?([aeiou+]+(?:nn|N)?|ng|m)(?:(ng|m|n|r)|(p|t|h|k))?([1-9])?)|(ph|b|p|m|th|n|l|k|g|kh|ng|tsi|tshi|tsh|si|ts|ji|j|s|h|t)-?-?");
    private static final Pattern mPatternPOJSyl = Pattern.compile(
            "(?:(ph|b|p|m|th|t|n|l|k|g|kh|ng|h|chh|j|ch|s)?([aeiou+]+(?:nn|N)?|ng|m)(?:(ng|m|n|r)|(p|t|h|k))?([1-9])?)|(ph|b|p|m|th|t|n|l|k|g|kh|ng|chh|chhi|chi|si|ch|ji|j|s|h)-?-?");
    private static final Pattern mPatternTYSyl = Pattern.compile(
            "(?:(k|c|z|z|ng|s|t|p|c|r|bh|gh|h|g|r|m|l|n|b|s|d)?([aeiou+]+(?:nn|N|r)?|ng|m)(?:(ng|m|n)|(p|t|h|k))?([1-9])?)|(k|c|z|z|ng|si|t|p|c|r|bh|gh|h|g|r|m|l|n|b|s|d)-?-?");

    // Capture-group indices shared by the four syllable patterns above.
    private static final int G_INITIALE = 1;
    private static final int G_VOYELLE = 2;
    private static final int G_FINALE = 3;
    private static final int G_ENTRANT = 4;
    private static final int G_TON = 5;
    // Second alternative of each pattern: an initial typed on its own.
    private static final int G_INITIALE_SEULE = 6;

    // Parallel arrays: TY_initials[i] is rewritten to TY_initials_ipa[i].
    private static final String[] TY_initials = new String[] {
        "zi", "ri", "si", "ci", "z", "k", "ng", "gh", "p", "bh", "h",
        "g", "m", "l", "t", "n", "b", "s", "d", "c", "r"};
    private static final String[] TY_initials_ipa = new String[] {
        "t?", "?", "?", "t??", "ts", "k?", "?", "?", "p?", "b", "h",
        "k", "m", "l", "t?", "n", "p", "s", "t", "ts?", "dz"};

    private String Initiale = "";
    private String Mediane = "";
    private String Finale = "";
    private String TonEntrant = "";
    private int Ton = 0;
    private String Inputed;

    /**
     * Creates an empty syllable that remembers the raw text it came from.
     *
     * @param input the matched input text, returned unchanged by {@link #getInputed()}
     */
    public TaigiSyl(String input) {
        Inputed = input;
    }

    /** @return the raw text passed to the constructor */
    public String getInputed() {
        return Inputed;
    }

    /** @return the initial part, or {@code ""} if none was set */
    public String getInitiale() {
        return Initiale;
    }

    /** @param initiale the new initial part */
    public void setInitiale(String initiale) {
        Initiale = initiale;
    }

    /** @return the vowel group, or {@code ""} if none was set */
    public String getMediane() {
        return Mediane;
    }

    /**
     * Returns the number of bytes counted for {@code s} by a per-{@code char}
     * rule: 1 for U+0001..U+007F, 3 above U+07FF, 2 otherwise. A {@code null}
     * string counts as 0. Not called anywhere in this class.
     */
    private static int getUTFSize(String s) {
        int len = (s == null) ? 0 : s.length();
        int size = 0;
        for (int i = 0; i < len; i++) {
            int c = s.charAt(i);
            if ((c >= 0x0001) && (c <= 0x007F)) {
                size++;
            } else if (c > 0x07FF) {
                size += 3;
            } else {
                size += 2;
            }
        }
        return size;
    }

    /**
     * Stores the vowel group.
     *
     * @param mediane the new vowel group; must not be {@code null}
     */
    public void setMediane(String mediane) {
        int l = mediane.length();
        //int l = getUTFSize(mediane);
        StringBuilder sb = new StringBuilder();
        // NOTE(review): this loop looks like left-padding to three characters,
        // but the appended literal is empty here, so nothing is added.
        // Presumably the pad character was lost -- confirm against upstream.
        for (int i = l; i < 3; i++) {
            sb.append("");
        }
        sb.append(mediane);
        Mediane = sb.toString();
    }

    /** @return the final part, or {@code ""} if none was set */
    public String getFinale() {
        return Finale;
    }

    /** @param finale the new final part */
    public void setFinale(String finale) {
        Finale = finale;
    }

    /** @return the tone number, 0 when no tone was set */
    public int getTon() {
        return Ton;
    }

    /** @param ton the new tone number */
    public void setTon(int ton) {
        Ton = ton;
    }

    /** @param tonEntrant the new entering-tone stop */
    public void setTonEntrant(String tonEntrant) {
        TonEntrant = tonEntrant;
    }

    /** @return the entering-tone stop, or {@code ""} if none was set */
    public String getTonEntrant() {
        return TonEntrant;
    }

    /** Currently does nothing. */
    public void updateBopomo(String bopomo) {
    }

    /**
     * Builds the dot-separated key {@code initial.mediane.final.stop.tone}
     * for this syllable. An empty stop is written as {@code _}; the tone is
     * written as {@code _} unless {@code fuzzy} is true and a tone is set.
     * A syllable with an initial but no vowel group yields
     * {@code initial.___._._._}.
     *
     * <p>NOTE(review): the underscores are presumably SQL LIKE wildcards --
     * verify against the query that consumes this string.
     *
     * @param fuzzy when true, a non-zero tone is included in the key
     * @return the key string
     */
    public String getSqliteString(boolean fuzzy) {
        // Compare by value: the original used != on String references, which
        // only tests identity with the "" literal.
        if (!"".equals(Initiale) && Mediane.equals("")) {
            return Initiale + ".___._._._";
        }
        StringBuilder sb = new StringBuilder();
        sb.append(Initiale);
        sb.append(".");
        sb.append(Mediane);
        sb.append(".");
        sb.append(Finale);
        sb.append(".");
        sb.append(TonEntrant.equals("") ? "_" : TonEntrant);
        sb.append(".");
        sb.append((Ton == 0 || !fuzzy) ? "_" : String.valueOf(Ton));
        return sb.toString();
    }

    /**
     * Parses a Bopomofo string into syllables.
     *
     * @param bopomo the text to parse
     * @return the syllables found, in input order
     */
    public static ArrayList<TaigiSyl> parseBopomo(String bopomo) {
        // NOTE(review): as written, each call doubles every '?'; upstream
        // presumably expanded four distinct symbols -- confirm.
        bopomo = bopomo.replace("?", "??")
                .replace("?", "??")
                .replace("?", "??")
                .replace("?", "??");
        return parseString(mPatternBopomoSyl, bopomo, SCRIPT_BOPOMO);
    }

    /**
     * Parses a TRS string into syllables.
     *
     * @param trs the text to parse
     * @return the syllables found, in input order
     */
    public static ArrayList<TaigiSyl> parseTRS(String trs) {
        return parseString(mPatternTRSSyl, trs, SCRIPT_TL);
    }

    /**
     * Parses a POJ string into syllables.
     *
     * @param poj the text to parse
     * @return the syllables found, in input order
     */
    public static ArrayList<TaigiSyl> parsePOJ(String poj) {
        return parseString(mPatternPOJSyl, poj, SCRIPT_POJ);
    }

    /**
     * Parses a TY string into syllables.
     *
     * @param ty the text to parse
     * @return the syllables found, in input order
     */
    public static ArrayList<TaigiSyl> parseTY(String ty) {
        return parseString(mPatternTYSyl, ty, SCRIPT_TY);
    }

    /**
     * Rewrites the parts of a TRS-parsed syllable in place and returns it.
     * The order of the replace calls matters: longer keys come before their
     * prefixes.
     */
    private static TaigiSyl IPA_of_TRS(TaigiSyl syl) {
        Log.v("ime", syl.toString());
        boolean beforeI = syl.Mediane.startsWith("i");
        boolean palatalisable = syl.Initiale.startsWith("j")
                || syl.Initiale.startsWith("s")
                || syl.Initiale.startsWith("ts");
        if (beforeI && palatalisable) {
            syl.Initiale += "i";
        }
        syl.Initiale = syl.Initiale.replace("tsi", "t?")
                .replace("ji", "?")
                .replace("si", "?")
                .replace("tshi", "t??")
                .replace("tsh", "ts?")
                .replace("ts", "ts")
                .replace("kh", "k?")
                .replace("ng", "?")
                .replace("j", "dz")
                .replace("g", "?")
                .replace("ph", "p?")
                .replace("b", "b")
                .replace("h", "h")
                .replace("k", "k")
                .replace("m", "m")
                .replace("l", "l")
                .replace("th", "t?")
                .replace("n", "n")
                .replace("p", "p")
                .replace("s", "s")
                .replace("t", "t");
        // Value comparison instead of the original reference test (!= "").
        boolean closed = !"".equals(syl.Finale)
                || syl.TonEntrant.equals("p")
                || syl.TonEntrant.equals("t")
                || syl.TonEntrant.equals("k");
        if (syl.Mediane.endsWith("o") && closed) {
            syl.Mediane += "o";
        }
        syl.setMediane(syl.Mediane.replace("ng", "?")
                .replace("unn", "?")
                .replace("ann", "")
                .replace("m", "m")
                .replace("inn", "?")
                .replace("enn", "?")
                .replace("onn", "")
                .replace("oo", "?")
                .replace("au", "a?")
                .replace("o", "?"));
        syl.TonEntrant = syl.TonEntrant
                .replace("t", "t?")
                .replace("h", "?")
                .replace("k", "k?")
                .replace("p", "p?");
        syl.Finale = syl.Finale
                .replace("ng", "?")
                .replace("m", "m")
                .replace("n", "n");
        Log.v("ime", syl.toString());
        return syl;
    }

    /**
     * Rewrites the parts of a POJ-parsed syllable in place and returns it.
     * The vowel group is first adjusted towards the TRS spelling, then the
     * same kind of replacement chains as for TRS are applied.
     */
    private static TaigiSyl IPA_of_POJ(TaigiSyl syl) {
        Log.v("ime", syl.toString());
        // POJ to TRS
        if (syl.Mediane.startsWith("o") && !syl.Mediane.equals("o")) {
            syl.Mediane = syl.Mediane.replaceFirst("o", "u");
        }
        if (syl.Mediane.equals("e") && syl.TonEntrant.equals("k")) {
            syl.Mediane = syl.Mediane.replaceFirst("e", "i");
        }
        // Value comparison instead of the original reference test (!= "").
        boolean closed = !"".equals(syl.Finale)
                || syl.TonEntrant.equals("p")
                || syl.TonEntrant.equals("t")
                || syl.TonEntrant.equals("k");
        if (syl.Mediane.endsWith("o") && closed) {
            syl.Mediane += "u";
        }
        boolean beforeI = syl.Mediane.startsWith("i");
        boolean palatalisable = syl.Initiale.startsWith("j")
                || syl.Initiale.startsWith("s")
                || syl.Initiale.startsWith("ch");
        if (beforeI && palatalisable) {
            syl.Initiale += "i";
        }
        syl.Initiale = syl.Initiale.replace("chi", "t?")
                .replace("ji", "?")
                .replace("si", "?")
                .replace("chhi", "t??")
                .replace("chh", "ts?")
                .replace("ch", "ts")
                .replace("kh", "k?")
                .replace("ng", "?")
                .replace("j", "dz")
                .replace("g", "?")
                .replace("ph", "p?")
                .replace("b", "b")
                .replace("h", "h")
                .replace("k", "k")
                .replace("m", "m")
                .replace("l", "l")
                .replace("th", "t?")
                .replace("n", "n")
                .replace("p", "p")
                .replace("s", "s")
                .replace("t", "t");
        syl.setMediane(syl.Mediane.replace("ng", "?")
                .replace("unn", "?")
                .replace("ann", "")
                .replace("m", "m")
                .replace("inn", "?")
                .replace("enn", "?")
                .replace("onn", "")
                .replace("ou", "?")
                .replace("au", "a?")
                .replace("o", "?"));
        syl.TonEntrant = syl.TonEntrant
                .replace("t", "t?")
                .replace("h", "?")
                .replace("k", "k?")
                .replace("p", "p?");
        syl.Finale = syl.Finale
                .replace("ng", "?")
                .replace("m", "m")
                .replace("n", "n");
        Log.v("ime", syl.toString());
        return syl;
    }

    /**
     * Rewrites the parts of a TY-parsed syllable in place and returns it.
     * Only the first entry of {@code TY_initials} contained in the initial is
     * replaced.
     */
    private static TaigiSyl IPA_of_TY(TaigiSyl syl) {
        Log.v("ime", syl.toString());
        boolean beforeI = syl.Mediane.startsWith("i");
        boolean palatalisable = syl.Initiale.startsWith("c")
                || syl.Initiale.startsWith("s")
                || syl.Initiale.startsWith("z")
                || syl.Initiale.startsWith("r");
        if (beforeI && palatalisable) {
            syl.Initiale += "i";
        }
        Log.v("ime2", syl.toString());
        for (int i = 0; i < TY_initials_ipa.length; i++) {
            if (syl.Initiale.contains(TY_initials[i])) {
                Log.v("a", TY_initials[i]);
                Log.v("a", TY_initials_ipa[i]);
                syl.Initiale = syl.Initiale.replace(TY_initials[i], TY_initials_ipa[i]);
                break;
            }
        }
        //if(syl.Mediane.endsWith("o") &&
        //    (syl.Finale != "" || syl.TonEntrant.equals("p") || syl.TonEntrant.equals("t") || syl.TonEntrant.equals("k")))
        //    syl.Mediane += "o";
        syl.setMediane(syl.Mediane.replace("ng", "?")
                .replace("unn", "?")
                .replace("ann", "")
                .replace("m", "m")
                .replace("inn", "?")
                .replace("enn", "?")
                .replace("onn", "")
                .replace("or", "?")
                .replace("o", "?")
                .replace("au", "a?"));
        syl.TonEntrant = syl.TonEntrant
                .replace("t", "t?")
                .replace("h", "?")
                .replace("k", "k?")
                .replace("p", "p?");
        syl.Finale = syl.Finale
                .replace("ng", "?")
                .replace("m", "m")
                .replace("n", "n");
        Log.v("ime", syl.toString());
        return syl;
    }

    /**
     * Rewrites the parts of a Bopomofo-parsed syllable in place and returns it.
     *
     * <p>NOTE(review): every search key below reads as '?' in this copy, so
     * the chains cannot work as intended until the literals are restored.
     */
    private static TaigiSyl IPA_of_Bopomo(TaigiSyl syl) {
        syl.Initiale = syl.Initiale.replace("??", "t?")
                .replace("?", "?")
                .replace("?", "ts")
                .replace("?", "k?")
                .replace("?", "?")
                .replace("?", "dz")
                .replace("?", "?")
                .replace("?", "p?")
                .replace("?", "b")
                .replace("?", "t??")
                .replace("??", "h")
                .replace("??", "k")
                .replace("?", "m")
                .replace("?", "l")
                .replace("?", "t?")
                .replace("?", "n")
                .replace("?", "p")
                .replace("?", "s")
                .replace("?", "t")
                .replace("?", "?")
                .replace("?", "ts?")
                .replace("?", "?")
                .replace("?", "?");
        syl.TonEntrant = syl.TonEntrant.replace("?", "t")
                .replace("?", "?")
                .replace("?", "k?")
                .replace("?", "p?")
                .replace("?", "p?")
                .replace("?", "t?")
                .replace("??", "k?")
                .replace("??", "?");
        syl.Finale = syl.Finale.replace("?", "?")
                .replace("?", "m")
                .replace("?", "n");
        syl.setMediane(syl.Mediane.replace("?", "?")
                .replace("?", "?")
                .replace("?", "")
                .replace("?", "")
                .replace("?", "")
                .replace("?", "?")
                .replace("?", "?")
                .replace("?", "?")
                .replace("?", "?")
                .replace("?", "")
                .replace("?", "")
                .replace("?", "a")
                .replace("?", "e")
                .replace("??", "e")
                .replace("?", "?")
                .replace("?", "?")
                .replace("?", "m")
                .replace("?", "?")
                .replace("?", "i")
                .replace("?", "u")
                .replace("au", "a?"));
        Log.v("ime", syl.toString());
        return syl;
    }

    /**
     * Finds every match of {@code pat} in {@code input}, fills a syllable
     * from the match's capture groups and converts it according to
     * {@code transcription_type}.
     *
     * @param pat one of the syllable patterns of this class (six groups)
     * @param input the text to scan
     * @param transcription_type one of the {@code SCRIPT_*} constants; any
     *        value other than Bopomofo, POJ or TY is handled as TRS
     * @return the converted syllables, in match order
     */
    private static ArrayList<TaigiSyl> parseString(Pattern pat, String input, int transcription_type) {
        Matcher m = pat.matcher(input);
        ArrayList<TaigiSyl> result = new ArrayList<TaigiSyl>();
        while (m.find()) {
            TaigiSyl syl = new TaigiSyl(m.group(0));
            String loneInitial = m.group(G_INITIALE_SEULE);
            if (loneInitial != null) {
                // Only an initial was typed; the other parts keep their defaults.
                syl.setInitiale(loneInitial);
            } else {
                String initiale = m.group(G_INITIALE);
                if (initiale != null) {
                    syl.setInitiale(initiale);
                }
                String voyelle = m.group(G_VOYELLE);
                if (voyelle != null) {
                    syl.setMediane(voyelle);
                }
                String finale = m.group(G_FINALE);
                if (finale != null) {
                    syl.setFinale(finale);
                }
                String entrant = m.group(G_ENTRANT);
                if (entrant != null) {
                    syl.setTonEntrant(entrant);
                }
                String ton = m.group(G_TON);
                if (ton != null) {
                    syl.setTon(Integer.parseInt(ton));
                }
            }
            switch (transcription_type) {
                case SCRIPT_BOPOMO:
                    syl = IPA_of_Bopomo(syl);
                    break;
                case SCRIPT_POJ:
                    syl = IPA_of_POJ(syl);
                    break;
                case SCRIPT_TY:
                    syl = IPA_of_TY(syl);
                    break;
                default:
                    syl = IPA_of_TRS(syl);
                    break;
            }
            result.add(syl);
        }
        return result;
    }

    /** @return {@code initial/mediane/final/stop}, used by the debug logging */
    @Override
    public String toString() {
        return String.format("%s/%s/%s/%s", Initiale, Mediane, Finale, TonEntrant);
    }
}