hugonicolau.openbrailleinput.wordcorrection.BrailleWordCorrection.java Source code

Java tutorial

Introduction

Here is the source code for hugonicolau.openbrailleinput.wordcorrection.BrailleWordCorrection.java

Source

/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package hugonicolau.openbrailleinput.wordcorrection;

import hugonicolau.openbrailleinput.R;
import hugonicolau.openbrailleinput.ime.OpenBrailleInput;
import hugonicolau.openbrailleinput.wordcorrection.mafsa.MAFSA;
import hugonicolau.openbrailleinput.wordcorrection.mafsa.MAFSA.SuggestionResult;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.StreamCorruptedException;
import java.util.Set;

import org.apache.commons.io.IOUtils;

import android.content.Context;
import android.util.Log;

public class BrailleWordCorrection {

    private static BrailleWordCorrection mSharedInstance = null;

    private MAFSA mMAFSA = null;
    private float[] mFrequencies;
    public String Language = "";

    protected BrailleWordCorrection() {
    }

    public static BrailleWordCorrection getSharedInstance() {
        if (mSharedInstance == null)
            mSharedInstance = new BrailleWordCorrection();
        return mSharedInstance;
    }

    /**
     * Score function, a lower score indicates a better match
     * score = a . wMSD(Styped, Sword) +  . word
     * a, : weighting factors (derived from pilot data)
     * : frequency
     * MSD substitution: cost = distance * sw
     * MSD insertion: cost = distance * iw
     * MSD omissions: cost = ow
     */

    // empirical values
    float a = (float) 0.6484113;
    float b = (float) 0.35158873;
    float sw = (float) 1.6959325;
    float iw = (float) 0.41078573;
    float ow = (float) 1.8156104;

    // distance used to calculate similarity between words
    Distance mDistance = ChordDistances.Damerau;

    public String[] getSuggestions(String word, int suggestionsLimit) {
        if (word == null || mMAFSA == null) {
            return null;
        }
        boolean exists = false;

        // get suggestions
        //long startTime = System.currentTimeMillis();
        int maxCost = 2;
        if (word.length() > 5) {
            maxCost = 3;
            //iw = (float) 1.5;
            //ow = (float) 2;
        }

        //println("Get suggestions for[" + word + "]");

        Set<SuggestionResult> results = mMAFSA.searchMSD(word, maxCost, iw, sw, ow, mDistance);

        //long stopTime = System.currentTimeMillis();
        //println("Search time [" + (stopTime - startTime) + "] ms");

        // calculate final score for each word
        //startTime = System.currentTimeMillis();
        for (SuggestionResult result : results) {
            if (result.suggestion.equalsIgnoreCase(word)) {
                // if transcribed word exists, then it reveives the min score
                result.msdScore = -2;
                exists = true;
            } else {
                result.msdScore = a * (result.msdScore / maxCost) - b * getFrequency(result.suggestion);
            }
        }

        if (!exists && mMAFSA.contains(word))
            results.add(mMAFSA.new SuggestionResult(word, -2));

        // blank space filter
        for (int i = 1; i < word.length(); i++) {
            String word1 = word.substring(0, i);
            String word2 = word.substring(i);
            if (mMAFSA.contains(word1) && mMAFSA.contains(word2)) {
                SuggestionResult res = mMAFSA.new SuggestionResult(word1 + " " + word2, ow);

                float freq1 = getFrequency(word1);
                float freq2 = getFrequency(word2);

                res.msdScore = a * (res.msdScore / maxCost) - b * ((freq1 + freq2) / 2);
                res.msdScore *= 1.5; // penalize for being two words
                results.add(res);
            }
        }

        //stopTime = System.currentTimeMillis();
        //println("Scoring time [" + (stopTime - startTime) + "] ms");

        SuggestionResult[] resultsArray = results.toArray(new SuggestionResult[results.size()]);

        // sort 
        //startTime = System.currentTimeMillis();
        Quicksort sorter = new Quicksort();
        sorter.sort(resultsArray);
        //stopTime = System.currentTimeMillis();
        //println("Sort time [" + (stopTime - startTime) + "] ms");

        //if(results.size() == 0) println("Empty results for word[" + word + "]");
        return MAFSA.extractWords(resultsArray, suggestionsLimit);
    }

    /* UTILS */
    private float getFrequency(String word) {
        int hash = mMAFSA.wordToHash(word);
        //println("Word[" + word + "] Hash[" + hash + "] Freq[" + mFrequencies[hash] + "]");
        return mFrequencies[hash];
    }

    /*
      * LOADING METHODS 
      */
    public void load(Context context, String locale) {
        // load MA-FSA
        InputStream is = null;
        try {
            if (locale.equalsIgnoreCase("0")) {
                // english
                is = context.getResources().openRawResource(R.raw.android_en);
                mDistance = ChordDistances.DamerauEN;
            } else if (locale.equalsIgnoreCase("1")) {
                // portuguese
                is = context.getResources().openRawResource(R.raw.android_pt);
                mDistance = ChordDistances.DamerauPT;
            } else {
                throw new IOException("Invalid locale");
            }

            mMAFSA = MAFSA.load(is);

        } catch (IOException ioe) {
            // handle this exception
            Log.v(OpenBrailleInput.TAG, "Couldn't load dawg" + ioe.getMessage());
        } finally {
            IOUtils.closeQuietly(is);
        }

        // load frequencies
        is = null;
        try {
            if (locale.equalsIgnoreCase("0")) {
                // english
                is = context.getResources().openRawResource(R.raw.freq_en);
            } else if (locale.equalsIgnoreCase("1")) {
                // portuguese
                is = context.getResources().openRawResource(R.raw.freq_pt);
            } else {
                throw new IOException("Invalid locale");
            }

            mFrequencies = loadFrequencies(is);

            if (mFrequencies == null)
                Log.v(OpenBrailleInput.TAG, "Couldn't load frequencies");

        } catch (IOException ioe) {
            // handle this exception
            Log.v(OpenBrailleInput.TAG, "Couldn't load frequencies" + ioe.getMessage());
        } finally {
            IOUtils.closeQuietly(is);
        }

        Language = locale;
    }

    private float[] loadFrequencies(InputStream is) throws StreamCorruptedException, IOException {
        BufferedInputStream bis = new BufferedInputStream(is, 8 * 1024);
        ObjectInputStream ois = new ObjectInputStream(bis);

        float[] floats = null;

        try {
            floats = (float[]) ois.readObject();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }

        return floats;
    }

    private class Quicksort {
        private SuggestionResult[] data;
        private int number;

        public void sort(SuggestionResult[] values) {
            // check for empty or null array
            if (values == null || values.length == 0) {
                return;
            }
            this.data = values;
            number = values.length;
            quicksort(0, number - 1);
        }

        private void quicksort(int low, int high) {
            int i = low, j = high;
            // Get the pivot element from the middle of the list
            SuggestionResult pivot = data[low + (high - low) / 2];

            // Divide into two lists
            while (i <= j) {
                // If the current value from the left list is smaller then the pivot
                // element then get the next element from the left list
                while (data[i].msdScore < pivot.msdScore) {
                    i++;
                }
                // If the current value from the right list is larger then the pivot
                // element then get the next element from the right list
                while (data[j].msdScore > pivot.msdScore) {
                    j--;
                }

                // If we have found a values in the left list which is larger then
                // the pivot element and if we have found a value in the right list
                // which is smaller then the pivot element then we exchange the
                // values.
                // As we are done we can increase i and j
                if (i <= j) {
                    exchange(i, j);
                    i++;
                    j--;
                }
            }
            // Recursion
            if (low < j)
                quicksort(low, j);
            if (i < high)
                quicksort(i, high);
        }

        private void exchange(int i, int j) {
            SuggestionResult temp = data[i];
            data[i] = data[j];
            data[j] = temp;
        }
    }

}