TranslationList.java Source code

Java tutorial

Introduction

Here is the source code for TranslationList.java

Source

/*
 * JapanMapTranslate
 * Copyright (c) Florian Fischer, 2016 (florianfischer@gmx.de)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;

import org.apache.commons.lang.StringUtils;

import com.atilika.kuromoji.ipadic.Token;
import com.atilika.kuromoji.ipadic.Tokenizer;

/**
 * Manages and applies a translation list for simple translation.
 * <p>
 * The word list must be a tab-separated file (read as UTF-8) with three columns:<br>
 *       <code>Japanese   translation   usage</code><br>
 * For the usage field, three values are supported:
 * <ul>
 * <li><code>1</code> translates the text anywhere</li>
 * <li><code>2</code> translates the text only at the beginning</li>
 * <li><code>3</code> translates the text only at the end</li>
 * </ul>
 * If anything else (or nothing) is found in the usage column, the line is ignored.
 * Lines with fewer than three columns, or with an empty source or translation
 * column (after trimming), are ignored as well.
 */
public class TranslationList {

    static final String FIELD_SEP = "\t";
    static final int COL_SRC = 0;
    static final int COL_TRL = 1;
    static final int COL_USAGE = 2;

    /** Byte order mark (U+FEFF) as it appears after decoding; the reader does not strip it. */
    private static final String BOM = "\uFEFF";

    // Parallel arrays: entry i of a src* array is replaced by entry i of the matching trl* array.
    private final String[] srcEverywhere, trlEverywhere;
    private final String[] srcPrefix, trlPrefix;
    private final String[] srcSuffix, trlSuffix;

    private int verbose = 0;

    /**
     * Reads a word list from a file.
     * <p>
     * I/O problems are reported on <code>System.err</code> and do not throw; in that
     * case the list contains only the entries that were read before the error
     * (possibly none).
     *
     * @param file path of the tab-separated word list
     */
    public TranslationList(String file) {
        ArrayList<String> sEverywhere = new ArrayList<>();
        ArrayList<String> tEverywhere = new ArrayList<>();
        ArrayList<String> sPrefix = new ArrayList<>();
        ArrayList<String> tPrefix = new ArrayList<>();
        ArrayList<String> sSuffix = new ArrayList<>();
        ArrayList<String> tSuffix = new ArrayList<>();

        // try-with-resources closes the reader (and the underlying file) even when
        // readLine() fails part-way through the file.
        try (BufferedReader r = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
            boolean firstLine = true;
            String l;
            while ((l = r.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // A leading BOM would otherwise become part of the first source
                    // text (trim() does not remove it) and that entry would never match.
                    if (l.startsWith(BOM)) {
                        l = l.substring(BOM.length());
                    }
                }

                String[] fields = l.split(FIELD_SEP);
                if (fields.length <= COL_USAGE) {
                    continue; // incomplete line
                }
                String src = fields[COL_SRC].trim();
                String trl = fields[COL_TRL].trim();
                String usage = fields[COL_USAGE].trim();

                if (src.isEmpty() || trl.isEmpty()) {
                    continue; // nothing to search for or nothing to replace with
                }

                switch (usage) {
                case "1":
                    sEverywhere.add(src);
                    tEverywhere.add(trl);
                    break;
                case "2":
                    sPrefix.add(src);
                    tPrefix.add(trl);
                    break;
                case "3":
                    sSuffix.add(src);
                    tSuffix.add(trl);
                    break;
                default:
                    break; // unsupported usage value: ignore the line
                }
            }
        } catch (IOException ex) {
            System.err.println("error while reading translation list: " + ex.getMessage());
        }

        srcEverywhere = sEverywhere.toArray(new String[0]);
        trlEverywhere = tEverywhere.toArray(new String[0]);
        srcPrefix = sPrefix.toArray(new String[0]);
        trlPrefix = tPrefix.toArray(new String[0]);
        srcSuffix = sSuffix.toArray(new String[0]);
        trlSuffix = tSuffix.toArray(new String[0]);
    }

    /**
     * Attempts to translate a text using the word list.
     * <p>
     * The text is split into tokens by <code>tok</code>. For every token for which
     * <code>Transliterator.hasAsianChar</code> returns true, the "anywhere"
     * replacements are applied first, then the prefix replacements, then the suffix
     * replacements. All other tokens are copied unchanged. The results are
     * concatenated in token order.
     *
     * @param tok  tokenizer used to split the text
     * @param text text to translate
     * @return the concatenation of the (possibly translated) token surfaces
     */
    public String translate(Tokenizer tok, String text) {
        StringBuilder res = new StringBuilder();

        for (Token t : tok.tokenize(text)) {
            String s = t.getSurface();
            if (!Transliterator.hasAsianChar(s)) {
                // tokens without asian chars can be ignored here and will be copied directly
                res.append(s);
                continue;
            }

            // attempt (partial) translation
            String r = StringUtils.replaceEach(s, srcEverywhere, trlEverywhere);
            r = replaceAnyOfAtStart(r, srcPrefix, trlPrefix);
            r = replaceAnyOfAtEnd(r, srcSuffix, trlSuffix);
            // Compare by content, not by reference, so the message is only printed
            // when the text actually changed.
            if (verbose > 1 && !r.equals(s)) {
                System.out.println("translation list: translated " + s + " to " + r);
            }
            res.append(r);
        }
        return res.toString();
    }

    /**
     * Replaces a matching prefix of <code>s</code>.
     * <p>
     * The entries are tested in array order against the current string: after a
     * replacement, the remaining entries are tested against the already modified
     * string, so more than one replacement may be applied.
     *
     * @param s       string to process
     * @param search  prefixes to look for
     * @param replace replacements; <code>replace[i]</code> replaces <code>search[i]</code>
     * @return the resulting string, or <code>s</code> itself if no prefix matched
     */
    public static String replaceAnyOfAtStart(String s, String[] search, String[] replace) {
        for (int i = 0; i < search.length; i++) {
            if (s.startsWith(search[i])) {
                s = replace[i] + s.substring(search[i].length());
            }
        }
        return s;
    }

    /**
     * Replaces a matching suffix of <code>s</code>.
     * <p>
     * The entries are tested in array order against the current string: after a
     * replacement, the remaining entries are tested against the already modified
     * string, so more than one replacement may be applied.
     *
     * @param s       string to process
     * @param search  suffixes to look for
     * @param replace replacements; <code>replace[i]</code> replaces <code>search[i]</code>
     * @return the resulting string, or <code>s</code> itself if no suffix matched
     */
    public static String replaceAnyOfAtEnd(String s, String[] search, String[] replace) {
        for (int i = 0; i < search.length; i++) {
            if (s.endsWith(search[i])) {
                s = s.substring(0, s.length() - search[i].length()) + replace[i];
            }
        }
        return s;
    }

    /**
     * Sets the verbosity level.
     * <p>
     * {@link #translate(Tokenizer, String)} prints a message for each changed token
     * only when the level is greater than 1.
     *
     * @param v new verbosity level
     */
    public void setVerbose(int v) {
        verbose = v;
    }
}