com.globalsight.machineTranslation.domt.DoMTProxy.java Source code

Java tutorial

Introduction

Here is the source code for com.globalsight.machineTranslation.domt.DoMTProxy.java

Source

/**
 *  Copyright 2009 Welocalize, Inc. 
 *  
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  
 *  You may obtain a copy of the License at 
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 */
package com.globalsight.machineTranslation.domt;

import java.io.StringReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;

import org.apache.log4j.Logger;
import org.apache.xerces.parsers.DOMParser;
import org.apache.xmlrpc.client.XmlRpcClient;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

import com.globalsight.everest.webapp.pagehandler.administration.mtprofile.MTProfileConstants;
import com.globalsight.machineTranslation.AbstractTranslator;
import com.globalsight.machineTranslation.MTHelper;
import com.globalsight.machineTranslation.MachineTranslationException;
import com.globalsight.machineTranslation.MachineTranslator;
import com.globalsight.util.StringUtil;
import com.globalsight.util.edit.EditUtil;
import com.globalsight.util.edit.GxmlUtil;

import com.globalsight.util.gxml.GxmlElement;
import com.globalsight.util.gxml.TextNode;

public class DoMTProxy extends AbstractTranslator implements MachineTranslator {
    private static final Logger logger = Logger.getLogger(DoMTProxy.class);

    /**
     * Engine name for which the generated XLIFF always uses "nl" / "en" as
     * source / target language codes (see getDoMtXliff).
     */
    public static final String DEFAULT_ENGINE_NAME = "translate-xliff";

    // Class-wide monitor held around every XML-RPC call made by hitDoMt.
    // It is final so the monitor object can never be replaced while threads
    // are synchronizing on it.
    private static final Object LOCK = new Object();

    // Chooses the batch strategy in doBatchTranslation: true sends segments
    // with their tags (translateWithTags), false sends plain text only
    // (translatePureText).
    private static boolean useTagTranslation = true;

    /**
     * Creates a DoMT proxy. The constructor body is empty; no connection to
     * the DoMT server is opened here.
     *
     * @throws MachineTranslationException
     *             declared by the signature; nothing in this constructor body
     *             throws it.
     */
    public DoMTProxy() throws MachineTranslationException {
    }

    /**
     * Returns the name identifying this engine.
     *
     * @return the ENGINE_DOMT constant (not declared in this class;
     *         presumably inherited -- confirm against the super types).
     */
    public String getEngineName() {
        return ENGINE_DOMT;
    }

    /**
     * Always reports the locale pair as supported.
     * <p>
     * DoMT supports all locales in theory. One engine (name) serves one locale
     * pair. If the engine is not for the current locale pair, the "status" API
     * returns a "failed" status at once, so the real check happens when the MT
     * engine is actually hit.
     *
     * @param p_sourceLocale
     *            source locale; not inspected.
     * @param p_targetLocale
     *            target locale; not inspected.
     * @return always {@code true}.
     */
    @Override
    public boolean supportsLocalePair(Locale p_sourceLocale, Locale p_targetLocale)
            throws MachineTranslationException {
        return true;
    }

    /**
     * Translates a single segment by delegating to
     * {@link #doBatchTranslation(Locale, Locale, String[])}.
     * <p>
     * Text that does not already start with a {@code <segment} root tag is
     * wrapped in {@code <segment>...</segment>} before being sent; the root
     * tag is stripped again from the returned translation.
     *
     * @param p_sourceLocale
     *            source locale.
     * @param p_targetLocale
     *            target locale.
     * @param p_string
     *            the text or segment GXML to translate.
     * @return the translation without its root tag, or {@code null} when the
     *         input is empty, no translation came back, or the translation
     *         failed (the failure is logged).
     */
    @Override
    protected String doTranslation(Locale p_sourceLocale, Locale p_targetLocale, String p_string)
            throws MachineTranslationException {
        if (StringUtil.isEmpty(p_string)) {
            return null;
        }

        try {
            String wrappedSegment = p_string;
            if (!p_string.toLowerCase().startsWith("<segment")) {
                wrappedSegment = "<segment>" + p_string + "</segment>";
            }

            String[] translations = doBatchTranslation(p_sourceLocale, p_targetLocale,
                    new String[] { wrappedSegment });

            if (translations != null && translations.length > 0) {
                return GxmlUtil.stripRootTag(translations[0]);
            }
        } catch (Exception e) {
            // Translation stays best-effort (null is returned below), but the
            // failure must be visible in the log instead of vanishing.
            logger.error("DoMT failed to translate a segment: " + e.getMessage(), e);
        }

        return null;
    }

    /**
     * Translates a batch of segments using the strategy selected by
     * {@code useTagTranslation}.
     *
     * @return the array produced by the chosen strategy, or {@code null} when
     *         either locale is null or there are no segments.
     */
    @Override
    protected String[] doBatchTranslation(Locale sourceLocale, Locale targetLocale, String[] segments)
            throws MachineTranslationException {
        boolean missingLocale = (sourceLocale == null || targetLocale == null);
        if (missingLocale || segments == null || segments.length < 1) {
            return null;
        }

        return useTagTranslation
                ? translateWithTags(sourceLocale, targetLocale, segments)
                : translatePureText(sourceLocale, targetLocale, segments);
    }

    /**
     * Send segments with tags to DOMT engine.
     * <p>
     * A segment that has neither sub elements nor {@code internal="yes"} text
     * is sent as one unit (its content without the root tag). Otherwise each
     * of its text nodes is sent as a separate unit and the translations are
     * written back into the segment's GXML tree afterwards. All units travel
     * in one XLIFF document, keyed by {@link #composeKey(int, int)}.
     *
     * @param sourceLocale
     *            source locale.
     * @param targetLocale
     *            target locale.
     * @param segments
     *            segment GXML strings, each with a root tag.
     * @return an array parallel to {@code segments}; an entry is the original
     *         opening root tag + translation + {@code </segment>}. Entries
     *         stay {@code null} when nothing was sent or when the call to
     *         DoMT failed (the failure is logged).
     */
    private String[] translateWithTags(Locale sourceLocale, Locale targetLocale, String[] segments) {
        String[] results = new String[segments.length];

        boolean isXlf = MTHelper.isXlf(this.getMtParameterMap());
        try {
            // Ensure the sequence will be unchanged after translation.
            HashMap<Integer, String> id2Segs = new HashMap<Integer, String>();
            // Opening root tag of every segment, re-attached to the result.
            String[] heads = new String[segments.length];
            for (int i = 0; i < segments.length; i++) {
                int index = segments[i].indexOf(">");
                heads[i] = segments[i].substring(0, index + 1);

                boolean hasInternalText = (segments[i].indexOf(" internal=\"yes\"") > -1);
                GxmlElement gxmlRoot = MTHelper.getGxmlElement(segments[i]);
                List<?> subFlowList = gxmlRoot.getDescendantElements(GxmlElement.SUB_TYPE);
                if ((subFlowList == null || subFlowList.size() == 0) && !hasInternalText) {
                    String segmentWithId = segments[i];
                    if (!isXlf) {
                        // Non-XLF content: rename " i=" attributes to " id="
                        // for DoMT; reverted on the returned XLIFF below.
                        segmentWithId = segmentWithId.replace(" i=", " id=");
                    }
                    // Sub index 0 marks "whole segment sent as one unit".
                    id2Segs.put(composeKey(i, 0), GxmlUtil.stripRootTag(segmentWithId));
                } else {
                    // If segment gxml HAS subs, send texts to MT then compose
                    // back again. Text nodes use sub indexes starting at 1.
                    List<?> items = MTHelper.getImmediateAndSubImmediateTextNodes(gxmlRoot);
                    for (int subIndex = 0; subIndex < items.size(); subIndex++) {
                        TextNode textNode = (TextNode) items.get(subIndex);
                        id2Segs.put(composeKey(i, subIndex + 1), textNode.toGxml());
                    }
                }
            }

            if (id2Segs.size() > 0) {
                String srcXlf = getDoMtXliff(id2Segs, sourceLocale, targetLocale);
                if (MTHelper.isLogDetailedInfo(ENGINE_DOMT)) {
                    logger.info("Segments in XLF sending to DoMT:" + srcXlf);
                }

                String translatedXlf = hitDoMt(sourceLocale, targetLocale, srcXlf);
                if (MTHelper.isLogDetailedInfo(ENGINE_DOMT)) {
                    logger.info("Segments in XLF returned from DoMT:" + translatedXlf);
                }

                if (!isXlf && StringUtil.isNotEmpty(translatedXlf)) {
                    // Undo the " i=" -> " id=" renaming, but keep the "id"
                    // attribute of the trans-unit elements themselves.
                    translatedXlf = translatedXlf.replace(" id=", " i=").replace("trans-unit i=", "trans-unit id=");
                }

                // id :: translated targets
                HashMap<Integer, String> targets = extractDoMtReturning(translatedXlf);

                HashMap<Integer, HashMap<Integer, String>> targetGroups = getTargetGroups(targets);

                for (int mainIndex = 0; mainIndex < segments.length; mainIndex++) {
                    HashMap<Integer, String> subSet = targetGroups.get(mainIndex);
                    if (subSet == null) {
                        // Nothing came back for this segment: empty content.
                        results[mainIndex] = heads[mainIndex] + "</segment>";
                        continue;
                    }

                    String translatedSegment = "";
                    boolean hasInternalText = (segments[mainIndex].indexOf(" internal=\"yes\"") > -1);
                    GxmlElement gxmlRoot = MTHelper.getGxmlElement(segments[mainIndex]);
                    List<?> subFlowList = gxmlRoot.getDescendantElements(GxmlElement.SUB_TYPE);
                    if ((subFlowList == null || subFlowList.size() == 0) && !hasInternalText) {
                        translatedSegment = subSet.get(0);
                        // if DoMT fails to translate this, it returns -1.
                        if (translatedSegment == null || "-1".equals(translatedSegment)) {
                            translatedSegment = "";
                        }
                    } else {
                        List<?> items = MTHelper.getImmediateAndSubImmediateTextNodes(gxmlRoot);
                        // Only compose back when every text node got an
                        // answer; otherwise the segment content stays empty.
                        if (items != null && items.size() > 0 && items.size() == subSet.size()) {
                            for (int subIndex = 0; subIndex < items.size(); subIndex++) {
                                TextNode textNode = (TextNode) items.get(subIndex);
                                String trans = subSet.get(subIndex + 1);
                                if (trans == null || "-1".equals(trans)) {
                                    trans = "";
                                }
                                textNode.setTextBuffer(new StringBuffer(trans));
                            }
                            translatedSegment = GxmlUtil.stripRootTag(gxmlRoot.toGxml());
                        }
                    }
                    results[mainIndex] = heads[mainIndex] + translatedSegment + "</segment>";
                }
            }
        } catch (MachineTranslationException e) {
            // Pass the throwable so the cause and stack trace reach the log;
            // the message alone may be null for wrapped exceptions.
            logger.error(e.getMessage(), e);
        }

        return results;
    }

    /**
     * Send pure texts to DoMT engine for translation.
     * <p>
     * The texts returned by {@code MTHelper.getSegmentsInGxml} for every
     * segment are XML-encoded and sent in one XLIFF document, keyed by
     * {@link #composeKey(int, int)}. The translations are then written into
     * the text nodes of each segment's GXML tree.
     *
     * @param sourceLocale
     *            source locale.
     * @param targetLocale
     *            target locale.
     * @param segments
     *            segment GXML strings.
     * @return an array parallel to {@code segments}. Entries may stay
     *         {@code null} when the call to DoMT failed (the failure is
     *         logged).
     */
    private String[] translatePureText(Locale sourceLocale, Locale targetLocale, String[] segments) {
        String[] results = new String[segments.length];
        try {
            // Ensure the sequence will be unchanged after translation.
            HashMap<Integer, String> id2Segs = new HashMap<Integer, String>();
            for (int i = 0; i < segments.length; i++) {
                String[] segmentsFromGxml = MTHelper.getSegmentsInGxml(segments[i]);
                if (segmentsFromGxml == null || segmentsFromGxml.length < 1) {
                    // No text extracted: keep the segment as it is for now.
                    results[i] = segments[i];
                } else {
                    for (int count = 0; count < segmentsFromGxml.length; count++) {
                        id2Segs.put(composeKey(i, count), EditUtil.encodeXmlEntities(segmentsFromGxml[count]));
                    }
                }
            }

            if (id2Segs.size() > 0) {
                String srcXlf = getDoMtXliff(id2Segs, sourceLocale, targetLocale);
                if (MTHelper.isLogDetailedInfo(ENGINE_DOMT)) {
                    logger.info("Segments in XLF sending to DoMT:" + srcXlf);
                }

                String translatedXlf = hitDoMt(sourceLocale, targetLocale, srcXlf);
                if (MTHelper.isLogDetailedInfo(ENGINE_DOMT)) {
                    logger.info("Segments in XLF returned from DoMT:" + translatedXlf);
                }

                // id :: translated targets
                HashMap<Integer, String> targets = extractDoMtReturning(translatedXlf);

                HashMap<Integer, HashMap<Integer, String>> targetGroups = getTargetGroups(targets);

                for (int mainIndex = 0; mainIndex < segments.length; mainIndex++) {
                    HashMap<Integer, String> subSet = targetGroups.get(mainIndex);
                    GxmlElement gxmlRoot = MTHelper.getGxmlElement(segments[mainIndex]);
                    List<?> items = MTHelper.getImmediateAndSubImmediateTextNodes(gxmlRoot);
                    for (int subIndex = 0; subIndex < items.size(); subIndex++) {
                        TextNode textNode = (TextNode) items.get(subIndex);
                        if (subSet == null) {
                            // Nothing came back for this segment at all.
                            textNode.setTextBuffer(new StringBuffer(""));
                        } else {
                            String seg = subSet.get(subIndex);
                            // if DoMT fails to translate this, it returns -1.
                            if (seg == null || "-1".equals(seg)) {
                                seg = "";
                            }
                            textNode.setTextBuffer(new StringBuffer(seg));
                        }
                    }
                    results[mainIndex] = gxmlRoot.toGxml();
                }
            }
        } catch (MachineTranslationException e) {
            // Pass the throwable so the cause and stack trace reach the log;
            // the message alone may be null for wrapped exceptions.
            logger.error(e.getMessage(), e);
        }

        return results;
    }

    /**
     * Submits the XLIFF to the DoMT server over XML-RPC and polls for the
     * translated XLIFF.
     * <p>
     * The "run" API is invoked up to 5 times until
     * {@code DoMTUtil.isRunSucceed} accepts its result, whose first element is
     * then used as the job id. The "status" API is polled up to
     * {@code getMaxWaitTime() / 15} times. Every XML-RPC call is made while
     * holding the class-wide {@code LOCK}, after a 3 second pause.
     *
     * @param p_sourceLocale
     *            not used by this method.
     * @param p_targetLocale
     *            not used by this method.
     * @param p_xlf
     *            the XLIFF document to translate.
     * @return the translated XLIFF, or {@code null} when the job could not be
     *         started, did not complete in time, stopped or failed, returned
     *         blank content, or the calling thread was interrupted.
     * @throws MachineTranslationException
     *             wrapping any exception raised outside the two retry loops.
     */
    @SuppressWarnings("rawtypes")
    private String hitDoMt(Locale p_sourceLocale, Locale p_targetLocale, String p_xlf)
            throws MachineTranslationException {
        String translatedXlf = null;
        try {
            String url = (String) getMtParameterMap().get(MTProfileConstants.MT_DOMT_URL);
            XmlRpcClient client = DoMTUtil.getXmlRpcClient(url);

            String[] xliff = new String[] { p_xlf };
            Object[] params = new Object[] { xliff };
            Object[] returning = null;
            int runTimes = 0;
            // DoMT can't handle concurrent requests very well, if fail, recall
            // it at most 5 times.
            while (runTimes < 5) {
                runTimes++;
                try {
                    synchronized (LOCK) {
                        Thread.sleep(3000);
                        logger.info("invoke 'run' API times : " + runTimes);
                        returning = (Object[]) client.execute("run", params);
                    }

                    if (MTHelper.isLogDetailedInfo(ENGINE_DOMT)) {
                        DoMTUtil.logRunInfo(returning);
                    }

                    if (DoMTUtil.isRunSucceed(returning)) {
                        break;
                    }
                } catch (InterruptedException e) {
                    // Keep the interrupt visible to the caller and stop
                    // retrying; any further sleep would fail immediately.
                    Thread.currentThread().interrupt();
                    logger.warn("Interrupted while invoking DoMT 'run' API, giving up.");
                    return null;
                } catch (Exception e) {
                    logger.warn(e.getMessage(), e);
                }
            }

            if (DoMTUtil.isRunSucceed(returning)) {
                int statusTimes = 0;
                String jobId = (String) returning[0];
                String[] jobIds = new String[] { jobId };
                Object[] statusParams = new Object[4];
                statusParams[0] = jobIds;// job_ids
                statusParams[1] = Boolean.FALSE;// diagnostics
                statusParams[2] = Boolean.TRUE;// include_contents
                statusParams[3] = Boolean.FALSE;// delete

                HashMap status = null;
                // One poll per 15 seconds of the configured wait time
                // (integer division).
                int maxTimes = getMaxWaitTime() / 15;
                // Wait for "getMaxWaitTime()" seconds at most.
                while (statusTimes < maxTimes) {
                    statusTimes++;
                    try {
                        synchronized (LOCK) {
                            Thread.sleep(3000);
                            logger.info("invoke 'status' API times : " + statusTimes);
                            status = (HashMap) client.execute("status", statusParams);
                        }

                        if (status == null) {
                            // Nothing to inspect; try again on the next round.
                            logger.warn("DoMT 'status' API returned nothing for job '" + jobId + "'.");
                            continue;
                        }

                        if (MTHelper.isLogDetailedInfo(ENGINE_DOMT)) {
                            DoMTUtil.logStatusInfo(status, jobId);
                        }

                        String jobStatus = null;
                        HashMap jobValues = (HashMap) status.get(jobId);
                        if (jobValues != null) {
                            jobStatus = (String) jobValues.get(DoMTUtil.KEY_STATUS);
                        }
                        String subStatus = null;
                        HashMap serverValues = (HashMap) status.get(DoMTUtil.KEY_SERVER);
                        if (serverValues != null) {
                            subStatus = (String) serverValues.get(DoMTUtil.KEY_SUBSTATUS);
                        }

                        if (DoMTUtil.JOB_STATUS_COMPLETED.equalsIgnoreCase(jobStatus)) {
                            Object[] content = (Object[]) jobValues.get(DoMTUtil.KEY_CONTENT);
                            if (content != null && content.length > 0) {
                                translatedXlf = (String) content[0];
                                break;
                            }
                        } else if (DoMTUtil.JOB_STATUS_STOPPED.equalsIgnoreCase(jobStatus)
                                || DoMTUtil.JOB_STATUS_FAILED.equalsIgnoreCase(jobStatus)) {
                            logger.warn("Job is in '" + jobStatus + "' status, stop waiting and return.");
                            break;
                        } else if ("errors".equalsIgnoreCase(subStatus)) {
                            // Server reports errors: poll again without the
                            // extra 15 second wait.
                        } else {
                            logger.info("Wait for 15 seconds before next invoking 'status'...");
                            Thread.sleep(15000);// 15 seconds
                        }
                    } catch (InterruptedException e) {
                        // Keep the interrupt visible to the caller and stop
                        // polling.
                        Thread.currentThread().interrupt();
                        logger.warn("Interrupted while waiting for DoMT job '" + jobId + "', giving up.");
                        break;
                    } catch (Exception e) {
                        logger.warn(e.getMessage(), e);
                    }
                }

                if (org.apache.commons.lang3.StringUtils.isBlank(translatedXlf)) {
                    return null;
                }
            }
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new MachineTranslationException(e);
        }

        return translatedXlf;
    }

    /**
     * Builds the XLIFF 1.2 document sent to DoMT: one {@code trans-unit} per
     * map entry, with the map key as unit id and the map value as source
     * content. The configured engine name goes into the header's
     * {@code graphname} element.
     *
     * @param segmentsNoTag
     *            composed key :: source content; the content is appended
     *            as-is, without further escaping.
     * @param sourceLocale
     *            source locale.
     * @param targetLocale
     *            target locale.
     * @return the XLIFF document as a string.
     */
    private String getDoMtXliff(HashMap<Integer, String> segmentsNoTag, Locale sourceLocale, Locale targetLocale) {
        String srcLang = DoMTUtil.checkLang(sourceLocale.getLanguage(), sourceLocale.getCountry());
        String trgLang = DoMTUtil.checkLang(targetLocale.getLanguage(), targetLocale.getCountry());
        String graphName = (String) getMtParameterMap().get(MTProfileConstants.MT_DOMT_ENGINE_NAME);

        // "translate-xliff" is a default engine on DoMT server, it is using
        // "nl" and "en" as source/target language name.
        if (DEFAULT_ENGINE_NAME.equals(graphName)) {
            srcLang = "nl";
            trgLang = "en";
        }

        StringBuilder doc = new StringBuilder();
        doc.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n")
                .append("<xliff version=\"1.2\">\r\n")
                .append("<file original=\"None\" source-language=\"").append(srcLang)
                .append("\" target-language=\"").append(trgLang)
                .append("\" datatype=\"multi-format\">\r\n")
                .append("<header>\r\n")
                .append("  <note from=\"PTTOOLS\">\r\n")
                .append("    <graphname>").append(graphName).append("</graphname>\r\n")
                .append("  </note>\r\n")
                .append("</header>\r\n")
                .append("<body>\r\n");

        for (Entry<Integer, String> unit : segmentsNoTag.entrySet()) {
            int unitId = unit.getKey();
            doc.append("<trans-unit id=\"").append(unitId).append("\" translate=\"yes\">\r\n");
            doc.append("<source>").append(unit.getValue()).append("</source>\r\n");
            doc.append("</trans-unit>\r\n");
        }

        doc.append("</body>\r\n").append("</file>\r\n").append("</xliff>\r\n");

        return doc.toString();
    }

    /**
     * Extract to get the target translations in DoMT returned xliff string.
     * <p>
     * For every {@code trans-unit} the first {@code target} child is
     * serialized, its root tag is stripped and the content is stored under the
     * unit's integer {@code id}. A unit whose {@code id} is missing or not an
     * integer is skipped with a warning, so one bad unit does not discard the
     * translations of the others.
     *
     * @param mtReturning
     *            -- xliff string DoMT returned.
     * @return -- tuid : target map; empty when the input is empty or cannot
     *         be parsed (the failure is logged).
     */
    private HashMap<Integer, String> extractDoMtReturning(String mtReturning) {
        HashMap<Integer, String> targets = new HashMap<Integer, String>();

        if (StringUtil.isEmpty(mtReturning)) {
            return targets;
        }

        try {
            DOMParser parser = new DOMParser();
            parser.setFeature("http://xml.org/sax/features/validation", false);
            // The document comes from a remote server: never resolve external
            // entities or fetch external DTDs while parsing it.
            parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
            parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            parser.parse(new InputSource(new StringReader(mtReturning)));

            Element doc = parser.getDocument().getDocumentElement();
            NodeList tuNodeList = doc.getElementsByTagName("trans-unit");
            int tuCount = (tuNodeList == null) ? 0 : tuNodeList.getLength();
            for (int i = 0; i < tuCount; i++) {
                Node tuNode = tuNodeList.item(i);

                // Only the first <target> child of a unit is used.
                Node targetNode = null;
                NodeList tuSubNodeList = tuNode.getChildNodes();
                int childCount = (tuSubNodeList == null) ? 0 : tuSubNodeList.getLength();
                for (int j = 0; j < childCount; j++) {
                    Node tuSubNode = tuSubNodeList.item(j);
                    if ("target".equalsIgnoreCase(tuSubNode.getNodeName())) {
                        targetNode = tuSubNode;
                        break;
                    }
                }
                if (targetNode == null) {
                    continue;
                }

                Node idAttr = tuNode.getAttributes().getNamedItem("id");
                if (idAttr == null) {
                    logger.warn("Skip a DoMT trans-unit without 'id' attribute.");
                    continue;
                }
                int id;
                try {
                    id = Integer.parseInt(idAttr.getNodeValue().trim());
                } catch (NumberFormatException e) {
                    logger.warn("Skip a DoMT trans-unit with non-numeric id '" + idAttr.getNodeValue() + "'.");
                    continue;
                }

                String target = MTHelper.outputNode2Xml(targetNode);
                if (StringUtil.isNotEmpty(target)) {
                    // Drop anything serialized in front of the <target ...>
                    // element itself.
                    int index = target.indexOf("<target ");
                    if (index > -1) {
                        target = target.substring(index);
                    }
                }
                targets.put(id, GxmlUtil.stripRootTag(target));
            }
        } catch (Exception e) {
            logger.error("Fail to extract the DoMT translated xliff", e);
        }

        return targets;
    }

    /**
     * Regroups the flat "composed key :: target" map by segment.
     * <p>
     * Every composed key is split by {@link #getIndexes(int)} into the
     * segment's index in the batch and the sub text index. Entries whose key
     * cannot be split ({@code getIndexes} returns {@code null}) are skipped
     * instead of failing the whole batch with a NullPointerException.
     *
     * @param targets
     *            composed key :: translated target.
     * @return main index :: (sub index :: translated target).
     */
    private HashMap<Integer, HashMap<Integer, String>> getTargetGroups(HashMap<Integer, String> targets) {
        HashMap<Integer, HashMap<Integer, String>> result = new HashMap<Integer, HashMap<Integer, String>>();

        for (Entry<Integer, String> entry : targets.entrySet()) {
            int[] indexes = getIndexes(entry.getKey());
            if (indexes == null) {
                logger.warn("Skip DoMT target with unexpected trans-unit id: " + entry.getKey());
                continue;
            }

            HashMap<Integer, String> subSet = result.get(indexes[0]);
            if (subSet == null) {
                subSet = new HashMap<Integer, String>();
                result.put(indexes[0], subSet);
            }
            subSet.put(indexes[1], entry.getValue());
        }

        return result;
    }

    /**
     * Composes the numeric trans-unit id for one text of one segment.
     * <p>
     * The key is the concatenation of three parts: the digit "1" as a start;
     * "indexInArray" as the middle; and the sub text index, left-padded with
     * zeros to three digits when it is below 100, as the end. For example
     * (2, 5) gives 12005.
     *
     * @param indexInArray
     *            index of the segment in the batch array.
     * @param subTextIndex
     *            index of the text inside the segment.
     * @return the composed key.
     */
    private int composeKey(int indexInArray, int subTextIndex) {
        String zeroPadding;
        if (subTextIndex < 10) {
            zeroPadding = "00";
        } else if (subTextIndex < 100) {
            zeroPadding = "0";
        } else {
            zeroPadding = "";
        }

        StringBuilder key = new StringBuilder("1");
        key.append(indexInArray).append(zeroPadding).append(subTextIndex);
        return Integer.parseInt(key.toString());
    }

    /**
     * Get the main index and sub index in array.
     * <p>
     * The first character of the key's decimal form is dropped, the last
     * three characters are the sub index and everything in between is the
     * main index. For example 12005 gives {2, 5}.
     *
     * @param composedKey
     *            the key to split.
     * @return {mainIndex, subIndex}, or {@code null} when the key does not
     *         have that layout (a warning is logged).
     */
    private int[] getIndexes(int composedKey) {
        String key = String.valueOf(composedKey);
        try {
            int subStart = key.length() - 3;
            // Integer.parseInt accepts leading zeros, so "005" parses to 5.
            int mainIndex = Integer.parseInt(key.substring(1, subStart));
            int subIndex = Integer.parseInt(key.substring(subStart));

            return new int[] { mainIndex, subIndex };
        } catch (RuntimeException e) {
            // Too short (substring fails) or no main index digits (parse
            // fails): report it instead of swallowing silently.
            logger.warn("Unexpected DoMT trans-unit key, cannot split it: " + key);
            return null;
        }
    }

    /**
     * Reads the maximum time, in seconds, to wait for a DoMT job from the MT
     * configuration key "domt.max.wait.timeout".
     *
     * @return the configured value, or 900 (15 minutes) when the key is
     *         missing, blank, or cannot be read as an integer (the problem is
     *         logged).
     */
    private static int getMaxWaitTime() {
        // Default 15 minutes.
        final int defaultWaitTime = 900;

        String param = null;
        try {
            param = MTHelper.getMTConfig("domt.max.wait.timeout");
            if (param != null && param.trim().length() > 0) {
                return Integer.parseInt(param.trim());
            }
        } catch (Exception e) {
            // Fall back to the default, but make the misconfiguration visible.
            logger.warn("Cannot use 'domt.max.wait.timeout' (value: '" + param + "'), default to "
                    + defaultWaitTime + " seconds.", e);
        }
        return defaultWaitTime;
    }
}