org.omegat.languagetools.LanguageToolNetworkBridge.java Source code

Java tutorial

Introduction

Here is the source code for org.omegat.languagetools.LanguageToolNetworkBridge.java

Source

/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
 with fuzzy matching, translation memory, keyword search,
 glossaries, and translation leveraging into updated projects.
    
 Copyright (C) 2016 Lev Abashkin
 Home page: http://www.omegat.org/
 Support center: http://groups.yahoo.com/group/OmegaT/
    
 This file is part of OmegaT.
    
 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
    
 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
    
 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/
package org.omegat.languagetools;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.omegat.util.JsonParser;
import org.omegat.util.Language;
import org.omegat.util.Log;
import org.omegat.util.OStrings;

public class LanguageToolNetworkBridge extends BaseLanguageToolBridge {

    /* Constants */
    private final static String CHECK_PATH = "/v2/check";
    private final static String LANGS_PATH = "/v2/languages";
    private final static String SERVER_CLASS_NAME = "org.languagetool.server.HTTPServer";
    private final static String API_VERSION = "1";

    /* Instance scope fields */
    private Process server;
    private int localPort;
    private String serverUrl;

    /* Project scope fields */
    private Language sourceLang, targetLang;
    private String disabledCategories, disabledRules, enabledRules;

    /**
     * Get instance talking to remote server
     *
     * @param url
     *            URL of remote LanguageTool server
     * @return new LanguageToolNetworkBridge instance
     * @throws java.lang.Exception
     */
    public LanguageToolNetworkBridge(Language sourceLang, Language targetLang, String url) throws Exception {
        // Try to connect URL
        if (!testServer(url)) {
            Log.logWarningRB("LT_BAD_URL");
            throw new Exception();
        }
        // OK, URL seems valid, let's use it.
        serverUrl = url;
        init(sourceLang, targetLang);
    }

    /**
     * Get instance spawning and talking to local server
     *
     * @param path
     *            local LanguageTool directory
     * @param port
     *            local port for spawned server to listen
     * @return new LanguageToolNetworkBridge instance
     * @throws java.lang.Exception
     */
    public LanguageToolNetworkBridge(Language sourceLang, Language targetLang, String path, int port)
            throws Exception {
        // Remember port
        localPort = port;

        File serverJar = new File(path);

        // Check if ClassPath points to a real file
        if (!serverJar.isFile()) {
            Log.logWarningRB("LT_BAD_LOCAL_PATH");
            throw new Exception();
        }

        // Check if socket is available
        try {
            new ServerSocket(port).close();
        } catch (Exception e) {
            Log.logWarningRB("LT_BAD_SOCKET");
            throw new Exception();
        }
        // Run the server
        ProcessBuilder pb = new ProcessBuilder("java", "-cp", serverJar.getAbsolutePath(), SERVER_CLASS_NAME,
                "--port", Integer.toString(port));
        pb.redirectErrorStream(true);
        server = pb.start();

        // Create thread to consume server output
        new Thread(() -> {
            try (InputStream is = server.getInputStream()) {
                @SuppressWarnings("unused")
                int b;
                while ((b = is.read()) != -1) {
                    // Discard
                }
            } catch (IOException e) {
                // Do nothing
            }
        }).start();

        // Wait for server to start
        int timeout = 10000;
        int timeWaiting = 0;
        int interval = 10;
        while (true) {
            Thread.sleep(interval);
            timeWaiting += interval;
            try {
                new Socket("localhost", port).close();
                break;
            } catch (Exception e) {
            }
            if (timeWaiting >= timeout) {
                Log.logWarningRB("LT_SERVER_START_TIMEOUT");
                server.destroy();
                throw new Exception();
            }
        }

        serverUrl = "http://localhost:" + port + CHECK_PATH;
        Log.log(OStrings.getString("LT_SERVER_STARTED"));
        try {
            init(sourceLang, targetLang);
        } catch (Exception ex) {
            stop();
            throw ex;
        }
    }

    /**
     * Common initialization for both constructors
     * 
     * @throws Exception
     *             If unable to determine the server's supported languages
     */
    private void init(Language sourceLang, Language targetLang) throws Exception {
        List<Object> serverLanguages = getSupportedLanguages();
        this.sourceLang = negotiateLanguage(serverLanguages, sourceLang);
        this.targetLang = negotiateLanguage(serverLanguages, targetLang);
        Log.log("Negotiated LanguageTool source language: " + this.sourceLang);
        Log.log("Negotiated LanguageTool target language: " + this.targetLang);
    }

    @Override
    public synchronized void stop() {
        if (server != null) {
            try {
                server.destroy();
                // Wait for server to release socket
                while (true) {
                    try {
                        new Socket("localhost", localPort).close();
                    } catch (Exception e) {
                        break;
                    }
                }
                Log.log(OStrings.getString("LT_SERVER_TERMINATED"));
                server = null;
            } catch (Exception ex) {
                Log.log(ex);
            }
        }
    }

    @Override
    public void applyRuleFilters(Set<String> disabledCategories, Set<String> disabledRules,
            Set<String> enabledRules) {
        this.disabledCategories = String.join(",", disabledCategories);
        this.disabledRules = String.join(",", disabledRules);
        this.enabledRules = String.join(",", enabledRules);
    }

    @Override
    @SuppressWarnings("unchecked")
    protected List<LanguageToolResult> getCheckResultsImpl(String sourceText, String translationText)
            throws Exception {
        if (targetLang == null) {
            return Collections.emptyList();
        }

        URL url = new URL(serverUrl);
        URLConnection conn = url.openConnection();
        conn.setRequestProperty("User-Agent", OStrings.getNameAndVersion());
        conn.setDoOutput(true);
        try (OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream())) {
            String srcLang = sourceLang == null ? null : sourceLang.toString();
            writer.write(buildPostData(srcLang, targetLang.toString(), sourceText, translationText,
                    disabledCategories, disabledRules, enabledRules));
            writer.flush();
        }

        checkHttpError(conn);

        String json = "";
        try (InputStream in = conn.getInputStream()) {
            json = IOUtils.toString(in, StandardCharsets.UTF_8);
        }

        Map<String, Object> response = (Map<String, Object>) JsonParser.parse(json);
        Map<String, String> software = (Map<String, String>) response.get("software");

        if (!software.get("apiVersion").equals(API_VERSION)) {
            Log.logWarningRB("LT_API_VERSION_MISMATCH");
        }

        List<Map<String, Object>> matches = (List<Map<String, Object>>) response.get("matches");

        return matches.stream().map(match -> {
            String message = addSuggestionTags((String) match.get("message"));
            int start = (int) match.get("offset");
            int end = start + (int) match.get("length");
            Map<String, Object> rule = (Map<String, Object>) match.get("rule");
            String ruleId = (String) rule.get("id");
            String ruleDescription = (String) rule.get("description");
            return new LanguageToolResult(message, start, end, ruleId, ruleDescription);
        }).collect(Collectors.toList());
    }

    @SuppressWarnings("unchecked")
    protected List<Object> getSupportedLanguages() throws Exception {
        // This is a really stupid way to get the /languages endpoint URL, but it'll do for now.
        String langsUrl = serverUrl.replace(CHECK_PATH, LANGS_PATH);

        URL url = new URL(langsUrl);
        URLConnection conn = url.openConnection();
        conn.setRequestProperty("User-Agent", OStrings.getNameAndVersion());
        conn.setDoOutput(true);

        checkHttpError(conn);

        String json = "";
        try (InputStream in = conn.getInputStream()) {
            json = IOUtils.toString(in, StandardCharsets.UTF_8);
        }

        return (List<Object>) JsonParser.parse(json);
    }

    static void checkHttpError(URLConnection conn) throws Exception {
        if (conn instanceof HttpURLConnection) {
            HttpURLConnection httpConn = (HttpURLConnection) conn;
            if (httpConn.getResponseCode() != 200) {
                try (InputStream err = httpConn.getErrorStream()) {
                    String errMsg = IOUtils.toString(err, StandardCharsets.UTF_8);
                    throw new Exception(errMsg);
                }
            }
        }
    }

    /**
     * Replace double quotes with <suggestion></suggestion> tags
     * in error message to imitate native LanguageTool behavior
     */
    static String addSuggestionTags(String str) {
        return str.replaceAll("^([^:]+:\\s?)\"([^']+)\"", "$1<suggestion>$2</suggestion>");
    }

    /**
     * Construct POST request data
     */
    static String buildPostData(String sourceLang, String targetLang, String sourceText, String targetText,
            String disabledCategories, String disabledRules, String enabledRules)
            throws UnsupportedEncodingException {
        String encoding = "UTF-8";
        StringBuilder result = new StringBuilder();
        result.append("text=").append(URLEncoder.encode(targetText, encoding)).append("&language=")
                .append(URLEncoder.encode(targetLang, encoding));
        if (sourceText != null && sourceLang != null) {
            result.append("&srctext=").append(URLEncoder.encode(sourceText, encoding)).append("&motherTongue=")
                    .append(URLEncoder.encode(sourceLang, encoding));
        }
        if (disabledCategories != null) {
            result.append("&disabledCategories=").append(URLEncoder.encode(disabledCategories, encoding));
        }
        if (disabledRules != null) {
            result.append("&disabledRules=").append(URLEncoder.encode(disabledRules, encoding));
        }
        if (enabledRules != null) {
            result.append("&enabledRules=").append(URLEncoder.encode(enabledRules, encoding));
        }
        return result.toString();
    }

    /**
     * Try to talk with LT server and return result
     */
    static boolean testServer(String testUrl) {
        if (testUrl.trim().toLowerCase(Locale.ENGLISH).startsWith("https://languagetool.org/api/v2/check")) {
            // Blacklist the official LanguageTool public API specifically
            // because this is what users are most likely to try, but they ask
            // not to send automated requests:
            // http://wiki.languagetool.org/public-http-api
            return false;
        }
        try {
            URL url = new URL(testUrl);
            URLConnection conn = url.openConnection();
            conn.setDoOutput(true);
            try (OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream())) {
                // Supply a dummy disabled category to force the server to take
                // its configuration from this query only, not any server-side
                // config.
                writer.write(buildPostData(null, "en-US", null, "Test", "FOO", null, null));
                writer.flush();
            }
            checkHttpError(conn);
            try (InputStream in = conn.getInputStream()) {
                String response = IOUtils.toString(in, StandardCharsets.UTF_8);
                if (response.contains("<?xml")) {
                    Log.logErrorRB("LT_WRONG_FORMAT_RESPONSE");
                    return false;
                } else {
                    return true;
                }
            }
        } catch (Exception e) {
            Log.log(e);
            return false;
        }
    }

    /**
     * Find the best-matching language from the provided options.
     * 
     * @param serverLangs
     *            The raw response objects from {@link #getSupportedLanguages()}
     * @param desiredLang
     *            The language to match
     * @return The best-matching language, or null if no languages matched at all
     */
    @SuppressWarnings("unchecked")
    static Language negotiateLanguage(List<Object> serverLangs, Language desiredLang) {
        // Search for full xx-YY match
        String omLocale = desiredLang.getLanguage();
        for (Object obj : serverLangs) {
            Map<String, String> lang = (Map<String, String>) obj;
            if (omLocale.equalsIgnoreCase(lang.get("longCode"))) {
                return desiredLang;
            }
        }

        // Search for just xx match
        String omLang = desiredLang.getLanguageCode();
        for (Object obj : serverLangs) {
            Map<String, String> lang = (Map<String, String>) obj;
            if (omLang.equalsIgnoreCase(lang.get("longCode"))) {
                return new Language(desiredLang.getLanguageCode());
            }
        }
        for (Object obj : serverLangs) {
            Map<String, String> lang = (Map<String, String>) obj;
            if (omLang.equalsIgnoreCase(lang.get("code"))) {
                return new Language(desiredLang.getLanguageCode());
            }
        }
        return null;
    }
}