babel.util.language.GoogleLangDetector.java Source code

Java tutorial

Introduction

Here is the source code for babel.util.language.GoogleLangDetector.java

Source

/**
 * This file is licensed to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package babel.util.language;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

import org.json.JSONObject;

/**
 * Uses Google Language API for language detection. Partly based on detection
 * code in google-api-translate-java.
 *
 * <p>The text to identify is URL-encoded, shortened to at most
 * {@link #MAX_TEXT_LENGTH} encoded characters, appended to {@link #URL} and
 * sent with an HTTP GET. The JSON reply is turned into a
 * {@code LangDetectionResult}.
 */
public class GoogleLangDetector implements LangDetector {
    /** Maximum length of the URL-encoded string sent to Google for identification. */
    protected static final int MAX_TEXT_LENGTH = 1800; // Should be 5000, but longer strings get dropped.
    /** Detection endpoint; the URL-encoded text is appended as the value of {@code q}. */
    protected static final String URL = "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=";
    /** Character encoding used for URL-encoding the query and for decoding the response. */
    protected static final String ENCODING = "UTF-8";
    /**
     * Character that introduces a percent-escape in URL-encoded text. Not read
     * by this class itself; the declaration is kept unchanged because it is
     * protected and may be referenced from subclasses.
     */
    protected static byte ESCAPE_CHAR = '%';

    /** Used to tell Google the origin of the request. */
    protected String m_referrer;

    /**
     * @param referrer URL of the organization originating the request.
     * @throws IllegalArgumentException if {@code referrer} is null or empty.
     */
    public GoogleLangDetector(String referrer) {
        if (referrer == null || referrer.length() == 0) {
            throw new IllegalArgumentException("Referrer must be supplied.");
        }

        m_referrer = referrer;
    }

    /**
     * Detects the language of a supplied string.
     *
     * @param text text to identify; when null or empty no request is made and
     *        a result constructed from {@code null} is returned.
     * @return result built from the {@code language}, {@code confidence} and
     *         {@code isReliable} fields of the reply's {@code responseData} object.
     * @throws Exception if the request fails or the reply lacks the expected fields.
     */
    @Override
    public LangDetectionResult detect(final String text) throws Exception {
        if (text == null || text.length() == 0) {
            return new LangDetectionResult(null);
        }

        final JSONObject json = retrieveJSON(new URL(URL + encodeAndTrim(text)));
        // Look the payload object up once instead of once per field.
        final JSONObject responseData = json.getJSONObject("responseData");

        return new LangDetectionResult(
                Language.fromString(responseData.getString("language")),
                responseData.getDouble("confidence"),
                responseData.getBoolean("isReliable"));
    }

    /**
     * Encodes and shortens a string, taking care not to leave an incomplete
     * character at the end.
     *
     * <p>The input is URL-encoded one code point at a time and encoding stops
     * before the first code point whose encoded form would push the result past
     * {@link #MAX_TEXT_LENGTH}. Cutting on code point boundaries guarantees that
     * neither a single {@code %XX} escape nor a multi-byte character (which
     * encodes to several consecutive {@code %XX} escapes) is split, and it
     * avoids encoding text that would be thrown away anyway.
     *
     * @param str original string.
     * @return URL-encoded string of at most {@link #MAX_TEXT_LENGTH} characters.
     * @throws Exception if {@link #ENCODING} is not supported.
     */
    protected String encodeAndTrim(String str) throws Exception {
        final int length = str.length();
        final StringBuilder encoded = new StringBuilder(Math.min(length, MAX_TEXT_LENGTH));

        int idx = 0;
        while (idx < length) {
            // Keep surrogate pairs together so they are encoded as one character.
            final int charCount = Character.charCount(str.codePointAt(idx));
            final String piece = URLEncoder.encode(str.substring(idx, idx + charCount), ENCODING);

            if (encoded.length() + piece.length() > MAX_TEXT_LENGTH) {
                break; // The next character does not fit completely - drop it and the rest.
            }

            encoded.append(piece);
            idx += charCount;
        }

        return encoded.toString();
    }

    /**
     * Forms an HTTP request, sends it using GET method and returns the result of
     * the request as a JSONObject.
     *
     * @param url the URL to query for a JSONObject.
     * @return the response body parsed as JSON.
     * @throws Exception wrapping any failure to connect, read or parse.
     */
    protected JSONObject retrieveJSON(final URL url) throws Exception {
        try {
            final HttpURLConnection uc = (HttpURLConnection) url.openConnection();
            uc.setRequestProperty("referer", m_referrer);
            uc.setRequestMethod("GET");
            // No request body is written, so output mode is not enabled.

            InputStream in = null;

            try {
                in = uc.getInputStream();

                return new JSONObject(inputStreamToString(in));
            } finally { // http://java.sun.com/j2se/1.5.0/docs/guide/net/http-keepalive.html
                // Close whichever streams exist. The input stream is held in a
                // local rather than re-requested from the connection: asking
                // again after a failed request would throw once more and skip
                // the error stream cleanup.
                closeQuietly(in);
                closeQuietly(uc.getErrorStream());
            }
        } catch (Exception ex) {
            throw new Exception("Error retrieving detection result : " + ex.toString(), ex);
        }
    }

    /**
     * Closes a stream if it is non-null, ignoring failures.
     *
     * @param stream stream to close, may be null.
     */
    private static void closeQuietly(final InputStream stream) {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException ignored) {
                // Best-effort cleanup; a failure to close must not hide the
                // outcome of the request itself.
            }
        }
    }

    /**
     * Reads an InputStream and returns its contents as a String. The stream is
     * decoded using {@link #ENCODING} and read line by line; every line is
     * terminated with a single {@code '\n'} in the result. The stream is not
     * closed here; that is left to the caller.
     * 
     * @param inputStream InputStream to read from; null yields an empty string.
     * @return the contents of the InputStream as a String.
     * @throws Exception wrapping any failure while reading.
     */
    private String inputStreamToString(final InputStream inputStream) throws Exception {
        final StringBuilder outputBuilder = new StringBuilder();

        try {
            if (inputStream != null) {
                final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, ENCODING));

                String line;
                while (null != (line = reader.readLine())) {
                    outputBuilder.append(line).append('\n');
                }
            }
        } catch (Exception ex) {
            throw new Exception("Error reading translation stream : " + ex.toString(), ex);
        }

        return outputBuilder.toString();
    }
}