de.elomagic.vaadin.addon.speechrecognition.SpeechRecognition.java Source code

Java tutorial

Introduction

Here is the source code for de.elomagic.vaadin.addon.speechrecognition.SpeechRecognition.java

Source

/*
 * Copyright 2014 Carsten Rambow.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.elomagic.vaadin.addon.speechrecognition;

import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.json.JSONArray;
import org.json.JSONException;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.vaadin.annotations.JavaScript;
import com.vaadin.ui.AbstractJavaScriptComponent;
import com.vaadin.ui.JavaScriptFunction;

import de.elomagic.vaadin.addon.speechrecognition.SpeechRecognitionEvent.Type;

/**
 * SpeechSynthesis is a non visualization class for controlling a text-to-speech output.
 * <p/>
 * This class is experimental and runs only under latest version of Google Chrome browsers.
 *
 * @author Carsten Rambow
 */
@JavaScript({ "js/speechrecognition-connector.js" })
public class SpeechRecognition extends AbstractJavaScriptComponent {
    private final List<SpeechRecognitionListener> eventListener = new ArrayList<>();

    public SpeechRecognition() {
        super();

        addFunction("onaudiostart", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.AudioStart);
            }
        });
        addFunction("onsoundstart", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.SoundStart);
            }
        });
        addFunction("onspeechstart", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.SpeechStart);
            }
        });
        addFunction("onspeechend", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.SpeechEnd);
            }
        });
        addFunction("onsoundend", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.SoundEnd);
            }
        });
        addFunction("onaudioend", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.AudioEnd);
            }
        });
        addFunction("onresult", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.Result);
            }
        });
        addFunction("onnomatch", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.NoMatch);
            }
        });
        addFunction("onerror", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.Error);
            }
        });
        addFunction("onstart", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.Start);
            }
        });
        addFunction("onend", new JavaScriptFunction() {

            @Override
            public void call(final JSONArray arguments) throws JSONException {
                fireEvent(arguments, Type.End);
            }
        });

    }

    /**
     * When the continuous attribute is set to false, the user agent must return no more than one final result in response to starting recognition, for example a single turn pattern of interaction.
     * When the continuous attribute is set to true, the user agent must return zero or more final results representing multiple consecutive recognitions in response to starting recognition, for
     * example a dictation. The default value must be false. Note, this attribute setting does not affect interim results.
     *
     * @param continuous
     */
    public void setContinuous(final boolean continuous) {
        getState().continuous = continuous;
    }

    /**
     * Controls whether interim results are returned. When set to true, interim results should be returned. When set to false, interim results must NOT be returned. The default value must be false.
     * Note, this attribute setting does not affect final results.
     *
     * @param interimResults
     */
    public void setInterimResults(final boolean interimResults) {
        getState().interimResults = interimResults;
    }

    /**
     * This attribute will set the language of the recognition for the request, using a valid BCP 47 language tag. [BCP47] If unset it remains unset for getting in script, but will default to use the
     * lang of the HTML document root element and associated hierachy. This default value is computed and used when the input request opens a connection to the recognition service.
     *
     * @param lang
     */
    public void setLang(final String lang) {
        getState().lang = lang;
    }

    /**
     * This attribute will set the maximum number of SpeechRecognitionAlternatives per result. The default value is 1.
     *
     * @param maxAlternatives
     */
    public void setMaxAlternatives(final int maxAlternatives) {
        getState().maxAlternatives = maxAlternatives;
    }

    /**
     * The serviceURI attribute specifies the location of the speech recognition service that the web application wishes to use. If this attribute is unset at the time of the start method call, then
     * the user agent must use the user agent default speech service. Note that the serviceURI is a generic URI and can thus point to local services either through use of a URN with meaning to the
     * user agent or by specifying a URL that the user agent recognizes as a local service. Additionally, the user agent default can be local or remote and can incorporate end user choices via
     * interfaces provided by the user agent such as browser configuration parameters. [Editor note: The group is currently discussing whether WebRTC might be used to specify selection of audio
     * sources and remote recognizers.]
     *
     * @param uri
     */
    public void setServiceURI(final URI uri) {
        getState().serviceURI = uri == null ? "" : uri.toString();
    }

    /**
     * When the start method is called it represents the moment in time the web application wishes to begin recognition. When the speech input is streaming live through the input media stream, then
     * this start call represents the moment in time that the service <b>must</b> begin to listen and try to match the grammars associated with this request. Once the system is successfully listening
     * to the recognition the user agent <b>must</b> raise a start event. If the start method is called on an already started object (that is, start has previously been called, and no error or end
     * event has fired on the object), the user agent <b>must</b> throw an InvalidStateError exception and ignore the call.
     */
    public void start() throws IllegalStateException {
        callFunction("start");
    }

    /**
     * The stop method represents an instruction to the recognition service to stop listening to more audio, and to try and return a result using just the audio that it has already received for this
     * recognition. A typical use of the stop method might be for a web application where the end user is doing the end pointing, similar to a walkie-talkie. The end user might press and hold the
     * space bar to talk to the system and on the space down press the start call would have occurred and when the space bar is released the stop method is called to ensure that the system is no
     * longer listening to the user. Once the stop method is called the speech service <b>must not</b> collect additional audio and <b>must not</b> continue to listen to the user. The speech service
     * <b>must</b> attempt to return a recognition result (or a nomatch) based on the audio that it has already collected for this recognition. If the stop method is called on an object which is
     * already stopped or being stopped (that is, start was never called on it, the end or error event has fired on it, or stop was previously called on it), the user agent <b>must</b> ignore the
     * call.
     */
    public void stop() {
        callFunction("stop");
    }

    /**
     * The abort method is a request to immediately stop listening and stop recognizing and do not return any information but that the system is done. When the abort method is called, the speech
     * service <b>must</b> stop recognizing. The user agent <b>must</b> raise an end event once the speech service is no longer connected. If the abort method is called on an object which is already
     * stopped or aborting (that is, start was never called on it, the end or error event has fired on it, or abort was previously called on it), the user agent <b>must</b> ignore the call.
     */
    public void abort() {
        callFunction("abort");
    }

    @Override
    protected SpeechRecognitionState getState() {
        return (SpeechRecognitionState) super.getState();
    }

    private void fireEvent(final JSONArray arguments, final Type type) {
        try {
            SpeechRecognitionEvent event = mapEvent(arguments, type);
            for (SpeechRecognitionListener listener : eventListener) {
                listener.onSpeechRecognitionEvent(event);
            }
        } catch (JSONException ex) {
            ex.printStackTrace(System.err);
        }
    }

    private static Gson createGson() {
        return new GsonBuilder()
                .registerTypeAdapter(SpeechRecognitionResults.class, new SpeechRecognitionResultsDeserializer())
                .registerTypeAdapter(SpeechRecognitionResult.class, new SpeechRecognitionResultDeserializer())
                .create();
    }

    private SpeechRecognitionEvent mapEvent(final JSONArray arguments, final Type type) throws JSONException {
        Gson gson = createGson();

        SpeechRecognitionEventData data = gson.fromJson(arguments.getJSONObject(0).toString(),
                SpeechRecognitionEventData.class);

        return new SpeechRecognitionEvent(this, type, data, arguments.getJSONObject(0));
    }

    /**
     * Adds an event listener.
     * <p/>
     * Will be called even an event occur.
     *
     * @param listener
     */
    public void addSpeechRecognitionListener(final SpeechRecognitionListener listener) {
        if (eventListener.contains(listener)) {
            return;
        }

        eventListener.add(listener);
    }

    /**
     * Removes an event listener.
     *
     * @param listener
     */
    public void removeSpeechRecognitionListener(final SpeechRecognitionListener listener) {
        eventListener.remove(listener);
    }

}