iristk.speech.nuancecloud.NuanceCloudRecognizerListener.java Source code

Java tutorial

Introduction

Here is the source code for iristk.speech.nuancecloud.NuanceCloudRecognizerListener.java

Source

/*******************************************************************************
 * Copyright (c) 2014 Gabriel Skantze.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/gpl.html
 * 
 * Contributors:
 *     Gabriel Skantze - initial API and implementation
 ******************************************************************************/
package iristk.speech.nuancecloud;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.sound.sampled.AudioFormat;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIUtils;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;

import iristk.speech.RecHyp;
import iristk.speech.RecResult;
import iristk.speech.RecognizerListener;
import iristk.speech.ResultPolicy;
import iristk.util.BlockingByteQueue;
import iristk.util.Language;

public class NuanceCloudRecognizerListener implements RecognizerListener {

    /** Maximum time in milliseconds that recognitionResult() waits (via join) for the POST thread to finish. */
    public static int REQUEST_TIMEOUT = 5000;

    // Sent as the "id" query parameter of every request.
    private String DEVICE_ID = "12345";
    // Sent as both the Content-Language and the Accept-Language header; changed through setLanguage().
    private String language = "en_US";
    // Sent as the Accept-Topic header.
    private String LM = "Dictation"; // or WebSearch
    // Sent as the Accept header; recognitionResult() parses the response body line by line.
    private String RESULTS_FORMAT = "text/plain"; //"application/xml";

    // Host and path of the request URI built in initRequest() (scheme https, port 443).
    private static String HOSTNAME = "dictation.nuancemobility.net"; //"dictation.nuancemobility.net"; //"sandbox.nmdp.nuancemobility.net";
    private static String SERVLET = "NMDPAsrCmdServlet/dictation";

    // Created in the constructor; stays null if the license could not be read or the client could not be set up.
    HttpClient httpclient;

    // First element of the Set-Cookie header of a response; once set it is sent as the Cookie header
    // of later requests. Static, i.e. shared by all instances of this class.
    private static String cookie = null;

    // Receives the samples passed to speechSamples(); its input stream is the request body when Speex is not used.
    BlockingByteQueue speechQueue = new BlockingByteQueue();

    // Receives the output of the Speex encoder; its input stream is the request body when useSpeex is true.
    BlockingByteQueue encodedQueue = new BlockingByteQueue();

    // Thread executing the request of the current recognition, or null when there is none.
    private PostThread postThread;
    // Set by initRecognition(); makes the next speechSamples() call start a new request.
    private boolean newRecognition = false;

    // If non-null, its name() is sent as the X-Dictation-AudioSource header. Never assigned in this class.
    private NuanceCloudAudioSource nuanceAudioSource = null;

    // Selects Speex-encoded upload instead of 16-bit PCM. Never assigned in this class.
    private boolean useSpeex = false;

    // Format handed to the most recent initRecognition() call that found the listener active.
    private AudioFormat audioFormat;

    // Maximum number of hypotheses kept in RecResult.nbest; with a value of 1 or less nbest is set to null.
    private int nBestLength = 1;

    // Optional; when isReady(result) returns true, recognitionResult() aborts the request and leaves the result as is.
    private ResultPolicy resultPolicy = null;

    // Read by initRecognition() to decide whether the upcoming recognition is processed at all.
    private boolean active = true;

    // Set by initRecognition(); while false, speechSamples() and recognitionResult() return immediately.
    private boolean running;

    // Supplies the appId and appKey query parameters of every request.
    private NuanceCloudLicense license;

    //private ByteArrayOutputStream out;
    //private AudioFormat audioFormat;

    /*
     * This function will initialize httpclient, set some basic HTTP parameters (version, UTF),
     *   and setup SSL settings for communication between the httpclient and our Nuance servers
     */

    /**
     * Creates a listener that uses the given license, falling back to
     * {@code NuanceCloudLicense.read()} when {@code license} is null, and then
     * sets up the HTTP client.
     *
     * Failures are only printed: if reading the license or creating the client
     * throws, the remaining fields (possibly including {@code httpclient}) are
     * left at null.
     *
     * @param license the license to use, or null to read the default one
     */
    public NuanceCloudRecognizerListener(NuanceCloudLicense license) {
        try {
            this.license = (license != null) ? license : NuanceCloudLicense.read();
            this.httpclient = getHttpClient();
        } catch (IOException ioProblem) {
            ioProblem.printStackTrace();
        } catch (NoSuchAlgorithmException noTls) {
            noTls.printStackTrace();
        } catch (KeyManagementException badKeys) {
            badKeys.printStackTrace();
        }
    }

    /**
     * Creates a listener without an explicit license; delegates to the
     * one-argument constructor with null, which makes it call
     * NuanceCloudLicense.read().
     */
    public NuanceCloudRecognizerListener() {
        this(null);
    }

    /**
     * Builds the HTTP client used for all requests: HTTP/1.1, UTF-8 content
     * charset, no Expect-Continue handshake, and an "https" scheme on port 443.
     *
     * WARNING (security): the registered SSL socket factory uses a trust
     * manager that accepts every certificate and a hostname verifier that
     * accepts every host name. The connection is therefore encrypted but NOT
     * authenticated, so a man-in-the-middle can read the appId/appKey that
     * initRequest() puts into the request URL. This is kept as is to preserve
     * the existing connectivity behaviour; it should be replaced by the default
     * trust settings once the server certificate is known to validate.
     *
     * @return the configured client
     * @throws NoSuchAlgorithmException if no "TLS" SSLContext is available
     * @throws KeyManagementException if the SSLContext cannot be initialized
     */
    @SuppressWarnings("deprecation")
    private static HttpClient getHttpClient() throws NoSuchAlgorithmException, KeyManagementException {
        // Standard HTTP parameters
        HttpParams params = new BasicHttpParams();
        HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
        HttpProtocolParams.setContentCharset(params, "UTF-8");
        HttpProtocolParams.setUseExpectContinue(params, false);
        // Initialize the HTTP client
        HttpClient httpclient = new DefaultHttpClient(params);

        // Trust manager that performs no validation at all (see the warning above).
        TrustManager easyTrustManager = new X509TrustManager() {
            @Override
            public void checkClientTrusted(java.security.cert.X509Certificate[] arg0, String arg1)
                    throws java.security.cert.CertificateException {
                // intentionally accepts everything
            }

            @Override
            public void checkServerTrusted(java.security.cert.X509Certificate[] arg0, String arg1)
                    throws java.security.cert.CertificateException {
                // intentionally accepts everything
            }

            @Override
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                // The X509TrustManager contract requires a non-null (possibly empty) array;
                // returning null can cause a NullPointerException inside the TLS implementation.
                return new java.security.cert.X509Certificate[0];
            }
        };

        SSLContext sslcontext = SSLContext.getInstance("TLS");
        sslcontext.init(null, new TrustManager[] { easyTrustManager }, null);
        SSLSocketFactory sf = new SSLSocketFactory(sslcontext);
        sf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
        Scheme sch = new Scheme("https", sf, 443);
        httpclient.getConnectionManager().getSchemeRegistry().register(sch);

        // Return the initialized instance of our httpclient
        return httpclient;
    }

    /**
     * Sets the policy consulted by recognitionResult(): when its
     * isReady(result) returns true, the pending Nuance request is aborted and
     * the incoming result is left unchanged.
     *
     * @param policy the policy to use, or null to disable the check
     */
    public void setResultPolicy(ResultPolicy policy) {
        this.resultPolicy = policy;
    }

    /**
     * Finishes the pending Nuance request and, if it succeeded, overwrites the
     * given result with the transcription returned by the server.
     *
     * Does nothing when no recognition is running or no request was started.
     * The request is aborted (and the result left unchanged) when the result is
     * marked as failed or the configured ResultPolicy reports it as ready.
     * Otherwise the method waits up to REQUEST_TIMEOUT milliseconds for the
     * response. On HTTP 200 with at least one line of text, result.text is
     * replaced with the first line, result.nbest with the first nBestLength
     * lines (or null when nBestLength is 1 or less), result.conf is set to 1.0
     * and result.sem to null. A result of type MAXSPEECH is turned into FINAL
     * as soon as a response has arrived.
     *
     * @param result the result to refine; modified in place
     */
    @Override
    public void recognitionResult(RecResult result) {
        if (!running)
            return;

        if (postThread == null)
            return;

        // No more samples will follow: lets the streamed request body reach end-of-stream.
        speechQueue.endWrite();

        if (result.isFailed() || (resultPolicy != null && resultPolicy.isReady(result))) {
            postThread.abort();
            postThread = null;
            return;
        }

        try {
            postThread.join(REQUEST_TIMEOUT);
        } catch (InterruptedException e) {
            // Preserve the interrupt for the caller instead of silently clearing it.
            Thread.currentThread().interrupt();
        }

        HttpResponse response = postThread.getResponse();

        if (response == null) {
            postThread.abort();
            postThread = null;
            System.out.println("No response from Nuance");
            return;
        }

        postThread = null;

        if (result.type == RecResult.MAXSPEECH)
            result.type = RecResult.FINAL;

        HttpEntity resEntity = response.getEntity();
        if (resEntity == null)
            return;

        if (cookie == null)
            rememberSessionCookie(response);

        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode == 200) {
            applyTranscription(result, resEntity);
        } else {
            System.out.println("Nuance returned code " + statusCode);
            try {
                EntityUtils.consume(resEntity);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Stores the first element of the response's Set-Cookie header in the
     * static cookie field. Leaves the field untouched when the header is
     * missing or empty, so a response without a cookie no longer fails.
     */
    private static void rememberSessionCookie(HttpResponse response) {
        Header cookieHeader = response.getFirstHeader("Set-Cookie");
        if (cookieHeader == null || cookieHeader.getValue() == null)
            return;
        StringTokenizer st = new StringTokenizer(cookieHeader.getValue(), ";");
        if (!st.hasMoreTokens())
            return;
        String firstElement = st.nextToken().trim();
        if (firstElement.length() > 0)
            cookie = firstElement;
    }

    /**
     * Reads the response body (one hypothesis per line, UTF-8) and copies it
     * into the result. The result is only modified after the whole body has
     * been read and contained at least one line, so a failed or empty response
     * leaves the original result intact.
     */
    private void applyTranscription(RecResult result, HttpEntity resEntity) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(resEntity.getContent(), "UTF-8"));
            ArrayList<RecHyp> hypotheses = new ArrayList<RecHyp>();
            String sentence;
            while ((sentence = reader.readLine()) != null) {
                hypotheses.add(new RecHyp(sentence));
            }

            if (hypotheses.isEmpty()) {
                System.out.println("Nuance returned an empty result");
            } else {
                result.text = hypotheses.get(0).text;
                if (nBestLength > 1) {
                    while (hypotheses.size() > nBestLength) {
                        hypotheses.remove(hypotheses.size() - 1);
                    }
                    result.nbest = hypotheses;
                } else {
                    result.nbest = null;
                }
                result.conf = 1.0f;
                result.sem = null;
            }

            EntityUtils.consume(resEntity);
        } catch (Exception ex) {
            // Best effort: a failed cloud lookup must not break delivery of the original result.
            ex.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Forwards a chunk of audio to the request body queue. The first chunk
     * after initRecognition() also starts the HTTP request. Ignored while no
     * recognition is running.
     *
     * @param samples buffer holding the audio
     * @param pos offset of the first byte to use
     * @param len number of bytes to use
     */
    @Override
    public void speechSamples(byte[] samples, int pos, int len) {
        if (running) {
            if (newRecognition) {
                newRecognition = false;
                initRequest();
            }
            speechQueue.write(samples, pos, len);
        }
    }

    /**
     * Builds the HTTPS POST for a new recognition and starts it on a new
     * PostThread. The request body is streamed from the speech queue, either
     * directly as 16-bit PCM or through the Speex encoder when useSpeex is set.
     * A sample rate other than 8000 or 16000 Hz is reported on stderr, but the
     * request is still sent. A URISyntaxException is only printed, in which
     * case no PostThread is created.
     */
    private void initRequest() {
        try {
            speechQueue.reset();

            final float sampleRate = audioFormat.getSampleRate();
            if (sampleRate != 8000 && sampleRate != 16000) {
                System.err.println("Error: NuanceCloudRecognizer must get 8 or 16 Khz audio, not "
                        + sampleRate);
            }
            final int rate = (int) sampleRate;

            // Pick the body stream and the matching content type.
            final InputStream audioStream;
            final String contentType;
            if (!useSpeex) {
                audioStream = speechQueue.getInputStream();
                contentType = "audio/x-wav;codec=pcm;bit=16;rate=" + rate;
            } else {
                encodedQueue.reset();
                JSpeexEnc speex = new JSpeexEnc(rate);
                speex.startEncoding(speechQueue, encodedQueue);
                audioStream = encodedQueue.getInputStream();
                contentType = "audio/x-speex;rate=" + rate;
            }

            // Credentials and device id travel in the query string.
            List<NameValuePair> query = new ArrayList<NameValuePair>();
            query.add(new BasicNameValuePair("appId", license.getAppId()));
            query.add(new BasicNameValuePair("appKey", license.getAppKey()));
            query.add(new BasicNameValuePair("id", DEVICE_ID));
            String queryString = URLEncodedUtils.format(query, "UTF-8");
            URI target = URIUtils.createURI("https", HOSTNAME, 443, SERVLET, queryString, null);

            final HttpPost request = new HttpPost(target);
            request.addHeader("Content-Type", contentType);
            request.addHeader("Content-Language", language);
            request.addHeader("Accept-Language", language);
            request.addHeader("Accept", RESULTS_FORMAT);
            request.addHeader("Accept-Topic", LM);
            if (nuanceAudioSource != null) {
                request.addHeader("X-Dictation-AudioSource", nuanceAudioSource.name());
            }
            if (cookie != null) {
                request.addHeader("Cookie", cookie);
            }

            // Length -1: the body is streamed until the queue signals end-of-stream.
            InputStreamEntity body = new InputStreamEntity(audioStream, -1);
            body.setContentType(contentType);
            request.setEntity(body);

            postThread = new PostThread(request);
        } catch (URISyntaxException badUri) {
            badUri.printStackTrace();
        }
    }

    /**
     * Thread that executes one HTTP POST with the enclosing listener's
     * httpclient. The thread starts itself in the constructor; the response,
     * if any, is available through getResponse() once execute() has returned.
     */
    private class PostThread extends Thread {

        // Written by this thread, read by the thread calling getResponse(). Volatile because the
        // reader may arrive after a timed-out join(), i.e. without the thread having terminated.
        private volatile HttpResponse response;
        private final HttpPost httppost;

        /**
         * Stores the request and immediately starts executing it.
         *
         * @param httppost the fully configured request to send
         */
        public PostThread(HttpPost httppost) {
            this.httppost = httppost;
            start();
        }

        @Override
        public void run() {
            try {
                response = httpclient.execute(httppost);
            } catch (Exception ignored) {
                // Deliberately silent: abort() makes execute() fail on purpose, and any other
                // failure surfaces to the caller as a null response.
            }
        }

        /**
         * @return the response, or null if the request has not completed (yet) or has failed
         */
        public HttpResponse getResponse() {
            return response;
        }

        /**
         * Aborts the request and waits for this thread to finish. If the
         * calling thread is interrupted while waiting, the wait ends and the
         * interrupt status is restored for the caller.
         */
        public void abort() {
            httppost.abort();
            try {
                join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }

    }

    /**
     * Prepares the listener for a new recognition. When the listener is
     * inactive, the recognition is marked as not running and nothing else
     * happens. Otherwise the audio format is stored, a request left over from
     * the previous recognition is finalized with a synthetic FINAL result, and
     * the next call to speechSamples() is flagged to start a new request.
     *
     * @param audioFormat format of the audio that will be passed to speechSamples()
     */
    @Override
    public void initRecognition(AudioFormat audioFormat) {
        this.running = false;

        if (active) {
            this.running = true;
            this.audioFormat = audioFormat;

            // A request that is still pending means the previous recognition never delivered its result.
            if (postThread != null) {
                System.err.println("ERROR: NuanceCloudRecognizer did not finalize last request");
                recognitionResult(new RecResult(RecResult.FINAL));
            }
            postThread = null;

            newRecognition = true;
        }
    }

    /*
    public RecResult recognize(AudioSource audioSource) {
       initRecognition(audioSource.getAudioFormat());
       byte[] samples = new byte[3200];
       int len;
       do {
     len = audioSource.read(samples, 0, samples.length);
     if (len > -1)
        speechSamples(samples, 0, len);
       } while (len == 3200);
       RecResult result = new RecResult(RecResult.FINAL);
       recognitionResult(result);
       return result;
    }
        
    public RecResult recognizeFile(File file) {
       try {
     return recognize(new FileAudioSource(file));
       } catch (UnsupportedAudioFileException e) {
     e.printStackTrace();
       } catch (IOException e) {
     e.printStackTrace();
       }
       return null;
    }
    */

    /**
     * Sets the language sent in the Content-Language and Accept-Language
     * headers, using the language's code with every '-' replaced by '_'.
     *
     * @param lang the language to recognize
     */
    public void setLanguage(Language lang) {
        String code = lang.getCode();
        this.language = code.replace('-', '_');
    }

    /** No-op: this listener does not react to start-of-speech notifications. */
    @Override
    public void startOfSpeech(float timestamp) {
    }

    /** No-op: this listener does not react to end-of-speech notifications. */
    @Override
    public void endOfSpeech(float timestamp) {
    }

    /**
     * Sets how many hypotheses recognitionResult() keeps in result.nbest.
     * With a value of 1 or less, nbest is set to null and only result.text is
     * replaced.
     *
     * @param length maximum number of hypotheses to keep
     */
    public void setNBestLength(int length) {
        nBestLength = length;
    }

    /**
     * Enables or disables this listener. The flag is read by
     * initRecognition(), so a change takes effect with the next recognition.
     *
     * @param active false to make following recognitions ignore samples and results
     */
    public void setActive(boolean active) {
        this.active = active;
    }

}