iristk.speech.nuancecloud.JSpeexNuanceCloudRecognizerListener.java Source code

Java tutorial

Introduction

Here is the source code for iristk.speech.nuancecloud.JSpeexNuanceCloudRecognizerListener.java

Source

/*******************************************************************************
 * Copyright (c) 2014 Gabriel Skantze.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Public License v3.0
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/gpl.html
 * 
 * Contributors:
 *     Gabriel Skantze - initial API and implementation
 ******************************************************************************/
package iristk.speech.nuancecloud;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.StringTokenizer;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.sound.sampled.AudioFormat;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpVersion;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIUtils;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.util.EntityUtils;

import iristk.speech.RecHyp;
import iristk.speech.RecResult;
import iristk.speech.RecognizerListener;
import iristk.util.BlockingByteQueue;
import iristk.util.Language;

/**
 * {@link RecognizerListener} that forwards the speech samples it receives to
 * the Nuance Cloud dictation servlet over HTTPS and copies the returned text
 * into the {@link RecResult} passed to {@link #recognitionResult(RecResult)}.
 *
 * <p>Flow per utterance, as implemented below:
 * <ol>
 *   <li>{@link #initRecognition(AudioFormat)} arms a new request.</li>
 *   <li>The first {@link #speechSamples(byte[], int, int)} call builds the
 *       POST request and starts it on a background {@code PostThread}; raw
 *       samples go into {@code byteQueue}, a {@code JSpeexEnc} moves them to
 *       {@code encodedQueue}, and the request body is read from
 *       {@code encodedQueue}.</li>
 *   <li>{@link #recognitionResult(RecResult)} ends the input, waits up to
 *       {@link #REQUEST_TIMEOUT} ms for the response and parses it.</li>
 * </ol>
 *
 * <p>The session cookie is kept in a static field and is therefore shared by
 * all instances of this class.
 */
public class JSpeexNuanceCloudRecognizerListener implements RecognizerListener {

    /** Maximum time in milliseconds that {@link #recognitionResult(RecResult)} waits for the HTTP response. */
    public static final int REQUEST_TIMEOUT = 5000;

    // Credentials, loaded from license.properties by the constructor and sent
    // as the "appId" / "appKey" query parameters.
    private String APP_ID;
    private String APP_KEY;

    // Sent as the "id" query parameter.
    private String DEVICE_ID = "12345";
    // Sent as both the Content-Language and Accept-Language headers.
    private String LANGUAGE = "en_US";
    // Sent as the Content-Type of the request and of its entity.
    private String CODEC = "audio/x-speex;rate=16000";
    // Sent as the Accept-Topic header.
    private String LM = "Dictation"; // or WebSearch
    // Sent as the Accept header; the response parser below reads one hypothesis per line.
    private String RESULTS_FORMAT = "text/plain"; //"application/xml";

    private static final String HOSTNAME = "dictation.nuancemobility.net"; //"dictation.nuancemobility.net"; //"sandbox.nmdp.nuancemobility.net";
    private static final String SERVLET = "NMDPAsrCmdServlet/dictation";

    HttpClient httpclient;

    // First attribute of the first Set-Cookie header seen; replayed on later
    // requests. Static, so shared across instances.
    private static String cookie = null;

    // Raw samples written by speechSamples(); input of the encoder.
    BlockingByteQueue byteQueue = new BlockingByteQueue();
    // Output of the encoder; streamed as the POST body.
    BlockingByteQueue encodedQueue = new BlockingByteQueue();

    // Request currently in flight, or null when there is none.
    private PostThread postThread;
    // Set by initRecognition(); makes the next speechSamples() call start a request.
    private boolean newRecognition = false;

    // Never assigned in this class, so the X-Dictation-AudioSource header is
    // currently never sent.
    private NuanceCloudAudioSource nuanceAudioSource = null;

    //private ByteArrayOutputStream out;
    //private AudioFormat audioFormat;

    /**
     * Creates a listener for the given language.
     *
     * <p>Reads {@code APP_ID} and {@code APP_KEY} from the package's
     * {@code license.properties} file and creates the HTTP client. Failures
     * are printed and otherwise ignored, so after an error the instance may
     * be left without credentials and/or without an HTTP client.
     *
     * @param lang language to recognize; its code is used with every
     *             {@code '-'} replaced by {@code '_'}
     */
    public JSpeexNuanceCloudRecognizerListener(Language lang) {
        try {
            this.LANGUAGE = lang.getCode().replaceAll("-", "_");
            Properties properties = new Properties();
            FileReader licenseReader = new FileReader(NuanceCloudPackage.PACKAGE.getPath("license.properties"));
            try {
                properties.load(licenseReader);
            } finally {
                // Properties.load does not close the reader; release the file handle here.
                licenseReader.close();
            }
            this.APP_ID = properties.getProperty("APP_ID");
            this.APP_KEY = properties.getProperty("APP_KEY");
            httpclient = getHttpClient();
        } catch (KeyManagementException e) {
            e.printStackTrace();
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Initializes {@code httpclient}, sets some basic HTTP parameters
     * (HTTP/1.1, UTF-8 content charset, no Expect-Continue) and registers an
     * {@code https} scheme on port 443 for talking to the Nuance servers.
     *
     * <p>NOTE(security): the registered scheme uses a trust manager that
     * accepts every certificate and a hostname verifier that accepts every
     * hostname, so the server is NOT authenticated and the connection is open
     * to man-in-the-middle attacks. This is kept as-is to preserve existing
     * behavior; review before using outside a trusted network.
     *
     * @return the client that was also stored in the {@code httpclient} field
     * @throws NoSuchAlgorithmException if no "TLS" {@link SSLContext} is available
     * @throws KeyManagementException   if the {@link SSLContext} cannot be initialized
     */
    @SuppressWarnings("deprecation")
    private HttpClient getHttpClient() throws NoSuchAlgorithmException, KeyManagementException {
        // Standard HTTP parameters
        HttpParams params = new BasicHttpParams();
        HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
        HttpProtocolParams.setContentCharset(params, "UTF-8");
        HttpProtocolParams.setUseExpectContinue(params, false);
        // Initialize the HTTP client
        httpclient = new DefaultHttpClient(params);

        // Initialize/setup SSL with a trust manager that performs no checks at all.
        TrustManager easyTrustManager = new X509TrustManager() {
            @Override
            public void checkClientTrusted(java.security.cert.X509Certificate[] arg0, String arg1)
                    throws java.security.cert.CertificateException {
                // Intentionally empty: every client certificate is accepted.
            }

            @Override
            public void checkServerTrusted(java.security.cert.X509Certificate[] arg0, String arg1)
                    throws java.security.cert.CertificateException {
                // Intentionally empty: every server certificate is accepted.
            }

            @Override
            public java.security.cert.X509Certificate[] getAcceptedIssuers() {
                // The X509TrustManager contract requires a non-null (possibly empty) array.
                return new java.security.cert.X509Certificate[0];
            }
        };

        SSLContext sslcontext = SSLContext.getInstance("TLS");
        sslcontext.init(null, new TrustManager[] { easyTrustManager }, null);
        SSLSocketFactory sf = new SSLSocketFactory(sslcontext);
        sf.setHostnameVerifier(SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
        Scheme sch = new Scheme("https", sf, 443);
        httpclient.getConnectionManager().getSchemeRegistry().register(sch);

        // Return the initialized instance of our httpclient
        return httpclient;
    }

    /**
     * Finishes the request in flight and stores the transcription in {@code result}.
     *
     * <p>Does nothing when no request is in flight. Otherwise ends the sample
     * stream, waits up to {@link #REQUEST_TIMEOUT} ms for the response and:
     * <ul>
     *   <li>if there is no response, aborts the request, prints a message and
     *       leaves {@code result} untouched;</li>
     *   <li>otherwise turns a {@code MAXSPEECH} result type into {@code FINAL}
     *       and, on HTTP 200, sets {@code result.text} to the first line of
     *       the response body (if any) and {@code result.nbest} to
     *       {@code null}.</li>
     * </ul>
     *
     * @param result the result object to update in place
     */
    @Override
    public void recognitionResult(RecResult result) {
        if (postThread == null)
            return;

        /*
        AudioInputStream ai = new AudioInputStream(new ByteArrayInputStream(out.toByteArray()), audioFormat, out.toByteArray().length);
        try {
           AudioSystem.write(ai, AudioFileFormat.Type.WAVE, new File("c:/" + System.currentTimeMillis() + ".wav"));
        } catch (IOException e1) {
           e1.printStackTrace();
        }
        */

        byteQueue.endWrite();

        try {
            postThread.join(REQUEST_TIMEOUT);
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Preserve the interrupt for the caller instead of silently clearing it.
            Thread.currentThread().interrupt();
        }

        HttpResponse response = postThread.getResponse();

        if (response == null) {
            postThread.abort();
            postThread = null;
            System.out.println("No response from Nuance");
            return;
        }

        postThread = null;

        if (result.type == RecResult.MAXSPEECH)
            result.type = RecResult.FINAL;

        HttpEntity resEntity = response.getEntity();

        //System.out.println(response.getStatusLine());
        if (resEntity == null)
            return;

        //System.out.println("Response content length: " + resEntity.getContentLength());
        //System.out.println("Chunked?: " + resEntity.isChunked());
        //System.out.println("Nuance Session Id: " + response.getFirstHeader("x-nuance-sessionid").getValue());

        rememberSessionCookie(response);

        System.out.println(response.getStatusLine().getStatusCode());
        if (response.getStatusLine().getStatusCode() == 200) {
            readTranscription(resEntity, result);
        } else {
            try {
                EntityUtils.consume(resEntity);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        //System.out.println("DONE");
    }

    /**
     * Stores the first attribute of the response's first Set-Cookie header in
     * the shared {@code cookie} field, unless a cookie is already stored or
     * the response carries no usable Set-Cookie header.
     */
    private static void rememberSessionCookie(HttpResponse response) {
        if (cookie != null)
            return;
        Header cookieHeader = response.getFirstHeader("Set-Cookie");
        if (cookieHeader == null || cookieHeader.getValue() == null)
            return; // no cookie offered; try again on the next response
        StringTokenizer st = new StringTokenizer(cookieHeader.getValue(), ";");
        if (st.hasMoreTokens()) {
            cookie = st.nextToken().trim();
            //System.out.println("Cookie: " + cookie);
        }
    }

    /**
     * Reads the response body as UTF-8 text, one hypothesis per line, and
     * copies the first hypothesis (if there is one) into {@code result.text}.
     * Errors are printed and otherwise ignored; the reader is always closed.
     */
    private void readTranscription(HttpEntity resEntity, RecResult result) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(resEntity.getContent(), "UTF-8"));
            String sentence;
            result.nbest = new ArrayList<RecHyp>();
            while ((sentence = reader.readLine()) != null) {
                result.nbest.add(new RecHyp(sentence));
            }
            // An empty body yields no hypotheses; leave result.text as it was.
            if (!result.nbest.isEmpty()) {
                result.text = result.nbest.get(0).text;
            }
            //TODO flag whether we want an nbest, now it's turned off
            result.nbest = null;
            EntityUtils.consume(resEntity);
        } catch (Exception ex) {
            // Best effort: a failed read must not propagate into the recognizer.
            ex.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Queues a chunk of speech samples for encoding and upload. The first
     * call after {@link #initRecognition(AudioFormat)} starts the request.
     *
     * @param samples buffer holding the samples
     * @param pos     offset of the first byte to use
     * @param len     number of bytes to use
     */
    @Override
    public void speechSamples(byte[] samples, int pos, int len) {
        if (newRecognition) {
            newRecognition = false;
            initRequest();
        }
        //out.write(samples, pos, len);
        byteQueue.write(samples, pos, len);
        //System.out.println("Speech Samples");
    }

    /**
     * Resets both queues, starts an encoder from {@code byteQueue} to
     * {@code encodedQueue} and starts a POST request whose body is streamed
     * from {@code encodedQueue}. If the request URI cannot be built, the
     * error is printed and no request (and no encoder) is started.
     */
    private void initRequest() {
        try {
            byteQueue.reset();
            encodedQueue.reset();

            // Build the URI first so that a URISyntaxException leaves no
            // started encoder behind without a request to consume its output.
            List<NameValuePair> qparams = new ArrayList<NameValuePair>();
            qparams.add(new BasicNameValuePair("appId", APP_ID));
            qparams.add(new BasicNameValuePair("appKey", APP_KEY));
            qparams.add(new BasicNameValuePair("id", DEVICE_ID));
            URI uri = URIUtils.createURI("https", HOSTNAME, 443, SERVLET, URLEncodedUtils.format(qparams, "UTF-8"),
                    null);

            JSpeexEnc encoder = new JSpeexEnc(16000);
            encoder.startEncoding(byteQueue, encodedQueue);

            HttpPost httppost = new HttpPost(uri);
            httppost.addHeader("Content-Type", CODEC);
            httppost.addHeader("Content-Language", LANGUAGE);
            httppost.addHeader("Accept-Language", LANGUAGE);
            httppost.addHeader("Accept", RESULTS_FORMAT);
            httppost.addHeader("Accept-Topic", LM);
            if (nuanceAudioSource != null) {
                httppost.addHeader("X-Dictation-AudioSource", nuanceAudioSource.name());
            }
            if (cookie != null)
                httppost.addHeader("Cookie", cookie);

            // Length -1: the body is streamed and its size is not known up front.
            InputStreamEntity reqEntity = new InputStreamEntity(encodedQueue.getInputStream(), -1);
            reqEntity.setContentType(CODEC);

            httppost.setEntity(reqEntity);

            postThread = new PostThread(httppost);

        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }

    /**
     * Background thread that executes one POST request with the enclosing
     * instance's {@code httpclient}. The thread starts itself on construction.
     */
    private class PostThread extends Thread {

        // Written by this thread, read by the caller possibly after a timed
        // join that did not complete; volatile makes the write visible.
        private volatile HttpResponse response;
        private final HttpPost httppost;

        public PostThread(HttpPost httppost) {
            this.httppost = httppost;
            start();
        }

        @Override
        public void run() {
            try {
                response = httpclient.execute(httppost);
            } catch (Exception e) {
                // A failure caused by our own abort() is expected and stays
                // silent; anything else is reported so it can be diagnosed.
                if (!httppost.isAborted()) {
                    e.printStackTrace();
                }
            }
        }

        /** @return the response, or {@code null} if none has been received (yet) */
        public HttpResponse getResponse() {
            return response;
        }

        /** Aborts the request and waits for this thread to finish. */
        public void abort() {
            httppost.abort();
            try {
                join();
            } catch (InterruptedException e) {
                e.printStackTrace();
                Thread.currentThread().interrupt();
            }
        }

    }

    /**
     * Arms a new recognition: the next {@link #speechSamples(byte[], int, int)}
     * call will start a fresh request. If a previous request is still in
     * flight, an error is printed and that request is finalized first with a
     * throw-away {@code FINAL} result.
     *
     * @param audioFormat currently unused
     */
    @Override
    public void initRecognition(AudioFormat audioFormat) {

        //this.audioFormat = audioFormat;

        if (postThread != null) {
            System.err.println("ERROR: NuanceCloudRecognizer did not finalize last request");
            recognitionResult(new RecResult(RecResult.FINAL));
        }

        postThread = null;
        /*
        if (audioFormat.getSampleRate() == 16000) {
           this.CODEC ="audio/x-wav;codec=pcm;bit=16;rate=16000";  
        } else if (audioFormat.getSampleRate() == 8000) {
           this.CODEC ="audio/x-wav;codec=pcm;bit=16;rate=8000"; 
        } else {
           throw new IllegalArgumentException("NuanceCloudRecognizer does not support " + audioFormat);
        }
        */
        newRecognition = true;
    }

    /*
    public RecResult recognize(AudioSource audioSource) {
       initRecognition(audioSource.getAudioFormat());
       byte[] samples = new byte[3200];
       int len;
       do {
     len = audioSource.read(samples, 0, samples.length);
     if (len > -1)
        speechSamples(samples, 0, len);
       } while (len == 3200);
       RecResult result = new RecResult(RecResult.FINAL);
       recognitionResult(result);
       return result;
    }
        
    public RecResult recognizeFile(File file) {
       try {
     return recognize(new FileAudioSource(file));
       } catch (UnsupportedAudioFileException e) {
     e.printStackTrace();
       } catch (IOException e) {
     e.printStackTrace();
       }
       return null;
    }
    */

    /**
     * Sets the language sent with subsequent requests. Unlike the
     * constructor, the value is stored as given (no {@code '-'} to
     * {@code '_'} replacement).
     *
     * @param lang language code to send in the Content-Language and
     *             Accept-Language headers
     */
    public void setLanguage(String lang) {
        this.LANGUAGE = lang;
    }

    @Override
    public void startOfSpeech(float timestamp) {
        // Not needed by this listener.
    }

    @Override
    public void endOfSpeech(float timestamp) {
        // Not needed by this listener.
    }

}