com.ibm.watson.developer_cloud.android.text_to_speech.v1.TTSUtility.java Source code

Java tutorial

Introduction

Here is the source code for com.ibm.watson.developer_cloud.android.text_to_speech.v1.TTSUtility.java

Source

/**
 *  Copyright IBM Corporation 2015
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/

package com.ibm.watson.developer_cloud.android.text_to_speech.v1;

import android.app.Application;
import android.media.AudioFormat;
import android.media.AudioManager;
import android.media.AudioTrack;
import android.os.Environment;
import android.util.Log;

import com.ibm.watson.developer_cloud.android.speech_to_text.v1.opus.OggOpus;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;

import com.ibm.watson.developer_cloud.android.speech_to_text.v1.audio.PcmWaveWriter;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.IOException;
import java.io.InputStream;

import java.io.RandomAccessFile;

import java.util.LinkedList;
import java.util.List;

public class TTSUtility extends Application {
    private static final String TAG = TTSUtility.class.getName();

    public static final String CODEC_WAV = "audio/wav";
    public static final int CODEC_WAV_SAMPLE_RATE = 0;

    public static final String CODEC_OPUS = "audio/opus";
    public static final int CODEC_OPUS_SAMPLE_RATE = 48000;

    private String username;
    private String password;
    private String token;
    private boolean learningOptOut;
    private String voice;
    private String content;
    private String codec;
    private int sampleRate;
    private String server;
    private AudioTrack audioTrack;

    public TTSUtility() {
        this.codec = CODEC_WAV;
        // By default, the sample rate would be detected by the SDK if the value is set to zero
        // However, the metadata is not reliable, need to decode at the maximum sample rate
        this.sampleRate = 48000;
    }

    /**
     * Set codec
     * @param codec
     */
    public void setCodec(String codec) {
        this.codec = codec;
    }

    /**
     * Stop player
     */
    private void stopTtsPlayer() {
        if (audioTrack != null && audioTrack.getState() != AudioTrack.STATE_UNINITIALIZED) {
            // IMPORTANT: NOT use stop()
            // For an immediate stop, use pause(), followed by flush() to discard audio data that hasn't been played back yet.
            audioTrack.pause();
            audioTrack.flush();
        }
    }

    /**
     * Text to speech
     * @param arguments
     */
    public void synthesize(String[] arguments) {
        Log.i(TAG, "Start requesting TTS... (" + this.codec + ")");
        try {
            parseParams(arguments);
        } catch (Exception e) {
            e.printStackTrace();
        }

        if (this.codec == CODEC_WAV) {
            this.sampleRate = CODEC_WAV_SAMPLE_RATE;
        } else {
            this.sampleRate = CODEC_OPUS_SAMPLE_RATE;
        }

        TTSThread thread = new TTSThread();
        thread.setName("TTSThread");
        thread.start();
    }

    private void initPlayer() {
        stopTtsPlayer();
        // IMPORTANT: minimum required buffer size for the successful creation of an AudioTrack instance in streaming mode.
        int bufferSize = AudioTrack.getMinBufferSize(sampleRate, AudioFormat.CHANNEL_OUT_MONO,
                AudioFormat.ENCODING_PCM_16BIT);

        synchronized (this) {
            audioTrack = new AudioTrack(AudioManager.STREAM_MUSIC, sampleRate, AudioFormat.CHANNEL_OUT_MONO,
                    AudioFormat.ENCODING_PCM_16BIT, bufferSize, AudioTrack.MODE_STREAM);
            if (audioTrack != null)
                audioTrack.play();
        }
    }

    private void parseParams(String[] arguments) {
        int i = 0;
        this.server = arguments[i++];
        this.username = arguments[i++];
        this.password = arguments[i++];
        this.voice = arguments[i++];
        this.content = arguments[i++];
        this.token = arguments[i++];
        this.learningOptOut = Boolean.valueOf(arguments[i++]);
    }

    /**
    * Post text data to the server and get returned audio data
    * @param server iTrans server
    * @param username
    * @param password
    * @param content
    * @return {@link HttpResponse}
    * @throws Exception
    */
    public static HttpResponse createPost(String server, String username, String password, String token,
            boolean learningOptOut, String content, String voice, String codec) throws Exception {
        String url = server;

        //HTTP GET Client
        HttpClient httpClient = new DefaultHttpClient();
        //Add params
        List<BasicNameValuePair> params = new LinkedList<BasicNameValuePair>();
        params.add(new BasicNameValuePair("text", content));
        params.add(new BasicNameValuePair("voice", voice));
        params.add(new BasicNameValuePair("accept", codec));
        HttpGet httpGet = new HttpGet(url + "?" + URLEncodedUtils.format(params, "utf-8"));
        // use token based authentication if possible, otherwise Basic Authentication will be used
        if (token != null) {
            Log.d(TAG, "using token based authentication");
            httpGet.setHeader("X-Watson-Authorization-Token", token);
        } else {
            Log.d(TAG, "using basic authentication");
            httpGet.setHeader(
                    BasicScheme.authenticate(new UsernamePasswordCredentials(username, password), "UTF-8", false));
        }

        if (learningOptOut) {
            Log.d(TAG, "setting X-Watson-Learning-OptOut");
            httpGet.setHeader("X-Watson-Learning-Opt-Out", "true");
        }
        HttpResponse executed = httpClient.execute(httpGet);
        return executed;
    }

    /**
     * Get storage path
     * @return
     */
    private String getBaseDir() {
        String baseDir;
        if (Environment.getExternalStorageState().equals(Environment.MEDIA_MOUNTED)) {
            baseDir = Environment.getExternalStorageDirectory().getAbsolutePath() + "/";
        } else {
            baseDir = "/data/data/" + getApplicationContext().getPackageName() + "/";
        }

        return baseDir;
    }

    /**
     * Thread to post text data to iTrans server and play returned audio data 
     * @author chienlk
     *
     */
    public class TTSThread extends Thread {
        @Override
        public void run() {
            android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_URGENT_AUDIO);

            HttpResponse post;
            try {
                post = createPost(server, username, password, token, learningOptOut, content, voice, codec);
                InputStream is = post.getEntity().getContent();

                byte[] data = null;
                if (codec == CODEC_WAV) {
                    data = analyzeWavData(is);
                } else if (codec == CODEC_OPUS) {
                    data = analyzeOpusData(is);
                }
                initPlayer();
                audioTrack.write(data, 0, data.length);
                is.close();

            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Log.i(TAG, "Stopping audioTrack...");
                if (audioTrack != null && audioTrack.getState() != AudioTrack.STATE_UNINITIALIZED) {
                    audioTrack.release();
                }
            }
        }
    }

    private byte[] analyzeOpusData(InputStream is) {
        String inFilePath = getBaseDir() + "Watson.opus";
        String outFilePath = getBaseDir() + "Watson.pcm";
        File inFile = new File(inFilePath);
        File outFile = new File(outFilePath);
        outFile.deleteOnExit();
        inFile.deleteOnExit();

        try {
            RandomAccessFile inRaf = new RandomAccessFile(inFile, "rw");
            byte[] opus = IOUtils.toByteArray(is);
            inRaf.write(opus);

            sampleRate = OggOpus.decode(inFilePath, outFilePath, sampleRate); // zero means to detect the sample rate by decoder

            RandomAccessFile outRaf = new RandomAccessFile(outFile, "r");

            byte[] data = new byte[(int) outRaf.length()];

            int outLength = outRaf.read(data);

            inRaf.close();
            outRaf.close();
            if (outLength == 0) {
                throw new IOException("Data reading failed");
            }
            return data;
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new byte[0];
    }

    /**
     * Analyze sample rate and return the PCM data
     * @param i
     * @return
     */
    public byte[] analyzeWavData(InputStream i) {
        try {
            int headSize = 44, metaDataSize = 48;
            byte[] data = IOUtils.toByteArray(i);
            if (data.length < headSize) {
                throw new IOException("Wrong Wav header");
            }

            if (this.sampleRate == 0 && data.length > 28) {
                this.sampleRate = readInt(data, 24); // 24 is the position of sample rate in wav format
            }

            int destPos = headSize + metaDataSize;
            int rawLength = data.length - destPos;

            byte[] d = new byte[rawLength];
            System.arraycopy(data, destPos, d, 0, rawLength);
            return d;
        } catch (IOException e) {
            Log.e(TAG, "Error while formatting header");
        }
        return new byte[0];
    }

    public byte[] stripHeaderAndSaveWav(InputStream i) {
        byte[] d = new byte[0];
        try {
            int headSize = 44;
            int metaDataSize = 48;
            i.skip(headSize + metaDataSize);
            d = IOUtils.toByteArray(i);
        } catch (IOException e) {
            Log.d(TAG, "Error while formatting header");
        }
        return saveWav(d);
    }

    public byte[] saveWav(byte[] d) {
        PcmWaveWriter wR = new PcmWaveWriter(sampleRate, 1);
        return wR.saveWav(d, sampleRate, 1, 16);
    }

    void saveWavFile(byte[] d) {
        String fileName = getBaseDir() + "a.wav";
        try {
            PcmWaveWriter wR = new PcmWaveWriter(sampleRate, 1);
            wR.open(fileName);
            wR.saveWavFile(d, sampleRate, 1, 16);
            wR.close();
            Log.i(TAG, "save file OK");
        } catch (IOException e) {
            Log.d(TAG, "save file FAIL");
            e.printStackTrace();
        }
    }

    /**
     * Converts Little Endian (Windows) bytes to an int (Java uses Big Endian).
     * @param data the data to read.
     * @param offset the offset from which to start reading.
     * @return the integer value of the reassembled bytes.
     */
    protected static int readInt(final byte[] data, final int offset) {
        return (data[offset] & 0xff) | ((data[offset + 1] & 0xff) << 8) | ((data[offset + 2] & 0xff) << 16)
                | (data[offset + 3] << 24); // no 0xff on the last one to keep the sign
    }

    /**
     * Converts Little Endian (Windows) bytes to an short (Java uses Big Endian).
     * @param data the data to read.
     * @param offset the offset from which to start reading.
     * @return the integer value of the reassembled bytes.
     */
    protected static int readShort(final byte[] data, final int offset) {
        return (data[offset] & 0xff) | (data[offset + 1] << 8); // no 0xff on the last one to keep the sign
    }
}