Back to project page api-ai-android-sdk.
The source code is released under:
Apache License
If you think the Android project api-ai-android-sdk listed in this page is inappropriate, such as containing malicious code/tools or violating the copyright, please email info at java2s dot com, thanks.
package ai.api.util; //from w w w.j a v a 2 s. c o m /*********************************************************************************************************************** * * API.AI Android SDK - client-side libraries for API.AI * ================================================= * * Copyright (C) 2014 by Speaktoit, Inc. (https://www.speaktoit.com) * https://www.api.ai * *********************************************************************************************************************** * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. * ***********************************************************************************************************************/ import android.util.Log; import java.nio.ByteBuffer; import java.nio.ShortBuffer; public class VoiceActivityDetector { public static final String TAG = VoiceActivityDetector.class.getName(); private final int sampleRate; private SpeechEventsListener eventsListener; private double averageNoiseEnergy = 0.0; private double lastActiveTime = -1.0; /** * last time active frame hit sequance. */ private double lastSequenceTime = 0.0; /** * number of active frame in sequance. */ private int sequenceCounter = 0; /** * current processed time in millis */ private double time = 0.0; private final double sequenceLengthMilis = 100.0; private final int minSpeechSequenceCount = 3; /** * multiplayer for energy noise overcome. */ private final double energyFactor = 1.1; private final double maxSilenceLengthMilis = 0.35 * 1000; private final double minSilenceLengthMilis = 0.08 * 1000; private double silenceLengthMilis = maxSilenceLengthMilis; private boolean speechActive = false; /** * Time in millis to remember nose energy */ private final int startNoiseInterval = 150; private int minAudioBufferSize = 1920; public VoiceActivityDetector(final int sampleRate) { this.sampleRate = sampleRate; } public void processBuffer(final byte[] buffer, final int bytesRead) { final ByteBuffer byteBuffer = ByteBuffer.wrap(buffer, 0, bytesRead); final ShortBuffer shorts = byteBuffer.asShortBuffer(); final boolean active = isFrameActive(shorts); final int frameSize = bytesRead / 2; // 16 bit encoding time = time + (frameSize * 1000) / sampleRate; // because of sampleRate given for seconds if (active) { if (lastActiveTime >= 0 && time - lastActiveTime < sequenceLengthMilis) { sequenceCounter++; if (sequenceCounter >= minSpeechSequenceCount) { if (!speechActive) { onSpeechBegin(); } speechActive = true; //Log.d(TAG, "LAST SPEECH " + time); lastSequenceTime = time; silenceLengthMilis = Math.max(minSilenceLengthMilis, silenceLengthMilis - (maxSilenceLengthMilis - minSilenceLengthMilis) / 4); //Log.d(TAG, "SM:" + silenceLengthMilis); } } else { sequenceCounter = 1; } lastActiveTime = time; } else { if (time - lastSequenceTime > silenceLengthMilis) { if (lastSequenceTime > 0) { //Log.d(TAG, "TERMINATE: " + time); if (speechActive) { speechActive = false; onSpeechEnd(); } } else { //Log.d(TAG, "NOSPEECH: " + time); } } } } private boolean isFrameActive(final ShortBuffer frame) { int lastSign = 0; int czCount = 0; double energy = 0.0; final int frameSize = frame.limit(); for (int i = 0; i < frameSize; i++) { final short amplitudeValue = frame.get(i); energy += amplitudeValue * amplitudeValue / frameSize; final int sign; if (amplitudeValue > 0) { sign = 1; } else { sign = -1; } if (lastSign != 0 && sign != lastSign) { czCount += 1; } lastSign = sign; } onChangeLevel(Math.sqrt(energy / frameSize) / 10 /* normalization value */); boolean result = false; if (time < startNoiseInterval) { averageNoiseEnergy = (averageNoiseEnergy + energy) / 2.0; } else { final int minCZ = (int) (frameSize * (1 / 3.0)); final int maxCZ = (int) (frameSize * (3 / 4.0)); if (czCount >= minCZ && czCount <= maxCZ) { if (energy > averageNoiseEnergy * energyFactor) { result = true; } } } return result; } private void onChangeLevel(final double energy) { if (eventsListener != null) { eventsListener.onAudioLevelChanged(energy); } } public void reset() { time = 0.0; averageNoiseEnergy = 0.0; lastActiveTime = -1.0; lastSequenceTime = 0.0; sequenceCounter = 0; silenceLengthMilis = maxSilenceLengthMilis; speechActive = false; } public void setSpeechListener(final SpeechEventsListener eventsListener) { this.eventsListener = eventsListener; } private void onSpeechEnd() { Log.v(TAG, "onSpeechEnd"); if (eventsListener != null) { eventsListener.onSpeechEnd(); } } private void onSpeechBegin() { Log.v(TAG, "onSpeechBegin"); if (eventsListener != null) { eventsListener.onSpeechBegin(); } } /** * Used for optimization * @param minAudioBufferSize */ public void setMinAudioBufferSize(final int minAudioBufferSize) { this.minAudioBufferSize = minAudioBufferSize; } /** * Used to notify about speech begin/end events */ public interface SpeechEventsListener { void onSpeechBegin(); void onSpeechEnd(); void onAudioLevelChanged(double energy); } }