Line data Source code
1 : /*
2 : * Copyright (C) 2021-2024 Savoir-faire Linux Inc.
3 : *
4 : * This program is free software; you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation; either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program; if not, write to the Free Software
16 : * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 : */
18 :
19 : #include "speex.h"
20 :
21 : #include "audio/audiolayer.h"
22 :
23 : #ifndef _MSC_VER
24 : #include <speex/speex_config_types.h>
25 : #endif
26 : extern "C" {
27 : #include <speex/speex_echo.h>
28 : #include <speex/speex_preprocess.h>
29 : }
30 :
31 : #include <cstdint>
32 : #include <memory>
33 : #include <vector>
34 :
35 : namespace jami {
36 :
37 : inline AudioFormat
38 : audioFormatToSampleFormat(AudioFormat format)
39 : {
40 : return {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_S16};
41 : }
42 :
43 0 : SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
44 : : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_S16), frameSize)
45 0 : , echoState(speex_echo_state_init_mc((int) frameSize,
46 0 : (int) frameSize * 16,
47 0 : (int) format_.nb_channels,
48 0 : (int) format_.nb_channels),
49 0 : &speex_echo_state_destroy)
50 0 : , procBuffer(std::make_unique<AudioFrame>(format.withSampleFormat(AV_SAMPLE_FMT_S16P), frameSize_))
51 : {
52 0 : JAMI_DBG("[speex-dsp] SpeexAudioProcessor, frame size = %d (=%d ms), channels = %d",
53 : frameSize,
54 : frameDurationMs_,
55 : format_.nb_channels);
56 : // set up speex echo state
57 0 : speex_echo_ctl(echoState.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
58 :
59 : // speex specific value to turn feature on (need to pass a pointer to it)
60 0 : spx_int32_t speexOn = 1;
61 :
62 : // probability integers, i.e. 50 means 50%
63 : // vad will be true if speex's raw probability calculation is higher than this in any case
64 0 : spx_int32_t probStart = 99;
65 :
66 : // vad will be true if voice was active last frame
67 : // AND speex's raw probability calculation is higher than this
68 0 : spx_int32_t probContinue = 90;
69 :
70 : // maximum noise suppression in dB (negative)
71 0 : spx_int32_t maxNoiseSuppress = -50;
72 :
73 : // set up speex preprocess states, one for each channel
74 : // note that they are not enabled here, but rather in the enable* functions
75 0 : for (unsigned int i = 0; i < format_.nb_channels; i++) {
76 : auto channelPreprocessorState
77 : = SpeexPreprocessStatePtr(speex_preprocess_state_init((int) frameSize,
78 0 : (int) format_.sample_rate),
79 0 : &speex_preprocess_state_destroy);
80 :
81 : // set max noise suppression level
82 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
83 : SPEEX_PREPROCESS_SET_NOISE_SUPPRESS,
84 : &maxNoiseSuppress);
85 :
86 : // set up voice activity values
87 0 : speex_preprocess_ctl(channelPreprocessorState.get(), SPEEX_PREPROCESS_SET_VAD, &speexOn);
88 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
89 : SPEEX_PREPROCESS_SET_PROB_START,
90 : &probStart);
91 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
92 : SPEEX_PREPROCESS_SET_PROB_CONTINUE,
93 : &probContinue);
94 :
95 : // keep track of this channel's preprocessor state
96 0 : preprocessorStates.push_back(std::move(channelPreprocessorState));
97 0 : }
98 :
99 0 : JAMI_INFO("[speex-dsp] Done initializing");
100 0 : }
101 :
102 : void
103 0 : SpeexAudioProcessor::enableEchoCancel(bool enabled)
104 : {
105 0 : JAMI_DBG("[speex-dsp] enableEchoCancel %d", enabled);
106 : // need to set member variable so we know to do it in getProcessed
107 0 : shouldAEC = enabled;
108 :
109 0 : if (enabled) {
110 : // reset the echo canceller
111 0 : speex_echo_state_reset(echoState.get());
112 :
113 0 : for (auto& channelPreprocessorState : preprocessorStates) {
114 : // attach our already-created echo canceller
115 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
116 : SPEEX_PREPROCESS_SET_ECHO_STATE,
117 0 : echoState.get());
118 : }
119 : } else {
120 0 : for (auto& channelPreprocessorState : preprocessorStates) {
121 : // detach echo canceller (set it to NULL)
122 : // don't destroy it though, we will reset it when necessary
123 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
124 : SPEEX_PREPROCESS_SET_ECHO_STATE,
125 : NULL);
126 : }
127 : }
128 0 : }
129 :
130 : void
131 0 : SpeexAudioProcessor::enableNoiseSuppression(bool enabled)
132 : {
133 0 : JAMI_DBG("[speex-dsp] enableNoiseSuppression %d", enabled);
134 0 : spx_int32_t speexSetValue = (spx_int32_t) enabled;
135 :
136 : // for each preprocessor
137 0 : for (auto& channelPreprocessorState : preprocessorStates) {
138 : // set denoise status
139 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
140 : SPEEX_PREPROCESS_SET_DENOISE,
141 : &speexSetValue);
142 : // set de-reverb status
143 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
144 : SPEEX_PREPROCESS_SET_DEREVERB,
145 : &speexSetValue);
146 : }
147 0 : }
148 :
149 : void
150 0 : SpeexAudioProcessor::enableAutomaticGainControl(bool enabled)
151 : {
152 0 : JAMI_DBG("[speex-dsp] enableAutomaticGainControl %d", enabled);
153 0 : spx_int32_t speexSetValue = (spx_int32_t) enabled;
154 :
155 : // for each preprocessor
156 0 : for (auto& channelPreprocessorState : preprocessorStates) {
157 : // set AGC status
158 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
159 : SPEEX_PREPROCESS_SET_AGC,
160 : &speexSetValue);
161 : }
162 0 : }
163 :
164 : void
165 0 : SpeexAudioProcessor::enableVoiceActivityDetection(bool enabled)
166 : {
167 0 : JAMI_DBG("[speex-dsp] enableVoiceActivityDetection %d", enabled);
168 :
169 0 : shouldDetectVoice = enabled;
170 :
171 0 : spx_int32_t speexSetValue = (spx_int32_t) enabled;
172 0 : for (auto& channelPreprocessorState : preprocessorStates) {
173 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
174 : SPEEX_PREPROCESS_SET_VAD,
175 : &speexSetValue);
176 : }
177 0 : }
178 :
179 : std::shared_ptr<AudioFrame>
180 0 : SpeexAudioProcessor::getProcessed()
181 : {
182 0 : if (tidyQueues()) {
183 0 : return {};
184 : }
185 :
186 0 : auto playback = playbackQueue_.dequeue();
187 0 : auto record = recordQueue_.dequeue();
188 :
189 0 : if (!playback || !record) {
190 0 : return {};
191 : }
192 :
193 0 : std::shared_ptr<AudioFrame> processed;
194 0 : if (shouldAEC) {
195 : // we want to echo cancel
196 : // multichannel, output into processed
197 0 : processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
198 0 : speex_echo_cancellation(echoState.get(),
199 0 : (int16_t*) record->pointer()->data[0],
200 0 : (int16_t*) playback->pointer()->data[0],
201 0 : (int16_t*) processed->pointer()->data[0]);
202 : } else {
203 : // don't want to echo cancel, so just use record frame instead
204 0 : processed = record;
205 : }
206 :
207 0 : deinterleaveResampler.resample(processed->pointer(), procBuffer->pointer());
208 :
209 : // overall voice activity
210 0 : bool overallVad = false;
211 : // current channel voice activity
212 : int channelVad;
213 :
214 : // run preprocess on each channel
215 0 : int channel = 0;
216 0 : for (auto& channelPreprocessorState : preprocessorStates) {
217 : // preprocesses in place, returns voice activity boolean
218 0 : channelVad = speex_preprocess_run(channelPreprocessorState.get(), (int16_t*)procBuffer->pointer()->data[channel]);
219 :
220 : // boolean OR
221 0 : overallVad |= channelVad;
222 :
223 0 : channel += 1;
224 : }
225 :
226 0 : interleaveResampler.resample(procBuffer->pointer(), processed->pointer());
227 :
228 : // add stabilized voice activity to the AudioFrame
229 0 : processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);
230 0 : return processed;
231 0 : }
232 :
233 : } // namespace jami
|