Line data Source code
1 : /*
2 : * Copyright (C) 2004-2024 Savoir-faire Linux Inc.
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "speex.h"
19 :
20 : #include "audio/audiolayer.h"
21 :
22 : #ifndef _MSC_VER
23 : #if __has_include(<speex/speexdsp_config_types.h>)
24 : #include <speex/speexdsp_config_types.h>
25 : #else
26 : #include <speex/speex_config_types.h>
27 : #endif
28 : #endif
29 : extern "C" {
30 : #include <speex/speex_echo.h>
31 : #include <speex/speex_preprocess.h>
32 : }
33 :
34 : #include <cstdint>
35 : #include <memory>
36 : #include <vector>
37 :
38 : namespace jami {
39 :
40 : inline AudioFormat
41 : audioFormatToSampleFormat(AudioFormat format)
42 : {
43 : return {format.sample_rate, format.nb_channels, AV_SAMPLE_FMT_S16};
44 : }
45 :
46 0 : SpeexAudioProcessor::SpeexAudioProcessor(AudioFormat format, unsigned frameSize)
47 : : AudioProcessor(format.withSampleFormat(AV_SAMPLE_FMT_S16), frameSize)
48 0 : , echoState(speex_echo_state_init_mc((int) frameSize,
49 0 : (int) frameSize * 16,
50 0 : (int) format_.nb_channels,
51 0 : (int) format_.nb_channels),
52 0 : &speex_echo_state_destroy)
53 0 : , procBuffer(std::make_unique<AudioFrame>(format.withSampleFormat(AV_SAMPLE_FMT_S16P), frameSize_))
54 : {
55 0 : JAMI_DBG("[speex-dsp] SpeexAudioProcessor, frame size = %d (=%d ms), channels = %d",
56 : frameSize,
57 : frameDurationMs_,
58 : format_.nb_channels);
59 : // set up speex echo state
60 0 : speex_echo_ctl(echoState.get(), SPEEX_ECHO_SET_SAMPLING_RATE, &format_.sample_rate);
61 :
62 : // speex specific value to turn feature on (need to pass a pointer to it)
63 0 : spx_int32_t speexOn = 1;
64 :
65 : // probability integers, i.e. 50 means 50%
66 : // vad will be true if speex's raw probability calculation is higher than this in any case
67 0 : spx_int32_t probStart = 99;
68 :
69 : // vad will be true if voice was active last frame
70 : // AND speex's raw probability calculation is higher than this
71 0 : spx_int32_t probContinue = 90;
72 :
73 : // maximum noise suppression in dB (negative)
74 0 : spx_int32_t maxNoiseSuppress = -50;
75 :
76 : // set up speex preprocess states, one for each channel
77 : // note that they are not enabled here, but rather in the enable* functions
78 0 : for (unsigned int i = 0; i < format_.nb_channels; i++) {
79 : auto channelPreprocessorState
80 : = SpeexPreprocessStatePtr(speex_preprocess_state_init((int) frameSize,
81 0 : (int) format_.sample_rate),
82 0 : &speex_preprocess_state_destroy);
83 :
84 : // set max noise suppression level
85 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
86 : SPEEX_PREPROCESS_SET_NOISE_SUPPRESS,
87 : &maxNoiseSuppress);
88 :
89 : // set up voice activity values
90 0 : speex_preprocess_ctl(channelPreprocessorState.get(), SPEEX_PREPROCESS_SET_VAD, &speexOn);
91 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
92 : SPEEX_PREPROCESS_SET_PROB_START,
93 : &probStart);
94 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
95 : SPEEX_PREPROCESS_SET_PROB_CONTINUE,
96 : &probContinue);
97 :
98 : // keep track of this channel's preprocessor state
99 0 : preprocessorStates.push_back(std::move(channelPreprocessorState));
100 0 : }
101 :
102 0 : JAMI_INFO("[speex-dsp] Done initializing");
103 0 : }
104 :
105 : void
106 0 : SpeexAudioProcessor::enableEchoCancel(bool enabled)
107 : {
108 0 : JAMI_DBG("[speex-dsp] enableEchoCancel %d", enabled);
109 : // need to set member variable so we know to do it in getProcessed
110 0 : shouldAEC = enabled;
111 :
112 0 : if (enabled) {
113 : // reset the echo canceller
114 0 : speex_echo_state_reset(echoState.get());
115 :
116 0 : for (auto& channelPreprocessorState : preprocessorStates) {
117 : // attach our already-created echo canceller
118 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
119 : SPEEX_PREPROCESS_SET_ECHO_STATE,
120 0 : echoState.get());
121 : }
122 : } else {
123 0 : for (auto& channelPreprocessorState : preprocessorStates) {
124 : // detach echo canceller (set it to NULL)
125 : // don't destroy it though, we will reset it when necessary
126 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
127 : SPEEX_PREPROCESS_SET_ECHO_STATE,
128 : NULL);
129 : }
130 : }
131 0 : }
132 :
133 : void
134 0 : SpeexAudioProcessor::enableNoiseSuppression(bool enabled)
135 : {
136 0 : JAMI_DBG("[speex-dsp] enableNoiseSuppression %d", enabled);
137 0 : spx_int32_t speexSetValue = (spx_int32_t) enabled;
138 :
139 : // for each preprocessor
140 0 : for (auto& channelPreprocessorState : preprocessorStates) {
141 : // set denoise status
142 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
143 : SPEEX_PREPROCESS_SET_DENOISE,
144 : &speexSetValue);
145 : // set de-reverb status
146 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
147 : SPEEX_PREPROCESS_SET_DEREVERB,
148 : &speexSetValue);
149 : }
150 0 : }
151 :
152 : void
153 0 : SpeexAudioProcessor::enableAutomaticGainControl(bool enabled)
154 : {
155 0 : JAMI_DBG("[speex-dsp] enableAutomaticGainControl %d", enabled);
156 0 : spx_int32_t speexSetValue = (spx_int32_t) enabled;
157 :
158 : // for each preprocessor
159 0 : for (auto& channelPreprocessorState : preprocessorStates) {
160 : // set AGC status
161 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
162 : SPEEX_PREPROCESS_SET_AGC,
163 : &speexSetValue);
164 : }
165 0 : }
166 :
167 : void
168 0 : SpeexAudioProcessor::enableVoiceActivityDetection(bool enabled)
169 : {
170 0 : JAMI_DBG("[speex-dsp] enableVoiceActivityDetection %d", enabled);
171 :
172 0 : shouldDetectVoice = enabled;
173 :
174 0 : spx_int32_t speexSetValue = (spx_int32_t) enabled;
175 0 : for (auto& channelPreprocessorState : preprocessorStates) {
176 0 : speex_preprocess_ctl(channelPreprocessorState.get(),
177 : SPEEX_PREPROCESS_SET_VAD,
178 : &speexSetValue);
179 : }
180 0 : }
181 :
182 : std::shared_ptr<AudioFrame>
183 0 : SpeexAudioProcessor::getProcessed()
184 : {
185 0 : if (tidyQueues()) {
186 0 : return {};
187 : }
188 :
189 0 : auto playback = playbackQueue_.dequeue();
190 0 : auto record = recordQueue_.dequeue();
191 :
192 0 : if (!playback || !record) {
193 0 : return {};
194 : }
195 :
196 0 : std::shared_ptr<AudioFrame> processed;
197 0 : if (shouldAEC) {
198 : // we want to echo cancel
199 : // multichannel, output into processed
200 0 : processed = std::make_shared<AudioFrame>(record->getFormat(), record->getFrameSize());
201 0 : speex_echo_cancellation(echoState.get(),
202 0 : (int16_t*) record->pointer()->data[0],
203 0 : (int16_t*) playback->pointer()->data[0],
204 0 : (int16_t*) processed->pointer()->data[0]);
205 : } else {
206 : // don't want to echo cancel, so just use record frame instead
207 0 : processed = record;
208 : }
209 :
210 0 : deinterleaveResampler.resample(processed->pointer(), procBuffer->pointer());
211 :
212 : // overall voice activity
213 0 : bool overallVad = false;
214 : // current channel voice activity
215 : int channelVad;
216 :
217 : // run preprocess on each channel
218 0 : int channel = 0;
219 0 : for (auto& channelPreprocessorState : preprocessorStates) {
220 : // preprocesses in place, returns voice activity boolean
221 0 : channelVad = speex_preprocess_run(channelPreprocessorState.get(), (int16_t*)procBuffer->pointer()->data[channel]);
222 :
223 : // boolean OR
224 0 : overallVad |= channelVad;
225 :
226 0 : channel += 1;
227 : }
228 :
229 0 : interleaveResampler.resample(procBuffer->pointer(), processed->pointer());
230 :
231 : // add stabilized voice activity to the AudioFrame
232 0 : processed->has_voice = shouldDetectVoice && getStabilizedVoiceActivity(overallVad);
233 0 : return processed;
234 0 : }
235 :
236 : } // namespace jami
|