Line data Source code
1 : /*
2 : * Copyright (C) 2004-2026 Savoir-faire Linux Inc.
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 : */
17 : #pragma once
18 :
19 : #include "noncopyable.h"
20 : #include "media/audio/audio_frame_resizer.h"
21 : #include "media/audio/resampler.h"
22 : #include "media/audio/audio_format.h"
23 : #include "media/libav_deps.h"
24 : #include "logger.h"
25 :
26 : #include <atomic>
27 : #include <memory>
28 :
29 : namespace jami {
30 :
31 : class AudioProcessor
32 : {
33 : private:
34 : NON_COPYABLE(AudioProcessor);
35 :
36 : public:
37 0 : AudioProcessor(AudioFormat format, unsigned frameSize)
38 0 : : playbackQueue_(format, (int) frameSize)
39 0 : , recordQueue_(format, (int) frameSize)
40 0 : , inputResampler_(new Resampler)
41 0 : , outputResampler_(new Resampler)
42 0 : , format_(format)
43 0 : , frameSize_(frameSize)
44 0 : , frameDurationMs_((unsigned int) (frameSize_ * (1.0 / format_.sample_rate) * 1000))
45 0 : {}
46 0 : virtual ~AudioProcessor() = default;
47 :
48 0 : virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf)
49 : {
50 0 : recordStarted_ = true;
51 0 : if (!playbackStarted_)
52 0 : return;
53 0 : enqueue(recordQueue_, std::move(buf));
54 : };
55 0 : virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf)
56 : {
57 0 : playbackStarted_ = true;
58 0 : if (!recordStarted_)
59 0 : return;
60 0 : auto copy = buf;
61 0 : enqueue(playbackQueue_, std::move(copy));
62 0 : };
63 :
64 : /**
65 : * @brief Process and return a single AudioFrame
66 : */
67 : virtual std::shared_ptr<AudioFrame> getProcessed() = 0;
68 :
69 : /**
70 : * @brief Set the status of echo cancellation
71 : */
72 : virtual void enableEchoCancel(bool enabled) = 0;
73 :
74 : /**
75 : * @brief Set the status of noise suppression
76 : * includes de-reverb, de-noise, high pass filter, etc
77 : */
78 : virtual void enableNoiseSuppression(bool enabled) = 0;
79 :
80 : /**
81 : * @brief Set the status of automatic gain control
82 : */
83 : virtual void enableAutomaticGainControl(bool enabled) = 0;
84 :
85 : /**
86 : * @brief Set the status of voice activity detection
87 : */
88 : virtual void enableVoiceActivityDetection(bool enabled) = 0;
89 :
90 : protected:
91 : AudioFrameResizer playbackQueue_;
92 : AudioFrameResizer recordQueue_;
93 : std::unique_ptr<Resampler> inputResampler_;
94 : std::unique_ptr<Resampler> outputResampler_;
95 : std::atomic_bool playbackStarted_;
96 : std::atomic_bool recordStarted_;
97 : AudioFormat format_;
98 : unsigned int frameSize_;
99 : unsigned int frameDurationMs_;
100 :
101 : // artificially extend voice activity by this long
102 : unsigned int forceMinimumVoiceActivityMs {1000};
103 :
104 : // current number of frames to force the voice activity to be true
105 : unsigned int forceVoiceActiveFramesLeft {0};
106 :
107 : // voice activity must be active for this long _before_ it is considered legitimate
108 : unsigned int minimumConsequtiveDurationMs {200};
109 :
110 : // current number of frames that the voice activity has been true
111 : unsigned int consecutiveActiveFrames {0};
112 :
113 : /**
114 : * @brief Helper method for audio processors, should be called at start of getProcessed()
115 : * Pops frames from audio queues if there's overflow
116 : * @returns True if there is underflow, false otherwise. An AudioProcessor should
117 : * return a blank AudioFrame if there is underflow.
118 : */
119 0 : bool tidyQueues()
120 : {
121 : // We should track the overflows and log them infrequently (every 10 frames)
122 : static int recordOverflowCount = 0;
123 : static int playbackOverflowCount = 0;
124 0 : const int overflowCountMax = 10;
125 :
126 0 : auto recordFrameSize = recordQueue_.frameSize();
127 0 : auto playbackFrameSize = playbackQueue_.frameSize();
128 0 : while (recordQueue_.samples() > recordFrameSize * 10
129 0 : && 2 * playbackQueue_.samples() * recordFrameSize < recordQueue_.samples() * playbackFrameSize) {
130 0 : recordOverflowCount++;
131 0 : recordQueue_.dequeue();
132 : }
133 0 : while (playbackQueue_.samples() > playbackFrameSize * 10
134 0 : && 2 * recordQueue_.samples() * playbackFrameSize < playbackQueue_.samples() * recordFrameSize) {
135 0 : playbackOverflowCount++;
136 0 : playbackQueue_.dequeue();
137 : }
138 :
139 0 : if (recordOverflowCount > overflowCountMax) {
140 0 : JAMI_LOG("record overflow {:d} / {:d} - playback: {:d}",
141 : recordQueue_.samples(),
142 : frameSize_,
143 : playbackQueue_.samples());
144 0 : recordOverflowCount = 0;
145 : }
146 :
147 0 : if (playbackOverflowCount > overflowCountMax) {
148 0 : JAMI_LOG("playback overflow {:d} / {:d} - record: {:d}",
149 : playbackQueue_.samples(),
150 : frameSize_,
151 : recordQueue_.samples());
152 0 : playbackOverflowCount = 0;
153 : }
154 :
155 0 : if (recordQueue_.samples() < recordFrameSize || playbackQueue_.samples() < playbackFrameSize) {
156 : // If there are not enough samples in either queue, we are unable to
157 : // process anything.
158 0 : return true;
159 : }
160 0 : return false;
161 : }
162 :
163 : /**
164 : * @brief Stablilizes voice activity
165 : * @param voiceStatus the voice status that was detected by the audio processor
166 : * for the current frame
167 : * @returns The voice activity status that should be set on the current frame
168 : */
169 0 : bool getStabilizedVoiceActivity(bool voiceStatus)
170 : {
171 0 : bool newVoice = false;
172 :
173 0 : if (voiceStatus) {
174 : // we detected activity
175 0 : consecutiveActiveFrames += 1;
176 :
177 : // make sure that we have been active for necessary time
178 0 : if (consecutiveActiveFrames > minimumConsequtiveDurationMs / frameDurationMs_) {
179 0 : newVoice = true;
180 :
181 : // set number of frames that will be forced positive
182 0 : forceVoiceActiveFramesLeft = (int) forceMinimumVoiceActivityMs / frameDurationMs_;
183 : }
184 0 : } else if (forceVoiceActiveFramesLeft > 0) {
185 : // if we didn't detect voice, but we haven't elapsed the minimum duration,
186 : // force voice to be true
187 0 : newVoice = true;
188 0 : forceVoiceActiveFramesLeft -= 1;
189 :
190 0 : consecutiveActiveFrames += 1;
191 : } else {
192 : // else no voice and no need to force
193 0 : newVoice = false;
194 0 : consecutiveActiveFrames = 0;
195 : }
196 :
197 0 : return newVoice;
198 : }
199 :
200 : private:
201 0 : void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf)
202 : {
203 0 : if (buf->getFormat() != format_) {
204 0 : auto resampled = &frameResizer == &recordQueue_ ? inputResampler_->resample(std::move(buf), format_)
205 0 : : outputResampler_->resample(std::move(buf), format_);
206 0 : frameResizer.enqueue(std::move(resampled));
207 0 : } else {
208 0 : frameResizer.enqueue(std::move(buf));
209 : }
210 0 : };
211 : };
212 :
213 : } // namespace jami
|