Line data Source code
1 : /* 2 : * Copyright (C) 2004-2025 Savoir-faire Linux Inc. 3 : * 4 : * This program is free software: you can redistribute it and/or modify 5 : * it under the terms of the GNU General Public License as published by 6 : * the Free Software Foundation, either version 3 of the License, or 7 : * (at your option) any later version. 8 : * 9 : * This program is distributed in the hope that it will be useful, 10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 : * GNU General Public License for more details. 13 : * 14 : * You should have received a copy of the GNU General Public License 15 : * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 : */ 17 : #pragma once 18 : 19 : #include "noncopyable.h" 20 : #include "media/audio/audio_frame_resizer.h" 21 : #include "media/audio/resampler.h" 22 : #include "media/audio/audio_format.h" 23 : #include "media/libav_deps.h" 24 : #include "logger.h" 25 : 26 : #include <atomic> 27 : #include <memory> 28 : 29 : namespace jami { 30 : 31 : class AudioProcessor 32 : { 33 : private: 34 : NON_COPYABLE(AudioProcessor); 35 : 36 : public: 37 0 : AudioProcessor(AudioFormat format, unsigned frameSize) 38 0 : : playbackQueue_(format, (int) frameSize) 39 0 : , recordQueue_(format, (int) frameSize) 40 0 : , inputResampler_(new Resampler) 41 0 : , outputResampler_(new Resampler) 42 0 : , format_(format) 43 0 : , frameSize_(frameSize) 44 0 : , frameDurationMs_((unsigned int) (frameSize_ * (1.0 / format_.sample_rate) * 1000)) 45 0 : {} 46 0 : virtual ~AudioProcessor() = default; 47 : 48 0 : virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf) 49 : { 50 0 : recordStarted_ = true; 51 0 : if (!playbackStarted_) 52 0 : return; 53 0 : enqueue(recordQueue_, std::move(buf)); 54 : }; 55 0 : virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf) 56 : { 57 0 : playbackStarted_ = true; 58 0 : if (!recordStarted_) 59 0 : return; 60 0 : auto copy = buf; 61 0 : enqueue(playbackQueue_, std::move(copy)); 62 0 : }; 63 : 64 : /** 65 : * @brief Process and return a single AudioFrame 66 : */ 67 : virtual std::shared_ptr<AudioFrame> getProcessed() = 0; 68 : 69 : /** 70 : * @brief Set the status of echo cancellation 71 : */ 72 : virtual void enableEchoCancel(bool enabled) = 0; 73 : 74 : /** 75 : * @brief Set the status of noise suppression 76 : * includes de-reverb, de-noise, high pass filter, etc 77 : */ 78 : virtual void enableNoiseSuppression(bool enabled) = 0; 79 : 80 : /** 81 : * @brief Set the status of automatic gain control 82 : */ 83 : virtual void enableAutomaticGainControl(bool enabled) = 0; 84 : 85 : /** 86 : * @brief Set the status of voice activity detection 87 : */ 88 : virtual void enableVoiceActivityDetection(bool enabled) = 0; 89 : 90 : protected: 91 : AudioFrameResizer playbackQueue_; 92 : AudioFrameResizer recordQueue_; 93 : std::unique_ptr<Resampler> inputResampler_; 94 : std::unique_ptr<Resampler> outputResampler_; 95 : std::atomic_bool playbackStarted_; 96 : std::atomic_bool recordStarted_; 97 : AudioFormat format_; 98 : unsigned int frameSize_; 99 : unsigned int frameDurationMs_; 100 : 101 : // artificially extend voice activity by this long 102 : unsigned int forceMinimumVoiceActivityMs {1000}; 103 : 104 : // current number of frames to force the voice activity to be true 105 : unsigned int forceVoiceActiveFramesLeft {0}; 106 : 107 : // voice activity must be active for this long _before_ it is considered legitimate 108 : unsigned int minimumConsequtiveDurationMs {200}; 109 : 110 : // current number of frames that the voice activity has been true 111 : unsigned int consecutiveActiveFrames {0}; 112 : 113 : /** 114 : * @brief Helper method for audio processors, should be called at start of getProcessed() 115 : * Pops frames from audio queues if there's overflow 116 : * @returns True if there is underflow, false otherwise. An AudioProcessor should 117 : * return a blank AudioFrame if there is underflow. 118 : */ 119 0 : bool tidyQueues() 120 : { 121 : // We should track the overflows and log them infrequently (every 10 frames) 122 : static int recordOverflowCount = 0; 123 : static int playbackOverflowCount = 0; 124 0 : const int overflowCountMax = 10; 125 : 126 0 : auto recordFrameSize = recordQueue_.frameSize(); 127 0 : auto playbackFrameSize = playbackQueue_.frameSize(); 128 0 : while (recordQueue_.samples() > recordFrameSize * 10 129 0 : && 2 * playbackQueue_.samples() * recordFrameSize 130 0 : < recordQueue_.samples() * playbackFrameSize) { 131 0 : recordOverflowCount++; 132 0 : recordQueue_.dequeue(); 133 : } 134 0 : while (playbackQueue_.samples() > playbackFrameSize * 10 135 0 : && 2 * recordQueue_.samples() * playbackFrameSize 136 0 : < playbackQueue_.samples() * recordFrameSize) { 137 0 : playbackOverflowCount++; 138 0 : playbackQueue_.dequeue(); 139 : } 140 : 141 0 : if (recordOverflowCount > overflowCountMax) { 142 0 : JAMI_LOG("record overflow {:d} / {:d} - playback: {:d}", 143 : recordQueue_.samples(), 144 : frameSize_, 145 : playbackQueue_.samples()); 146 0 : recordOverflowCount = 0; 147 : } 148 : 149 0 : if (playbackOverflowCount > overflowCountMax) { 150 0 : JAMI_LOG("playback overflow {:d} / {:d} - record: {:d}", 151 : playbackQueue_.samples(), 152 : frameSize_, 153 : recordQueue_.samples()); 154 0 : playbackOverflowCount = 0; 155 : } 156 : 157 0 : if (recordQueue_.samples() < recordFrameSize 158 0 : || playbackQueue_.samples() < playbackFrameSize) { 159 : // If there are not enough samples in either queue, we are unable to 160 : // process anything. 161 0 : return true; 162 : } 163 0 : return false; 164 : } 165 : 166 : /** 167 : * @brief Stablilizes voice activity 168 : * @param voiceStatus the voice status that was detected by the audio processor 169 : * for the current frame 170 : * @returns The voice activity status that should be set on the current frame 171 : */ 172 0 : bool getStabilizedVoiceActivity(bool voiceStatus) 173 : { 174 0 : bool newVoice = false; 175 : 176 0 : if (voiceStatus) { 177 : // we detected activity 178 0 : consecutiveActiveFrames += 1; 179 : 180 : // make sure that we have been active for necessary time 181 0 : if (consecutiveActiveFrames > minimumConsequtiveDurationMs / frameDurationMs_) { 182 0 : newVoice = true; 183 : 184 : // set number of frames that will be forced positive 185 0 : forceVoiceActiveFramesLeft = (int) forceMinimumVoiceActivityMs / frameDurationMs_; 186 : } 187 0 : } else if (forceVoiceActiveFramesLeft > 0) { 188 : // if we didn't detect voice, but we haven't elapsed the minimum duration, 189 : // force voice to be true 190 0 : newVoice = true; 191 0 : forceVoiceActiveFramesLeft -= 1; 192 : 193 0 : consecutiveActiveFrames += 1; 194 : } else { 195 : // else no voice and no need to force 196 0 : newVoice = false; 197 0 : consecutiveActiveFrames = 0; 198 : } 199 : 200 0 : return newVoice; 201 : } 202 : 203 : private: 204 0 : void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf) 205 : { 206 0 : if (buf->getFormat() != format_) { 207 0 : auto resampled = &frameResizer == &recordQueue_ 208 0 : ? inputResampler_->resample(std::move(buf), format_) 209 0 : : outputResampler_->resample(std::move(buf), format_); 210 0 : frameResizer.enqueue(std::move(resampled)); 211 0 : } else { 212 0 : frameResizer.enqueue(std::move(buf)); 213 : } 214 0 : }; 215 : }; 216 : 217 : } // namespace jami