Line data Source code
1 : /* 2 : * Copyright (C) 2004-2025 Savoir-faire Linux Inc. 3 : * 4 : * This program is free software: you can redistribute it and/or modify 5 : * it under the terms of the GNU General Public License as published by 6 : * the Free Software Foundation, either version 3 of the License, or 7 : * (at your option) any later version. 8 : * 9 : * This program is distributed in the hope that it will be useful, 10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 : * GNU General Public License for more details. 13 : * 14 : * You should have received a copy of the GNU General Public License 15 : * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 : */ 17 : #pragma once 18 : 19 : #include "noncopyable.h" 20 : #include "media/audio/audio_frame_resizer.h" 21 : #include "media/audio/resampler.h" 22 : #include "media/audio/audio_format.h" 23 : #include "media/libav_deps.h" 24 : #include "logger.h" 25 : 26 : #include <atomic> 27 : #include <memory> 28 : 29 : namespace jami { 30 : 31 : class AudioProcessor 32 : { 33 : private: 34 : NON_COPYABLE(AudioProcessor); 35 : 36 : public: 37 0 : AudioProcessor(AudioFormat format, unsigned frameSize) 38 0 : : playbackQueue_(format, (int) frameSize) 39 0 : , recordQueue_(format, (int) frameSize) 40 0 : , inputResampler_(new Resampler) 41 0 : , outputResampler_(new Resampler) 42 0 : , format_(format) 43 0 : , frameSize_(frameSize) 44 0 : , frameDurationMs_((unsigned int) (frameSize_ * (1.0 / format_.sample_rate) * 1000)) 45 0 : {} 46 0 : virtual ~AudioProcessor() = default; 47 : 48 0 : virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf) 49 : { 50 0 : recordStarted_ = true; 51 0 : if (!playbackStarted_) 52 0 : return; 53 0 : enqueue(recordQueue_, std::move(buf)); 54 : }; 55 0 : virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf) 56 : { 57 0 : playbackStarted_ = true; 58 0 : if (!recordStarted_) 59 0 : return; 60 0 : auto copy = buf; 61 0 : enqueue(playbackQueue_, std::move(copy)); 62 0 : }; 63 : 64 : /** 65 : * @brief Process and return a single AudioFrame 66 : */ 67 : virtual std::shared_ptr<AudioFrame> getProcessed() = 0; 68 : 69 : /** 70 : * @brief Set the status of echo cancellation 71 : */ 72 : virtual void enableEchoCancel(bool enabled) = 0; 73 : 74 : /** 75 : * @brief Set the status of noise suppression 76 : * includes de-reverb, de-noise, high pass filter, etc 77 : */ 78 : virtual void enableNoiseSuppression(bool enabled) = 0; 79 : 80 : /** 81 : * @brief Set the status of automatic gain control 82 : */ 83 : virtual void enableAutomaticGainControl(bool enabled) = 0; 84 : 85 : /** 86 : * @brief Set the status of voice activity detection 87 : */ 88 : virtual void enableVoiceActivityDetection(bool enabled) = 0; 89 : 90 : protected: 91 : AudioFrameResizer playbackQueue_; 92 : AudioFrameResizer recordQueue_; 93 : std::unique_ptr<Resampler> inputResampler_; 94 : std::unique_ptr<Resampler> outputResampler_; 95 : std::atomic_bool playbackStarted_; 96 : std::atomic_bool recordStarted_; 97 : AudioFormat format_; 98 : unsigned int frameSize_; 99 : unsigned int frameDurationMs_; 100 : 101 : // artificially extend voice activity by this long 102 : unsigned int forceMinimumVoiceActivityMs {1000}; 103 : 104 : // current number of frames to force the voice activity to be true 105 : unsigned int forceVoiceActiveFramesLeft {0}; 106 : 107 : // voice activity must be active for this long _before_ it is considered legitimate 108 : unsigned int minimumConsequtiveDurationMs {200}; 109 : 110 : // current number of frames that the voice activity has been true 111 : unsigned int consecutiveActiveFrames {0}; 112 : 113 : /** 114 : * @brief Helper method for audio processors, should be called at start of getProcessed() 115 : * Pops frames from audio queues if there's overflow 116 : * @returns True if there is underflow, false otherwise. An AudioProcessor should 117 : * return a blank AudioFrame if there is underflow. 118 : */ 119 0 : bool tidyQueues() 120 : { 121 : // We should track the overflows and log them infrequently (every 10 frames) 122 : static int recordOverflowCount = 0; 123 : static int playbackOverflowCount = 0; 124 0 : const int overflowCountMax = 10; 125 : 126 0 : auto recordFrameSize = recordQueue_.frameSize(); 127 0 : auto playbackFrameSize = playbackQueue_.frameSize(); 128 0 : while (recordQueue_.samples() > recordFrameSize * 10 129 0 : && 2 * playbackQueue_.samples() * recordFrameSize < recordQueue_.samples() * playbackFrameSize) { 130 0 : recordOverflowCount++; 131 0 : recordQueue_.dequeue(); 132 : } 133 0 : while (playbackQueue_.samples() > playbackFrameSize * 10 134 0 : && 2 * recordQueue_.samples() * playbackFrameSize < playbackQueue_.samples() * recordFrameSize) { 135 0 : playbackOverflowCount++; 136 0 : playbackQueue_.dequeue(); 137 : } 138 : 139 0 : if (recordOverflowCount > overflowCountMax) { 140 0 : JAMI_LOG("record overflow {:d} / {:d} - playback: {:d}", 141 : recordQueue_.samples(), 142 : frameSize_, 143 : playbackQueue_.samples()); 144 0 : recordOverflowCount = 0; 145 : } 146 : 147 0 : if (playbackOverflowCount > overflowCountMax) { 148 0 : JAMI_LOG("playback overflow {:d} / {:d} - record: {:d}", 149 : playbackQueue_.samples(), 150 : frameSize_, 151 : recordQueue_.samples()); 152 0 : playbackOverflowCount = 0; 153 : } 154 : 155 0 : if (recordQueue_.samples() < recordFrameSize || playbackQueue_.samples() < playbackFrameSize) { 156 : // If there are not enough samples in either queue, we are unable to 157 : // process anything. 158 0 : return true; 159 : } 160 0 : return false; 161 : } 162 : 163 : /** 164 : * @brief Stablilizes voice activity 165 : * @param voiceStatus the voice status that was detected by the audio processor 166 : * for the current frame 167 : * @returns The voice activity status that should be set on the current frame 168 : */ 169 0 : bool getStabilizedVoiceActivity(bool voiceStatus) 170 : { 171 0 : bool newVoice = false; 172 : 173 0 : if (voiceStatus) { 174 : // we detected activity 175 0 : consecutiveActiveFrames += 1; 176 : 177 : // make sure that we have been active for necessary time 178 0 : if (consecutiveActiveFrames > minimumConsequtiveDurationMs / frameDurationMs_) { 179 0 : newVoice = true; 180 : 181 : // set number of frames that will be forced positive 182 0 : forceVoiceActiveFramesLeft = (int) forceMinimumVoiceActivityMs / frameDurationMs_; 183 : } 184 0 : } else if (forceVoiceActiveFramesLeft > 0) { 185 : // if we didn't detect voice, but we haven't elapsed the minimum duration, 186 : // force voice to be true 187 0 : newVoice = true; 188 0 : forceVoiceActiveFramesLeft -= 1; 189 : 190 0 : consecutiveActiveFrames += 1; 191 : } else { 192 : // else no voice and no need to force 193 0 : newVoice = false; 194 0 : consecutiveActiveFrames = 0; 195 : } 196 : 197 0 : return newVoice; 198 : } 199 : 200 : private: 201 0 : void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf) 202 : { 203 0 : if (buf->getFormat() != format_) { 204 0 : auto resampled = &frameResizer == &recordQueue_ ? inputResampler_->resample(std::move(buf), format_) 205 0 : : outputResampler_->resample(std::move(buf), format_); 206 0 : frameResizer.enqueue(std::move(resampled)); 207 0 : } else { 208 0 : frameResizer.enqueue(std::move(buf)); 209 : } 210 0 : }; 211 : }; 212 : 213 : } // namespace jami