Line data Source code
1 : /* 2 : * Copyright (C) 2004-2024 Savoir-faire Linux Inc. 3 : * 4 : * This program is free software: you can redistribute it and/or modify 5 : * it under the terms of the GNU General Public License as published by 6 : * the Free Software Foundation, either version 3 of the License, or 7 : * (at your option) any later version. 8 : * 9 : * This program is distributed in the hope that it will be useful, 10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 : * GNU General Public License for more details. 13 : * 14 : * You should have received a copy of the GNU General Public License 15 : * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 : */ 17 : #pragma once 18 : 19 : #include "noncopyable.h" 20 : #include "media/audio/audio_frame_resizer.h" 21 : #include "media/audio/resampler.h" 22 : #include "media/audio/audio_format.h" 23 : #include "media/libav_deps.h" 24 : #include "logger.h" 25 : 26 : #include <atomic> 27 : #include <memory> 28 : 29 : namespace jami { 30 : 31 : class AudioProcessor 32 : { 33 : private: 34 : NON_COPYABLE(AudioProcessor); 35 : 36 : public: 37 0 : AudioProcessor(AudioFormat format, unsigned frameSize) 38 0 : : playbackQueue_(format, (int) frameSize) 39 0 : , recordQueue_(format, (int) frameSize) 40 0 : , resampler_(new Resampler) 41 0 : , format_(format) 42 0 : , frameSize_(frameSize) 43 0 : , frameDurationMs_((unsigned int) (frameSize_ * (1.0 / format_.sample_rate) * 1000)) 44 0 : {} 45 0 : virtual ~AudioProcessor() = default; 46 : 47 0 : virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf) 48 : { 49 0 : recordStarted_ = true; 50 0 : if (!playbackStarted_) 51 0 : return; 52 0 : enqueue(recordQueue_, std::move(buf)); 53 : }; 54 0 : virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf) 55 : { 56 0 : playbackStarted_ = true; 57 0 : if (!recordStarted_) 58 0 : return; 59 0 : auto copy = buf; 60 0 : enqueue(playbackQueue_, std::move(copy)); 61 0 : }; 62 : 63 : /** 64 : * @brief Process and return a single AudioFrame 65 : */ 66 : virtual std::shared_ptr<AudioFrame> getProcessed() = 0; 67 : 68 : /** 69 : * @brief Set the status of echo cancellation 70 : */ 71 : virtual void enableEchoCancel(bool enabled) = 0; 72 : 73 : /** 74 : * @brief Set the status of noise suppression 75 : * includes de-reverb, de-noise, high pass filter, etc 76 : */ 77 : virtual void enableNoiseSuppression(bool enabled) = 0; 78 : 79 : /** 80 : * @brief Set the status of automatic gain control 81 : */ 82 : virtual void enableAutomaticGainControl(bool enabled) = 0; 83 : 84 : /** 85 : * @brief Set the status of voice activity detection 86 : */ 87 : virtual void enableVoiceActivityDetection(bool enabled) = 0; 88 : 89 : protected: 90 : AudioFrameResizer playbackQueue_; 91 : AudioFrameResizer recordQueue_; 92 : std::unique_ptr<Resampler> resampler_; 93 : std::atomic_bool playbackStarted_; 94 : std::atomic_bool recordStarted_; 95 : AudioFormat format_; 96 : unsigned int frameSize_; 97 : unsigned int frameDurationMs_; 98 : 99 : // artificially extend voice activity by this long 100 : unsigned int forceMinimumVoiceActivityMs {1000}; 101 : 102 : // current number of frames to force the voice activity to be true 103 : unsigned int forceVoiceActiveFramesLeft {0}; 104 : 105 : // voice activity must be active for this long _before_ it is considered legitimate 106 : unsigned int minimumConsequtiveDurationMs {200}; 107 : 108 : // current number of frames that the voice activity has been true 109 : unsigned int consecutiveActiveFrames {0}; 110 : 111 : /** 112 : * @brief Helper method for audio processors, should be called at start of getProcessed() 113 : * Pops frames from audio queues if there's overflow 114 : * @returns True if there is underflow, false otherwise. An AudioProcessor should 115 : * return a blank AudioFrame if there is underflow. 116 : */ 117 0 : bool tidyQueues() 118 : { 119 0 : auto recordFrameSize = recordQueue_.frameSize(); 120 0 : auto playbackFrameSize = playbackQueue_.frameSize(); 121 0 : while (recordQueue_.samples() > recordFrameSize * 10 122 0 : && 2 * playbackQueue_.samples() * recordFrameSize < recordQueue_.samples() * playbackFrameSize) { 123 0 : JAMI_LOG("record overflow {:d} / {:d} - playback: {:d}", recordQueue_.samples(), frameSize_, playbackQueue_.samples()); 124 0 : recordQueue_.dequeue(); 125 : } 126 0 : while (playbackQueue_.samples() > playbackFrameSize * 10 127 0 : && 2 * recordQueue_.samples() * playbackFrameSize < playbackQueue_.samples() * recordFrameSize) { 128 0 : JAMI_LOG("playback overflow {:d} / {:d} - record: {:d}", playbackQueue_.samples(), frameSize_, recordQueue_.samples()); 129 0 : playbackQueue_.dequeue(); 130 : } 131 0 : if (recordQueue_.samples() < recordFrameSize 132 0 : || playbackQueue_.samples() < playbackFrameSize) { 133 : // If there are not enough samples in either queue, we are unable to 134 : // process anything. 135 0 : return true; 136 : } 137 0 : return false; 138 : } 139 : 140 : /** 141 : * @brief Stablilizes voice activity 142 : * @param voiceStatus the voice status that was detected by the audio processor 143 : * for the current frame 144 : * @returns The voice activity status that should be set on the current frame 145 : */ 146 0 : bool getStabilizedVoiceActivity(bool voiceStatus) 147 : { 148 0 : bool newVoice = false; 149 : 150 0 : if (voiceStatus) { 151 : // we detected activity 152 0 : consecutiveActiveFrames += 1; 153 : 154 : // make sure that we have been active for necessary time 155 0 : if (consecutiveActiveFrames > minimumConsequtiveDurationMs / frameDurationMs_) { 156 0 : newVoice = true; 157 : 158 : // set number of frames that will be forced positive 159 0 : forceVoiceActiveFramesLeft = (int) forceMinimumVoiceActivityMs / frameDurationMs_; 160 : } 161 0 : } else if (forceVoiceActiveFramesLeft > 0) { 162 : // if we didn't detect voice, but we haven't elapsed the minimum duration, 163 : // force voice to be true 164 0 : newVoice = true; 165 0 : forceVoiceActiveFramesLeft -= 1; 166 : 167 0 : consecutiveActiveFrames += 1; 168 : } else { 169 : // else no voice and no need to force 170 0 : newVoice = false; 171 0 : consecutiveActiveFrames = 0; 172 : } 173 : 174 0 : return newVoice; 175 : } 176 : 177 : private: 178 0 : void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf) 179 : { 180 0 : if (buf->getFormat() != format_) { 181 0 : frameResizer.enqueue(resampler_->resample(std::move(buf), format_)); 182 : } else 183 0 : frameResizer.enqueue(std::move(buf)); 184 0 : }; 185 : }; 186 : 187 : } // namespace jami