Line data Source code
1 : /* 2 : * Copyright (C) 2021-2024 Savoir-faire Linux Inc. 3 : * 4 : * This program is free software; you can redistribute it and/or modify 5 : * it under the terms of the GNU General Public License as published by 6 : * the Free Software Foundation; either version 3 of the License, or 7 : * (at your option) any later version. 8 : * 9 : * This program is distributed in the hope that it will be useful, 10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 : * GNU General Public License for more details. 13 : * 14 : * You should have received a copy of the GNU General Public License 15 : * along with this program; if not, write to the Free Software 16 : * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 17 : */ 18 : 19 : #pragma once 20 : 21 : #include "noncopyable.h" 22 : #include "media/audio/audio_frame_resizer.h" 23 : #include "media/audio/resampler.h" 24 : #include "media/audio/audio_format.h" 25 : #include "media/libav_deps.h" 26 : #include "logger.h" 27 : 28 : #include <atomic> 29 : #include <memory> 30 : 31 : namespace jami { 32 : 33 : class AudioProcessor 34 : { 35 : private: 36 : NON_COPYABLE(AudioProcessor); 37 : 38 : public: 39 0 : AudioProcessor(AudioFormat format, unsigned frameSize) 40 0 : : playbackQueue_(format, (int) frameSize) 41 0 : , recordQueue_(format, (int) frameSize) 42 0 : , resampler_(new Resampler) 43 0 : , format_(format) 44 0 : , frameSize_(frameSize) 45 0 : , frameDurationMs_((unsigned int) (frameSize_ * (1.0 / format_.sample_rate) * 1000)) 46 0 : {} 47 0 : virtual ~AudioProcessor() = default; 48 : 49 0 : virtual void putRecorded(std::shared_ptr<AudioFrame>&& buf) 50 : { 51 0 : recordStarted_ = true; 52 0 : if (!playbackStarted_) 53 0 : return; 54 0 : enqueue(recordQueue_, std::move(buf)); 55 : }; 56 0 : virtual void putPlayback(const std::shared_ptr<AudioFrame>& buf) 57 : { 58 0 : playbackStarted_ = true; 59 0 : if (!recordStarted_) 60 0 : return; 61 0 : auto copy = buf; 62 0 : enqueue(playbackQueue_, std::move(copy)); 63 0 : }; 64 : 65 : /** 66 : * @brief Process and return a single AudioFrame 67 : */ 68 : virtual std::shared_ptr<AudioFrame> getProcessed() = 0; 69 : 70 : /** 71 : * @brief Set the status of echo cancellation 72 : */ 73 : virtual void enableEchoCancel(bool enabled) = 0; 74 : 75 : /** 76 : * @brief Set the status of noise suppression 77 : * includes de-reverb, de-noise, high pass filter, etc 78 : */ 79 : virtual void enableNoiseSuppression(bool enabled) = 0; 80 : 81 : /** 82 : * @brief Set the status of automatic gain control 83 : */ 84 : virtual void enableAutomaticGainControl(bool enabled) = 0; 85 : 86 : /** 87 : * @brief Set the status of voice activity detection 88 : */ 89 : virtual void enableVoiceActivityDetection(bool enabled) = 0; 90 : 91 : protected: 92 : AudioFrameResizer playbackQueue_; 93 : AudioFrameResizer recordQueue_; 94 : std::unique_ptr<Resampler> resampler_; 95 : std::atomic_bool playbackStarted_; 96 : std::atomic_bool recordStarted_; 97 : AudioFormat format_; 98 : unsigned int frameSize_; 99 : unsigned int frameDurationMs_; 100 : 101 : // artificially extend voice activity by this long 102 : unsigned int forceMinimumVoiceActivityMs {1000}; 103 : 104 : // current number of frames to force the voice activity to be true 105 : unsigned int forceVoiceActiveFramesLeft {0}; 106 : 107 : // voice activity must be active for this long _before_ it is considered legitimate 108 : unsigned int minimumConsequtiveDurationMs {200}; 109 : 110 : // current number of frames that the voice activity has been true 111 : unsigned int consecutiveActiveFrames {0}; 112 : 113 : /** 114 : * @brief Helper method for audio processors, should be called at start of getProcessed() 115 : * Pops frames from audio queues if there's overflow 116 : * @returns True if there is underflow, false otherwise. An AudioProcessor should 117 : * return a blank AudioFrame if there is underflow. 118 : */ 119 0 : bool tidyQueues() 120 : { 121 0 : auto recordFrameSize = recordQueue_.frameSize(); 122 0 : auto playbackFrameSize = playbackQueue_.frameSize(); 123 0 : while (recordQueue_.samples() > recordFrameSize * 10 124 0 : && 2 * playbackQueue_.samples() * recordFrameSize < recordQueue_.samples() * playbackFrameSize) { 125 0 : JAMI_LOG("record overflow {:d} / {:d} - playback: {:d}", recordQueue_.samples(), frameSize_, playbackQueue_.samples()); 126 0 : recordQueue_.dequeue(); 127 : } 128 0 : while (playbackQueue_.samples() > playbackFrameSize * 10 129 0 : && 2 * recordQueue_.samples() * playbackFrameSize < playbackQueue_.samples() * recordFrameSize) { 130 0 : JAMI_LOG("playback overflow {:d} / {:d} - record: {:d}", playbackQueue_.samples(), frameSize_, recordQueue_.samples()); 131 0 : playbackQueue_.dequeue(); 132 : } 133 0 : if (recordQueue_.samples() < recordFrameSize 134 0 : || playbackQueue_.samples() < playbackFrameSize) { 135 : // If there are not enough samples in either queue, we can't 136 : // process anything. 137 0 : return true; 138 : } 139 0 : return false; 140 : } 141 : 142 : /** 143 : * @brief Stablilizes voice activity 144 : * @param voiceStatus the voice status that was detected by the audio processor 145 : * for the current frame 146 : * @returns The voice activity status that should be set on the current frame 147 : */ 148 0 : bool getStabilizedVoiceActivity(bool voiceStatus) 149 : { 150 0 : bool newVoice = false; 151 : 152 0 : if (voiceStatus) { 153 : // we detected activity 154 0 : consecutiveActiveFrames += 1; 155 : 156 : // make sure that we have been active for necessary time 157 0 : if (consecutiveActiveFrames > minimumConsequtiveDurationMs / frameDurationMs_) { 158 0 : newVoice = true; 159 : 160 : // set number of frames that will be forced positive 161 0 : forceVoiceActiveFramesLeft = (int) forceMinimumVoiceActivityMs / frameDurationMs_; 162 : } 163 0 : } else if (forceVoiceActiveFramesLeft > 0) { 164 : // if we didn't detect voice, but we haven't elapsed the minimum duration, 165 : // force voice to be true 166 0 : newVoice = true; 167 0 : forceVoiceActiveFramesLeft -= 1; 168 : 169 0 : consecutiveActiveFrames += 1; 170 : } else { 171 : // else no voice and no need to force 172 0 : newVoice = false; 173 0 : consecutiveActiveFrames = 0; 174 : } 175 : 176 0 : return newVoice; 177 : } 178 : 179 : private: 180 0 : void enqueue(AudioFrameResizer& frameResizer, std::shared_ptr<AudioFrame>&& buf) 181 : { 182 0 : if (buf->getFormat() != format_) { 183 0 : frameResizer.enqueue(resampler_->resample(std::move(buf), format_)); 184 : } else 185 0 : frameResizer.enqueue(std::move(buf)); 186 0 : }; 187 : }; 188 : 189 : } // namespace jami