LCOV - code coverage report
Current view: top level - foo/src/media/audio - audio_input.cpp (source / functions) Hit Total Coverage
Test: jami-coverage-filtered.info Lines: 136 273 49.8 %
Date: 2026-02-28 10:41:24 Functions: 24 45 53.3 %

          Line data    Source code
       1             : /*
       2             :  *  Copyright (C) 2004-2026 Savoir-faire Linux Inc.
       3             :  *
       4             :  *  This program is free software: you can redistribute it and/or modify
       5             :  *  it under the terms of the GNU General Public License as published by
       6             :  *  the Free Software Foundation, either version 3 of the License, or
       7             :  *  (at your option) any later version.
       8             :  *
       9             :  *  This program is distributed in the hope that it will be useful,
      10             :  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      12             :  *  GNU General Public License for more details.
      13             :  *
      14             :  *  You should have received a copy of the GNU General Public License
      15             :  *  along with this program. If not, see <https://www.gnu.org/licenses/>.
      16             :  */
      17             : 
      18             : #include "audio_frame_resizer.h"
      19             : #include "audio_input.h"
      20             : #include "jami/media_const.h"
      21             : #include "manager.h"
      22             : #include "media_decoder.h"
      23             : #include "resampler.h"
      24             : #include "logger.h"
      25             : #include "ringbufferpool.h"
      26             : #include "tracepoint.h"
      27             : #include "video/video_device.h"
      28             : 
      29             : #include <future>
      30             : #include <memory>
      31             : 
      32             : namespace jami {
      33             : 
      34             : static constexpr auto MS_PER_PACKET = std::chrono::milliseconds(20); // 20 ms: basis for the frame size and the pacing step of the read loop
      35             : 
      36         236 : AudioInput::AudioInput(const std::string& id)
      37         236 :     : id_(id)
      38         236 :     , format_(Manager::instance().getRingBufferPool().getInternalAudioFormat())
      39         236 :     , frameSize_(static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000)
      40         236 :     , resampler_(new Resampler)
      41         472 :     , resizer_(new AudioFrameResizer(format_,
      42             :                                      frameSize_,
      43         236 :                                      [this](std::shared_ptr<AudioFrame>&& f) { frameResized(std::move(f)); }))
      44         236 :     , deviceGuard_()
      45       42947 :     , loop_([] { return true; }, [this] { process(); }, [] {})
      46             : {
                           // Builds an audio input identified by `id`.
                           //  - format_ starts as the ring buffer pool's internal audio format.
                           //  - frameSize_ is the number of samples in MS_PER_PACKET at that sample rate.
                           //  - resizer_ hands every resized frame to frameResized().
                           //  - loop_ is only constructed here; in this file it is started from
                           //    switchInput() and configureFilePlayback().
                           // NOTE(review): the three callables given to loop_ are presumably
                           // setup / process / cleanup - confirm against the loop class.
      47         944 :     JAMI_DEBUG("Creating audio input with id: {}", id_);
                           // The ring buffer that the decoder callbacks fill is registered under this input's id.
      48         236 :     ringBuf_ = Manager::instance().getRingBufferPool().createRingBuffer(id_);
      49         236 : }
      50             : 
      51           0 : AudioInput::AudioInput(const std::string& id, const std::string& resource)
      52           0 :     : AudioInput(id)
      53             : {
                           // Convenience constructor: delegates to AudioInput(id), then switches to
                           // `resource` right away. The future returned by switchInput() is discarded.
      54           0 :     switchInput(resource);
      55           0 : }
      56             : 
      57         236 : AudioInput::~AudioInput()
      58             : {
                           // Tears the input down: removes the file-playback bindings, drops the ring
                           // buffer, joins the loop and flushes this id in the ring buffer pool.
      59         236 :     if (playingFile_) {
                               // Undo the two bindings made by configureFilePlayback().
      60           5 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
      61           5 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(id_, id_);
      62             :     }
                           // NOTE(review): ringBuf_ is cleared before loop_.join() and without taking
                           // resourceMutex_, while readFromDevice() tests ringBuf_ from the loop.
                           // Clearing it does let the `while (ringBuf_ && ...)` loops there terminate -
                           // confirm the ordering is intentional and that the access is safe.
                           // NOTE(review): the DEFAULT_ID binding made by initFile() (decodingFile_)
                           // is not undone here - verify whether that is handled elsewhere.
      63         236 :     ringBuf_.reset();
      64         236 :     loop_.join();
      65             : 
      66         236 :     Manager::instance().getRingBufferPool().flush(id_);
      67         236 : }
      68             : 
      69             : void
      70       41593 : AudioInput::process()
      71             : {
                           // Iteration body handed to loop_ in the constructor: one readFromDevice() pass.
      72       41593 :     readFromDevice();
      73       41593 : }
      74             : 
      75             : void
      76          18 : AudioInput::updateStartTime(int64_t start)
      77             : {
                           // Forwards `start` to the current decoder; does nothing when there is no decoder.
      78          18 :     if (decoder_) {
      79          18 :         decoder_->updateStartTime(start);
      80             :     }
      81          18 : }
      82             : 
      83             : void
      84           0 : AudioInput::frameResized(std::shared_ptr<AudioFrame>&& ptr)
      85             : {
                           // Callback given to resizer_ in the constructor; receives each resized frame.
      86           0 :     std::shared_ptr<AudioFrame> frame = std::move(ptr);
                           // The pts is the number of samples sent so far; the counter then advances
                           // by the number of samples in this frame.
      87           0 :     frame->pointer()->pts = static_cast<int64_t>(sent_samples);
      88           0 :     sent_samples += frame->pointer()->nb_samples;
      89             : 
                           // Pass the frame on through notify() as a MediaFrame.
      90           0 :     notify(std::static_pointer_cast<MediaFrame>(std::move(frame)));
      91           0 : }
      92             : 
      93             : void
      94           9 : AudioInput::setSeekTime(int64_t time)
      95             : {
                           // Forwards `time` to the current decoder; does nothing when there is no decoder.
      96           9 :     if (decoder_) {
      97           9 :         decoder_->setSeekTime(time);
      98             :     }
      99           9 : }
     100             : 
     101             : void
     102       41593 : AudioInput::readFromDevice()
     103             : {
                           // One loop pass: refill the ring buffer from the decoder when a file is the
                           // source, wait on the pacing deadline, then take one frame from the pool and
                           // run it through mute / resample / resize.
     104             :     {
                               // Refill runs under resourceMutex_, the mutex switchInput() holds while
                               // it swaps sources.
     105       41593 :         std::lock_guard lk(resourceMutex_);
     106       41593 :         if (decodingFile_)
                                   // initFile() mode: decode until the buffer is no longer empty
                                   // (or ringBuf_ has been cleared).
     107           0 :             while (ringBuf_ && ringBuf_->isEmpty())
     108           0 :                 readFromFile();
     109       41593 :         if (playingFile_) {
                                   // configureFilePlayback() mode: keep asking the decoder for frames
                                   // until this reader has data (or ringBuf_ has been cleared).
     110          15 :             while (ringBuf_ && ringBuf_->getLength(id_) == 0)
     111          10 :                 readFromQueue();
     112             :         }
     113       41593 :     }
     114             : 
     115       41593 :     auto& bufferPool = Manager::instance().getRingBufferPool();
                           // When waitForDataAvailable() reports no data, sleep until wakeUp_. In both
                           // cases the deadline then moves forward by one packet.
                           // NOTE(review): wakeUp_ is read and advanced here outside resourceMutex_,
                           // while switchInput() assigns it under that mutex - confirm this is safe.
     116       41593 :     if (not bufferPool.waitForDataAvailable(id_, wakeUp_))
     117       41593 :         std::this_thread::sleep_until(wakeUp_);
     118       41593 :     wakeUp_ += MS_PER_PACKET;
     119             : 
     120       41593 :     auto audioFrame = bufferPool.getData(id_);
                           // Nothing to process on this pass.
     121       41593 :     if (not audioFrame)
     122       41593 :         return;
     123             : 
     124           0 :     if (muteState_) {
     125           0 :         libav_utils::fillWithSilence(audioFrame->pointer());
     126           0 :         audioFrame->has_voice = false; // force no voice activity when muted
     127             :     }
     128             : 
                           // fmtMutex_ is the mutex setFormat() takes when it changes format_ and
                           // reconfigures resizer_; it stays locked until this function returns.
     129           0 :     std::lock_guard lk(fmtMutex_);
                           // Resample only when the pool's internal format differs from format_.
     130           0 :     if (bufferPool.getInternalAudioFormat() != format_)
     131           0 :         audioFrame = resampler_->resample(std::move(audioFrame), format_);
     132           0 :     resizer_->enqueue(std::move(audioFrame));
     133             : 
                           // settingMS_ is raised by setRecorderCallback(); exchange(false) makes this
                           // report fire once per raise.
                           // NOTE(review): recorderCallback_ runs while fmtMutex_ is still held.
     134           0 :     if (recorderCallback_ && settingMS_.exchange(false)) {
     135           0 :         recorderCallback_(MediaStream("a:local", format_, static_cast<int64_t>(sent_samples)));
     136             :     }
     137             : 
     138             :     jami_tracepoint(audio_input_read_from_device_end, id_.c_str());
     139       41593 : }
     140             : 
     141             : void
     142          10 : AudioInput::readFromQueue()
     143             : {
                           // Used by readFromDevice() in file-playback mode to request one frame from the decoder.
     144          10 :     if (!decoder_)
     145           0 :         return;
                           // While paused the decoder is not asked at all. In that case, or when
                           // emitFrame(true) returns false, back off for one packet duration so the
                           // caller's loop does not spin.
     146          10 :     if (paused_ || !decoder_->emitFrame(true)) {
     147          10 :         std::this_thread::sleep_for(MS_PER_PACKET);
     148             :     }
     149             : }
     150             : 
     151             : void
     152           0 : AudioInput::readFromFile()
     153             : {
                           // Used by readFromDevice() in initFile() mode: runs one decode() step and
                           // reacts to its status. Does nothing when there is no decoder.
     154           0 :     if (!decoder_)
     155           0 :         return;
     156           0 :     const auto ret = decoder_->decode();
     157           0 :     switch (ret) {
     158           0 :     case MediaDemuxer::Status::Success:
     159           0 :         break;
     160           0 :     case MediaDemuxer::Status::EndOfFile:
                               // Rebuild the decoder from devOpts_ (createDecoder() reopens
                               // devOpts_.input) - presumably this is how the file restarts.
     161           0 :         createDecoder();
     162           0 :         break;
     163           0 :     case MediaDemuxer::Status::ReadError:
     164           0 :         JAMI_ERR() << "Failed to decode frame";
     165           0 :         break;
     166           0 :     case MediaDemuxer::Status::ReadBufferOverflow:
     167           0 :         JAMI_ERR() << "Read buffer overflow detected";
     168           0 :         break;
                           // These two statuses are ignored here.
     169           0 :     case MediaDemuxer::Status::FallBack:
     170             :     case MediaDemuxer::Status::RestartRequired:
     171           0 :         break;
     172             :     }
     173             : }
     174             : 
     175             : bool
     176           0 : AudioInput::initCapture(const std::string& device)
     177             : {
                           // Starts window/desktop audio capture: resolves `device` to a capture target
                           // id, asks the Manager to start a capture stream for it, and binds this
                           // input's ring buffer to the target. Returns false when the stream cannot be started.
                           // On a platform matching none of the branches below, targetId stays equal to `device`.
     178           0 :     std::string targetId = device;
     179             : #if defined(_WIN32)
     180             :     // There are two possible formats for device:
     181             :     // 1. A string containing "window-id:hwnd=XXXX" where XXXX is the HWND of the window to capture
     182             :     // 2. A string that does not contain a window handle, in which case we capture desktop audio
     183             :     std::string pattern = "window-id:hwnd=";
     184             :     size_t winHandlePos = device.find(pattern);
     185             : 
     186             :     if (winHandlePos != std::string::npos) {
     187             :         // Get HWND from device URI
     188             :         size_t startPos = winHandlePos + pattern.size();
     189             :         size_t endPos = device.find(' ', startPos);
     190             :         if (endPos == std::string::npos) {
     191             :             endPos = device.size();
     192             :         }
     193             :         targetId = device.substr(startPos, endPos - startPos);
     194             :     } else {
     195             :         targetId = video::DEVICE_DESKTOP;
     196             :     }
     197             : #elif defined(__linux__)
     198             :     // On Linux, we always capture desktop audio because window-specific audio capture is not yet implemented
     199             :     // Possible to implement window audio capture on X11 specifically in the future, but not Wayland as of Jan 2026
     200             :     // See https://github.com/flatpak/xdg-desktop-portal/issues/957
     201           0 :     targetId = video::DEVICE_DESKTOP;
     202             : #elif defined(__APPLE__)
     203             :     // As of Jan 2026, audio capture has not been implemented for macOS (TODO)
     204             :     targetId = video::DEVICE_DESKTOP;
     205             : #endif
     206             : 
                           // devOpts_ describes the source with the current format's channel count and sample rate.
     207           0 :     devOpts_ = {};
     208           0 :     devOpts_.input = targetId;
     209           0 :     devOpts_.channel = format_.nb_channels;
     210           0 :     devOpts_.framerate = format_.sample_rate;
     211             : 
     212             :     // This will cause the audio layer to create a ring buffer with id=targetId
     213             :     // The audio layer will then fill it with the audio from the captured window/desktop
     214           0 :     deviceGuard_ = Manager::instance().startCaptureStream(targetId);
     215           0 :     if (!deviceGuard_) {
                               // NOTE(review): the message is chosen by whether targetId is empty, not by
                               // whether a window was requested. On Linux/macOS targetId is always
                               // video::DEVICE_DESKTOP, so a desktop failure logs the "window-id" text
                               // unless that constant is empty; on Windows an empty HWND logs the
                               // "desktop audio" text. Verify against the value of DEVICE_DESKTOP.
     216           0 :         if (!targetId.empty())
     217           0 :             JAMI_ERROR("Failed to start capture stream for window-id: {}", targetId);
     218             :         else
     219           0 :             JAMI_ERROR("Failed to start capture stream for desktop audio");
     220           0 :         return false;
     221             :     }
     222             : 
     223             :     // We want the audio input's ring buffer to read the captured audio from the audio layer
     224             :     // Then the audio RTP session will handle sending the audio over the network
     225           0 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(id_, targetId);
     226             : 
     227           0 :     sourceRingBufferId_ = targetId;
     228           0 :     playingDevice_ = true;
     229           0 :     return true;
     230           0 : }
     231             : 
     232             : bool
     233         227 : AudioInput::initDevice(const std::string& device)
     234             : {
                           // Selects live capture: records `device` and the current format in devOpts_,
                           // then acquires a CAPTURE stream guard from the Manager.
                           // NOTE(review): `device` is only stored in devOpts_.input and is not passed to
                           // startAudioStream(). The function also returns true without checking
                           // deviceGuard_ (initCapture() does check it). Confirm both are intended.
     235         227 :     devOpts_ = {};
     236         227 :     devOpts_.input = device;
     237         227 :     devOpts_.channel = format_.nb_channels;
     238         227 :     devOpts_.framerate = format_.sample_rate;
     239         227 :     deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::CAPTURE);
     240         227 :     playingDevice_ = true;
     241         227 :     return true;
     242             : }
     243             : 
     244             : void
     245           5 : AudioInput::configureFilePlayback(const std::string& path, std::shared_ptr<MediaDemuxer>& demuxer, int index)
     246             : {
                           // Sets up playback from an existing demuxer: builds a MediaDecoder on
                           // (`demuxer`, `index`) whose frames go into ringBuf_, binds the buffers,
                           // acquires a PLAYBACK stream and starts the loop.
                           // NOTE(review): decoder_, devOpts_, resource_ and playingFile_ are written here
                           // without taking resourceMutex_, which readFromDevice() holds when it checks
                           // playingFile_ - confirm callers serialize this.
     247           5 :     decoder_.reset();
     248           5 :     devOpts_ = {};
     249           5 :     devOpts_.input = path;
     250           5 :     devOpts_.name = path;
                           // Decoder output callback: silence the frame when muted, then queue it while
                           // a ring buffer still exists. The frame is cast to AudioFrame unchecked.
     251           0 :     auto decoder = std::make_unique<MediaDecoder>(demuxer, index, [this](std::shared_ptr<MediaFrame>&& frame) {
     252           0 :         if (muteState_)
     253           0 :             libav_utils::fillWithSilence(frame->pointer());
     254           0 :         if (ringBuf_)
     255           0 :             ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame));
     256           5 :     });
                           // Rate emulation is enabled on this path; createDecoder() deliberately leaves it off.
     257           5 :     decoder->emulateRate();
                           // The interrupt callback returns non-zero once isCapturing() is false.
     258           5 :     decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); },
     259             :                                   this);
     260             : 
     261             :     // have file audio mixed into the local buffer so it gets played
     262           5 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     263             :     // Bind to itself to be able to read from the ringbuffer
     264           5 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(id_, id_);
     265             : 
     266           5 :     sourceRingBufferId_ = id_;
     267           5 :     deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
     268             : 
                           // First pacing deadline is one packet from now. playingFile_ selects the
                           // readFromQueue() path in readFromDevice() and the extra unbinds in the destructor.
     269           5 :     wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
     270           5 :     playingFile_ = true;
     271           5 :     decoder_ = std::move(decoder);
     272           5 :     resource_ = path;
     273           5 :     loop_.start();
     274           5 : }
     275             : 
     276             : void
     277          10 : AudioInput::setPaused(bool paused)
     278             : {
                           // Pausing unbinds DEFAULT_ID from this input and drops the device guard.
                           // Resuming restores the binding and acquires a PLAYBACK stream again.
                           // paused_ is the flag readFromQueue() checks before asking the decoder for a frame.
     279          10 :     if (paused) {
     280           8 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     281           8 :         deviceGuard_.reset();
     282             :     } else {
     283           2 :         Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     284           2 :         deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
     285             :     }
     286          10 :     paused_ = paused;
     287          10 : }
     288             : 
     289             : void
     290           9 : AudioInput::flushBuffers()
     291             : {
                           // Asks the current decoder to flush its buffers; does nothing when there is no decoder.
     292           9 :     if (decoder_) {
     293           9 :         decoder_->flushBuffers();
     294             :     }
     295           9 : }
     296             : 
     297             : bool
     298           0 : AudioInput::initFile(const std::string& path)
     299             : {
                           // Selects a file as the audio source, decoded on demand by readFromFile().
                           // Returns false when the file is not readable. When no decoder can be created
                           // it falls back to initDevice("") and returns that result.
                           // NOTE(review): access()/R_OK are POSIX; no <unistd.h> include is visible in
                           // this file - confirm how this builds on Windows.
     300           0 :     if (access(path.c_str(), R_OK) != 0) {
     301           0 :         JAMI_ERROR("File '{}' not available", path);
     302           0 :         return false;
     303             :     }
     304             : 
     305           0 :     devOpts_ = {};
     306           0 :     devOpts_.input = path;
     307           0 :     devOpts_.name = path;
     308           0 :     devOpts_.loop = "1";
     309             :     // sets devOpts_'s sample rate and number of channels
     310           0 :     if (!createDecoder()) {
                               // NOTE(review): on every failure path createDecoder() has already called
                               // foundDevOpts() with the file's devOpts_. foundDevOpts() only sets the
                               // promise once, so the params published to the future are the file's ones
                               // and not those of the fallback device set up below - verify.
     311           0 :         JAMI_WARN() << "Unable to decode audio from file, switching back to default device";
     312           0 :         return initDevice("");
     313             :     }
     314           0 :     wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
     315             : 
     316             :     // have file audio mixed into the local buffer so it gets played
     317           0 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     318           0 :     sourceRingBufferId_ = id_;
                           // decodingFile_ selects the readFromFile() path in readFromDevice().
     319           0 :     decodingFile_ = true;
     320           0 :     deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
     321           0 :     return true;
     322             : }
     323             : 
     324             : std::shared_future<DeviceParams>
     325         227 : AudioInput::switchInput(const std::string& resource)
     326             : {
                           // Replaces the current audio source with `resource` and returns a future for
                           // the resulting DeviceParams. An empty resource selects the default capture
                           // device. Otherwise the string is split at SEPARATOR into prefix and suffix
                           // and dispatched to initFile() (FILE), initCapture() (DISPLAY) or
                           // initDevice() (any other prefix).
     327             :     // Always switch inputs, even if it's the same resource, so audio will be in sync with video
     328         227 :     std::unique_lock lk(resourceMutex_);
     329             : 
     330         908 :     JAMI_DEBUG("Switching audio source from [{}] to [{}]", resource_, resource);
     331             : 
                           // The previous guard moves into a local, so it is destroyed when this
                           // function returns and not when deviceGuard_ is reassigned below.
     332         227 :     auto oldGuard = std::move(deviceGuard_);
     333             : 
     334         227 :     decoder_.reset();
     335         227 :     if (decodingFile_) {
                               // Undo the binding made by initFile().
     336           0 :         decodingFile_ = false;
     337           0 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     338             :     }
     339             : 
                           // NOTE(review): playingFile_ (set by configureFilePlayback()) is not reset
                           // here - confirm that is intended.
     340         227 :     playingDevice_ = false;
     341         227 :     resource_ = resource;
     342         227 :     sourceRingBufferId_.clear();
     343         227 :     devOptsFound_ = false;
     344             : 
                           // Install a fresh promise for this switch; the previous one is destroyed with `p`.
     345         227 :     std::promise<DeviceParams> p;
     346         227 :     foundDevOpts_.swap(p);
     347             : 
     348         227 :     if (resource_.empty()) {
     349         227 :         if (initDevice(""))
     350         227 :             foundDevOpts(devOpts_);
     351             :     } else {
     352           0 :         static const std::string& sep = libjami::Media::VideoProtocolPrefix::SEPARATOR;
     353           0 :         const auto pos = resource_.find(sep);
                               // NOTE(review): this early return and the one below hand back a
                               // default-constructed shared_future (no shared state). futureDevOpts_,
                               // wakeUp_ and the loop are left untouched even though the previous source
                               // was already torn down above. Confirm callers check valid().
     354           0 :         if (pos == std::string::npos)
     355           0 :             return {};
     356             : 
     357           0 :         const auto prefix = resource_.substr(0, pos);
                               // Nothing follows the separator: treated like a malformed resource.
     358           0 :         if ((pos + sep.size()) >= resource_.size())
     359           0 :             return {};
     360             : 
     361           0 :         const auto suffix = resource_.substr(pos + sep.size());
     362             : 
     363           0 :         bool ready = false;
     364           0 :         if (prefix == libjami::Media::VideoProtocolPrefix::FILE)
     365           0 :             ready = initFile(suffix);
     366           0 :         else if (prefix == libjami::Media::VideoProtocolPrefix::DISPLAY)
     367           0 :             ready = initCapture(suffix);
     368             :         else
     369           0 :             ready = initDevice(suffix);
     370             : 
                               // When `ready` is false and nothing else called foundDevOpts(), the
                               // promise stays unset and the returned future has no value yet.
     371           0 :         if (ready)
     372           0 :             foundDevOpts(devOpts_);
     373           0 :     }
     374             : 
     375         227 :     futureDevOpts_ = foundDevOpts_.get_future().share();
     376         227 :     wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
     377         227 :     lk.unlock();
                           // The steps below run after resourceMutex_ has been released.
     378         227 :     if (not loop_.isRunning())
     379         200 :         loop_.start();
                           // NOTE(review): the callback is invoked even when the init* call above
                           // returned false, although its name suggests success only - verify.
     380         227 :     if (onSuccessfulSetup_)
     381         166 :         onSuccessfulSetup_(MEDIA_AUDIO, 0);
     382         227 :     return futureDevOpts_;
     383         227 : }
     384             : 
     385             : void
     386         227 : AudioInput::foundDevOpts(const DeviceParams& params)
     387             : {
                           // Publishes `params` through the promise, at most once per switchInput():
                           // devOptsFound_ is cleared there and set here, so repeated calls are ignored.
                           // A second set_value() on the same std::promise would throw.
     388         227 :     if (!devOptsFound_) {
     389         227 :         devOptsFound_ = true;
     390         227 :         foundDevOpts_.set_value(params);
     391             :     }
     392         227 : }
     393             : 
     394             : void
     395         167 : AudioInput::setRecorderCallback(const std::function<void(const MediaStream& ms)>& cb)
     396             : {
                           // Stores the recorder callback. Raising settingMS_ makes the next
                           // readFromDevice() pass that processes a frame report the stream through it.
                           // NOTE(review): recorderCallback_ is assigned here without a lock while
                           // readFromDevice() reads it from the loop - confirm this is safe.
     397         167 :     settingMS_.exchange(true);
     398         167 :     recorderCallback_ = cb;
                           // With a decoder present, also report getInfo() whenever the decoder fires
                           // its context callback (the same hook createDecoder() installs).
     399         167 :     if (decoder_)
     400           0 :         decoder_->setContextCallback([this]() {
     401           0 :             if (recorderCallback_)
     402           0 :                 recorderCallback_(getInfo());
     403           0 :         });
     404         167 : }
     405             : 
     406             : bool
     407           0 : AudioInput::createDecoder()
     408             : {
                           // (Re)creates decoder_ for devOpts_.input and copies the stream's channel
                           // count and sample rate into devOpts_. Returns false, leaving decoder_ empty,
                           // when the input is empty or cannot be opened or set up.
                           // Every exit path calls foundDevOpts(devOpts_).
     409           0 :     decoder_.reset();
     410           0 :     if (devOpts_.input.empty()) {
     411           0 :         foundDevOpts(devOpts_);
     412           0 :         return false;
     413             :     }
     414             : 
                           // Decoder output callback: queue each decoded frame while a ring buffer
                           // still exists. The frame is cast to AudioFrame unchecked.
     415           0 :     auto decoder = std::make_unique<MediaDecoder>([this](std::shared_ptr<MediaFrame>&& frame) {
     416           0 :         if (ringBuf_)
     417           0 :             ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame));
     418           0 :     });
     419             : 
     420             :     // NOTE don't emulate rate, file is read as frames are needed
     421             : 
                           // The interrupt callback returns non-zero once isCapturing() is false.
     422           0 :     decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); },
     423             :                                   this);
     424             : 
     425           0 :     if (decoder->openInput(devOpts_) < 0) {
     426           0 :         JAMI_ERR() << "Unable to open input '" << devOpts_.input << "'";
     427           0 :         foundDevOpts(devOpts_);
     428           0 :         return false;
     429             :     }
     430             : 
     431           0 :     if (decoder->setupAudio() < 0) {
     432           0 :         JAMI_ERR() << "Unable to setup decoder for '" << devOpts_.input << "'";
     433           0 :         foundDevOpts(devOpts_);
     434           0 :         return false;
     435             :     }
     436             : 
     437           0 :     auto ms = decoder->getStream(devOpts_.input);
     438           0 :     devOpts_.channel = ms.nbChannels;
     439           0 :     devOpts_.framerate = ms.sampleRate;
     440           0 :     JAMI_DBG() << "Created audio decoder: " << ms;
     441             : 
                           // Publish the decoder only after it is fully set up, then report getInfo()
                           // to the recorder whenever the decoder fires its context callback.
     442           0 :     decoder_ = std::move(decoder);
     443           0 :     foundDevOpts(devOpts_);
     444           0 :     decoder_->setContextCallback([this]() {
     445           0 :         if (recorderCallback_)
     446           0 :             recorderCallback_(getInfo());
     447           0 :     });
     448           0 :     return true;
     449           0 : }
     450             : 
     451             : void
     452         166 : AudioInput::setFormat(const AudioFormat& fmt)
     453             : {
                           // Changes the output format under fmtMutex_ and reconfigures the resizer with
                           // the frame size for MS_PER_PACKET at the new sample rate (same formula as
                           // the constructor).
                           // NOTE(review): the frameSize_ member is not updated here - confirm nothing
                           // else relies on it after construction.
     454         166 :     std::lock_guard lk(fmtMutex_);
     455         166 :     format_ = fmt;
     456         166 :     resizer_->setFormat(format_, static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000);
     457         166 : }
     458             : 
     459             : void
     460         333 : AudioInput::setMuted(bool isMuted)
     461             : {
                           // Records the mute flag. readFromDevice() and the file-playback decoder
                           // callback replace frames with silence while it is set.
                           // NOTE(review): this state change is logged at warning level with a
                           // printf-style format, while newer code in this file uses the {}-style
                           // JAMI_DEBUG / JAMI_ERROR macros - consider aligning.
     462         333 :     JAMI_WARN("Audio Input muted [%s]", isMuted ? "YES" : "NO");
     463         333 :     muteState_ = isMuted;
     464         333 : }
     465             : 
     466             : MediaStream
     467           0 : AudioInput::getInfo() const
     468             : {
                           // Describes the stream as "a:local" with the current format and the number
                           // of samples sent so far. format_ is read under fmtMutex_.
     469           0 :     std::lock_guard lk(fmtMutex_);
     470           0 :     return MediaStream("a:local", format_, static_cast<int64_t>(sent_samples));
     471           0 : }
     472             : 
     473             : MediaStream
     474           1 : AudioInput::getInfo(const std::string& name) const
     475             : {
                           // Same as getInfo(), but the stream is labelled with the caller-supplied
                           // `name` in place of "a:local".
     476           1 :     std::lock_guard lk(fmtMutex_);
     477           1 :     auto ms = MediaStream(name, format_, static_cast<int64_t>(sent_samples));
     478           2 :     return ms;
     479           1 : }
     480             : 
     481             : } // namespace jami

Generated by: LCOV version 1.14