LCOV - code coverage report
Current view: top level - src/media/audio - audio_input.cpp (source / functions) Coverage Total Hit
Test: jami-coverage-filtered.info Lines: 51.5 % 274 141
Test Date: 2026-06-13 09:18:46 Functions: 45.8 % 59 27

            Line data    Source code
       1              : /*
       2              :  *  Copyright (C) 2004-2026 Savoir-faire Linux Inc.
       3              :  *
       4              :  *  This program is free software: you can redistribute it and/or modify
       5              :  *  it under the terms of the GNU General Public License as published by
       6              :  *  the Free Software Foundation, either version 3 of the License, or
       7              :  *  (at your option) any later version.
       8              :  *
       9              :  *  This program is distributed in the hope that it will be useful,
      10              :  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
      11              :  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      12              :  *  GNU General Public License for more details.
      13              :  *
      14              :  *  You should have received a copy of the GNU General Public License
      15              :  *  along with this program. If not, see <https://www.gnu.org/licenses/>.
      16              :  */
      17              : 
      18              : #include "audio_frame_resizer.h"
      19              : #include "audio_input.h"
      20              : #include "jami/media_const.h"
      21              : #include "manager.h"
      22              : #include "media_decoder.h"
      23              : #include "resampler.h"
      24              : #include "logger.h"
      25              : #include "ringbufferpool.h"
      26              : #include "tracepoint.h"
      27              : #include "video/video_device.h"
      28              : 
      29              : #include <future>
      30              : #include <memory>
      31              : 
      32              : namespace jami {
      33              : 
      34              : static constexpr auto MS_PER_PACKET = std::chrono::milliseconds(20); // Audio duration covered by one packet: used below to size frames and to pace the read loop.
      35              : 
      36          246 : AudioInput::AudioInput(const std::string& id) // Builds an audio input whose ring buffer is registered under `id`; no source is selected here.
      37          246 :     : id_(id)
      38          246 :     , format_(Manager::instance().getRingBufferPool().getInternalAudioFormat()) // start from the pool's internal audio format
      39          246 :     , frameSize_(static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000) // samples per packet; NOTE(review): reads format_, so format_ must be declared before frameSize_ in the class - confirm in the header
      40          246 :     , resampler_(new Resampler)
      41          246 :     , resizer_(new AudioFrameResizer(format_,
      42              :                                      frameSize_,
      43          492 :                                      [this](std::shared_ptr<AudioFrame>&& f) { frameResized(std::move(f)); })) // frames coming out of the resizer are handed to frameResized()
      44          246 :     , deviceGuard_()
      45        43313 :     , loop_([] { return true; }, [this] { process(); }, [] {}) // three callables: one returning true, one running process(), one empty; presumably setup/iteration/cleanup - confirm against the loop type
      46              : { // loop_.start() is not called here; in this file it is only called from switchInput() and configureFilePlayback()
      47          984 :     JAMI_DEBUG("Creating audio input with id: {}", id_);
      48          246 :     ringBuf_ = Manager::instance().getRingBufferPool().createRingBuffer(id_); // create this input's buffer in the pool and keep a handle to it
      49          246 : }
      50              : 
      51            0 : AudioInput::AudioInput(const std::string& id, const std::string& resource) // Convenience constructor: builds the input for `id`, then selects `resource` right away.
      52            0 :     : AudioInput(id)
      53              : {
      54            0 :     switchInput(resource); // the shared_future returned by switchInput() is discarded here
      55            0 : }
      56              : 
      57          246 : AudioInput::~AudioInput() // Removes file-playback bindings, drops the ring buffer handle, joins the processing loop and flushes this input's buffer.
      58              : { // NOTE(review): the DEFAULT_ID binding made by initFile() (decodingFile_) is not undone here, only in switchInput() - confirm that is intended
      59          246 :     if (playingFile_) { // undo the two bindings made in configureFilePlayback()
      60           10 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
      61            5 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(id_, id_);
      62              :     }
      63          246 :     ringBuf_.reset(); // a null ringBuf_ ends the `while (ringBuf_ && ...)` waits in readFromDevice(); NOTE(review): done without resourceMutex_ while the loop may still be running - confirm this is safe
      64          246 :     loop_.join();
      65              : 
      66          246 :     Manager::instance().getRingBufferPool().flush(id_);
      67          246 : }
      68              : 
      69              : void
      70        41899 : AudioInput::process() // Body run by loop_ (see the constructor): performs one readFromDevice() pass.
      71              : {
      72        41899 :     readFromDevice();
      73        41899 : }
      74              : 
      75              : void
      76           26 : AudioInput::updateStartTime(int64_t start) // Forwards `start` to the current decoder; does nothing when no decoder is set.
      77              : {
      78           26 :     if (decoder_) { // NOTE(review): decoder_ is read without resourceMutex_, which switchInput() holds while resetting it - confirm callers serialize these
      79           26 :         decoder_->updateStartTime(start);
      80              :     }
      81           26 : }
      82              : 
      83              : void
      84            0 : AudioInput::frameResized(std::shared_ptr<AudioFrame>&& ptr) // Resizer callback (installed in the constructor): timestamps the frame and passes it to notify().
      85              : {
      86            0 :     std::shared_ptr<AudioFrame> frame = std::move(ptr);
      87            0 :     frame->pointer()->pts = static_cast<int64_t>(sent_samples); // pts is the count of samples emitted before this frame
      88            0 :     sent_samples += frame->pointer()->nb_samples; // advance the running sample counter by this frame's size
      89              : 
      90            0 :     notify(std::static_pointer_cast<MediaFrame>(std::move(frame)));
      91            0 : }
      92              : 
      93              : void
      94            9 : AudioInput::setSeekTime(int64_t time) // Forwards `time` to the current decoder; does nothing when no decoder is set.
      95              : {
      96            9 :     if (decoder_) { // NOTE(review): read without resourceMutex_, same as updateStartTime() - confirm callers serialize with switchInput()
      97            9 :         decoder_->setSeekTime(time);
      98              :     }
      99            9 : }
     100              : 
     101              : void
     102        41899 : AudioInput::readFromDevice() // One loop pass: refill from file/queue if needed, wait for data, then mute/resample/resize one frame from the pool.
     103              : {
     104              :     { // NOTE(review): both waits below run while holding resourceMutex_, and readFromQueue() may sleep MS_PER_PACKET per call, so switchInput() is blocked meanwhile
     105        41899 :         std::lock_guard lk(resourceMutex_);
     106        41899 :         if (decodingFile_)
     107            0 :             while (ringBuf_ && ringBuf_->isEmpty()) // file set up by initFile(): decode until the buffer holds something
     108            0 :                 readFromFile();
     109        41899 :         if (playingFile_) {
     110           15 :             while (ringBuf_ && ringBuf_->getLength(id_) == 0) // file set up by configureFilePlayback(): pull from the decoder queue until id_ can read data
     111           10 :                 readFromQueue();
     112              :         }
     113        41899 :     }
     114              : 
     115        41899 :     auto& bufferPool = Manager::instance().getRingBufferPool();
     116        41899 :     if (not bufferPool.waitForDataAvailable(id_, wakeUp_))
     117        41899 :         std::this_thread::sleep_until(wakeUp_); // when the wait reports false, still hold this pass until the wakeUp_ deadline
     118        41899 :     wakeUp_ += MS_PER_PACKET; // next deadline is one packet later; NOTE(review): wakeUp_ is modified here without resourceMutex_, while switchInput() writes it under that lock - confirm
     119              : 
     120        41899 :     auto audioFrame = bufferPool.getData(id_);
     121        41899 :     if (not audioFrame)
     122        41899 :         return; // nothing to process; note this early exit also skips the tracepoint at the end of the function
     123              : 
     124            0 :     if (muteState_) {
     125            0 :         libav_utils::fillWithSilence(audioFrame->pointer());
     126            0 :         audioFrame->has_voice = false; // force no voice activity when muted
     127              :     }
     128              : 
     129            0 :     std::lock_guard lk(fmtMutex_); // format_ can be changed by setFormat() under the same mutex
     130            0 :     if (bufferPool.getInternalAudioFormat() != format_)
     131            0 :         audioFrame = resampler_->resample(std::move(audioFrame), format_); // convert only when the pool's format differs from the requested one
     132            0 :     resizer_->enqueue(std::move(audioFrame)); // resizer output reaches frameResized() through the callback set in the constructor
     133              : 
     134            0 :     if (recorderCallback_ && settingMS_.exchange(false)) { // settingMS_ is raised by setRecorderCallback(), so the stream description is sent once per registration
     135            0 :         recorderCallback_(MediaStream("a:local", format_, static_cast<int64_t>(sent_samples)));
     136              :     }
     137              : 
     138              :     jami_tracepoint(audio_input_read_from_device_end, id_.c_str());
     139        41899 : }
     140              : 
     141              : void
     142           10 : AudioInput::readFromQueue() // Asks the decoder to emit one frame; sleeps one packet duration when paused or when emitFrame() returns false.
     143              : {
     144           10 :     if (!decoder_)
     145            0 :         return; // no decoder: return at once (the caller's wait loop in readFromDevice() then re-checks its condition immediately)
     146           10 :     if (paused_ || !decoder_->emitFrame(true)) { // when paused_ is set, emitFrame() is not called at all (short-circuit)
     147           10 :         std::this_thread::sleep_for(MS_PER_PACKET);
     148              :     }
     149              : }
     150              : 
     151              : void
     152            0 : AudioInput::readFromFile() // Runs one decoder_->decode() step and reacts to its status; does nothing without a decoder.
     153              : {
     154            0 :     if (!decoder_)
     155            0 :         return;
     156            0 :     const auto ret = decoder_->decode();
     157            0 :     switch (ret) {
     158            0 :     case MediaDemuxer::Status::Success:
     159            0 :         break;
     160            0 :     case MediaDemuxer::Status::EndOfFile:
     161            0 :         createDecoder(); // rebuild the decoder from devOpts_ (presumably to restart the file - confirm); NOTE(review): result ignored - on failure decoder_ stays null and the `while (... isEmpty())` loop in readFromDevice() would spin on the early return above
     162            0 :         break;
     163            0 :     case MediaDemuxer::Status::ReadError:
     164            0 :         JAMI_ERROR("Failed to decode frame"); // logged only; the caller keeps calling while the ring buffer is empty
     165            0 :         break;
     166            0 :     case MediaDemuxer::Status::ReadBufferOverflow:
     167            0 :         JAMI_ERROR("Read buffer overflow detected");
     168            0 :         break;
     169            0 :     case MediaDemuxer::Status::FallBack:
     170              :     case MediaDemuxer::Status::RestartRequired:
     171            0 :         break; // these two statuses are deliberately ignored here
     172              :     }
     173              : }
     174              : 
     175              : bool
     176            0 : AudioInput::initCapture(const std::string& device) // Starts a capture stream for `device` and binds this input's ring buffer to it; returns false if the stream cannot be started.
     177              : {
     178            0 :     std::string targetId = device; // kept as-is only when none of the platform branches below is compiled in
     179              : #if defined(_WIN32)
     180              :     // There are two possible formats for device:
     181              :     // 1. A string containing "window-id:hwnd=XXXX" where XXXX is the HWND of the window to capture
     182              :     // 2. A string that does not contain a window handle, in which case we capture desktop audio
     183              :     std::string pattern = "window-id:hwnd=";
     184              :     size_t winHandlePos = device.find(pattern);
     185              : 
     186              :     if (winHandlePos != std::string::npos) {
     187              :         // Get HWND from device URI
     188              :         size_t startPos = winHandlePos + pattern.size();
     189              :         size_t endPos = device.find(' ', startPos); // the handle ends at the next space, or at the end of the string
     190              :         if (endPos == std::string::npos) {
     191              :             endPos = device.size();
     192              :         }
     193              :         targetId = device.substr(startPos, endPos - startPos);
     194              :     } else {
     195              :         targetId = video::DEVICE_DESKTOP;
     196              :     }
     197              : #elif defined(__linux__)
     198              :     // On Linux, we always capture desktop audio because window-specific audio capture is not yet implemented
     199              :     // Possible to implement window audio capture on X11 specifically in the future, but not Wayland as of Jan 2026
     200              :     // See https://github.com/flatpak/xdg-desktop-portal/issues/957
     201            0 :     targetId = video::DEVICE_DESKTOP;
     202              : #elif defined(__APPLE__)
     203              :     // As of Jan 2026, audio capture has not been implemented for macOS (TODO)
     204              :     targetId = video::DEVICE_DESKTOP;
     205              : #endif
     206              : 
     207            0 :     devOpts_ = {}; // devOpts_ is overwritten before the stream is started, so it changes even when this function fails
     208            0 :     devOpts_.input = targetId;
     209            0 :     devOpts_.channel = format_.nb_channels;
     210            0 :     devOpts_.framerate = format_.sample_rate;
     211              : 
     212              :     // This will cause the audio layer to create a ring buffer with id=targetId
     213              :     // The audio layer will then fill it with the audio from the captured window/desktop
     214            0 :     deviceGuard_ = Manager::instance().startCaptureStream(targetId);
     215            0 :     if (!deviceGuard_) {
     216            0 :         if (!targetId.empty()) // NOTE(review): targetId is DEVICE_DESKTOP for desktop capture, so unless that constant is empty this branch also reports desktop failures as "window-id" - confirm the intended test
     217            0 :             JAMI_ERROR("Failed to start capture stream for window-id: {}", targetId);
     218              :         else
     219            0 :             JAMI_ERROR("Failed to start capture stream for desktop audio");
     220            0 :         return false;
     221              :     }
     222              : 
     223              :     // We want the audio input's ring buffer to read the captured audio from the audio layer
     224              :     // Then the audio RTP session will handle sending the audio over the network
     225            0 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(id_, targetId);
     226              : 
     227            0 :     sourceRingBufferId_ = targetId;
     228            0 :     playingDevice_ = true;
     229            0 :     return true;
     230            0 : }
     231              : 
     232              : bool
     233          237 : AudioInput::initDevice(const std::string& device) // Selects live capture: records `device` in devOpts_ and starts the CAPTURE audio stream; always returns true.
     234              : {
     235          237 :     devOpts_ = {};
     236          237 :     devOpts_.input = device; // `device` is only stored here; the stream below is started by type, not by this name
     237          237 :     devOpts_.channel = format_.nb_channels;
     238          237 :     devOpts_.framerate = format_.sample_rate;
     239          237 :     deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::CAPTURE); // NOTE(review): unlike initCapture(), the returned guard is not checked - confirm a failed start cannot happen or is handled elsewhere
     240          237 :     playingDevice_ = true;
     241          237 :     return true;
     242          237 : }
     243              : 
     244              : void
     245            5 : AudioInput::configureFilePlayback(const std::string& path, std::shared_ptr<MediaDemuxer>& demuxer, int index) // Plays stream `index` of an already opened demuxer: builds a decoder feeding ringBuf_, binds the buffers and starts the loop.
     246              : { // NOTE(review): decoder_, devOpts_, resource_ and playingFile_ are changed here without resourceMutex_, which readFromDevice()/switchInput() use - confirm callers serialize
     247            5 :     decoder_.reset();
     248            5 :     devOpts_ = {};
     249            5 :     devOpts_.input = path;
     250            5 :     devOpts_.name = path;
     251            0 :     auto decoder = std::make_unique<MediaDecoder>(demuxer, index, [this](std::shared_ptr<MediaFrame>&& frame) { // per-frame callback: silence when muted, then queue into this input's ring buffer
     252            0 :         if (muteState_)
     253            0 :             libav_utils::fillWithSilence(frame->pointer());
     254            0 :         if (ringBuf_)
     255            0 :             ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame)); // assumes the decoder only delivers AudioFrame instances here - TODO confirm
     256            5 :     });
     257            5 :     decoder->emulateRate();
     258            5 :     decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); }, // returns non-zero once this input is no longer capturing
     259              :                                   this);
     260              : 
     261              :     // have file audio mixed into the local buffer so it gets played
     262           10 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     263              :     // Bind to itself to be able to read from the ringbuffer
     264            5 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(id_, id_);
     265              : 
     266            5 :     sourceRingBufferId_ = id_;
     267            5 :     deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
     268              : 
     269            5 :     wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET; // first read deadline is one packet from now
     270            5 :     playingFile_ = true; // makes readFromDevice() pull through readFromQueue(); both bindings above are undone in the destructor
     271            5 :     decoder_ = std::move(decoder);
     272            5 :     resource_ = path;
     273            5 :     loop_.start();
     274           10 : }
     275              : 
     276              : void
     277           18 : AudioInput::setPaused(bool paused) // Pauses or resumes playback: toggles the DEFAULT_ID binding and the PLAYBACK stream guard, then records the state in paused_.
     278              : {
     279           18 :     if (paused) {
     280           24 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     281           12 :         deviceGuard_.reset(); // drop the stream guard while paused
     282              :     } else {
     283           12 :         Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     284            6 :         deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK); // always PLAYBACK; presumably only meant for file playback inputs - confirm against callers
     285              :     }
     286           18 :     paused_ = paused; // read by readFromQueue(), which stops emitting frames while it is set
     287           18 : }
     288              : 
     289              : void
     290            9 : AudioInput::flushBuffers() // Forwards to decoder_->flushBuffers(); does nothing when no decoder is set.
     291              : {
     292            9 :     if (decoder_) { // only the decoder is flushed here; this input's ring buffer is not touched by this call
     293            9 :         decoder_->flushBuffers();
     294              :     }
     295            9 : }
     296              : 
     297              : bool
     298            0 : AudioInput::initFile(const std::string& path) // Selects a file as source, decoded on demand by readFromFile(); returns false if unreadable, falls back to the default device if it cannot be decoded.
     299              : {
     300            0 :     if (access(path.c_str(), R_OK) != 0) { // NOTE(review): access()/R_OK are not declared by any header included directly in this file - presumably pulled in transitively; confirm
     301            0 :         JAMI_ERROR("File '{}' not available", path);
     302            0 :         return false;
     303              :     }
     304              : 
     305            0 :     devOpts_ = {};
     306            0 :     devOpts_.input = path;
     307            0 :     devOpts_.name = path;
     308            0 :     devOpts_.loop = "1";
     309              :     // sets devOpts_'s sample rate and number of channels
     310            0 :     if (!createDecoder()) {
     311            0 :         JAMI_WARNING("Unable to decode audio from file, switching back to default device");
     312            0 :         return initDevice(""); // NOTE(review): createDecoder() already called foundDevOpts() with the file's devOpts_ on its failure paths, so the device parameters set here never reach the promise - confirm intended
     313              :     }
     314            0 :     wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
     315              : 
     316              :     // have file audio mixed into the local buffer so it gets played
     317            0 :     Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
     318            0 :     sourceRingBufferId_ = id_;
     319            0 :     decodingFile_ = true; // makes readFromDevice() refill through readFromFile(); cleared again in switchInput()
     320            0 :     deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
     321            0 :     return true;
     322            0 : }
     323              : 
     324              : std::shared_future<DeviceParams>
     325          237 : AudioInput::switchInput(const std::string& resource) // Switches the audio source to `resource` (empty = default capture device) and returns a future for the resulting DeviceParams.
     326              : {
     327              :     // Always switch inputs, even if it's the same resource, so audio will be in sync with video
     328          237 :     std::unique_lock lk(resourceMutex_);
     329              : 
     330          948 :     JAMI_DEBUG("Switching audio source from [{}] to [{}]", resource_, resource);
     331              : 
     332          237 :     auto oldGuard = std::move(deviceGuard_); // the previous guard lives until this function returns, i.e. after the new source has been set up
     333              : 
     334          237 :     decoder_.reset();
     335          237 :     if (decodingFile_) {
     336            0 :         decodingFile_ = false;
     337            0 :         Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_); // undo the binding made by initFile()
     338              :     }
     339              : 
     340          237 :     playingDevice_ = false; // NOTE(review): playingFile_ is not cleared anywhere in this file, so after configureFilePlayback() readFromDevice() keeps taking the readFromQueue() path - confirm
     341          237 :     resource_ = resource;
     342          237 :     sourceRingBufferId_.clear();
     343          237 :     devOptsFound_ = false; // re-arm foundDevOpts() for the fresh promise below
     344              : 
     345          237 :     std::promise<DeviceParams> p;
     346          237 :     foundDevOpts_.swap(p); // install a fresh promise; the previous one now lives in `p` and is destroyed on return
     347              : 
     348          237 :     if (resource_.empty()) {
     349          474 :         if (initDevice(""))
     350          237 :             foundDevOpts(devOpts_);
     351              :     } else {
     352              :         static const std::string& sep = libjami::Media::VideoProtocolPrefix::SEPARATOR; // resource is expected as <prefix><SEPARATOR><suffix>
     353            0 :         const auto pos = resource_.find(sep);
     354            0 :         if (pos == std::string::npos)
     355            0 :             return {}; // NOTE(review): returns a default-constructed (invalid) future after the state above was already reset; loop start and onSuccessfulSetup_ are skipped - callers must check valid()
     356              : 
     357            0 :         const auto prefix = resource_.substr(0, pos);
     358            0 :         if ((pos + sep.size()) >= resource_.size())
     359            0 :             return {}; // nothing after the separator: same invalid-future early exit as above
     360              : 
     361            0 :         const auto suffix = resource_.substr(pos + sep.size());
     362              : 
     363            0 :         bool ready = false;
     364            0 :         if (prefix == libjami::Media::VideoProtocolPrefix::FILE)
     365            0 :             ready = initFile(suffix);
     366            0 :         else if (prefix == libjami::Media::VideoProtocolPrefix::DISPLAY)
     367            0 :             ready = initCapture(suffix);
     368              :         else
     369            0 :             ready = initDevice(suffix); // any other prefix is treated as a capture device name
     370              : 
     371            0 :         if (ready)
     372            0 :             foundDevOpts(devOpts_); // when not ready the promise is left unset by this function
     373            0 :     }
     374              : 
     375          237 :     futureDevOpts_ = foundDevOpts_.get_future().share();
     376          237 :     wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
     377          237 :     lk.unlock(); // the steps below run without resourceMutex_
     378          237 :     if (not loop_.isRunning())
     379          210 :         loop_.start();
     380          237 :     if (onSuccessfulSetup_)
     381          176 :         onSuccessfulSetup_(MEDIA_AUDIO, 0); // NOTE(review): invoked even when the init call above reported failure - confirm intended given the callback's name
     382          237 :     return futureDevOpts_;
     383          237 : }
     384              : 
     385              : void
     386          237 : AudioInput::foundDevOpts(const DeviceParams& params) // Fulfils the device-parameters promise with `params`, at most once per switchInput() call.
     387              : {
     388          237 :     if (!devOptsFound_) { // devOptsFound_ is reset by switchInput(); the guard avoids calling set_value() twice on the same promise, which would throw std::future_error
     389          237 :         devOptsFound_ = true;
     390          237 :         foundDevOpts_.set_value(params);
     391              :     }
     392          237 : }
     393              : 
     394              : void
     395          177 : AudioInput::setRecorderCallback(const std::function<void(const MediaStream& ms)>& cb) // Registers `cb` to receive this input's MediaStream description.
     396              : {
     397          177 :     settingMS_.exchange(true); // raise the flag so the next processed frame in readFromDevice() reports the stream once; the previous value is not used
     398          177 :     recorderCallback_ = cb; // NOTE(review): written without a lock while readFromDevice() reads it on the loop thread - confirm this cannot race
     399          177 :     if (decoder_)
     400            0 :         decoder_->setContextCallback([this]() { // with a decoder present, also report getInfo() whenever the decoder invokes its context callback
     401            0 :             if (recorderCallback_)
     402            0 :                 recorderCallback_(getInfo());
     403            0 :         });
     404          177 : }
     405              : 
     406              : bool
     407            0 : AudioInput::createDecoder() // (Re)creates decoder_ for devOpts_.input; returns false on failure. Every exit path calls foundDevOpts(devOpts_).
     408              : {
     409            0 :     decoder_.reset(); // the old decoder is dropped first, so decoder_ is null on every failure path below
     410            0 :     if (devOpts_.input.empty()) {
     411            0 :         foundDevOpts(devOpts_);
     412            0 :         return false;
     413              :     }
     414              : 
     415            0 :     auto decoder = std::make_unique<MediaDecoder>([this](std::shared_ptr<MediaFrame>&& frame) { // decoded frames go into this input's ring buffer; no mute handling here, unlike configureFilePlayback() (readFromDevice() silences muted frames)
     416            0 :         if (ringBuf_)
     417            0 :             ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame)); // assumes the decoder only delivers AudioFrame instances here - TODO confirm
     418            0 :     });
     419              : 
     420              :     // NOTE don't emulate rate, file is read as frames are needed
     421              : 
     422            0 :     decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); }, // returns non-zero once this input is no longer capturing
     423              :                                   this);
     424              : 
     425            0 :     if (decoder->openInput(devOpts_) < 0) {
     426            0 :         JAMI_ERROR("Unable to open input '{}'", devOpts_.input);
     427            0 :         foundDevOpts(devOpts_);
     428            0 :         return false;
     429              :     }
     430              : 
     431            0 :     if (decoder->setupAudio() < 0) {
     432            0 :         JAMI_ERROR("Unable to setup decoder for '{}'", devOpts_.input);
     433            0 :         foundDevOpts(devOpts_);
     434            0 :         return false;
     435              :     }
     436              : 
     437            0 :     auto ms = decoder->getStream(devOpts_.input);
     438            0 :     devOpts_.channel = ms.nbChannels; // report the file's own channel count and sample rate through devOpts_
     439            0 :     devOpts_.framerate = ms.sampleRate;
     440            0 :     JAMI_LOG("Created audio decoder: {}", ms);
     441              : 
     442            0 :     decoder_ = std::move(decoder); // publish the decoder only after it was opened and set up successfully
     443            0 :     foundDevOpts(devOpts_);
     444            0 :     decoder_->setContextCallback([this]() { // same forwarding as in setRecorderCallback(): report getInfo() to the recorder callback if one is set
     445            0 :         if (recorderCallback_)
     446            0 :             recorderCallback_(getInfo());
     447            0 :     });
     448            0 :     return true;
     449            0 : }
     450              : 
     451              : void
     452          176 : AudioInput::setFormat(const AudioFormat& fmt) // Changes the output format and reconfigures the resizer for one packet's worth of samples at the new rate.
     453              : {
     454          176 :     std::lock_guard lk(fmtMutex_); // same mutex readFromDevice() holds while resampling and enqueuing
     455          176 :     format_ = fmt;
     456          176 :     resizer_->setFormat(format_, static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000); // NOTE(review): the frameSize_ member keeps its constructor value; in this file it is only used in the constructor - confirm nothing else reads it
     457          176 : }
     458              : 
     459              : void
     460          354 : AudioInput::setMuted(bool isMuted) // Sets the mute flag; while set, readFromDevice() and the file-playback callback replace audio with silence.
     461              : {
     462         1416 :     JAMI_WARNING("Audio Input muted [{}]", isMuted ? "YES" : "NO"); // logged at warning level on every call, including unmute
     463          354 :     muteState_ = isMuted;
     464          354 : }
     465              : 
     466              : MediaStream
     467            1 : AudioInput::getInfo() const // Describes this input as a MediaStream named "a:local" with the current format and sample count.
     468              : {
     469            1 :     std::lock_guard lk(fmtMutex_); // guards format_ against a concurrent setFormat()
     470            3 :     return MediaStream("a:local", format_, static_cast<int64_t>(sent_samples)); // third argument is the number of samples emitted so far (see frameResized())
     471            1 : }
     472              : 
     473              : MediaStream
     474            1 : AudioInput::getInfo(const std::string& name) const // Same as getInfo(), but the stream is labelled with the caller-supplied `name`.
     475              : {
     476            1 :     std::lock_guard lk(fmtMutex_); // guards format_ against a concurrent setFormat()
     477            1 :     auto ms = MediaStream(name, format_, static_cast<int64_t>(sent_samples));
     478            2 :     return ms;
     479            1 : }
     480              : 
     481              : } // namespace jami
        

Generated by: LCOV version 2.0-1