Line data Source code
1 : /*
2 : * Copyright (C) 2004-2026 Savoir-faire Linux Inc.
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "audio_frame_resizer.h"
19 : #include "audio_input.h"
20 : #include "jami/media_const.h"
21 : #include "manager.h"
22 : #include "media_decoder.h"
23 : #include "resampler.h"
24 : #include "logger.h"
25 : #include "ringbufferpool.h"
26 : #include "tracepoint.h"
27 : #include "video/video_device.h"
28 :
29 : #include <future>
30 : #include <memory>
31 :
32 : namespace jami {
33 :
// Duration of audio covered by one frame; also the step by which the read loop's
// wake-up deadline is advanced.
static constexpr std::chrono::milliseconds MS_PER_PACKET {20};
35 :
36 236 : AudioInput::AudioInput(const std::string& id)
37 236 : : id_(id)
38 236 : , format_(Manager::instance().getRingBufferPool().getInternalAudioFormat())
39 236 : , frameSize_(static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000)
40 236 : , resampler_(new Resampler)
41 472 : , resizer_(new AudioFrameResizer(format_,
42 : frameSize_,
43 236 : [this](std::shared_ptr<AudioFrame>&& f) { frameResized(std::move(f)); }))
44 236 : , deviceGuard_()
45 42947 : , loop_([] { return true; }, [this] { process(); }, [] {})
46 : {
47 944 : JAMI_DEBUG("Creating audio input with id: {}", id_);
48 236 : ringBuf_ = Manager::instance().getRingBufferPool().createRingBuffer(id_);
49 236 : }
50 :
51 0 : AudioInput::AudioInput(const std::string& id, const std::string& resource)
52 0 : : AudioInput(id)
53 : {
54 0 : switchInput(resource);
55 0 : }
56 :
57 236 : AudioInput::~AudioInput()
58 : {
59 236 : if (playingFile_) {
60 5 : Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
61 5 : Manager::instance().getRingBufferPool().unBindHalfDuplexOut(id_, id_);
62 : }
63 236 : ringBuf_.reset();
64 236 : loop_.join();
65 :
66 236 : Manager::instance().getRingBufferPool().flush(id_);
67 236 : }
68 :
69 : void
70 41593 : AudioInput::process()
71 : {
72 41593 : readFromDevice();
73 41593 : }
74 :
75 : void
76 18 : AudioInput::updateStartTime(int64_t start)
77 : {
78 18 : if (decoder_) {
79 18 : decoder_->updateStartTime(start);
80 : }
81 18 : }
82 :
83 : void
84 0 : AudioInput::frameResized(std::shared_ptr<AudioFrame>&& ptr)
85 : {
86 0 : std::shared_ptr<AudioFrame> frame = std::move(ptr);
87 0 : frame->pointer()->pts = static_cast<int64_t>(sent_samples);
88 0 : sent_samples += frame->pointer()->nb_samples;
89 :
90 0 : notify(std::static_pointer_cast<MediaFrame>(std::move(frame)));
91 0 : }
92 :
93 : void
94 9 : AudioInput::setSeekTime(int64_t time)
95 : {
96 9 : if (decoder_) {
97 9 : decoder_->setSeekTime(time);
98 : }
99 9 : }
100 :
101 : void
102 41593 : AudioInput::readFromDevice()
103 : {
104 : {
105 41593 : std::lock_guard lk(resourceMutex_);
106 41593 : if (decodingFile_)
107 0 : while (ringBuf_ && ringBuf_->isEmpty())
108 0 : readFromFile();
109 41593 : if (playingFile_) {
110 15 : while (ringBuf_ && ringBuf_->getLength(id_) == 0)
111 10 : readFromQueue();
112 : }
113 41593 : }
114 :
115 41593 : auto& bufferPool = Manager::instance().getRingBufferPool();
116 41593 : if (not bufferPool.waitForDataAvailable(id_, wakeUp_))
117 41593 : std::this_thread::sleep_until(wakeUp_);
118 41593 : wakeUp_ += MS_PER_PACKET;
119 :
120 41593 : auto audioFrame = bufferPool.getData(id_);
121 41593 : if (not audioFrame)
122 41593 : return;
123 :
124 0 : if (muteState_) {
125 0 : libav_utils::fillWithSilence(audioFrame->pointer());
126 0 : audioFrame->has_voice = false; // force no voice activity when muted
127 : }
128 :
129 0 : std::lock_guard lk(fmtMutex_);
130 0 : if (bufferPool.getInternalAudioFormat() != format_)
131 0 : audioFrame = resampler_->resample(std::move(audioFrame), format_);
132 0 : resizer_->enqueue(std::move(audioFrame));
133 :
134 0 : if (recorderCallback_ && settingMS_.exchange(false)) {
135 0 : recorderCallback_(MediaStream("a:local", format_, static_cast<int64_t>(sent_samples)));
136 : }
137 :
138 : jami_tracepoint(audio_input_read_from_device_end, id_.c_str());
139 41593 : }
140 :
141 : void
142 10 : AudioInput::readFromQueue()
143 : {
144 10 : if (!decoder_)
145 0 : return;
146 10 : if (paused_ || !decoder_->emitFrame(true)) {
147 10 : std::this_thread::sleep_for(MS_PER_PACKET);
148 : }
149 : }
150 :
151 : void
152 0 : AudioInput::readFromFile()
153 : {
154 0 : if (!decoder_)
155 0 : return;
156 0 : const auto ret = decoder_->decode();
157 0 : switch (ret) {
158 0 : case MediaDemuxer::Status::Success:
159 0 : break;
160 0 : case MediaDemuxer::Status::EndOfFile:
161 0 : createDecoder();
162 0 : break;
163 0 : case MediaDemuxer::Status::ReadError:
164 0 : JAMI_ERR() << "Failed to decode frame";
165 0 : break;
166 0 : case MediaDemuxer::Status::ReadBufferOverflow:
167 0 : JAMI_ERR() << "Read buffer overflow detected";
168 0 : break;
169 0 : case MediaDemuxer::Status::FallBack:
170 : case MediaDemuxer::Status::RestartRequired:
171 0 : break;
172 : }
173 : }
174 :
175 : bool
176 0 : AudioInput::initCapture(const std::string& device)
177 : {
178 0 : std::string targetId = device;
179 : #if defined(_WIN32)
180 : // There are two possible formats for device:
181 : // 1. A string containing "window-id:hwnd=XXXX" where XXXX is the HWND of the window to capture
182 : // 2. A string that does not contain a window handle, in which case we capture desktop audio
183 : std::string pattern = "window-id:hwnd=";
184 : size_t winHandlePos = device.find(pattern);
185 :
186 : if (winHandlePos != std::string::npos) {
187 : // Get HWND from device URI
188 : size_t startPos = winHandlePos + pattern.size();
189 : size_t endPos = device.find(' ', startPos);
190 : if (endPos == std::string::npos) {
191 : endPos = device.size();
192 : }
193 : targetId = device.substr(startPos, endPos - startPos);
194 : } else {
195 : targetId = video::DEVICE_DESKTOP;
196 : }
197 : #elif defined(__linux__)
198 : // On Linux, we always capture desktop audio because window-specific audio capture is not yet implemented
199 : // Possible to implement window audio capture on X11 specifically in the future, but not Wayland as of Jan 2026
200 : // See https://github.com/flatpak/xdg-desktop-portal/issues/957
201 0 : targetId = video::DEVICE_DESKTOP;
202 : #elif defined(__APPLE__)
203 : // As of Jan 2026, audio capture has not been implemented for macOS (TODO)
204 : targetId = video::DEVICE_DESKTOP;
205 : #endif
206 :
207 0 : devOpts_ = {};
208 0 : devOpts_.input = targetId;
209 0 : devOpts_.channel = format_.nb_channels;
210 0 : devOpts_.framerate = format_.sample_rate;
211 :
212 : // This will cause the audio layer to create a ring buffer with id=targetId
213 : // The audio layer will then fill it with the audio from the captured window/desktop
214 0 : deviceGuard_ = Manager::instance().startCaptureStream(targetId);
215 0 : if (!deviceGuard_) {
216 0 : if (!targetId.empty())
217 0 : JAMI_ERROR("Failed to start capture stream for window-id: {}", targetId);
218 : else
219 0 : JAMI_ERROR("Failed to start capture stream for desktop audio");
220 0 : return false;
221 : }
222 :
223 : // We want the audio input's ring buffer to read the captured audio from the audio layer
224 : // Then the audio RTP session will handle sending the audio over the network
225 0 : Manager::instance().getRingBufferPool().bindHalfDuplexOut(id_, targetId);
226 :
227 0 : sourceRingBufferId_ = targetId;
228 0 : playingDevice_ = true;
229 0 : return true;
230 0 : }
231 :
232 : bool
233 227 : AudioInput::initDevice(const std::string& device)
234 : {
235 227 : devOpts_ = {};
236 227 : devOpts_.input = device;
237 227 : devOpts_.channel = format_.nb_channels;
238 227 : devOpts_.framerate = format_.sample_rate;
239 227 : deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::CAPTURE);
240 227 : playingDevice_ = true;
241 227 : return true;
242 : }
243 :
244 : void
245 5 : AudioInput::configureFilePlayback(const std::string& path, std::shared_ptr<MediaDemuxer>& demuxer, int index)
246 : {
247 5 : decoder_.reset();
248 5 : devOpts_ = {};
249 5 : devOpts_.input = path;
250 5 : devOpts_.name = path;
251 0 : auto decoder = std::make_unique<MediaDecoder>(demuxer, index, [this](std::shared_ptr<MediaFrame>&& frame) {
252 0 : if (muteState_)
253 0 : libav_utils::fillWithSilence(frame->pointer());
254 0 : if (ringBuf_)
255 0 : ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame));
256 5 : });
257 5 : decoder->emulateRate();
258 5 : decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); },
259 : this);
260 :
261 : // have file audio mixed into the local buffer so it gets played
262 5 : Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
263 : // Bind to itself to be able to read from the ringbuffer
264 5 : Manager::instance().getRingBufferPool().bindHalfDuplexOut(id_, id_);
265 :
266 5 : sourceRingBufferId_ = id_;
267 5 : deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
268 :
269 5 : wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
270 5 : playingFile_ = true;
271 5 : decoder_ = std::move(decoder);
272 5 : resource_ = path;
273 5 : loop_.start();
274 5 : }
275 :
276 : void
277 10 : AudioInput::setPaused(bool paused)
278 : {
279 10 : if (paused) {
280 8 : Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
281 8 : deviceGuard_.reset();
282 : } else {
283 2 : Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
284 2 : deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
285 : }
286 10 : paused_ = paused;
287 10 : }
288 :
289 : void
290 9 : AudioInput::flushBuffers()
291 : {
292 9 : if (decoder_) {
293 9 : decoder_->flushBuffers();
294 : }
295 9 : }
296 :
297 : bool
298 0 : AudioInput::initFile(const std::string& path)
299 : {
300 0 : if (access(path.c_str(), R_OK) != 0) {
301 0 : JAMI_ERROR("File '{}' not available", path);
302 0 : return false;
303 : }
304 :
305 0 : devOpts_ = {};
306 0 : devOpts_.input = path;
307 0 : devOpts_.name = path;
308 0 : devOpts_.loop = "1";
309 : // sets devOpts_'s sample rate and number of channels
310 0 : if (!createDecoder()) {
311 0 : JAMI_WARN() << "Unable to decode audio from file, switching back to default device";
312 0 : return initDevice("");
313 : }
314 0 : wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
315 :
316 : // have file audio mixed into the local buffer so it gets played
317 0 : Manager::instance().getRingBufferPool().bindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
318 0 : sourceRingBufferId_ = id_;
319 0 : decodingFile_ = true;
320 0 : deviceGuard_ = Manager::instance().startAudioStream(AudioDeviceType::PLAYBACK);
321 0 : return true;
322 : }
323 :
324 : std::shared_future<DeviceParams>
325 227 : AudioInput::switchInput(const std::string& resource)
326 : {
327 : // Always switch inputs, even if it's the same resource, so audio will be in sync with video
328 227 : std::unique_lock lk(resourceMutex_);
329 :
330 908 : JAMI_DEBUG("Switching audio source from [{}] to [{}]", resource_, resource);
331 :
332 227 : auto oldGuard = std::move(deviceGuard_);
333 :
334 227 : decoder_.reset();
335 227 : if (decodingFile_) {
336 0 : decodingFile_ = false;
337 0 : Manager::instance().getRingBufferPool().unBindHalfDuplexOut(RingBufferPool::DEFAULT_ID, id_);
338 : }
339 :
340 227 : playingDevice_ = false;
341 227 : resource_ = resource;
342 227 : sourceRingBufferId_.clear();
343 227 : devOptsFound_ = false;
344 :
345 227 : std::promise<DeviceParams> p;
346 227 : foundDevOpts_.swap(p);
347 :
348 227 : if (resource_.empty()) {
349 227 : if (initDevice(""))
350 227 : foundDevOpts(devOpts_);
351 : } else {
352 0 : static const std::string& sep = libjami::Media::VideoProtocolPrefix::SEPARATOR;
353 0 : const auto pos = resource_.find(sep);
354 0 : if (pos == std::string::npos)
355 0 : return {};
356 :
357 0 : const auto prefix = resource_.substr(0, pos);
358 0 : if ((pos + sep.size()) >= resource_.size())
359 0 : return {};
360 :
361 0 : const auto suffix = resource_.substr(pos + sep.size());
362 :
363 0 : bool ready = false;
364 0 : if (prefix == libjami::Media::VideoProtocolPrefix::FILE)
365 0 : ready = initFile(suffix);
366 0 : else if (prefix == libjami::Media::VideoProtocolPrefix::DISPLAY)
367 0 : ready = initCapture(suffix);
368 : else
369 0 : ready = initDevice(suffix);
370 :
371 0 : if (ready)
372 0 : foundDevOpts(devOpts_);
373 0 : }
374 :
375 227 : futureDevOpts_ = foundDevOpts_.get_future().share();
376 227 : wakeUp_ = std::chrono::steady_clock::now() + MS_PER_PACKET;
377 227 : lk.unlock();
378 227 : if (not loop_.isRunning())
379 200 : loop_.start();
380 227 : if (onSuccessfulSetup_)
381 166 : onSuccessfulSetup_(MEDIA_AUDIO, 0);
382 227 : return futureDevOpts_;
383 227 : }
384 :
385 : void
386 227 : AudioInput::foundDevOpts(const DeviceParams& params)
387 : {
388 227 : if (!devOptsFound_) {
389 227 : devOptsFound_ = true;
390 227 : foundDevOpts_.set_value(params);
391 : }
392 227 : }
393 :
394 : void
395 167 : AudioInput::setRecorderCallback(const std::function<void(const MediaStream& ms)>& cb)
396 : {
397 167 : settingMS_.exchange(true);
398 167 : recorderCallback_ = cb;
399 167 : if (decoder_)
400 0 : decoder_->setContextCallback([this]() {
401 0 : if (recorderCallback_)
402 0 : recorderCallback_(getInfo());
403 0 : });
404 167 : }
405 :
406 : bool
407 0 : AudioInput::createDecoder()
408 : {
409 0 : decoder_.reset();
410 0 : if (devOpts_.input.empty()) {
411 0 : foundDevOpts(devOpts_);
412 0 : return false;
413 : }
414 :
415 0 : auto decoder = std::make_unique<MediaDecoder>([this](std::shared_ptr<MediaFrame>&& frame) {
416 0 : if (ringBuf_)
417 0 : ringBuf_->put(std::static_pointer_cast<AudioFrame>(frame));
418 0 : });
419 :
420 : // NOTE don't emulate rate, file is read as frames are needed
421 :
422 0 : decoder->setInterruptCallback([](void* data) -> int { return not static_cast<AudioInput*>(data)->isCapturing(); },
423 : this);
424 :
425 0 : if (decoder->openInput(devOpts_) < 0) {
426 0 : JAMI_ERR() << "Unable to open input '" << devOpts_.input << "'";
427 0 : foundDevOpts(devOpts_);
428 0 : return false;
429 : }
430 :
431 0 : if (decoder->setupAudio() < 0) {
432 0 : JAMI_ERR() << "Unable to setup decoder for '" << devOpts_.input << "'";
433 0 : foundDevOpts(devOpts_);
434 0 : return false;
435 : }
436 :
437 0 : auto ms = decoder->getStream(devOpts_.input);
438 0 : devOpts_.channel = ms.nbChannels;
439 0 : devOpts_.framerate = ms.sampleRate;
440 0 : JAMI_DBG() << "Created audio decoder: " << ms;
441 :
442 0 : decoder_ = std::move(decoder);
443 0 : foundDevOpts(devOpts_);
444 0 : decoder_->setContextCallback([this]() {
445 0 : if (recorderCallback_)
446 0 : recorderCallback_(getInfo());
447 0 : });
448 0 : return true;
449 0 : }
450 :
451 : void
452 166 : AudioInput::setFormat(const AudioFormat& fmt)
453 : {
454 166 : std::lock_guard lk(fmtMutex_);
455 166 : format_ = fmt;
456 166 : resizer_->setFormat(format_, static_cast<int>(format_.sample_rate * MS_PER_PACKET.count()) / 1000);
457 166 : }
458 :
459 : void
460 333 : AudioInput::setMuted(bool isMuted)
461 : {
462 333 : JAMI_WARN("Audio Input muted [%s]", isMuted ? "YES" : "NO");
463 333 : muteState_ = isMuted;
464 333 : }
465 :
466 : MediaStream
467 0 : AudioInput::getInfo() const
468 : {
469 0 : std::lock_guard lk(fmtMutex_);
470 0 : return MediaStream("a:local", format_, static_cast<int64_t>(sent_samples));
471 0 : }
472 :
473 : MediaStream
474 1 : AudioInput::getInfo(const std::string& name) const
475 : {
476 1 : std::lock_guard lk(fmtMutex_);
477 1 : auto ms = MediaStream(name, format_, static_cast<int64_t>(sent_samples));
478 2 : return ms;
479 1 : }
480 :
481 : } // namespace jami
|