Line data Source code
1 : /*
2 : * Copyright (C) 2004-2026 Savoir-faire Linux Inc.
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "libav_deps.h"
19 : #include "logger.h"
20 : #include <libavutil/frame.h>
21 : #include <libavutil/mathematics.h>
22 : #include "resampler.h"
23 : #include "libav_utils.h"
24 :
25 : extern "C" {
26 : #include <libswresample/swresample.h>
27 : }
28 :
29 : namespace jami {
30 :
31 859 : Resampler::Resampler()
32 859 : : swrCtx_(swr_alloc())
33 859 : , initCount_(0)
34 859 : {}
35 :
36 859 : Resampler::~Resampler()
37 : {
38 859 : swr_free(&swrCtx_);
39 859 : }
40 :
41 : void
42 3 : Resampler::reinit(const AVFrame* in, const AVFrame* out)
43 : {
44 : // NOTE swr_set_matrix should be called on an uninitialized context
45 3 : auto swrCtx = swr_alloc();
46 3 : if (!swrCtx) {
47 0 : JAMI_ERROR("[{}] Unable to allocate resampler context", fmt::ptr(this));
48 0 : throw std::bad_alloc();
49 : }
50 :
51 3 : int ret = av_opt_set_chlayout(swrCtx, "ichl", &in->ch_layout, 0);
52 3 : if (ret < 0) {
53 0 : swr_free(&swrCtx);
54 : char layout_buf[64];
55 0 : av_channel_layout_describe(&in->ch_layout, layout_buf, sizeof(layout_buf));
56 0 : JAMI_ERROR("[{}] Failed to set input channel layout {}: {}",
57 : fmt::ptr(this),
58 : layout_buf,
59 : libav_utils::getError(ret));
60 0 : throw std::runtime_error("Failed to set input channel layout");
61 : }
62 3 : ret = av_opt_set_int(swrCtx, "isr", in->sample_rate, 0);
63 3 : if (ret < 0) {
64 0 : swr_free(&swrCtx);
65 0 : JAMI_ERROR("[{}] Failed to set input sample rate {}: {}",
66 : fmt::ptr(this),
67 : in->sample_rate,
68 : libav_utils::getError(ret));
69 0 : throw std::runtime_error("Failed to set input sample rate");
70 : }
71 3 : ret = av_opt_set_sample_fmt(swrCtx, "isf", static_cast<AVSampleFormat>(in->format), 0);
72 3 : if (ret < 0) {
73 0 : swr_free(&swrCtx);
74 0 : JAMI_ERROR("[{}] Failed to set input sample format {}: {}",
75 : fmt::ptr(this),
76 : av_get_sample_fmt_name(static_cast<AVSampleFormat>(in->format)),
77 : libav_utils::getError(ret));
78 0 : throw std::runtime_error("Failed to set input sample format");
79 : }
80 :
81 3 : ret = av_opt_set_chlayout(swrCtx, "ochl", &out->ch_layout, 0);
82 3 : if (ret < 0) {
83 0 : swr_free(&swrCtx);
84 : char layout_buf[64];
85 0 : av_channel_layout_describe(&out->ch_layout, layout_buf, sizeof(layout_buf));
86 0 : JAMI_ERROR("[{}] Failed to set output channel layout {}: {}",
87 : fmt::ptr(this),
88 : layout_buf,
89 : libav_utils::getError(ret));
90 0 : throw std::runtime_error("Failed to set output channel layout");
91 : }
92 3 : ret = av_opt_set_int(swrCtx, "osr", out->sample_rate, 0);
93 3 : if (ret < 0) {
94 0 : swr_free(&swrCtx);
95 0 : JAMI_ERROR("[{}] Failed to set output sample rate {}: {}",
96 : fmt::ptr(this),
97 : out->sample_rate,
98 : libav_utils::getError(ret));
99 0 : throw std::runtime_error("Failed to set output sample rate");
100 : }
101 3 : ret = av_opt_set_sample_fmt(swrCtx, "osf", static_cast<AVSampleFormat>(out->format), 0);
102 3 : if (ret < 0) {
103 0 : swr_free(&swrCtx);
104 0 : JAMI_ERROR("[{}] Failed to set output sample format {}: {}",
105 : fmt::ptr(this),
106 : av_get_sample_fmt_name(static_cast<AVSampleFormat>(out->format)),
107 : libav_utils::getError(ret));
108 0 : throw std::runtime_error("Failed to set output sample format");
109 : }
110 :
111 : /**
112 : * Downmixing from 5.1 requires extra setup, since libswresample is unable to do it
113 : * automatically (not yet implemented).
114 : *
115 : * Source: https://www.atsc.org/wp-content/uploads/2015/03/A52-201212-17.pdf
116 : * Section 7.8.2 for the algorithm
117 : * Tables 5.9 and 5.10 for the coefficients clev and slev
118 : *
119 : * LFE downmixing is optional, so any coefficient can be used, we use +6dB for mono and
120 : * +0dB in each channel for stereo.
121 : */
122 3 : if (in->ch_layout.u.mask == AV_CH_LAYOUT_5POINT1 || in->ch_layout.u.mask == AV_CH_LAYOUT_5POINT1_BACK) {
123 2 : int ret = 0;
124 : // NOTE: MSVC is unable to allocate dynamic size arrays on the stack
125 2 : if (out->ch_layout.nb_channels == 2) {
126 : double matrix[2][6];
127 : // L = 1.0*FL + 0.707*FC + 0.707*BL + 1.0*LFE
128 1 : matrix[0][0] = 1;
129 1 : matrix[0][1] = 0;
130 1 : matrix[0][2] = 0.707;
131 1 : matrix[0][3] = 1;
132 1 : matrix[0][4] = 0.707;
133 1 : matrix[0][5] = 0;
134 : // R = 1.0*FR + 0.707*FC + 0.707*BR + 1.0*LFE
135 1 : matrix[1][0] = 0;
136 1 : matrix[1][1] = 1;
137 1 : matrix[1][2] = 0.707;
138 1 : matrix[1][3] = 1;
139 1 : matrix[1][4] = 0;
140 1 : matrix[1][5] = 0.707;
141 1 : ret = swr_set_matrix(swrCtx, matrix[0], 6);
142 : } else {
143 : double matrix[1][6];
144 : // M = 1.0*FL + 1.414*FC + 1.0*FR + 0.707*BL + 0.707*BR + 2.0*LFE
145 1 : matrix[0][0] = 1;
146 1 : matrix[0][1] = 1;
147 1 : matrix[0][2] = 1.414;
148 1 : matrix[0][3] = 2;
149 1 : matrix[0][4] = 0.707;
150 1 : matrix[0][5] = 0.707;
151 1 : ret = swr_set_matrix(swrCtx, matrix[0], 6);
152 : }
153 2 : if (ret < 0) {
154 0 : swr_free(&swrCtx);
155 0 : JAMI_ERROR("[{}] Failed to set mixing matrix: {}", fmt::ptr(this), libav_utils::getError(ret));
156 0 : throw std::runtime_error("Failed to set mixing matrix");
157 : }
158 : }
159 :
160 3 : ret = swr_init(swrCtx);
161 3 : if (ret >= 0) {
162 3 : std::swap(swrCtx_, swrCtx);
163 3 : swr_free(&swrCtx);
164 12 : JAMI_DEBUG("[{}] Succesfully (re)initialized resampler context from {} to {}",
165 : fmt::ptr(this),
166 : libav_utils::getFormat(in).toString(),
167 : libav_utils::getFormat(out).toString());
168 3 : ++initCount_;
169 : } else {
170 0 : swr_free(&swrCtx);
171 0 : JAMI_ERROR("[{}] Runtime error: Failed to initialize resampler context: {}",
172 : fmt::ptr(this),
173 : libav_utils::getError(ret));
174 0 : throw std::runtime_error("Failed to initialize resampler context");
175 : }
176 3 : }
177 :
178 : int
179 4 : Resampler::resample(const AVFrame* input, AVFrame* output)
180 : {
181 4 : bool firstFrame = (initCount_ == 0);
182 4 : if (!initCount_)
183 2 : reinit(input, output);
184 :
185 4 : int ret = swr_convert_frame(swrCtx_, output, input);
186 4 : if (ret & AVERROR_INPUT_CHANGED || ret & AVERROR_OUTPUT_CHANGED) {
187 : // Under certain conditions, the resampler reinits itself in an infinite loop. This is
188 : // indicative of an underlying problem in the code. This check is so the backtrace
189 : // doesn't get mangled with a bunch of calls to Resampler::resample
190 1 : if (initCount_ > 1) {
191 : // JAMI_ERROR("Infinite loop detected in audio resampler, please open an issue on https://git.jami.net");
192 0 : JAMI_ERROR("[{}] Loop detected in audio resampler when resampling from {} to {}",
193 : fmt::ptr(this),
194 : libav_utils::getFormat(input).toString(),
195 : libav_utils::getFormat(output).toString());
196 0 : throw std::runtime_error("Infinite loop detected in audio resampler");
197 : }
198 1 : reinit(input, output);
199 1 : return resample(input, output);
200 : }
201 :
202 3 : if (ret < 0) {
203 0 : JAMI_ERROR("[{}] Failed to resample frame: {}", fmt::ptr(this), libav_utils::getError(ret));
204 0 : return -1;
205 : }
206 :
207 3 : if (firstFrame) {
208 : // we just resampled the first frame
209 4 : auto targetOutputLength = av_rescale_rnd(input->nb_samples,
210 2 : output->sample_rate,
211 2 : input->sample_rate,
212 : AV_ROUND_UP);
213 2 : if (output->nb_samples < targetOutputLength) {
214 : // create new frame with more samples, padded with silence
215 8 : JAMI_WARNING("[{}] Adding {} samples of silence at beginning of first frame to reach {} samples",
216 : fmt::ptr(this),
217 : targetOutputLength - output->nb_samples,
218 : targetOutputLength);
219 2 : auto* newOutput = av_frame_alloc();
220 2 : if (!newOutput) {
221 0 : JAMI_ERROR("[{}] Failed to clone output frame for resizing", fmt::ptr(this));
222 0 : return -1;
223 : }
224 2 : av_frame_copy_props(newOutput, output);
225 2 : newOutput->format = output->format;
226 2 : newOutput->nb_samples = static_cast<int>(targetOutputLength);
227 2 : newOutput->ch_layout = output->ch_layout;
228 2 : newOutput->channel_layout = output->channel_layout;
229 2 : newOutput->sample_rate = output->sample_rate;
230 2 : int bufferRet = av_frame_get_buffer(newOutput, 0);
231 2 : if (bufferRet < 0) {
232 0 : JAMI_ERROR("[{}] Failed to allocate new output frame buffer: {}",
233 : fmt::ptr(this),
234 : libav_utils::getError(bufferRet));
235 0 : av_frame_free(&newOutput);
236 0 : return -1;
237 : }
238 2 : auto sampleOffset = targetOutputLength - output->nb_samples;
239 4 : bufferRet = av_samples_set_silence(newOutput->data,
240 : 0,
241 : static_cast<int>(sampleOffset),
242 : output->ch_layout.nb_channels,
243 2 : static_cast<AVSampleFormat>(output->format));
244 2 : if (bufferRet < 0) {
245 0 : JAMI_ERROR("[{}] Failed to set silence on new output frame: {}",
246 : fmt::ptr(this),
247 : libav_utils::getError(bufferRet));
248 0 : av_frame_free(&newOutput);
249 0 : return -1;
250 : }
251 : // copy old data to new frame at offset sampleOffset
252 4 : bufferRet = av_samples_copy(newOutput->data,
253 2 : output->data,
254 : static_cast<int>(sampleOffset),
255 : 0,
256 : output->nb_samples,
257 : output->ch_layout.nb_channels,
258 2 : static_cast<AVSampleFormat>(output->format));
259 2 : if (bufferRet < 0) {
260 0 : JAMI_ERROR("[{}] Failed to copy data to new output frame: {}",
261 : fmt::ptr(this),
262 : libav_utils::getError(bufferRet));
263 0 : av_frame_free(&newOutput);
264 0 : return -1;
265 : }
266 8 : JAMI_DEBUG("[{}] Resampled first frame. Resized from {} to {} samples",
267 : fmt::ptr(this),
268 : output->nb_samples,
269 : newOutput->nb_samples);
270 : // replace output frame buffer
271 2 : av_frame_unref(output);
272 2 : av_frame_move_ref(output, newOutput);
273 2 : av_frame_free(&newOutput);
274 : }
275 : }
276 :
277 : // Resampling worked, reset count to 1 so reinit isn't called again
278 3 : initCount_ = 1;
279 3 : return 0;
280 : }
281 :
282 : std::unique_ptr<AudioFrame>
283 0 : Resampler::resample(std::unique_ptr<AudioFrame>&& in, const AudioFormat& format)
284 : {
285 0 : if (in->pointer()->sample_rate == (int) format.sample_rate
286 0 : && in->pointer()->ch_layout.nb_channels == (int) format.nb_channels
287 0 : && (AVSampleFormat) in->pointer()->format == format.sampleFormat) {
288 0 : return std::move(in);
289 : }
290 0 : auto output = std::make_unique<AudioFrame>(format);
291 0 : resample(in->pointer(), output->pointer());
292 0 : output->has_voice = in->has_voice;
293 0 : return output;
294 0 : }
295 :
296 : std::shared_ptr<AudioFrame>
297 0 : Resampler::resample(std::shared_ptr<AudioFrame>&& in, const AudioFormat& format)
298 : {
299 0 : if (not in) {
300 0 : return {};
301 : }
302 0 : auto inPtr = in->pointer();
303 0 : if (inPtr == nullptr) {
304 0 : return {};
305 : }
306 :
307 0 : if (inPtr->sample_rate == (int) format.sample_rate && inPtr->ch_layout.nb_channels == (int) format.nb_channels
308 0 : && (AVSampleFormat) inPtr->format == format.sampleFormat) {
309 0 : return std::move(in);
310 : }
311 :
312 0 : auto output = std::make_shared<AudioFrame>(format);
313 0 : if (auto outPtr = output->pointer()) {
314 0 : resample(inPtr, outPtr);
315 0 : output->has_voice = in->has_voice;
316 0 : return output;
317 : }
318 0 : return {};
319 0 : }
320 :
321 : } // namespace jami
|