Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 123 additions & 3 deletions src/core/audio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <algorithm>
#include <cmath>
#include <limits>
#include <numeric>
#include <vector>
#ifdef LIBROSA_HAS_SOXR
#include <soxr.h>
Expand Down Expand Up @@ -279,6 +280,11 @@ namespace {
Real rolloff;
};

struct RationalResamplePlan {
Eigen::Index source_step;
Eigen::Index phase_count;
};

bool is_kaiser_resampler(const std::string& res_type) {
return res_type == "kaiser" ||
res_type == "kaiser_vhq" ||
Expand Down Expand Up @@ -344,8 +350,45 @@ namespace {
return modified_bessel_i0(arg) / i0_beta;
}

ArrayXr kaiser_sinc_resample(const ArrayXr& y, Real ratio, Eigen::Index n_samples,
const SincResamplerSpec& spec) {
std::optional<RationalResamplePlan> rational_resample_plan(
Real orig_sr, Real target_sr, Real ratio) {
long long orig_i = static_cast<long long>(std::llround(orig_sr));
long long target_i = static_cast<long long>(std::llround(target_sr));

if (orig_i <= 0 || target_i <= 0) {
return std::nullopt;
}

if (std::abs(orig_sr - static_cast<Real>(orig_i)) > 1e-6 ||
std::abs(target_sr - static_cast<Real>(target_i)) > 1e-6) {
return std::nullopt;
}

Real rational_ratio =
static_cast<Real>(target_i) / static_cast<Real>(orig_i);
if (std::abs(ratio - rational_ratio) >
1e-12 * std::max<Real>(1.0, std::abs(ratio))) {
return std::nullopt;
}

long long divisor = std::gcd(orig_i, target_i);
long long source_step = orig_i / divisor;
long long phase_count = target_i / divisor;

// Avoid excessive setup for unusual non-reduced rates.
if (phase_count > 4096) {
return std::nullopt;
}

return RationalResamplePlan{
static_cast<Eigen::Index>(source_step),
static_cast<Eigen::Index>(phase_count)
};
}

ArrayXr kaiser_sinc_resample_direct(const ArrayXr& y, Real ratio,
Eigen::Index n_samples,
const SincResamplerSpec& spec) {
ArrayXr y_hat(n_samples);
if (n_samples == 0) {
return y_hat;
Expand Down Expand Up @@ -381,6 +424,82 @@ namespace {
return y_hat;
}

ArrayXr kaiser_sinc_resample_polyphase(const ArrayXr& y, Real ratio,
Eigen::Index n_samples,
const SincResamplerSpec& spec,
const RationalResamplePlan& plan) {
ArrayXr y_hat(n_samples);
if (n_samples == 0) {
return y_hat;
}

Real cutoff = std::min<Real>(1.0, ratio) * spec.rolloff;
cutoff = std::min<Real>(1.0, std::max<Real>(cutoff, 1e-8));

Eigen::Index radius = static_cast<Eigen::Index>(
std::ceil(static_cast<Real>(spec.zero_crossings) / cutoff));
radius = std::max<Eigen::Index>(radius, 1);

Real i0_beta = modified_bessel_i0(spec.beta);
const Eigen::Index kernel_size = 2 * radius + 1;

std::vector<Eigen::Index> phase_center_offset(
static_cast<size_t>(plan.phase_count));
std::vector<std::vector<Real>> phase_weights(
static_cast<size_t>(plan.phase_count),
std::vector<Real>(static_cast<size_t>(kernel_size)));

for (Eigen::Index phase = 0; phase < plan.phase_count; ++phase) {
Real input_pos = static_cast<Real>(phase) / ratio;
Eigen::Index center = static_cast<Eigen::Index>(std::floor(input_pos));
phase_center_offset[static_cast<size_t>(phase)] = center;

for (Eigen::Index rel = -radius; rel <= radius; ++rel) {
Real distance = input_pos - static_cast<Real>(center + rel);
Real window = kaiser_window(distance, static_cast<Real>(radius),
spec.beta, i0_beta);
Real weight = cutoff * sinc(cutoff * distance) * window;
phase_weights[static_cast<size_t>(phase)]
[static_cast<size_t>(rel + radius)] = weight;
}
}

for (Eigen::Index out_idx = 0; out_idx < n_samples; ++out_idx) {
Eigen::Index block = out_idx / plan.phase_count;
Eigen::Index phase = out_idx - block * plan.phase_count;
Eigen::Index center =
block * plan.source_step +
phase_center_offset[static_cast<size_t>(phase)];

Eigen::Index first_rel = std::max<Eigen::Index>(-radius, -center);
Eigen::Index last_rel =
std::min<Eigen::Index>(radius, y.size() - 1 - center);

Real sample = 0.0;
const auto& weights = phase_weights[static_cast<size_t>(phase)];
for (Eigen::Index rel = first_rel; rel <= last_rel; ++rel) {
sample += y(center + rel) *
weights[static_cast<size_t>(rel + radius)];
}

y_hat(out_idx) = sample;
}

return y_hat;
}

ArrayXr kaiser_sinc_resample(const ArrayXr& y, Real orig_sr, Real target_sr,
Real ratio, Eigen::Index n_samples,
const SincResamplerSpec& spec) {
std::optional<RationalResamplePlan> plan =
rational_resample_plan(orig_sr, target_sr, ratio);
if (plan) {
return kaiser_sinc_resample_polyphase(y, ratio, n_samples, spec, *plan);
}

return kaiser_sinc_resample_direct(y, ratio, n_samples, spec);
}

#ifdef LIBROSA_HAS_SOXR
unsigned long soxr_quality_recipe(const std::string& res_type) {
if (res_type == "soxr_vhq") return SOXR_VHQ;
Expand Down Expand Up @@ -660,7 +779,8 @@ ArrayXr resample(const ArrayXr& y, Real orig_sr, Real target_sr,
ArrayXr y_hat;

if (is_kaiser_resampler(res_type)) {
y_hat = kaiser_sinc_resample(y, ratio, n_samples, kaiser_resampler_spec(res_type));
y_hat = kaiser_sinc_resample(y, orig_sr, target_sr, ratio, n_samples,
kaiser_resampler_spec(res_type));
} else if (is_soxr_resampler(res_type)) {
#ifdef LIBROSA_HAS_SOXR
y_hat = soxr_resample(y, orig_sr, target_sr, n_samples, res_type);
Expand Down
85 changes: 85 additions & 0 deletions tests/test_audio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#endif
#include <cmath>
#include <filesystem>
#include <limits>
#include <random>
#include <stdexcept>
#include <string>
Expand All @@ -31,6 +32,71 @@ ArrayXr random_array(Eigen::Index size) {
return result;
}

Real reference_sinc(Real x) {
if (std::abs(x) < 1e-12) {
return 1.0;
}
Real pix = constants::PI * x;
return std::sin(pix) / pix;
}

Real reference_modified_bessel_i0(Real x) {
Real y = (x * x) * 0.25;
Real term = 1.0;
Real sum = 1.0;

for (int k = 1; k < 80; ++k) {
term *= y / static_cast<Real>(k * k);
sum += term;
if (term <= sum * std::numeric_limits<Real>::epsilon()) {
break;
}
}

return sum;
}

ArrayXr reference_kaiser_hq_resample_direct(const ArrayXr& y,
Real orig_sr,
Real target_sr) {
Real ratio = target_sr / orig_sr;
Eigen::Index n_samples = static_cast<Eigen::Index>(std::ceil(y.size() * ratio));
ArrayXr y_hat(n_samples);

Real cutoff = std::min<Real>(1.0, ratio) * 0.95;
cutoff = std::min<Real>(1.0, std::max<Real>(cutoff, 1e-8));

Eigen::Index radius = static_cast<Eigen::Index>(std::ceil(64.0 / cutoff));
radius = std::max<Eigen::Index>(radius, 1);

Real beta = 14.0;
Real i0_beta = reference_modified_bessel_i0(beta);

for (Eigen::Index out_idx = 0; out_idx < n_samples; ++out_idx) {
Real input_pos = static_cast<Real>(out_idx) / ratio;
Eigen::Index center = static_cast<Eigen::Index>(std::floor(input_pos));
Eigen::Index start = std::max<Eigen::Index>(0, center - radius);
Eigen::Index stop = std::min<Eigen::Index>(y.size() - 1, center + radius);

Real sample = 0.0;
for (Eigen::Index in_idx = start; in_idx <= stop; ++in_idx) {
Real distance = input_pos - static_cast<Real>(in_idx);
Real x = distance / static_cast<Real>(radius);
Real window = 0.0;
if (std::abs(x) <= 1.0) {
Real arg = beta * std::sqrt(std::max<Real>(0.0, 1.0 - x * x));
window = reference_modified_bessel_i0(arg) / i0_beta;
}
Real weight = cutoff * reference_sinc(cutoff * distance) * window;
sample += y(in_idx) * weight;
}

y_hat(out_idx) = sample;
}

return y_hat;
}

class TempAudioFile {
public:
TempAudioFile(int sample_rate, int channels, int64_t frames) {
Expand Down Expand Up @@ -258,6 +324,25 @@ TEST(ResampleTest, KaiserHqPreservesToneAmplitude) {
EXPECT_GT(y_resampled.abs().maxCoeff(), 0.8);
}

TEST(ResampleTest, KaiserHqRationalRateMatchesDirectKernel) {
Real orig_sr = 48000;
Real target_sr = 22050;
Eigen::Index n = 257;

ArrayXr y(n);
for (Eigen::Index i = 0; i < n; ++i) {
Real x = static_cast<Real>(i);
y(i) = std::sin(0.17 * x) + 0.25 * std::cos(0.031 * x);
}

ArrayXr expected = reference_kaiser_hq_resample_direct(y, orig_sr, target_sr);
ArrayXr actual = resample(y, orig_sr, target_sr, "kaiser_hq", true, false);

ASSERT_EQ(actual.size(), expected.size());
Real max_error = (actual - expected).abs().maxCoeff();
EXPECT_LT(max_error, 1e-12);
}

TEST(ResampleTest, SoxrIsExplicitOptIn) {
Real orig_sr = 22050;
Real target_sr = 8000;
Expand Down
Loading