diff --git a/src/core/audio.cpp b/src/core/audio.cpp index f0d8594..4145847 100644 --- a/src/core/audio.cpp +++ b/src/core/audio.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #ifdef LIBROSA_HAS_SOXR #include @@ -279,6 +280,11 @@ namespace { Real rolloff; }; + struct RationalResamplePlan { + Eigen::Index source_step; + Eigen::Index phase_count; + }; + bool is_kaiser_resampler(const std::string& res_type) { return res_type == "kaiser" || res_type == "kaiser_vhq" || @@ -344,8 +350,45 @@ namespace { return modified_bessel_i0(arg) / i0_beta; } - ArrayXr kaiser_sinc_resample(const ArrayXr& y, Real ratio, Eigen::Index n_samples, - const SincResamplerSpec& spec) { + std::optional rational_resample_plan( + Real orig_sr, Real target_sr, Real ratio) { + long long orig_i = static_cast(std::llround(orig_sr)); + long long target_i = static_cast(std::llround(target_sr)); + + if (orig_i <= 0 || target_i <= 0) { + return std::nullopt; + } + + if (std::abs(orig_sr - static_cast(orig_i)) > 1e-6 || + std::abs(target_sr - static_cast(target_i)) > 1e-6) { + return std::nullopt; + } + + Real rational_ratio = + static_cast(target_i) / static_cast(orig_i); + if (std::abs(ratio - rational_ratio) > + 1e-12 * std::max(1.0, std::abs(ratio))) { + return std::nullopt; + } + + long long divisor = std::gcd(orig_i, target_i); + long long source_step = orig_i / divisor; + long long phase_count = target_i / divisor; + + // Avoid excessive setup for unusual non-reduced rates. + if (phase_count > 4096) { + return std::nullopt; + } + + return RationalResamplePlan{ + static_cast(source_step), + static_cast(phase_count) + }; + } + + ArrayXr kaiser_sinc_resample_direct(const ArrayXr& y, Real ratio, + Eigen::Index n_samples, + const SincResamplerSpec& spec) { ArrayXr y_hat(n_samples); if (n_samples == 0) { return y_hat; @@ -381,6 +424,82 @@ namespace { return y_hat; } + ArrayXr kaiser_sinc_resample_polyphase(const ArrayXr& y, Real ratio, + Eigen::Index n_samples, + const SincResamplerSpec& spec, + const RationalResamplePlan& plan) { + ArrayXr y_hat(n_samples); + if (n_samples == 0) { + return y_hat; + } + + Real cutoff = std::min(1.0, ratio) * spec.rolloff; + cutoff = std::min(1.0, std::max(cutoff, 1e-8)); + + Eigen::Index radius = static_cast( + std::ceil(static_cast(spec.zero_crossings) / cutoff)); + radius = std::max(radius, 1); + + Real i0_beta = modified_bessel_i0(spec.beta); + const Eigen::Index kernel_size = 2 * radius + 1; + + std::vector phase_center_offset( + static_cast(plan.phase_count)); + std::vector> phase_weights( + static_cast(plan.phase_count), + std::vector(static_cast(kernel_size))); + + for (Eigen::Index phase = 0; phase < plan.phase_count; ++phase) { + Real input_pos = static_cast(phase) / ratio; + Eigen::Index center = static_cast(std::floor(input_pos)); + phase_center_offset[static_cast(phase)] = center; + + for (Eigen::Index rel = -radius; rel <= radius; ++rel) { + Real distance = input_pos - static_cast(center + rel); + Real window = kaiser_window(distance, static_cast(radius), + spec.beta, i0_beta); + Real weight = cutoff * sinc(cutoff * distance) * window; + phase_weights[static_cast(phase)] + [static_cast(rel + radius)] = weight; + } + } + + for (Eigen::Index out_idx = 0; out_idx < n_samples; ++out_idx) { + Eigen::Index block = out_idx / plan.phase_count; + Eigen::Index phase = out_idx - block * plan.phase_count; + Eigen::Index center = + block * plan.source_step + + phase_center_offset[static_cast(phase)]; + + Eigen::Index first_rel = std::max(-radius, -center); + Eigen::Index last_rel = + std::min(radius, y.size() - 1 - center); + + Real sample = 0.0; + const auto& weights = phase_weights[static_cast(phase)]; + for (Eigen::Index rel = first_rel; rel <= last_rel; ++rel) { + sample += y(center + rel) * + weights[static_cast(rel + radius)]; + } + + y_hat(out_idx) = sample; + } + + return y_hat; + } + + ArrayXr kaiser_sinc_resample(const ArrayXr& y, Real orig_sr, Real target_sr, + Real ratio, Eigen::Index n_samples, + const SincResamplerSpec& spec) { + std::optional plan = + rational_resample_plan(orig_sr, target_sr, ratio); + if (plan) { + return kaiser_sinc_resample_polyphase(y, ratio, n_samples, spec, *plan); + } + + return kaiser_sinc_resample_direct(y, ratio, n_samples, spec); + } + #ifdef LIBROSA_HAS_SOXR unsigned long soxr_quality_recipe(const std::string& res_type) { if (res_type == "soxr_vhq") return SOXR_VHQ; @@ -660,7 +779,8 @@ ArrayXr resample(const ArrayXr& y, Real orig_sr, Real target_sr, ArrayXr y_hat; if (is_kaiser_resampler(res_type)) { - y_hat = kaiser_sinc_resample(y, ratio, n_samples, kaiser_resampler_spec(res_type)); + y_hat = kaiser_sinc_resample(y, orig_sr, target_sr, ratio, n_samples, + kaiser_resampler_spec(res_type)); } else if (is_soxr_resampler(res_type)) { #ifdef LIBROSA_HAS_SOXR y_hat = soxr_resample(y, orig_sr, target_sr, n_samples, res_type); diff --git a/tests/test_audio.cpp b/tests/test_audio.cpp index 86fc224..aa06c2e 100644 --- a/tests/test_audio.cpp +++ b/tests/test_audio.cpp @@ -9,6 +9,7 @@ #endif #include #include +#include #include #include #include @@ -31,6 +32,71 @@ ArrayXr random_array(Eigen::Index size) { return result; } +Real reference_sinc(Real x) { + if (std::abs(x) < 1e-12) { + return 1.0; + } + Real pix = constants::PI * x; + return std::sin(pix) / pix; +} + +Real reference_modified_bessel_i0(Real x) { + Real y = (x * x) * 0.25; + Real term = 1.0; + Real sum = 1.0; + + for (int k = 1; k < 80; ++k) { + term *= y / static_cast(k * k); + sum += term; + if (term <= sum * std::numeric_limits::epsilon()) { + break; + } + } + + return sum; +} + +ArrayXr reference_kaiser_hq_resample_direct(const ArrayXr& y, + Real orig_sr, + Real target_sr) { + Real ratio = target_sr / orig_sr; + Eigen::Index n_samples = static_cast(std::ceil(y.size() * ratio)); + ArrayXr y_hat(n_samples); + + Real cutoff = std::min(1.0, ratio) * 0.95; + cutoff = std::min(1.0, std::max(cutoff, 1e-8)); + + Eigen::Index radius = static_cast(std::ceil(64.0 / cutoff)); + radius = std::max(radius, 1); + + Real beta = 14.0; + Real i0_beta = reference_modified_bessel_i0(beta); + + for (Eigen::Index out_idx = 0; out_idx < n_samples; ++out_idx) { + Real input_pos = static_cast(out_idx) / ratio; + Eigen::Index center = static_cast(std::floor(input_pos)); + Eigen::Index start = std::max(0, center - radius); + Eigen::Index stop = std::min(y.size() - 1, center + radius); + + Real sample = 0.0; + for (Eigen::Index in_idx = start; in_idx <= stop; ++in_idx) { + Real distance = input_pos - static_cast(in_idx); + Real x = distance / static_cast(radius); + Real window = 0.0; + if (std::abs(x) <= 1.0) { + Real arg = beta * std::sqrt(std::max(0.0, 1.0 - x * x)); + window = reference_modified_bessel_i0(arg) / i0_beta; + } + Real weight = cutoff * reference_sinc(cutoff * distance) * window; + sample += y(in_idx) * weight; + } + + y_hat(out_idx) = sample; + } + + return y_hat; +} + class TempAudioFile { public: TempAudioFile(int sample_rate, int channels, int64_t frames) { @@ -258,6 +324,25 @@ TEST(ResampleTest, KaiserHqPreservesToneAmplitude) { EXPECT_GT(y_resampled.abs().maxCoeff(), 0.8); } +TEST(ResampleTest, KaiserHqRationalRateMatchesDirectKernel) { + Real orig_sr = 48000; + Real target_sr = 22050; + Eigen::Index n = 257; + + ArrayXr y(n); + for (Eigen::Index i = 0; i < n; ++i) { + Real x = static_cast(i); + y(i) = std::sin(0.17 * x) + 0.25 * std::cos(0.031 * x); + } + + ArrayXr expected = reference_kaiser_hq_resample_direct(y, orig_sr, target_sr); + ArrayXr actual = resample(y, orig_sr, target_sr, "kaiser_hq", true, false); + + ASSERT_EQ(actual.size(), expected.size()); + Real max_error = (actual - expected).abs().maxCoeff(); + EXPECT_LT(max_error, 1e-12); +} + TEST(ResampleTest, SoxrIsExplicitOptIn) { Real orig_sr = 22050; Real target_sr = 8000;