From 33d506084ed46e083df10d016efaa7579464f2ba Mon Sep 17 00:00:00 2001 From: Oli Larkin Date: Sat, 30 May 2026 08:02:23 +0200 Subject: [PATCH] Add optional SOXR resampling support --- CMakeLists.txt | 18 ++++++++ NOTICE.md | 12 ++--- README.md | 11 +++++ include/librosa/core/audio.hpp | 3 +- include/librosa/effects.hpp | 3 +- requirements-dev.txt | 1 + src/core/audio.cpp | 65 +++++++++++++++++++++++++++ tests/crossval/README.md | 2 + tests/crossval/generate_references.py | 27 +++++++++++ tests/crossval/test_crossval.cpp | 30 +++++++++++++ tests/test_audio.cpp | 16 +++++++ 11 files changed, 181 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b56e5dc..b063fa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ option(LIBROSA_BUILD_CROSSVAL_TESTS "Build cross-validation tests against Python option(LIBROSA_BUILD_CLI "Build the librosa CLI tool" OFF) option(LIBROSA_BUILD_WASM "Build the Emscripten WASM/npm binding" OFF) option(LIBROSA_BUILD_SWIFT_C_WRAPPER "Build the Swift-friendly C ABI wrapper" OFF) +option(LIBROSA_USE_SOXR "Use libsoxr for explicit SOXR resampling modes" OFF) if(APPLE) option(LIBROSA_USE_AUDIOTOOLBOX "Use Apple AudioToolbox for audio file I/O" ON) else() @@ -33,6 +34,10 @@ if(LIBROSA_BUILD_WASM AND NOT EMSCRIPTEN) message(FATAL_ERROR "LIBROSA_BUILD_WASM requires configuring with emcmake/Emscripten") endif() +if(LIBROSA_BUILD_WASM AND LIBROSA_USE_SOXR) + message(FATAL_ERROR "LIBROSA_USE_SOXR is not supported for Emscripten/WASM builds") +endif() + if(LIBROSA_USE_AUDIOTOOLBOX AND NOT APPLE) message(FATAL_ERROR "LIBROSA_USE_AUDIOTOOLBOX is only available on Apple platforms") endif() @@ -54,6 +59,12 @@ else() endif() endif() +if(LIBROSA_USE_SOXR) + find_package(PkgConfig REQUIRED) + pkg_check_modules(SOXR REQUIRED soxr) + message(STATUS "librosa SOXR resampler: enabled (${SOXR_VERSION})") +endif() + # Resolve FFT backend selection. if(LIBROSA_FFT_BACKEND STREQUAL "auto") if(APPLE) @@ -169,6 +180,13 @@ elseif(LIBROSA_USE_AUDIOTOOLBOX) target_link_libraries(librosa PUBLIC "-framework AudioToolbox" "-framework CoreFoundation") endif() +if(LIBROSA_USE_SOXR) + target_compile_definitions(librosa PUBLIC LIBROSA_HAS_SOXR) + target_include_directories(librosa PRIVATE ${SOXR_INCLUDE_DIRS}) + target_link_directories(librosa PUBLIC ${SOXR_LIBRARY_DIRS}) + target_link_libraries(librosa PUBLIC ${SOXR_LIBRARIES}) +endif() + if(_librosa_fft_backend STREQUAL "fftw") target_include_directories(librosa PRIVATE ${FFTW3_INCLUDE_DIRS}) target_link_directories(librosa PUBLIC ${FFTW3_LIBRARY_DIRS}) diff --git a/NOTICE.md b/NOTICE.md index db81201..034532c 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -37,13 +37,15 @@ These are *not* bundled — they're expected to come from the user's system | [FFTW3](https://www.fftw.org/) | when `LIBROSA_FFT_BACKEND=fftw` (default on Linux/Windows) | GPL-2.0-or-later | If distributing binaries, consider using the `accelerate` or `pffft` backend, or obtain a non-GPL FFTW commercial license. | | Apple Accelerate framework | when `LIBROSA_FFT_BACKEND=accelerate` (default on Apple) | Apple SDK terms | System framework, no extra install. | | Apple AudioToolbox framework | when `LIBROSA_USE_AUDIOTOOLBOX=ON` or when building the Swift package | Apple SDK terms | System audio file I/O framework, no extra install. | +| [libsoxr](https://sourceforge.net/projects/soxr/) | when `LIBROSA_USE_SOXR=ON` | LGPL-2.1-or-later | Optional SOXR resampler for Python-librosa parity checks. Off by default. | -### LGPL notes (libsndfile) +### LGPL notes (libsndfile, libsoxr) -libsndfile is LGPL-2.1. Apple SwiftPM builds do not link it. For non-Apple -CMake builds that do link it, librosa.cpp itself remains ISC, but a binary that -links against an LGPL library inherits LGPL obligations for the combined work. -In practice this means: +libsndfile is LGPL-2.1 and libsoxr is LGPL-2.1-or-later. Apple SwiftPM builds +do not link either one by default. For CMake builds that do link an LGPL +library, librosa.cpp itself remains ISC, but a binary that links against an +LGPL library inherits LGPL obligations for the combined work. In practice this +means: - Dynamic linking (the default on all platforms when using system packages) satisfies the LGPL naturally — end users can swap the `.so` / `.dylib`. diff --git a/README.md b/README.md index 58eae62..390061b 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,9 @@ Build-time: - An internal Kaiser-windowed sinc resampler provides the `kaiser_*` modes used by the CQT/default resample path, including `kaiser_hq`. No libsoxr install or LGPL resampler link is required. +- Optional SOXR resampling modes (`soxr_vhq`, `soxr_hq`, `soxr_mq`, + `soxr_lq`, `soxr_qq`) can be enabled with `-DLIBROSA_USE_SOXR=ON` for + Python-librosa parity checks. This is off by default because libsoxr is LGPL. Bundled (no action needed): @@ -64,6 +67,14 @@ Accelerate backend): brew install cmake ninja ``` +To enable optional SOXR resampling modes in CMake builds, install libsoxr and +configure with `-DLIBROSA_USE_SOXR=ON`: + +```bash +brew install libsoxr +cmake -S . -B build-soxr -DLIBROSA_USE_SOXR=ON +``` + ## Build ```bash diff --git a/include/librosa/core/audio.hpp b/include/librosa/core/audio.hpp index 216b60b..2def899 100644 --- a/include/librosa/core/audio.hpp +++ b/include/librosa/core/audio.hpp @@ -75,7 +75,8 @@ ArrayXr to_mono(const ArrayXr& y); // Pass-through for already mono /// @param y Input signal /// @param orig_sr Original sample rate /// @param target_sr Target sample rate -/// @param res_type Resampling method ("kaiser_*", "fft", or "linear") +/// @param res_type Resampling method ("kaiser_*", "fft", "linear", or +/// "soxr_*" when built with LIBROSA_USE_SOXR) /// @param fix Adjust length to match expected /// @param scale Scale for energy preservation /// @return Resampled signal diff --git a/include/librosa/effects.hpp b/include/librosa/effects.hpp index dfb4baf..40ec6af 100644 --- a/include/librosa/effects.hpp +++ b/include/librosa/effects.hpp @@ -39,7 +39,8 @@ ArrayXr time_stretch( /// @param sr Sample rate /// @param n_steps Number of steps to shift (can be fractional) /// @param bins_per_octave Number of steps per octave -/// @param res_type Resampling method ("kaiser_*", "fft", or "linear") +/// @param res_type Resampling method ("kaiser_*", "fft", "linear", or +/// "soxr_*" when built with LIBROSA_USE_SOXR) /// @param n_fft FFT window size /// @param hop_length Samples between frames /// @return Pitch-shifted audio diff --git a/requirements-dev.txt b/requirements-dev.txt index a9b6a26..7db1a15 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,3 +5,4 @@ # Pin to the upstream version librosa.cpp is ported from. librosa==0.11.0 numpy +soxr diff --git a/src/core/audio.cpp b/src/core/audio.cpp index 106721d..f0d8594 100644 --- a/src/core/audio.cpp +++ b/src/core/audio.cpp @@ -13,6 +13,9 @@ #include #include #include +#ifdef LIBROSA_HAS_SOXR +#include +#endif #include "../internal/fft.hpp" namespace librosa { @@ -285,6 +288,10 @@ namespace { res_type == "kaiser_fast"; } + bool is_soxr_resampler(const std::string& res_type) { + return res_type.rfind("soxr", 0) == 0; + } + SincResamplerSpec kaiser_resampler_spec(const std::string& res_type) { if (res_type == "kaiser_vhq") { return {96, 16.0, 0.975}; @@ -373,6 +380,57 @@ namespace { return y_hat; } + +#ifdef LIBROSA_HAS_SOXR + unsigned long soxr_quality_recipe(const std::string& res_type) { + if (res_type == "soxr_vhq") return SOXR_VHQ; + if (res_type == "soxr_hq") return SOXR_HQ; + if (res_type == "soxr_mq") return SOXR_MQ; + if (res_type == "soxr_lq") return SOXR_LQ; + if (res_type == "soxr_qq") return SOXR_QQ; + throw ParameterError("Unknown SOXR resampling type: " + res_type); + } + + ArrayXr soxr_resample(const ArrayXr& y, Real orig_sr, Real target_sr, + Eigen::Index n_samples, const std::string& res_type) { + ArrayXr y_hat(n_samples); + if (n_samples == 0) { + return y_hat; + } + + soxr_io_spec_t io_spec = soxr_io_spec(SOXR_FLOAT64_I, SOXR_FLOAT64_I); + soxr_quality_spec_t quality_spec = + soxr_quality_spec(soxr_quality_recipe(res_type), 0); + soxr_runtime_spec_t runtime_spec = soxr_runtime_spec(1); + + size_t idone = 0; + size_t odone = 0; + soxr_error_t err = soxr_oneshot( + static_cast(orig_sr), + static_cast(target_sr), + 1, + y.data(), + static_cast(y.size()), + &idone, + y_hat.data(), + static_cast(n_samples), + &odone, + &io_spec, + &quality_spec, + &runtime_spec); + + if (err) { + throw ParameterError(std::string("SOXR resampling failed: ") + + soxr_strerror(err)); + } + + if (odone != static_cast(y_hat.size())) { + y_hat.conservativeResize(static_cast(odone)); + } + + return y_hat; + } +#endif } // ============================================================================ @@ -603,6 +661,13 @@ ArrayXr resample(const ArrayXr& y, Real orig_sr, Real target_sr, if (is_kaiser_resampler(res_type)) { y_hat = kaiser_sinc_resample(y, ratio, n_samples, kaiser_resampler_spec(res_type)); + } else if (is_soxr_resampler(res_type)) { +#ifdef LIBROSA_HAS_SOXR + y_hat = soxr_resample(y, orig_sr, target_sr, n_samples, res_type); +#else + throw ParameterError( + "SOXR resampling requires configuring with -DLIBROSA_USE_SOXR=ON"); +#endif } else if (res_type == "fft" || res_type == "scipy") { int n_fft = y.size(); int n_out = n_samples; diff --git a/tests/crossval/README.md b/tests/crossval/README.md index 0c5f42d..905e5cb 100644 --- a/tests/crossval/README.md +++ b/tests/crossval/README.md @@ -68,11 +68,13 @@ Different modules may require different tolerance levels due to: Current tolerances: - `DEFAULT_TOLERANCE = 1e-5`: For exact mathematical operations +- `1e-6`: For optional SOXR resampling parity when libsoxr is enabled - `LOOSE_TOLERANCE = 1e-3`: For filter banks, spectral features ## Modules Covered - [x] Convert (hz_to_mel, mel_to_hz, hz_to_midi, amplitude_to_db, power_to_db) +- [x] Audio (SOXR resampling modes when built with `LIBROSA_USE_SOXR=ON`) - [x] Filters (mel filterbank, chroma filterbank) - [x] Spectrum (STFT magnitude/phase) - [x] Features (melspectrogram, MFCC, chroma, spectral features, RMS, ZCR) diff --git a/tests/crossval/generate_references.py b/tests/crossval/generate_references.py index 71835f0..25a7ebf 100644 --- a/tests/crossval/generate_references.py +++ b/tests/crossval/generate_references.py @@ -96,6 +96,32 @@ def generate_spectrum_references(): {"n_fft": n_fft, "hop_length": hop_length}) +def generate_resample_references(): + """Generate references for SOXR resampling.""" + print("\n=== Resample Module ===") + + sr = 22050 + target_sr = 8000 + duration = 0.25 + t = np.arange(int(sr * duration), dtype=np.float64) / sr + y = ( + 0.5 * np.sin(2 * np.pi * 220.0 * t) + + 0.25 * np.sin(2 * np.pi * 997.0 * t) + + 0.1 * np.sin(2 * np.pi * 3200.0 * t) + ) + y[::997] += 0.05 + y = y.astype(np.float64) + + save_array("resample_test_signal", y, {"sr": sr, "target_sr": target_sr}) + + for res_type in ["soxr_vhq", "soxr_hq", "soxr_mq", "soxr_lq", "soxr_qq"]: + y_hat = librosa.resample( + y, orig_sr=sr, target_sr=target_sr, res_type=res_type + ) + save_array(f"resample_{res_type}", y_hat, + {"sr": sr, "target_sr": target_sr, "res_type": res_type}) + + def generate_filters_references(): """Generate references for filters module.""" print("\n=== Filters Module ===") @@ -717,6 +743,7 @@ def main(): generate_convert_references() generate_spectrum_references() + generate_resample_references() generate_filters_references() generate_feature_references() generate_onset_references() diff --git a/tests/crossval/test_crossval.cpp b/tests/crossval/test_crossval.cpp index 51d7c58..bacb932 100644 --- a/tests/crossval/test_crossval.cpp +++ b/tests/crossval/test_crossval.cpp @@ -15,6 +15,7 @@ // Include all librosa headers #include +#include #include #include #include @@ -490,6 +491,35 @@ TEST_F(CrossValidationTest, ChromaFilterbank) { expectArrayNear(chroma_fb, expected, LOOSE_TOLERANCE, "chroma filterbank"); } +// ============================================================================ +// Audio Resampling Tests +// ============================================================================ + +TEST_F(CrossValidationTest, ResampleSoxrModes) { + if (dataDir.empty()) GTEST_SKIP() << "Reference data not found"; + +#ifndef LIBROSA_HAS_SOXR + GTEST_SKIP() << "libsoxr support not enabled"; +#else + json_util::ArrayData signal_ref; + if (!loadArray("resample_test_signal", signal_ref)) GTEST_SKIP(); + + ArrayXr y = signal_ref.toArrayXr(); + const std::vector modes = { + "soxr_vhq", "soxr_hq", "soxr_mq", "soxr_lq", "soxr_qq" + }; + + for (const auto& mode : modes) { + json_util::ArrayData expected_ref; + ASSERT_TRUE(loadArray("resample_" + mode, expected_ref)); + + ArrayXr actual = resample(y, 22050, 8000, mode, true, false); + ArrayXr expected = expected_ref.toArrayXr(); + expectArrayNear(actual, expected, 1e-6, "resample " + mode); + } +#endif +} + // ============================================================================ // Spectrum Module Tests // ============================================================================ diff --git a/tests/test_audio.cpp b/tests/test_audio.cpp index e7e3019..86fc224 100644 --- a/tests/test_audio.cpp +++ b/tests/test_audio.cpp @@ -258,6 +258,22 @@ TEST(ResampleTest, KaiserHqPreservesToneAmplitude) { EXPECT_GT(y_resampled.abs().maxCoeff(), 0.8); } +TEST(ResampleTest, SoxrIsExplicitOptIn) { + Real orig_sr = 22050; + Real target_sr = 8000; + ArrayXr y = random_array(2048); + +#ifdef LIBROSA_HAS_SOXR + ArrayXr y_resampled = resample(y, orig_sr, target_sr, "soxr_hq", true, false); + Eigen::Index expected_length = static_cast( + std::ceil(static_cast(y.size()) * target_sr / orig_sr)); + EXPECT_EQ(y_resampled.size(), expected_length); + EXPECT_GT(y_resampled.abs().maxCoeff(), 0.0); +#else + EXPECT_THROW(resample(y, orig_sr, target_sr, "soxr_hq", true, false), ParameterError); +#endif +} + TEST(ResampleTest, InvalidSampleRatesThrow) { ArrayXr y = ArrayXr::Ones(32);