From 92abdb1a25d9f576e08920a1dea142c8f5cbdd95 Mon Sep 17 00:00:00 2001 From: Enrique Saurez Date: Fri, 12 Jun 2026 17:06:13 -0700 Subject: [PATCH] [nanvix] E: Build bz2/lzma/zlib/sqlite3 as DT_NEEDED .so chains Completes the external-library unbundling started in the parent PR by moving the remaining four stdlib extensions (_bz2, _lzma, zlib, _sqlite3) from being statically linked into python.elf to runtime-loaded .so files that emit DT_NEEDED for libbz2.so / liblzma.so / libz.so / libsqlite3.so. After this PR, python.elf bundles no external Nanvix-ported libraries at all -- every .so wrapper (_ssl, _hashlib, _ctypes, _sqlite3, zlib, _bz2, _lzma) consumes its underlying library through DT_NEEDED. This matches upstream cpython's default behavior on every Linux distro (system-library detection enabled). Changes ------- - .nanvix/setup_local.py: add 4 *shared* entries (_bz2, _lzma, zlib, _sqlite3). The corresponding LIBSQLITE3_LIBS / ZLIB_LIBS / BZIP2_LIBS / LIBLZMA_LIBS already use -L/-l form (no change needed there); cpython's normal MODULE_*_LDFLAGS machinery wires them into each .so's link line as DT_NEEDED. - Makefile.nanvix: drop -lsqlite3 -lz -lbz2 -llzma from python.elf's LIBS (those symbols come from the standalone .so files now). LIBS segment 2 becomes empty. - .nanvix/runtime_sos.py: REQUIRED_RUNTIME_SOS gains libbz2.so / liblzma.so / libz.so / libsqlite3.so so .nanvix/test.py and .nanvix/package.py stage them into sysroot/lib/ at run time. - .nanvix/z.py: _DEP_EXPECTED_LIBS for bzip2 / zlib / sqlite / xz extended with the .so siblings so `./z setup` validates that the new shared-library port builds were installed correctly into buildroot/lib. - .nanvix/test.py: extend CPYTHON_TEST_EXTERNAL_DEPS smoke test with the 4 new modules. Runtime dependencies -------------------- - nanvix/nanvix#2472 -- libm visibility fix (inherited from PR-A). - nanvix/nanvix#2473 -- dlfcn init-array + DT_RUNPATH support (inherited from PR-B). 
The 4 new .so files this PR consumes come from the Wave 6 port-repo PRs that must land first: - esaurez/bzip2#2 -- libbz2.so build target - esaurez/zlib#1 -- libz.so build target - esaurez/xz#1 -- liblzma.so build target - esaurez/sqlite#1 -- libsqlite3.so build target These four port-lib releases must be pinned in cpython's nanvix.toml before this PR is mergeable upstream. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .nanvix/runtime_sos.py | 8 ++++++++ .nanvix/setup_local.py | 42 ++++++++++++++++++++++++++++-------------- .nanvix/test.py | 5 +++++ .nanvix/z.py | 8 ++++---- Makefile.nanvix | 25 +++++++++++++------------ 5 files changed, 58 insertions(+), 30 deletions(-) diff --git a/.nanvix/runtime_sos.py b/.nanvix/runtime_sos.py index a45034fdb0b462..f35975dce2eb4b 100644 --- a/.nanvix/runtime_sos.py +++ b/.nanvix/runtime_sos.py @@ -31,10 +31,18 @@ # _ssl.cpython-312.so -> libssl.so + libcrypto.so # libssl.so -> libcrypto.so # _hashlib.cpython-312.so -> libcrypto.so +# _bz2.cpython-312.so -> libbz2.so +# _lzma.cpython-312.so -> liblzma.so +# zlib.cpython-312.so -> libz.so +# _sqlite3.cpython-312.so -> libsqlite3.so REQUIRED_RUNTIME_SOS: tuple[str, ...] = ( "libffi.so", "libcrypto.so", "libssl.so", + "libbz2.so", + "liblzma.so", + "libz.so", + "libsqlite3.so", ) diff --git a/.nanvix/setup_local.py b/.nanvix/setup_local.py index fea1256c2a9c3a..3fdcd701ce1a8b 100644 --- a/.nanvix/setup_local.py +++ b/.nanvix/setup_local.py @@ -277,25 +277,21 @@ class SetupEntry(NamedTuple): SetupEntry(name="termios", linkage=Linkage.SHARED, tokens=("termios.c",)), # ---------------- Modules with external Nanvix-ported deps --------- # - # libffi, libssl, libcrypto each ship as a .so under $(SYSROOT)/lib/ - # and the consuming extension .so (_ctypes, _ssl, _hashlib) - # references it via DT_NEEDED. The loader resolves them at dlopen - # time and binds UND symbols against python.elf .dynsym. 
- # - # _bz2 / _lzma / zlib / _sqlite3 are intentionally NOT moved here: - # the Nanvix port repos for libbz2 / liblzma / libz / libsqlite3 do - # not yet ship .so builds, so those four extensions stay statically - # built into python.elf (cpython upstream default) until the - # follow-up PR that lands alongside the Wave 6 port-repo .so PRs. + # libffi / libssl / libcrypto / libbz2 / liblzma / libz / libsqlite3 + # each ship as a .so under $(SYSROOT)/lib/ and the consuming + # extension .so emits DT_NEEDED for it; the Nanvix dynamic loader + # walks the chain at dlopen time and binds UND symbols against + # python.elf .dynsym. This matches the upstream cpython behavior + # when configure is invoked with system-library detection enabled + # (the default on every Linux distro). SetupEntry( name="_ssl", linkage=Linkage.SHARED, tokens=("_ssl.c",), section_header=( - "Stdlib modules with external Nanvix-ported deps that are " - "already shipped as .so by their respective port repos. " - "Each .so emits DT_NEEDED for the corresponding sysroot " - "library; the loader walks the chain at dlopen time." + "Stdlib modules with external Nanvix-ported deps. Each .so " + "emits DT_NEEDED for the corresponding sysroot library; the " + "loader walks the chain at dlopen time." 
), ), SetupEntry(name="_hashlib", linkage=Linkage.SHARED, tokens=("_hashopenssl.c",)), @@ -310,6 +306,24 @@ class SetupEntry(NamedTuple): "_ctypes/cfield.c", ), ), + SetupEntry(name="_bz2", linkage=Linkage.SHARED, tokens=("_bz2module.c",)), + SetupEntry(name="_lzma", linkage=Linkage.SHARED, tokens=("_lzmamodule.c",)), + SetupEntry(name="zlib", linkage=Linkage.SHARED, tokens=("zlibmodule.c",)), + SetupEntry( + name="_sqlite3", + linkage=Linkage.SHARED, + tokens=( + "_sqlite/blob.c", + "_sqlite/connection.c", + "_sqlite/cursor.c", + "_sqlite/microprotocols.c", + "_sqlite/module.c", + "_sqlite/prepare_protocol.c", + "_sqlite/row.c", + "_sqlite/statement.c", + "_sqlite/util.c", + ), + ), ) diff --git a/.nanvix/test.py b/.nanvix/test.py index 97ee95215a1b61..0f3a82ecb5e09e 100644 --- a/.nanvix/test.py +++ b/.nanvix/test.py @@ -114,6 +114,11 @@ ("_ssl", "hasattr(m, 'RAND_bytes')"), ("_hashlib", "hasattr(m, 'openssl_sha256') or hasattr(m, 'new')"), ("_ctypes", "hasattr(m, 'dlopen')"), + # libbz2 / liblzma / libz / libsqlite3: same DT_NEEDED model. + ("_bz2", "m.BZ2Compressor().compress(b'hello') is not None"), + ("_lzma", "hasattr(m, 'LZMACompressor')"), + ("zlib", "m.crc32(b'hello') == 0x3610a686"), + ("_sqlite3", "hasattr(m, 'connect')"), ), ), ) diff --git a/.nanvix/z.py b/.nanvix/z.py index 40f88d6b5d0e96..04d9a6f2c125f8 100644 --- a/.nanvix/z.py +++ b/.nanvix/z.py @@ -79,15 +79,15 @@ # Map dependency names to the library files they install into buildroot/lib. 
_DEP_EXPECTED_LIBS: dict[str, list[str]] = { - "bzip2": ["libbz2.a"], + "bzip2": ["libbz2.a", "libbz2.so"], "libffi": ["libffi.a", "libffi.so"], - "zlib": ["libz.a"], - "sqlite": ["libsqlite3.a"], + "zlib": ["libz.a", "libz.so"], + "sqlite": ["libsqlite3.a", "libsqlite3.so"], "openssl": ["libssl.a", "libcrypto.a", "libssl.so", "libcrypto.so"], "libxml2": ["libxml2.a"], "libxslt": ["libxslt.a", "libexslt.a"], "lxml": ["liblxml_etree.a", "liblxml_elementpath.a"], - "xz": ["liblzma.a"], + "xz": ["liblzma.a", "liblzma.so"], } diff --git a/Makefile.nanvix b/Makefile.nanvix index b070dd8bea5811..b99de253cd1111 100644 --- a/Makefile.nanvix +++ b/Makefile.nanvix @@ -185,17 +185,18 @@ endif # which libnvx_crt0 is the sole provider. Listing it first today keeps # python.elf using a consistent `_start` source across both states. # -# `LIBS` segment 2 (`--start-group ... --end-group`): the external -# Nanvix-ported libraries (`-lsqlite3 -lz -lbz2 -llzma`) whose .a files -# stay statically linked into python.elf. The corresponding extensions -# (_sqlite3, zlib, _bz2, _lzma) remain *static* in Setup.local for now -# because the Nanvix port repos for libbz2 / libz / liblzma / -# libsqlite3 do not yet ship .so builds. The follow-up cpython PR that -# lands alongside the Wave 6 port-repo .so PRs migrates these four -# extensions to *shared* with DT_NEEDED references to lib*.so. -lssl -# / -lcrypto / -lffi are NOT in this list -- they ship as separate .so -# files under sysroot/lib/ and are referenced via DT_NEEDED by their -# consumer .so modules (_ssl, _hashlib, _ctypes). +# `LIBS` segment 2 (empty after the Wave 7 unbundling): with libffi / +# libssl / libcrypto / libsqlite3 / libz / libbz2 / liblzma all +# shipped as .so under sysroot/lib/, python.elf no longer bundles any +# external Nanvix-ported libraries. 
The per-module .so wrappers +# (_ssl, _hashlib, _ctypes, _sqlite3, zlib, _bz2, _lzma) reference +# the corresponding system .so via DT_NEEDED; the Nanvix dynamic +# loader walks the chain at dlopen time and binds UND symbols against +# python.elf .dynsym. The per-module *_LIBS environment variables +# (LIBSQLITE3_LIBS, ZLIB_LIBS, BZIP2_LIBS, LIBLZMA_LIBS, LIBFFI_LIBS) +# in -L/-l form make ld emit the DT_NEEDED entry rather than +# embedding the .a -- exactly the upstream cpython behavior for +# system-library builds (Linux distro default). CONFIGURE_ENV = \ CC="$(TOOLCHAIN_PREFIX)/bin/i686-nanvix-gcc" \ CXX="$(TOOLCHAIN_PREFIX)/bin/i686-nanvix-g++" \ @@ -205,7 +206,7 @@ CONFIGURE_ENV = \ CFLAGS="-O3 -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -I$(SYSROOT_PATH)/include" \ CFLAGS_NODIST="-fno-semantic-interposition" \ LDFLAGS="-L$(SYSROOT_PATH)/lib -T$(SYSROOT_PATH)/lib/user.ld -Wl,--allow-multiple-definition -no-pie -Wl,--export-dynamic -Wl,--no-dynamic-linker" \ - LIBS="-Wl,--whole-archive $(LIBNVX_CRT0) $(LIBPOSIX) $(LIBC) $(LIBM) $(LIBSTDCXX) $(LIBGCC) -Wl,--no-whole-archive -Wl,--start-group -lsqlite3 -lz -lbz2 -llzma -Wl,--end-group" \ + LIBS="-Wl,--whole-archive $(LIBNVX_CRT0) $(LIBPOSIX) $(LIBC) $(LIBM) $(LIBSTDCXX) $(LIBGCC) -Wl,--no-whole-archive" \ LIBSQLITE3_LIBS="-L$(SYSROOT_PATH)/lib -lsqlite3" \ LIBSQLITE3_CFLAGS="-I$(SYSROOT_PATH)/include" \ ZLIB_LIBS="-L$(SYSROOT_PATH)/lib -lz" \