diff --git a/.nanvix/config.py b/.nanvix/config.py index 36b62c5df19951..fb4d1eaf02ed41 100644 --- a/.nanvix/config.py +++ b/.nanvix/config.py @@ -72,11 +72,20 @@ def toolchain_paths( "libssl": sr / "lib" / "libssl.a", "libcrypto": sr / "lib" / "libcrypto.a", "liblzma": sr / "lib" / "liblzma.a", + "libnvx_crt0": sr / "lib" / "libnvx_crt0.a", } def configure_env(toolchain: str | Path, sysroot: str | Path) -> dict[str, str]: - """Return the environment dict for ./configure.""" + """Return the environment dict for ./configure. + + NOTE: This helper is currently unused; the actual cpython build invokes + ``make -f Makefile.nanvix`` which has its own inline CONFIGURE_ENV. This + function is kept in sync so a future caller does not pick up stale link + flags. See ``Makefile.nanvix`` for the authoritative comment block + explaining the ``--whole-archive`` / ``--export-dynamic`` / + ``--allow-multiple-definition`` rationale. + """ tp = toolchain_paths(toolchain, sysroot) sr = Path(sysroot) return { @@ -96,7 +105,10 @@ def configure_env(toolchain: str | Path, sysroot: str | Path) -> dict[str, str]: f"-Wl,--export-dynamic -Wl,--no-dynamic-linker" ), "LIBS": ( - f"-Wl,--start-group {tp['libposix']} {tp['libc']} {tp['libm']} " + f"-Wl,--whole-archive {tp['libnvx_crt0']} {tp['libposix']} " + f"{tp['libc']} {tp['libm']} " + f"-lstdc++ -lgcc -Wl,--no-whole-archive " + f"-Wl,--start-group " f"-lsqlite3 -lssl -lcrypto -lz -lbz2 -llzma -lffi -Wl,--end-group" ), "LIBSQLITE3_LIBS": f"-L{sr}/lib -lsqlite3", diff --git a/Makefile.nanvix b/Makefile.nanvix index 4a11e1a19b3b02..00692eb7115de5 100644 --- a/Makefile.nanvix +++ b/Makefile.nanvix @@ -91,6 +91,14 @@ ifdef CONFIG_NANVIX LIBSQLITE3 := $(DOCKER_SYSROOT_PATH)/lib/libsqlite3.a LIBSSL := $(DOCKER_SYSROOT_PATH)/lib/libssl.a LIBCRYPTO := $(DOCKER_SYSROOT_PATH)/lib/libcrypto.a + # libnvx_crt0 ships the executable startup symbols (`_do_start`, `_start`, + # `c_trampoline`). 
It must be present in the Nanvix sysroot ahead of this + # cpython build; the existence check below fails loudly when it is not. + LIBNVX_CRT0 := $(DOCKER_SYSROOT_PATH)/lib/libnvx_crt0.a + # Host-side path used by the parse-time existence guard below. The + # docker build sees the sysroot at $(DOCKER_SYSROOT_PATH); the host + # only sees it at $(NANVIX_HOME). + LIBNVX_CRT0_HOST := $(abspath $(NANVIX_HOME))/lib/libnvx_crt0.a BUILD_PYTHON := $(DOCKER_TOOLCHAIN_PATH)/bin/python3 else TOOLCHAIN_PREFIX := $(NANVIX_TOOLCHAIN) @@ -102,8 +110,19 @@ ifdef CONFIG_NANVIX LIBSQLITE3 := $(abspath $(NANVIX_HOME))/lib/libsqlite3.a LIBSSL := $(abspath $(NANVIX_HOME))/lib/libssl.a LIBCRYPTO := $(abspath $(NANVIX_HOME))/lib/libcrypto.a + LIBNVX_CRT0 := $(abspath $(NANVIX_HOME))/lib/libnvx_crt0.a + LIBNVX_CRT0_HOST := $(LIBNVX_CRT0) BUILD_PYTHON := $(NANVIX_TOOLCHAIN)/bin/python3 endif + + # libstdc++ / libgcc are referenced via `-l` rather than absolute paths so + # the GCC driver resolves them: libgcc lives under a versioned dir + # (`lib/gcc/i686-nanvix/VERSION/libgcc.a`) and hardcoding a version + # would be fragile across toolchain upgrades. Defined once at top-level + # because the `-l` form is identical between the docker and host build + # paths above. + LIBSTDCXX := -lstdc++ + LIBGCC := -lgcc else ifneq ($(MAKECMDGOALS),clean) ifneq ($(MAKECMDGOALS),distclean) @@ -113,7 +132,74 @@ else EXE=.elf endif +# Existence check for libnvx_crt0.a. +# +# libnvx_crt0.a is the executable startup archive (`_do_start` / `_start` / +# `c_trampoline`) introduced by the Nanvix `nvx-crt0` crate split. cpython +# requires a Nanvix sysroot that ships it. If the user is building against +# an older sysroot snapshot, fail at make-parse time with an actionable +# message rather than producing a python.elf with no entry point. +# +# Skip the guard for `clean` / `distclean` (which should work against any +# sysroot, including ones with no libnvx_crt0.a).
Empty MAKECMDGOALS is +# treated as a build because `make` with no target invokes `all`. +ifdef CONFIG_NANVIX +GUARD_GOALS := $(filter-out clean distclean,$(or $(MAKECMDGOALS),build)) +ifneq ($(GUARD_GOALS),) +ifeq ($(wildcard $(LIBNVX_CRT0_HOST)),) +$(error libnvx_crt0.a not found at $(LIBNVX_CRT0_HOST). Update the Nanvix sysroot to one that ships libnvx_crt0.a (the nvx-crt0 crate must be present and built into the sysroot lib/ directory).) +endif +endif +endif + # Configure environment variables +# +# Linker flag rationale: +# +# `-Wl,--export-dynamic`: put every globally-defined symbol from python.elf +# into its `.dynsym`. Extension `.so`s (numpy's `_multiarray_umath.so`, ssl, +# etc.) leave C/C++ runtime symbols UND and resolve them against python.elf +# at dlopen() time. Without this, those `.so`s fail to load. +# +# `-Wl,--allow-multiple-definition`: tolerates duplicate symbol definitions +# that arise from forcing every libposix / libc / libm / libstdc++ / libgcc +# object into python.elf via `--whole-archive` below. The biggest set is +# newlib's long-double math helpers (`frexpl`, `llrintl`, `lrintl`, `rintl` +# are defined in three different newlib directories simultaneously — a +# newlib build-system bug). Other known overlaps include libposix vs. libc +# (`_start`, `copysign[f]`, `getenv`, `setenv`, `unsetenv`, `environ`, +# `isatty`), libc vs. libm (`frexp`, `ldexp`, `modf`, `isnan`, `isinf`, +# `scalbn`, …), libm vs. libstdc++ (`hypotf`), libgcc internal duplicates +# (`__x86.get_pc_thunk.*`), and a libc / libgcc `__eprintf`. The set is +# large and toolchain-build-version-dependent; treating the link as +# multiple-definition-tolerant is the only practical workaround until each +# upstream is fixed. Remove this flag once the contributing duplicates are +# resolved upstream. +# +# `LIBS` segment 1 (`--whole-archive ... 
--no-whole-archive`): force every +# object from libnvx_crt0, libposix, libc, libm, libstdc++, and libgcc into +# python.elf so the runtime symbols extension `.so`s depend on are embedded +# and re-exported via `--export-dynamic`. Without `--whole-archive`, the +# static linker drops unreferenced objects (e.g. `fscanf`, `longjmp`, +# `strtold_l` for numpy; `operator new/delete[]`, `__cxa_*`, `_Unwind_*`, +# `std::type_info` vtables for any C++ extension) and subsequent dlopen() +# of those `.so`s fails with "symbol not found". +# +# `libnvx_crt0` is listed first inside `--whole-archive`. Today, libposix.a +# still ships its own copy of the startup symbols (`_start`, `_do_start`, +# `c_trampoline`) because `nvx` builds them in under the `staticlib` +# feature. With `--allow-multiple-definition` (above) the linker takes the +# first definition, so listing libnvx_crt0 first selects the standalone +# crt0 copy of `_start` and friends. This is an intentional behaviour +# change: future Nanvix versions remove the duplicate from libposix, after +# which libnvx_crt0 is the sole provider. Listing it first today keeps +# python.elf using a consistent `_start` source across both states. +# +# `LIBS` segment 2 (`--start-group ... --end-group`): the external add-on +# libraries (sqlite3, ssl, crypto, z, bz2, lzma, ffi). The group is needed +# only for their inter-archive circular dependencies; they resolve symbols +# from libposix/libc/libm/libstdc++ against the already-embedded objects +# from segment 1. 
CONFIGURE_ENV = \ CC="$(TOOLCHAIN_PREFIX)/bin/i686-nanvix-gcc" \ CXX="$(TOOLCHAIN_PREFIX)/bin/i686-nanvix-g++" \ @@ -123,7 +209,7 @@ CONFIGURE_ENV = \ CFLAGS="-O3 -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -I$(SYSROOT_PATH)/include" \ CFLAGS_NODIST="-fno-semantic-interposition" \ LDFLAGS="-L$(SYSROOT_PATH)/lib -T$(SYSROOT_PATH)/lib/user.ld -Wl,--allow-multiple-definition -no-pie -Wl,--export-dynamic -Wl,--no-dynamic-linker" \ - LIBS="-Wl,--start-group $(LIBPOSIX) $(LIBC) $(LIBM) -lsqlite3 -lssl -lcrypto -lz -lbz2 -llzma -lffi -Wl,--end-group" \ + LIBS="-Wl,--whole-archive $(LIBNVX_CRT0) $(LIBPOSIX) $(LIBC) $(LIBM) $(LIBSTDCXX) $(LIBGCC) -Wl,--no-whole-archive -Wl,--start-group -lsqlite3 -lssl -lcrypto -lz -lbz2 -llzma -lffi -Wl,--end-group" \ LIBSQLITE3_LIBS="-L$(SYSROOT_PATH)/lib -lsqlite3" \ LIBSQLITE3_CFLAGS="-I$(SYSROOT_PATH)/include" \ ZLIB_LIBS="-L$(SYSROOT_PATH)/lib -lz" \