Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions .nanvix/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,20 @@ def toolchain_paths(
"libssl": sr / "lib" / "libssl.a",
"libcrypto": sr / "lib" / "libcrypto.a",
"liblzma": sr / "lib" / "liblzma.a",
"libnvx_crt0": sr / "lib" / "libnvx_crt0.a",
}


def configure_env(toolchain: str | Path, sysroot: str | Path) -> dict[str, str]:
"""Return the environment dict for ./configure."""
"""Return the environment dict for ./configure.

NOTE: This helper is currently unused; the actual cpython build invokes
``make -f Makefile.nanvix`` which has its own inline CONFIGURE_ENV. This
function is kept in sync so a future caller does not pick up stale link
flags. See ``Makefile.nanvix`` for the authoritative comment block
explaining the ``--whole-archive`` / ``--export-dynamic`` /
``--allow-multiple-definition`` rationale.
"""
tp = toolchain_paths(toolchain, sysroot)
sr = Path(sysroot)
return {
Expand All @@ -96,7 +105,10 @@ def configure_env(toolchain: str | Path, sysroot: str | Path) -> dict[str, str]:
f"-Wl,--export-dynamic -Wl,--no-dynamic-linker"
),
"LIBS": (
f"-Wl,--start-group {tp['libposix']} {tp['libc']} {tp['libm']} "
f"-Wl,--whole-archive {tp['libnvx_crt0']} {tp['libposix']} "
f"{tp['libc']} {tp['libm']} "
f"-lstdc++ -lgcc -Wl,--no-whole-archive "
f"-Wl,--start-group "
f"-lsqlite3 -lssl -lcrypto -lz -lbz2 -llzma -lffi -Wl,--end-group"
),
"LIBSQLITE3_LIBS": f"-L{sr}/lib -lsqlite3",
Expand Down
8 changes: 7 additions & 1 deletion .nanvix/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,19 @@ def _generate_setup_local_cmd() -> str:
return (
f"printf '%s\\n' "
f"'# Auto-generated by .nanvix/docker.py -- do not edit manually.' "
f"'' "
f"'# Statically-linked extension modules for Nanvix builds.' "
f"'#' "
f"'*static*' "
f"'# Nanvix OS interface module (snapshot, host-mount).' "
f"'_nanvix _nanvixmodule.c' "
f"'# lxml C extension modules (statically linked via pre-built archives).' "
f"'_lxml_etree lxml_etree_builtin.c -L{sysroot}/lib -llxml_etree -lxslt -lexslt -lxml2 -lz' "
f"'_lxml_elementpath lxml_elementpath_builtin.c -L{sysroot}/lib -llxml_elementpath -lxml2 -lz' "
f"'' "
f"'# Phase 0 of the .a -> .so migration: array as proof-of-concept shared module.' "
f"'# See nanvix-todo/cpython-static-to-shared-migration.md section 4.' "
f"'*shared*' "
f"'array arraymodule.c' "
f"> {ws}/Modules/Setup.local"
)

Expand Down
48 changes: 48 additions & 0 deletions .nanvix/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,57 @@

FROM ghcr.io/nanvix/toolchain-gcc:sha-34a3641

# Install the host Python plus the build helpers required by extension
# modules that ship a meson or Cython build step (numpy, scipy, pandas,
# ...):
#
# - ninja — meson's default backend; missing it makes every meson-based
# extension build fail immediately.
# - Cython — required by numpy 1.26.4's `numpy/_build_utils/tempita.py`
# .pyx.in code generation. Pinned `<3` for numpy 1.26.x
# compatibility; lift the pin when bumping numpy.
#
# We deliberately purge `/usr/include/python3.12` after the install. The
# `python3-pip` / `ninja-build` apt packages transitively pull in
# `libpython3.12-dev`, whose headers under `/usr/include/python3.12` would
# otherwise be picked up by meson's regen step ahead of the Nanvix cross
# sysroot headers and silently corrupt the cross-build.
#
# NOTE(review): `python3-dev` is also requested explicitly in the install
# list below; presumably it brings in the same headers directly — confirm
# whether it is still needed given that the headers are purged afterwards.
#
# The final `ln -sf` exposes the host interpreter as
# /opt/nanvix/bin/python3.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
python3 \
python3-dev \
python3-pip \
ninja-build \
&& pip3 install --break-system-packages --no-cache-dir 'Cython<3' \
&& rm -rf /usr/include/python3.12 \
&& rm -rf /var/lib/apt/lists/* \
&& ln -sf /usr/bin/python3 /opt/nanvix/bin/python3

# Install the cc-wrapper. This wrapper sits in front of the real
# `i686-nanvix-gcc` / `i686-nanvix-g++` driver binaries and detects
# whether the invocation is producing an executable or a shared library.
# For shared-library links (-shared) it strips exe-only LDFLAGS that
# would otherwise be inherited from cpython's single `LDFLAGS` env var
# and cause `.so` builds to fail when the linker treats the output as
# an executable. See cc-wrapper.sh's header for the full rationale.
#
# Install pattern: rename the real driver to `<name>.real`, install the
# wrapper script at `<name>`, and symlink for both gcc and g++.
#
# The `sed` pass strips a trailing carriage return from every line of the
# copied script (e.g. if it was checked out with CRLF line endings) before
# it is made executable. The symlinks use a relative target, so they
# resolve to the wrapper script in the same directory.
#
# Per tool, the loop handles three starting states:
#   - `<name>` is a symlink: `<name>.real` must already exist, otherwise
#     the build aborts; the old symlink is removed.
#   - `<name>` is not a symlink and `<name>.real` is absent: the driver is
#     moved to `<name>.real`.
#   - `<name>.real` already exists: nothing is moved.
# In every non-aborting case `<name>` ends up as a symlink to the wrapper.
COPY cc-wrapper.sh /opt/nanvix/bin/i686-nanvix-cc-wrapper.sh
RUN sed -i 's/\r$//' /opt/nanvix/bin/i686-nanvix-cc-wrapper.sh \
&& chmod +x /opt/nanvix/bin/i686-nanvix-cc-wrapper.sh \
&& for tool in i686-nanvix-gcc i686-nanvix-g++; do \
if [ -L "/opt/nanvix/bin/$tool" ]; then \
# Pre-existing wrapper symlink: require the matching .real to
# already exist (set up by a prior wrapper install) before we
# replace the symlink, so we never strand the toolchain.
if [ ! -f "/opt/nanvix/bin/$tool.real" ]; then \
echo "cc-wrapper install: $tool is a symlink but $tool.real is missing; aborting" >&2; \
exit 1; \
fi; \
rm /opt/nanvix/bin/$tool; \
elif [ ! -f "/opt/nanvix/bin/$tool.real" ]; then \
mv /opt/nanvix/bin/$tool /opt/nanvix/bin/$tool.real; \
fi; \
ln -sf i686-nanvix-cc-wrapper.sh /opt/nanvix/bin/$tool; \
done
108 changes: 108 additions & 0 deletions .nanvix/docker/cc-wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/bin/bash
# cc-wrapper for the i686-nanvix-gcc / i686-nanvix-g++ compiler drivers.
#
# Purpose
# -------
# The Nanvix build of CPython hands a single `LDFLAGS` value to
# `./configure`, and cpython's build system reuses that value for the main
# `python.elf` link AND for every extension-module `.so` link. The value
# carries flags that are only correct for an executable:
#
#   -T user.ld                 Selects an executable layout. On a `-shared`
#                              link `ld` then treats the output as an exe
#                              and rejects any undefined symbol -- even the
#                              C API symbols that should resolve at
#                              dlopen() time against the main exe's
#                              `.dynsym`.
#   -no-pie                    Disables PIE; shared libraries must be PIC.
#   -Wl,--no-dynamic-linker    Meaningless for a `.so`.
#   -Wl,--export-dynamic       Exe-only.
#
# This wrapper lets that one `LDFLAGS` value work for both link modes, so
# the Makefiles consuming the toolchain do not need to tell them apart.
#
# Dispatch
# --------
#   * `-c`, `-S` or `-E` present -> compile-only; arguments are forwarded
#                                   unchanged.
#   * `-shared` absent           -> executable link (or any other
#                                   non-shared invocation); arguments are
#                                   forwarded unchanged.
#   * `-shared` present          -> shared-library link; the exe-only flags
#                                   above are dropped and `-fPIC` is added
#                                   if it is not already there.
#
# Installation layout
# -------------------
# The wrapper is reached through symlinks named after each driver
# (i686-nanvix-gcc -> cc-wrapper.sh, i686-nanvix-g++ -> cc-wrapper.sh) and
# chooses the real driver from the name it was invoked as. Each real
# driver is kept next to the wrapper with a `.real` suffix:
# i686-nanvix-gcc.real, i686-nanvix-g++.real.
#
# Design note: nanvix-todo/c-extension-compiler-wrapper.md

set -e

# Resolve the real driver: same directory and name as the invoked path,
# with `.real` appended.
invoked_dir="$(dirname "$0")"
invoked_as="$(basename "$0")"
driver="${invoked_dir}/${invoked_as}.real"

[[ -x "$driver" ]] || {
    echo "cc-wrapper: real binary not found at $driver" >&2
    exit 1
}

# First pass: classify the invocation.
link_shared=no
no_link=no
for opt in "$@"; do
    if [[ "$opt" == "-shared" ]]; then
        link_shared=yes
    elif [[ "$opt" == "-c" || "$opt" == "-S" || "$opt" == "-E" ]]; then
        no_link=yes
    fi
done

# Anything that is not a shared-library link goes through untouched.
if [[ "$no_link" == yes || "$link_shared" == no ]]; then
    exec "$driver" "$@"
fi

# Second pass (shared-library link only): rebuild the argument list without
# the exe-only flags, remembering whether -fPIC was already supplied.
kept=()
saw_fpic=no
drop_operand=no
for opt in "$@"; do
    if [[ "$drop_operand" == yes ]]; then
        # This argument is the operand of a preceding bare `-T`.
        drop_operand=no
        continue
    fi
    case "$opt" in
        -T)
            # Bare `-T`: its linker-script operand is the next argument.
            drop_operand=yes
            ;;
        -T*|-Wl,-T,*|*.ld)
            # Linker script in attached form, or any argument ending in `.ld`.
            ;;
        -no-pie|-Wl,--no-dynamic-linker|-Wl,--export-dynamic)
            # Remaining exe-only flags.
            ;;
        *)
            if [[ "$opt" == "-fPIC" ]]; then
                saw_fpic=yes
            fi
            kept+=("$opt")
            ;;
    esac
done

# Shared objects must be position-independent; put -fPIC first if absent.
if [[ "$saw_fpic" == no ]]; then
    kept=(-fPIC "${kept[@]}")
fi

exec "$driver" "${kept[@]}"
10 changes: 9 additions & 1 deletion .nanvix/lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,21 @@

# Text of the generated module-list file (per its name, presumably written to
# Modules/Setup.local — confirm against the code that consumes it).
# `{sysroot}` is the only placeholder in the text. The literal `*static*` and
# `*shared*` marker lines select the linkage of the module entries that follow
# them; every line of the string, including the `#` lines, is emitted verbatim
# into the generated file.
_SETUP_LOCAL_TEMPLATE = """\
# Auto-generated by .nanvix/lxml.py -- do not edit manually.

# Statically-linked extension modules for Nanvix builds.
#
*static*
# Nanvix OS interface module (snapshot, host-mount).
_nanvix _nanvixmodule.c
# lxml C extension modules (statically linked via pre-built archives).
_lxml_etree lxml_etree_builtin.c -L{sysroot}/lib -llxml_etree -lxslt -lexslt -lxml2 -lz
_lxml_elementpath lxml_elementpath_builtin.c -L{sysroot}/lib -llxml_elementpath -lxml2 -lz

# Phase 0 of the .a -> .so migration: array as proof-of-concept shared module.
# See nanvix-todo/cpython-static-to-shared-migration.md section 4.
# Listed BEFORE Setup.stdlib's static declaration so makesetup's
# "first rule wins" semantics make this shared variant take precedence.
*shared*
array arraymodule.c
"""


Expand Down
16 changes: 16 additions & 0 deletions .nanvix/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,21 @@ def stage(
# filesystem I/O goes through nanvixd's virtualized host-FS layer.
hello_script = sysroot_dir / "test_hello.py"
standalone = process_mode == "standalone"
# Phase 0 of the .a -> .so migration: `array` is now a shared
# extension at lib/python3.12/lib-dynload/array.cpython-312.so
# (built from `*shared* array arraymodule.c` in Setup.local).
# Asserting it is NOT in `sys.builtin_module_names` proves the
# dlopen path is exercised end-to-end; if the .so failed to load,
Comment on lines +451 to +455
# the import would raise.
array_snippet = (
"import array\n"
"assert 'array' not in sys.builtin_module_names, "
"'array still built-in!'\n"
"_a = array.array('i', [1, 2, 3])\n"
"assert _a.tolist() == [1, 2, 3], f'array contents wrong: {_a.tolist()}'\n"
"print(f'CPYTHON_TEST_ARRAY_SO: array loaded via dlopen from "
"{array.__file__}')\n"
)
lxml_snippet = (
"try:\n"
" import lxml.etree\n"
Expand All @@ -465,6 +480,7 @@ def stage(
"import sys\n"
"print('CPYTHON_TEST_HELLO: Hello from Python', sys.version_info[:2])\n"
"print('CPYTHON_TEST_PLATFORM:', sys.platform)\n"
+ array_snippet
+ (lxml_snippet if standalone else ""),
)

Expand Down
78 changes: 77 additions & 1 deletion Makefile.nanvix
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ ifdef CONFIG_NANVIX
LIBSQLITE3 := $(DOCKER_SYSROOT_PATH)/lib/libsqlite3.a
LIBSSL := $(DOCKER_SYSROOT_PATH)/lib/libssl.a
LIBCRYPTO := $(DOCKER_SYSROOT_PATH)/lib/libcrypto.a
# libnvx_crt0 ships the executable startup symbols (`_do_start`, `_start`,
# `c_trampoline`). It must be present in the Nanvix sysroot ahead of this
# cpython build; the existence check below fails loudly when it is not.
LIBNVX_CRT0 := $(DOCKER_SYSROOT_PATH)/lib/libnvx_crt0.a
BUILD_PYTHON := $(DOCKER_TOOLCHAIN_PATH)/bin/python3
else
TOOLCHAIN_PREFIX := $(NANVIX_TOOLCHAIN)
Expand All @@ -102,8 +106,18 @@ ifdef CONFIG_NANVIX
LIBSQLITE3 := $(abspath $(NANVIX_HOME))/lib/libsqlite3.a
LIBSSL := $(abspath $(NANVIX_HOME))/lib/libssl.a
LIBCRYPTO := $(abspath $(NANVIX_HOME))/lib/libcrypto.a
LIBNVX_CRT0 := $(abspath $(NANVIX_HOME))/lib/libnvx_crt0.a
BUILD_PYTHON := $(NANVIX_TOOLCHAIN)/bin/python3
endif

# libstdc++ / libgcc are referenced via `-l` rather than absolute paths so
# the GCC driver resolves them: libgcc lives under a versioned dir
# (`lib/gcc/i686-nanvix/<gcc-version>/libgcc.a`) and hardcoding a version
# would be fragile across toolchain upgrades. Defined once at top-level
# because the `-l` form is identical between the docker and host build
# paths above.
LIBSTDCXX := -lstdc++
LIBGCC := -lgcc
else
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(MAKECMDGOALS),distclean)
Expand All @@ -113,7 +127,69 @@ else
EXE=.elf
endif

# Existence check for libnvx_crt0.a.
#
# libnvx_crt0.a is the executable startup archive (`_do_start` / `_start` /
# `c_trampoline`) introduced by the Nanvix `nvx-crt0` crate split. cpython
# requires a Nanvix sysroot that ships it. If the user is building against
# an older sysroot snapshot, fail at make-parse time with an actionable
# message rather than producing a python.elf with no entry point.
#
# The check is skipped only when every requested goal is `clean` or
# `distclean`. A bare `make` leaves MAKECMDGOALS empty; that invocation
# builds the default goal and must be checked too, so an empty goal list is
# mapped to a non-empty placeholder before testing for "any goal other than
# clean/distclean". (Comparing `$(filter clean distclean,...)` against
# MAKECMDGOALS directly would treat the empty list as "only clean goals"
# and silently skip the check.)
ifdef CONFIG_NANVIX
ifneq ($(if $(strip $(MAKECMDGOALS)),$(filter-out clean distclean,$(MAKECMDGOALS)),default-goal),)
ifeq ($(wildcard $(LIBNVX_CRT0)),)
$(error libnvx_crt0.a not found at $(LIBNVX_CRT0). Update the Nanvix sysroot to one that ships libnvx_crt0.a (the nvx-crt0 crate must be present and built into the sysroot lib/ directory).)
endif
endif
endif
Comment on lines +137 to +143

# Configure environment variables
#
# Linker flag rationale:
#
# `-Wl,--export-dynamic`: put every globally-defined symbol from python.elf
# into its `.dynsym`. Extension `.so`s (numpy's `_multiarray_umath.so`, ssl,
# etc.) leave C/C++ runtime symbols UND and resolve them against python.elf
# at dlopen() time. Without this, those `.so`s fail to load.
#
# `-Wl,--allow-multiple-definition`: tolerates duplicate symbol definitions
# that arise from forcing every libposix / libc / libm / libstdc++ / libgcc
# object into python.elf via `--whole-archive` below. The biggest set is
# newlib's long-double math helpers (`frexpl`, `llrintl`, `lrintl`, `rintl`
# are defined in three different newlib directories simultaneously — a
# newlib build-system bug). Other known overlaps include libposix vs. libc
# (`_start`, `copysign[f]`, `getenv`, `setenv`, `unsetenv`, `environ`,
# `isatty`), libc vs. libm (`frexp`, `ldexp`, `modf`, `isnan`, `isinf`,
# `scalbn`, …), libm vs. libstdc++ (`hypotf`), libgcc internal duplicates
# (`__x86.get_pc_thunk.*`), and a libc / libgcc `__eprintf`. The set is
# large and toolchain-build-version-dependent; treating the link as
# multiple-definition-tolerant is the only practical workaround until each
# upstream is fixed. Remove this flag once the contributing duplicates are
# resolved upstream.
#
# `LIBS` segment 1 (`--whole-archive ... --no-whole-archive`): force every
# object from libnvx_crt0, libposix, libc, libm, libstdc++, and libgcc into
# python.elf so the runtime symbols extension `.so`s depend on are embedded
# and re-exported via `--export-dynamic`. Without `--whole-archive`, the
# static linker drops unreferenced objects (e.g. `fscanf`, `longjmp`,
# `strtold_l` for numpy; `operator new/delete[]`, `__cxa_*`, `_Unwind_*`,
# `std::type_info` vtables for any C++ extension) and subsequent dlopen()
# of those `.so`s fails with "symbol not found".
#
# `libnvx_crt0` is listed first inside `--whole-archive`. Today, libposix.a
# still ships its own copy of the startup symbols (`_start`, `_do_start`,
# `c_trampoline`) because `nvx` builds them in under the `staticlib`
# feature. With `--allow-multiple-definition` (above) the linker takes the
# first definition, so listing libnvx_crt0 first selects the standalone
# crt0 copy of `_start` and friends. This is an intentional behaviour
# change: future Nanvix versions remove the duplicate from libposix, after
# which libnvx_crt0 is the sole provider. Listing it first today keeps
# python.elf using a consistent `_start` source across both states.
#
# `LIBS` segment 2 (`--start-group ... --end-group`): the external add-on
# libraries (sqlite3, ssl, crypto, z, bz2, lzma, ffi). The group is needed
# only for their inter-archive circular dependencies; they resolve symbols
# from libposix/libc/libm/libstdc++ against the already-embedded objects
# from segment 1.
CONFIGURE_ENV = \
CC="$(TOOLCHAIN_PREFIX)/bin/i686-nanvix-gcc" \
CXX="$(TOOLCHAIN_PREFIX)/bin/i686-nanvix-g++" \
Expand All @@ -123,7 +199,7 @@ CONFIGURE_ENV = \
CFLAGS="-O3 -fomit-frame-pointer -fno-unwind-tables -fno-asynchronous-unwind-tables -I$(SYSROOT_PATH)/include" \
CFLAGS_NODIST="-fno-semantic-interposition" \
LDFLAGS="-L$(SYSROOT_PATH)/lib -T$(SYSROOT_PATH)/lib/user.ld -Wl,--allow-multiple-definition -no-pie -Wl,--export-dynamic -Wl,--no-dynamic-linker" \
LIBS="-Wl,--start-group $(LIBPOSIX) $(LIBC) $(LIBM) -lsqlite3 -lssl -lcrypto -lz -lbz2 -llzma -lffi -Wl,--end-group" \
LIBS="-Wl,--whole-archive $(LIBNVX_CRT0) $(LIBPOSIX) $(LIBC) $(LIBM) $(LIBSTDCXX) $(LIBGCC) -Wl,--no-whole-archive -Wl,--start-group -lsqlite3 -lssl -lcrypto -lz -lbz2 -llzma -lffi -Wl,--end-group" \
LIBSQLITE3_LIBS="-L$(SYSROOT_PATH)/lib -lsqlite3" \
LIBSQLITE3_CFLAGS="-I$(SYSROOT_PATH)/include" \
ZLIB_LIBS="-L$(SYSROOT_PATH)/lib -lz" \
Expand Down
Loading