diff --git a/.nanvix/build.py b/.nanvix/build.py
index b179d5412bc365b..8eb60c3df1ba471 100644
--- a/.nanvix/build.py
+++ b/.nanvix/build.py
@@ -22,7 +22,7 @@
config = load_sibling("config", __file__)
docker_mod = load_sibling("docker", __file__)
-lxml_mod = load_sibling("lxml", __file__)
+setup_local_mod = load_sibling("setup_local", __file__)
def make_args(
@@ -115,7 +115,7 @@ def build(
return
effective_sysroot = config.DOCKER_SYSROOT_PATH if docker else sysroot
effective_toolchain = config.DOCKER_TOOLCHAIN_PATH if docker else toolchain
- lxml_mod.generate_setup_local(repo_root, Path(effective_sysroot))
+ setup_local_mod.generate_setup_local(repo_root, Path(effective_sysroot))
args = make_args(
effective_sysroot,
effective_toolchain,
diff --git a/.nanvix/config.py b/.nanvix/config.py
index fb4d1eaf02ed41e..1c6e81439d1fc1b 100644
--- a/.nanvix/config.py
+++ b/.nanvix/config.py
@@ -341,8 +341,6 @@ def configure_opts(
# #323 wave 8 — regex and plistlib
"test_re",
"test_plistlib",
- # #600 — lxml built-in smoke test
- "test_nanvix_lxml",
# #526 — _lzma stdlib enablement
"test_lzma",
# #327 — network and protocol tests (IPv4 only; IPv6 disabled)
@@ -464,9 +462,9 @@ def configure_opts(
"lib/pkgconfig",
]
-# site-packages is no longer trimmed because lxml runtime files may be
-# installed there by downstream packaging. When the directory is empty
-# it remains harmlessly on disk (ramfs.trim_sysroot only removes empty
+# site-packages is not trimmed because downstream third-party packages
+# may install runtime files there. When the directory is empty it
+# remains harmlessly on disk (ramfs.trim_sysroot only removes empty
# bin/). To force-trim site-packages for minimal images, add the path
# back into SYSROOT_TRIM_DIRS above.
diff --git a/.nanvix/docker.py b/.nanvix/docker.py
index abee84074b7cbdf..ca367e0378a95c3 100644
--- a/.nanvix/docker.py
+++ b/.nanvix/docker.py
@@ -286,7 +286,7 @@ def _generate_setup_local_cmd() -> str:
"""Shell command to generate Modules/Setup.local inside the container.
Rendered from .nanvix/setup_local.py (single source of truth shared
- with the host build path .nanvix/lxml.py::generate_setup_local).
+ with the host build path .nanvix/setup_local.py::generate_setup_local).
The rendered file body is emitted via a single ``printf '%s\\n' ...``
invocation with each line single-quoted for the container shell.
"""
diff --git a/.nanvix/lxml.py b/.nanvix/lxml.py
deleted file mode 100644
index 4148935049611f1..000000000000000
--- a/.nanvix/lxml.py
+++ /dev/null
@@ -1,86 +0,0 @@
-"""lxml build helpers and runtime staging for Nanvix CPython."""
-
-from __future__ import annotations
-
-import shutil
-from pathlib import Path
-
-import sys as _sys
-
-_sys.path.insert(0, str(Path(__file__).resolve().parent))
-from _loader import load_sibling
-
-config = load_sibling("config", __file__)
-setup_local_mod = load_sibling("setup_local", __file__)
-
-
-def generate_setup_local(repo_root: Path, sysroot: Path) -> None:
- """Generate Modules/Setup.local from .nanvix/setup_local.py.
-
- Single source of truth shared with the Docker build path
- (.nanvix/docker.py::_generate_setup_local_cmd). The {sysroot}
- placeholder in entry tokens is substituted with the host build's
- sysroot path here.
- """
- setup_local = repo_root / "Modules" / "Setup.local"
- content = setup_local_mod.render_setup_local(
- sysroot=str(sysroot),
- header_comment="Auto-generated by .nanvix/lxml.py -- do not edit manually.",
- )
- setup_local.write_text(content, encoding="utf-8")
- print(f"[lxml] Generated {setup_local}")
-
-
-def clear_setup_local(repo_root: Path) -> None:
- """Remove the generated Modules/Setup.local."""
- setup_local = repo_root / "Modules" / "Setup.local"
- if setup_local.exists():
- setup_local.unlink()
- print(f"[lxml] Removed {setup_local}")
-
-
-def stage_lxml_runtime(repo_root: Path, sysroot: Path) -> None:
- """Copy lxml Python files from buildroot into the test/package sysroot.
-
- Looks for lxml in ``.nanvix/buildroot/python-packages/lxml/``.
- Skips gracefully when the python-packages directory is not available.
- """
- lxml_src = repo_root / ".nanvix" / "buildroot" / "python-packages" / "lxml"
- if not lxml_src.is_dir():
- print(
- f"[lxml] Staged lxml package not found at {lxml_src}; "
- "skipping runtime staging."
- )
- return
-
- py_lib = sysroot / "lib" / config.PYTHON_LIB_DIR
- if not py_lib.is_dir():
- raise RuntimeError(f"Python runtime library directory is missing: {py_lib}")
-
- dst = py_lib / "lxml"
- if dst.exists():
- shutil.rmtree(dst)
- shutil.copytree(lxml_src, dst)
-
- # Ensure the etree.py shim explicitly exports names that are not in
- # lxml.etree.__all__ but are expected by downstream packages (e.g.
- # openpyxl imports xmlfile). The star-import only picks up names
- # listed in __all__; xmlfile/htmlfile are cdef classes omitted from
- # that list.
- _write_etree_shim(dst / "etree.py")
-
- print(f"[lxml] Staged {lxml_src} -> {dst}")
-
-
-# The content of the etree.py shim that bridges the built-in _lxml_etree
-# C extension to the expected lxml.etree import path.
-_ETREE_SHIM = """\
-from _lxml_etree import *
-from _lxml_etree import _Element, _ElementTree, _Comment, _ProcessingInstruction, ElementBase, QName, _Attrib
-from _lxml_etree import xmlfile, htmlfile
-"""
-
-
-def _write_etree_shim(path: Path) -> None:
- """Write (or overwrite) the lxml/etree.py shim with correct exports."""
- path.write_text(_ETREE_SHIM, encoding="utf-8")
diff --git a/.nanvix/nanvix.toml b/.nanvix/nanvix.toml
index 91ddf71b9bc1b73..7d2637ba7275252 100644
--- a/.nanvix/nanvix.toml
+++ b/.nanvix/nanvix.toml
@@ -15,7 +15,4 @@ sqlite = "3.49.0"
openssl = "3.5.0"
bzip2 = "1.0.8"
libffi = "3.4.6"
-libxml2 = "2.12.9"
-libxslt = "1.1.42"
-lxml = "5.3.0"
xz = "5.2.5"
diff --git a/.nanvix/package.py b/.nanvix/package.py
index a6e23878491b414..c9e9ecec988337d 100644
--- a/.nanvix/package.py
+++ b/.nanvix/package.py
@@ -21,7 +21,6 @@
config = load_sibling("config", __file__)
build_mod = load_sibling("build", __file__)
-lxml_mod = load_sibling("lxml", __file__)
ramfs_mod = load_sibling("ramfs", __file__)
@@ -102,9 +101,6 @@ def package(
if not sysroot_installed.is_dir():
raise FileNotFoundError(f"Install did not produce {sysroot_installed}")
- # Stage lxml Python package into the installed sysroot.
- lxml_mod.stage_lxml_runtime(repo_root, sysroot_installed)
-
# --- Buildroot: build dependencies ---
buildroot_pkg = release_staging / "buildroot-pkg"
buildroot_pkg.mkdir(parents=True)
diff --git a/.nanvix/setup_local.py b/.nanvix/setup_local.py
index 14189598fae6fe8..4435dee21405da5 100644
--- a/.nanvix/setup_local.py
+++ b/.nanvix/setup_local.py
@@ -3,9 +3,9 @@
"""Single source of truth for Modules/Setup.local on Nanvix builds.
-The host-build path (.nanvix/lxml.py::generate_setup_local) and the
-Docker-build path (.nanvix/docker.py::_generate_setup_local_cmd) both
-consume SETUP_LOCAL_ENTRIES and render the same file body via
+The host-build path (generate_setup_local, below) and the Docker-build
+path (.nanvix/docker.py::_generate_setup_local_cmd) both consume
+SETUP_LOCAL_ENTRIES and render the same file body via
render_setup_local().
Module ordering matters: makesetup applies "first rule wins" semantics
@@ -19,6 +19,7 @@
from __future__ import annotations
from enum import Enum
+from pathlib import Path
from typing import Iterable, NamedTuple, Sequence
@@ -91,31 +92,6 @@ class SetupEntry(NamedTuple):
"take precedence."
),
),
- SetupEntry(
- name="_lxml_etree",
- linkage=Linkage.SHARED,
- tokens=(
- "lxml_etree_builtin.c",
- "-L{sysroot}/lib",
- "-llxml_etree",
- "-lxslt",
- "-lexslt",
- "-lxml2",
- "-lz",
- ),
- section_header=("lxml C extension modules (linked via pre-built archives)."),
- ),
- SetupEntry(
- name="_lxml_elementpath",
- linkage=Linkage.SHARED,
- tokens=(
- "lxml_elementpath_builtin.c",
- "-L{sysroot}/lib",
- "-llxml_elementpath",
- "-lxml2",
- "-lz",
- ),
- ),
# ---------------- Data primitives (no external deps) ----------------
*(
SetupEntry(
@@ -329,9 +305,9 @@ def render_setup_local(
lines.append(entry.linkage.value)
current_linkage = entry.linkage
elif entry.section_header:
- # Mid-section group separator (e.g. moving from lxml to the
- # data-primitive modules without changing linkage). Emit as
- # a blank-line-separated comment block before the entry.
+ # Mid-section group separator (e.g. moving from one shared
+ # group to the next without changing linkage). Emit as a
+ # blank-line-separated comment block before the entry.
lines.append("")
lines.extend(_wrap_comment(entry.section_header))
@@ -345,3 +321,20 @@ def render_setup_local(
lines.append("") # trailing newline
return "\n".join(lines)
+
+
+def generate_setup_local(repo_root: Path, sysroot: Path) -> None:
+    """Write the rendered SETUP_LOCAL_ENTRIES to Modules/Setup.local.
+
+    Host-build counterpart of the Docker build path
+    (.nanvix/docker.py::_generate_setup_local_cmd); both render the
+    same entries.  The host build's sysroot path is passed to
+    render_setup_local() to fill in the {sysroot} placeholder.
+    """
+    banner = "Auto-generated by .nanvix/setup_local.py -- do not edit manually."
+    target = repo_root / "Modules" / "Setup.local"
+    target.write_text(
+        render_setup_local(sysroot=str(sysroot), header_comment=banner),
+        encoding="utf-8",
+    )
+    print(f"[setup_local] Generated {target}")
diff --git a/.nanvix/test.py b/.nanvix/test.py
index 0c99c793d9061a4..a34100fff71464b 100644
--- a/.nanvix/test.py
+++ b/.nanvix/test.py
@@ -31,7 +31,6 @@
config = load_sibling("config", __file__)
build_mod = load_sibling("build", __file__)
-lxml_mod = load_sibling("lxml", __file__)
ramfs_mod = load_sibling("ramfs", __file__)
@@ -547,11 +546,7 @@ def stage(
)
# Copy test script — a simple smoke test that validates the interpreter.
- # The lxml import test is only included for standalone mode because
- # xmlInitParser() hangs in multi-process/single-process modes where
- # filesystem I/O goes through nanvixd's virtualized host-FS layer.
hello_script = sysroot_dir / "test_hello.py"
- standalone = process_mode == "standalone"
# Phase 0 of the .a -> .so migration: `array` is now a shared
# extension at lib/python3.12/lib-dynload/array.cpython-312.so
# (built from `*shared* array arraymodule.c` in Setup.local).
@@ -567,26 +562,12 @@ def stage(
"print(f'CPYTHON_TEST_ARRAY_SO: array loaded via dlopen from "
"{array.__file__}')\n"
)
- lxml_snippet = (
- "try:\n"
- " import lxml.etree\n"
- " doc = lxml.etree.fromstring(b'lxml OK')\n"
- " assert doc.tag == 'root'\n"
- " assert doc[0].text == 'lxml OK'\n"
- " print('CPYTHON_TEST_LXML: lxml.etree import and parse OK')\n"
- "except ImportError as e:\n"
- " print(f'CPYTHON_TEST_LXML_SKIP: {e}')\n"
- "except Exception as e:\n"
- " print(f'CPYTHON_TEST_LXML_FAIL: {e}')\n"
- " sys.exit(1)\n"
- )
hello_script.write_text(
"import sys\n"
"print('CPYTHON_TEST_HELLO: Hello from Python', sys.version_info[:2])\n"
"print('CPYTHON_TEST_PLATFORM:', sys.platform)\n"
+ array_snippet
- + _render_so_sanity_snippets()
- + (lxml_snippet if standalone else ""),
+ + _render_so_sanity_snippets(),
)
# Copy the HTTP server smoke-test script from the repo root into the
@@ -823,8 +804,6 @@ def run_hello(
environment variable syntax. Multi-process and single-process modes
use direct host-filesystem access (no ramfs).
"""
- standalone = process_mode == "standalone"
-
print(f"Test: Hello world ({process_mode})...")
returncode, output, elapsed_ms = _run_nanvixd_script(
@@ -846,26 +825,18 @@ def run_hello(
# Validate output.
found_hello = False
- found_lxml = False
for line in output.splitlines():
if line.startswith("CPYTHON_TEST_"):
tag = line.split(":")[0].replace("CPYTHON_TEST_", "")
print(f" {tag}: {line.strip()}")
if tag == "HELLO":
found_hello = True
- elif tag in ("LXML", "LXML_SKIP"):
- found_lxml = True
if not found_hello:
print(" FAIL: Hello test did not produce expected output")
print(output)
raise RuntimeError("Hello test did not produce expected output")
- if standalone and not found_lxml:
- # lxml staging is best-effort — if the runtime package was not
- # available (e.g. release asset missing), the test is non-fatal.
- print(" WARNING: lxml import/parse test did not produce expected output")
-
print(" PASS")
@@ -1194,7 +1165,6 @@ def run_all(
run_fn=run_fn,
docker=docker,
)
- lxml_mod.stage_lxml_runtime(repo_root, staging / "sysroot")
# Ramfs — only needed for standalone mode. Multi-process and
# single-process use host-filesystem access (no ramfs).
diff --git a/.nanvix/z.py b/.nanvix/z.py
index 341167dbd2cfefb..6c3f00e1ef2f77a 100644
--- a/.nanvix/z.py
+++ b/.nanvix/z.py
@@ -29,7 +29,6 @@
# Local modules (loaded via importlib since .nanvix/ is not a valid package name)
# ---------------------------------------------------------------------------
import sys as _sys
-import tempfile
from pathlib import Path
from nanvix_zutil import (
@@ -84,9 +83,6 @@
"zlib": ["libz.a"],
"sqlite": ["libsqlite3.a"],
"openssl": ["libssl.a", "libcrypto.a"],
- "libxml2": ["libxml2.a"],
- "libxslt": ["libxslt.a", "libexslt.a"],
- "lxml": ["liblxml_etree.a", "liblxml_elementpath.a"],
"xz": ["liblzma.a"],
}
@@ -342,14 +338,7 @@ def _install_missing_deps(self) -> None:
if not expected:
continue
libs_present = all((lib_dir / lib).exists() for lib in expected)
- # For lxml, also require the python-packages payload.
- if dep.name == "lxml":
- pkg_present = (
- buildroot / "python-packages" / "lxml" / "__init__.py"
- ).exists()
- if libs_present and pkg_present:
- continue
- elif libs_present:
+ if libs_present:
continue
resolved = suffix_dep(dep, nanvix_version) if nanvix_version else dep
self._download_dep_fallback(resolved, buildroot)
@@ -369,7 +358,6 @@ def _download_dep_fallback(
when the exact tag is missing).
- Multiple deployment-mode candidates (standalone, single-process,
multi-process).
- - Extraction of ``python-packages/`` payload (e.g. lxml).
"""
dep_name = dep.name
repo = dep.repo
@@ -434,69 +422,6 @@ def _download_dep_fallback(
log.warning(f"No compatible fallback asset for {dep_name}")
return
- # --- CPython-specific: extract python-packages/ (e.g. lxml) ---
- cache_dir = buildroot.parent / "cache"
- asset_prefix = f"{dep_name}-{platform}-"
- for cached in sorted(cache_dir.iterdir()) if cache_dir.is_dir() else []:
- if not cached.name.startswith(asset_prefix):
- continue
- self._extract_python_packages(cached, buildroot)
- break
-
- def _extract_python_packages(self, asset_path: Path, buildroot: Path) -> None:
- """Extract ``python-packages/`` from an archive into *buildroot*."""
- import tarfile
- import zipfile
-
- with tempfile.TemporaryDirectory() as tmpdir:
- extract_dir = Path(tmpdir) / "extracted"
- extract_dir.mkdir()
-
- if zipfile.is_zipfile(asset_path):
- with zipfile.ZipFile(asset_path) as zf:
- for member in zf.namelist():
- if "python-packages" not in member:
- continue
- if os.path.isabs(member) or ".." in member.split("/"):
- continue
- dest = (extract_dir / member).resolve()
- if not dest.is_relative_to(extract_dir.resolve()):
- continue
- zf.extract(member, extract_dir)
- else:
- with tarfile.open(str(asset_path), "r:*") as tf:
- pkg_members = [
- m
- for m in tf.getmembers()
- if "python-packages" in m.name
- and not os.path.isabs(m.name)
- and ".." not in m.name.split("/")
- ]
- if not pkg_members:
- return
- try:
- tf.extractall(
- str(extract_dir), members=pkg_members, filter="data"
- )
- except TypeError:
- tf.extractall(str(extract_dir), members=pkg_members)
-
- for pkg_src in extract_dir.rglob("python-packages"):
- if not pkg_src.is_dir():
- continue
- pkg_dst = buildroot / "python-packages"
- pkg_dst.mkdir(parents=True, exist_ok=True)
- for item in pkg_src.iterdir():
- target = pkg_dst / item.name
- if item.is_dir():
- if target.exists():
- shutil.rmtree(target)
- shutil.copytree(item, target)
- else:
- shutil.copy2(item, target)
- log.info(f"Installed python packages from {asset_path.name}")
- break
-
if __name__ == "__main__":
CPythonBuild.main()
diff --git a/Lib/test/test_nanvix_lxml.py b/Lib/test/test_nanvix_lxml.py
deleted file mode 100644
index 28c62b312b953c9..000000000000000
--- a/Lib/test/test_nanvix_lxml.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""Smoke tests for lxml built-in on NanVix."""
-
-import sys
-import unittest
-from test.support import is_nanvix
-from test.support.import_helper import import_module
-
-if not is_nanvix:
- raise unittest.SkipTest("lxml built-in is Nanvix-specific")
-
-etree = import_module("lxml.etree")
-
-
-class NanvixLxmlTests(unittest.TestCase):
-
- def test_import_lxml_etree(self):
- self.assertTrue(hasattr(etree, "fromstring"))
- self.assertTrue(hasattr(etree, "_Element"))
-
- def test_parse_xml(self):
- root = etree.fromstring(b'text')
- self.assertEqual(root.tag, "root")
- child = root.find("child")
- self.assertIsNotNone(child)
- self.assertEqual(child.text, "text")
- self.assertEqual(child.get("key"), "val")
-
- def test_element_creation(self):
- root = etree.Element("doc")
- etree.SubElement(root, "item").text = "hello"
- xml = etree.tostring(root, encoding="unicode")
- self.assertIn("- hello
", xml)
-
- def test_elementpath(self):
- root = etree.fromstring(b"12")
- results = root.findall("b")
- self.assertEqual(len(results), 2)
- self.assertEqual(results[0].text, "1")
-
- def test_xmlfile_available(self):
- # xmlfile must be importable from lxml.etree for openpyxl compat.
- from lxml.etree import xmlfile
-
- self.assertTrue(callable(xmlfile))
-
- def test_htmlfile_available(self):
- from lxml.etree import htmlfile
-
- self.assertTrue(callable(htmlfile))
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/Modules/lxml_elementpath_builtin.c b/Modules/lxml_elementpath_builtin.c
deleted file mode 100644
index 6a1758e12246590..000000000000000
--- a/Modules/lxml_elementpath_builtin.c
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * lxml_elementpath_builtin.c - Shim for lxml._elementpath built-in module.
- *
- * Registers the Cython _elementpath extension under the flat name
- * "_lxml_elementpath" for Modules/Setup.local.
- */
-
-#include "Python.h"
-
-extern PyObject* PyInit__elementpath(void);
-
-PyMODINIT_FUNC
-PyInit__lxml_elementpath(void)
-{
- return PyInit__elementpath();
-}
diff --git a/Modules/lxml_etree_builtin.c b/Modules/lxml_etree_builtin.c
deleted file mode 100644
index d028054d5b08f23..000000000000000
--- a/Modules/lxml_etree_builtin.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * lxml_etree_builtin.c - Shim to register lxml.etree as a CPython built-in.
- *
- * makesetup does not support dotted module names, so the Cython extension
- * is registered under the flat name "_lxml_etree". A pure-Python shim at
- * lxml/etree.py re-exports everything via `from _lxml_etree import *`.
- *
- * The Cython-generated code in liblxml_etree.a exports PyInit_etree.
- * This wrapper provides PyInit__lxml_etree so the name matches the
- * entry in Modules/Setup.local.
- */
-
-#include "Python.h"
-
-extern PyObject* PyInit_etree(void);
-
-PyMODINIT_FUNC
-PyInit__lxml_etree(void)
-{
- return PyInit_etree();
-}