diff --git a/.nanvix/build.py b/.nanvix/build.py index b179d5412bc365b..8eb60c3df1ba471 100644 --- a/.nanvix/build.py +++ b/.nanvix/build.py @@ -22,7 +22,7 @@ config = load_sibling("config", __file__) docker_mod = load_sibling("docker", __file__) -lxml_mod = load_sibling("lxml", __file__) +setup_local_mod = load_sibling("setup_local", __file__) def make_args( @@ -115,7 +115,7 @@ def build( return effective_sysroot = config.DOCKER_SYSROOT_PATH if docker else sysroot effective_toolchain = config.DOCKER_TOOLCHAIN_PATH if docker else toolchain - lxml_mod.generate_setup_local(repo_root, Path(effective_sysroot)) + setup_local_mod.generate_setup_local(repo_root, Path(effective_sysroot)) args = make_args( effective_sysroot, effective_toolchain, diff --git a/.nanvix/config.py b/.nanvix/config.py index fb4d1eaf02ed41e..1c6e81439d1fc1b 100644 --- a/.nanvix/config.py +++ b/.nanvix/config.py @@ -341,8 +341,6 @@ def configure_opts( # #323 wave 8 — regex and plistlib "test_re", "test_plistlib", - # #600 — lxml built-in smoke test - "test_nanvix_lxml", # #526 — _lzma stdlib enablement "test_lzma", # #327 — network and protocol tests (IPv4 only; IPv6 disabled) @@ -464,9 +462,9 @@ def configure_opts( "lib/pkgconfig", ] -# site-packages is no longer trimmed because lxml runtime files may be -# installed there by downstream packaging. When the directory is empty -# it remains harmlessly on disk (ramfs.trim_sysroot only removes empty +# site-packages is not trimmed because downstream third-party packages +# may install runtime files there. When the directory is empty it +# remains harmlessly on disk (ramfs.trim_sysroot only removes empty # bin/). To force-trim site-packages for minimal images, add the path # back into SYSROOT_TRIM_DIRS above. diff --git a/.nanvix/docker.py b/.nanvix/docker.py index abee84074b7cbdf..ca367e0378a95c3 100644 --- a/.nanvix/docker.py +++ b/.nanvix/docker.py @@ -286,7 +286,7 @@ def _generate_setup_local_cmd() -> str: """Shell command to generate Modules/Setup.local inside the container. Rendered from .nanvix/setup_local.py (single source of truth shared - with the host build path .nanvix/lxml.py::generate_setup_local). + with the host build path .nanvix/setup_local.py::generate_setup_local). The rendered file body is emitted via a single ``printf '%s\\n' ...`` invocation with each line single-quoted for the container shell. """ diff --git a/.nanvix/lxml.py b/.nanvix/lxml.py deleted file mode 100644 index 4148935049611f1..000000000000000 --- a/.nanvix/lxml.py +++ /dev/null @@ -1,86 +0,0 @@ -"""lxml build helpers and runtime staging for Nanvix CPython.""" - -from __future__ import annotations - -import shutil -from pathlib import Path - -import sys as _sys - -_sys.path.insert(0, str(Path(__file__).resolve().parent)) -from _loader import load_sibling - -config = load_sibling("config", __file__) -setup_local_mod = load_sibling("setup_local", __file__) - - -def generate_setup_local(repo_root: Path, sysroot: Path) -> None: - """Generate Modules/Setup.local from .nanvix/setup_local.py. - - Single source of truth shared with the Docker build path - (.nanvix/docker.py::_generate_setup_local_cmd). The {sysroot} - placeholder in entry tokens is substituted with the host build's - sysroot path here. - """ - setup_local = repo_root / "Modules" / "Setup.local" - content = setup_local_mod.render_setup_local( - sysroot=str(sysroot), - header_comment="Auto-generated by .nanvix/lxml.py -- do not edit manually.", - ) - setup_local.write_text(content, encoding="utf-8") - print(f"[lxml] Generated {setup_local}") - - -def clear_setup_local(repo_root: Path) -> None: - """Remove the generated Modules/Setup.local.""" - setup_local = repo_root / "Modules" / "Setup.local" - if setup_local.exists(): - setup_local.unlink() - print(f"[lxml] Removed {setup_local}") - - -def stage_lxml_runtime(repo_root: Path, sysroot: Path) -> None: - """Copy lxml Python files from buildroot into the test/package sysroot. - - Looks for lxml in ``.nanvix/buildroot/python-packages/lxml/``. - Skips gracefully when the python-packages directory is not available. - """ - lxml_src = repo_root / ".nanvix" / "buildroot" / "python-packages" / "lxml" - if not lxml_src.is_dir(): - print( - f"[lxml] Staged lxml package not found at {lxml_src}; " - "skipping runtime staging." - ) - return - - py_lib = sysroot / "lib" / config.PYTHON_LIB_DIR - if not py_lib.is_dir(): - raise RuntimeError(f"Python runtime library directory is missing: {py_lib}") - - dst = py_lib / "lxml" - if dst.exists(): - shutil.rmtree(dst) - shutil.copytree(lxml_src, dst) - - # Ensure the etree.py shim explicitly exports names that are not in - # lxml.etree.__all__ but are expected by downstream packages (e.g. - # openpyxl imports xmlfile). The star-import only picks up names - # listed in __all__; xmlfile/htmlfile are cdef classes omitted from - # that list. - _write_etree_shim(dst / "etree.py") - - print(f"[lxml] Staged {lxml_src} -> {dst}") - - -# The content of the etree.py shim that bridges the built-in _lxml_etree -# C extension to the expected lxml.etree import path. -_ETREE_SHIM = """\ -from _lxml_etree import * -from _lxml_etree import _Element, _ElementTree, _Comment, _ProcessingInstruction, ElementBase, QName, _Attrib -from _lxml_etree import xmlfile, htmlfile -""" - - -def _write_etree_shim(path: Path) -> None: - """Write (or overwrite) the lxml/etree.py shim with correct exports.""" - path.write_text(_ETREE_SHIM, encoding="utf-8") diff --git a/.nanvix/nanvix.toml b/.nanvix/nanvix.toml index 91ddf71b9bc1b73..7d2637ba7275252 100644 --- a/.nanvix/nanvix.toml +++ b/.nanvix/nanvix.toml @@ -15,7 +15,4 @@ sqlite = "3.49.0" openssl = "3.5.0" bzip2 = "1.0.8" libffi = "3.4.6" -libxml2 = "2.12.9" -libxslt = "1.1.42" -lxml = "5.3.0" xz = "5.2.5" diff --git a/.nanvix/package.py b/.nanvix/package.py index a6e23878491b414..c9e9ecec988337d 100644 --- a/.nanvix/package.py +++ b/.nanvix/package.py @@ -21,7 +21,6 @@ config = load_sibling("config", __file__) build_mod = load_sibling("build", __file__) -lxml_mod = load_sibling("lxml", __file__) ramfs_mod = load_sibling("ramfs", __file__) @@ -102,9 +101,6 @@ def package( if not sysroot_installed.is_dir(): raise FileNotFoundError(f"Install did not produce {sysroot_installed}") - # Stage lxml Python package into the installed sysroot. - lxml_mod.stage_lxml_runtime(repo_root, sysroot_installed) - # --- Buildroot: build dependencies --- buildroot_pkg = release_staging / "buildroot-pkg" buildroot_pkg.mkdir(parents=True) diff --git a/.nanvix/setup_local.py b/.nanvix/setup_local.py index 14189598fae6fe8..4435dee21405da5 100644 --- a/.nanvix/setup_local.py +++ b/.nanvix/setup_local.py @@ -3,9 +3,9 @@ """Single source of truth for Modules/Setup.local on Nanvix builds. -The host-build path (.nanvix/lxml.py::generate_setup_local) and the -Docker-build path (.nanvix/docker.py::_generate_setup_local_cmd) both -consume SETUP_LOCAL_ENTRIES and render the same file body via +The host-build path (generate_setup_local, below) and the Docker-build +path (.nanvix/docker.py::_generate_setup_local_cmd) both consume +SETUP_LOCAL_ENTRIES and render the same file body via render_setup_local(). Module ordering matters: makesetup applies "first rule wins" semantics @@ -19,6 +19,7 @@ from __future__ import annotations from enum import Enum +from pathlib import Path from typing import Iterable, NamedTuple, Sequence @@ -91,31 +92,6 @@ class SetupEntry(NamedTuple): "take precedence." ), ), - SetupEntry( - name="_lxml_etree", - linkage=Linkage.SHARED, - tokens=( - "lxml_etree_builtin.c", - "-L{sysroot}/lib", - "-llxml_etree", - "-lxslt", - "-lexslt", - "-lxml2", - "-lz", - ), - section_header=("lxml C extension modules (linked via pre-built archives)."), - ), - SetupEntry( - name="_lxml_elementpath", - linkage=Linkage.SHARED, - tokens=( - "lxml_elementpath_builtin.c", - "-L{sysroot}/lib", - "-llxml_elementpath", - "-lxml2", - "-lz", - ), - ), # ---------------- Data primitives (no external deps) ---------------- *( SetupEntry( @@ -329,9 +305,9 @@ def render_setup_local( lines.append(entry.linkage.value) current_linkage = entry.linkage elif entry.section_header: - # Mid-section group separator (e.g. moving from lxml to the - # data-primitive modules without changing linkage). Emit as - # a blank-line-separated comment block before the entry. + # Mid-section group separator (e.g. moving from one shared + # group to the next without changing linkage). Emit as a + # blank-line-separated comment block before the entry. lines.append("") lines.extend(_wrap_comment(entry.section_header)) @@ -345,3 +321,20 @@ def render_setup_local( lines.append("") # trailing newline return "\n".join(lines) + + +def generate_setup_local(repo_root: Path, sysroot: Path) -> None: + """Generate Modules/Setup.local from SETUP_LOCAL_ENTRIES. + + Single source of truth shared with the Docker build path + (.nanvix/docker.py::_generate_setup_local_cmd). The {sysroot} + placeholder in entry tokens is substituted with the host build's + sysroot path here. + """ + setup_local = repo_root / "Modules" / "Setup.local" + content = render_setup_local( + sysroot=str(sysroot), + header_comment="Auto-generated by .nanvix/setup_local.py -- do not edit manually.", + ) + setup_local.write_text(content, encoding="utf-8") + print(f"[setup_local] Generated {setup_local}") diff --git a/.nanvix/test.py b/.nanvix/test.py index 0c99c793d9061a4..a34100fff71464b 100644 --- a/.nanvix/test.py +++ b/.nanvix/test.py @@ -31,7 +31,6 @@ config = load_sibling("config", __file__) build_mod = load_sibling("build", __file__) -lxml_mod = load_sibling("lxml", __file__) ramfs_mod = load_sibling("ramfs", __file__) @@ -547,11 +546,7 @@ def stage( ) # Copy test script — a simple smoke test that validates the interpreter. - # The lxml import test is only included for standalone mode because - # xmlInitParser() hangs in multi-process/single-process modes where - # filesystem I/O goes through nanvixd's virtualized host-FS layer. hello_script = sysroot_dir / "test_hello.py" - standalone = process_mode == "standalone" # Phase 0 of the .a -> .so migration: `array` is now a shared # extension at lib/python3.12/lib-dynload/array.cpython-312.so # (built from `*shared* array arraymodule.c` in Setup.local). @@ -567,26 +562,12 @@ def stage( "print(f'CPYTHON_TEST_ARRAY_SO: array loaded via dlopen from " "{array.__file__}')\n" ) - lxml_snippet = ( - "try:\n" - " import lxml.etree\n" - " doc = lxml.etree.fromstring(b'lxml OK')\n" - " assert doc.tag == 'root'\n" - " assert doc[0].text == 'lxml OK'\n" - " print('CPYTHON_TEST_LXML: lxml.etree import and parse OK')\n" - "except ImportError as e:\n" - " print(f'CPYTHON_TEST_LXML_SKIP: {e}')\n" - "except Exception as e:\n" - " print(f'CPYTHON_TEST_LXML_FAIL: {e}')\n" - " sys.exit(1)\n" - ) hello_script.write_text( "import sys\n" "print('CPYTHON_TEST_HELLO: Hello from Python', sys.version_info[:2])\n" "print('CPYTHON_TEST_PLATFORM:', sys.platform)\n" + array_snippet - + _render_so_sanity_snippets() - + (lxml_snippet if standalone else ""), + + _render_so_sanity_snippets(), ) # Copy the HTTP server smoke-test script from the repo root into the @@ -823,8 +804,6 @@ def run_hello( environment variable syntax. Multi-process and single-process modes use direct host-filesystem access (no ramfs). """ - standalone = process_mode == "standalone" - print(f"Test: Hello world ({process_mode})...") returncode, output, elapsed_ms = _run_nanvixd_script( @@ -846,26 +825,18 @@ def run_hello( # Validate output. found_hello = False - found_lxml = False for line in output.splitlines(): if line.startswith("CPYTHON_TEST_"): tag = line.split(":")[0].replace("CPYTHON_TEST_", "") print(f" {tag}: {line.strip()}") if tag == "HELLO": found_hello = True - elif tag in ("LXML", "LXML_SKIP"): - found_lxml = True if not found_hello: print(" FAIL: Hello test did not produce expected output") print(output) raise RuntimeError("Hello test did not produce expected output") - if standalone and not found_lxml: - # lxml staging is best-effort — if the runtime package was not - # available (e.g. release asset missing), the test is non-fatal. - print(" WARNING: lxml import/parse test did not produce expected output") - print(" PASS") @@ -1194,7 +1165,6 @@ def run_all( run_fn=run_fn, docker=docker, ) - lxml_mod.stage_lxml_runtime(repo_root, staging / "sysroot") # Ramfs — only needed for standalone mode. Multi-process and # single-process use host-filesystem access (no ramfs). diff --git a/.nanvix/z.py b/.nanvix/z.py index 341167dbd2cfefb..6c3f00e1ef2f77a 100644 --- a/.nanvix/z.py +++ b/.nanvix/z.py @@ -29,7 +29,6 @@ # Local modules (loaded via importlib since .nanvix/ is not a valid package name) # --------------------------------------------------------------------------- import sys as _sys -import tempfile from pathlib import Path from nanvix_zutil import ( @@ -84,9 +83,6 @@ "zlib": ["libz.a"], "sqlite": ["libsqlite3.a"], "openssl": ["libssl.a", "libcrypto.a"], - "libxml2": ["libxml2.a"], - "libxslt": ["libxslt.a", "libexslt.a"], - "lxml": ["liblxml_etree.a", "liblxml_elementpath.a"], "xz": ["liblzma.a"], } @@ -342,14 +338,7 @@ def _install_missing_deps(self) -> None: if not expected: continue libs_present = all((lib_dir / lib).exists() for lib in expected) - # For lxml, also require the python-packages payload. - if dep.name == "lxml": - pkg_present = ( - buildroot / "python-packages" / "lxml" / "__init__.py" - ).exists() - if libs_present and pkg_present: - continue - elif libs_present: + if libs_present: continue resolved = suffix_dep(dep, nanvix_version) if nanvix_version else dep self._download_dep_fallback(resolved, buildroot) @@ -369,7 +358,6 @@ def _download_dep_fallback( when the exact tag is missing). - Multiple deployment-mode candidates (standalone, single-process, multi-process). - - Extraction of ``python-packages/`` payload (e.g. lxml). """ dep_name = dep.name repo = dep.repo @@ -434,69 +422,6 @@ def _download_dep_fallback( log.warning(f"No compatible fallback asset for {dep_name}") return - # --- CPython-specific: extract python-packages/ (e.g. lxml) --- - cache_dir = buildroot.parent / "cache" - asset_prefix = f"{dep_name}-{platform}-" - for cached in sorted(cache_dir.iterdir()) if cache_dir.is_dir() else []: - if not cached.name.startswith(asset_prefix): - continue - self._extract_python_packages(cached, buildroot) - break - - def _extract_python_packages(self, asset_path: Path, buildroot: Path) -> None: - """Extract ``python-packages/`` from an archive into *buildroot*.""" - import tarfile - import zipfile - - with tempfile.TemporaryDirectory() as tmpdir: - extract_dir = Path(tmpdir) / "extracted" - extract_dir.mkdir() - - if zipfile.is_zipfile(asset_path): - with zipfile.ZipFile(asset_path) as zf: - for member in zf.namelist(): - if "python-packages" not in member: - continue - if os.path.isabs(member) or ".." in member.split("/"): - continue - dest = (extract_dir / member).resolve() - if not dest.is_relative_to(extract_dir.resolve()): - continue - zf.extract(member, extract_dir) - else: - with tarfile.open(str(asset_path), "r:*") as tf: - pkg_members = [ - m - for m in tf.getmembers() - if "python-packages" in m.name - and not os.path.isabs(m.name) - and ".." not in m.name.split("/") - ] - if not pkg_members: - return - try: - tf.extractall( - str(extract_dir), members=pkg_members, filter="data" - ) - except TypeError: - tf.extractall(str(extract_dir), members=pkg_members) - - for pkg_src in extract_dir.rglob("python-packages"): - if not pkg_src.is_dir(): - continue - pkg_dst = buildroot / "python-packages" - pkg_dst.mkdir(parents=True, exist_ok=True) - for item in pkg_src.iterdir(): - target = pkg_dst / item.name - if item.is_dir(): - if target.exists(): - shutil.rmtree(target) - shutil.copytree(item, target) - else: - shutil.copy2(item, target) - log.info(f"Installed python packages from {asset_path.name}") - break - if __name__ == "__main__": CPythonBuild.main() diff --git a/Lib/test/test_nanvix_lxml.py b/Lib/test/test_nanvix_lxml.py deleted file mode 100644 index 28c62b312b953c9..000000000000000 --- a/Lib/test/test_nanvix_lxml.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Smoke tests for lxml built-in on NanVix.""" - -import sys -import unittest -from test.support import is_nanvix -from test.support.import_helper import import_module - -if not is_nanvix: - raise unittest.SkipTest("lxml built-in is Nanvix-specific") - -etree = import_module("lxml.etree") - - -class NanvixLxmlTests(unittest.TestCase): - - def test_import_lxml_etree(self): - self.assertTrue(hasattr(etree, "fromstring")) - self.assertTrue(hasattr(etree, "_Element")) - - def test_parse_xml(self): - root = etree.fromstring(b'text') - self.assertEqual(root.tag, "root") - child = root.find("child") - self.assertIsNotNone(child) - self.assertEqual(child.text, "text") - self.assertEqual(child.get("key"), "val") - - def test_element_creation(self): - root = etree.Element("doc") - etree.SubElement(root, "item").text = "hello" - xml = etree.tostring(root, encoding="unicode") - self.assertIn("hello", xml) - - def test_elementpath(self): - root = etree.fromstring(b"12") - results = root.findall("b") - self.assertEqual(len(results), 2) - self.assertEqual(results[0].text, "1") - - def test_xmlfile_available(self): - # xmlfile must be importable from lxml.etree for openpyxl compat. - from lxml.etree import xmlfile - - self.assertTrue(callable(xmlfile)) - - def test_htmlfile_available(self): - from lxml.etree import htmlfile - - self.assertTrue(callable(htmlfile)) - - -if __name__ == "__main__": - unittest.main() diff --git a/Modules/lxml_elementpath_builtin.c b/Modules/lxml_elementpath_builtin.c deleted file mode 100644 index 6a1758e12246590..000000000000000 --- a/Modules/lxml_elementpath_builtin.c +++ /dev/null @@ -1,16 +0,0 @@ -/* - * lxml_elementpath_builtin.c - Shim for lxml._elementpath built-in module. - * - * Registers the Cython _elementpath extension under the flat name - * "_lxml_elementpath" for Modules/Setup.local. - */ - -#include "Python.h" - -extern PyObject* PyInit__elementpath(void); - -PyMODINIT_FUNC -PyInit__lxml_elementpath(void) -{ - return PyInit__elementpath(); -} diff --git a/Modules/lxml_etree_builtin.c b/Modules/lxml_etree_builtin.c deleted file mode 100644 index d028054d5b08f23..000000000000000 --- a/Modules/lxml_etree_builtin.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * lxml_etree_builtin.c - Shim to register lxml.etree as a CPython built-in. - * - * makesetup does not support dotted module names, so the Cython extension - * is registered under the flat name "_lxml_etree". A pure-Python shim at - * lxml/etree.py re-exports everything via `from _lxml_etree import *`. - * - * The Cython-generated code in liblxml_etree.a exports PyInit_etree. - * This wrapper provides PyInit__lxml_etree so the name matches the - * entry in Modules/Setup.local. - */ - -#include "Python.h" - -extern PyObject* PyInit_etree(void); - -PyMODINIT_FUNC -PyInit__lxml_etree(void) -{ - return PyInit_etree(); -}