diff --git a/cloud-init/user-data b/cloud-init/user-data index 8d95e3c..79ff952 100644 --- a/cloud-init/user-data +++ b/cloud-init/user-data @@ -21,18 +21,6 @@ bootcmd: wget -qO /usr/local/share/ca-certificates/mitmproxy.crt http://mitm.it/cert/pem update-ca-certificates fi - # Suppress initramfs rebuilds during first-boot provisioning only. - # Package installs (docker.io, etc.) trigger the initramfs-tools dpkg - # hook, and a full rebuild takes minutes under TCG emulation. The - # diversion is undone in runcmd so that future kernel upgrades (via - # unattended-upgrades) generate a working initramfs. - # Guard: boot-finished is written at the very end of cloud-init's final - # stage, so it won't exist during first boot but will on all subsequent. - - | - if [ ! -f /var/lib/cloud/instance/boot-finished ]; then - dpkg-divert --local --rename --add /usr/sbin/update-initramfs 2>/dev/null - ln -sf /bin/true /usr/sbin/update-initramfs - fi write_files: - path: /etc/apt/apt.conf.d/90proxy content: | @@ -197,14 +185,6 @@ packages: - docker.io runcmd: - # Undo the update-initramfs diversion applied during first-boot - # provisioning (see bootcmd). From this point on, kernel upgrades - # will generate a proper initramfs. - - | - if dpkg-divert --list /usr/sbin/update-initramfs 2>/dev/null | grep -q diversion; then - rm -f /usr/sbin/update-initramfs - dpkg-divert --local --rename --remove /usr/sbin/update-initramfs - fi - mkdir -p /mnt/9p /home/vm/shared - systemctl daemon-reload - systemctl enable --now mnt-9p.mount diff --git a/tests/test_e2e.py b/tests/test_e2e.py index a96814f..d77ec79 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -717,123 +717,3 @@ def test_guest_cannot_modify_host_allowlist(running_vm): allowlist_path.write_text(original_content) -# --------------------------------------------------------------------------- -# Kernel upgrade + reboot -# --------------------------------------------------------------------------- - - -def test_kernel_install_and_reboot(running_vm): - """Installing a new kernel and rebooting must not kernel panic. - - The base cloud-init config once diverted update-initramfs to /bin/true - to speed up provisioning (~2 min saved under TCG emulation). This was - safe under the assumption that the VM was ephemeral and never rebooted. - In practice, Debian's unattended-upgrades installs kernel security - updates on a daily timer. Because update-initramfs was a no-op, the - new kernel shipped without an initramfs. GRUB's os-prober still picked - up the new vmlinuz and made it the default boot entry — but with no - initrd line. On next boot the kernel couldn't load the virtio_blk - module (it lives in the initramfs, not built-in), so the root disk was - invisible and the kernel panicked: - - VFS: Cannot open root device "PARTUUID=..." or unknown-block(0,0) - Kernel panic - not syncing: VFS: Unable to mount root fs - - This test reproduces that scenario end-to-end: install a second kernel - flavor, set GRUB to boot it, and reboot. If update-initramfs is broken, - the VM kernel-panics and SSH never comes back. - - Placed last because it reboots the VM. - """ - # Detect guest architecture to pick the right cloud kernel package. - r = _vm_ssh("dpkg --print-architecture", timeout=10) - assert r.returncode == 0 - arch = r.stdout.strip() - cloud_pkg = f"linux-image-cloud-{arch}" - - _progress(f"Installing {cloud_pkg}…") - r = _vm_ssh( - f"bash -lc 'sudo apt-get install -y -qq {cloud_pkg} 2>&1'", - timeout=600, - ) - assert r.returncode == 0, ( - f"Kernel install failed (rc={r.returncode}):\n" - f"{r.stdout[-2000:]}\n{r.stderr[-2000:]}" - ) - - # Find the newly installed cloud kernel version. - r = _vm_ssh(f"ls /boot/vmlinuz-*-cloud-{arch}", timeout=10) - assert r.returncode == 0, f"No cloud kernel found in /boot:\n{r.stderr}" - cloud_vmlinuz = r.stdout.strip().splitlines()[-1].strip() - cloud_version = cloud_vmlinuz.rsplit("/", 1)[-1].removeprefix("vmlinuz-") - _progress(f"Installed cloud kernel: {cloud_version}") - - # Verify the initramfs was created for it. - r = _vm_ssh(f"test -f /boot/initrd.img-{cloud_version}", timeout=10) - assert r.returncode == 0, ( - f"initrd.img-{cloud_version} was not created.\n" - "update-initramfs is likely diverted to /bin/true." - ) - - # Set GRUB to boot the cloud kernel by default. - # Read the root filesystem UUID from the running VM rather than - # hardcoding a PARTUUID that is specific to one image build. - r = _vm_ssh( - "sudo grub-probe --target=fs_uuid /", - timeout=10, - ) - assert r.returncode == 0, f"Cannot determine root FS UUID:\n{r.stderr}" - root_uuid = r.stdout.strip() - grub_entry = f"gnulinux-advanced-{root_uuid}>gnulinux-{cloud_version}-advanced-{root_uuid}" - _vm_ssh( - f"sudo grub-set-default '{grub_entry}' 2>&1", - timeout=10, - ) - _vm_ssh("sudo update-grub 2>&1", timeout=60) - - # Verify GRUB config has an initrd line for the cloud kernel. - r = _vm_ssh("cat /boot/grub/grub.cfg", timeout=10) - assert f"initrd\t/boot/initrd.img-{cloud_version}" in r.stdout, ( - f"GRUB config missing initrd for {cloud_version}." - ) - - _progress("Rebooting into cloud kernel…") - _vm_ssh("sudo reboot", timeout=10) - - # Wait for SSH to go down. - time.sleep(10) - - # Wait for SSH to come back — if the kernel panicked, it never will. - deadline = time.monotonic() + BOOT_TIMEOUT - attempt = 0 - while time.monotonic() < deadline: - if running_vm.poll() is not None: - _dump_logs() - pytest.fail( - "QEMU exited during reboot — likely kernel panic.\n" - "Check console log above." - ) - attempt += 1 - remaining = int(deadline - time.monotonic()) - _progress(f"Post-reboot SSH probe #{attempt} ({remaining}s remaining)…") - try: - r = _vm_ssh("true", timeout=10) - if r.returncode == 0: - _progress(f"VM back after reboot ({attempt} probe(s))") - break - except subprocess.TimeoutExpired: - pass - time.sleep(SSH_POLL_INTERVAL) - else: - _dump_logs() - pytest.fail( - f"VM did not come back after reboot within {BOOT_TIMEOUT}s.\n" - "Likely kernel panic due to missing initramfs." - ) - - # Confirm we're running the new kernel. - r = _vm_ssh("uname -r", timeout=10) - _progress(f"Running kernel after reboot: {r.stdout.strip()}") - assert "cloud" in r.stdout, ( - f"Expected to boot cloud kernel, got: {r.stdout.strip()}" - )