Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions cloud-init/user-data
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,6 @@ bootcmd:
wget -qO /usr/local/share/ca-certificates/mitmproxy.crt http://mitm.it/cert/pem
update-ca-certificates
fi
# Suppress initramfs rebuilds during first-boot provisioning only.
# Package installs (docker.io, etc.) trigger the initramfs-tools dpkg
# hook, and a full rebuild takes minutes under TCG emulation. The
# diversion is undone in runcmd so that future kernel upgrades (via
# unattended-upgrades) generate a working initramfs.
# Guard: boot-finished is written at the very end of cloud-init's final
# stage, so it does not exist during first boot but does on every later boot.
- |
if [ ! -f /var/lib/cloud/instance/boot-finished ]; then
dpkg-divert --local --rename --add /usr/sbin/update-initramfs 2>/dev/null
ln -sf /bin/true /usr/sbin/update-initramfs
fi
write_files:
- path: /etc/apt/apt.conf.d/90proxy
content: |
Expand Down Expand Up @@ -197,14 +185,6 @@ packages:
- docker.io

runcmd:
# Undo the update-initramfs diversion applied during first-boot
# provisioning (see bootcmd). From this point on, kernel upgrades
# will generate a proper initramfs.
- |
if dpkg-divert --list /usr/sbin/update-initramfs 2>/dev/null | grep -q diversion; then
rm -f /usr/sbin/update-initramfs
dpkg-divert --local --rename --remove /usr/sbin/update-initramfs
fi
- mkdir -p /mnt/9p /home/vm/shared
- systemctl daemon-reload
- systemctl enable --now mnt-9p.mount
Expand Down
120 changes: 0 additions & 120 deletions tests/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,123 +717,3 @@ def test_guest_cannot_modify_host_allowlist(running_vm):
allowlist_path.write_text(original_content)


# ---------------------------------------------------------------------------
# Kernel upgrade + reboot
# ---------------------------------------------------------------------------


def test_kernel_install_and_reboot(running_vm):
    """Installing a new kernel and rebooting must not kernel panic.

    The base cloud-init config once diverted update-initramfs to /bin/true
    to speed up provisioning (~2 min saved under TCG emulation).  This was
    safe under the assumption that the VM was ephemeral and never rebooted.
    In practice, Debian's unattended-upgrades installs kernel security
    updates on a daily timer.  Because update-initramfs was a no-op, the
    new kernel shipped without an initramfs.  GRUB's config generation
    (update-grub) still picked up the new vmlinuz and made it the default
    boot entry — but with no initrd line.  On next boot the kernel couldn't
    load the virtio_blk module (it lives in the initramfs, not built-in),
    so the root disk was invisible and the kernel panicked:

        VFS: Cannot open root device "PARTUUID=..." or unknown-block(0,0)
        Kernel panic - not syncing: VFS: Unable to mount root fs

    This test reproduces that scenario end-to-end: install a second kernel
    flavor, set GRUB to boot it, and reboot.  If update-initramfs is broken,
    the VM kernel-panics and SSH never comes back.

    Placed last because it reboots the VM.

    Args:
        running_vm: Fixture for the running VM; only ``poll()`` is used
            here, to detect that the QEMU process has exited.
    """
    # Detect guest architecture to pick the right cloud kernel package.
    r = _vm_ssh("dpkg --print-architecture", timeout=10)
    assert r.returncode == 0, f"Cannot determine guest architecture:\n{r.stderr}"
    arch = r.stdout.strip()
    cloud_pkg = f"linux-image-cloud-{arch}"

    _progress(f"Installing {cloud_pkg}…")
    r = _vm_ssh(
        f"bash -lc 'sudo apt-get install -y -qq {cloud_pkg} 2>&1'",
        timeout=600,
    )
    assert r.returncode == 0, (
        f"Kernel install failed (rc={r.returncode}):\n"
        f"{r.stdout[-2000:]}\n{r.stderr[-2000:]}"
    )

    # Find the newly installed cloud kernel version.
    r = _vm_ssh(f"ls /boot/vmlinuz-*-cloud-{arch}", timeout=10)
    assert r.returncode == 0, f"No cloud kernel found in /boot:\n{r.stderr}"
    cloud_vmlinuz = r.stdout.strip().splitlines()[-1].strip()
    cloud_version = cloud_vmlinuz.rsplit("/", 1)[-1].removeprefix("vmlinuz-")
    _progress(f"Installed cloud kernel: {cloud_version}")

    # Verify the initramfs was created for it.
    r = _vm_ssh(f"test -f /boot/initrd.img-{cloud_version}", timeout=10)
    assert r.returncode == 0, (
        f"initrd.img-{cloud_version} was not created.\n"
        "update-initramfs is likely diverted to /bin/true."
    )

    # Set GRUB to boot the cloud kernel by default.
    # Read the root filesystem UUID from the running VM rather than
    # hardcoding a PARTUUID that is specific to one image build.
    r = _vm_ssh(
        "sudo grub-probe --target=fs_uuid /",
        timeout=10,
    )
    assert r.returncode == 0, f"Cannot determine root FS UUID:\n{r.stderr}"
    root_uuid = r.stdout.strip()
    grub_entry = (
        f"gnulinux-advanced-{root_uuid}"
        f">gnulinux-{cloud_version}-advanced-{root_uuid}"
    )
    # Check both GRUB steps explicitly: if either fails silently, the VM
    # boots the old kernel and the failure only shows up after the reboot.
    # stderr is folded into stdout by the 2>&1 in the remote command.
    r = _vm_ssh(
        f"sudo grub-set-default '{grub_entry}' 2>&1",
        timeout=10,
    )
    assert r.returncode == 0, (
        f"grub-set-default failed (rc={r.returncode}):\n{r.stdout[-2000:]}"
    )
    r = _vm_ssh("sudo update-grub 2>&1", timeout=60)
    assert r.returncode == 0, (
        f"update-grub failed (rc={r.returncode}):\n{r.stdout[-2000:]}"
    )

    # Verify GRUB config has an initrd line for the cloud kernel.
    r = _vm_ssh("cat /boot/grub/grub.cfg", timeout=10)
    assert r.returncode == 0, f"Cannot read /boot/grub/grub.cfg:\n{r.stderr}"
    assert f"initrd\t/boot/initrd.img-{cloud_version}" in r.stdout, (
        f"GRUB config missing initrd for {cloud_version}."
    )

    # Record the current boot ID.  The kernel generates a fresh one on every
    # boot, so a changed value proves the guest really restarted, rather
    # than merely answering SSH before it had finished shutting down.
    boot_id_cmd = "cat /proc/sys/kernel/random/boot_id"
    r = _vm_ssh(boot_id_cmd, timeout=10)
    assert r.returncode == 0, f"Cannot read boot_id before reboot:\n{r.stderr}"
    old_boot_id = r.stdout.strip()

    _progress("Rebooting into cloud kernel…")
    try:
        _vm_ssh("sudo reboot", timeout=10)
    except subprocess.TimeoutExpired:
        # The SSH session may hang instead of closing cleanly while the
        # guest shuts down; the polling loop below decides the outcome.
        pass

    # Give the guest a moment to start shutting down before probing.
    time.sleep(10)

    # Wait for SSH to come back — if the kernel panicked, it never will.
    deadline = time.monotonic() + BOOT_TIMEOUT
    attempt = 0
    while time.monotonic() < deadline:
        if running_vm.poll() is not None:
            _dump_logs()
            pytest.fail(
                "QEMU exited during reboot — likely kernel panic.\n"
                "Check console log above."
            )
        attempt += 1
        remaining = int(deadline - time.monotonic())
        _progress(f"Post-reboot SSH probe #{attempt} ({remaining}s remaining)…")
        try:
            r = _vm_ssh(boot_id_cmd, timeout=10)
        except subprocess.TimeoutExpired:
            r = None
        if r is not None and r.returncode == 0:
            new_boot_id = r.stdout.strip()
            # Ignore replies from the pre-reboot system still shutting down.
            if new_boot_id and new_boot_id != old_boot_id:
                _progress(f"VM back after reboot ({attempt} probe(s))")
                break
        time.sleep(SSH_POLL_INTERVAL)
    else:
        _dump_logs()
        pytest.fail(
            f"VM did not come back after reboot within {BOOT_TIMEOUT}s.\n"
            "Likely kernel panic due to missing initramfs."
        )

    # Confirm we're running the new kernel.
    r = _vm_ssh("uname -r", timeout=10)
    assert r.returncode == 0, f"uname -r failed after reboot:\n{r.stderr}"
    _progress(f"Running kernel after reboot: {r.stdout.strip()}")
    assert "cloud" in r.stdout, (
        f"Expected to boot cloud kernel, got: {r.stdout.strip()}"
    )