Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
11bc174
docs(brief): add M0.7 milestone brief
guysenpai Jun 5, 2026
5443ad7
docs(brief): confirm specs read for M0.7
guysenpai Jun 5, 2026
f1abaa5
docs(brief): activate M0.7
guysenpai Jun 5, 2026
0ed9720
docs(brief): journal update
guysenpai Jun 5, 2026
d689058
feat(ipc): add ShmRegion.fromFd for SCM_RIGHTS primary attach
guysenpai Jun 5, 2026
bf03366
feat(ipc): wire SCM_RIGHTS handoff end-to-end, bump protocol to v3
guysenpai Jun 5, 2026
e6d15c1
docs(brief): journal update
guysenpai Jun 5, 2026
8859ff9
fix(ipc): harden ShmRegionsHandoff validation per engine-ipc.md §8.3
guysenpai Jun 5, 2026
49abdce
docs(brief): journal update
guysenpai Jun 5, 2026
040d807
feat(ipc): extend message catalogue with control + project commands
guysenpai Jun 5, 2026
77ff420
docs(brief): journal update
guysenpai Jun 5, 2026
f4fbbf3
feat(ipc): wire Windows editor + runtime path via CreateProcessW
guysenpai Jun 5, 2026
c666a1a
docs(brief): journal update
guysenpai Jun 5, 2026
34a057e
fix(platform): correct Windows command-line arg quoting (ArgvQuote)
guysenpai Jun 5, 2026
bbbf9d7
docs(brief): journal update
guysenpai Jun 5, 2026
32e4fbb
fix(platform): make editor/runtime compile on Windows (args + utf16)
guysenpai Jun 5, 2026
ffe206c
docs(brief): journal update
guysenpai Jun 5, 2026
1c3e31d
fix(platform): resolve runtime path from exe dir for Windows spawn
guysenpai Jun 5, 2026
1887984
docs(brief): journal update
guysenpai Jun 5, 2026
8bced76
feat(ipc): add editor CommandLog ring for best-effort replay
guysenpai Jun 5, 2026
553f105
feat(ipc): persist minimal scene snapshot on save-project
guysenpai Jun 5, 2026
4590370
feat(ipc): add best-effort command replay after crash restart
guysenpai Jun 5, 2026
fab104f
test(ipc): un-gate crash recovery + cover post-save replay
guysenpai Jun 5, 2026
8c34093
test(ipc): fuzz the full catalogue + detect leaks in fuzz_1h
guysenpai Jun 5, 2026
3d56e67
chore(ci): add nightly IPC fuzz workflow on linux + windows
guysenpai Jun 5, 2026
1860b69
docs(brief): journal update
guysenpai Jun 5, 2026
3b53414
test(ipc): harden fuzz teardown with a well-formed sentinel
guysenpai Jun 6, 2026
c32a42c
docs(brief): journal update
guysenpai Jun 6, 2026
2bccbc3
docs(brief): close M0.7 with final milestone synthesis
guysenpai Jun 6, 2026
50fa16f
docs(ipc): document the synchronous-replay seq_id invariant
guysenpai Jun 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .github/workflows/nightly-fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name: Nightly IPC fuzz

# 1 h IPC fuzz over the full message catalogue (tests/ipc/fuzz_1h.zig),
# promoted to nightly CI at M0.7 / E4. Runs on Linux + Windows and
# archives the stdout digest as an artifact (G3 gate). Scheduled runs
# only fire from the default branch (GitHub rule), so this activates once
# the M0.7 branch is squash-merged to `main`; `workflow_dispatch` lets it
# be triggered manually from the Actions tab in the meantime.
on:
  schedule:
    # 04:00 UTC daily — off-peak for the shared runner pool.
    - cron: '0 4 * * *'
  workflow_dispatch:
    inputs:
      duration_ms:
        description: 'Fuzz duration in ms (default 3600000 = 1 h)'
        required: false
        default: '3600000'

concurrency:
  group: nightly-fuzz-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  fuzz:
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-24.04, windows-2025]
    runs-on: ${{ matrix.os }}
    # 1 h fuzz + ReleaseSafe build (~3 min on the 2-vCPU Windows runner) +
    # overhead. 90 min leaves headroom without risking a runaway hang
    # masquerading as a pass.
    timeout-minutes: 90
    steps:
      - uses: actions/checkout@v6

      # Pinned to 0.16.0 exact — same rationale as ci.yml (the action takes
      # `0.16.x` literally and 404s on the mirrors).
      - uses: mlugg/setup-zig@v2
        with:
          version: 0.16.0

      - name: zig build (ReleaseSafe)
        run: zig build -Doptimize=ReleaseSafe

      - name: Run 1 h IPC fuzz over the full catalogue
        shell: bash
        # The dispatch input is handed to the script through the environment
        # instead of being expanded into the script text: a `${{ … }}`
        # expression inside `run:` is substituted before bash parses the
        # script, so a crafted input value could otherwise inject shell code.
        # Scheduled runs have no inputs and fall back to the 1 h default.
        env:
          DURATION_MS: ${{ github.event.inputs.duration_ms || '3600000' }}
        run: |
          set -euo pipefail
          zig build test-ipc-fuzz-1h -Doptimize=ReleaseSafe -- \
            --duration-ms="${DURATION_MS}" 2>&1 | tee "fuzz-${{ matrix.os }}.txt"

      - name: Upload fuzz digest artifact
        # Upload even when the fuzz step failed so the digest can be inspected.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: nightly-fuzz-digest-${{ matrix.os }}
          path: fuzz-${{ matrix.os }}.txt
          retention-days: 30
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ zig-pkg/
# Weld caches (shader compile cache, asset cooking cache, etc.)
.weld-cache/

# IPC runtime artifacts — minimal scene snapshots written on SaveProject
# (engine-ipc.md §7.1) and any leftover from crash-recovery tests
weld-snap-*.bin
weld-snapshot-*.bin

# Bench outputs (per-machine, regenerated by `zig build bench-*`)
bench/out/*.md

Expand Down
728 changes: 728 additions & 0 deletions briefs/M0.7-ipc-scm-rights-windows-fuzz.md

Large diffs are not rendered by default.

31 changes: 19 additions & 12 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,8 @@ pub fn build(b: *std.Build) void {
"tests/ipc/viewport_cases/wrong_width.zig",
"tests/ipc/viewport_cases/no_tearing_1000_frames.zig",
"tests/ipc/fd_passing.zig",
"tests/ipc/handoff_fd.zig",
"tests/ipc/catalogue.zig",
"tests/ipc/process.zig",
"tests/ipc/handshake.zig",
"tests/ipc/crash_recovery.zig",
Expand All @@ -642,17 +644,18 @@ pub fn build(b: *std.Build) void {
t_mod.addImport("weld_core", core_module);
const t = b.addTest(.{ .root_module = t_mod });
const run_t = b.addRunArtifact(t);
// `tests/ipc/crash_recovery.zig` spawns
// `zig-out/bin/weld-runtime` to exercise the editor↔runtime
// termination contract (G4 + G5). The path is relative to
// the project root which is the cwd when `zig build test`
// dispatches the test binary; the runtime exe must already
// be installed for `posix_spawnp` to find it. Bare
// `b.addRunArtifact(t).step.dependOn(b.getInstallStep())`
// would gate the test on every install step (including the
// S5 etch_cook), so we wire the dependency narrowly to the
// runtime install step alone.
if (std.mem.eql(u8, p, "tests/ipc/crash_recovery.zig")) {
// `tests/ipc/crash_recovery.zig` and `tests/ipc/catalogue.zig`
// spawn `zig-out/bin/weld-runtime` to exercise the editor↔runtime
// contract end-to-end (G4 + G5; M0.7 / E2 catalogue handlers). The
// path is relative to the project root which is the cwd when
// `zig build test` dispatches the test binary; the runtime exe must
// already be installed for `posix_spawnp` to find it. Bare
// `b.addRunArtifact(t).step.dependOn(b.getInstallStep())` would gate
// the test on every install step (including the S5 etch_cook), so we
// wire the dependency narrowly to the runtime install step alone.
if (std.mem.eql(u8, p, "tests/ipc/crash_recovery.zig") or
std.mem.eql(u8, p, "tests/ipc/catalogue.zig"))
{
run_t.step.dependOn(&b.addInstallArtifact(runtime_exe, .{}).step);
}
test_step.dependOn(&run_t.step);
Expand Down Expand Up @@ -698,9 +701,13 @@ pub fn build(b: *std.Build) void {
b.installArtifact(fuzz_1h_exe);
const fuzz_1h_run = b.addRunArtifact(fuzz_1h_exe);
fuzz_1h_run.step.dependOn(b.getInstallStep());
// Forward `-- <args>` so the duration can be overridden for a local
// smoke run, e.g. `zig build test-ipc-fuzz-1h -- --duration-ms=3000`.
// The nightly workflow always passes `--duration-ms` explicitly
// (3600000 ms = 1 h unless overridden via `workflow_dispatch`).
if (b.args) |forwarded| fuzz_1h_run.addArgs(forwarded);
const fuzz_1h_step = b.step(
"test-ipc-fuzz-1h",
"Run the S6 1 h IPC fuzz harness (manual; output digest archived in validation/s6-go-nogo.md)",
"Run the IPC fuzz harness over the full catalogue (1 h default; nightly CI). Override: -- --duration-ms=N",
);
fuzz_1h_step.dependOn(&fuzz_1h_run.step);

Expand Down
203 changes: 203 additions & 0 deletions src/core/ipc/cleanup.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
//! Startup orphan reaping for IPC endpoints (`engine-ipc.md` §2.4 +
//! §6.3). The editor owns its Unix socket file (`/tmp/weld-<pid>.sock`)
//! and its POSIX shm regions (`/weld-shm-<role>-<pid>`); a `kill -9`
//! of the editor leaves both behind, named with the dead editor's PID.
//! The next editor calls `reapOrphans` at startup to remove any such
//! orphan whose embedded PID is no longer alive (`process.is_alive`).
//!
//! Safety: an endpoint is removed **only** when its PID is dead, so a
//! second editor running concurrently (live PID) never has its
//! endpoints reaped. The reap is best-effort — every failure is
//! swallowed (it is startup hygiene, not a correctness gate).
//!
//! Implementation note: raw `opendir`/`readdir` via `extern "c"`,
//! consistent with the rest of the IPC module (`shm_posix.zig`,
//! `transport_posix.zig`) which binds libc directly to stay decoupled
//! from the evolving `std.fs` / `std.Io.Dir` signatures across Zig
//! 0.16 patches. Windows is a no-op: named pipes and named file
//! mappings are refcounted kernel objects that vanish with their last
//! handle, so there is nothing to unlink. shm orphan scanning uses the
//! Linux `/dev/shm` tmpfs listing (macOS POSIX shm objects are not
//! filesystem-visible, so only the socket reap runs there).

const std = @import("std");
const builtin = @import("builtin");

const process = @import("../platform/process.zig");

const is_linux = builtin.os.tag == .linux;
const is_posix = builtin.os.tag == .linux or builtin.os.tag == .macos;

// `struct dirent` layout. Linux glibc and macOS (arm64, 64-bit inode)
// differ in field order/width before `d_name`; only the `d_name`
// offset matters here (we read it as a NUL-terminated slice).
//
// The first branch is selected when `is_linux` is true; the second is
// used for every other target. `readdir` is only reached behind the
// `is_posix` gate in `reapOrphans`, so in practice the second branch
// describes macOS.
// NOTE(review): both layouts are hand-mirrored here rather than taken
// from a libc header — re-verify the `d_name` offset before enabling
// another target or architecture.
const dirent = if (is_linux) extern struct {
d_ino: u64,
d_off: i64,
d_reclen: u16,
d_type: u8,
// Entry name; read via `std.mem.sliceTo(.., 0)` by the reap loops.
d_name: [256]u8,
} else extern struct {
d_ino: u64,
d_seekoff: u64,
d_reclen: u16,
d_namlen: u16,
d_type: u8,
// Entry name; read via `std.mem.sliceTo(.., 0)` by the reap loops.
d_name: [1024]u8,
};

/// Opaque stand-in for libc's directory stream type. It is only ever
/// handled through the pointer returned by `sys.opendir`.
const DIR = opaque {};

/// Raw libc bindings used by the reap routines. Return codes are plain C
/// `int`s; callers in this file discard them because reaping is
/// best-effort.
const sys = struct {
    /// Opens a directory stream; `null` when the directory cannot be opened.
    extern "c" fn opendir(name: [*:0]const u8) ?*DIR;
    /// Returns the next entry of the stream, or `null` when there is none.
    extern "c" fn readdir(dir: *DIR) ?*dirent;
    extern "c" fn closedir(dir: *DIR) c_int;
    extern "c" fn unlink(path: [*:0]const u8) c_int;
    // Declared with `c_int` (not `i32`) so the binding uses the C ABI
    // integer type like every sibling declaration above.
    extern "c" fn shm_unlink(name: [*:0]const u8) c_int;
};

/// Startup sweep that deletes IPC endpoints abandoned by crashed editors.
///
/// On Linux and macOS it walks `/tmp` looking for `weld-<pid>.sock`
/// sockets; on Linux it additionally walks `/dev/shm` looking for
/// `weld-shm-*-<pid>` regions. An entry is unlinked only when its `<pid>`
/// is no longer alive, so endpoints of a concurrently-running editor are
/// kept. Best-effort: nothing is reported to the caller. On any other
/// target (e.g. Windows) the function does nothing.
pub fn reapOrphans() void {
    if (comptime is_posix) {
        reapSocketOrphans();
        // The shm listing under `/dev/shm` is only scanned on Linux.
        if (comptime is_linux) {
            reapShmOrphans();
        }
    }
}

/// Parses the PID out of an editor socket name `weld-<pid>.sock`.
///
/// Returns `null` for any name that is not exactly that shape (so test
/// sockets like `weld-crashtest-<pid>.sock` are left untouched), and for
/// a `<pid>` that does not fit in `process.Pid`.
///
/// `<pid>` must consist solely of ASCII decimal digits. `std.fmt.parseInt`
/// on its own also accepts a leading `+`/`-` sign and `_` digit
/// separators, which would let names such as `weld--5.sock`,
/// `weld-+12.sock` or `weld-1_2.sock` through; a negative value must never
/// reach the liveness probe (for a `kill`-style probe it would denote a
/// process group, not a process).
fn pidFromSocketName(name: []const u8) ?process.Pid {
    const prefix = "weld-";
    const suffix = ".sock";
    if (!std.mem.startsWith(u8, name, prefix)) return null;
    if (!std.mem.endsWith(u8, name, suffix)) return null;
    const mid = name[prefix.len .. name.len - suffix.len];
    if (mid.len == 0) return null;
    // Strict shape check: digits only, no sign, no separators.
    for (mid) |c| switch (c) {
        '0'...'9' => {},
        else => return null,
    };
    // Only overflow can fail at this point; treat it as "not ours".
    return std.fmt.parseInt(process.Pid, mid, 10) catch null;
}

/// Parses the trailing PID out of a shm region name
/// `weld-shm-<role>-<pid>`.
///
/// Returns `null` when the name does not start with `weld-shm-`, when the
/// segment after the last `-` is empty or not purely ASCII decimal digits,
/// or when the value does not fit in `process.Pid`.
///
/// The digits-only check matters because `std.fmt.parseInt` by itself
/// also accepts a leading `+` and `_` digit separators, so names such as
/// `weld-shm-x-+5` or `weld-shm-x-1_0` would otherwise be treated as
/// carrying a PID and become candidates for unlinking.
fn pidFromShmName(name: []const u8) ?process.Pid {
    const prefix = "weld-shm-";
    if (!std.mem.startsWith(u8, name, prefix)) return null;
    // The prefix itself contains '-', so a last dash always exists here;
    // `orelse` is kept as a defensive fallback.
    const last_dash = std.mem.lastIndexOfScalar(u8, name, '-') orelse return null;
    const pid_str = name[last_dash + 1 ..];
    if (pid_str.len == 0) return null;
    // Strict shape check: digits only, no sign, no separators.
    for (pid_str) |c| switch (c) {
        '0'...'9' => {},
        else => return null,
    };
    // Only overflow can fail at this point; treat it as "not ours".
    return std.fmt.parseInt(process.Pid, pid_str, 10) catch null;
}

/// Walks `/tmp` and unlinks every `weld-<pid>.sock` entry whose PID is
/// reported dead by `process.is_alive`. Entries that do not match the
/// socket naming scheme, or whose full path does not fit the scratch
/// buffer, are skipped. All libc return codes are ignored (best-effort).
fn reapSocketOrphans() void {
    const tmp_dir = sys.opendir("/tmp") orelse return;
    defer _ = sys.closedir(tmp_dir);

    // Scratch space for the NUL-terminated absolute path of one entry.
    var path_buf: [320]u8 = undefined;

    while (sys.readdir(tmp_dir)) |entry| {
        const entry_name = std.mem.sliceTo(&entry.d_name, 0);
        if (pidFromSocketName(entry_name)) |owner_pid| {
            if (!process.is_alive(owner_pid)) {
                if (std.fmt.bufPrintZ(&path_buf, "/tmp/{s}", .{entry_name})) |socket_path| {
                    _ = sys.unlink(socket_path.ptr);
                } else |_| {}
            }
        }
    }
}

/// Walks `/dev/shm` and removes every `weld-shm-<role>-<pid>` region whose
/// PID is reported dead by `process.is_alive`. Only invoked from
/// `reapOrphans` on Linux. Non-matching entries and names that do not fit
/// the scratch buffer are skipped; libc return codes are ignored.
fn reapShmOrphans() void {
    const shm_dir = sys.opendir("/dev/shm") orelse return;
    defer _ = sys.closedir(shm_dir);

    // Scratch space for the NUL-terminated shm object name.
    var name_buf: [320]u8 = undefined;

    while (sys.readdir(shm_dir)) |entry| {
        const entry_name = std.mem.sliceTo(&entry.d_name, 0);
        const owner_pid = pidFromShmName(entry_name) orelse continue;
        if (!process.is_alive(owner_pid)) {
            // `shm_unlink` expects the name in the form given to
            // `shm_open` (with a leading slash); on Linux that object is
            // the `/dev/shm/<name>` entry being listed here.
            if (std.fmt.bufPrintZ(&name_buf, "/{s}", .{entry_name})) |region_name| {
                _ = sys.shm_unlink(region_name.ptr);
            } else |_| {}
        }
    }
}

// ---------------------------------------------------------------- tests --

// libc bindings referenced only by the test blocks below; the reap
// routines themselves go through the `sys` namespace.
// PID of the test process, used as the "live" PID.
extern "c" fn getpid() process.Pid;
// Used by the socket test to create placeholder files at socket paths.
extern "c" fn creat(path: [*:0]const u8, mode: c_uint) c_int;
extern "c" fn close(fd: c_int) c_int;
// Called with mode 0 by the tests to check whether a path still exists
// (0 is presumably `F_OK` — TODO confirm against the target's headers).
extern "c" fn access(path: [*:0]const u8, mode: c_int) c_int;

test "pidFromSocketName parses weld-<pid>.sock only" {
    // Table of (socket name, expected parse result).
    const Case = struct { name: []const u8, want: ?process.Pid };
    const cases = [_]Case{
        .{ .name = "weld-1234.sock", .want = 1234 },
        .{ .name = "weld-crashtest-1234.sock", .want = null },
        .{ .name = "weld-.sock", .want = null },
        .{ .name = "other-1234.sock", .want = null },
        .{ .name = "weld-1234.txt", .want = null },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.want, pidFromSocketName(case.name));
    }
}

test "pidFromShmName parses the trailing pid of weld-shm-<role>-<pid>" {
    // Table of (shm region name, expected parse result).
    const Case = struct { name: []const u8, want: ?process.Pid };
    const cases = [_]Case{
        .{ .name = "weld-shm-viewport-77", .want = 77 },
        .{ .name = "weld-shm-overlays-5", .want = 5 },
        .{ .name = "weld-viewport-9", .want = null },
        .{ .name = "weld-shm-noprefixmatch", .want = null },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.want, pidFromShmName(case.name));
    }
}

test "reapOrphans removes a dead-pid socket and keeps a live-pid one" {
    if (comptime !is_posix) return error.SkipZigTest;

    // A PID far above any real one — `kill(pid, 0)` returns ESRCH, so
    // `is_alive` is false. Deterministic on Linux + macOS.
    const dead_pid: process.Pid = 0x7FFF_FFFF;
    const my_pid = getpid();
    try std.testing.expect(!process.is_alive(dead_pid));
    try std.testing.expect(process.is_alive(my_pid));

    var dead_buf: [64]u8 = undefined;
    var live_buf: [64]u8 = undefined;
    const dead_path = try std.fmt.bufPrintZ(&dead_buf, "/tmp/weld-{d}.sock", .{dead_pid});
    const live_path = try std.fmt.bufPrintZ(&live_buf, "/tmp/weld-{d}.sock", .{my_pid});

    // Clear leftovers from a prior run.
    _ = sys.unlink(dead_path.ptr);
    _ = sys.unlink(live_path.ptr);

    // Register cleanup *before* creating anything, so a failure part-way
    // through setup (e.g. the second `creat`) cannot leave a placeholder
    // file behind in `/tmp`. Unlinking a path that was never created is
    // harmless: the result is discarded.
    defer _ = sys.unlink(live_path.ptr);
    defer _ = sys.unlink(dead_path.ptr);

    // Plain files stand in for sockets: the reaper matches on name only.
    const fd_dead = creat(dead_path.ptr, 0o600);
    try std.testing.expect(fd_dead >= 0);
    _ = close(fd_dead);
    const fd_live = creat(live_path.ptr, 0o600);
    try std.testing.expect(fd_live >= 0);
    _ = close(fd_live);

    reapOrphans();

    // The dead-pid orphan is gone; the live-pid endpoint survives.
    try std.testing.expect(access(dead_path.ptr, 0) != 0);
    try std.testing.expect(access(live_path.ptr, 0) == 0);
}

test "reapOrphans removes a dead-pid shm region on Linux" {
    if (comptime !is_linux) return error.SkipZigTest;

    // Test-local libc binding and the Linux open flags it needs.
    const libc = struct {
        extern "c" fn shm_open(name: [*:0]const u8, oflag: i32, mode: u32) i32;
    };
    const O_RDWR: i32 = 0x0002;
    const O_CREAT: i32 = 0x0040;

    // Same never-alive PID as the socket test.
    const dead_pid: process.Pid = 0x7FFF_FFFF;

    var region_buf: [64]u8 = undefined;
    const region_name = try std.fmt.bufPrintZ(&region_buf, "/weld-shm-reaptest-{d}", .{dead_pid});

    // Drop whatever a previous run may have left, then create the region.
    _ = sys.shm_unlink(region_name.ptr);
    const region_fd = libc.shm_open(region_name.ptr, O_RDWR | O_CREAT, 0o600);
    try std.testing.expect(region_fd >= 0);
    _ = close(region_fd);
    // Safety net in case the reaper under test does not remove it.
    defer _ = sys.shm_unlink(region_name.ptr);

    // The region must be visible as `/dev/shm/weld-shm-reaptest-<dead>`.
    var fs_buf: [64]u8 = undefined;
    const fs_path = try std.fmt.bufPrintZ(&fs_buf, "/dev/shm/weld-shm-reaptest-{d}", .{dead_pid});
    try std.testing.expect(access(fs_path.ptr, 0) == 0);

    reapOrphans();

    // After the sweep the filesystem entry is gone.
    try std.testing.expect(access(fs_path.ptr, 0) != 0);
}
Loading
Loading