Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 211 additions & 0 deletions bench/asset_cache.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
//! Bench: asset cooking-cache hit differential — M0.6 / E4.
//!
//! Measures the wall-clock differential between a *cold cook* (cache miss:
//! BLAKE3 over the payload + assemble + write the `.bin`) and a *warm cache
//! hit* (a directory lookup that skips the cook entirely). This is the
//! performance half of the brief's cache criterion; the correctness half
//! (a miss → hit transition that returns byte-identical bytes) is the
//! deterministic gate `tests/assets/cache_diff.zig`.
//!
//! The differential is host- and load-dependent, so it lives here — measured
//! under the opposable protocol on the reference machine — not inside
//! `zig build test`, where a single cache-hit sample spiking on a page
//! fault / AV scan / cold directory would red-fail the gate. The brief's
//! reference figure is ≥ 100 ms cold cook / < 10 ms hit for a real
//! decode-heavy asset; this synthetic 16 MiB RGBA8 cook is hash- and
//! write-bound rather than decode-bound, so the absolute cold time is
//! smaller — the *ratio* (cook ≫ lookup) is the signal.
//!
//! Run: `zig build bench-asset-cache` (add `-- --smoke` for a tiny CI sanity
//! run). Writes `bench/out/asset_cache_<os>.md`.

const std = @import("std");
const builtin = @import("builtin");
const assets = @import("weld_asset_pipeline");

const log = std.log.scoped(.bench_asset_cache);

const REPORT_DIR: []const u8 = "bench/out";
const SCRATCH_DIR: []const u8 = "bench/out/asset_cache_scratch";

/// Entry point of the asset cooking-cache bench.
///
/// For each iteration it times a cold path (lookup that must miss, cook the
/// texture, store it in the cache) and then a warm path (a lookup that must
/// hit), and finally hands both sample series to `writeReport`.
///
/// Pass `--smoke` to shrink the asset and the iteration count for a quick
/// sanity run.
///
/// Errors: besides allocation and I/O failures this returns
/// `error.UnexpectedCacheHit` when a key that was never stored is already
/// present, and `error.UnexpectedCacheMiss` when a key that was just stored
/// cannot be found. Those are runtime I/O outcomes, so they are reported as
/// errors rather than asserted (a failed `std.debug.assert` is undefined
/// behaviour in ReleaseFast, the mode a bench is typically built in).
pub fn main(init: std.process.Init) !void {
    const gpa = init.gpa;
    const io = init.io;
    const args = try init.minimal.args.toSlice(init.arena.allocator());

    // args[0] is the program name. Guard the slice so an empty argv cannot
    // index past the end.
    var smoke = false;
    if (args.len > 1) {
        for (args[1..]) |a| {
            if (std.mem.eql(u8, a, "--smoke")) smoke = true;
        }
    }

    // Non-smoke: 2048×2048 RGBA8 = 16 MiB, so the cold cook (BLAKE3 over the
    // payload + a 16 MiB write) is clearly expensive vs a dir-lookup hit.
    // Smoke: a tiny asset, just enough to exercise the path under CI.
    const dim: u32 = if (smoke) 64 else 2048;
    const iterations: usize = if (smoke) 3 else 16;

    log.info("config: {d}x{d} RGBA8 ({d} KiB), {d} iterations, smoke={}", .{
        dim, dim, (dim * dim * 4) / 1024, iterations, smoke,
    });

    const cwd = std.Io.Dir.cwd();
    // The exit-time cleanup below only runs on a normal or error return. A
    // previous run that panicked or was killed leaves its entries behind, and
    // those would turn this run's "cold" samples into hits. Start clean.
    try cwd.deleteTree(io, SCRATCH_DIR);
    cwd.createDirPath(io, SCRATCH_DIR) catch |e| switch (e) {
        error.PathAlreadyExists => {},
        else => return e,
    };
    // The scratch cache is bench-private state; remove it on the way out so a
    // re-run always starts from genuine misses (cf. bench/etch_compile.zig).
    // Best-effort: a failure here must not fail the bench, but it is logged.
    defer cwd.deleteTree(io, SCRATCH_DIR) catch |e| log.warn(
        "could not remove scratch dir {s}: {s}",
        .{ SCRATCH_DIR, @errorName(e) },
    );

    var scratch = try cwd.openDir(io, SCRATCH_DIR, .{});
    defer scratch.close(io);
    const cache = assets.cache.Cache.init(scratch);

    // Synthetic payload: a cheap multiplicative pattern, truncated to a byte.
    const blob = try gpa.alloc(u8, dim * dim * 4);
    defer gpa.free(blob);
    for (blob, 0..) |*b, i| b.* = @truncate(i *% 2_654_435_761);

    const source_hash = assets.hash.hex128(blob);
    const extracted = [_]assets.format.Field{
        .{ .key = "width", .value = .{ .int = @intCast(dim) } },
        .{ .key = "height", .value = .{ .int = @intCast(dim) } },
        .{ .key = "blob", .value = .{ .string = &source_hash } },
    };
    const doc = assets.AssetDoc{
        .name = "big",
        .type_name = "Texture2D",
        .version = 1,
        .source = "big.png",
        .source_hash = &source_hash,
        .extracted = &extracted,
    };

    const cold_ns = try gpa.alloc(u64, iterations);
    defer gpa.free(cold_ns);
    const hit_ns = try gpa.alloc(u64, iterations);
    defer gpa.free(hit_ns);

    // Each iteration uses a distinct cache key (distinct settings string) so
    // every cold sample starts from a genuine miss in the shared scratch dir.
    var settings_buf: [16]u8 = undefined;
    for (0..iterations) |i| {
        const settings = try std.fmt.bufPrint(&settings_buf, "pc-{d}", .{i});
        const key = assets.cache.computeKey(&source_hash, settings, 0);

        // Cold path — cache miss: look up (absent) → cook → store.
        // Scoped so `bin` is released on every exit, including a failed
        // `put`, and so the free stays outside the timed region.
        {
            const t_cold = std.Io.Clock.Timestamp.now(io, .awake);
            if (cache.contains(io, &key)) return error.UnexpectedCacheHit;
            const bin = try assets.cookers.cookTexture(gpa, doc, blob);
            defer gpa.free(bin);
            try cache.put(io, &key, bin);
            cold_ns[i] = @intCast(t_cold.untilNow(io).raw.nanoseconds);
        }

        // Warm path — cache hit: a directory lookup; the cook is skipped.
        const t_hit = std.Io.Clock.Timestamp.now(io, .awake);
        const hit = cache.contains(io, &key);
        hit_ns[i] = @intCast(t_hit.untilNow(io).raw.nanoseconds);
        if (!hit) return error.UnexpectedCacheMiss;
    }

    try writeReport(gpa, io, cold_ns, hit_ns, dim, iterations, smoke);
}

/// Order statistics over one series of timing samples, in nanoseconds.
const Stats = struct {
    /// Smallest sample.
    min_ns: u64,
    /// Median. For an even sample count this is the upper of the two middle
    /// values (index `len / 2` of the sorted series).
    p50_ns: u64,
    /// Largest sample.
    max_ns: u64,
};

/// Computes min / median / max of `samples`.
///
/// `samples` is not modified; a temporary sorted copy is allocated from `gpa`
/// and freed before returning. An empty `samples` yields all-zero stats
/// instead of indexing out of bounds.
///
/// Returns an error only if the temporary copy cannot be allocated.
fn computeStats(gpa: std.mem.Allocator, samples: []const u64) !Stats {
    // Nothing to rank: `sorted[0]` and `sorted.len - 1` would be invalid.
    if (samples.len == 0) return .{ .min_ns = 0, .p50_ns = 0, .max_ns = 0 };

    const sorted = try gpa.dupe(u64, samples);
    defer gpa.free(sorted);
    std.mem.sort(u64, sorted, {}, std.sort.asc(u64));
    return .{
        .min_ns = sorted[0],
        .p50_ns = sorted[sorted.len / 2],
        .max_ns = sorted[sorted.len - 1],
    };
}

/// Summarises both sample series and writes the Markdown report to
/// `<REPORT_DIR>/asset_cache_<os>.md`, creating the directory if needed and
/// truncating any previous report. Also logs the report path and the headline
/// numbers.
///
/// - `cold_ns` / `hit_ns`: per-iteration timings in nanoseconds.
/// - `dim`, `iterations`, `smoke`: echoed into the report's config line.
///
/// Returns any allocation, formatting, or I/O error encountered.
fn writeReport(
    gpa: std.mem.Allocator,
    io: std.Io,
    cold_ns: []const u64,
    hit_ns: []const u64,
    dim: u32,
    iterations: usize,
    smoke: bool,
) !void {
    const cold_stats = try computeStats(gpa, cold_ns);
    const hit_stats = try computeStats(gpa, hit_ns);

    // A zero hit median cannot be divided by; it is reported as a 0× speedup.
    const ratio: f64 = blk: {
        if (hit_stats.p50_ns == 0) break :blk 0;
        const cold_p50: f64 = @floatFromInt(cold_stats.p50_ns);
        const hit_p50: f64 = @floatFromInt(hit_stats.p50_ns);
        break :blk cold_p50 / hit_p50;
    };

    const out_dir = std.Io.Dir.cwd();
    out_dir.createDirPath(io, REPORT_DIR) catch |err| switch (err) {
        error.PathAlreadyExists => {},
        else => return err,
    };

    // One report per host OS, e.g. `asset_cache_linux.md`.
    const os_name = @tagName(builtin.os.tag);
    var name_storage: [256]u8 = undefined;
    const report_path = try std.fmt.bufPrint(
        &name_storage,
        "{s}/asset_cache_{s}.md",
        .{ REPORT_DIR, os_name },
    );

    var report = try out_dir.createFile(io, report_path, .{ .truncate = true });
    defer report.close(io);

    var staging: [4096]u8 = undefined;
    var file_writer = report.writer(io, &staging);
    const out = &file_writer.interface;

    const payload_kib = (dim * dim * 4) / 1024;
    const smoke_suffix: []const u8 = if (smoke) " (smoke)" else "";

    try out.print("# Bench: asset cooking-cache hit differential — {s}\n\n", .{os_name});
    try out.print(
        "Config: {d}×{d} RGBA8 ({d} KiB payload), {d} iterations{s}.\n\n",
        .{ dim, dim, payload_kib, iterations, smoke_suffix },
    );

    // Cold samples are shown in milliseconds.
    const cold_min_ms = nanosToMs(cold_stats.min_ns);
    const cold_p50_ms = nanosToMs(cold_stats.p50_ns);
    const cold_max_ms = nanosToMs(cold_stats.max_ns);
    try out.writeAll("## Cold cook (cache miss)\n\n");
    try out.writeAll("| Metric | Value (ms) |\n|---|---|\n");
    try out.print("| min | {d:.3} |\n", .{cold_min_ms});
    try out.print("| p50 | {d:.3} |\n", .{cold_p50_ms});
    try out.print("| max | {d:.3} |\n\n", .{cold_max_ms});

    // Hit samples are shown in microseconds.
    const hit_min_us = nanosToUs(hit_stats.min_ns);
    const hit_p50_us = nanosToUs(hit_stats.p50_ns);
    const hit_max_us = nanosToUs(hit_stats.max_ns);
    try out.writeAll("## Warm hit (cache lookup)\n\n");
    try out.writeAll("| Metric | Value (µs) |\n|---|---|\n");
    try out.print("| min | {d:.3} |\n", .{hit_min_us});
    try out.print("| p50 | {d:.3} |\n", .{hit_p50_us});
    try out.print("| max | {d:.3} |\n\n", .{hit_max_us});

    try out.writeAll("## Differential\n\n");
    try out.print("- Cache-hit speedup (cold p50 / hit p50): **{d:.0}×**\n\n", .{ratio});

    try out.writeAll("## Notes\n\n");
    try out.writeAll(
        "This differential is host- and load-dependent and is therefore a " ++
            "bench number, not a `zig build test` gate (the correctness half — " ++
            "a miss → hit transition that returns byte-identical bytes — is the " ++
            "deterministic gate `tests/assets/cache_diff.zig`). Measure under the " ++
            "opposable protocol on the reference machine. The brief reference is " ++
            "≥ 100 ms cold / < 10 ms hit for a real decode-heavy asset; this " ++
            "synthetic RGBA8 cook is hash- and write-bound, so the cold time is " ++
            "smaller — the ratio (cook ≫ lookup) is the signal.\n",
    );
    // Push the buffered bytes to the file before it is closed.
    try out.flush();

    log.info("report written: {s}", .{report_path});
    log.info("cold p50 = {d:.3} ms, hit p50 = {d:.3} µs, speedup = {d:.0}×", .{
        cold_p50_ms,
        hit_p50_us,
        ratio,
    });
}

/// Converts a nanosecond count to fractional milliseconds.
fn nanosToMs(ns: u64) f64 {
    const nanos: f64 = @floatFromInt(ns);
    return nanos / 1e6;
}

/// Converts a nanosecond count to fractional microseconds.
fn nanosToUs(ns: u64) f64 {
    const nanos: f64 = @floatFromInt(ns);
    return nanos / 1e3;
}
27 changes: 27 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,33 @@ pub fn build(b: *std.Build) void {
);
paeth_bench_step.dependOn(&paeth_bench_run.step);

// ------------------------------------- M0.6 asset cooking-cache bench -----
//
// Cold-cook-vs-warm-hit time differential. Host- and load-dependent, so
// it lives here (archived, non-blocking, measured under the opposable
// protocol) and NOT in `zig build test` — the correctness half (miss →
// hit transition + byte-identity) is the deterministic gate
// `tests/assets/cache_diff.zig`. `zig build bench-asset-cache`.
const asset_cache_bench_module = b.createModule(.{
.root_source_file = b.path("bench/asset_cache.zig"),
.target = target,
.optimize = optimize,
});
asset_cache_bench_module.addImport("weld_asset_pipeline", asset_pipeline_module);
const asset_cache_bench_exe = b.addExecutable(.{
.name = "asset-cache-bench",
.root_module = asset_cache_bench_module,
});
b.installArtifact(asset_cache_bench_exe);
const asset_cache_bench_run = b.addRunArtifact(asset_cache_bench_exe);
asset_cache_bench_run.step.dependOn(b.getInstallStep());
if (b.args) |args| asset_cache_bench_run.addArgs(args);
const asset_cache_bench_step = b.step(
"bench-asset-cache",
"Run the M0.6 cooking-cache cold-vs-hit differential (writes bench/out/asset_cache_<os>.md; pass `-- --smoke` for a CI sanity run)",
);
asset_cache_bench_step.dependOn(&asset_cache_bench_run.step);

// ----------------------------------- M0.6 thin offline asset cook demo ----
//
// `zig build cook-demo` cooks the three M0.6 fixtures end-to-end through
Expand Down
64 changes: 31 additions & 33 deletions tests/assets/cache_diff.zig
Original file line number Diff line number Diff line change
@@ -1,19 +1,35 @@
//! M0.6 / E4 — cooking-cache hit differential (brief §Acceptance ▸ Benchmarks).
//! M0.6 / E4 — cooking-cache hit functional test (brief §Acceptance ▸ Benchmarks).
//!
//! A second cook of an unchanged asset hits the cache and skips the
//! (expensive) cook entirely. The asset is sized so the first cook does real
//! work (hash + write a large `.bin`); the hit is a directory lookup.
//! A second cook of an unchanged asset hits the cache and returns the
//! byte-identical artifact without re-cooking. This is the *correctness*
//! half of the brief's cache criterion: a miss → hit transition plus
//! byte-identity. It is deterministic and cross-host — no wall-clock
//! assertion — so it belongs in the `zig build test` gate.
//!
//! The *performance* half — the cold-cook-vs-hit time differential — is a
//! host- and load-dependent measurement, so it lives in the bench suite
//! (`bench/asset_cache.zig`, `zig build bench-asset-cache`), measured under
//! the opposable protocol on the reference machine. The original M0.6 test
//! asserted an absolute millisecond ratio inside the correctness gate, which
//! red-failed on slower / Windows CI runners (a single cache-hit sample can
//! spike on a page fault, AV scan, or cold directory). That debt was flagged
//! in the M0.7 brief (§ Acted deviations → "Known debt left untouched") and
//! is resolved here by moving the timing out of the gate, leaving only the
//! deterministic functional assertions below.

const std = @import("std");
const assets = @import("weld_asset_pipeline");

// 2048×2048 RGBA8 = 16 MiB — large enough that the first cook (BLAKE3 over the
// payload + writing the `.bin`) is clearly expensive vs a cache-hit lookup,
// without bloating CI with a huge temp file.
const width = 2048;
const height = 2048;
// 256×256 RGBA8 = 256 KiB — large enough to exercise a real cook (header +
// metadata + payload copy + BLAKE3 content hash) and a non-trivial
// byte-identity check, small enough to keep the correctness gate fast on
// every host. The larger 16 MiB asset that makes a *cold cook* expensive
// (the point of the timing differential) is the bench's concern, not the
// gate's.
const width = 256;
const height = 256;

test "second cook of unchanged asset hits cache" {
test "second cook of unchanged asset hits cache and returns identical bytes" {
const gpa = std.testing.allocator;
const io = std.testing.io;

Expand Down Expand Up @@ -42,37 +58,19 @@ test "second cook of unchanged asset hits cache" {

const key = assets.cache.computeKey(&source_hash, "pc", 0);

// First cook — cache miss: cook the .bin and store it.
const t_miss = std.Io.Clock.Timestamp.now(io, .awake);
// First cook — cache miss: the artifact is absent, so we cook it and
// store it.
try std.testing.expect(!cache.contains(io, &key));
const bin = try assets.cookers.cookTexture(gpa, doc, blob);
defer gpa.free(bin);
try cache.put(io, &key, bin);
const miss_ns: i64 = @intCast(t_miss.untilNow(io).raw.nanoseconds);

// Second cook — cache hit: the artifact already exists, the cook is
// skipped entirely.
const t_hit = std.Io.Clock.Timestamp.now(io, .awake);
const hit = cache.contains(io, &key);
const hit_ns: i64 = @intCast(t_hit.untilNow(io).raw.nanoseconds);
try std.testing.expect(hit);
// Second cook — cache hit: the artifact now exists, so the cook is
// skipped entirely (the runtime serves the stored `.bin`).
try std.testing.expect(cache.contains(io, &key));

// The cached artifact is byte-identical to the fresh cook.
const cached = (try cache.get(gpa, io, &key)).?;
defer gpa.free(cached);
try std.testing.expectEqualSlices(u8, bin, cached);

const miss_ms = @divTrunc(miss_ns, std.time.ns_per_ms);
const hit_us = @divTrunc(hit_ns, std.time.ns_per_us);
std.debug.print("\n[cache_diff] first cook (miss) = {d} ms, second cook (hit) = {d} us\n", .{ miss_ms, hit_us });

// Differential gate. The hit is a directory lookup (< 10 ms) and avoids
// the cook entirely — a large speedup. The absolute first-cook wall-time
// is build-mode- and disk-dependent (here ~800 ms Debug, ~50 ms
// ReleaseSafe for 16 MiB); the brief's "≥ 100 ms first cook / < 10 ms
// second" is the reference-machine figure for a real decode-heavy asset,
// so the test asserts the robust differential rather than a flaky
// absolute wall-time (see Closing notes).
try std.testing.expect(@divTrunc(hit_ns, std.time.ns_per_ms) < 10); // hit < 10 ms
try std.testing.expect(miss_ns > hit_ns * 20); // cache hit ≫ 20× faster
}
Loading