diff --git a/runtime/docker/build.go b/runtime/docker/build.go index 22d7cff..3daf3d9 100644 --- a/runtime/docker/build.go +++ b/runtime/docker/build.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/moby/moby/api/types/build" "github.com/moby/moby/client" @@ -168,6 +169,12 @@ func streamBuildOutput(ctx context.Context, body io.ReadCloser, events chan<- ru } } +// tarEpoch is the canonical normalized mtime stamped into every tar +// header by tarDirectory. Using the Unix epoch (rather than time.Time{}, +// which clamps to the epoch anyway in USTAR) makes intent explicit and +// avoids any reader special-casing the year-1 sentinel. +var tarEpoch = time.Unix(0, 0) + // tarDirectory writes the contents of dir (recursively) into w as a // non-gzipped tar archive. Symlinks are preserved as tar TypeSymlink // entries with their original target text; the daemon-side BuildKit @@ -217,6 +224,22 @@ func tarDirectory(dir string, w io.Writer) error { if d.IsDir() { hdr.Name = rel + "/" } + // Normalize metadata so the tar stream — and therefore + // BuildKit's COPY vertex digest — is reproducible across + // invocations and machines. Wall-clock mtimes from + // os.WriteFile (e.g. useruid's synthesized context) and + // host-specific uid/gid would otherwise perturb the digest of + // byte-identical content, causing cache misses and forcing + // re-extraction of downstream image layers. BuildKit hashes + // content for cache purposes, so erasing these fields doesn't + // lose information it relies on. 
+ hdr.ModTime = tarEpoch + hdr.AccessTime = time.Time{} + hdr.ChangeTime = time.Time{} + hdr.Uid = 0 + hdr.Gid = 0 + hdr.Uname = "" + hdr.Gname = "" if err := tw.WriteHeader(hdr); err != nil { return err diff --git a/runtime/docker/build_test.go b/runtime/docker/build_test.go index 3e9f7a2..3b24f41 100644 --- a/runtime/docker/build_test.go +++ b/runtime/docker/build_test.go @@ -1,8 +1,14 @@ package docker import ( + "archive/tar" + "bytes" + "io" + "os" + "path/filepath" "reflect" "testing" + "time" ) func TestExtractBaseImages(t *testing.T) { @@ -92,6 +98,91 @@ FROM alpine:3.20 AS b`, } } +// TestTarDirectoryNormalizesMetadata guards against the BuildKit +// COPY-cache regression caused by wall-clock mtimes in synthesized +// build contexts (uid-reconcile, etc.) leaking into the tar stream and +// perturbing the vertex digest of byte-identical content. +func TestTarDirectoryNormalizesMetadata(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "file.txt"), []byte("hello"), 0o644); err != nil { + t.Fatalf("write: %v", err) + } + if err := os.Mkdir(filepath.Join(dir, "sub"), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, "sub", "nested.txt"), []byte("world"), 0o644); err != nil { + t.Fatalf("write nested: %v", err) + } + + var buf bytes.Buffer + if err := tarDirectory(dir, &buf); err != nil { + t.Fatalf("tarDirectory: %v", err) + } + + tr := tar.NewReader(&buf) + entries := 0 + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("tar.Next: %v", err) + } + entries++ + if !hdr.ModTime.Equal(time.Unix(0, 0)) { + t.Errorf("%s: ModTime not epoch: %v", hdr.Name, hdr.ModTime) + } + if !hdr.AccessTime.IsZero() { + t.Errorf("%s: AccessTime not zero: %v", hdr.Name, hdr.AccessTime) + } + if !hdr.ChangeTime.IsZero() { + t.Errorf("%s: ChangeTime not zero: %v", hdr.Name, hdr.ChangeTime) + } + if hdr.Uid != 0 || hdr.Gid != 0 { + t.Errorf("%s: uid/gid not zero: 
uid=%d gid=%d", hdr.Name, hdr.Uid, hdr.Gid) + } + if hdr.Uname != "" || hdr.Gname != "" { + t.Errorf("%s: uname/gname not empty: uname=%q gname=%q", hdr.Name, hdr.Uname, hdr.Gname) + } + } + if entries == 0 { + t.Fatal("no tar entries read") + } +} + +// TestTarDirectoryDeterministic asserts that tarring the same content +// twice with diverging wall-clock mtimes produces byte-identical +// streams — the property BuildKit's COPY cache relies on. +func TestTarDirectoryDeterministic(t *testing.T) { + mkContext := func(t *testing.T, mtime time.Time) string { + t.Helper() + dir := t.TempDir() + p := filepath.Join(dir, "uid-fix.sh") + if err := os.WriteFile(p, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatalf("write: %v", err) + } + if err := os.Chtimes(p, mtime, mtime); err != nil { + t.Fatalf("chtimes: %v", err) + } + return dir + } + + a := mkContext(t, time.Unix(1_700_000_000, 0)) + b := mkContext(t, time.Unix(1_800_000_000, 0)) + + var bufA, bufB bytes.Buffer + if err := tarDirectory(a, &bufA); err != nil { + t.Fatalf("tarDirectory a: %v", err) + } + if err := tarDirectory(b, &bufB); err != nil { + t.Fatalf("tarDirectory b: %v", err) + } + if !bytes.Equal(bufA.Bytes(), bufB.Bytes()) { + t.Fatalf("tar streams differ despite identical content (mtime leaked)") + } +} + func TestSubstituteArgs(t *testing.T) { args := map[string]string{"X": "alpine", "Y": "3.20"} cases := []struct { diff --git a/useruid.go b/useruid.go index 8610c53..4aeb264 100644 --- a/useruid.go +++ b/useruid.go @@ -8,6 +8,7 @@ import ( "runtime" "strconv" "syscall" + "time" "github.com/crunchloop/devcontainer/config" "github.com/crunchloop/devcontainer/events" @@ -84,11 +85,26 @@ func (e *Engine) reconcileRemoteUserUID(ctx context.Context, cfg *config.Resolve } defer os.RemoveAll(tmp) - if err := os.WriteFile(filepath.Join(tmp, "uid-fix.sh"), []byte(uidReconcileScript), 0o755); err != nil { + // Pin synthesized-context file times to the epoch so the tar + // stream is stable across 
invocations regardless of wall-clock time. + // runtime/docker/build.go also normalizes tar headers as a + // defense-in-depth measure, but keeping the on-disk mtimes + // deterministic here means this context's reproducibility is a + // local property — independent of any consumer's tar pipeline. + epoch := time.Unix(0, 0) + uidFix := filepath.Join(tmp, "uid-fix.sh") + if err := os.WriteFile(uidFix, []byte(uidReconcileScript), 0o755); err != nil { + return "", err + } + if err := os.Chtimes(uidFix, epoch, epoch); err != nil { return "", err } df := generateUIDDockerfile(finalImage, user, hostUID, hostGID) - if err := os.WriteFile(filepath.Join(tmp, "Dockerfile"), []byte(df), 0o644); err != nil { + dfPath := filepath.Join(tmp, "Dockerfile") + if err := os.WriteFile(dfPath, []byte(df), 0o644); err != nil { + return "", err + } + if err := os.Chtimes(dfPath, epoch, epoch); err != nil { return "", err } opts.bus.Emit(events.BuildStartEvent{Source: events.BuildSourceUIDReconcile, Ref: tag})