From 74471c11c7f9e55b6c47cc3ae39140c7d4c84cce Mon Sep 17 00:00:00 2001 From: Alano Terblanche <18033717+Benehiko@users.noreply.github.com> Date: Wed, 24 Jun 2026 11:34:40 +0200 Subject: [PATCH 1/2] fix(keychain): retry collection ops when gnome-keyring relocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Linux keychain store dials a fresh D-Bus connection per operation and closes it on return. gnome-keyring scopes an unlock to the session that performed it, so closing a previous connection relocks the collection. That relock can land asynchronously mid-operation — after IsLocked reports the collection unlocked but before the call against the collection runs — surfacing an intermittent "Cannot create an item in a locked collection" error. Add withRelockRetry, which reacts to the authoritative signal (the operation's own org.freedesktop.Secret.Error.IsLocked D-Bus error) by unlocking and retrying with bounded exponential backoff. Wrap the mutating and reading calls (CreateItem, DeleteItem, GetSecret) with it, and factor the unlock preamble into ensureUnlocked. A failed Unlock (e.g. dismissed auth prompt) aborts the loop immediately to avoid re-prompting. Co-Authored-By: Claude Opus 4.8 (1M context) --- store/keychain/keychain_linux.go | 156 ++++++++++++++------ store/keychain/keychain_linux_test.go | 196 +++++++++++++++++++++++++- 2 files changed, 304 insertions(+), 48 deletions(-) diff --git a/store/keychain/keychain_linux.go b/store/keychain/keychain_linux.go index 9d1498ca..759daf7e 100644 --- a/store/keychain/keychain_linux.go +++ b/store/keychain/keychain_linux.go @@ -23,6 +23,7 @@ import ( "fmt" "maps" "slices" + "time" dbus "github.com/godbus/dbus/v5" @@ -145,6 +146,91 @@ func isCollectionUnlocked(collectionPath dbus.ObjectPath, service secretService) return errCollectionLocked } +// ensureUnlocked unlocks the collection if it currently reports as locked. It is +// the cheap, best-effort preamble run before any collection operation; the +// authoritative guard against a collection that relocks underneath us is +// [withRelockRetry], since the lock state can change between this check and the +// operation itself (see that function for why). +func ensureUnlocked(collectionPath dbus.ObjectPath, service secretService) error { + err := isCollectionUnlocked(collectionPath, service) + if err != nil && !errors.Is(err, errCollectionLocked) { + return err + } + if errors.Is(err, errCollectionLocked) { + return service.Unlock([]dbus.ObjectPath{collectionPath}) + } + return nil +} + +// secretServiceIsLockedError is the D-Bus error name the secret service returns +// when a mutating call (e.g. CreateItem) targets a locked collection. +// +// https://specifications.freedesktop.org/secret-service-spec/latest/errors.html +const secretServiceIsLockedError = "org.freedesktop.Secret.Error.IsLocked" + +// isLockedDBusError reports whether err is the secret service's "collection is +// locked" D-Bus error. The lock state is matched on the structured D-Bus error +// name rather than the human-readable message so it is stable across backends +// and locales. +func isLockedDBusError(err error) bool { + var dbusErr dbus.Error + return errors.As(err, &dbusErr) && dbusErr.Name == secretServiceIsLockedError +} + +// Relock retry tuning. An operation that hits a relocked collection is retried +// with exponential backoff: the relock is a brief race that settles on its own, +// and spacing the attempts out avoids hammering the secret service (or, on a +// password-protected keyring, re-issuing Unlock fast enough to spam the user +// with authentication prompts). +// +// relockRetryMaxDelay caps the backoff growth; with the current +// maxRelockRetries the slept delays are 20,40,80,160,320ms (the cap only takes +// effect if maxRelockRetries is raised past 6). +const ( + maxRelockRetries = 5 + relockRetryBaseDelay = 20 * time.Millisecond + relockRetryMaxDelay = 500 * time.Millisecond +) + +// sleepFn is the sleep seam used by the relock backoff so tests can exercise the +// retry loop without real delays. +var sleepFn = time.Sleep + +// withRelockRetry runs a collection operation, retrying it with exponential +// backoff when the secret service rejects it because the collection is locked. +// +// The store dials a fresh D-Bus connection for every operation and closes it on +// return. gnome-keyring scopes an unlock to the session that performed it, so +// when a previous operation's connection closes the daemon relocks the +// collection — and that relock can land asynchronously in the middle of a later +// operation, after we have already observed the collection as unlocked but +// before the call against the collection runs. The result is an intermittent +// "Cannot create an item in a locked collection" error even though we unlocked +// moments earlier. IsLocked cannot guard against this because the state changes +// between the check and the call, so we react to the authoritative signal — the +// operation's own locked error — by unlocking again and retrying. +// +// In the common case this is the passwordless auto-unlock path (e.g. the +// PAM-unlocked login keyring), where Unlock returns the null prompt and asks +// the user for nothing. withRelockRetry cannot itself prove the keyring is +// passwordless, so on a password-protected keyring a retry could surface an +// authentication prompt; the bounded retry count and backoff keep that to a +// handful of spaced-out prompts at worst, and a dismissed prompt makes Unlock +// return an error that aborts the loop immediately rather than re-prompting. +func withRelockRetry(service secretService, collectionPath dbus.ObjectPath, op func() error) error { + err := op() + delay := relockRetryBaseDelay + for attempt := 0; attempt < maxRelockRetries && isLockedDBusError(err); attempt++ { + sleepFn(delay) + delay = min(delay*2, relockRetryMaxDelay) + if unlockErr := service.Unlock([]dbus.ObjectPath{collectionPath}); unlockErr != nil { + return unlockErr + } + err = op() + } + return err +} + type keychainStore[T store.Secret] struct { serviceGroup string serviceName string @@ -172,15 +258,9 @@ func (k *keychainStore[T]) Delete(_ context.Context, id store.ID) error { return err } - err = isCollectionUnlocked(objectPath, service) - if err != nil && !errors.Is(err, errCollectionLocked) { + if err := ensureUnlocked(objectPath, service); err != nil { return err } - if errors.Is(err, errCollectionLocked) { - if err := service.Unlock([]dbus.ObjectPath{objectPath}); err != nil { - return err - } - } attributes := make(map[string]string) safelySetMetadata(k.serviceGroup, k.serviceName, attributes) @@ -195,7 +275,9 @@ func (k *keychainStore[T]) Delete(_ context.Context, id store.ID) error { return nil } - return service.DeleteItem(items[0]) + return withRelockRetry(service, objectPath, func() error { + return service.DeleteItem(items[0]) + }) } func (k *keychainStore[T]) Get(ctx context.Context, id store.ID) (store.Secret, error) { @@ -219,15 +301,9 @@ func (k *keychainStore[T]) Get(ctx context.Context, id store.ID) (store.Secret, return nil, err } - err = isCollectionUnlocked(objectPath, service) - if err != nil && !errors.Is(err, errCollectionLocked) { + if err := ensureUnlocked(objectPath, service); err != nil { return nil, err } - if errors.Is(err, errCollectionLocked) { - if err := service.Unlock([]dbus.ObjectPath{objectPath}); err != nil { - return nil, err - } - } searchMetadata := make(map[string]string) safelySetMetadata(k.serviceGroup, k.serviceName, searchMetadata) @@ -248,7 +324,12 @@ func (k *keychainStore[T]) Get(ctx context.Context, id store.ID) (store.Secret, } safelyCleanMetadata(attributes) - value, err := service.GetSecret(items[0], *session) + var value []byte + err = withRelockRetry(service, objectPath, func() error { + var getErr error + value, getErr = service.GetSecret(items[0], *session) + return getErr + }) if err != nil { return nil, err } @@ -286,15 +367,9 @@ func (k *keychainStore[T]) GetAllMetadata(ctx context.Context) (map[store.ID]sto return nil, err } - err = isCollectionUnlocked(objectPath, service) - if err != nil && !errors.Is(err, errCollectionLocked) { + if err := ensureUnlocked(objectPath, service); err != nil { return nil, err } - if errors.Is(err, errCollectionLocked) { - if err := service.Unlock([]dbus.ObjectPath{objectPath}); err != nil { - return nil, err - } - } searchMetadata := make(map[string]string) safelySetMetadata(k.serviceGroup, k.serviceName, searchMetadata) @@ -358,15 +433,9 @@ func (k *keychainStore[T]) Save(_ context.Context, id store.ID, secret store.Sec return err } - err = isCollectionUnlocked(objectPath, service) - if err != nil && !errors.Is(err, errCollectionLocked) { + if err := ensureUnlocked(objectPath, service); err != nil { return err } - if errors.Is(err, errCollectionLocked) { - if err := service.Unlock([]dbus.ObjectPath{objectPath}); err != nil { - return err - } - } value, err := secret.Marshal() if err != nil { @@ -387,7 +456,10 @@ func (k *keychainStore[T]) Save(_ context.Context, id store.ID, secret store.Sec label := k.itemLabel(id.String()) properties := kc.NewSecretProperties(label, attributes) - _, err = service.CreateItem(objectPath, properties, sessSecret, kc.ReplaceBehaviorReplace) + err = withRelockRetry(service, objectPath, func() error { + _, createErr := service.CreateItem(objectPath, properties, sessSecret, kc.ReplaceBehaviorReplace) + return createErr + }) if err != nil { return err } @@ -400,9 +472,15 @@ func (k *keychainStore[T]) Upsert(ctx context.Context, id store.ID, secret store } // loadSecret fetches the raw secret value for itemPath, zeroes it after use, -// and returns a fully populated Secret. -func (k *keychainStore[T]) loadSecret(ctx context.Context, id store.ID, svc secretService, itemPath dbus.ObjectPath, session *kc.Session, attributes map[string]string) (store.Secret, error) { - value, err := svc.GetSecret(itemPath, *session) +// and returns a fully populated Secret. collectionPath is the enclosing +// collection, used to re-unlock if it relocks mid-read (see withRelockRetry). +func (k *keychainStore[T]) loadSecret(ctx context.Context, id store.ID, svc secretService, collectionPath, itemPath dbus.ObjectPath, session *kc.Session, attributes map[string]string) (store.Secret, error) { + var value []byte + err := withRelockRetry(svc, collectionPath, func() error { + var getErr error + value, getErr = svc.GetSecret(itemPath, *session) + return getErr + }) if err != nil { return nil, err } @@ -439,15 +517,9 @@ func (k *keychainStore[T]) Filter(ctx context.Context, pattern store.Pattern) (m return nil, err } - err = isCollectionUnlocked(objectPath, service) - if err != nil && !errors.Is(err, errCollectionLocked) { + if err := ensureUnlocked(objectPath, service); err != nil { return nil, err } - if errors.Is(err, errCollectionLocked) { - if err := service.Unlock([]dbus.ObjectPath{objectPath}); err != nil { - return nil, err - } - } attributes := make(map[string]string) // add our pattern to the attributes so we can match against items that @@ -492,7 +564,7 @@ func (k *keychainStore[T]) Filter(ctx context.Context, pattern store.Pattern) (m continue } - secret, err := k.loadSecret(ctx, secretID, service, itemPath, session, attributes) + secret, err := k.loadSecret(ctx, secretID, service, objectPath, itemPath, session, attributes) if err != nil { return nil, err } diff --git a/store/keychain/keychain_linux_test.go b/store/keychain/keychain_linux_test.go index d6ab98be..caa36f61 100644 --- a/store/keychain/keychain_linux_test.go +++ b/store/keychain/keychain_linux_test.go @@ -17,8 +17,11 @@ package keychain import ( + "errors" + "fmt" "sync/atomic" "testing" + "time" dbus "github.com/godbus/dbus/v5" "github.com/stretchr/testify/assert" @@ -26,6 +29,7 @@ import ( "github.com/docker/secrets-engine/store" kc "github.com/docker/secrets-engine/store/keychain/internal/go-keychain/secretservice" + "github.com/docker/secrets-engine/store/mocks" ) // fakeService is a pure in-memory [secretService]. It never talks to a real @@ -42,6 +46,30 @@ type fakeService struct { opened atomic.Int64 closed atomic.Int64 + + // {create,delete,getSecret}ItemLockedErrs is how many leading calls of each + // kind fail with the secret service "collection is locked" D-Bus error + // before one succeeds, simulating a collection that relocks underneath the + // store (see withRelockRetry). The error is wrapped exactly as the real + // service wraps it (fmt.Errorf("...: %w", dbus.Error{...})), so the tests + // exercise the errors.As-through-wrap path isLockedDBusError depends on. + createItemLockedErrs atomic.Int64 + createItemCalls atomic.Int64 + deleteItemLockedErrs atomic.Int64 + deleteItemCalls atomic.Int64 + getSecretLockedErrs atomic.Int64 + getSecretCalls atomic.Int64 + unlockCalls atomic.Int64 + + // unlockErr, when set, is returned by Unlock to simulate e.g. a dismissed + // authentication prompt. + unlockErr error +} + +// lockedErr mirrors how the real SecretService wraps a locked-collection D-Bus +// error (see secretservice.go), so isLockedDBusError must unwrap to detect it. +func lockedErr(op string) error { + return fmt.Errorf("failed to %s: %w", op, dbus.Error{Name: secretServiceIsLockedError}) } func (f *fakeService) Collections() ([]dbus.ObjectPath, error) { @@ -50,20 +78,43 @@ func (f *fakeService) Collections() ([]dbus.ObjectPath, error) { func (f *fakeService) ReadAlias(string) (dbus.ObjectPath, error) { return loginKeychainObjectPath, nil } func (f *fakeService) IsLocked(dbus.ObjectPath) (bool, error) { return false, nil } func (f *fakeService) OpenSession(kc.AuthenticationMode) (*kc.Session, error) { - return &kc.Session{}, nil + // Plain mode lets Session.NewSecret wrap the value without negotiating an + // AES key, so Save can drive CreateItem in tests without a live session. + return &kc.Session{Mode: kc.AuthenticationInsecurePlain}, nil } -func (f *fakeService) CloseSession(*kc.Session) {} -func (f *fakeService) Unlock([]dbus.ObjectPath) error { return nil } +func (f *fakeService) CloseSession(*kc.Session) {} +func (f *fakeService) Unlock([]dbus.ObjectPath) error { + f.unlockCalls.Add(1) + return f.unlockErr +} + func (f *fakeService) SearchCollection(dbus.ObjectPath, kc.Attributes) ([]dbus.ObjectPath, error) { return f.items, nil } func (f *fakeService) CreateItem(dbus.ObjectPath, map[string]dbus.Variant, kc.Secret, kc.ReplaceBehavior) (dbus.ObjectPath, error) { + if f.createItemCalls.Add(1) <= f.createItemLockedErrs.Load() { + return "", lockedErr("create item") + } return "", nil } -func (f *fakeService) DeleteItem(dbus.ObjectPath) error { return nil } -func (f *fakeService) GetAttributes(dbus.ObjectPath) (kc.Attributes, error) { return nil, nil } -func (f *fakeService) GetSecret(dbus.ObjectPath, kc.Session) ([]byte, error) { return nil, nil } + +func (f *fakeService) DeleteItem(dbus.ObjectPath) error { + if f.deleteItemCalls.Add(1) <= f.deleteItemLockedErrs.Load() { + return lockedErr("delete item") + } + return nil +} +func (f *fakeService) GetAttributes(dbus.ObjectPath) (kc.Attributes, error) { return nil, nil } +func (f *fakeService) GetSecret(dbus.ObjectPath, kc.Session) ([]byte, error) { + if f.getSecretCalls.Add(1) <= f.getSecretLockedErrs.Load() { + return nil, lockedErr("get secret") + } + // A value MockCredential.Unmarshal can parse ("username:password"), so the + // Get path completes once the simulated relock clears. + return []byte("bob:bob-password"), nil +} + func (f *fakeService) Close() error { f.closed.Add(1) return nil @@ -81,6 +132,15 @@ func withFakeService(t *testing.T, fake *fakeService) { } } +// stubRelockSleep replaces the relock backoff sleep with a no-op so retry tests +// run without real delays, restoring it on cleanup. +func stubRelockSleep(t *testing.T) { + t.Helper() + orig := sleepFn + t.Cleanup(func() { sleepFn = orig }) + sleepFn = func(time.Duration) {} +} + // TestKeychainGetNotFound exercises the full Get path against the fake — open, // resolve collection, search — and asserts an empty search maps to // ErrCredentialNotFound, all without a live keyring. @@ -116,6 +176,130 @@ func TestKeychainClosesEveryConnection(t *testing.T) { assert.Equal(t, opened, closed, "every opened connection must be closed") } +// TestSaveRetriesWhenCollectionRelocks is a regression test for the +// intermittent "Cannot create an item in a locked collection" flake. Because +// the store opens and closes a fresh D-Bus connection per operation, the secret +// service can relock the collection asynchronously, so a CreateItem can fail as +// locked even though the collection was observed unlocked moments earlier. Save +// must react to that locked error by unlocking and retrying rather than failing. +func TestSaveRetriesWhenCollectionRelocks(t *testing.T) { + stubRelockSleep(t) + fake := &fakeService{} + fake.createItemLockedErrs.Store(2) // first two attempts hit a relocked collection + withFakeService(t, fake) + + ks := setupKeychain(t, nil) + err := ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"), &mocks.MockCredential{ + Username: "bob", + Password: "bob-password", + }) + require.NoError(t, err) + + assert.Equal(t, int64(3), fake.createItemCalls.Load(), "two locked failures then one success") + // The fake reports the collection as unlocked, so ensureUnlocked issues no + // Unlock; every Unlock comes from withRelockRetry — exactly one per retry. + assert.Equal(t, int64(2), fake.unlockCalls.Load(), "exactly one Unlock per relock retry") +} + +// TestSaveStopsRetryingAfterMaxRelocks asserts the retry is bounded: a +// persistently locked collection surfaces the locked error instead of looping +// forever. +func TestSaveStopsRetryingAfterMaxRelocks(t *testing.T) { + stubRelockSleep(t) + fake := &fakeService{} + fake.createItemLockedErrs.Store(1 << 30) // never recovers + withFakeService(t, fake) + + ks := setupKeychain(t, nil) + err := ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"), &mocks.MockCredential{ + Username: "bob", + Password: "bob-password", + }) + require.Error(t, err) + assert.True(t, isLockedDBusError(err), "the persistent locked error must be returned to the caller") + assert.Equal(t, int64(maxRelockRetries+1), fake.createItemCalls.Load(), "initial attempt plus the bounded retries") +} + +// TestSaveStopsRetryingWhenUnlockFails asserts the retry loop does not keep +// re-unlocking (and therefore re-prompting) once Unlock itself errors, e.g. +// when the user dismisses an authentication prompt. The Unlock error is +// surfaced after a single attempt. +func TestSaveStopsRetryingWhenUnlockFails(t *testing.T) { + stubRelockSleep(t) + dismissed := errors.New("prompt dismissed") + fake := &fakeService{unlockErr: dismissed} + fake.createItemLockedErrs.Store(1 << 30) // always locked, so a retry is attempted + withFakeService(t, fake) + + ks := setupKeychain(t, nil) + err := ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"), &mocks.MockCredential{ + Username: "bob", + Password: "bob-password", + }) + require.ErrorIs(t, err, dismissed) + assert.Equal(t, int64(1), fake.unlockCalls.Load(), "must not re-unlock (re-prompt) after the first Unlock fails") + assert.Equal(t, int64(1), fake.createItemCalls.Load(), "only the initial attempt runs; the failed unlock aborts the retry") +} + +// TestDeleteRetriesWhenCollectionRelocks covers the second mutating call site: +// Delete's DeleteItem is wrapped in withRelockRetry just like Save's CreateItem, +// so a relocked collection must be unlocked and retried rather than failing. +func TestDeleteRetriesWhenCollectionRelocks(t *testing.T) { + stubRelockSleep(t) + fake := &fakeService{items: []dbus.ObjectPath{"/org/freedesktop/secrets/collection/login/1"}} + fake.deleteItemLockedErrs.Store(2) + withFakeService(t, fake) + + ks := setupKeychain(t, nil) + err := ks.Delete(t.Context(), store.MustParseID("com.test.test/test/bob")) + require.NoError(t, err) + + assert.Equal(t, int64(3), fake.deleteItemCalls.Load(), "two locked failures then one success") + assert.Equal(t, int64(2), fake.unlockCalls.Load(), "exactly one Unlock per relock retry") +} + +// TestGetRetriesWhenCollectionRelocks covers the read path: GetSecret can also +// hit org.freedesktop.Secret.Error.IsLocked if the collection relocks between +// ensureUnlocked and the read, so Get wraps it in withRelockRetry too. +func TestGetRetriesWhenCollectionRelocks(t *testing.T) { + stubRelockSleep(t) + fake := &fakeService{items: []dbus.ObjectPath{"/org/freedesktop/secrets/collection/login/1"}} + fake.getSecretLockedErrs.Store(2) + withFakeService(t, fake) + + ks := setupKeychain(t, nil) + secret, err := ks.Get(t.Context(), store.MustParseID("com.test.test/test/bob")) + require.NoError(t, err) + require.NotNil(t, secret) + + assert.Equal(t, int64(3), fake.getSecretCalls.Load(), "two locked failures then one success") + assert.Equal(t, int64(2), fake.unlockCalls.Load(), "exactly one Unlock per relock retry") +} + +// TestIsLockedDBusError pins the central contract the retry depends on: +// detection must work whether the locked error is a bare dbus.Error or wrapped +// (the real service wraps it as fmt.Errorf("...: %w", dbus.Error{...})), and +// must not match other errors. +func TestIsLockedDBusError(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + {"bare locked dbus error", dbus.Error{Name: secretServiceIsLockedError}, true}, + {"wrapped locked dbus error", fmt.Errorf("failed to create item: %w", dbus.Error{Name: secretServiceIsLockedError}), true}, + {"doubly wrapped locked dbus error", fmt.Errorf("outer: %w", fmt.Errorf("failed to get secret: %w", dbus.Error{Name: secretServiceIsLockedError})), true}, + {"different dbus error name", dbus.Error{Name: "org.freedesktop.DBus.Error.Failed"}, false}, + {"plain error", errors.New("boom"), false}, + {"nil error", nil, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, isLockedDBusError(tt.err)) + }) + } +} + func TestResolveDefaultCollection(t *testing.T) { const customCollection = dbus.ObjectPath("/org/freedesktop/secrets/collection/custom") From 8d7ca4e27f85e6bff59053584ababb663d086e44 Mon Sep 17 00:00:00 2001 From: Alano Terblanche <18033717+Benehiko@users.noreply.github.com> Date: Wed, 24 Jun 2026 13:56:17 +0200 Subject: [PATCH 2/2] fix(keychain): satisfy lll and apply relock-retry review fixups Wrap the loadSecret signature across lines so it no longer exceeds the 200-char lll limit (the new collectionPath param pushed it to 207), unblocking the lint job. Review follow-ups, no behaviour change on any happy path: - withRelockRetry now wraps the abort-on-failed-Unlock error as "unlock after relock: %w" so the cause is visible while errors.Is on the underlying Unlock error (e.g. a dismissed prompt) still holds. - Correct the relockRetryMaxDelay doc comment: the 500ms cap first takes effect once maxRelockRetries reaches 6, not "past 6". - Note that the sleepFn / newService test seams are unsynchronised package-level vars, so swapping them precludes t.Parallel(). Tests: - TestFilterRetriesWhenCollectionRelocks covers the Filter->loadSecret read path through withRelockRetry (needs a configurable GetAttributes seam on the fake so the item is not skipped). - TestRelockBackoffSchedule pins the documented 20/40/80/160/320ms backoff via a recording sleep seam. Co-Authored-By: Claude Opus 4.8 (1M context) --- store/keychain/keychain_linux.go | 21 +++++-- store/keychain/keychain_linux_test.go | 82 ++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/store/keychain/keychain_linux.go b/store/keychain/keychain_linux.go index 759daf7e..4b283064 100644 --- a/store/keychain/keychain_linux.go +++ b/store/keychain/keychain_linux.go @@ -185,7 +185,8 @@ func isLockedDBusError(err error) bool { // // relockRetryMaxDelay caps the backoff growth; with the current // maxRelockRetries the slept delays are 20,40,80,160,320ms (the cap only takes -// effect if maxRelockRetries is raised past 6). +// effect once maxRelockRetries reaches 6, where the sixth delay would otherwise +// be 640ms). const ( maxRelockRetries = 5 relockRetryBaseDelay = 20 * time.Millisecond @@ -193,7 +194,8 @@ const ( ) // sleepFn is the sleep seam used by the relock backoff so tests can exercise the -// retry loop without real delays. +// retry loop without real delays. It is a package-level var with no +// synchronisation, so tests that swap it must not run in parallel. var sleepFn = time.Sleep // withRelockRetry runs a collection operation, retrying it with exponential @@ -224,7 +226,11 @@ func withRelockRetry(service secretService, collectionPath dbus.ObjectPath, op f sleepFn(delay) delay = min(delay*2, relockRetryMaxDelay) if unlockErr := service.Unlock([]dbus.ObjectPath{collectionPath}); unlockErr != nil { - return unlockErr + // Surface why the retry stopped while preserving errors.Is on the + // underlying Unlock error (e.g. a dismissed prompt). The original + // locked error is intentionally dropped: the failed unlock is the + // actionable cause once we have decided to stop retrying. + return fmt.Errorf("unlock after relock: %w", unlockErr) } err = op() } @@ -474,7 +480,14 @@ func (k *keychainStore[T]) Upsert(ctx context.Context, id store.ID, secret store // loadSecret fetches the raw secret value for itemPath, zeroes it after use, // and returns a fully populated Secret. collectionPath is the enclosing // collection, used to re-unlock if it relocks mid-read (see withRelockRetry). -func (k *keychainStore[T]) loadSecret(ctx context.Context, id store.ID, svc secretService, collectionPath, itemPath dbus.ObjectPath, session *kc.Session, attributes map[string]string) (store.Secret, error) { +func (k *keychainStore[T]) loadSecret( + ctx context.Context, + id store.ID, + svc secretService, + collectionPath, itemPath dbus.ObjectPath, + session *kc.Session, + attributes map[string]string, +) (store.Secret, error) { var value []byte err := withRelockRetry(svc, collectionPath, func() error { var getErr error diff --git a/store/keychain/keychain_linux_test.go b/store/keychain/keychain_linux_test.go index caa36f61..195feb73 100644 --- a/store/keychain/keychain_linux_test.go +++ b/store/keychain/keychain_linux_test.go @@ -44,6 +44,11 @@ type fakeService struct { // "credential not found" path. items []dbus.ObjectPath + // attributes is returned verbatim by GetAttributes. Filter only descends into + // loadSecret (and therefore the wrapped GetSecret) for items whose attributes + // carry a parseable "id"; leave nil for paths that do not inspect attributes. + attributes kc.Attributes + opened atomic.Int64 closed atomic.Int64 @@ -105,7 +110,11 @@ func (f *fakeService) DeleteItem(dbus.ObjectPath) error { } return nil } -func (f *fakeService) GetAttributes(dbus.ObjectPath) (kc.Attributes, error) { return nil, nil } + +func (f *fakeService) GetAttributes(dbus.ObjectPath) (kc.Attributes, error) { + return f.attributes, nil +} + func (f *fakeService) GetSecret(dbus.ObjectPath, kc.Session) ([]byte, error) { if f.getSecretCalls.Add(1) <= f.getSecretLockedErrs.Load() { return nil, lockedErr("get secret") @@ -121,7 +130,9 @@ func (f *fakeService) Close() error { } // withFakeService swaps the package newService seam for one that hands out the -// given fake (counting each open) and restores the original on cleanup. +// given fake (counting each open) and restores the original on cleanup. It +// mutates the package-level newService var, so tests using it must not run in +// parallel. func withFakeService(t *testing.T, fake *fakeService) { t.Helper() orig := newService @@ -133,7 +144,8 @@ func withFakeService(t *testing.T, fake *fakeService) { } // stubRelockSleep replaces the relock backoff sleep with a no-op so retry tests -// run without real delays, restoring it on cleanup. +// run without real delays, restoring it on cleanup. It mutates the package-level +// sleepFn var, so tests using it must not run in parallel. func stubRelockSleep(t *testing.T) { t.Helper() orig := sleepFn @@ -141,6 +153,20 @@ func stubRelockSleep(t *testing.T) { sleepFn = func(time.Duration) {} } +// recordRelockSleep replaces the relock backoff sleep with a recorder that +// captures each requested delay (without actually sleeping) and restores the +// original on cleanup. It pins the documented backoff schedule. Like +// stubRelockSleep it mutates the package-level sleepFn var, so tests using it +// must not run in parallel. +func recordRelockSleep(t *testing.T) *[]time.Duration { + t.Helper() + orig := sleepFn + t.Cleanup(func() { sleepFn = orig }) + slept := &[]time.Duration{} + sleepFn = func(d time.Duration) { *slept = append(*slept, d) } + return slept +} + // TestKeychainGetNotFound exercises the full Get path against the fake — open, // resolve collection, search — and asserts an empty search maps to // ErrCredentialNotFound, all without a live keyring. @@ -276,6 +302,56 @@ func TestGetRetriesWhenCollectionRelocks(t *testing.T) { assert.Equal(t, int64(2), fake.unlockCalls.Load(), "exactly one Unlock per relock retry") } +// TestFilterRetriesWhenCollectionRelocks covers the read path reached through +// Filter, which loads each matched secret via loadSecret. That GetSecret is +// wrapped in withRelockRetry just like Get's, so a collection that relocks +// mid-read must be unlocked and retried rather than failing the whole Filter. +func TestFilterRetriesWhenCollectionRelocks(t *testing.T) { + stubRelockSleep(t) + fake := &fakeService{ + items: []dbus.ObjectPath{"/org/freedesktop/secrets/collection/login/1"}, + // Filter only descends into loadSecret for items whose attributes carry a + // parseable id; without it the item is skipped and GetSecret never runs. + attributes: kc.Attributes{"id": "com.test.test/test/bob"}, + } + fake.getSecretLockedErrs.Store(2) + withFakeService(t, fake) + + ks := setupKeychain(t, nil) + creds, err := ks.Filter(t.Context(), store.MustParsePattern("**")) + require.NoError(t, err) + require.Len(t, creds, 1) + + assert.Equal(t, int64(3), fake.getSecretCalls.Load(), "two locked failures then one success") + assert.Equal(t, int64(2), fake.unlockCalls.Load(), "exactly one Unlock per relock retry") +} + +// TestRelockBackoffSchedule pins the documented exponential backoff. A +// persistently relocked collection exhausts every retry, so the recorded delays +// are the full schedule: 20,40,80,160,320ms (the 500ms cap only takes effect +// once maxRelockRetries reaches 6). +func TestRelockBackoffSchedule(t *testing.T) { + slept := recordRelockSleep(t) + fake := &fakeService{} + fake.createItemLockedErrs.Store(1 << 30) // never recovers, so all retries run + withFakeService(t, fake) + + ks := setupKeychain(t, nil) + err := ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"), &mocks.MockCredential{ + Username: "bob", + Password: "bob-password", + }) + require.Error(t, err) + + assert.Equal(t, []time.Duration{ + 20 * time.Millisecond, + 40 * time.Millisecond, + 80 * time.Millisecond, + 160 * time.Millisecond, + 320 * time.Millisecond, + }, *slept, "documented relock backoff schedule") +} + // TestIsLockedDBusError pins the central contract the retry depends on: // detection must work whether the locked error is a bare dbus.Error or wrapped // (the real service wraps it as fmt.Errorf("...: %w", dbus.Error{...})), and