From 92a26caccc063ce2bcbe29f26985f35a96a8be40 Mon Sep 17 00:00:00 2001
From: Alano Terblanche <18033717+Benehiko@users.noreply.github.com>
Date: Fri, 19 Jun 2026 16:00:36 +0200
Subject: [PATCH 1/3] fix(keychain): update Secret Service items in place on
 Linux to stop duplicate accumulation

On Linux, keychainStore.Save copied the secret's volatile Metadata() into the
searchable Secret Service attributes and relied on CreateItem(ReplaceBehaviorReplace)
to overwrite the prior item. Both gnome-keyring and kwalletd select the replace
target by matching the full supplied attribute set, so any change in the volatile
metadata (e.g. the Docker Hub OAuth credential's rotating JWT claims) defeats the
match and a brand-new item is created on every save -- duplicates pile up without
bound, and each stale item keeps a cleartext copy of the old claims.

Fix Save to update in place: search by the stable identity triple
{service:group, service:name, id} only, then either create when absent or rewrite
the first match's secret/attributes/label in place and collapse any pre-existing
duplicates. The item's object path is preserved, so the secret is never momentarily
absent and no duplicate is minted. The observable store contract is unchanged:
Save still returns nil iff the secret is stored (refreshing attributes/label and
collapsing leftovers are best-effort).

This is backend-agnostic: the attribute-match behaviour is shared by gnome-keyring
and kwalletd, and macOS/Windows key items on a stable identifier so are unaffected.

Add SetItemSecret/SetItemAttributes/SetItemLabel to the vendored secretservice
library (thin org.freedesktop.Secret.Item wrappers) to enable the in-place update.

Refs: docker/secrets-engine-private#446

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../secretservice/secretservice.go            | 34 +++++++++++++++++
 store/keychain/keychain_linux.go              | 37 ++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/store/keychain/internal/go-keychain/secretservice/secretservice.go b/store/keychain/internal/go-keychain/secretservice/secretservice.go
index af4579c3..6fac01c9 100644
--- a/store/keychain/internal/go-keychain/secretservice/secretservice.go
+++ b/store/keychain/internal/go-keychain/secretservice/secretservice.go
@@ -436,6 +436,40 @@ func (s *SecretService) PromptAndWait(prompt dbus.ObjectPath) (paths *dbus.Varia
 	}
 }
 
+// SetItemSecret replaces an existing item's secret value in place via
+// org.freedesktop.Secret.Item.SetSecret. The secret must already be encoded for
+// the session it references (see [Session.NewSecret]). SetSecret takes a single
+// Secret argument (D-Bus signature (oayays)) and returns no values, so there is
+// no prompt path: the item's collection must be unlocked first (use [Unlock]),
+// otherwise the call fails.
+func (s *SecretService) SetItemSecret(item dbus.ObjectPath, secret Secret) error {
+	if err := s.Obj(item).Call("org.freedesktop.Secret.Item.SetSecret", NilFlags, secret).Store(); err != nil {
+		return fmt.Errorf("failed to set item secret: %w", err)
+	}
+	return nil
+}
+
+// SetItemAttributes replaces an existing item's lookup attributes in place by
+// setting the read-write org.freedesktop.Secret.Item.Attributes property
+// (type a{ss}) through org.freedesktop.DBus.Properties.Set. The collection must
+// be unlocked.
+func (s *SecretService) SetItemAttributes(item dbus.ObjectPath, attributes Attributes) error {
+	if err := s.Obj(item).SetProperty("org.freedesktop.Secret.Item.Attributes", dbus.MakeVariant(map[string]string(attributes))); err != nil {
+		return fmt.Errorf("failed to set item attributes: %w", err)
+	}
+	return nil
+}
+
+// SetItemLabel replaces an existing item's displayable label in place by setting
+// the read-write org.freedesktop.Secret.Item.Label property (type s) through
+// org.freedesktop.DBus.Properties.Set. The collection must be unlocked.
+func (s *SecretService) SetItemLabel(item dbus.ObjectPath, label string) error {
+	if err := s.Obj(item).SetProperty("org.freedesktop.Secret.Item.Label", dbus.MakeVariant(label)); err != nil {
+		return fmt.Errorf("failed to set item label: %w", err)
+	}
+	return nil
+}
+
 // NewSecretProperties
 func NewSecretProperties(label string, attributes map[string]string) map[string]dbus.Variant {
 	return map[string]dbus.Variant{
diff --git a/store/keychain/keychain_linux.go b/store/keychain/keychain_linux.go
index 9d1498ca..d1fe4aed 100644
--- a/store/keychain/keychain_linux.go
+++ b/store/keychain/keychain_linux.go
@@ -65,6 +65,9 @@ type secretService interface {
 	DeleteItem(item dbus.ObjectPath) error
 	GetAttributes(item dbus.ObjectPath) (kc.Attributes, error)
 	GetSecret(item dbus.ObjectPath, session kc.Session) ([]byte, error)
+	SetItemSecret(item dbus.ObjectPath, secret kc.Secret) error
+	SetItemAttributes(item dbus.ObjectPath, attributes kc.Attributes) error
+	SetItemLabel(item dbus.ObjectPath, label string) error
 	Close() error
 }
 
@@ -385,13 +388,43 @@ func (k *keychainStore[T]) Save(_ context.Context, id store.ID, secret store.Sec
 	safelySetID(id, attributes)
 
 	label := k.itemLabel(id.String())
-	properties := kc.NewSecretProperties(label, attributes)
 
-	_, err = service.CreateItem(objectPath, properties, sessSecret, kc.ReplaceBehaviorReplace)
+	// Find existing items for this identity by the stable triple only
+	// {service:group, service:name, id}, never the volatile metadata, so a
+	// changed metadata value can never hide a previously-stored item. This is
+	// what makes the in-place update below reliable and stops the duplicate
+	// accumulation described in issue #446.
+	ident := make(map[string]string)
+	safelySetMetadata(k.serviceGroup, k.serviceName, ident)
+	safelySetID(id, ident)
+
+	items, err := service.SearchCollection(objectPath, ident)
 	if err != nil {
 		return err
 	}
 
+	// Nothing stored yet: create a fresh item.
+	if len(items) == 0 {
+		properties := kc.NewSecretProperties(label, attributes)
+		_, err = service.CreateItem(objectPath, properties, sessSecret, kc.ReplaceBehaviorReplace)
+		return err
+	}
+
+	// Update the first match in place. Its object path is preserved, so the
+	// secret is never momentarily absent and no duplicate is minted. Writing the
+	// secret value IS the operation, so only its failure fails Save; refreshing
+	// the attributes and label and collapsing any pre-existing duplicates are
+	// best-effort (the secret is already stored) and must not flip the result.
+	primary := items[0]
+	if err := service.SetItemSecret(primary, sessSecret); err != nil {
+		return err
+	}
+	_ = service.SetItemAttributes(primary, attributes)
+	_ = service.SetItemLabel(primary, label)
+	for _, dup := range items[1:] {
+		_ = service.DeleteItem(dup)
+	}
+
 	return nil
 }
 

From 759357d6b6f90835e15b89c60230017094f1af8d Mon Sep 17 00:00:00 2001
From: Alano Terblanche <18033717+Benehiko@users.noreply.github.com>
Date: Fri, 19 Jun 2026 16:00:39 +0200
Subject: [PATCH 2/3] test(keychain): cover create, in-place collapse, and
 real-keychain dedup convergence

Fake-backed unit tests for the create vs in-place-collapse branches, plus
real-keychain integration tests (run under the gnome-keyring harness) that seed a
duplicate backlog and assert a single Save collapses it to one item, and that
repeated saves with changing metadata never accumulate.

The integration assertions poll the daemon via require.EventuallyWithT until the
expected item count is reached, absorbing the lag between the store deleting
duplicates and an independent connection observing it without masking a genuine
failure to converge. The dedup tests use their own service group/name
(com.test.dedup/dedup) so a leaked credential can never reach TestKeychain, and
ensureUnlocked polls IsLocked after Unlock to close the first-unlock race on the
direct-to-daemon seed and purge paths.

Refs: docker/secrets-engine-private#446

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 store/keychain/keychain_linux_test.go | 278 +++++++++++++++++++++++++-
 1 file changed, 275 insertions(+), 3 deletions(-)

diff --git a/store/keychain/keychain_linux_test.go b/store/keychain/keychain_linux_test.go
index d6ab98be..94f5aca9 100644
--- a/store/keychain/keychain_linux_test.go
+++ b/store/keychain/keychain_linux_test.go
@@ -17,8 +17,11 @@
 package keychain
 
 import (
+	"context"
+	"fmt"
 	"sync/atomic"
 	"testing"
+	"time"
 
 	dbus "github.com/godbus/dbus/v5"
 	"github.com/stretchr/testify/assert"
@@ -26,6 +29,7 @@ import (
 
 	"github.com/docker/secrets-engine/store"
 	kc "github.com/docker/secrets-engine/store/keychain/internal/go-keychain/secretservice"
+	"github.com/docker/secrets-engine/store/mocks"
 )
 
 // fakeService is a pure in-memory [secretService]. It never talks to a real
@@ -40,6 +44,13 @@ type fakeService struct {
 	// "credential not found" path.
 	items []dbus.ObjectPath
 
+	// recorded write operations, for assertions in the Save tests. Not
+	// concurrency-safe: the tests that read them drive a single sequential
+	// operation through the fake.
+	createCalls    int
+	setSecretItems []dbus.ObjectPath
+	deletedItems   []dbus.ObjectPath
+
 	opened atomic.Int64
 	closed atomic.Int64
 }
@@ -50,7 +61,9 @@ func (f *fakeService) Collections() ([]dbus.ObjectPath, error) {
 func (f *fakeService) ReadAlias(string) (dbus.ObjectPath, error) { return loginKeychainObjectPath, nil }
 func (f *fakeService) IsLocked(dbus.ObjectPath) (bool, error)    { return false, nil }
 func (f *fakeService) OpenSession(kc.AuthenticationMode) (*kc.Session, error) {
-	return &kc.Session{}, nil
+	// plain mode so Session.NewSecret works without a negotiated AES key, which
+	// lets the Save path run end-to-end against the fake.
+	return &kc.Session{Mode: kc.AuthenticationInsecurePlain}, nil
 }
 func (f *fakeService) CloseSession(*kc.Session)       {}
 func (f *fakeService) Unlock([]dbus.ObjectPath) error { return nil }
@@ -59,11 +72,22 @@ func (f *fakeService) SearchCollection(dbus.ObjectPath, kc.Attributes) ([]dbus.O
 }
 
 func (f *fakeService) CreateItem(dbus.ObjectPath, map[string]dbus.Variant, kc.Secret, kc.ReplaceBehavior) (dbus.ObjectPath, error) {
-	return "", nil
+	f.createCalls++
+	return "/created", nil
+}
+
+func (f *fakeService) DeleteItem(item dbus.ObjectPath) error {
+	f.deletedItems = append(f.deletedItems, item)
+	return nil
 }
-func (f *fakeService) DeleteItem(dbus.ObjectPath) error                      { return nil }
 func (f *fakeService) GetAttributes(dbus.ObjectPath) (kc.Attributes, error)  { return nil, nil }
 func (f *fakeService) GetSecret(dbus.ObjectPath, kc.Session) ([]byte, error) { return nil, nil }
+func (f *fakeService) SetItemSecret(item dbus.ObjectPath, _ kc.Secret) error {
+	f.setSecretItems = append(f.setSecretItems, item)
+	return nil
+}
+func (f *fakeService) SetItemAttributes(dbus.ObjectPath, kc.Attributes) error { return nil }
+func (f *fakeService) SetItemLabel(dbus.ObjectPath, string) error             { return nil }
 func (f *fakeService) Close() error {
 	f.closed.Add(1)
 	return nil
@@ -116,6 +140,254 @@ func TestKeychainClosesEveryConnection(t *testing.T) {
 	assert.Equal(t, opened, closed, "every opened connection must be closed")
 }
 
+// TestKeychainSaveCreatesWhenAbsent asserts Save mints a new item only when the
+// identity has no existing item, and performs no in-place update or cleanup.
+func TestKeychainSaveCreatesWhenAbsent(t *testing.T) {
+	fake := &fakeService{} // no items -> create path
+	withFakeService(t, fake)
+
+	ks := setupKeychain(t, nil)
+	id := store.MustParseID("com.test.test/test/new-user")
+	creds := &mocks.MockCredential{Username: "alice", Password: "alice-password"}
+
+	require.NoError(t, ks.Save(t.Context(), id, creds))
+
+	assert.Equal(t, 1, fake.createCalls, "must CreateItem when no existing item")
+	assert.Empty(t, fake.setSecretItems, "no in-place update when creating")
+	assert.Empty(t, fake.deletedItems, "nothing to collapse")
+}
+
+// TestKeychainSaveCollapsesDuplicatesInPlace is the issue #446 regression test:
+// when several items already share one stable identity (the accumulated
+// duplicates), Save must update the first match in place — never minting a new
+// item — and drain the remaining duplicates, leaving exactly one.
+func TestKeychainSaveCollapsesDuplicatesInPlace(t *testing.T) {
+	fake := &fakeService{
+		items: []dbus.ObjectPath{"/item/a", "/item/b", "/item/c"},
+	}
+	withFakeService(t, fake)
+
+	ks := setupKeychain(t, nil)
+	id := store.MustParseID("com.test.test/test/bob")
+	creds := &mocks.MockCredential{Username: "bob", Password: "bob-password"}
+
+	require.NoError(t, ks.Save(t.Context(), id, creds))
+
+	assert.Zero(t, fake.createCalls, "must not CreateItem when an item already exists")
+	assert.Equal(t, []dbus.ObjectPath{"/item/a"}, fake.setSecretItems,
+		"secret must be rewritten on the first match in place")
+	assert.ElementsMatch(t, []dbus.ObjectPath{"/item/b", "/item/c"}, fake.deletedItems,
+		"the remaining duplicates must be collapsed, leaving only the first match")
+}
+
+// The real-keychain dedup tests use their own service group/name so their items
+// are namespace-isolated from TestKeychain (which shares com.test.test/test).
+// GetAllMetadata/Filter search by {service:group, service:name}, so a leaked
+// dedup item can never show up in — and break — the shared suite.
+const (
+	dedupServiceGroup = "com.test.dedup"
+	dedupServiceName  = "dedup"
+)
+
+// ensureUnlocked unlocks the collection and waits until the daemon actually
+// reports it unlocked. The freedesktop Unlock call can return before the
+// collection is fully unlocked, so a CreateItem/DeleteItem issued immediately
+// after can still fail with "locked collection" — polling IsLocked closes that
+// race. (The store's own Save/Get/Delete avoid it only because the collection
+// stays unlocked once any earlier operation has unlocked it.)
+func ensureUnlocked(t *testing.T, svc *kc.SecretService, collection dbus.ObjectPath) {
+	t.Helper()
+	require.NoError(t, svc.Unlock([]dbus.ObjectPath{collection}))
+	require.Eventually(t, func() bool {
+		locked, err := svc.IsLocked(collection)
+		return err == nil && !locked
+	}, 5*time.Second, 100*time.Millisecond, "collection did not unlock")
+}
+
+// searchRealItems queries a live Secret Service for every item sharing id's
+// stable identity triple {service:group, service:name, id}. The store API keys
+// results by ID and so would collapse duplicates into one logical entry —
+// counting the physical items requires going under it, straight to the daemon.
+//
+// It opens its own short-lived connection (mirroring how the store dials a fresh
+// connection per operation) so it observes exactly what an independent client
+// would see. Object paths it returns stay valid after the connection closes.
+//
+// It returns its error rather than failing the test so it is safe to call from a
+// [require.Eventually] condition, which runs on a separate goroutine where the
+// require.* helpers must not be used.
+func searchRealItems(serviceGroup, serviceName string, id store.ID) ([]dbus.ObjectPath, error) {
+	svc, err := kc.NewService()
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = svc.Close() }()
+
+	collection, err := getDefaultCollection(svc)
+	if err != nil {
+		return nil, err
+	}
+
+	attrs := map[string]string{}
+	safelySetMetadata(serviceGroup, serviceName, attrs)
+	safelySetID(id, attrs)
+
+	return svc.SearchCollection(collection, attrs)
+}
+
+// findRealItems is the test-goroutine wrapper around [searchRealItems] that fails
+// the test on error.
+func findRealItems(t *testing.T, serviceGroup, serviceName string, id store.ID) []dbus.ObjectPath {
+	t.Helper()
+	items, err := searchRealItems(serviceGroup, serviceName, id)
+	require.NoError(t, err)
+	return items
+}
+
+// requireItemCount polls the live Secret Service until exactly want items remain
+// for id, failing after a short timeout. Polling absorbs any lag between the
+// store deleting duplicates and an independent connection observing it; on
+// timeout EventuallyWithT reports the last observed count (and any search
+// error), so a genuine failure to converge is still caught and diagnosable.
+func requireItemCount(t *testing.T, serviceGroup, serviceName string, id store.ID, want int, msg string) {
+	t.Helper()
+	require.EventuallyWithT(t, func(c *assert.CollectT) {
+		items, err := searchRealItems(serviceGroup, serviceName, id)
+		assert.NoError(c, err)
+		assert.Len(c, items, want, msg)
+	}, 10*time.Second, 200*time.Millisecond)
+}
+
+// seedRealDuplicates creates n separate Secret Service items that all share id's
+// stable identity triple but carry a distinct volatile attribute each — exactly
+// how issue #446 accumulates duplicates: the daemon's replace match fails on the
+// differing volatile attributes, so every save mints a fresh item.
+func seedRealDuplicates(t *testing.T, serviceGroup, serviceName string, id store.ID, n int) {
+	t.Helper()
+	svc, err := kc.NewService()
+	require.NoError(t, err)
+	defer func() { _ = svc.Close() }()
+
+	session, err := svc.OpenSession(kc.AuthenticationDHAES)
+	require.NoError(t, err)
+	defer svc.CloseSession(session)
+
+	collection, err := getDefaultCollection(svc)
+	require.NoError(t, err)
+
+	// Talking to the daemon directly skips the unlock the store does internally,
+	// and gnome-keyring reports even the passwordless 'login' collection as
+	// locked until then.
+	ensureUnlocked(t, svc, collection)
+
+	label := serviceGroup + ":" + serviceName + ":" + id.String()
+	for i := range n {
+		sessSecret, err := session.NewSecret(fmt.Appendf(nil, "seed-user:seed-pass-%d", i))
+		require.NoError(t, err)
+
+		attrs := map[string]string{
+			// volatile: distinct per item, so each CreateItem adds a new one
+			// rather than replacing — the duplicate-accumulation pattern.
+			"nonce": fmt.Sprintf("seed-%d", i),
+		}
+		safelySetMetadata(serviceGroup, serviceName, attrs)
+		safelySetID(id, attrs)
+
+		_, err = svc.CreateItem(collection, kc.NewSecretProperties(label, attrs), sessSecret, kc.ReplaceBehaviorDoNotReplace)
+		require.NoError(t, err)
+	}
+}
+
+// purgeRealItems removes every item for id, draining any leftover duplicates so
+// the test cannot leak state. It unlocks the collection first (DeleteItem fails
+// on a locked collection) and deletes all matches, asserting success so a silent
+// cleanup failure surfaces as a leak rather than corrupting a later test.
+func purgeRealItems(t *testing.T, serviceGroup, serviceName string, id store.ID) {
+	t.Helper()
+	svc, err := kc.NewService()
+	require.NoError(t, err)
+	defer func() { _ = svc.Close() }()
+
+	collection, err := getDefaultCollection(svc)
+	require.NoError(t, err)
+	ensureUnlocked(t, svc, collection)
+
+	attrs := map[string]string{}
+	safelySetMetadata(serviceGroup, serviceName, attrs)
+	safelySetID(id, attrs)
+	items, err := svc.SearchCollection(collection, attrs)
+	require.NoError(t, err)
+	for _, item := range items {
+		require.NoError(t, svc.DeleteItem(item))
+	}
+}
+
+// TestKeychainCollapsesExistingDuplicates is the issue #446 backlog test against
+// a real Secret Service: given several duplicate items already stored under one
+// identity, a single Save must update one item in place and delete the rest,
+// leaving exactly one item holding the latest secret.
+func TestKeychainCollapsesExistingDuplicates(t *testing.T) {
+	id := store.MustParseID(dedupServiceGroup + "/" + dedupServiceName + "/backlog")
+	t.Cleanup(func() { purgeRealItems(t, dedupServiceGroup, dedupServiceName, id) })
+
+	// Pre-existing backlog: three duplicate items for one identity.
+	seedRealDuplicates(t, dedupServiceGroup, dedupServiceName, id, 3)
+	require.Len(t, findRealItems(t, dedupServiceGroup, dedupServiceName, id), 3, "precondition: three duplicates seeded")
+
+	ks, err := New(dedupServiceGroup, dedupServiceName, func(_ context.Context, _ store.ID) store.Secret {
+		return &mocks.MockCredential{}
+	})
+	require.NoError(t, err)
+
+	require.NoError(t, ks.Save(t.Context(), id, &mocks.MockCredential{
+		Username: "backlog-user",
+		Password: "final-password",
+	}))
+
+	requireItemCount(t, dedupServiceGroup, dedupServiceName, id, 1,
+		"a single Save must collapse the duplicate backlog to one item")
+
+	got, err := ks.Get(t.Context(), id)
+	require.NoError(t, err)
+	assert.Equal(t, "final-password", got.(*mocks.MockCredential).Password,
+		"the surviving item must hold the latest secret")
+}
+
+// TestKeychainSaveDoesNotAccumulate is the forward-looking issue #446 test
+// against a real Secret Service: saving the same identity repeatedly with
+// metadata that changes on every save (mimicking volatile JWT claims) must keep
+// exactly one item, never minting duplicates.
+func TestKeychainSaveDoesNotAccumulate(t *testing.T) {
+	id := store.MustParseID(dedupServiceGroup + "/" + dedupServiceName + "/no-accumulate")
+	t.Cleanup(func() { purgeRealItems(t, dedupServiceGroup, dedupServiceName, id) })
+
+	ks, err := New(dedupServiceGroup, dedupServiceName, func(_ context.Context, _ store.ID) store.Secret {
+		return &mocks.MockCredential{}
+	})
+	require.NoError(t, err)
+
+	const saves = 5
+	for i := range saves {
+		require.NoError(t, ks.Save(t.Context(), id, &mocks.MockCredential{
+			Username: "no-accumulate-user",
+			Password: fmt.Sprintf("password-%d", i),
+			Attributes: map[string]string{
+				"nonce": fmt.Sprintf("%d", i), // volatile: differs every save
+			},
+		}))
+	}
+
+	requireItemCount(t, dedupServiceGroup, dedupServiceName, id, 1,
+		"saving with changing metadata must not accumulate duplicate items")
+
+	got, err := ks.Get(t.Context(), id)
+	require.NoError(t, err)
+	actual := got.(*mocks.MockCredential)
+	assert.Equal(t, fmt.Sprintf("password-%d", saves-1), actual.Password)
+	assert.Equal(t, fmt.Sprintf("%d", saves-1), actual.Attributes["nonce"],
+		"the surviving item's metadata must be refreshed in place")
+}
+
 func TestResolveDefaultCollection(t *testing.T) {
 	const customCollection = dbus.ObjectPath("/org/freedesktop/secrets/collection/custom")
 

From e03dc7a6458d1bf3a1cf278c0f1ddf28b53a7004 Mon Sep 17 00:00:00 2001
From: Alano Terblanche <18033717+Benehiko@users.noreply.github.com>
Date: Wed, 24 Jun 2026 14:23:30 +0200
Subject: [PATCH 3/3] fix(keychain): make Linux write path and dedup tests
 relock-aware
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The real-keyring tests added by this PR fail intermittently in CI with
"Cannot create an item in a locked collection" and a duplicate backlog
that never collapses. Both are the gnome-keyring relock race: the store
dials a fresh D-Bus connection per operation, and a prior op's closing
connection relocks the collection asynchronously — after the unlock but
before the call against the collection runs. IsLocked cannot guard it
because the state changes between the check and the call.

React to the authoritative signal instead. Add withRelockRetry, which
retries an operation with bounded exponential backoff when it fails with
the org.freedesktop.Secret.Error.IsLocked D-Bus error, re-unlocking each
attempt and aborting immediately if Unlock itself fails (e.g. a dismissed
prompt).

Wrap the lock-gated mutations with it:
- Save: the create-when-absent CreateItem, the in-place SetItemSecret,
  and the best-effort duplicate-collapse DeleteItem loop. The collapse
  delete stays best-effort but is now relock-aware: a swallowed locked
  error would otherwise leave the duplicates this feature exists to drain
  (issue #446).
- Delete: DeleteItem.
Reads (Get/Filter GetSecret) are equally race-prone but are left to the
dedicated relock-retry change (#560); this commit covers the write path
the new tests exercise.

Also make the tests' own direct-daemon helpers relock-aware, since they
talk to the daemon outside the store: seedRealDuplicates' CreateItem and
purgeRealItems' DeleteItem now go through withRelockRetry.

Tests: fake-backed TestKeychainSaveRetriesWhenCreateRelocks,
TestKeychainSaveRetriesWhenSetSecretRelocks,
TestKeychainSaveCollapseRetriesWhenDeleteRelocks and
TestKeychainSaveStopsRetryingAfterMaxRelocks. Verified green against real
gnome-keyring on the fedora-43 and ubuntu-24 CI harnesses; lint clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 store/keychain/keychain_linux.go      |  97 +++++++++++++++++-
 store/keychain/keychain_linux_test.go | 136 +++++++++++++++++++++++++-
 2 files changed, 224 insertions(+), 9 deletions(-)

diff --git a/store/keychain/keychain_linux.go b/store/keychain/keychain_linux.go
index d1fe4aed..1126fe66 100644
--- a/store/keychain/keychain_linux.go
+++ b/store/keychain/keychain_linux.go
@@ -23,6 +23,7 @@ import (
 	"fmt"
 	"maps"
 	"slices"
+	"time"
 
 	dbus "github.com/godbus/dbus/v5"
 
@@ -148,6 +149,81 @@ func isCollectionUnlocked(collectionPath dbus.ObjectPath, service secretService)
 	return errCollectionLocked
 }
 
+// secretServiceIsLockedError is the D-Bus error name the secret service returns
+// when a mutating call (e.g. CreateItem) targets a locked collection.
+//
+// https://specifications.freedesktop.org/secret-service-spec/latest/errors.html
+const secretServiceIsLockedError = "org.freedesktop.Secret.Error.IsLocked"
+
+// isLockedDBusError reports whether err is the secret service's "collection is
+// locked" D-Bus error. The lock state is matched on the structured D-Bus error
+// name rather than the human-readable message so it is stable across backends
+// and locales.
+func isLockedDBusError(err error) bool {
+	var dbusErr dbus.Error
+	return errors.As(err, &dbusErr) && dbusErr.Name == secretServiceIsLockedError
+}
+
+// Relock retry tuning. An operation that hits a relocked collection is retried
+// with exponential backoff: the relock is a brief race that settles on its own,
+// and spacing the attempts out avoids hammering the secret service (or, on a
+// password-protected keyring, re-issuing Unlock fast enough to spam the user
+// with authentication prompts).
+//
+// relockRetryMaxDelay caps the backoff growth; with the current
+// maxRelockRetries the slept delays are 20,40,80,160,320ms (the cap only takes
+// effect once maxRelockRetries reaches 6, where the sixth delay would otherwise
+// be 640ms).
+const (
+	maxRelockRetries     = 5
+	relockRetryBaseDelay = 20 * time.Millisecond
+	relockRetryMaxDelay  = 500 * time.Millisecond
+)
+
+// sleepFn is the sleep seam used by the relock backoff so tests can exercise the
+// retry loop without real delays. It is a package-level var with no
+// synchronisation, so tests that swap it must not run in parallel.
+var sleepFn = time.Sleep
+
+// withRelockRetry runs a collection operation, retrying it with exponential
+// backoff when the secret service rejects it because the collection is locked.
+//
+// The store dials a fresh D-Bus connection for every operation and closes it on
+// return. gnome-keyring scopes an unlock to the session that performed it, so
+// when a previous operation's connection closes the daemon relocks the
+// collection — and that relock can land asynchronously in the middle of a later
+// operation, after we have already observed the collection as unlocked but
+// before the call against the collection runs. The result is an intermittent
+// "Cannot create an item in a locked collection" error even though we unlocked
+// moments earlier. IsLocked cannot guard against this because the state changes
+// between the check and the call, so we react to the authoritative signal — the
+// operation's own locked error — by unlocking again and retrying.
+//
+// In the common case this is the passwordless auto-unlock path (e.g. the
+// PAM-unlocked login keyring), where Unlock returns the null prompt and asks
+// the user for nothing. withRelockRetry cannot itself prove the keyring is
+// passwordless, so on a password-protected keyring a retry could surface an
+// authentication prompt; the bounded retry count and backoff keep that to a
+// handful of spaced-out prompts at worst, and a dismissed prompt makes Unlock
+// return an error that aborts the loop immediately rather than re-prompting.
+func withRelockRetry(service secretService, collectionPath dbus.ObjectPath, op func() error) error {
+	err := op()
+	delay := relockRetryBaseDelay
+	for attempt := 0; attempt < maxRelockRetries && isLockedDBusError(err); attempt++ {
+		sleepFn(delay)
+		delay = min(delay*2, relockRetryMaxDelay)
+		if unlockErr := service.Unlock([]dbus.ObjectPath{collectionPath}); unlockErr != nil {
+			// Surface why the retry stopped while preserving errors.Is on the
+			// underlying Unlock error (e.g. a dismissed prompt). The original
+			// locked error is intentionally dropped: the failed unlock is the
+			// actionable cause once we have decided to stop retrying.
+			return fmt.Errorf("unlock after relock: %w", unlockErr)
+		}
+		err = op()
+	}
+	return err
+}
+
 type keychainStore[T store.Secret] struct {
 	serviceGroup string
 	serviceName  string
@@ -198,7 +274,9 @@ func (k *keychainStore[T]) Delete(_ context.Context, id store.ID) error {
 		return nil
 	}
 
-	return service.DeleteItem(items[0])
+	return withRelockRetry(service, objectPath, func() error {
+		return service.DeleteItem(items[0])
+	})
 }
 
 func (k *keychainStore[T]) Get(ctx context.Context, id store.ID) (store.Secret, error) {
@@ -406,8 +484,10 @@ func (k *keychainStore[T]) Save(_ context.Context, id store.ID, secret store.Sec
 	// Nothing stored yet: create a fresh item.
 	if len(items) == 0 {
 		properties := kc.NewSecretProperties(label, attributes)
-		_, err = service.CreateItem(objectPath, properties, sessSecret, kc.ReplaceBehaviorReplace)
-		return err
+		return withRelockRetry(service, objectPath, func() error {
+			_, createErr := service.CreateItem(objectPath, properties, sessSecret, kc.ReplaceBehaviorReplace)
+			return createErr
+		})
 	}
 
 	// Update the first match in place. Its object path is preserved, so the
@@ -416,13 +496,20 @@ func (k *keychainStore[T]) Save(_ context.Context, id store.ID, secret store.Sec
 	// the attributes and label and collapsing any pre-existing duplicates are
 	// best-effort (the secret is already stored) and must not flip the result.
 	primary := items[0]
-	if err := service.SetItemSecret(primary, sessSecret); err != nil {
+	if err := withRelockRetry(service, objectPath, func() error {
+		return service.SetItemSecret(primary, sessSecret)
+	}); err != nil {
 		return err
 	}
 	_ = service.SetItemAttributes(primary, attributes)
 	_ = service.SetItemLabel(primary, label)
 	for _, dup := range items[1:] {
-		_ = service.DeleteItem(dup)
+		// Best-effort, but still relock-aware: a collection that relocks
+		// mid-collapse would otherwise leave the duplicates the whole feature
+		// exists to drain (see withRelockRetry and issue #446).
+		_ = withRelockRetry(service, objectPath, func() error {
+			return service.DeleteItem(dup)
+		})
 	}
 
 	return nil
diff --git a/store/keychain/keychain_linux_test.go b/store/keychain/keychain_linux_test.go
index 94f5aca9..d547e001 100644
--- a/store/keychain/keychain_linux_test.go
+++ b/store/keychain/keychain_linux_test.go
@@ -48,13 +48,34 @@ type fakeService struct {
 	// concurrency-safe: the tests that read them drive a single sequential
 	// operation through the fake.
 	createCalls    int
+	setSecretCalls int
+	deleteCalls    int
 	setSecretItems []dbus.ObjectPath
 	deletedItems   []dbus.ObjectPath
 
+	// {createItem,setSecret,deleteItem}LockedErrs is how many leading calls of
+	// each kind fail with the secret service "collection is locked" D-Bus error
+	// before one succeeds, simulating a collection that relocks underneath the
+	// store (see withRelockRetry). The error is wrapped exactly as the real
+	// service wraps it, so the tests exercise the errors.As-through-wrap path
+	// isLockedDBusError depends on. unlockCalls counts the re-unlocks the retry
+	// issues; unlockErr, when set, makes Unlock fail (e.g. a dismissed prompt).
+	createItemLockedErrs int
+	setSecretLockedErrs  int
+	deleteItemLockedErrs int
+	unlockCalls          int
+	unlockErr            error
+
 	opened atomic.Int64
 	closed atomic.Int64
 }
 
+// lockedErr mirrors how the real SecretService wraps a locked-collection D-Bus
+// error (see secretservice.go), so isLockedDBusError must unwrap to detect it.
+func lockedErr(op string) error {
+	return fmt.Errorf("failed to %s: %w", op, dbus.Error{Name: secretServiceIsLockedError})
+}
+
 func (f *fakeService) Collections() ([]dbus.ObjectPath, error) {
 	return []dbus.ObjectPath{loginKeychainObjectPath}, nil
 }
@@ -65,24 +86,39 @@ func (f *fakeService) OpenSession(kc.AuthenticationMode) (*kc.Session, error) {
 	// lets the Save path run end-to-end against the fake.
 	return &kc.Session{Mode: kc.AuthenticationInsecurePlain}, nil
 }
-func (f *fakeService) CloseSession(*kc.Session)       {}
-func (f *fakeService) Unlock([]dbus.ObjectPath) error { return nil }
+func (f *fakeService) CloseSession(*kc.Session) {}
+func (f *fakeService) Unlock([]dbus.ObjectPath) error {
+	f.unlockCalls++
+	return f.unlockErr
+}
+
 func (f *fakeService) SearchCollection(dbus.ObjectPath, kc.Attributes) ([]dbus.ObjectPath, error) {
 	return f.items, nil
 }
 
 func (f *fakeService) CreateItem(dbus.ObjectPath, map[string]dbus.Variant, kc.Secret, kc.ReplaceBehavior) (dbus.ObjectPath, error) {
 	f.createCalls++
+	if f.createCalls <= f.createItemLockedErrs {
+		return "", lockedErr("create item")
+	}
 	return "/created", nil
 }
 
 func (f *fakeService) DeleteItem(item dbus.ObjectPath) error {
+	f.deleteCalls++
+	if f.deleteCalls <= f.deleteItemLockedErrs {
+		return lockedErr("delete item")
+	}
 	f.deletedItems = append(f.deletedItems, item)
 	return nil
 }
 func (f *fakeService) GetAttributes(dbus.ObjectPath) (kc.Attributes, error)  { return nil, nil }
 func (f *fakeService) GetSecret(dbus.ObjectPath, kc.Session) ([]byte, error) { return nil, nil }
 func (f *fakeService) SetItemSecret(item dbus.ObjectPath, _ kc.Secret) error {
+	f.setSecretCalls++
+	if f.setSecretCalls <= f.setSecretLockedErrs {
+		return lockedErr("set item secret")
+	}
 	f.setSecretItems = append(f.setSecretItems, item)
 	return nil
 }
@@ -105,6 +141,16 @@ func withFakeService(t *testing.T, fake *fakeService) {
 	}
 }
 
+// stubRelockSleep replaces the relock backoff sleep with a no-op so retry tests
+// run without real delays, restoring it on cleanup. It mutates the package-level
+// sleepFn var, so tests using it must not run in parallel.
+func stubRelockSleep(t *testing.T) {
+	t.Helper()
+	orig := sleepFn
+	t.Cleanup(func() { sleepFn = orig })
+	sleepFn = func(time.Duration) {}
+}
+
 // TestKeychainGetNotFound exercises the full Get path against the fake — open,
 // resolve collection, search — and asserts an empty search maps to
 // ErrCredentialNotFound, all without a live keyring.
@@ -180,6 +226,79 @@ func TestKeychainSaveCollapsesDuplicatesInPlace(t *testing.T) {
 		"the remaining duplicates must be collapsed, leaving only the first match")
 }
 
+// TestKeychainSaveRetriesWhenCreateRelocks covers the create path: gnome-keyring
+// can relock the collection between the unlock and the CreateItem, so a fresh
+// Save must react to the locked error by unlocking and retrying rather than
+// failing.
+func TestKeychainSaveRetriesWhenCreateRelocks(t *testing.T) {
+	stubRelockSleep(t)
+	fake := &fakeService{} // no items -> create path
+	fake.createItemLockedErrs = 2
+	withFakeService(t, fake)
+
+	ks := setupKeychain(t, nil)
+	require.NoError(t, ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"),
+		&mocks.MockCredential{Username: "bob", Password: "bob-password"}))
+
+	assert.Equal(t, 3, fake.createCalls, "two locked failures then one success")
+	assert.Equal(t, 2, fake.unlockCalls, "exactly one Unlock per relock retry")
+}
+
+// TestKeychainSaveRetriesWhenSetSecretRelocks covers the in-place update path:
+// the SetItemSecret that rewrites the surviving item must survive a relock.
+func TestKeychainSaveRetriesWhenSetSecretRelocks(t *testing.T) {
+	stubRelockSleep(t)
+	fake := &fakeService{items: []dbus.ObjectPath{"/item/a"}}
+	fake.setSecretLockedErrs = 2
+	withFakeService(t, fake)
+
+	ks := setupKeychain(t, nil)
+	require.NoError(t, ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"),
+		&mocks.MockCredential{Username: "bob", Password: "bob-password"}))
+
+	assert.Equal(t, []dbus.ObjectPath{"/item/a"}, fake.setSecretItems,
+		"the secret must be written in place once the relock clears")
+	assert.Equal(t, 2, fake.unlockCalls, "exactly one Unlock per relock retry")
+}
+
+// TestKeychainSaveCollapseRetriesWhenDeleteRelocks is the unit-level counterpart
+// of the real-keyring backlog test: collapsing a duplicate must drain it even if
+// the collection relocks mid-delete. The collapse delete is best-effort, but a
+// silently swallowed locked error would leave the duplicate behind — the exact
+// #446 symptom — so it is still relock-aware.
+func TestKeychainSaveCollapseRetriesWhenDeleteRelocks(t *testing.T) {
+	stubRelockSleep(t)
+	fake := &fakeService{items: []dbus.ObjectPath{"/item/a", "/item/b"}}
+	fake.deleteItemLockedErrs = 2
+	withFakeService(t, fake)
+
+	ks := setupKeychain(t, nil)
+	require.NoError(t, ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"),
+		&mocks.MockCredential{Username: "bob", Password: "bob-password"}))
+
+	assert.Equal(t, []dbus.ObjectPath{"/item/b"}, fake.deletedItems,
+		"the duplicate must be collapsed once the relock clears")
+	assert.Equal(t, 3, fake.deleteCalls, "two locked failures then one success")
+	assert.Equal(t, 2, fake.unlockCalls, "exactly one Unlock per relock retry")
+}
+
+// TestKeychainSaveStopsRetryingAfterMaxRelocks asserts the retry is bounded: a
+// persistently locked collection surfaces the locked error to the caller instead
+// of looping forever.
+func TestKeychainSaveStopsRetryingAfterMaxRelocks(t *testing.T) {
+	stubRelockSleep(t)
+	fake := &fakeService{}              // no items -> create path
+	fake.createItemLockedErrs = 1 << 30 // never recovers
+	withFakeService(t, fake)
+
+	ks := setupKeychain(t, nil)
+	err := ks.Save(t.Context(), store.MustParseID("com.test.test/test/bob"),
+		&mocks.MockCredential{Username: "bob", Password: "bob-password"})
+	require.Error(t, err)
+	assert.True(t, isLockedDBusError(err), "the persistent locked error must reach the caller")
+	assert.Equal(t, maxRelockRetries+1, fake.createCalls, "initial attempt plus the bounded retries")
+}
+
 // The real-keychain dedup tests use their own service group/name so their items
 // are namespace-isolated from TestKeychain (which shares com.test.test/test).
 // GetAllMetadata/Filter search by {service:group, service:name}, so a leaked
@@ -293,7 +412,14 @@ func seedRealDuplicates(t *testing.T, serviceGroup, serviceName string, id store
 		safelySetMetadata(serviceGroup, serviceName, attrs)
 		safelySetID(id, attrs)
 
-		_, err = svc.CreateItem(collection, kc.NewSecretProperties(label, attrs), sessSecret, kc.ReplaceBehaviorDoNotReplace)
+		// Seed directly against the daemon, but stay relock-aware: a prior op's
+		// closing connection can relock the collection between the unlock above
+		// and this create (see withRelockRetry), which would otherwise fail the
+		// seed with "Cannot create an item in a locked collection".
+		err = withRelockRetry(svc, collection, func() error {
+			_, createErr := svc.CreateItem(collection, kc.NewSecretProperties(label, attrs), sessSecret, kc.ReplaceBehaviorDoNotReplace)
+			return createErr
+		})
 		require.NoError(t, err)
 	}
 }
@@ -318,7 +444,9 @@ func purgeRealItems(t *testing.T, serviceGroup, serviceName string, id store.ID)
 	items, err := svc.SearchCollection(collection, attrs)
 	require.NoError(t, err)
 	for _, item := range items {
-		require.NoError(t, svc.DeleteItem(item))
+		require.NoError(t, withRelockRetry(svc, collection, func() error {
+			return svc.DeleteItem(item)
+		}))
 	}
 }