From c22541752da4e447ebaac3378e4ebe507d084487 Mon Sep 17 00:00:00 2001 From: Dennis Date: Sat, 23 May 2026 11:19:30 +0000 Subject: [PATCH 1/3] fix(nodeauth): downgrade context-cancel errors to WARN in node JWT authenticator SHARED-2644: De-noise context cancellation errors in node auth path. - Log WARN (not ERROR) when IsNodePubKeyTrusted returns context.Canceled or context.DeadlineExceeded - Keep ERROR for genuine provider failures - Add contextErr structured field for observability - Add test cases for both Canceled and DeadlineExceeded paths - Fix captureHandler.WithAttrs/WithGroup to satisfy slog.Handler contract --- pkg/beholder/durable_event_store.go | 52 ++++++++ pkg/nodeauth/jwt/node_jwt_authenticator.go | 16 ++- .../jwt/node_jwt_authenticator_test.go | 117 ++++++++++++++++++ 3 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 pkg/beholder/durable_event_store.go diff --git a/pkg/beholder/durable_event_store.go b/pkg/beholder/durable_event_store.go new file mode 100644 index 0000000000..1dc4026ac4 --- /dev/null +++ b/pkg/beholder/durable_event_store.go @@ -0,0 +1,52 @@ +package beholder + +import ( + "context" + "time" +) + +// Backward-compatibility types for downstream consumers (e.g. chainlink node). +// These were moved to pkg/durableemitter in PR #2081. They are re-declared here +// (not aliases) to avoid an import cycle through test utilities. +// +// TODO(SHARED-2644): Remove this file once chainlink node migrates imports to +// pkg/durableemitter (tracked in chainlink PR #22562). These duplicate declarations +// have no compile-time sync check against the canonical types. + +// DurableEvent represents a persisted event awaiting delivery to Chip. +type DurableEvent struct { + ID int64 + Payload []byte + CreatedAt time.Time +} + +// DurableQueueStats is a point-in-time snapshot of the pending queue for metrics. +type DurableQueueStats struct { + Depth int64 + PayloadBytes int64 + OldestPendingAge time.Duration + NearTTLCount int64 +} + +// DurableQueueObserver is optionally implemented by DurableEventStore implementations +// so DurableEmitter can export queue depth and age gauges when metrics are enabled. +type DurableQueueObserver interface { + ObserveDurableQueue(ctx context.Context, eventTTL, nearExpiryLead time.Duration) (DurableQueueStats, error) +} + +// BatchInserter is optionally implemented by DurableEventStore implementations +// to support multi-row inserts for higher throughput. +type BatchInserter interface { + InsertBatch(ctx context.Context, payloads [][]byte) ([]int64, error) +} + +// DurableEventStore abstracts the persistence layer for durable chip events. +type DurableEventStore interface { + Insert(ctx context.Context, payload []byte) (int64, error) + Delete(ctx context.Context, id int64) error + MarkDelivered(ctx context.Context, id int64) error + MarkDeliveredBatch(ctx context.Context, ids []int64) (int64, error) + PurgeDelivered(ctx context.Context, batchLimit int) (deleted int64, err error) + ListPending(ctx context.Context, createdBefore time.Time, limit int) ([]DurableEvent, error) + DeleteExpired(ctx context.Context, ttl time.Duration) (int64, error) +} diff --git a/pkg/nodeauth/jwt/node_jwt_authenticator.go b/pkg/nodeauth/jwt/node_jwt_authenticator.go index 24bcbd5ac8..5a3fa79ba2 100644 --- a/pkg/nodeauth/jwt/node_jwt_authenticator.go +++ b/pkg/nodeauth/jwt/node_jwt_authenticator.go @@ -78,10 +78,18 @@ func (v *NodeJWTAuthenticator) AuthenticateJWT(ctx context.Context, tokenString // Public Key Validation: Verify node's CSA pubkey against the whitelisted registry via NodeAuthProvider. isValid, err := v.nodeAuthProvider.IsNodePubKeyTrusted(ctx, publicKey) if err != nil { - v.logger.Error("Node validation failed", - "csaPubKey", hex.EncodeToString(publicKey), - "error", err, - ) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + v.logger.Warn("Node validation skipped: context canceled or deadline exceeded", + "csaPubKey", hex.EncodeToString(publicKey), + "error", err, + "contextErr", ctx.Err(), + ) + } else { + v.logger.Error("Node validation failed", + "csaPubKey", hex.EncodeToString(publicKey), + "error", err, + ) + } return false, claims, fmt.Errorf("node validation failed: %w", err) } diff --git a/pkg/nodeauth/jwt/node_jwt_authenticator_test.go b/pkg/nodeauth/jwt/node_jwt_authenticator_test.go index 9986d268aa..539abc919f 100644 --- a/pkg/nodeauth/jwt/node_jwt_authenticator_test.go +++ b/pkg/nodeauth/jwt/node_jwt_authenticator_test.go @@ -5,8 +5,10 @@ import ( "crypto/ed25519" "crypto/rand" "encoding/hex" + "errors" "io" "log/slog" + "sync" "testing" "time" @@ -450,3 +452,118 @@ func TestNewNodeJWTAuthenticator_WithAndWithoutLeeway(t *testing.T) { assert.NotNil(t, authenticator.parser) }) } + +// captureHandler is a minimal slog.Handler that captures log records for test assertions. +// It satisfies the full slog.Handler contract: WithAttrs and WithGroup return a new +// handler so that logger.With(...) / logger.WithGroup(...) calls don't silently discard +// attributes. +type captureHandler struct { + mu sync.Mutex + records []slog.Record + attrs []slog.Attr + groups []string +} + +func (h *captureHandler) Enabled(_ context.Context, _ slog.Level) bool { return true } +func (h *captureHandler) Handle(_ context.Context, r slog.Record) error { + // Prepend any inherited attrs so they appear in captured records. + if len(h.attrs) > 0 { + r2 := slog.NewRecord(r.Time, r.Level, r.Message, r.PC) + r2.AddAttrs(h.attrs...) + r.Attrs(func(a slog.Attr) bool { r2.AddAttrs(a); return true }) + r = r2 + } + h.mu.Lock() + defer h.mu.Unlock() + h.records = append(h.records, r.Clone()) + return nil +} +func (h *captureHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + h.mu.Lock() + defer h.mu.Unlock() + merged := make([]slog.Attr, len(h.attrs)+len(attrs)) + copy(merged, h.attrs) + copy(merged[len(h.attrs):], attrs) + return &captureHandler{records: h.records, attrs: merged, groups: h.groups} +} +func (h *captureHandler) WithGroup(name string) slog.Handler { + h.mu.Lock() + defer h.mu.Unlock() + groups := append(append([]string{}, h.groups...), name) + return &captureHandler{records: h.records, attrs: h.attrs, groups: groups} +} + +func TestNodeJWTAuthenticator_AuthenticateJWT_ProviderNonContextError(t *testing.T) { + // Non-context provider errors must be logged at ERROR level. + privateKey, csaPubKey := createValidatorTestKeys() + providerErr := errors.New("database unavailable") + mockProvider := &mocks.NodeAuthProvider{} + mockProvider.On("IsNodePubKeyTrusted", mock.Anything, csaPubKey).Return(false, providerErr) + + h := &captureHandler{} + authenticator := NewNodeJWTAuthenticator(mockProvider, slog.New(h)) + + testRequest := testRequest{Field: "test-request"} + valid, claims, err := authenticator.AuthenticateJWT(context.Background(), createValidJWT(privateKey, csaPubKey), testRequest) + + require.Error(t, err) + assert.False(t, valid) + assert.NotNil(t, claims) + assert.Contains(t, err.Error(), "node validation failed") + + require.Len(t, h.records, 1) + assert.Equal(t, slog.LevelError, h.records[0].Level, "non-context provider errors should log at ERROR") + mockProvider.AssertExpectations(t) +} + +func TestNodeJWTAuthenticator_AuthenticateJWT_ProviderContextCancelledError(t *testing.T) { + // Context-cancellation errors from the provider must be logged at WARN, not ERROR, + // because they are caused by the caller cancelling the request — not a system fault. + privateKey, csaPubKey := createValidatorTestKeys() + mockProvider := &mocks.NodeAuthProvider{} + mockProvider.On("IsNodePubKeyTrusted", mock.Anything, csaPubKey).Return(false, context.Canceled) + + h := &captureHandler{} + authenticator := NewNodeJWTAuthenticator(mockProvider, slog.New(h)) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // already cancelled + + testRequest := testRequest{Field: "test-request"} + valid, claims, err := authenticator.AuthenticateJWT(ctx, createValidJWT(privateKey, csaPubKey), testRequest) + + require.Error(t, err) + assert.False(t, valid) + assert.NotNil(t, claims) + assert.ErrorIs(t, err, context.Canceled) + + require.Len(t, h.records, 1) + assert.Equal(t, slog.LevelWarn, h.records[0].Level, "context cancellation from provider should log at WARN not ERROR") + mockProvider.AssertExpectations(t) +} + +func TestNodeJWTAuthenticator_AuthenticateJWT_ProviderDeadlineExceededError(t *testing.T) { + // context.DeadlineExceeded from the provider must also be logged at WARN, not ERROR, + // because it is an expected transient condition (e.g. slow upstream), not a system fault. + privateKey, csaPubKey := createValidatorTestKeys() + mockProvider := &mocks.NodeAuthProvider{} + mockProvider.On("IsNodePubKeyTrusted", mock.Anything, csaPubKey).Return(false, context.DeadlineExceeded) + + h := &captureHandler{} + authenticator := NewNodeJWTAuthenticator(mockProvider, slog.New(h)) + + ctx, cancel := context.WithTimeout(context.Background(), 0) // immediately expired + defer cancel() + + testRequest := testRequest{Field: "test-request"} + valid, claims, err := authenticator.AuthenticateJWT(ctx, createValidJWT(privateKey, csaPubKey), testRequest) + + require.Error(t, err) + assert.False(t, valid) + assert.NotNil(t, claims) + assert.ErrorIs(t, err, context.DeadlineExceeded) + + require.Len(t, h.records, 1) + assert.Equal(t, slog.LevelWarn, h.records[0].Level, "deadline exceeded from provider should log at WARN not ERROR") + mockProvider.AssertExpectations(t) +} From fa33a2aa0e086b56abfa634625afa5754457003a Mon Sep 17 00:00:00 2001 From: "Dzianis Vashchuk (Chainlink)" Date: Wed, 27 May 2026 15:53:45 +0000 Subject: [PATCH 2/3] refactor(nodeauth): remove beholder compat shim (moved to separate PR) The backward-compat type declarations for DurableEventStore are split into their own PR (fix/beholder-backward-compat) per reviewer request to keep this PR focused on the log-noise fix. --- pkg/beholder/durable_event_store.go | 52 ----------------------------- 1 file changed, 52 deletions(-) delete mode 100644 pkg/beholder/durable_event_store.go diff --git a/pkg/beholder/durable_event_store.go b/pkg/beholder/durable_event_store.go deleted file mode 100644 index 1dc4026ac4..0000000000 --- a/pkg/beholder/durable_event_store.go +++ /dev/null @@ -1,52 +0,0 @@ -package beholder - -import ( - "context" - "time" -) - -// Backward-compatibility types for downstream consumers (e.g. chainlink node). -// These were moved to pkg/durableemitter in PR #2081. They are re-declared here -// (not aliases) to avoid an import cycle through test utilities. -// -// TODO(SHARED-2644): Remove this file once chainlink node migrates imports to -// pkg/durableemitter (tracked in chainlink PR #22562). These duplicate declarations -// have no compile-time sync check against the canonical types. - -// DurableEvent represents a persisted event awaiting delivery to Chip. -type DurableEvent struct { - ID int64 - Payload []byte - CreatedAt time.Time -} - -// DurableQueueStats is a point-in-time snapshot of the pending queue for metrics. -type DurableQueueStats struct { - Depth int64 - PayloadBytes int64 - OldestPendingAge time.Duration - NearTTLCount int64 -} - -// DurableQueueObserver is optionally implemented by DurableEventStore implementations -// so DurableEmitter can export queue depth and age gauges when metrics are enabled. -type DurableQueueObserver interface { - ObserveDurableQueue(ctx context.Context, eventTTL, nearExpiryLead time.Duration) (DurableQueueStats, error) -} - -// BatchInserter is optionally implemented by DurableEventStore implementations -// to support multi-row inserts for higher throughput. -type BatchInserter interface { - InsertBatch(ctx context.Context, payloads [][]byte) ([]int64, error) -} - -// DurableEventStore abstracts the persistence layer for durable chip events. -type DurableEventStore interface { - Insert(ctx context.Context, payload []byte) (int64, error) - Delete(ctx context.Context, id int64) error - MarkDelivered(ctx context.Context, id int64) error - MarkDeliveredBatch(ctx context.Context, ids []int64) (int64, error) - PurgeDelivered(ctx context.Context, batchLimit int) (deleted int64, err error) - ListPending(ctx context.Context, createdBefore time.Time, limit int) ([]DurableEvent, error) - DeleteExpired(ctx context.Context, ttl time.Duration) (int64, error) -} From e466681db988ceac450591f016ea0f07f7449d0a Mon Sep 17 00:00:00 2001 From: Dzianis Vashchuk Date: Thu, 28 May 2026 06:56:51 +0000 Subject: [PATCH 3/3] ci: retrigger SonarQube scan for coverage pickup