From 040a1dd35e529a4e5cc391b98b5eff86e6281cb9 Mon Sep 17 00:00:00 2001 From: nikw9944 Date: Wed, 13 May 2026 18:42:10 +0000 Subject: [PATCH 1/6] e2e/qa: skip per-type capacity check when onchain max is zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the onchain semantic in qa.Test.ValidDevices: a per-type max of zero (max_unicast_users, max_multicast_publishers, max_multicast_subscribers) means the cap is not enforced. The create_core user processor only fails with MaxUnicastUsersExceeded when max > 0 && count >= max, so the QA filter should skip the per-type bucket entirely when max is zero and fall through to the aggregate users check. Fixes the regression where qa.alldevices on mainnet-beta dropped from testing 85 devices to testing 3 — 92 of 96 activated mainnet-beta devices have max_unicast_users == 0 and were silently excluded. malbeclabs/infra#1294 --- e2e/internal/qa/test.go | 11 +- e2e/internal/qa/test_test.go | 209 +++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+), 3 deletions(-) create mode 100644 e2e/internal/qa/test_test.go diff --git a/e2e/internal/qa/test.go b/e2e/internal/qa/test.go index d3c3c0a8df..699379f3c4 100644 --- a/e2e/internal/qa/test.go +++ b/e2e/internal/qa/test.go @@ -127,8 +127,10 @@ func (d *Device) capacityFor(userType DeviceUserType) (current, max int) { // ValidDevices returns devices that pass filtering criteria for the given // user type. A device is considered valid when it has at least minCapacity -// free slots in the type-specific bucket (e.g. unicast) AND in the aggregate -// users bucket — both are enforced onchain independently. +// free slots in the aggregate users bucket. The type-specific bucket +// (e.g. unicast) is also checked, but only when its onchain max is non-zero — +// onchain, a per-type max of 0 means the cap is unenforced (see +// smartcontract/programs/doublezero-serviceability/src/processors/user/create_core.rs). 
// // If skipCapacityCheck is true (e.g., when using a QA identity that bypasses // on-chain capacity checks), devices are not filtered by available capacity. @@ -145,7 +147,10 @@ func (t *Test) ValidDevices(userType DeviceUserType, minCapacity int, skipCapaci // Skip capacity check if using QA identity (bypasses on-chain max_users check) if !skipCapacityCheck { typeCount, typeMax := device.capacityFor(userType) - if typeMax-typeCount < minCapacity { + // Mirror the onchain semantic: the per-type cap is only enforced + // when max > 0. A max of 0 means "no per-type cap" and we fall + // through to the aggregate check. + if typeMax > 0 && typeMax-typeCount < minCapacity { t.log.Debug("Skipping device with insufficient type-specific capacity", "device", device.Code, "userType", userType, diff --git a/e2e/internal/qa/test_test.go b/e2e/internal/qa/test_test.go new file mode 100644 index 0000000000..4f8a40123b --- /dev/null +++ b/e2e/internal/qa/test_test.go @@ -0,0 +1,209 @@ +package qa + +import ( + "io" + "log/slog" + "testing" + + "github.com/stretchr/testify/require" +) + +// newTestForValidDevices builds a minimal *Test from a slice of devices. +// ValidDevices only depends on t.log and t.devices. +func newTestForValidDevices(devices []*Device) *Test { + deviceMap := make(map[string]*Device, len(devices)) + for _, d := range devices { + deviceMap[d.Code] = d + } + return &Test{ + log: slog.New(slog.NewTextHandler(io.Discard, nil)), + devices: deviceMap, + } +} + +// codesOf returns the sorted codes returned by ValidDevices, for easy +// comparison against expected sets. 
+func codesOf(devices []*Device) []string { + out := make([]string, 0, len(devices)) + for _, d := range devices { + out = append(out, d.Code) + } + return out +} + +func TestValidDevices_Unicast(t *testing.T) { + t.Parallel() + + const minCapacity = 2 + + tests := []struct { + name string + devices []*Device + skipCapacityCheck bool + want []string + }{ + { + name: "per-type max set with free slots is included", + devices: []*Device{ + {Code: "alpha", MaxUsers: 96, UsersCount: 4, MaxUnicastUsers: 48, UnicastUsersCount: 4}, + }, + want: []string{"alpha"}, + }, + { + name: "per-type max set and saturated is excluded (preserves #3563 fix)", + devices: []*Device{ + {Code: "nyc002-dz002", MaxUsers: 96, UsersCount: 29, MaxUnicastUsers: 29, UnicastUsersCount: 29}, + }, + want: []string{}, + }, + { + name: "per-type max set with fewer than minCapacity free slots is excluded", + devices: []*Device{ + {Code: "bravo", MaxUsers: 96, UsersCount: 47, MaxUnicastUsers: 48, UnicastUsersCount: 47}, + }, + want: []string{}, + }, + { + name: "per-type max zero with users counted is included (regression fix)", + devices: []*Device{ + {Code: "frankfurt-edge", MaxUsers: 96, UsersCount: 12, MaxUnicastUsers: 0, UnicastUsersCount: 12}, + }, + want: []string{"frankfurt-edge"}, + }, + { + name: "per-type max zero with aggregate cap saturated is excluded", + devices: []*Device{ + {Code: "full", MaxUsers: 5, UsersCount: 4, MaxUnicastUsers: 0, UnicastUsersCount: 4}, + }, + want: []string{}, + }, + { + name: "device with test in code is excluded", + devices: []*Device{ + {Code: "lab-test-1", MaxUsers: 96, UsersCount: 0, MaxUnicastUsers: 48, UnicastUsersCount: 0}, + }, + want: []string{}, + }, + { + name: "skipCapacityCheck includes saturated and zero-max devices", + devices: []*Device{ + {Code: "alpha", MaxUsers: 96, UsersCount: 4, MaxUnicastUsers: 48, UnicastUsersCount: 4}, + {Code: "nyc002-dz002", MaxUsers: 96, UsersCount: 29, MaxUnicastUsers: 29, UnicastUsersCount: 29}, + {Code: 
"frankfurt-edge", MaxUsers: 96, UsersCount: 12, MaxUnicastUsers: 0, UnicastUsersCount: 12}, + {Code: "full", MaxUsers: 5, UsersCount: 4, MaxUnicastUsers: 0, UnicastUsersCount: 4}, + }, + skipCapacityCheck: true, + want: []string{"alpha", "frankfurt-edge", "full", "nyc002-dz002"}, + }, + { + name: "mainnet-beta-like mix returns only those with free per-type or unset cap", + devices: []*Device{ + {Code: "allnodes-fra1", MaxUsers: 96, UsersCount: 29, MaxUnicastUsers: 48, UnicastUsersCount: 29}, + {Code: "fra-velia", MaxUsers: 96, UsersCount: 4, MaxUnicastUsers: 48, UnicastUsersCount: 4}, + {Code: "frankry", MaxUsers: 128, UsersCount: 68, MaxUnicastUsers: 96, UnicastUsersCount: 68}, + {Code: "nyc002-dz002", MaxUsers: 96, UsersCount: 29, MaxUnicastUsers: 29, UnicastUsersCount: 29}, + {Code: "amsterdam-edge", MaxUsers: 96, UsersCount: 7, MaxUnicastUsers: 0, UnicastUsersCount: 7}, + {Code: "tokyo-edge", MaxUsers: 96, UsersCount: 2, MaxUnicastUsers: 0, UnicastUsersCount: 2}, + }, + want: []string{"allnodes-fra1", "amsterdam-edge", "fra-velia", "frankry", "tokyo-edge"}, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + tt := newTestForValidDevices(tc.devices) + got := codesOf(tt.ValidDevices(DeviceUserTypeUnicast, minCapacity, tc.skipCapacityCheck)) + require.Equal(t, tc.want, got) + }) + } +} + +func TestValidDevices_MulticastPublisher(t *testing.T) { + t.Parallel() + + const minCapacity = 1 + + tests := []struct { + name string + devices []*Device + want []string + }{ + { + name: "per-type max set with free slots is included", + devices: []*Device{ + {Code: "pub-ok", MaxUsers: 96, UsersCount: 0, MaxMulticastPublishers: 4, MulticastPublishersCount: 1}, + }, + want: []string{"pub-ok"}, + }, + { + name: "per-type max set and saturated is excluded", + devices: []*Device{ + {Code: "pub-full", MaxUsers: 96, UsersCount: 0, MaxMulticastPublishers: 1, MulticastPublishersCount: 1}, + }, + want: []string{}, + }, + { + name: 
"per-type max zero with publishers counted is included (regression fix)", + devices: []*Device{ + {Code: "pub-uncapped", MaxUsers: 96, UsersCount: 3, MaxMulticastPublishers: 0, MulticastPublishersCount: 3}, + }, + want: []string{"pub-uncapped"}, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + tt := newTestForValidDevices(tc.devices) + got := codesOf(tt.ValidDevices(DeviceUserTypeMulticastPublisher, minCapacity, false)) + require.Equal(t, tc.want, got) + }) + } +} + +func TestValidDevices_MulticastSubscriber(t *testing.T) { + t.Parallel() + + const minCapacity = 1 + + tests := []struct { + name string + devices []*Device + want []string + }{ + { + name: "per-type max set with free slots is included", + devices: []*Device{ + {Code: "sub-ok", MaxUsers: 96, UsersCount: 0, MaxMulticastSubscribers: 8, MulticastSubscribersCount: 2}, + }, + want: []string{"sub-ok"}, + }, + { + name: "per-type max set and saturated is excluded", + devices: []*Device{ + {Code: "sub-full", MaxUsers: 96, UsersCount: 0, MaxMulticastSubscribers: 2, MulticastSubscribersCount: 2}, + }, + want: []string{}, + }, + { + name: "per-type max zero with subscribers counted is included (regression fix)", + devices: []*Device{ + {Code: "sub-uncapped", MaxUsers: 96, UsersCount: 5, MaxMulticastSubscribers: 0, MulticastSubscribersCount: 5}, + }, + want: []string{"sub-uncapped"}, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + tt := newTestForValidDevices(tc.devices) + got := codesOf(tt.ValidDevices(DeviceUserTypeMulticastSubscriber, minCapacity, false)) + require.Equal(t, tc.want, got) + }) + } +} From d76a171a92bf6f99f57a06b26fa616170288ad83 Mon Sep 17 00:00:00 2001 From: Nik Weidenbacher Date: Thu, 21 May 2026 22:38:58 +0000 Subject: [PATCH 2/6] e2e/qa: remove client-side capacity pre-filtering from ValidDevices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit The QA user pubkey is now on the onchain qa_allowlist, so the smart contract bypasses all capacity limits for QA connections. Remove the client-side capacity pre-filtering (DeviceUserType, capacityFor, minCapacity, skipCapacityCheck) from ValidDevices — it was a heuristic that drifted from the onchain semantic and is no longer needed. ValidDevices now only filters out devices with "test" in their code. Also add a hint to ConnectUserUnicast error messages when a capacity error is detected, directing the operator to verify the qa-allowlist. malbeclabs/infra#1294 --- e2e/internal/qa/client_unicast.go | 17 +++- e2e/internal/qa/test.go | 79 +-------------- e2e/internal/qa/test_test.go | 162 ++++-------------------------- e2e/qa_alldevices_unicast_test.go | 7 +- 4 files changed, 42 insertions(+), 223 deletions(-) diff --git a/e2e/internal/qa/client_unicast.go b/e2e/internal/qa/client_unicast.go index 5c206f8d19..e746f97b7b 100644 --- a/e2e/internal/qa/client_unicast.go +++ b/e2e/internal/qa/client_unicast.go @@ -65,10 +65,17 @@ func (c *Client) ConnectUserUnicast(ctx context.Context, deviceCode string, wait ClientIp: c.ClientIP, }) if err != nil { + if isCapacityError(err.Error()) { + return fmt.Errorf("failed to connect on host %s: %w — this may mean the QA user pubkey is not on the onchain qa-allowlist; verify with 'doublezero global-config qa-allowlist list'", c.Host, err) + } return fmt.Errorf("failed to connect on host %s: %w", c.Host, err) } if !resp.GetSuccess() { - return fmt.Errorf("connection failed on host %s: %s", c.Host, resp.GetOutput()) + output := strings.Join(resp.GetOutput(), "\n") + if isCapacityError(output) { + return fmt.Errorf("connection failed on host %s: %s — this may mean the QA user pubkey is not on the onchain qa-allowlist; verify with 'doublezero global-config qa-allowlist list'", c.Host, output) + } + return fmt.Errorf("connection failed on host %s: %s", c.Host, output) } c.log.Debug("Unicast user connected", 
"host", c.Host, "device", deviceCode) @@ -282,6 +289,14 @@ type Hop struct { Raw string } +func isCapacityError(s string) bool { + return strings.Contains(s, "user limit") || + strings.Contains(s, "MaxUsersExceeded") || + strings.Contains(s, "MaxUnicastUsersExceeded") || + strings.Contains(s, "MaxMulticastPublishersExceeded") || + strings.Contains(s, "MaxMulticastSubscribersExceeded") +} + func parseMTR(input string) ([]Hop, error) { re := regexp.MustCompile(`^\s*(\d+)\.\|\-\-\s+(\S+)\s+(\d+(?:\.\d+)?)(?:%)?\s+(\d+)\b`) diff --git a/e2e/internal/qa/test.go b/e2e/internal/qa/test.go index 699379f3c4..54e1de78dc 100644 --- a/e2e/internal/qa/test.go +++ b/e2e/internal/qa/test.go @@ -87,87 +87,18 @@ func (t *Test) Devices() map[string]*Device { return t.devices } -// DeviceUserType identifies which per-type user slot bucket to check against -// a device's capacity. The onchain device tracks three independent counters — -// unicast, multicast publisher, multicast subscriber — each with its own max. -type DeviceUserType int - -const ( - DeviceUserTypeUnicast DeviceUserType = iota - DeviceUserTypeMulticastPublisher - DeviceUserTypeMulticastSubscriber -) - -func (d DeviceUserType) String() string { - switch d { - case DeviceUserTypeUnicast: - return "unicast" - case DeviceUserTypeMulticastPublisher: - return "multicast_publisher" - case DeviceUserTypeMulticastSubscriber: - return "multicast_subscriber" - default: - return fmt.Sprintf("unknown(%d)", int(d)) - } -} - -// capacityFor returns the (current, max) counters for the requested user type. 
-func (d *Device) capacityFor(userType DeviceUserType) (current, max int) { - switch userType { - case DeviceUserTypeUnicast: - return d.UnicastUsersCount, d.MaxUnicastUsers - case DeviceUserTypeMulticastPublisher: - return d.MulticastPublishersCount, d.MaxMulticastPublishers - case DeviceUserTypeMulticastSubscriber: - return d.MulticastSubscribersCount, d.MaxMulticastSubscribers - default: - return 0, 0 - } -} - -// ValidDevices returns devices that pass filtering criteria for the given -// user type. A device is considered valid when it has at least minCapacity -// free slots in the aggregate users bucket. The type-specific bucket -// (e.g. unicast) is also checked, but only when its onchain max is non-zero — -// onchain, a per-type max of 0 means the cap is unenforced (see -// smartcontract/programs/doublezero-serviceability/src/processors/user/create_core.rs). -// -// If skipCapacityCheck is true (e.g., when using a QA identity that bypasses -// on-chain capacity checks), devices are not filtered by available capacity. -func (t *Test) ValidDevices(userType DeviceUserType, minCapacity int, skipCapacityCheck bool) []*Device { +// ValidDevices returns all activated devices except those whose code contains +// "test" (typically not real hardware). Capacity is not checked here — the QA +// user pubkey should be on the onchain qa_allowlist so that the smart contract +// bypasses capacity limits for QA connections. 
+func (t *Test) ValidDevices() []*Device { devices := make([]*Device, 0, len(t.devices)) for _, device := range t.Devices() { - // Skip devices with "test" in the code as these are typically not real hardware if strings.Contains(strings.ToLower(device.Code), "test") { t.log.Debug("Skipping test device", "device", device.Code) continue } - - // Skip capacity check if using QA identity (bypasses on-chain max_users check) - if !skipCapacityCheck { - typeCount, typeMax := device.capacityFor(userType) - // Mirror the onchain semantic: the per-type cap is only enforced - // when max > 0. A max of 0 means "no per-type cap" and we fall - // through to the aggregate check. - if typeMax > 0 && typeMax-typeCount < minCapacity { - t.log.Debug("Skipping device with insufficient type-specific capacity", - "device", device.Code, - "userType", userType, - "count", typeCount, - "max", typeMax, - ) - continue - } - if device.MaxUsers-device.UsersCount < minCapacity { - t.log.Debug("Skipping device with insufficient aggregate capacity", - "device", device.Code, - "users", device.UsersCount, - "maxUsers", device.MaxUsers, - ) - continue - } - } devices = append(devices, device) } diff --git a/e2e/internal/qa/test_test.go b/e2e/internal/qa/test_test.go index 4f8a40123b..7bfa6f9a8a 100644 --- a/e2e/internal/qa/test_test.go +++ b/e2e/internal/qa/test_test.go @@ -8,8 +8,6 @@ import ( "github.com/stretchr/testify/require" ) -// newTestForValidDevices builds a minimal *Test from a slice of devices. -// ValidDevices only depends on t.log and t.devices. func newTestForValidDevices(devices []*Device) *Test { deviceMap := make(map[string]*Device, len(devices)) for _, d := range devices { @@ -21,8 +19,6 @@ func newTestForValidDevices(devices []*Device) *Test { } } -// codesOf returns the sorted codes returned by ValidDevices, for easy -// comparison against expected sets. 
func codesOf(devices []*Device) []string { out := make([]string, 0, len(devices)) for _, d := range devices { @@ -31,169 +27,49 @@ func codesOf(devices []*Device) []string { return out } -func TestValidDevices_Unicast(t *testing.T) { +func TestValidDevices(t *testing.T) { t.Parallel() - const minCapacity = 2 - - tests := []struct { - name string - devices []*Device - skipCapacityCheck bool - want []string - }{ - { - name: "per-type max set with free slots is included", - devices: []*Device{ - {Code: "alpha", MaxUsers: 96, UsersCount: 4, MaxUnicastUsers: 48, UnicastUsersCount: 4}, - }, - want: []string{"alpha"}, - }, - { - name: "per-type max set and saturated is excluded (preserves #3563 fix)", - devices: []*Device{ - {Code: "nyc002-dz002", MaxUsers: 96, UsersCount: 29, MaxUnicastUsers: 29, UnicastUsersCount: 29}, - }, - want: []string{}, - }, - { - name: "per-type max set with fewer than minCapacity free slots is excluded", - devices: []*Device{ - {Code: "bravo", MaxUsers: 96, UsersCount: 47, MaxUnicastUsers: 48, UnicastUsersCount: 47}, - }, - want: []string{}, - }, - { - name: "per-type max zero with users counted is included (regression fix)", - devices: []*Device{ - {Code: "frankfurt-edge", MaxUsers: 96, UsersCount: 12, MaxUnicastUsers: 0, UnicastUsersCount: 12}, - }, - want: []string{"frankfurt-edge"}, - }, - { - name: "per-type max zero with aggregate cap saturated is excluded", - devices: []*Device{ - {Code: "full", MaxUsers: 5, UsersCount: 4, MaxUnicastUsers: 0, UnicastUsersCount: 4}, - }, - want: []string{}, - }, - { - name: "device with test in code is excluded", - devices: []*Device{ - {Code: "lab-test-1", MaxUsers: 96, UsersCount: 0, MaxUnicastUsers: 48, UnicastUsersCount: 0}, - }, - want: []string{}, - }, - { - name: "skipCapacityCheck includes saturated and zero-max devices", - devices: []*Device{ - {Code: "alpha", MaxUsers: 96, UsersCount: 4, MaxUnicastUsers: 48, UnicastUsersCount: 4}, - {Code: "nyc002-dz002", MaxUsers: 96, UsersCount: 29, 
MaxUnicastUsers: 29, UnicastUsersCount: 29}, - {Code: "frankfurt-edge", MaxUsers: 96, UsersCount: 12, MaxUnicastUsers: 0, UnicastUsersCount: 12}, - {Code: "full", MaxUsers: 5, UsersCount: 4, MaxUnicastUsers: 0, UnicastUsersCount: 4}, - }, - skipCapacityCheck: true, - want: []string{"alpha", "frankfurt-edge", "full", "nyc002-dz002"}, - }, - { - name: "mainnet-beta-like mix returns only those with free per-type or unset cap", - devices: []*Device{ - {Code: "allnodes-fra1", MaxUsers: 96, UsersCount: 29, MaxUnicastUsers: 48, UnicastUsersCount: 29}, - {Code: "fra-velia", MaxUsers: 96, UsersCount: 4, MaxUnicastUsers: 48, UnicastUsersCount: 4}, - {Code: "frankry", MaxUsers: 128, UsersCount: 68, MaxUnicastUsers: 96, UnicastUsersCount: 68}, - {Code: "nyc002-dz002", MaxUsers: 96, UsersCount: 29, MaxUnicastUsers: 29, UnicastUsersCount: 29}, - {Code: "amsterdam-edge", MaxUsers: 96, UsersCount: 7, MaxUnicastUsers: 0, UnicastUsersCount: 7}, - {Code: "tokyo-edge", MaxUsers: 96, UsersCount: 2, MaxUnicastUsers: 0, UnicastUsersCount: 2}, - }, - want: []string{"allnodes-fra1", "amsterdam-edge", "fra-velia", "frankry", "tokyo-edge"}, - }, - } - - for _, tc := range tests { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - tt := newTestForValidDevices(tc.devices) - got := codesOf(tt.ValidDevices(DeviceUserTypeUnicast, minCapacity, tc.skipCapacityCheck)) - require.Equal(t, tc.want, got) - }) - } -} - -func TestValidDevices_MulticastPublisher(t *testing.T) { - t.Parallel() - - const minCapacity = 1 - tests := []struct { name string devices []*Device want []string }{ { - name: "per-type max set with free slots is included", + name: "normal device is included", devices: []*Device{ - {Code: "pub-ok", MaxUsers: 96, UsersCount: 0, MaxMulticastPublishers: 4, MulticastPublishersCount: 1}, + {Code: "fra-velia", MaxUsers: 96, UsersCount: 4}, }, - want: []string{"pub-ok"}, + want: []string{"fra-velia"}, }, { - name: "per-type max set and saturated is excluded", + name: "device with 
test in code is excluded", devices: []*Device{ - {Code: "pub-full", MaxUsers: 96, UsersCount: 0, MaxMulticastPublishers: 1, MulticastPublishersCount: 1}, + {Code: "lab-test-1", MaxUsers: 96, UsersCount: 0}, }, want: []string{}, }, { - name: "per-type max zero with publishers counted is included (regression fix)", - devices: []*Device{ - {Code: "pub-uncapped", MaxUsers: 96, UsersCount: 3, MaxMulticastPublishers: 0, MulticastPublishersCount: 3}, - }, - want: []string{"pub-uncapped"}, - }, - } - - for _, tc := range tests { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - tt := newTestForValidDevices(tc.devices) - got := codesOf(tt.ValidDevices(DeviceUserTypeMulticastPublisher, minCapacity, false)) - require.Equal(t, tc.want, got) - }) - } -} - -func TestValidDevices_MulticastSubscriber(t *testing.T) { - t.Parallel() - - const minCapacity = 1 - - tests := []struct { - name string - devices []*Device - want []string - }{ - { - name: "per-type max set with free slots is included", + name: "device with TEST in code (case insensitive) is excluded", devices: []*Device{ - {Code: "sub-ok", MaxUsers: 96, UsersCount: 0, MaxMulticastSubscribers: 8, MulticastSubscribersCount: 2}, + {Code: "NYC-TEST-DZ001", MaxUsers: 96, UsersCount: 0}, }, - want: []string{"sub-ok"}, + want: []string{}, }, { - name: "per-type max set and saturated is excluded", + name: "mix of real and test devices returns only real ones sorted", devices: []*Device{ - {Code: "sub-full", MaxUsers: 96, UsersCount: 0, MaxMulticastSubscribers: 2, MulticastSubscribersCount: 2}, + {Code: "tokyo-edge"}, + {Code: "test-device-1"}, + {Code: "amsterdam-edge"}, + {Code: "fra-test-01"}, }, - want: []string{}, + want: []string{"amsterdam-edge", "tokyo-edge"}, }, { - name: "per-type max zero with subscribers counted is included (regression fix)", - devices: []*Device{ - {Code: "sub-uncapped", MaxUsers: 96, UsersCount: 5, MaxMulticastSubscribers: 0, MulticastSubscribersCount: 5}, - }, - want: 
[]string{"sub-uncapped"}, + name: "empty device list returns empty", + devices: []*Device{}, + want: []string{}, }, } @@ -202,7 +78,7 @@ func TestValidDevices_MulticastSubscriber(t *testing.T) { t.Run(tc.name, func(t *testing.T) { t.Parallel() tt := newTestForValidDevices(tc.devices) - got := codesOf(tt.ValidDevices(DeviceUserTypeMulticastSubscriber, minCapacity, false)) + got := codesOf(tt.ValidDevices()) require.Equal(t, tc.want, got) }) } diff --git a/e2e/qa_alldevices_unicast_test.go b/e2e/qa_alldevices_unicast_test.go index de15f548c1..53d6bd9103 100644 --- a/e2e/qa_alldevices_unicast_test.go +++ b/e2e/qa_alldevices_unicast_test.go @@ -24,7 +24,6 @@ import ( var ( devicesFlag = flag.String("devices", "", "comma separated list of devices to run tests against") allocateAddrHosts = flag.String("allocate-addr-hosts", "", "comma separated list of hosts that will have `--allocate-addr` passed to `doublezero connect ibrl`") - skipCapacityCheckFlag = flag.Bool("skip-capacity-check", false, "skip device capacity checks (use when running with QA identity that bypasses on-chain max_users)") ) func TestQA_AllDevices_UnicastConnectivity(t *testing.T) { @@ -53,11 +52,9 @@ func TestQA_AllDevices_UnicastConnectivity(t *testing.T) { clients := test.Clients() require.GreaterOrEqual(t, len(clients), 2, "At least 2 clients are required for connectivity testing") - // Filter devices to only include those with sufficient unicast capacity and skip test devices - // When using a QA identity (--skip-capacity-check), all devices are included regardless of capacity - devices := test.ValidDevices(qa.DeviceUserTypeUnicast, 2, *skipCapacityCheckFlag) + devices := test.ValidDevices() if len(devices) == 0 { - t.Skip("No valid devices found with sufficient capacity") + t.Skip("No valid devices found") } // Filter out transit devices - they don't participate in unicast connectivity tests From 6830507b6e1562e46bc034424afdfdc7cf80576f Mon Sep 17 00:00:00 2001 From: Nik Weidenbacher Date: Thu, 21 
May 2026 22:52:21 +0000 Subject: [PATCH 3/6] e2e/qa: remove test_test.go and update changelog --- CHANGELOG.md | 1 + e2e/internal/qa/test_test.go | 85 ------------------------------------ 2 files changed, 1 insertion(+), 85 deletions(-) delete mode 100644 e2e/internal/qa/test_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 75e0f25ffc..e4bf3ccf16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. ### Changes + - Smartcontract - Deprecate the 13 contributor-side program instructions whose only client was the now-deleted activator: `ActivateDevice` (21), `RejectDevice` (22), `CloseAccountDevice` (27), `ActivateLink` (29), `RejectLink` (30), `CloseAccountLink` (35), `ActivateMulticastGroup` (47), `RejectMulticastGroup` (48), `DeactivateMulticastGroup` (53), `ActivateDeviceInterface` (72), `RemoveDeviceInterface` (75), `UnlinkDeviceInterface` (77), and `RejectDeviceInterface` (78). Dispatch arms now short-circuit to `DoubleZeroError::Deprecated` (custom code 67); processor files and argument structs are removed. Borsh variant tags are preserved (unit variants) so the wire format is unchanged — old clients receive a deterministic deprecation error rather than an unknown-instruction decode failure. Bumps `MIN_COMPATIBLE_VERSION` to `0.15.0` (the `client/v0.14.1` git tag was a patch release built from a commit whose workspace Cargo version was still `0.14.0`, so the v0.14.1 binary self-reports as 0.14.0 in its startup version check; v0.15.0 is the first release whose embedded version actually satisfies the intended ≥ 0.14.1 gate). 
Gated on onchain `ProgramConfig.min_compatible_version ≥ 0.15.0` ([#3623](https://github.com/malbeclabs/doublezero/issues/3623)) - Deprecate the `ActivateUser`, `RejectUser`, `CloseAccountUser`, and `BanUser` user-lifecycle program instructions: dispatch arms now return `DoubleZeroError::Deprecated` (custom code 67), and the processor files / argument structs are removed. Borsh variant tags 37/38/43/45 are preserved so the wire format is unchanged. The activator was the only client of all four — `CreateUser` has been atomic-to-`Activated` since RFC-11, `closeaccount` was activator-driven only, and `RequestBanUser` is now atomic. Gated on onchain `min_compatible_version ≥ 0.12.0` ([#3622](https://github.com/malbeclabs/doublezero/issues/3622)) diff --git a/e2e/internal/qa/test_test.go b/e2e/internal/qa/test_test.go deleted file mode 100644 index 7bfa6f9a8a..0000000000 --- a/e2e/internal/qa/test_test.go +++ /dev/null @@ -1,85 +0,0 @@ -package qa - -import ( - "io" - "log/slog" - "testing" - - "github.com/stretchr/testify/require" -) - -func newTestForValidDevices(devices []*Device) *Test { - deviceMap := make(map[string]*Device, len(devices)) - for _, d := range devices { - deviceMap[d.Code] = d - } - return &Test{ - log: slog.New(slog.NewTextHandler(io.Discard, nil)), - devices: deviceMap, - } -} - -func codesOf(devices []*Device) []string { - out := make([]string, 0, len(devices)) - for _, d := range devices { - out = append(out, d.Code) - } - return out -} - -func TestValidDevices(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - devices []*Device - want []string - }{ - { - name: "normal device is included", - devices: []*Device{ - {Code: "fra-velia", MaxUsers: 96, UsersCount: 4}, - }, - want: []string{"fra-velia"}, - }, - { - name: "device with test in code is excluded", - devices: []*Device{ - {Code: "lab-test-1", MaxUsers: 96, UsersCount: 0}, - }, - want: []string{}, - }, - { - name: "device with TEST in code (case insensitive) is excluded", 
- devices: []*Device{ - {Code: "NYC-TEST-DZ001", MaxUsers: 96, UsersCount: 0}, - }, - want: []string{}, - }, - { - name: "mix of real and test devices returns only real ones sorted", - devices: []*Device{ - {Code: "tokyo-edge"}, - {Code: "test-device-1"}, - {Code: "amsterdam-edge"}, - {Code: "fra-test-01"}, - }, - want: []string{"amsterdam-edge", "tokyo-edge"}, - }, - { - name: "empty device list returns empty", - devices: []*Device{}, - want: []string{}, - }, - } - - for _, tc := range tests { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - tt := newTestForValidDevices(tc.devices) - got := codesOf(tt.ValidDevices()) - require.Equal(t, tc.want, got) - }) - } -} From 73c53c7128d5139b421f102dd8ce4cdd4f7ef4a7 Mon Sep 17 00:00:00 2001 From: Nik Weidenbacher Date: Fri, 22 May 2026 16:52:27 +0000 Subject: [PATCH 4/6] e2e/qa: use threshold-based failure for alldevices test Replace per-device t.Errorf/assert.NoError with t.Logf so individual device failures are logged but do not fail the test. Instead, evaluate overall and per-host failure rates after all batches complete, and only fail the test if either rate exceeds --failure-threshold (default 20%). 
--- e2e/qa_alldevices_unicast_test.go | 73 ++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/e2e/qa_alldevices_unicast_test.go b/e2e/qa_alldevices_unicast_test.go index 53d6bd9103..934c77a6d4 100644 --- a/e2e/qa_alldevices_unicast_test.go +++ b/e2e/qa_alldevices_unicast_test.go @@ -22,8 +22,9 @@ import ( ) var ( - devicesFlag = flag.String("devices", "", "comma separated list of devices to run tests against") - allocateAddrHosts = flag.String("allocate-addr-hosts", "", "comma separated list of hosts that will have `--allocate-addr` passed to `doublezero connect ibrl`") + devicesFlag = flag.String("devices", "", "comma separated list of devices to run tests against") + allocateAddrHosts = flag.String("allocate-addr-hosts", "", "comma separated list of hosts that will have `--allocate-addr` passed to `doublezero connect ibrl`") + failureThreshold = flag.Float64("failure-threshold", 0.2, "maximum allowed failure rate (0.0-1.0) before the test is marked as failed, applied both globally and per-host") ) func TestQA_AllDevices_UnicastConnectivity(t *testing.T) { @@ -206,6 +207,65 @@ func TestQA_AllDevices_UnicastConnectivity(t *testing.T) { } log.Debug("Test summary", "packetsReceived", totalReceived, "packetsSent", totalSent, "batchesWithLoss", batchesWithLoss, "totalBatches", batchCount) + // Evaluate failure rates against threshold + totalDevices := len(deviceResults) + failedDevices := 0 + var failedDeviceCodes []string + for code, result := range deviceResults { + if !result.Success { + failedDevices++ + failedDeviceCodes = append(failedDeviceCodes, code) + } + } + + overallRate := float64(failedDevices) / float64(totalDevices) + log.Debug("Overall failure rate", + "failed", failedDevices, + "total", totalDevices, + "rate", fmt.Sprintf("%.1f%%", overallRate*100), + "threshold", fmt.Sprintf("%.1f%%", *failureThreshold*100), + ) + if overallRate > *failureThreshold { + slices.Sort(failedDeviceCodes) + t.Errorf("Overall 
device failure rate %.1f%% (%d/%d) exceeds threshold %.1f%%. Failed devices: %s", + overallRate*100, failedDevices, totalDevices, *failureThreshold*100, + strings.Join(failedDeviceCodes, ", ")) + } + + type hostStats struct { + total int + failed int + failedDevices []string + } + perHost := make(map[string]*hostStats) + for _, batch := range batchData { + for host, assignment := range batch { + if perHost[host] == nil { + perHost[host] = &hostStats{} + } + perHost[host].total++ + if !assignment.Success() { + perHost[host].failed++ + perHost[host].failedDevices = append(perHost[host].failedDevices, assignment.Device.Code) + } + } + } + for host, stats := range perHost { + hostRate := float64(stats.failed) / float64(stats.total) + log.Debug("Per-host failure rate", + "host", host, + "failed", stats.failed, + "total", stats.total, + "rate", fmt.Sprintf("%.1f%%", hostRate*100), + ) + if hostRate > *failureThreshold { + slices.Sort(stats.failedDevices) + t.Errorf("Host %s failure rate %.1f%% (%d/%d) exceeds threshold %.1f%%. 
Failed devices: %s", + host, hostRate*100, stats.failed, stats.total, *failureThreshold*100, + strings.Join(stats.failedDevices, ", ")) + } + } + results := make([]qa.DeviceTestResult, 0, len(deviceResults)) for _, result := range deviceResults { results = append(results, *result) @@ -312,7 +372,7 @@ func connectClientsAndWaitForRoutes( log.Error("Failed to start connection", "client", c.Host, "device", device.Code, "error", err) batch[c.Host].FailedTests++ if device.Status == serviceability.DeviceStatusActivated && device.MaxUsers > 0 { - t.Errorf("failed to connect client %s to device %s: %v", c.Host, device.Code, err) + t.Logf("DEVICE FAILURE: failed to connect client %s to device %s: %v", c.Host, device.Code, err) } else { log.Warn("Ignoring connection failure for device not ready for users", "device", device.Code, "status", device.Status, "maxUsers", device.MaxUsers) } @@ -327,7 +387,7 @@ func connectClientsAndWaitForRoutes( log.Error("Client failed to reach status up", "client", c.Host, "error", err) batch[c.Host].FailedTests++ if device.Status == serviceability.DeviceStatusActivated && device.MaxUsers > 0 { - t.Errorf("failed to wait for status for client %s: %v", c.Host, err) + t.Logf("DEVICE FAILURE: failed to wait for status for client %s: %v", c.Host, err) } else { log.Warn("Ignoring status failure for device not ready for users", "device", device.Code, "status", device.Status, "maxUsers", device.MaxUsers) } @@ -362,7 +422,7 @@ func connectClientsAndWaitForRoutes( log.Error("Failed to wait for routes", "client", c.Host, "error", err) batch[c.Host].FailedTests++ if device.Status == serviceability.DeviceStatusActivated && device.MaxUsers > 0 { - t.Errorf("failed to wait for routes on client %s: %v", c.Host, err) + t.Logf("DEVICE FAILURE: failed to wait for routes on client %s: %v", c.Host, err) } else { log.Warn("Ignoring route failure for device not ready for users", "device", device.Code, "status", device.Status, "maxUsers", device.MaxUsers) } @@ -414,7 
+474,8 @@ func runConnectivitySubtests( srcReady := srcDevice.Status == serviceability.DeviceStatusActivated && srcDevice.MaxUsers > 0 dstReady := dstDevice.Status == serviceability.DeviceStatusActivated && dstDevice.MaxUsers > 0 if srcReady && dstReady { - assert.NoError(t, err, "failed to test connectivity") + t.Logf("DEVICE FAILURE: connectivity test failed from %s to %s (device %s -> %s): %v", + src.Host, target.Host, srcDevice.Code, dstDevice.Code, err) } else { log.Warn("Ignoring connectivity failure involving device not ready for users", "sourceDevice", srcDevice.Code, "sourceStatus", srcDevice.Status, "sourceMaxUsers", srcDevice.MaxUsers, From a159dda61b7272ad27bd867d47fac0c10723d51e Mon Sep 17 00:00:00 2001 From: Nik Weidenbacher Date: Fri, 22 May 2026 16:58:53 +0000 Subject: [PATCH 5/6] e2e/qa: update changelog with threshold-based failure --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4bf3ccf16..c25cddc766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file. ## Unreleased +- e2e/qa: remove client-side capacity pre-filtering from `ValidDevices`, because the QA user pubkey bypasses capacity limits using the serviceability global-config qa-allowlist. Individual device failures no longer fail the test; instead, overall and per-host failure rates are evaluated after all batches, and the test fails only if the overall rate exceeds `--failure-threshold` (default 10%) or any host's rate exceeds `--per-host-failure-threshold` (default 20%). 
+ ## [v0.24.0](https://github.com/malbeclabs/doublezero/compare/client/v0.23.0...client/v0.24.0) - 2026-05-22 ### Breaking From e10e14bf202b3856e91ba71aba5ef271d99ce426 Mon Sep 17 00:00:00 2001 From: Nik Weidenbacher Date: Fri, 22 May 2026 17:01:03 +0000 Subject: [PATCH 6/6] e2e/qa: split failure threshold into overall (10%) and per-host (20%) --- e2e/qa_alldevices_unicast_test.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/e2e/qa_alldevices_unicast_test.go b/e2e/qa_alldevices_unicast_test.go index 934c77a6d4..95ebb8a39f 100644 --- a/e2e/qa_alldevices_unicast_test.go +++ b/e2e/qa_alldevices_unicast_test.go @@ -24,7 +24,8 @@ import ( var ( devicesFlag = flag.String("devices", "", "comma separated list of devices to run tests against") allocateAddrHosts = flag.String("allocate-addr-hosts", "", "comma separated list of hosts that will have `--allocate-addr` passed to `doublezero connect ibrl`") - failureThreshold = flag.Float64("failure-threshold", 0.2, "maximum allowed failure rate (0.0-1.0) before the test is marked as failed, applied both globally and per-host") + failureThreshold = flag.Float64("failure-threshold", 0.1, "maximum allowed overall device failure rate (0.0-1.0) before the test is marked as failed") + perHostFailureThreshold = flag.Float64("per-host-failure-threshold", 0.2, "maximum allowed per-host device failure rate (0.0-1.0) before the test is marked as failed") ) func TestQA_AllDevices_UnicastConnectivity(t *testing.T) { @@ -258,10 +259,10 @@ func TestQA_AllDevices_UnicastConnectivity(t *testing.T) { "total", stats.total, "rate", fmt.Sprintf("%.1f%%", hostRate*100), ) - if hostRate > *failureThreshold { + if hostRate > *perHostFailureThreshold { slices.Sort(stats.failedDevices) t.Errorf("Host %s failure rate %.1f%% (%d/%d) exceeds threshold %.1f%%. 
Failed devices: %s", - host, hostRate*100, stats.failed, stats.total, *failureThreshold*100, + host, hostRate*100, stats.failed, stats.total, *perHostFailureThreshold*100, strings.Join(stats.failedDevices, ", ")) } }