From 6f0a97f04ff03bcc240c0dd0e6d536a2920bfbd1 Mon Sep 17 00:00:00 2001 From: Ramine Agoune Date: Sun, 28 Jun 2026 12:50:50 +0200 Subject: [PATCH] perf: cache archetype transitions to avoid the per-operation linear scan Each archetype now caches its add/remove edges to neighbour archetypes. A single-component transition (Add/RemoveComponent, Add/RemoveTag) resolves the destination archetype via an O(1) edge lookup instead of rebuilding an id slice and scanning every archetype; the scan remains as a fallback to populate the edge and for multi-component operations. This also fixes a latent corruption: removeComponent and RemoveTag held a *archetype taken before getArchetypeForComponentsIds, which can create a new archetype and reallocate world.archetypes (past the 1024 preallocated cap), leaving the pointer stale. The destination is now resolved first and both archetypes are re-fetched by index. --- README.md | 37 +++++++++--------- archetype.go | 61 ++++++++++++++++++++++++++++++ archetype_test.go | 95 +++++++++++++++++++++++++++++++++++++++++++++++ component.go | 15 +++----- tag.go | 11 ++---- world.go | 8 ++++ 6 files changed, 193 insertions(+), 34 deletions(-) create mode 100644 archetype_test.go diff --git a/README.md b/README.md index 1cb285b..820450f 100644 --- a/README.md +++ b/README.md @@ -259,8 +259,8 @@ func (scene *Scene) SearchEntity(name string) volt.EntityId { Few ECS tools exist for Go. Arche and unitoftime/ecs are probably the most looked at, and the most optimized. In the benchmark folder, this module is compared to both of them. 
-- Go - v1.25.3 -- Volt - v1.7.0 +- Go - v1.26.4 +- Volt - v1.9.0 - [Arche - v0.15.3](https://github.com/mlange-42/arche) - [UECS - v0.0.3](https://github.com/unitoftime/ecs) @@ -271,27 +271,30 @@ goarch: amd64 pkg: benchmark cpu: AMD Ryzen 7 5800X 8-Core Processor +Values are the median over 6 runs: + | Benchmark | Iterations | ns/op | B/op | Allocs/op | |--------------------------------------------------|-------------|-----------|------------|-----------| -| BenchmarkCreateEntityArche-16 | 171 | 7138387 | 11096954 | 61 | -| BenchmarkIterateArche-16 | 2798 | 429744 | 354 | 4 | -| BenchmarkAddArche-16 | 253 | 4673362 | 122153 | 100000 | -| BenchmarkRemoveArche-16 | 247 | 4840772 | 100000 | 100000 | -| BenchmarkCreateEntityUECS-16 | 27 | 38852089 | 49119503 | 200146 | -| BenchmarkIterateUECS-16 | 4892 | 235333 | 128 | 3 | -| BenchmarkAddUECS-16 | 28 | 38982533 | 4721942 | 100005 | -| BenchmarkRemoveUECS-16 | 30 | 40290316 | 3336712 | 100000 | -| BenchmarkCreateEntityVolt-16 | 63 | 18836136 | 35181458 | 100101 | -| BenchmarkIterateVolt-16 | 3619 | 337764 | 256 | 8 | -| (DEPRECATED) BenchmarkIterateConcurrentlyVolt-16 | 9164 | 121653 | 3324 | 91 | -| BenchmarkTaskVolt-16 | 9859 | 119525 | 1847 | 38 | -| BenchmarkAddVolt-16 | 103 | 11379690 | 4313182 | 300000 | -| BenchmarkRemoveVolt-16 | 146 | 7647252 | 400001 | 100000 | +| BenchmarkCreateEntityArche-16 | 152 | 7906676 | 11096815 | 61 | +| BenchmarkIterateArche-16 | 3484 | 337794 | 354 | 4 | +| BenchmarkAddArche-16 | 286 | 4177088 | 119632 | 100000 | +| BenchmarkRemoveArche-16 | 248 | 4794950 | 100000 | 100000 | +| BenchmarkCreateEntityUECS-16 | 32 | 35455745 | 49119512 | 200146 | +| BenchmarkIterateUECS-16 | 5035 | 237613 | 128 | 3 | +| BenchmarkAddUECS-16 | 34 | 31213636 | 4437536 | 100004 | +| BenchmarkRemoveUECS-16 | 38 | 29573272 | 3309389 | 100000 | +| BenchmarkCreateEntityVolt-16 | 70 | 15858217 | 35197857 | 100101 | +| BenchmarkIterateVolt-16 | 3900 | 302282 | 144 | 5 | +| (DEPRECATED) 
BenchmarkIterateConcurrentlyVolt-16 | 11877 | 100236 | 3332 | 94 | +| BenchmarkTaskVolt-16 | 12320 | 97474 | 1856 | 39 | +| BenchmarkAddVolt-16 | 121 | 9782019 | 2866598 | 200000 | +| BenchmarkRemoveVolt-16 | 160 | 7447984 | 0 | 0 | These results show a few things: -- Arche is the fastest tool for writes operations. In our game development though we would rather lean towards fastest read operations, because the games loops will read way more often than write. +- Arche is still the fastest tool for raw write operations. In our game development, though, we would rather lean towards the fastest read operations, because game loops read far more often than they write. - Unitoftime/ecs is the fastest tool for read operations on one thread only, but the writes are currently way slower than Arche and Volt (except on the Create benchmark). - Volt is a good compromise, an in-between: fast enough add/remove operations, and almost as fast as Arche and UECS for reads on one thread. +- Volt's write path is now much lighter on the garbage collector: thanks to the archetype transition graph and the typed storage, removing a component allocates nothing (0 allocs/op) and adding one makes about a third fewer allocations (200000 instead of 300000 allocs/op) than in previous versions. Volt uses the new iterators from go1.23, which in their current implementation are slower than using a function call in the for-loop inside the Query (as done in UECS). This means, if the Go team finds a way to improve the performances from the iterators, we can hope to acheive near performances as UECS. - Thanks to the iterators, Volt provides a simple way to use goroutines for read operations. The data is received through a channel of iterator. 
diff --git a/archetype.go b/archetype.go index 1c6f273..6cdf304 100644 --- a/archetype.go +++ b/archetype.go @@ -76,6 +76,13 @@ func (world *World) getArchetypesForComponentsIds(componentsIds ...ComponentId) } func (world *World) getNextArchetype(entityRecord entityRecord, componentsIds ...ComponentId) *archetype { + // Fast path: a single-component transition (AddComponent, AddTag, ...) is + // resolved through the archetype graph, avoiding both the linear scan over + // all archetypes and the slice rebuild done below. + if len(componentsIds) == 1 { + return world.archetypeAfterAdd(entityRecord.archetypeId, componentsIds[0]) + } + var archetype *archetype if entityRecord.archetypeId == 0 { archetype = world.getArchetypeForComponentsIds(componentsIds...) @@ -90,3 +97,57 @@ func (world *World) getNextArchetype(entityRecord entityRecord, componentsIds .. return archetype } + +// archetypeAfterAdd returns the archetype obtained by adding componentId to the +// archetype fromId, using (and lazily populating) the cached archetype graph. +func (world *World) archetypeAfterAdd(fromId archetypeId, componentId ComponentId) *archetype { + if destId, ok := world.archetypes[fromId].addEdges[componentId]; ok { + return &world.archetypes[destId] + } + + // Cache miss: compute the destination once. getArchetypeForComponentsIds may + // create a new archetype and reallocate world.archetypes, so we resolve every + // archetype by index afterwards rather than holding a stale pointer. + newType := append(slices.Clone(world.archetypes[fromId].Type), componentId) + destId := world.getArchetypeForComponentsIds(newType...).Id + world.linkArchetypes(fromId, destId, componentId) + + return &world.archetypes[destId] +} + +// archetypeAfterRemove returns the archetype obtained by removing componentId +// from the archetype fromId, using (and lazily populating) the archetype graph. 
+func (world *World) archetypeAfterRemove(fromId archetypeId, componentId ComponentId) *archetype { + if destId, ok := world.archetypes[fromId].removeEdges[componentId]; ok { + return &world.archetypes[destId] + } + + fromType := world.archetypes[fromId].Type + newType := make(componentsIds, 0, len(fromType)) + for _, c := range fromType { + if c != componentId { + newType = append(newType, c) + } + } + destId := world.getArchetypeForComponentsIds(newType...).Id + // dest --add componentId--> from, and from --remove componentId--> dest. + world.linkArchetypes(destId, fromId, componentId) + + return &world.archetypes[destId] +} + +// linkArchetypes records the bidirectional transition between two archetypes: +// fromId --add componentId--> destId and destId --remove componentId--> fromId. +func (world *World) linkArchetypes(fromId, destId archetypeId, componentId ComponentId) { + from := &world.archetypes[fromId] + if from.addEdges == nil { + from.addEdges = make(map[ComponentId]archetypeId) + } + from.addEdges[componentId] = destId + + dest := &world.archetypes[destId] + if dest.removeEdges == nil { + dest.removeEdges = make(map[ComponentId]archetypeId) + } + dest.removeEdges[componentId] = fromId +} diff --git a/archetype_test.go b/archetype_test.go new file mode 100644 index 0000000..dc4f998 --- /dev/null +++ b/archetype_test.go @@ -0,0 +1,95 @@ +package volt + +import "testing" + +// TestArchetypeGraph_ReallocationSafety drives the number of archetypes +// well past the 1024 preallocated capacity, so world.archetypes reallocates +// several times. It guards against a stale-pointer hazard: add/remove resolve a +// destination archetype (which may grow world.archetypes) while operating on the +// source archetype. Component values and tag membership must survive intact. 
+func TestArchetypeGraph_ReallocationSafety(t *testing.T) { + const n = 1500 // exceeds the 1024 archetype preallocation + + world := CreateWorld(n) + RegisterComponent[testComponent1](world, &ComponentConfig[testComponent1]{}) + + entities := make([]EntityId, n) + for i := 0; i < n; i++ { + e := world.CreateEntity() + entities[i] = e + + c := testComponent1{} + c.x = i + if err := AddComponent(world, e, c); err != nil { + t.Fatalf("AddComponent: %s", err.Error()) + } + // A distinct tag per entity forces a distinct archetype {c1, tag_i}. + if err := world.AddTag(TAGS_INDICES+TagId(i), e); err != nil { + t.Fatalf("AddTag: %s", err.Error()) + } + } + + // Every component value must have survived the archetype reallocations. + for i, e := range entities { + c := GetComponent[testComponent1](world, e) + if c == nil { + t.Fatalf("entity %d lost its component", e) + } + if c.x != i { + t.Fatalf("entity %d: expected component x=%d, got %d", e, i, c.x) + } + } + + // Removing the component drives ~n more archetype creations (well past 1024) + // exactly while removeComponent holds the source archetype — the hazard. + for _, e := range entities { + if err := RemoveComponent[testComponent1](world, e); err != nil { + t.Fatalf("RemoveComponent: %s", err.Error()) + } + } + + for i, e := range entities { + if world.HasComponents(e, testComponent1Id) { + t.Fatalf("entity %d still owns the component after removal", e) + } + if !world.HasTag(TAGS_INDICES+TagId(i), e) { + t.Fatalf("entity %d lost its tag after component removal", e) + } + } +} + +// TestArchetypeGraph_EdgesAreReused checks that repeated identical transitions +// resolve to the same archetype (the graph stays consistent across many hops), +// by cycling a component on and off and confirming the entity returns to the +// exact same archetype each time. 
+func TestArchetypeGraph_EdgesAreReused(t *testing.T) { + world := CreateWorld(16) + RegisterComponent[testComponent1](world, &ComponentConfig[testComponent1]{}) + RegisterComponent[testComponent2](world, &ComponentConfig[testComponent2]{}) + + e := world.CreateEntity() + if err := AddComponent(world, e, testComponent2{}); err != nil { + t.Fatalf("%s", err.Error()) + } + base := world.entities[e].archetypeId + var withC1 archetypeId + + for i := 0; i < 5; i++ { + if err := AddComponent(world, e, testComponent1{}); err != nil { + t.Fatalf("add iteration %d: %s", i, err.Error()) + } + with := world.entities[e].archetypeId + if i == 0 { + withC1 = with + } else if with != withC1 { + t.Fatalf("iteration %d: the {c1,c2} archetype id is unstable (%d != %d)", i, with, withC1) + } + + if err := RemoveComponent[testComponent1](world, e); err != nil { + t.Fatalf("remove iteration %d: %s", i, err.Error()) + } + if back := world.entities[e].archetypeId; back != base { + t.Fatalf("iteration %d: entity did not return to base archetype (%d != %d)", i, back, base) + } + } +} diff --git a/component.go b/component.go index dba417a..3499560 100644 --- a/component.go +++ b/component.go @@ -469,17 +469,14 @@ func (world *World) RemoveComponent(entityId EntityId, componentId ComponentId) func removeComponent(world *World, s storage, entityRecord entityRecord, componentId ComponentId) { world.componentRemovedFn(entityRecord.Id, componentId) - oldArchetype := &world.archetypes[entityRecord.archetypeId] + oldArchetypeId := entityRecord.archetypeId + s.moveLastToKey(oldArchetypeId, entityRecord.key) - s.moveLastToKey(oldArchetype.Id, entityRecord.key) + // Resolve the destination archetype through the graph. This may create a new + // archetype and reallocate world.archetypes, so resolve both pointers after. 
+ archetype := world.archetypeAfterRemove(oldArchetypeId, componentId) + oldArchetype := &world.archetypes[oldArchetypeId] - // Move every components to the new one, and set all the records - componentKey := slices.Index(oldArchetype.Type, componentId) - - componentsIds := make([]ComponentId, len(oldArchetype.Type)) - copy(componentsIds, oldArchetype.Type) - componentsIds = append(componentsIds[:componentKey], componentsIds[componentKey+1:]...) - archetype := world.getArchetypeForComponentsIds(componentsIds...) moveComponentsToArchetype(world, entityRecord, oldArchetype, archetype) world.setArchetype(entityRecord, archetype) diff --git a/tag.go b/tag.go index ab1618a..31e5550 100644 --- a/tag.go +++ b/tag.go @@ -2,7 +2,6 @@ package volt import ( "fmt" - "slices" ) const COMPONENTS_INDICES = 0 @@ -63,15 +62,11 @@ func (world *World) RemoveTag(tagId TagId, entityId EntityId) error { return fmt.Errorf("the entity %d doesn't own the tag %d", entityId, tagId) } + // Resolve the destination archetype through the graph. This may create a new + // archetype and reallocate world.archetypes, so resolve both pointers after. + archetype := world.archetypeAfterRemove(entityRecord.archetypeId, tagId) oldArchetype := &world.archetypes[entityRecord.archetypeId] - // Move every components to the new one, and set all the records - componentKey := slices.Index(oldArchetype.Type, tagId) - - componentsIds := make([]ComponentId, len(oldArchetype.Type)) - copy(componentsIds, oldArchetype.Type) - componentsIds = append(componentsIds[:componentKey], componentsIds[componentKey+1:]...) - archetype := world.getArchetypeForComponentsIds(componentsIds...) 
moveComponentsToArchetype(world, entityRecord, oldArchetype, archetype) world.setArchetype(entityRecord, archetype) diff --git a/world.go b/world.go index f1a244d..e2a8d3b 100644 --- a/world.go +++ b/world.go @@ -24,6 +24,14 @@ type archetype struct { Id archetypeId Type componentsIds entities []EntityId + + // Archetype graph: cached transitions to neighbour archetypes. + // addEdges[c] is the archetype reached by adding component c to this one; + // removeEdges[c] the one reached by removing c. Archetypes are never + // destroyed, so these edges never go stale. They turn the per-operation + // archetype lookup from a linear scan into an O(1) hop after the first time. + addEdges map[ComponentId]archetypeId + removeEdges map[ComponentId]archetypeId } // Container of archetype and key position in storage, for a given EntityId