From 9a3fe67e1d7927bba59a4fb5326f9b9e03c80736 Mon Sep 17 00:00:00 2001 From: David Klement Date: Thu, 29 Jan 2026 16:55:11 +0100 Subject: [PATCH 01/15] GC: Unify documentation of GC flags --- Include/internal/pycore_gc.h | 14 ++- InternalDocs/garbage_collector.md | 30 +++--- Python/gc.c | 149 ++++++++++++++++-------------- 3 files changed, 103 insertions(+), 90 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 2dfce32237a83c3..a74d1d8d90617e8 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -118,20 +118,18 @@ static inline void _PyObject_GC_SET_SHARED(PyObject *op) { /* Bit 1 is set when the object is in generation which is GCed currently. */ #define _PyGC_PREV_MASK_COLLECTING ((uintptr_t)2) -/* Bit 0 in _gc_next is the old space bit. +/* Bit flags for _gc_next */ +/* Bit 0 is the old space bit. + * It describes the generation space the object is in. * It is set as follows: * Young: gcstate->visited_space * old[0]: 0 * old[1]: 1 * permanent: 0 - * - * During a collection all objects handled should have the bit set to - * gcstate->visited_space, as objects are moved from the young gen - * and the increment into old[gcstate->visited_space]. - * When object are moved from the pending space, old[gcstate->visited_space^1] - * into the increment, the old space bit is flipped. */ -#define _PyGC_NEXT_MASK_OLD_SPACE_1 1 +#define _PyGC_NEXT_MASK_OLD_SPACE_1 ((uintptr_t)1) +/* Bit 1 is set when the object is in the unreachable list. */ +#define _PyGC_NEXT_MASK_UNREACHABLE ((uintptr_t)2) #define _PyGC_PREV_SHIFT 2 #define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index a7d872f3ec43921..3aefef2dc86de2b 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... 
def __init__(self, next_link=None): ... self.next_link = next_link -... +... >>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -278,7 +278,7 @@ state in the previous image and after examining the objects referred to by `link the GC knows that `link_3` is reachable after all, so it is moved back to the original list and its `gc_ref` field is set to 1 so that if the GC visits it again, it will know that it's reachable. To avoid visiting an object twice, the GC marks all -objects that have already been visited once (by unsetting the `PREV_MASK_COLLECTING` +objects that have already been visited once (by unsetting the `_PyGC_PREV_MASK_COLLECTING` flag) so that if an object that has already been processed is referenced by some other object, the GC does not process it twice. @@ -465,11 +465,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -725,21 +725,27 @@ of `PyGC_Head` discussed in the `Memory layout and object structure`_ section: - The `_gc_prev` field is normally used as the "previous" pointer to maintain the doubly linked list but its lowest two bits are used to keep the flags - `PREV_MASK_COLLECTING` and `_PyGC_PREV_MASK_FINALIZED`. Between collections, + `_PyGC_PREV_MASK_COLLECTING` and `_PyGC_PREV_MASK_FINALIZED`. 
Between collections, the only flag that can be present is `_PyGC_PREV_MASK_FINALIZED` that indicates if an object has been already finalized. During collections `_gc_prev` is temporarily used for storing a copy of the reference count (`gc_ref`), in addition to two flags, and the GC linked list becomes a singly linked list until `_gc_prev` is restored. -- The `_gc_next` field is used as the "next" pointer to maintain the doubly linked - list but during collection its lowest bit is used to keep the - `NEXT_MASK_UNREACHABLE` flag that indicates if an object is tentatively +- The `_gc_next` field is normally used as the "next" pointer to maintain the + doubly linked list but its lowest two bits are used to keep the flags + `_PyGC_NEXT_MASK_OLD_SPACE_1` and `_PyGC_NEXT_MASK_UNREACHABLE`. + During collection, the `_PyGC_NEXT_MASK_UNREACHABLE` flag indicates if an object is tentatively unreachable during the cycle detection algorithm. This is a drawback to using only doubly linked lists to implement partitions: while most needed operations are constant-time, there is no efficient way to determine which partition an object is currently in. Instead, when that's needed, ad hoc tricks (like the - `NEXT_MASK_UNREACHABLE` flag) are employed. + `_PyGC_NEXT_MASK_UNREACHABLE` flag) are employed. + The `_PyGC_NEXT_MASK_OLD_SPACE_1` flag + indicates whether the object belongs to the pending space or the + visited space. The objects in the pending space are yet to be processed + during future incremental collections. Which space is which is determined + by gcstate->visited_space. Optimization: delayed untracking containers =========================================== diff --git a/Python/gc.c b/Python/gc.c index 42274315c1bd0de..7c009ef7d90880d 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -28,29 +28,89 @@ typedef struct _gc_runtime_state GCState; // #define GC_EXTRA_DEBUG +/* +_gc_prev values +--------------- + +Between collections, _gc_prev is used for doubly linked list. 
+ +Lowest two bits of _gc_prev are used for flags. +_PyGC_PREV_MASK_COLLECTING is used only while collecting +and cleared before GC ends or _PyObject_GC_UNTRACK() is called. + +During a collection, _gc_prev is temporarily used for gc_refs, and the gc list +is singly linked until _gc_prev is restored. + +gc_refs + At the start of a collection, update_refs() copies the true refcount + to gc_refs, for each object in the generation being collected. + subtract_refs() then adjusts gc_refs so that it equals the number of + times an object is referenced directly from outside the generation + being collected. + +_PyGC_PREV_MASK_FINALIZED + This bit is set when the object's finalizer (tp_finalize) has been called. + The flag ensures that the finalizer is only called once. + +_PyGC_PREV_MASK_COLLECTING + This bit is set when the object is in the generation currently being collected. + + update_refs() sets this bit for all objects in the current generation. + subtract_refs() and move_unreachable() use this to tell whether + a visited object is part of the current collection. + + move_unreachable() removes this flag from reachable objects. + Only unreachable objects have this flag. + + No objects in the interpreter have this flag after GC ends. + + +_gc_next values +--------------- + +_gc_next takes these values: + +0 + The object is not tracked + +!= 0 + Pointer to the next object in the GC list. + Additionally, lowest two bits are used for flags as described below. + +_PyGC_NEXT_MASK_OLD_SPACE_1 + This bit is the old space bit. + It describes the generation space the object is in. + It is set as follows: + * Young: gcstate->visited_space + * old[0]: 0 + * old[1]: 1 + * permanent: 0 + + old[gcstate->visited_space] is the visited space, + old[1-gcstate->visited_space] is the pending space. + The objects in the pending space are yet to be processed + during future incremental collections. 
+ + During a collection all objects handled should have the bit set to + gcstate->visited_space, as the objects are moved into the visited space. + +_PyGC_NEXT_MASK_UNREACHABLE + This flag indicates that the object is in the unreachable list + in move_unreachable(). + When the object is moved back to the reachable set, the bit is cleared. + + Although this flag is used only in move_unreachable(), move_unreachable() + doesn't clear this flag to skip unnecessary iteration. + move_legacy_finalizers() removes this flag instead. + Between them, the unreachable list is not a normal list and we cannot use + most gc_list_* functions for it. +*/ + #define GC_NEXT _PyGCHead_NEXT #define GC_PREV _PyGCHead_PREV -// update_refs() set this bit for all objects in current generation. -// subtract_refs() and move_unreachable() uses this to distinguish -// visited object is in GCing or not. -// -// move_unreachable() removes this flag from reachable objects. -// Only unreachable objects have this flag. -// -// No objects in interpreter have this flag after GC ends. #define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING - -// Lowest bit of _gc_next is used for UNREACHABLE flag. -// -// This flag represents the object is in unreachable list in move_unreachable() -// -// Although this flag is used only in move_unreachable(), move_unreachable() -// doesn't clear this flag to skip unnecessary iteration. -// move_legacy_finalizers() removes this flag instead. -// Between them, unreachable list is not normal list and we can not use -// most gc_list_* functions for it. -#define NEXT_MASK_UNREACHABLE 2 +#define NEXT_MASK_UNREACHABLE _PyGC_NEXT_MASK_UNREACHABLE #define AS_GC(op) _Py_AS_GC(op) #define FROM_GC(gc) _Py_FROM_GC(gc) @@ -192,57 +252,6 @@ _PyGC_Init(PyInterpreterState *interp) } -/* -_gc_prev values ---------------- - -Between collections, _gc_prev is used for doubly linked list. - -Lowest two bits of _gc_prev are used for flags. 
-PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends -or _PyObject_GC_UNTRACK() is called. - -During a collection, _gc_prev is temporary used for gc_refs, and the gc list -is singly linked until _gc_prev is restored. - -gc_refs - At the start of a collection, update_refs() copies the true refcount - to gc_refs, for each object in the generation being collected. - subtract_refs() then adjusts gc_refs so that it equals the number of - times an object is referenced directly from outside the generation - being collected. - -PREV_MASK_COLLECTING - Objects in generation being collected are marked PREV_MASK_COLLECTING in - update_refs(). - - -_gc_next values ---------------- - -_gc_next takes these values: - -0 - The object is not tracked - -!= 0 - Pointer to the next object in the GC list. - Additionally, lowest bit is used temporary for - NEXT_MASK_UNREACHABLE flag described below. - -NEXT_MASK_UNREACHABLE - move_unreachable() then moves objects not reachable (whether directly or - indirectly) from outside the generation into an "unreachable" set and - set this flag. - - Objects that are found to be reachable have gc_refs set to 1. - When this flag is set for the reachable object, the object must be in - "unreachable" set. - The flag is unset and the object is moved back to "reachable" set. - - move_legacy_finalizers() will remove this flag from "unreachable" set. -*/ - /*** list functions ***/ static inline void From dd31edcaa5188ee2c331c7848c975dd2dacf573e Mon Sep 17 00:00:00 2001 From: David Klement Date: Mon, 2 Feb 2026 10:57:30 +0100 Subject: [PATCH 02/15] GC: Remove outdated comment --- Python/gc.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 7c009ef7d90880d..f10b1134425da4b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1247,9 +1247,6 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, * The "base" has to be a valid list with no mask set. 
- * The "unreachable" list must be uninitialized (this function calls - gc_list_init over 'unreachable'). - IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE flag set but it does not clear it to skip unnecessary iteration. Before the flag is cleared (for example, by using 'clear_unreachable_mask' function or @@ -1313,9 +1310,6 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * After this function 'unreachable' must not be used anymore and 'still_unreachable' will contain the objects that did not resurrect. - * The "still_unreachable" list must be uninitialized (this function calls - gc_list_init over 'still_unreachable'). - IMPORTANT: After a call to this function, the 'still_unreachable' set will have the PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so we can skip the expense of clearing the flag to avoid extra iteration. */ From fc4f3dba9ace65c69765d31e3b4ff934956f7fb7 Mon Sep 17 00:00:00 2001 From: David Klement Date: Tue, 3 Feb 2026 16:05:04 +0100 Subject: [PATCH 03/15] GC: Rename functions to not clash with regions --- Python/gc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index f10b1134425da4b..f21a2a25883cbed 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1333,7 +1333,7 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, } static void -gc_collect_region(PyThreadState *tstate, +gc_collect_chunk(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, struct gc_collection_stats *stats); @@ -1391,7 +1391,7 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); gc_list_set_space(young, gcstate->visited_space); - gc_collect_region(tstate, young, &survivors, stats); + gc_collect_chunk(tstate, young, &survivors, stats); gc_list_merge(&survivors, visited); validate_spaces(gcstate); gcstate->young.count = 0; @@ -1436,7 +1436,7 @@ visit_add_to_container(PyObject *op, 
void *arg) } static intptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) +expand_chunk_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { struct container_and_flag arg = { .container = container, @@ -1701,14 +1701,14 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) increment_size++; assert(!_Py_IsImmortal(FROM_GC(gc)) && PyRegion_IsLocal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); + increment_size += expand_chunk_transitively_reachable(&increment, gc, gcstate); } GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); - gc_collect_region(tstate, &increment, &survivors, stats); + gc_collect_chunk(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; @@ -1740,7 +1740,7 @@ gc_collect_full(PyThreadState *tstate, gc_list_merge(pending, visited); validate_spaces(gcstate); - gc_collect_region(tstate, visited, visited, + gc_collect_chunk(tstate, visited, visited, stats); validate_spaces(gcstate); gcstate->young.count = 0; @@ -1755,7 +1755,7 @@ gc_collect_full(PyThreadState *tstate, /* This is the main function. Read this to understand how the * collection process works. 
*/ static void -gc_collect_region(PyThreadState *tstate, +gc_collect_chunk(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, struct gc_collection_stats *stats) From 2a94c8599431d20ecb28ef989582868fb10493f2 Mon Sep 17 00:00:00 2001 From: David Klement Date: Wed, 4 Feb 2026 13:32:44 +0100 Subject: [PATCH 04/15] Regions GC: Add gc.collect_region --- Include/internal/pycore_gc.h | 1 + .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 ++ Modules/clinic/gcmodule.c.h | 71 ++++++++++++++++++- Modules/gcmodule.c | 29 +++++++- Python/gc.c | 22 ++++++ 8 files changed, 125 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index a74d1d8d90617e8..1e72b3db7d24bb3 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -338,6 +338,7 @@ extern void _PyGC_InitState(struct _gc_runtime_state *); extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason); extern void _PyGC_CollectNoFail(PyThreadState *tstate); +extern Py_ssize_t _PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason); /* Freeze objects tracked by the GC and ignore them in future collections. 
*/ extern void _PyGC_Freeze(PyInterpreterState *interp); diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index ec00bc656c3998a..c8a3f1bc94120fd 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1666,6 +1666,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(coro)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(count)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(covariant)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cown)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ctx)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cwd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(d_parameter_type)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 06284103e6cac9b..05c6b86eb6c827e 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -389,6 +389,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(coro) STRUCT_FOR_ID(count) STRUCT_FOR_ID(covariant) + STRUCT_FOR_ID(cown) STRUCT_FOR_ID(ctx) STRUCT_FOR_ID(cwd) STRUCT_FOR_ID(d_parameter_type) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index f7fff41bee29ba6..ca0ef07b202e7c2 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1664,6 +1664,7 @@ extern "C" { INIT_ID(coro), \ INIT_ID(count), \ INIT_ID(covariant), \ + INIT_ID(cown), \ INIT_ID(ctx), \ INIT_ID(cwd), \ INIT_ID(d_parameter_type), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 4cf5fae1f71daa4..9e0a8520e2b3a22 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ 
b/Include/internal/pycore_unicodeobject_generated.h @@ -1344,6 +1344,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(cown); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ctx); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Modules/clinic/gcmodule.c.h b/Modules/clinic/gcmodule.c.h index 08275e35413f667..a9abe8bf66f0488 100644 --- a/Modules/clinic/gcmodule.c.h +++ b/Modules/clinic/gcmodule.c.h @@ -77,7 +77,7 @@ PyDoc_STRVAR(gc_collect__doc__, "collect($module, /, generation=2)\n" "--\n" "\n" -"Run the garbage collector.\n" +"Run the garbage collector on the local region.\n" "\n" "With no arguments, run a full collection. The optional argument\n" "may be an integer specifying which generation to collect. 
A ValueError\n" @@ -150,6 +150,73 @@ gc_collect(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * return return_value; } +PyDoc_STRVAR(gc_collect_region__doc__, +"collect_region($module, /, cown)\n" +"--\n" +"\n" +"Run the garbage collector on a specific region.\n" +"\n" +"The argument should be a released Cown object holding the region to collect.\n" +"\n" +"The number of unreachable objects is returned."); + +#define GC_COLLECT_REGION_METHODDEF \ + {"collect_region", _PyCFunction_CAST(gc_collect_region), METH_FASTCALL|METH_KEYWORDS, gc_collect_region__doc__}, + +static Py_ssize_t +gc_collect_region_impl(PyObject *module, PyObject *cown); + +static PyObject * +gc_collect_region(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(cown), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"cown", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "collect_region", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *cown; + Py_ssize_t _return_value; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + cown = args[0]; + _return_value = gc_collect_region_impl(module, cown); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + 
PyDoc_STRVAR(gc_set_debug__doc__, "set_debug($module, flags, /)\n" "--\n" @@ -583,4 +650,4 @@ gc_get_freeze_count(PyObject *module, PyObject *Py_UNUSED(ignored)) exit: return return_value; } -/*[clinic end generated code: output=19738854607938db input=a9049054013a1b77]*/ +/*[clinic end generated code: output=bd7ec0973b947c02 input=a9049054013a1b77]*/ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 8a8c728428343ce..e6fcfc375ba54a5 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -70,7 +70,7 @@ gc.collect -> Py_ssize_t generation: int(c_default="NUM_GENERATIONS - 1") = 2 -Run the garbage collector. +Run the garbage collector on the local region. With no arguments, run a full collection. The optional argument may be an integer specifying which generation to collect. A ValueError @@ -81,7 +81,7 @@ The number of unreachable objects is returned. static Py_ssize_t gc_collect_impl(PyObject *module, int generation) -/*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/ +/*[clinic end generated code: output=b697e633043233c7 input=7031efb5f4ff564a]*/ { PyThreadState *tstate = _PyThreadState_GET(); @@ -93,6 +93,27 @@ gc_collect_impl(PyObject *module, int generation) return _PyGC_Collect(tstate, generation, _Py_GC_REASON_MANUAL); } +/*[clinic input] +gc.collect_region -> Py_ssize_t + + cown: object + +Run the garbage collector on a specific region. + +The argument should be a released Cown object holding the region to collect. + +The number of unreachable objects is returned. 
+[clinic start generated code]*/ + +static Py_ssize_t +gc_collect_region_impl(PyObject *module, PyObject *cown) +/*[clinic end generated code: output=47f1a91aff062e6c input=58d8161ceff6c23a]*/ +{ + PyThreadState *tstate = _PyThreadState_GET(); + return _PyGC_CollectRegion(tstate, cown, _Py_GC_REASON_MANUAL); +} + + /*[clinic input] gc.set_debug @@ -471,7 +492,8 @@ PyDoc_STRVAR(gc__doc__, "enable() -- Enable automatic garbage collection.\n" "disable() -- Disable automatic garbage collection.\n" "isenabled() -- Returns true if automatic collection is enabled.\n" -"collect() -- Do a full collection right now.\n" +"collect() -- Do a full collection on the local region right now.\n" +"collect_region() -- Do a collection on a specific region right now.\n" "get_count() -- Return the current collection counts.\n" "get_stats() -- Return list of dictionaries containing per-generation stats.\n" "set_debug() -- Set debugging flags.\n" @@ -497,6 +519,7 @@ static PyMethodDef GcMethods[] = { GC_SET_THRESHOLD_METHODDEF GC_GET_THRESHOLD_METHODDEF GC_COLLECT_METHODDEF + GC_COLLECT_REGION_METHODDEF GC_GET_OBJECTS_METHODDEF GC_GET_STATS_METHODDEF GC_IS_TRACKED_METHODDEF diff --git a/Python/gc.c b/Python/gc.c index f21a2a25883cbed..9b8caba7fa78c93 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() +#include "pycore_cown.h" // _PyCown_Type #include "pycore_dict.h" // _PyInlineValuesSize() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.gc @@ -2152,6 +2153,27 @@ _PyGC_CollectNoFail(PyThreadState *tstate) _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN); } +Py_ssize_t +_PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason) +{ + if (!Py_IS_TYPE(cown, &_PyCown_Type)) { + PyErr_SetString(PyExc_TypeError, + "the region to collect must be passed as a Cown"); + return 0; + } + Py_region_t region; + int acquire_res = 
_PyCown_AcquireGC(_PyCownObject_CAST(cown), &region); + assert (acquire_res >= 0); + if (acquire_res <= 0) { + // could not acquire the cown, perhaps someone else has it + return 0; + } + // TODO: implement region-based collection + Py_ssize_t result = 42; + _PyCown_ReleaseGC(_PyCownObject_CAST(cown)); + return result; +} + void _PyGC_DumpShutdownStats(PyInterpreterState *interp) { From a8bbe0e8bdee4afd0599df28d89ac007d21ca483 Mon Sep 17 00:00:00 2001 From: David Klement Date: Thu, 5 Feb 2026 09:57:21 +0100 Subject: [PATCH 05/15] Regions GC: Implement basic GC for regions --- Python/gc.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 3 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 9b8caba7fa78c93..f9d5a56f2606beb 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -11,6 +11,8 @@ #include "pycore_interpframe.h" // _PyFrame_GetLocalsArray() #include "pycore_object_alloc.h" // _PyObject_MallocWithType() #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_region.h" // _Py_region_data +#include "pycore_regionobject.h" // _PyRegion_Type #include "pycore_tuple.h" // _PyTuple_MaybeUntrack() #include "pycore_weakref.h" // _PyWeakref_ClearRef() @@ -1847,6 +1849,117 @@ gc_collect_chunk(PyThreadState *tstate, validate_list(to, collecting_clear_unreachable_clear); } +static void +region_list_split(PyGC_Head *list, PyGC_Head *contained) +{ + // Child regions are at the start of the list + PyGC_Head *node = GC_NEXT(list); + while (node != list) { + // Stop looping if this is not a bridge + PyObject *obj = _Py_FROM_GC(node); + if (Py_TYPE(obj) != &_PyRegion_Type) { + break; + } + node = GC_NEXT(node); + } + if (node == list) { + // No contained objects + return; + } + // Splice the contained objects out of the list + PyGC_Head *first_contained = node; + PyGC_Head *last_contained = GC_PREV(list); + _PyGCHead_SET_NEXT(GC_PREV(first_contained), list); + _PyGCHead_SET_PREV(list, GC_PREV(first_contained)); + 
_PyGCHead_SET_NEXT(contained, first_contained); + _PyGCHead_SET_PREV(first_contained, contained); + _PyGCHead_SET_NEXT(last_contained, contained); + _PyGCHead_SET_PREV(contained, last_contained); + validate_list(list, collecting_clear_unreachable_clear); + validate_list(contained, collecting_clear_unreachable_clear); +} + +static void +gc_collect_region(PyThreadState *tstate, + Py_region_t region, + struct gc_collection_stats *stats) +{ + if (region == _Py_LOCAL_REGION + || region == _Py_IMMUTABLE_REGION + || region == _Py_COWN_REGION) { + return; + } + _Py_region_data *data = (_Py_region_data*)region; + + PyGC_Head *gc; /* initialize to prevent a compiler warning */ + GCState *gcstate = &tstate->interp->gc; + assert(!_PyErr_Occurred(tstate)); + + /* Separate child regions from contained objects. + * Finalizers need them to be in the GC list, at the start of the list. + */ + PyGC_Head contained; + gc_list_init(&contained); + region_list_split(&data->gc_list, &contained); + + PyGC_Head unreachable; /* non-problematic unreachable trash */ + gc_list_init(&unreachable); + deduce_unreachable(&contained, &unreachable); + untrack_tuples(&contained); + + /* Clear NEXT_MASK_UNREACHABLE manually. */ + clear_unreachable_mask(&unreachable); + + /* Print debugging information. */ + if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { + for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { + debug_cycle("collectable", FROM_GC(gc)); + } + } + + /* Invoke weakref callbacks as necessary. */ + stats->collected += handle_weakref_callbacks(&unreachable, &contained); + validate_list(&contained, collecting_clear_unreachable_clear); + validate_list(&unreachable, collecting_set_unreachable_clear); + + /* Call tp_finalize on objects which have one. 
*/ + finalize_garbage(tstate, &unreachable); + /* Handle any objects that may have resurrected after the call + * to 'finalize_garbage' and continue the collection with the + * objects that are still unreachable */ + PyGC_Head final_unreachable; + gc_list_init(&final_unreachable); + handle_resurrected_objects(&unreachable, &final_unreachable, &contained); + + /* Clear weakrefs to objects in the unreachable set. See the comments + * above handle_weakref_callbacks() for details. + */ + clear_weakrefs(&final_unreachable); + + /* Call tp_clear on objects in the final_unreachable set. This will cause + * the reference cycles to be broken. It may also cause some objects + * in finalizers to be freed. + */ + stats->collected += gc_list_size(&final_unreachable); + delete_garbage(tstate, gcstate, &final_unreachable, &contained); + + /* Restore the GC list. Make sure child regions come first. */ + gc_list_merge(&contained, &data->gc_list); + + /* Collect child regions. */ + gc = GC_NEXT(&data->gc_list); + while (gc != &data->gc_list) { + // Stop looping if this is not a bridge + PyObject *obj = _Py_FROM_GC(gc); + if (Py_TYPE(obj) != &_PyRegion_Type) { + break; + } + Py_region_t child = _PyRegion_Get(obj); + gc_collect_region(tstate, child, stats); + gc = GC_NEXT(gc); + } +} + /* Invoke progress callbacks to notify clients that garbage collection * is starting or stopping */ @@ -2153,6 +2266,19 @@ _PyGC_CollectNoFail(PyThreadState *tstate) _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN); } +static const char * +get_region_name(_PyRegionObject* bridge) { + PyObject *name = bridge->name; + if (name == NULL || !PyUnicode_Check(name)) { + return NULL; + } + const char *name_str = PyUnicode_AsUTF8(name); + if (name_str == NULL) { + return NULL; + } + return name_str; +} + Py_ssize_t _PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason) { @@ -2168,10 +2294,25 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason) 
// could not acquire the cown, perhaps someone else has it return 0; } - // TODO: implement region-based collection - Py_ssize_t result = 42; + + // TODO(regions-gc): gc callback + GCState *gcstate = &tstate->interp->gc; + struct gc_collection_stats stats = { 0 }; + if (gcstate->debug & _PyGC_DEBUG_STATS) { + _PyRegionObject* bridge = ((_Py_region_data*)region)->bridge; + const char *name = get_region_name(bridge); + if (name == NULL) { + PySys_WriteStderr("gc: collecting region at %p\n", bridge); + } + else { + PySys_WriteStderr("gc: collecting region '%s' at %p\n", name, bridge); + } + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + gc_collect_region(tstate, region, &stats); + _PyErr_SetRaisedException(tstate, exc); _PyCown_ReleaseGC(_PyCownObject_CAST(cown)); - return result; + return stats.collected; } void From 620c21c85ea3a950c5845106e643eb349a168643 Mon Sep 17 00:00:00 2001 From: David Klement Date: Wed, 11 Feb 2026 09:50:10 +0100 Subject: [PATCH 06/15] Regions GC: Unit tests --- Lib/test/test_regions/test_gc.py | 119 ++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_regions/test_gc.py b/Lib/test/test_regions/test_gc.py index 40f57600ed31ade..36c96f8a30f2eaa 100644 --- a/Lib/test/test_regions/test_gc.py +++ b/Lib/test/test_regions/test_gc.py @@ -1,34 +1,135 @@ import unittest -from regions import Region -from immutable import freeze + +from regions import Region, Cown import gc -class TestOwnership(unittest.TestCase): +class TestRegionGC(unittest.TestCase): class A: pass + def setUp(self): + gc.collect() # Ensure there are no lingering cycles + def build_cycle(self): - freeze(self.A) a = self.A() a.b = self.A() a.b.a = a return a - def test_owned_cycles_are_ignored(self): + def build_region_with_unreachable_cycle(self): r = Region() + r.a = self.build_cycle() + r.a = None + return r - # Make sure that there are no lingering cycles - gc.collect() + def test_local_gc_ignores_regions(self): + r 
= Region() # A normal cycle should be collected self.build_cycle() self.assertGreaterEqual(gc.collect(), 2) # A cycle inside a region should be ignored - r.c = self.build_cycle() - r.c = None + r.a = self.build_cycle() + r.a = None self.assertEqual(gc.collect(), 0) # Dissolving a region should allow cycles to be collected again r = None self.assertGreaterEqual(gc.collect(), 2) + + def test_collect_cycle(self): + r = self.build_region_with_unreachable_cycle() + + c = Cown(r) + r = None + c.release() + # The cycle inside the region should be collected + self.assertEqual(gc.collect_region(c), 2) + + def test_collect_cycle_with_backlink(self): + r = Region() + r.a = self.build_cycle() + r.a.r = r + r.a = None + + c = Cown(r) + r = None + c.release() + # The cycle inside the region should be collected + self.assertEqual(gc.collect_region(c), 2) + + def test_collect_child_region(self): + r = Region() + r.child = self.build_region_with_unreachable_cycle() + + c = Cown(r) + r = None + c.release() + # The cycle inside the child region should be collected + self.assertEqual(gc.collect_region(c), 2) + + def test_collect_unreachable_child_region(self): + r = Region() + r.a = self.build_cycle() + r.a.child = self.build_region_with_unreachable_cycle() + r.a = None + + c = Cown(r) + r = None + c.release() + # The cycle inside the parent region should be collected, + # and the child region should be dissolved into the local region, + # allowing the cycle inside it to be collected by the local GC. + # Note that the bridge object is never counted; + # perhaps not ideal, but it would be difficult to implement otherwise. 
+ self.assertEqual(gc.collect_region(c), 2) + self.assertEqual(gc.collect(), 2) + + # FIXME(regions-gc) + @unittest.skip("finalizers currently do not work") + def test_finalizer(self): + class Finalizable: + def __init__(self, data): + self.data = data + + def __del__(self): + self.data["counter"] += 1 + self.data["instance"] = self + + r = Region() + r.data = {"counter": 0, "instance": None} + r.a = self.build_cycle() + r.a.f = Finalizable(r.data) + r.a = None + + c = Cown(r) + r = None + c.release() + # The cycle should be collected; the finalizer should run exactly once + self.assertEqual(gc.collect_region(c), 2) + self.assertEqual(r.data["counter"], 1) + # The finalizer should not run again + r.data["instance"] = None + self.assertEqual(r.data["counter"], 1) + + # TODO(regions-gc): test that region GC is triggered, but not when disabled + # TODO(regions-gc): callbacks + # TODO(regions-gc): weakref + + +def setUpModule(): + global enabled, debug + enabled = gc.isenabled() + debug = gc.get_debug() + gc.disable() + gc.set_debug(debug & ~gc.DEBUG_LEAK) + + +def tearDownModule(): + gc.set_debug(debug) + gc.enable() if enabled else gc.disable() + + +if __name__ == "__main__": + unittest.main() From 0bfb472b274380684cc9cc077913bf0fc74e3a01 Mon Sep 17 00:00:00 2001 From: David Klement Date: Wed, 18 Feb 2026 12:12:26 +0100 Subject: [PATCH 07/15] Regions GC: Cown switching --- Lib/test/test_regions/test_gc.py | 72 ++++++++++++++++++++++++++----- Python/gc.c | 73 +++++++++++++++++++++++++++++--- 2 files changed, 129 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_regions/test_gc.py b/Lib/test/test_regions/test_gc.py index 36c96f8a30f2eaa..5749aacc02f4b62 100644 --- a/Lib/test/test_regions/test_gc.py +++ b/Lib/test/test_regions/test_gc.py @@ -8,7 +8,9 @@ class A: pass def setUp(self): - gc.collect() # Ensure there are no lingering cycles + # Need to run collection multiple times to clean up region chains + while gc.collect() > 0: + pass def build_cycle(self): a = 
self.A() @@ -27,7 +29,8 @@ def test_local_gc_ignores_regions(self): # A normal cycle should be collected self.build_cycle() - self.assertGreaterEqual(gc.collect(), 2) + self.assertEqual(gc.collect(), 2) + self.assertEqual(gc.collect(), 0) # A cycle inside a region should be ignored r.a = self.build_cycle() @@ -36,7 +39,7 @@ def test_local_gc_ignores_regions(self): # Dissolving a region should allow cycles to be collected again r = None - self.assertGreaterEqual(gc.collect(), 2) + self.assertEqual(gc.collect(), 2) def test_collect_cycle(self): r = self.build_region_with_unreachable_cycle() @@ -86,10 +89,8 @@ def test_collect_unreachable_child_region(self): self.assertEqual(gc.collect_region(c), 2) self.assertEqual(gc.collect(), 2) - # FIXME(regions-gc) - @unittest.skip("finalizers currently do not work") def test_finalizer(self): - class Finalizable: + class Resurrectable: def __init__(self, data): self.data = data @@ -100,22 +101,73 @@ def __del__(self): r = Region() r.data = {"counter": 0, "instance": None} r.a = self.build_cycle() - r.a.f = Finalizable(r.data) + r.a.f = Resurrectable(r.data) r.a = None c = Cown(r) r = None c.release() - # The cycle should be collected; the finalizer should run exactly once + # The cycle should be collected self.assertEqual(gc.collect_region(c), 2) + c.acquire() + r = c.value + # The finalizer should have run exactly once self.assertEqual(r.data["counter"], 1) + # The instance should not have been collected + self.assertIs(r.data["instance"].data, r.data) # The finalizer should not run again r.data["instance"] = None self.assertEqual(r.data["counter"], 1) + def test_region_opened_by_finalizer(self): + class RegionOpener: + def __init__(self, r): + self.r = r + + def __del__(self): + # Create a cycle; it outlives the finalizer + a = {} + a["a"] = a + # Open the region + a["r"] = self.r + + r = Region() + r.a = self.build_cycle() + r.a.f = RegionOpener(r) + r.a = None + + c = Cown(r) + r = None + c.release() + # Collection should be 
aborted + self.assertEqual(gc.collect_region(c), 0) + c.acquire() + # The region should have been replaced with None + self.assertIsNone(c.value) + + def test_cown_changed_by_finalizer(self): + class CownChanger: + def __init__(self, c): + self.c = c + + def __del__(self): + # Change the cown's region + self.c.value = Region() + + r = Region() + c = Cown(r) + r.a = self.build_cycle() + r.a.f = CownChanger(c) + r.a = None + r = None + c.release() + # Collection should be aborted + self.assertEqual(gc.collect_region(c), 0) + + # TODO(regions-gc): test that region GC is triggered, but not when disabled - # TODO(regions-gc): callbacks - # TODO(regions-gc): weakref + # TODO(regions-gc): GC callbacks + # TODO(regions-gc): weakrefs def setUpModule(): diff --git a/Python/gc.c b/Python/gc.c index f9d5a56f2606beb..53bd52c5ec88b73 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -121,6 +121,8 @@ _PyGC_NEXT_MASK_UNREACHABLE // Automatically choose the generation that needs collecting. #define GENERATION_AUTO (-1) +#define _Py_region_data_CAST(op) _Py_CAST(_Py_region_data*, op) + static inline int gc_is_collecting(PyGC_Head *g) { @@ -1849,6 +1851,38 @@ gc_collect_chunk(PyThreadState *tstate, validate_list(to, collecting_clear_unreachable_clear); } +struct region_collection_state { + _PyCownObject *cown; + Py_region_t region_in_cown_orig; + Py_region_t region_in_cown_new; +}; + +/* Start of a section where Python code is executed. */ +static inline void +python_code_start(struct region_collection_state *rcstate) +{ + int switch_result = _PyCown_SwitchFromGcToIp(rcstate->cown); + assert(switch_result == 0); +} + +/* End of a section where Python code is executed. */ +static inline void +python_code_end(struct region_collection_state *rcstate) +{ + int switch_result = _PyCown_SwitchFromIpToGc( + rcstate->cown, + &rcstate->region_in_cown_new + ); + assert(switch_result == 0); +} + +/* Returns true if the region contained in the cown has changed. 
*/ +static inline int +has_region_changed(struct region_collection_state *rcstate) +{ + return rcstate->region_in_cown_new != rcstate->region_in_cown_orig; +} + static void region_list_split(PyGC_Head *list, PyGC_Head *contained) { @@ -1882,6 +1916,7 @@ region_list_split(PyGC_Head *list, PyGC_Head *contained) static void gc_collect_region(PyThreadState *tstate, Py_region_t region, + struct region_collection_state *rcstate, struct gc_collection_stats *stats) { if (region == _Py_LOCAL_REGION @@ -1889,9 +1924,9 @@ gc_collect_region(PyThreadState *tstate, || region == _Py_COWN_REGION) { return; } - _Py_region_data *data = (_Py_region_data*)region; - PyGC_Head *gc; /* initialize to prevent a compiler warning */ + _Py_region_data *data = _Py_region_data_CAST(region); + PyGC_Head *gc; GCState *gcstate = &tstate->interp->gc; assert(!_PyErr_Occurred(tstate)); @@ -1917,6 +1952,8 @@ gc_collect_region(PyThreadState *tstate, } } + python_code_start(rcstate); + /* Invoke weakref callbacks as necessary. */ stats->collected += handle_weakref_callbacks(&unreachable, &contained); validate_list(&contained, collecting_clear_unreachable_clear); @@ -1924,6 +1961,16 @@ gc_collect_region(PyThreadState *tstate, /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); + + python_code_end(rcstate); + if (has_region_changed(rcstate)) { + /* Abort. */ + /* Restore the GC list. Make sure child regions come first. */ + gc_list_merge(&contained, &data->gc_list); + gc_list_merge(&unreachable, &data->gc_list); + return; + } + /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ @@ -1936,6 +1983,8 @@ gc_collect_region(PyThreadState *tstate, */ clear_weakrefs(&final_unreachable); + python_code_start(rcstate); + /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. 
It may also cause some objects * in finalizers to be freed. @@ -1943,19 +1992,25 @@ gc_collect_region(PyThreadState *tstate, stats->collected += gc_list_size(&final_unreachable); delete_garbage(tstate, gcstate, &final_unreachable, &contained); + python_code_end(rcstate); + /* Restore the GC list. Make sure child regions come first. */ gc_list_merge(&contained, &data->gc_list); /* Collect child regions. */ gc = GC_NEXT(&data->gc_list); while (gc != &data->gc_list) { + if (has_region_changed(rcstate)) { + /* Abort. */ + return; + } // Stop looping if this is not a bridge PyObject *obj = _Py_FROM_GC(gc); if (Py_TYPE(obj) != &_PyRegion_Type) { break; } Py_region_t child = _PyRegion_Get(obj); - gc_collect_region(tstate, child, stats); + gc_collect_region(tstate, child, rcstate, stats); gc = GC_NEXT(gc); } } @@ -2299,7 +2354,7 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason) GCState *gcstate = &tstate->interp->gc; struct gc_collection_stats stats = { 0 }; if (gcstate->debug & _PyGC_DEBUG_STATS) { - _PyRegionObject* bridge = ((_Py_region_data*)region)->bridge; + _PyRegionObject* bridge = _Py_region_data_CAST(region)->bridge; const char *name = get_region_name(bridge); if (name == NULL) { PySys_WriteStderr("gc: collecting region at %p\n", bridge); @@ -2308,10 +2363,16 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason) PySys_WriteStderr("gc: collecting region '%s' at %p\n", name, bridge); } } + + struct region_collection_state rcstate = { + .cown = _PyCownObject_CAST(cown), + .region_in_cown_orig = region, + .region_in_cown_new = region, + }; PyObject *exc = _PyErr_GetRaisedException(tstate); - gc_collect_region(tstate, region, &stats); + gc_collect_region(tstate, region, &rcstate, &stats); _PyErr_SetRaisedException(tstate, exc); - _PyCown_ReleaseGC(_PyCownObject_CAST(cown)); + _PyCown_ReleaseGC(rcstate.cown); return stats.collected; } From b646fe283409595571cf62bea0f735fa7149ddd9 Mon Sep 17 00:00:00 2001 
From: David Klement Date: Tue, 21 Apr 2026 12:03:21 +0200 Subject: [PATCH 08/15] Regions GC: Accept an acquired cown or bridge --- Include/internal/pycore_cown.h | 3 +- Include/internal/pycore_gc.h | 2 +- .../pycore_global_objects_fini_generated.h | 2 +- Include/internal/pycore_global_strings.h | 2 +- .../internal/pycore_runtime_init_generated.h | 2 +- .../internal/pycore_unicodeobject_generated.h | 8 +- Lib/test/test_regions/test_gc.py | 91 +++++------------ Modules/clinic/gcmodule.c.h | 19 ++-- Modules/gcmodule.c | 11 ++- Objects/cownobject.c | 8 ++ Python/gc.c | 97 +++++-------------- 11 files changed, 79 insertions(+), 166 deletions(-) diff --git a/Include/internal/pycore_cown.h b/Include/internal/pycore_cown.h index 5736be692a7ef91..dc2caac4fcdc0bf 100644 --- a/Include/internal/pycore_cown.h +++ b/Include/internal/pycore_cown.h @@ -22,7 +22,8 @@ typedef uint64_t _PyCown_ipid_t; typedef uint64_t _PyCown_thread_id_t; //PyAPI_FUNC(PyObject*) _PyCown_New(); -// PyAPI_FUNC(int) _PyCown_SetValue(_PyCownObject* self, PyObject* value); +PyAPI_FUNC(PyObject*) _PyCown_GetValue(_PyCownObject* self); +PyAPI_FUNC(int) _PyCown_SetValue(_PyCownObject* self, PyObject* value); PyAPI_FUNC(_PyCown_ipid_t) _PyCown_ThisInterpreterId(void); PyAPI_FUNC(_PyCown_thread_id_t) _PyCown_ThisThreadId(void); PyAPI_FUNC(int) _PyCown_RegionOpen(_PyCownObject *self, _PyRegionObject* region, _PyCown_ipid_t ip); diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 1e72b3db7d24bb3..3e841306dd78387 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -338,7 +338,7 @@ extern void _PyGC_InitState(struct _gc_runtime_state *); extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason); extern void _PyGC_CollectNoFail(PyThreadState *tstate); -extern Py_ssize_t _PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason); +extern Py_ssize_t _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, 
_PyGC_Reason reason); /* Freeze objects tracked by the GC and ignore them in future collections. */ extern void _PyGC_Freeze(PyInterpreterState *interp); diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index c8a3f1bc94120fd..7e809fe6d89ca73 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1666,7 +1666,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(coro)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(count)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(covariant)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cown)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ctx)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cwd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(d_parameter_type)); @@ -1990,6 +1989,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(readonly)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(real)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reducer_override)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(region)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(registry)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(rel_tol)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(release)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 05c6b86eb6c827e..1f835c66bf34e33 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -389,7 +389,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(coro) STRUCT_FOR_ID(count) STRUCT_FOR_ID(covariant) - STRUCT_FOR_ID(cown) STRUCT_FOR_ID(ctx) STRUCT_FOR_ID(cwd) STRUCT_FOR_ID(d_parameter_type) @@ -713,6 +712,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(readonly) STRUCT_FOR_ID(real) 
STRUCT_FOR_ID(reducer_override) + STRUCT_FOR_ID(region) STRUCT_FOR_ID(registry) STRUCT_FOR_ID(rel_tol) STRUCT_FOR_ID(release) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index ca0ef07b202e7c2..330164d6c9fbe42 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1664,7 +1664,6 @@ extern "C" { INIT_ID(coro), \ INIT_ID(count), \ INIT_ID(covariant), \ - INIT_ID(cown), \ INIT_ID(ctx), \ INIT_ID(cwd), \ INIT_ID(d_parameter_type), \ @@ -1988,6 +1987,7 @@ extern "C" { INIT_ID(readonly), \ INIT_ID(real), \ INIT_ID(reducer_override), \ + INIT_ID(region), \ INIT_ID(registry), \ INIT_ID(rel_tol), \ INIT_ID(release), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 9e0a8520e2b3a22..cba7563e3ae6c99 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1344,10 +1344,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(cown); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ctx); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2640,6 +2636,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(region); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(registry); _PyUnicode_InternStatic(interp, &string); 
assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/test/test_regions/test_gc.py b/Lib/test/test_regions/test_gc.py index 5749aacc02f4b62..f061d788a76ce5b 100644 --- a/Lib/test/test_regions/test_gc.py +++ b/Lib/test/test_regions/test_gc.py @@ -1,6 +1,6 @@ import unittest -from regions import Region, Cown +from regions import Cown, Region import gc class TestRegionGC(unittest.TestCase): @@ -44,11 +44,26 @@ def test_local_gc_ignores_regions(self): def test_collect_cycle(self): r = self.build_region_with_unreachable_cycle() - c = Cown(r) + # The cycle inside the region should be collected + self.assertEqual(gc.collect_region(r), 2) + + def test_acquired_cown(self): + r = self.build_region_with_unreachable_cycle() + cown = Cown(r) r = None - c.release() + # The cycle inside the region should be collected - self.assertEqual(gc.collect_region(c), 2) + self.assertEqual(gc.collect_region(cown), 2) + + def test_released_cown(self): + r = self.build_region_with_unreachable_cycle() + cown = Cown(r) + r = None + cown.release() + + # If passing a cown, it needs to be acquired + with self.assertRaises(TypeError): + gc.collect_region(cown) def test_collect_cycle_with_backlink(self): r = Region() @@ -56,21 +71,15 @@ def test_collect_cycle_with_backlink(self): r.a.r = r r.a = None - c = Cown(r) - r = None - c.release() # The cycle inside the region should be collected - self.assertEqual(gc.collect_region(c), 2) + self.assertEqual(gc.collect_region(r), 2) def test_collect_child_region(self): r = Region() r.child = self.build_region_with_unreachable_cycle() - c = Cown(r) - r = None - c.release() # The cycle inside the child region should be collected - self.assertEqual(gc.collect_region(c), 2) + self.assertEqual(gc.collect_region(r), 2) def test_collect_unreachable_child_region(self): r = Region() @@ -78,15 +87,12 @@ def test_collect_unreachable_child_region(self): r.a.child = self.build_region_with_unreachable_cycle() r.a = None - c = Cown(r) - r = None - c.release() # 
The cycle inside the parent region should be collected, # and the child region should be dissolved into the local region, # allowing the cycle inside it to be collected by the local GC. # Note that the bridge object is never counted; # perhaps not ideal, but it would be difficult to implement otherwise. - self.assertEqual(gc.collect_region(c), 2) + self.assertEqual(gc.collect_region(r), 2) self.assertEqual(gc.collect(), 2) def test_finalizer(self): @@ -104,13 +110,8 @@ def __del__(self): r.a.f = Resurrectable(r.data) r.a = None - c = Cown(r) - r = None - c.release() # The cycle should be collected - self.assertEqual(gc.collect_region(c), 2) - c.acquire() - r = c.value + self.assertEqual(gc.collect_region(r), 2) # The finalizer should have run exactly once self.assertEqual(r.data["counter"], 1) # The instance should not have been collected @@ -119,52 +120,6 @@ def __del__(self): r.data["instance"] = None self.assertEqual(r.data["counter"], 1) - def test_region_opened_by_finalizer(self): - class RegionOpener: - def __init__(self, r): - self.r = r - - def __del__(self): - # Create a cycle; it outlives the finalizer - a = {} - a["a"] = a - # Open the region - a["r"] = self.r - - r = Region() - r.a = self.build_cycle() - r.a.f = RegionOpener(r) - r.a = None - - c = Cown(r) - r = None - c.release() - # Collection should be aborted - self.assertEqual(gc.collect_region(c), 0) - c.acquire() - # The region should have been replaced with None - self.assertIsNone(c.value) - - def test_cown_changed_by_finalizer(self): - class CownChanger: - def __init__(self, c): - self.c = c - - def __del__(self): - # Change the cown's region - self.c.value = Region() - - r = Region() - c = Cown(r) - r.a = self.build_cycle() - r.a.f = CownChanger(c) - r.a = None - r = None - c.release() - # Collection should be aborted - self.assertEqual(gc.collect_region(c), 0) - - # TODO(regions-gc): test that region GC is triggered, but not when disabled # TODO(regions-gc): GC callbacks # TODO(regions-gc): 
weakrefs diff --git a/Modules/clinic/gcmodule.c.h b/Modules/clinic/gcmodule.c.h index a9abe8bf66f0488..7f47666f3134d49 100644 --- a/Modules/clinic/gcmodule.c.h +++ b/Modules/clinic/gcmodule.c.h @@ -151,12 +151,13 @@ gc_collect(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * } PyDoc_STRVAR(gc_collect_region__doc__, -"collect_region($module, /, cown)\n" +"collect_region($module, /, region)\n" "--\n" "\n" "Run the garbage collector on a specific region.\n" "\n" -"The argument should be a released Cown object holding the region to collect.\n" +"The argument should be either a Region object\n" +"or an acquired Cown object holding the region to collect.\n" "\n" "The number of unreachable objects is returned."); @@ -164,7 +165,7 @@ PyDoc_STRVAR(gc_collect_region__doc__, {"collect_region", _PyCFunction_CAST(gc_collect_region), METH_FASTCALL|METH_KEYWORDS, gc_collect_region__doc__}, static Py_ssize_t -gc_collect_region_impl(PyObject *module, PyObject *cown); +gc_collect_region_impl(PyObject *module, PyObject *region); static PyObject * gc_collect_region(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -181,7 +182,7 @@ gc_collect_region(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(cown), }, + .ob_item = { &_Py_ID(region), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -190,7 +191,7 @@ gc_collect_region(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"cown", NULL}; + static const char * const _keywords[] = {"region", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "collect_region", @@ -198,7 +199,7 @@ gc_collect_region(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO }; #undef KWTUPLE PyObject *argsbuf[1]; - PyObject *cown; + 
PyObject *region; Py_ssize_t _return_value; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, @@ -206,8 +207,8 @@ gc_collect_region(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO if (!args) { goto exit; } - cown = args[0]; - _return_value = gc_collect_region_impl(module, cown); + region = args[0]; + _return_value = gc_collect_region_impl(module, region); if ((_return_value == -1) && PyErr_Occurred()) { goto exit; } @@ -650,4 +651,4 @@ gc_get_freeze_count(PyObject *module, PyObject *Py_UNUSED(ignored)) exit: return return_value; } -/*[clinic end generated code: output=bd7ec0973b947c02 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fdd862c312de4482 input=a9049054013a1b77]*/ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index e6fcfc375ba54a5..61b821c6241170c 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -96,21 +96,22 @@ gc_collect_impl(PyObject *module, int generation) /*[clinic input] gc.collect_region -> Py_ssize_t - cown: object + region: object Run the garbage collector on a specific region. -The argument should be a released Cown object holding the region to collect. +The argument should be either a Region object +or an acquired Cown object holding the region to collect. The number of unreachable objects is returned. 
[clinic start generated code]*/ static Py_ssize_t -gc_collect_region_impl(PyObject *module, PyObject *cown) -/*[clinic end generated code: output=47f1a91aff062e6c input=58d8161ceff6c23a]*/ +gc_collect_region_impl(PyObject *module, PyObject *region) +/*[clinic end generated code: output=16b435a6dab62cc1 input=da1f6879e70ac5a4]*/ { PyThreadState *tstate = _PyThreadState_GET(); - return _PyGC_CollectRegion(tstate, cown, _Py_GC_REASON_MANUAL); + return _PyGC_CollectRegion(tstate, region, _Py_GC_REASON_MANUAL); } diff --git a/Objects/cownobject.c b/Objects/cownobject.c index f73ca120d760120..00dd54484d258e0 100644 --- a/Objects/cownobject.c +++ b/Objects/cownobject.c @@ -531,12 +531,20 @@ static PyObject *CownObject_get_value(_PyCownObject *self, void *closure) { return PyRegion_NewRef(self->value); } +PyObject *_PyCown_GetValue(_PyCownObject* self) { + return CownObject_get_value(self, NULL); +} + static int CownObject_set_value(_PyCownObject *self, PyObject *value, void *closure) { BAIL_UNLESS_OWNED(self, -1); return cown_set_value(self, value); } +int _PyCown_SetValue(_PyCownObject* self, PyObject* value) { + return CownObject_set_value(self, value, NULL); +} + static PyGetSetDef PyCownObject_getset[] = { {"value", (getter)CownObject_get_value, (setter)CownObject_set_value, "", NULL}, diff --git a/Python/gc.c b/Python/gc.c index 53bd52c5ec88b73..332bd56c74138a9 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1851,38 +1851,6 @@ gc_collect_chunk(PyThreadState *tstate, validate_list(to, collecting_clear_unreachable_clear); } -struct region_collection_state { - _PyCownObject *cown; - Py_region_t region_in_cown_orig; - Py_region_t region_in_cown_new; -}; - -/* Start of a section where Python code is executed. */ -static inline void -python_code_start(struct region_collection_state *rcstate) -{ - int switch_result = _PyCown_SwitchFromGcToIp(rcstate->cown); - assert(switch_result == 0); -} - -/* End of a section where Python code is executed. 
*/ -static inline void -python_code_end(struct region_collection_state *rcstate) -{ - int switch_result = _PyCown_SwitchFromIpToGc( - rcstate->cown, - &rcstate->region_in_cown_new - ); - assert(switch_result == 0); -} - -/* Returns true if the region contained in the cown has changed. */ -static inline int -has_region_changed(struct region_collection_state *rcstate) -{ - return rcstate->region_in_cown_new != rcstate->region_in_cown_orig; -} - static void region_list_split(PyGC_Head *list, PyGC_Head *contained) { @@ -1916,7 +1884,6 @@ region_list_split(PyGC_Head *list, PyGC_Head *contained) static void gc_collect_region(PyThreadState *tstate, Py_region_t region, - struct region_collection_state *rcstate, struct gc_collection_stats *stats) { if (region == _Py_LOCAL_REGION @@ -1952,8 +1919,6 @@ gc_collect_region(PyThreadState *tstate, } } - python_code_start(rcstate); - /* Invoke weakref callbacks as necessary. */ stats->collected += handle_weakref_callbacks(&unreachable, &contained); validate_list(&contained, collecting_clear_unreachable_clear); @@ -1961,16 +1926,6 @@ gc_collect_region(PyThreadState *tstate, /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); - - python_code_end(rcstate); - if (has_region_changed(rcstate)) { - /* Abort. */ - /* Restore the GC list. Make sure child regions come first. */ - gc_list_merge(&contained, &data->gc_list); - gc_list_merge(&unreachable, &data->gc_list); - return; - } - /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ @@ -1983,8 +1938,6 @@ gc_collect_region(PyThreadState *tstate, */ clear_weakrefs(&final_unreachable); - python_code_start(rcstate); - /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. 
@@ -1992,25 +1945,19 @@ gc_collect_region(PyThreadState *tstate, stats->collected += gc_list_size(&final_unreachable); delete_garbage(tstate, gcstate, &final_unreachable, &contained); - python_code_end(rcstate); - /* Restore the GC list. Make sure child regions come first. */ gc_list_merge(&contained, &data->gc_list); /* Collect child regions. */ gc = GC_NEXT(&data->gc_list); while (gc != &data->gc_list) { - if (has_region_changed(rcstate)) { - /* Abort. */ - return; - } // Stop looping if this is not a bridge PyObject *obj = _Py_FROM_GC(gc); if (Py_TYPE(obj) != &_PyRegion_Type) { break; } Py_region_t child = _PyRegion_Get(obj); - gc_collect_region(tstate, child, rcstate, stats); + gc_collect_region(tstate, child, stats); gc = GC_NEXT(gc); } } @@ -2335,26 +2282,28 @@ get_region_name(_PyRegionObject* bridge) { } Py_ssize_t -_PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason) +_PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason) { - if (!Py_IS_TYPE(cown, &_PyCown_Type)) { - PyErr_SetString(PyExc_TypeError, - "the region to collect must be passed as a Cown"); - return 0; + // We accept cowns to allow passing a closed region. + if (Py_IS_TYPE(region, &_PyCown_Type)) { + PyObject *value = _PyCown_GetValue(_PyCownObject_CAST(region)); + if (value == NULL) { + goto error; + } + region = value; + PyRegion_RemoveLocalRef(value); + Py_DECREF(value); } - Py_region_t region; - int acquire_res = _PyCown_AcquireGC(_PyCownObject_CAST(cown), &region); - assert (acquire_res >= 0); - if (acquire_res <= 0) { - // could not acquire the cown, perhaps someone else has it - return 0; + // We should have reached the bridge now.
+ if (!Py_IS_TYPE(region, &_PyRegion_Type)) { + goto error; } - + Py_region_t region_id = _PyRegion_Get(region); // TODO(regions-gc): gc callback GCState *gcstate = &tstate->interp->gc; struct gc_collection_stats stats = { 0 }; if (gcstate->debug & _PyGC_DEBUG_STATS) { - _PyRegionObject* bridge = _Py_region_data_CAST(region)->bridge; + _PyRegionObject* bridge = _Py_region_data_CAST(region_id)->bridge; const char *name = get_region_name(bridge); if (name == NULL) { PySys_WriteStderr("gc: collecting region at %p\n", bridge); @@ -2363,17 +2312,15 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *cown, _PyGC_Reason reason) PySys_WriteStderr("gc: collecting region '%s' at %p\n", name, bridge); } } - - struct region_collection_state rcstate = { - .cown = _PyCownObject_CAST(cown), - .region_in_cown_orig = region, - .region_in_cown_new = region, - }; PyObject *exc = _PyErr_GetRaisedException(tstate); - gc_collect_region(tstate, region, &rcstate, &stats); + gc_collect_region(tstate, region_id, &stats); _PyErr_SetRaisedException(tstate, exc); - _PyCown_ReleaseGC(rcstate.cown); return stats.collected; + +error: + PyErr_SetString(PyExc_TypeError, + "region parameter must be a bridge or an acquired cown storing a bridge"); + return -1; } void From b1ef3c7df6d6d9406fb385087b0d352c770face4 Mon Sep 17 00:00:00 2001 From: David Klement Date: Wed, 22 Apr 2026 13:31:29 +0200 Subject: [PATCH 09/15] Regions GC: Deal with cleaning --- Include/internal/pycore_region.h | 1 + Python/gc.c | 135 ++++++++++++++++++++++--------- Python/region.c | 6 ++ 3 files changed, 102 insertions(+), 40 deletions(-) diff --git a/Include/internal/pycore_region.h b/Include/internal/pycore_region.h index f300e912c14a64f..6fb587c60a035d5 100644 --- a/Include/internal/pycore_region.h +++ b/Include/internal/pycore_region.h @@ -133,6 +133,7 @@ static inline Py_region_t __PyRegion_Get(PyObject *obj, int follow_pending) { PyAPI_FUNC(int) _PyRegion_New(_PyRegionObject *bridge); PyAPI_FUNC(int) 
_PyRegion_Dissolve(Py_region_t region); +PyAPI_FUNC(void) _PyRegion_IncRc(Py_region_t region); PyAPI_FUNC(void) _PyRegion_DecRc(Py_region_t region); PyAPI_FUNC(Py_ssize_t) _PyRegion_GetLrc(Py_region_t region); diff --git a/Python/gc.c b/Python/gc.c index 332bd56c74138a9..6eb22f60df2709c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1851,6 +1851,28 @@ gc_collect_chunk(PyThreadState *tstate, validate_list(to, collecting_clear_unreachable_clear); } +static PyObject* +region_get_name(_PyRegionObject *bridge) +{ + PyObject *name = bridge->name; + if (name == NULL || !PyUnicode_Check(name)) { + return NULL; + } + return name; +} + +static void +debug_region_collection(const char *message, _PyRegionObject *bridge) +{ + PyObject *name = region_get_name(bridge); + if (name == NULL) { + PySys_FormatStderr("gc: %s at %p\n", message, bridge); + } + else { + PySys_FormatStderr("gc: %s '%U' at %p\n", message, name, bridge); + } +} + static void region_list_split(PyGC_Head *list, PyGC_Head *contained) { @@ -1881,22 +1903,33 @@ region_list_split(PyGC_Head *list, PyGC_Head *contained) validate_list(contained, collecting_clear_unreachable_clear); } +/* Collect garbage in a region and its tree. + * The caller must ensure that the region data will not be deallocated + * during the collection, even if the region gets cleaned. + */ static void gc_collect_region(PyThreadState *tstate, - Py_region_t region, + Py_region_t region_id, struct gc_collection_stats *stats) { - if (region == _Py_LOCAL_REGION - || region == _Py_IMMUTABLE_REGION - || region == _Py_COWN_REGION) { - return; - } - - _Py_region_data *data = _Py_region_data_CAST(region); PyGC_Head *gc; GCState *gcstate = &tstate->interp->gc; + _Py_region_data *data = _Py_region_data_CAST(region_id); assert(!_PyErr_Occurred(tstate)); + /* Create an artificial local reference to keep the region open, + * preventing Python code from sharing it with other interpreters. 
+ */ + PyObject *bridge = PyRegion_XNewRef(data->bridge); + if (bridge == NULL) { + // Nothing to do. + assert(gc_list_is_empty(&data->gc_list)); + return; + } + if (gcstate->debug & _PyGC_DEBUG_STATS) { + debug_region_collection("handling garbage in region", _PyRegionObject_CAST(bridge)); + } + /* Separate child regions from contained objects. * Finalizers need them to be in the GC list, at the start of the list. */ @@ -1904,12 +1937,11 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&contained); region_list_split(&data->gc_list, &contained); - PyGC_Head unreachable; /* non-problematic unreachable trash */ + /* Identify unreachable objects. */ + PyGC_Head unreachable; gc_list_init(&unreachable); deduce_unreachable(&contained, &unreachable); untrack_tuples(&contained); - - /* Clear NEXT_MASK_UNREACHABLE manually. */ clear_unreachable_mask(&unreachable); /* Print debugging information. */ @@ -1945,10 +1977,39 @@ gc_collect_region(PyThreadState *tstate, stats->collected += gc_list_size(&final_unreachable); delete_garbage(tstate, gcstate, &final_unreachable, &contained); - /* Restore the GC list. Make sure child regions come first. */ - gc_list_merge(&contained, &data->gc_list); + if (data->bridge == NULL) { + /* The region has been cleaned. + * In the process, objects reachable from the bridge have already + * been moved from our temporary GC lists to the region GC list. + * The remaining objects can be returned to the local GC. + */ + gc_list_set_space(&contained, gcstate->visited_space); + gc_list_merge(&contained, &gcstate->young.head); + } + else { + /* No cleaning happened, so the region still has the same data block. + * We return the reachable objects to the region GC list, + * making sure child regions come first. + * Objects in the region GC list always use old space 0. 
+ */ + gc_list_validate_space(&contained, 0); + gc_list_merge(&contained, &data->gc_list); + } + + /* Remove the artificial local reference now + * to allow child regions to use the optimization for closed regions. + */ + PyRegion_RemoveLocalRef(bridge); + Py_DECREF(bridge); - /* Collect child regions. */ + /* Collect child regions. + * + * TODO(regions): + * If the region has been cleaned, previously open child regions + * have already been collected. We should still collect + * the remaining child regions, but we currently don't. + * The GC list will be empty and this loop will not do anything. + */ gc = GC_NEXT(&data->gc_list); while (gc != &data->gc_list) { // Stop looping if this is not a bridge @@ -1956,8 +2017,18 @@ gc_collect_region(PyThreadState *tstate, if (Py_TYPE(obj) != &_PyRegion_Type) { break; } - Py_region_t child = _PyRegion_Get(obj); - gc_collect_region(tstate, child, stats); + Py_region_t child_id = _PyRegion_Get(obj); + _PyRegion_IncRc(child_id); + gc_collect_region(tstate, child_id, stats); + bool is_still_child = _PyRegion_GetParent(child_id) == region_id; + _PyRegion_DecRc(child_id); + if (!is_still_child) { + /* The region hierarchy has changed. + * Following the next pointer would take us who knows where. + * Give up collecting the remaining child regions. 
+ */ + break; + } gc = GC_NEXT(gc); } } @@ -2268,19 +2339,6 @@ _PyGC_CollectNoFail(PyThreadState *tstate) _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN); } -static const char * -get_region_name(_PyRegionObject* bridge) { - PyObject *name = bridge->name; - if (name == NULL || !PyUnicode_Check(name)) { - return NULL; - } - const char *name_str = PyUnicode_AsUTF8(name); - if (name_str == NULL) { - return NULL; - } - return name_str; -} - Py_ssize_t _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason) { @@ -2298,22 +2356,19 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason if (!Py_IS_TYPE(region, &_PyRegion_Type)) { goto error; } - Py_region_t region_id = _PyRegion_Get(region); - // TODO(regions-gc): gc callback + GCState *gcstate = &tstate->interp->gc; - struct gc_collection_stats stats = { 0 }; + // TODO(regions-gc): gc callback if (gcstate->debug & _PyGC_DEBUG_STATS) { - _PyRegionObject* bridge = _Py_region_data_CAST(region_id)->bridge; - const char *name = get_region_name(bridge); - if (name == NULL) { - PySys_WriteStderr("gc: collecting region at %p\n", bridge); - } - else { - PySys_WriteStderr("gc: collecting region '%s' at %p\n", name, bridge); - } + debug_region_collection("collecting region tree with root", _PyRegionObject_CAST(region)); } + + struct gc_collection_stats stats = { 0 }; + Py_region_t region_id = _PyRegion_Get(region); PyObject *exc = _PyErr_GetRaisedException(tstate); + _PyRegion_IncRc(region_id); gc_collect_region(tstate, region_id, &stats); + _PyRegion_DecRc(region_id); _PyErr_SetRaisedException(tstate, exc); return stats.collected; diff --git a/Python/region.c b/Python/region.c index 909eb4b2631446d..ea53313505a1f67 100644 --- a/Python/region.c +++ b/Python/region.c @@ -1963,6 +1963,12 @@ int _PyRegion_Dissolve(Py_region_t region) { return regiondata_union_merge(region, _Py_LOCAL_REGION); } +/* Increments the reference count of the region. 
+ */ +void _PyRegion_IncRc(Py_region_t region) { + regiondata_inc_rc(region); +} + /* Decrements the reference count of the region. This may deallocate the region. */ void _PyRegion_DecRc(Py_region_t region) { From 4750a2ba4c570ad6c106f4148de98232faf8c545 Mon Sep 17 00:00:00 2001 From: David Klement Date: Fri, 24 Apr 2026 13:21:45 +0200 Subject: [PATCH 10/15] Regions GC: Avoid recursion and traversal problems --- Include/internal/pycore_region.h | 3 + Include/internal/pycore_regionobject.h | 4 +- Lib/test/test_regions/test_gc.py | 9 +- Objects/regionobject.c | 1 + Python/gc.c | 210 +++++++++++++++---------- Python/region.c | 15 +- 6 files changed, 146 insertions(+), 96 deletions(-) diff --git a/Include/internal/pycore_region.h b/Include/internal/pycore_region.h index 6fb587c60a035d5..015e71e0eb45088 100644 --- a/Include/internal/pycore_region.h +++ b/Include/internal/pycore_region.h @@ -98,6 +98,9 @@ typedef struct _Py_region_data { */ PyGC_Head gc_list; + /* List of unreachable objects in the region, saved to be deleted later. */ + PyGC_Head unreachable; + #ifdef Py_OWNERSHIP_INVARIANT _Py_ownership_invariant_region_data invariant_data; #endif diff --git a/Include/internal/pycore_regionobject.h b/Include/internal/pycore_regionobject.h index dbf23a686a9e5d7..bca597861e3af2b 100644 --- a/Include/internal/pycore_regionobject.h +++ b/Include/internal/pycore_regionobject.h @@ -22,6 +22,8 @@ struct _PyRegionObject { /** The name of the region or NULL */ PyObject *name; PyObject *dict; + /* A link in a list of regions to be garbage collected. 
*/ + struct _PyRegionObject *next; }; #define _PyRegionObject_CAST(op) _Py_CAST(_PyRegionObject*, op) @@ -30,4 +32,4 @@ PyAPI_DATA(PyTypeObject) _PyRegion_Type; #ifdef __cplusplus } #endif -#endif /* !Py_INTERNAL_REGIONOBJECT_H */ \ No newline at end of file +#endif /* !Py_INTERNAL_REGIONOBJECT_H */ diff --git a/Lib/test/test_regions/test_gc.py b/Lib/test/test_regions/test_gc.py index f061d788a76ce5b..cde7dc1c710e397 100644 --- a/Lib/test/test_regions/test_gc.py +++ b/Lib/test/test_regions/test_gc.py @@ -87,13 +87,12 @@ def test_collect_unreachable_child_region(self): r.a.child = self.build_region_with_unreachable_cycle() r.a = None - # The cycle inside the parent region should be collected, - # and the child region should be dissolved into the local region, - # allowing the cycle inside it to be collected by the local GC. + # Both cycles should be collected. # Note that the bridge object is never counted; # perhaps not ideal, but it would be difficult to implement otherwise. - self.assertEqual(gc.collect_region(r), 2) - self.assertEqual(gc.collect(), 2) + self.assertEqual(gc.collect_region(r), 4) + # Nothing should have been dissolved. 
+ self.assertEqual(gc.collect(), 0) def test_finalizer(self): class Resurrectable: diff --git a/Objects/regionobject.c b/Objects/regionobject.c index 2d8131bde50b338..4f7b7795c57dccb 100644 --- a/Objects/regionobject.c +++ b/Objects/regionobject.c @@ -40,6 +40,7 @@ static int Region_init(_PyRegionObject *self, PyObject *args, PyObject *kwds) { self->region = NULL_REGION; self->name = NULL; + self->next = NULL; // Allocate the new region object if (_PyRegion_New(_PyRegionObject_CAST(self))) { diff --git a/Python/gc.c b/Python/gc.c index 6eb22f60df2709c..4489fe37bbb9bfd 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1120,6 +1120,16 @@ debug_cycle(const char *msg, PyObject *op) msg, Py_TYPE(op)->tp_name, op); } +static void +debug_print_unreachable(GCState *gcstate, PyGC_Head *unreachable) +{ + if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { + for (PyGC_Head *gc = GC_NEXT(unreachable); gc != unreachable; gc = GC_NEXT(gc)) { + debug_cycle("collectable", FROM_GC(gc)); + } + } +} + /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable * only from such cycles). * If _PyGC_DEBUG_SAVEALL, all objects in finalizers are appended to the module @@ -1799,12 +1809,7 @@ gc_collect_chunk(PyThreadState *tstate, move_legacy_finalizer_reachable(&finalizers); validate_list(&finalizers, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); - /* Print debugging information. */ - if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { - for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { - debug_cycle("collectable", FROM_GC(gc)); - } - } + debug_print_unreachable(gcstate, &unreachable); /* Invoke weakref callbacks as necessary. 
*/ stats->collected += handle_weakref_callbacks(&unreachable, to); @@ -1873,8 +1878,39 @@ debug_region_collection(const char *message, _PyRegionObject *bridge) } } +static void region_list_add_children(_PyRegionObject **list, Py_region_t parent) +{ + _Py_region_data *data = _Py_region_data_CAST(parent); + PyGC_Head *gc = GC_NEXT(&data->gc_list); + while (gc != &data->gc_list) { + // Stop looping if this is not a bridge. + PyObject *obj = _Py_FROM_GC(gc); + if (Py_TYPE(obj) != &_PyRegion_Type) { + break; + } + // Prepend to list. + _PyRegionObject *child = _PyRegionObject_CAST(obj); + child->next = *list; + *list = child; + gc = GC_NEXT(gc); + } +} + +/* Unwrap the region tree into a linked list in depth-first order. */ +static void +region_list_build_dfs(_PyRegionObject *root) +{ + root->next = NULL; + _PyRegionObject *current = root; + while (current != NULL) { + // Adding child regions will prepend before next and update it. + region_list_add_children(&current->next, _PyRegion_Get(current)); + current = current->next; + } +} + static void -region_list_split(PyGC_Head *list, PyGC_Head *contained) +gc_region_list_split(PyGC_Head *list, PyGC_Head *contained) { // Child regions are at the start of the list PyGC_Head *node = GC_NEXT(list); @@ -1903,39 +1939,21 @@ region_list_split(PyGC_Head *list, PyGC_Head *contained) validate_list(contained, collecting_clear_unreachable_clear); } -/* Collect garbage in a region and its tree. - * The caller must ensure that the region data will not be deallocated - * during the collection, even if the region gets cleaned. +/* Identify unreachable objects in a region and move them + * from the region GC list to the region unreachable list. + * This function can run without the GIL if the region is closed.
*/ static void -gc_collect_region(PyThreadState *tstate, - Py_region_t region_id, - struct gc_collection_stats *stats) +region_extract_unreachable(Py_region_t region_id) { - PyGC_Head *gc; - GCState *gcstate = &tstate->interp->gc; _Py_region_data *data = _Py_region_data_CAST(region_id); - assert(!_PyErr_Occurred(tstate)); - - /* Create an artificial local reference to keep the region open, - * preventing Python code from sharing it with other interpreters. - */ - PyObject *bridge = PyRegion_XNewRef(data->bridge); - if (bridge == NULL) { - // Nothing to do. - assert(gc_list_is_empty(&data->gc_list)); - return; - } - if (gcstate->debug & _PyGC_DEBUG_STATS) { - debug_region_collection("handling garbage in region", _PyRegionObject_CAST(bridge)); - } /* Separate child regions from contained objects. * Finalizers need them to be in the GC list, at the start of the list. */ PyGC_Head contained; gc_list_init(&contained); - region_list_split(&data->gc_list, &contained); + gc_region_list_split(&data->gc_list, &contained); /* Identify unreachable objects. */ PyGC_Head unreachable; @@ -1944,16 +1962,36 @@ gc_collect_region(PyThreadState *tstate, untrack_tuples(&contained); clear_unreachable_mask(&unreachable); - /* Print debugging information. */ - if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) { - for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { - debug_cycle("collectable", FROM_GC(gc)); - } + /* Return the reachable objects, making sure child regions come first. */ + gc_list_merge(&contained, &data->gc_list); + /* Save the unreachable objects. 
*/ + gc_list_merge(&unreachable, &data->unreachable); +} + +static void +region_handle_garbage(PyThreadState *tstate, + Py_region_t region_id, + struct gc_collection_stats *stats) +{ + GCState *gcstate = &tstate->interp->gc; + _Py_region_data *data = _Py_region_data_CAST(region_id); + if (gcstate->debug & _PyGC_DEBUG_STATS) { + debug_region_collection("handling garbage in region", data->bridge); } + PyGC_Head surviving; + gc_list_init(&surviving); + PyGC_Head unreachable; + gc_list_init(&unreachable); + /* Move the unreachable objects to the local list. + * This prevents objects being taken out of the region GC list + * in case it is merged with another region. + */ + gc_list_merge(&data->unreachable, &unreachable); + debug_print_unreachable(gcstate, &unreachable); + /* Invoke weakref callbacks as necessary. */ - stats->collected += handle_weakref_callbacks(&unreachable, &contained); - validate_list(&contained, collecting_clear_unreachable_clear); + stats->collected += handle_weakref_callbacks(&unreachable, &surviving); validate_list(&unreachable, collecting_set_unreachable_clear); /* Call tp_finalize on objects which have one. */ @@ -1963,7 +2001,7 @@ gc_collect_region(PyThreadState *tstate, * objects that are still unreachable */ PyGC_Head final_unreachable; gc_list_init(&final_unreachable); - handle_resurrected_objects(&unreachable, &final_unreachable, &contained); + handle_resurrected_objects(&unreachable, &final_unreachable, &surviving); /* Clear weakrefs to objects in the unreachable set. See the comments * above handle_weakref_callbacks() for details. @@ -1975,61 +2013,63 @@ gc_collect_region(PyThreadState *tstate, * in finalizers to be freed. */ stats->collected += gc_list_size(&final_unreachable); - delete_garbage(tstate, gcstate, &final_unreachable, &contained); + delete_garbage(tstate, gcstate, &final_unreachable, &surviving); + validate_list(&surviving, collecting_clear_unreachable_clear); + /* The region could have been merged with another region. 
+ * Find out where to return the surviving objects. + */ if (data->bridge == NULL) { - /* The region has been cleaned. - * In the process, objects reachable from the bridge have already - * been moved from our temporary GC lists to the region GC list. - * The remaining objects can be returned to the local GC. - */ - gc_list_set_space(&contained, gcstate->visited_space); - gc_list_merge(&contained, &gcstate->young.head); + /* The region has been dissolved, return to the local GC. */ + gc_list_set_space(&surviving, gcstate->visited_space); + gc_list_merge(&surviving, &gcstate->young.head); } else { - /* No cleaning happened, so the region still has the same data block. - * We return the reachable objects to the region GC list, - * making sure child regions come first. - * Objects in the region GC list always use old space 0. + Py_region_t new_region = _PyRegion_Get(data->bridge); + /* Objects in the region GC list always use old space 0. */ + gc_list_validate_space(&surviving, 0); + gc_list_merge(&surviving, &_Py_region_data_CAST(new_region)->gc_list); + } +} + +/* Collect garbage in a region and its tree. */ +static void +gc_collect_region_tree(PyThreadState *tstate, + Py_region_t root_id, + struct gc_collection_stats *stats) +{ + assert(!_PyErr_Occurred(tstate)); + _PyRegionObject *root = _Py_region_data_CAST(root_id)->bridge; + + // TODO(regions-gc): Drop the GIL if the region is closed. + region_list_build_dfs(root); + for (_PyRegionObject *curr = root; curr != NULL; curr = curr->next) { + region_extract_unreachable(_PyRegion_Get(curr)); + /* Create an artificial local reference to the bridge to: + * 1. ensure it will not go away, + * 2. prevent Python code from sharing it with other interpreters. */ - gc_list_validate_space(&contained, 0); - gc_list_merge(&contained, &data->gc_list); + PyRegion_NewRef(curr); } - /* Remove the artificial local reference now - * to allow child regions to use the optimization for closed regions. 
+ /* This runs Python code, which can change the region topology. + * We hold the GIL and only this interpreter has access to the regions, + * so nobody should interfere and we can safely handle the garbage. */ - PyRegion_RemoveLocalRef(bridge); - Py_DECREF(bridge); + for (_PyRegionObject *curr = root; curr != NULL; curr = curr->next) { + region_handle_garbage(tstate, _PyRegion_Get(curr), stats); + } - /* Collect child regions. - * - * TODO(regions): - * If the region has been cleaned, previously open child regions - * have already been collected. We should still collect - * the remaining child regions, but we currently don't. - * The GC list will be empty and this loop will not do anything. + /* Remove the artificial local references. + * That can cause deallocation of the bridge objects. */ - gc = GC_NEXT(&data->gc_list); - while (gc != &data->gc_list) { - // Stop looping if this is not a bridge - PyObject *obj = _Py_FROM_GC(gc); - if (Py_TYPE(obj) != &_PyRegion_Type) { - break; - } - Py_region_t child_id = _PyRegion_Get(obj); - _PyRegion_IncRc(child_id); - gc_collect_region(tstate, child_id, stats); - bool is_still_child = _PyRegion_GetParent(child_id) == region_id; - _PyRegion_DecRc(child_id); - if (!is_still_child) { - /* The region hierarchy has changed. - * Following the next pointer would take us who knows where. - * Give up collecting the remaining child regions. 
- */ - break; - } - gc = GC_NEXT(gc); + _PyRegionObject *curr = root; + while (curr != NULL) { + _PyRegionObject *next = curr->next; + curr->next = NULL; // not necessary but nice + PyRegion_RemoveLocalRef(curr); + Py_DECREF(curr); + curr = next; } } @@ -2366,9 +2406,7 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason struct gc_collection_stats stats = { 0 }; Py_region_t region_id = _PyRegion_Get(region); PyObject *exc = _PyErr_GetRaisedException(tstate); - _PyRegion_IncRc(region_id); - gc_collect_region(tstate, region_id, &stats); - _PyRegion_DecRc(region_id); + gc_collect_region_tree(tstate, region_id, &stats); _PyErr_SetRaisedException(tstate, exc); return stats.collected; diff --git a/Python/region.c b/Python/region.c index ea53313505a1f67..384361ab7cae6e4 100644 --- a/Python/region.c +++ b/Python/region.c @@ -290,6 +290,12 @@ static int gc_list_for_each_subregion(PyGC_Head *list, gc_list_callback_t callba return 0; } +static void gc_list_return_to_local(PyGC_Head *list) { + struct _gc_runtime_state* gc_state = get_gc_state(); + // Use `old[0]` here, we are setting the visited space to 0 in add_visited_set(). + gc_list_merge(list, &(gc_state->old[0].head)); +} + static int _gc_region_list_dissolve_callback(Py_region_t region, void* _ignore) { PyObject* obj = _PyRegion_GetBridge(region); // Bump LRC for the reference which was previously owning this @@ -305,10 +311,7 @@ static int _gc_region_list_dissolve_callback(Py_region_t region, void* _ignore) static void gc_region_list_dissolve(PyGC_Head *list) { gc_list_for_each_subregion(list, (gc_list_callback_t)_gc_region_list_dissolve_callback, NULL); - - struct _gc_runtime_state* gc_state = get_gc_state(); - // Use `old[0]` here, we are setting the visited space to 0 in add_visited_set(). 
- gc_list_merge(list, &(gc_state->old[0].head)); + gc_list_return_to_local(list); } // ********************************************************************** @@ -345,6 +348,7 @@ static Py_region_t regiondata_new(void) { } gc_list_init(&data->gc_list); + gc_list_init(&data->unreachable); data->rc = 1; return (Py_region_t)data; } @@ -559,6 +563,7 @@ static int regiondata_union_merge( // Do a region merge, which keeps the bridge objects at the start // of the list and the contained objects at the end gc_region_list_merge(&source_data->gc_list, &target_data->gc_list); + gc_list_merge(&source_data->unreachable, &target_data->unreachable); // Check how the `open_tick` should be updated if (target_data->open_tick == OPEN_TICK_CLOSED) { @@ -586,6 +591,7 @@ static int regiondata_union_merge( // The function below also bumps the LRC of the sub-regions // meaning this should be all covered now. gc_region_list_dissolve(&(source_data->gc_list)); + gc_list_return_to_local(&(source_data->unreachable)); } else { trace("%lx: Merging %lx into %lx", source, source, target); } @@ -597,6 +603,7 @@ static int regiondata_union_merge( source_data->open_tick = OPEN_TICK_CLOSED; assert(gc_list_is_empty(&source_data->gc_list)); + assert(gc_list_is_empty(&source_data->unreachable)); // Skip the error label and run the normal cleanup code goto cleanup; From 7f1fdabc1275958fbefa87579c87e27937734341 Mon Sep 17 00:00:00 2001 From: David Klement Date: Fri, 24 Apr 2026 20:29:10 +0200 Subject: [PATCH 11/15] Regions GC: GIL release --- Python/gc.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 4489fe37bbb9bfd..9cede58e09f807a 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -2036,19 +2036,41 @@ region_handle_garbage(PyThreadState *tstate, static void gc_collect_region_tree(PyThreadState *tstate, Py_region_t root_id, + _PyCownObject *cown, struct gc_collection_stats *stats) { assert(!_PyErr_Occurred(tstate)); 
_PyRegionObject *root = _Py_region_data_CAST(root_id)->bridge; - // TODO(regions-gc): Drop the GIL if the region is closed. + /* If the region is closed, nobody can interfere with unreachable + * object identification, and this section can run without the GIL. + * To prevent the current interpreter from opening the region, + * we switch the cown's owner to a special owner representing the GC. + */ + bool release_gil = cown != NULL && !_PyRegion_IsOpen(root_id); + PyThreadState *_save; + if (release_gil) { + Py_region_t contained; + int res = _PyCown_SwitchFromIpToGc(cown, &contained); + assert(res == 0); + assert(contained == root_id); + Py_UNBLOCK_THREADS + } region_list_build_dfs(root); for (_PyRegionObject *curr = root; curr != NULL; curr = curr->next) { region_extract_unreachable(_PyRegion_Get(curr)); - /* Create an artificial local reference to the bridge to: - * 1. ensure it will not go away, - * 2. prevent Python code from sharing it with other interpreters. - */ + } + if (release_gil) { + Py_BLOCK_THREADS + int res = _PyCown_SwitchFromGcToIp(cown); + assert(res == 0); + } + + /* Create artificial local references to the bridges to: + * 1. ensure they will not go away, + * 2. prevent Python code from sharing them with other interpreters. + */ + for (_PyRegionObject *curr = root; curr != NULL; curr = curr->next) { PyRegion_NewRef(curr); } @@ -2382,9 +2404,11 @@ _PyGC_CollectNoFail(PyThreadState *tstate) Py_ssize_t _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason) { + _PyCownObject *cown = NULL; // We accept cowns to allow passing a closed region. 
if (Py_IS_TYPE(region, &_PyCown_Type)) { - PyObject *value = _PyCown_GetValue(_PyCownObject_CAST(region)); + cown = _PyCownObject_CAST(region); + PyObject *value = _PyCown_GetValue(cown); if (value == NULL) { goto error; } @@ -2406,7 +2430,7 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason struct gc_collection_stats stats = { 0 }; Py_region_t region_id = _PyRegion_Get(region); PyObject *exc = _PyErr_GetRaisedException(tstate); - gc_collect_region_tree(tstate, region_id, &stats); + gc_collect_region_tree(tstate, region_id, cown, &stats); _PyErr_SetRaisedException(tstate, exc); return stats.collected; From a430c2c37b59ffbf1a88d998e8e184c7c952ddc5 Mon Sep 17 00:00:00 2001 From: David Klement Date: Mon, 4 May 2026 13:47:51 +0200 Subject: [PATCH 12/15] Cowns: Simplify GC functions --- Include/internal/pycore_cown.h | 4 +- Objects/cownobject.c | 74 ++++------------------------------ Python/gc.c | 4 +- 3 files changed, 9 insertions(+), 73 deletions(-) diff --git a/Include/internal/pycore_cown.h b/Include/internal/pycore_cown.h index dc2caac4fcdc0bf..ca66ca5d35edc2d 100644 --- a/Include/internal/pycore_cown.h +++ b/Include/internal/pycore_cown.h @@ -27,10 +27,8 @@ PyAPI_FUNC(int) _PyCown_SetValue(_PyCownObject* self, PyObject* value); PyAPI_FUNC(_PyCown_ipid_t) _PyCown_ThisInterpreterId(void); PyAPI_FUNC(_PyCown_thread_id_t) _PyCown_ThisThreadId(void); PyAPI_FUNC(int) _PyCown_RegionOpen(_PyCownObject *self, _PyRegionObject* region, _PyCown_ipid_t ip); -PyAPI_FUNC(int) _PyCown_AcquireGC(_PyCownObject *self, Py_region_t *region); PyAPI_FUNC(int) _PyCown_SwitchFromGcToIp(_PyCownObject *self); -PyAPI_FUNC(int) _PyCown_SwitchFromIpToGc(_PyCownObject *self, Py_region_t *contained_region); -PyAPI_FUNC(int) _PyCown_ReleaseGC(_PyCownObject *self); +PyAPI_FUNC(int) _PyCown_SwitchFromIpToGc(_PyCownObject *self); #ifdef __cplusplus diff --git a/Objects/cownobject.c b/Objects/cownobject.c index 00dd54484d258e0..a0036e17bf91ecf 100644 --- 
a/Objects/cownobject.c +++ b/Objects/cownobject.c @@ -619,84 +619,24 @@ PyTypeObject _PyCown_Type = { .tp_flags2 = Py_TPFLAGS2_REGION_AWARE }; -/* This acquires the current cown for the GC. The cown returns a borrowed - * reference to the contained region via the `region` argument. - * - * Possible returns: - * (-1): Indicates a error state. (This should never happen). - * (0): the acquisition failed, probably because a different thread - * acquired the cown first. - * (1): The cown was acquired and the `region` argument was updated. The - * cown needs to be manually released via `_PyCown_ReleaseGC`. - */ -int _PyCown_AcquireGC(_PyCownObject *self, Py_region_t *region) { - // Attempt to lock the cown - int res = cown_lock(self, NO_BLOCKING_TIMEOUT, GC_IPID, false); - if (res == COWN_ACQUIRE_ERROR) { - return -1; - } - - // The cown was snatched up by something else. This is fine for - // the GC - if (res == COWN_ACQUIRE_FAIL) { - return 0; - } - assert(res == COWN_ACQUIRE_SUCCESS); - - // This accesses the value directly, to keep a potential region closed - *region = _PyRegion_Get(self->value); - return 1; -} - int _PyCown_SwitchFromGcToIp(_PyCownObject *self) { - BAIL_UNLESS_OWNED_BY(self, GC_IPID, -1); - _PyCown_ipid_t ipid = _PyCown_ThisInterpreterId(); _PyCown_ipid_t gcid = GC_IPID; - if (!_Py_atomic_compare_exchange_uint64(&self->owning_ip, &gcid, ipid)) { - return -1; - } + BAIL_UNLESS_OWNED_BY(self, gcid, -1); - return 0; -} - -static int cown_switch_to_gc_unchecked(_PyCownObject *self, _PyCown_ipid_t ipid, Py_region_t *contained_region) { - if (!_Py_atomic_compare_exchange_uint64(&self->owning_ip, &ipid, GC_IPID)) { + if (!_Py_atomic_compare_exchange_uint64(&self->owning_ip, &gcid, ipid)) { return -1; } - *contained_region = _PyRegion_Get(self->value); return 0; } -int _PyCown_SwitchFromIpToGc(_PyCownObject *self, Py_region_t *contained_region) { +int _PyCown_SwitchFromIpToGc(_PyCownObject *self) { _PyCown_ipid_t ipid = _PyCown_ThisInterpreterId(); - 
*contained_region = NULL_REGION; - if (cown_check_owner_before_release(self, ipid) < 0) { - return -1; - } - - if (cown_is_value_cown_or_immutable(self)) { - // Can be switched without any restrictions - return cown_switch_to_gc_unchecked(self, ipid, contained_region); - } - assert(_PyRegion_Get(self->value) != _Py_LOCAL_REGION); + _PyCown_ipid_t gcid = GC_IPID; + BAIL_UNLESS_OWNED_BY(self, ipid, -1); - int clean_res = cown_try_closing_region(self); - if (clean_res < 0) { + if (!_Py_atomic_compare_exchange_uint64(&self->owning_ip, &ipid, gcid)) { return -1; } - if (clean_res == 1) { - // The region is still open, and we won't be able to release the cown. - // After GC, the cown will still be owned by the current interpreter. - // Nobody expects this. - // Replace the cown's value with an exception. - // FIXME(cowns): exceptions cannot yet be frozen, setting None for now - cown_set_value_unchecked(self, Py_None); - } - // Region is closed, safe to switch - return cown_switch_to_gc_unchecked(self, ipid, contained_region); -} - -int _PyCown_ReleaseGC(_PyCownObject *self) { - return cown_release(self, GC_IPID); + return 0; } diff --git a/Python/gc.c b/Python/gc.c index 9cede58e09f807a..7b1f94521bded0c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -2050,10 +2050,8 @@ gc_collect_region_tree(PyThreadState *tstate, bool release_gil = cown != NULL && !_PyRegion_IsOpen(root_id); PyThreadState *_save; if (release_gil) { - Py_region_t contained; - int res = _PyCown_SwitchFromIpToGc(cown, &contained); + int res = _PyCown_SwitchFromIpToGc(cown); assert(res == 0); - assert(contained == root_id); Py_UNBLOCK_THREADS } region_list_build_dfs(root); From 80fe7480dea7068638e7401d00968bad82863c2b Mon Sep 17 00:00:00 2001 From: David Klement Date: Mon, 4 May 2026 13:54:41 +0200 Subject: [PATCH 13/15] Regions GC: Scheduling on release --- Include/internal/pycore_cown.h | 1 + Include/internal/pycore_gc.h | 2 + Include/internal/pycore_interp_structs.h | 3 + Lib/test/test_regions/test_gc.py 
| 111 ++++++++++++++++++++--- Objects/cownobject.c | 53 +++++++++++ Python/gc.c | 42 ++++++++- 6 files changed, 194 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_cown.h b/Include/internal/pycore_cown.h index ca66ca5d35edc2d..64b8eee2c1710b1 100644 --- a/Include/internal/pycore_cown.h +++ b/Include/internal/pycore_cown.h @@ -27,6 +27,7 @@ PyAPI_FUNC(int) _PyCown_SetValue(_PyCownObject* self, PyObject* value); PyAPI_FUNC(_PyCown_ipid_t) _PyCown_ThisInterpreterId(void); PyAPI_FUNC(_PyCown_thread_id_t) _PyCown_ThisThreadId(void); PyAPI_FUNC(int) _PyCown_RegionOpen(_PyCownObject *self, _PyRegionObject* region, _PyCown_ipid_t ip); +PyAPI_FUNC(void) _PyCown_SetCollecting(_PyCownObject *self, int value); PyAPI_FUNC(int) _PyCown_SwitchFromGcToIp(_PyCownObject *self); PyAPI_FUNC(int) _PyCown_SwitchFromIpToGc(_PyCownObject *self); diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 3e841306dd78387..8a8ac14a8d99edd 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -339,6 +339,8 @@ extern void _PyGC_InitState(struct _gc_runtime_state *); extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason); extern void _PyGC_CollectNoFail(PyThreadState *tstate); extern Py_ssize_t _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason); +extern void _PyGC_IncreaseRegionBudget(PyThreadState *tstate); +extern bool _PyGC_CanRunRegionGC(PyThreadState *tstate); /* Freeze objects tracked by the GC and ignore them in future collections. 
*/ extern void _PyGC_Freeze(PyInterpreterState *interp); diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index d67a9fcaf9a72b7..c8bf4c98e089818 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -227,6 +227,9 @@ struct _gc_runtime_state { int visited_space; int phase; + /* How many objects in regions can be collected within this cycle */ + Py_ssize_t region_budget; + #ifdef Py_GIL_DISABLED /* This is the number of objects that survived the last full collection. It approximates the number of long lived objects diff --git a/Lib/test/test_regions/test_gc.py b/Lib/test/test_regions/test_gc.py index cde7dc1c710e397..865bda3e9e8e7cb 100644 --- a/Lib/test/test_regions/test_gc.py +++ b/Lib/test/test_regions/test_gc.py @@ -1,17 +1,59 @@ +import gc +import test.support import unittest +from immutable import InterpreterLocal from regions import Cown, Region -import gc + class TestRegionGC(unittest.TestCase): class A: pass + class Resurrectable: + def __init__(self, data): + self.data = data + + def __del__(self): + self.data["counter"] += 1 + self.data["instance"] = self + + class GcDetector: + def __init__(self, data): + self.data = data + self.loop = self + + def __del__(self): + self.data["counter"] += 1 + + class CownReleaser: + def __init__(self, to_release): + self.to_release = to_release + self.loop = self + + def __del__(self): + self.to_release.release() + + class RegionOpener: + def __init__(self, iplocal): + self.iplocal = iplocal + self.loop = self + + def __del__(self): + self.iplocal.set(self) + def setUp(self): # Need to run collection multiple times to clean up region chains while gc.collect() > 0: pass + def tearDown(self): + gc.disable() + + def trigger_local_gc(self): + # Threshold was set to 20 in setUpModule + l = [object() for _ in range(20)] + def build_cycle(self): a = self.A() a.b = self.A() @@ -24,6 +66,13 @@ def 
build_region_with_unreachable_cycle(self): r.a = None return r + def build_detector_cown(self): + r = Region() + r.data = {"counter": 0} + r.detector = self.GcDetector(r.data) + r.detector = None + return Cown(r) + def test_local_gc_ignores_regions(self): r = Region() @@ -95,18 +144,10 @@ def test_collect_unreachable_child_region(self): self.assertEqual(gc.collect(), 0) def test_finalizer(self): - class Resurrectable: - def __init__(self, data): - self.data = data - - def __del__(self): - self.data["counter"] += 1 - self.data["instance"] = self - r = Region() r.data = {"counter": 0, "instance": None} r.a = self.build_cycle() - r.a.f = Resurrectable(r.data) + r.a.resurrectable = self.Resurrectable(r.data) r.a = None # The cycle should be collected @@ -119,23 +160,63 @@ def __del__(self): r.data["instance"] = None self.assertEqual(r.data["counter"], 1) - # TODO(regions-gc): test that region GC is triggered, but not when disabled - # TODO(regions-gc): GC callbacks - # TODO(regions-gc): weakrefs + def test_release_while_gc(self): + r = Region() + cown = Cown(r) + r.releaser = self.CownReleaser(cown) + r.releaser = None + r = None + + # Releasing a garbage collected cown should fail + with test.support.catch_unraisable_exception() as cm: + gc.collect_region(cown) + self.assertIs(cm.unraisable.exc_type, RuntimeError) + + def test_collection_triggered(self): + gc.enable() + cown = self.build_detector_cown() + self.trigger_local_gc() + cown.release() + cown.acquire() + + # The cown should have been collected on release + self.assertEqual(cown.value.data["counter"], 1) + + def test_region_left_open(self): + gc.enable() + r = Region() + cown = Cown(r) + # Using InterpreterLocal to create a local reference + r.iplocal = InterpreterLocal(None) + r.opener = self.RegionOpener(r.iplocal) + r.opener = None + r = None + self.trigger_local_gc() + + with test.support.catch_unraisable_exception() as cm: + cown.release() + cown.acquire() + # The cown could not have been released. 
+ # That should have triggered an unraisable exception + # and replaced the cown value with None to indicate an error. + self.assertIs(cm.unraisable.exc_type, RuntimeError) + self.assertIs(cown.value, None) def setUpModule(): - global enabled, debug + global enabled, debug, threshold enabled = gc.isenabled() debug = gc.get_debug() + threshold = gc.get_threshold() gc.disable() gc.set_debug(debug & ~gc.DEBUG_LEAK) + gc.set_threshold(20, 10, 10) def tearDownModule(): gc.set_debug(debug) gc.enable() if enabled else gc.disable() - + gc.set_threshold(*threshold) if __name__ == "__main__": unittest.main() diff --git a/Objects/cownobject.c b/Objects/cownobject.c index a0036e17bf91ecf..469d9f9b616440b 100644 --- a/Objects/cownobject.c +++ b/Objects/cownobject.c @@ -57,6 +57,9 @@ struct _PyCownObject { * Therefore, we are responsible for releasing and acquireing the GIL. */ PyMutex lock; + + /* Whether this cown is currently being collected by the region GC. */ + int collecting; }; static _PyCown_ipid_t cown_get_owner(_PyCownObject *obj) { @@ -431,6 +434,13 @@ static int cown_release(_PyCownObject *self, _PyCown_ipid_t unlocking_ip) { return -1; } + if (self->collecting) { + PyErr_Format( + PyExc_RuntimeError, + "the cown can't be released, since it is currently being garbage collected"); + return -1; + } + if (cown_is_value_cown_or_immutable(self)) { // Can be released without any restrictions return cown_release_unchecked(self, unlocking_ip); @@ -451,11 +461,50 @@ static int cown_release(_PyCownObject *self, _PyCown_ipid_t unlocking_ip) { return cown_release_unchecked(self, unlocking_ip); } +/* Release the cown after performing garbage collection. */ +static void cown_gc_release(_PyCownObject *self, _PyCown_ipid_t this_ip) { + if (cown_release(self, this_ip) == 0) { + return; + } + // Cannot release the cown, likely because the region has been opened. + // Nobody expects this cown to suddenly be acquired. + // Replace the cown's value.
+ // FIXME(cowns): Replace with an exception once they can be frozen. + PyErr_FormatUnraisable("Exception ignored while garbage collecting a cown"); + cown_set_value_unchecked(self, Py_None); + // Should be able to release now. + int res = cown_release(self, this_ip); + assert(res == 0); +} + +/* Collect the region tree inside the cown, + * but only if we have budget to do so and the cown is released. + */ +static void cown_try_collect(_PyCownObject *self, _PyCown_ipid_t this_ip) { + if (cown_is_value_cown_or_immutable(self)) { + // Not a region. + return; + } + PyThreadState *tstate = PyThreadState_Get(); + if (!_PyGC_CanRunRegionGC(tstate)) { + return; + } + + // Lock without blocking, we only want the cown if it is released. + int res = cown_lock(self, NO_BLOCKING_TIMEOUT, this_ip, true); + if (res != COWN_ACQUIRE_SUCCESS) { + return; + } + _PyGC_CollectRegion(tstate, _PyObject_CAST(self), _Py_GC_REASON_HEAP); + cown_gc_release(self, this_ip); +} + static PyObject* CownObject_release(_PyCownObject *self, PyObject *ignored) { _PyCown_ipid_t this_ip = _PyCown_ThisInterpreterId(); if (cown_release(self, this_ip) < 0) { return NULL; } + cown_try_collect(self, this_ip); Py_RETURN_NONE; } @@ -619,6 +668,10 @@ PyTypeObject _PyCown_Type = { .tp_flags2 = Py_TPFLAGS2_REGION_AWARE }; +void _PyCown_SetCollecting(_PyCownObject *self, int value) { + self->collecting = value; +} + int _PyCown_SwitchFromGcToIp(_PyCownObject *self) { _PyCown_ipid_t ipid = _PyCown_ThisInterpreterId(); _PyCown_ipid_t gcid = GC_IPID; diff --git a/Python/gc.c b/Python/gc.c index 7b1f94521bded0c..67cfb51abfade49 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1944,7 +1944,7 @@ gc_region_list_split(PyGC_Head *list, PyGC_Head *contained) * This function can run without the GIL if the region is closed. 
*/ static void -region_extract_unreachable(Py_region_t region_id) +region_extract_unreachable(Py_region_t region_id, Py_ssize_t *object_count) { _Py_region_data *data = _Py_region_data_CAST(region_id); @@ -1966,6 +1966,9 @@ region_extract_unreachable(Py_region_t region_id) gc_list_merge(&contained, &data->gc_list); /* Save the unreachable objects. */ gc_list_merge(&unreachable, &data->unreachable); + + /* Estimate number of objects using the rc.*/ + *object_count += data->rc; } static void @@ -2056,7 +2059,8 @@ gc_collect_region_tree(PyThreadState *tstate, } region_list_build_dfs(root); for (_PyRegionObject *curr = root; curr != NULL; curr = curr->next) { - region_extract_unreachable(_PyRegion_Get(curr)); + // Abusing stats.uncollectable to count objects in the region tree. + region_extract_unreachable(_PyRegion_Get(curr), &stats->uncollectable); } if (release_gil) { Py_BLOCK_THREADS @@ -2420,16 +2424,28 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason } GCState *gcstate = &tstate->interp->gc; - // TODO(regions-gc): gc callback + // TODO(regions): GC callback if (gcstate->debug & _PyGC_DEBUG_STATS) { debug_region_collection("collecting region tree with root", _PyRegionObject_CAST(region)); } struct gc_collection_stats stats = { 0 }; Py_region_t region_id = _PyRegion_Get(region); + + if (cown != NULL) { + // Ensure the cown will not be released. + _PyCown_SetCollecting(cown, 1); + } PyObject *exc = _PyErr_GetRaisedException(tstate); gc_collect_region_tree(tstate, region_id, cown, &stats); _PyErr_SetRaisedException(tstate, exc); + if (cown != NULL) { + _PyCown_SetCollecting(cown, 0); + } + if (reason == _Py_GC_REASON_HEAP) { + // uncollectable is used to count the objects in the region tree. 
+ gcstate->region_budget -= stats.uncollectable; + } return stats.collected; error: @@ -2438,6 +2454,25 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason return -1; } +void +_PyGC_IncreaseRegionBudget(PyThreadState *tstate) +{ + GCState *gcstate = &tstate->interp->gc; + /* From assess_work_to_do: + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + */ + gcstate->region_budget = gcstate->young.threshold * 3; +} + +bool +_PyGC_CanRunRegionGC(PyThreadState *tstate) +{ + GCState *gcstate = &tstate->interp->gc; + return gcstate->enabled && gcstate->region_budget > 0; +} + void _PyGC_DumpShutdownStats(PyInterpreterState *interp) { @@ -2614,6 +2649,7 @@ _Py_RunGC(PyThreadState *tstate) { if (tstate->interp->gc.enabled) { _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP); + _PyGC_IncreaseRegionBudget(tstate); } } From 04243bbfdab310b70db3c1c09add5c789c5bbb2a Mon Sep 17 00:00:00 2001 From: David Klement Date: Wed, 20 May 2026 13:52:20 +0200 Subject: [PATCH 14/15] Regions GC: Increase budget when moving an object --- Lib/test/test_regions/test_gc.py | 7 ++----- Python/gc.c | 7 +++++-- Python/region.c | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_regions/test_gc.py b/Lib/test/test_regions/test_gc.py index 865bda3e9e8e7cb..73fd2663c87c4e9 100644 --- a/Lib/test/test_regions/test_gc.py +++ b/Lib/test/test_regions/test_gc.py @@ -50,9 +50,6 @@ def setUp(self): def tearDown(self): gc.disable() - def trigger_local_gc(self): - # Threshold was set to 20 in setUpModule - l = [object() for _ in range(20)] def build_cycle(self): a = self.A() @@ -175,7 +172,7 @@ def test_release_while_gc(self): def test_collection_triggered(self): gc.enable() cown = self.build_detector_cown() - self.trigger_local_gc() + # Assuming that the budget was increased sufficiently. 
cown.release() cown.acquire() @@ -191,9 +188,9 @@ def test_region_left_open(self): r.opener = self.RegionOpener(r.iplocal) r.opener = None r = None - self.trigger_local_gc() with test.support.catch_unraisable_exception() as cm: + # Assuming that the budget was increased sufficiently. cown.release() cown.acquire() # The cown could not have been released. diff --git a/Python/gc.c b/Python/gc.c index 67cfb51abfade49..2f99c464964c896 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -2454,6 +2454,7 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason return -1; } +/* Called when an object is added to a region. */ void _PyGC_IncreaseRegionBudget(PyThreadState *tstate) { @@ -2463,7 +2464,10 @@ _PyGC_IncreaseRegionBudget(PyThreadState *tstate) * of new objects added to the heap. This ensures that we stay ahead in the * worst case of all new objects being garbage. */ - gcstate->region_budget = gcstate->young.threshold * 3; + gcstate->region_budget += 3; + if (gcstate->region_budget > gcstate->young.threshold) { + gcstate->region_budget = gcstate->young.threshold; + } } bool @@ -2649,7 +2653,6 @@ _Py_RunGC(PyThreadState *tstate) { if (tstate->interp->gc.enabled) { _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP); - _PyGC_IncreaseRegionBudget(tstate); } } diff --git a/Python/region.c b/Python/region.c index 384361ab7cae6e4..096112144db6810 100644 --- a/Python/region.c +++ b/Python/region.c @@ -1287,6 +1287,7 @@ static void _PyRegion_SetMoveGC(PyObject* obj, Py_region_t new_region) { gc_clear_collecting(_Py_AS_GC(obj)); gc_set_old_space(_Py_AS_GC(obj), 0); gc_list_move(_Py_AS_GC(obj), &data->gc_list); + _PyGC_IncreaseRegionBudget(PyThreadState_Get()); } else if (IS_LOCAL_REGION(new_region)) { // Objects can't be moved from regions into the local region via // `_PyRegion_SetMoveGC`. 
Dissolve will always merge the entire list From 22db1e57fda903be66e75d88c6ba57c7073a5e64 Mon Sep 17 00:00:00 2001 From: David Klement Date: Fri, 12 Jun 2026 11:15:54 +0200 Subject: [PATCH 15/15] Regions GC: Write barriers --- Python/gc.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 2f99c464964c896..67d5200db0ed057 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1188,6 +1188,9 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) (finalize = Py_TYPE(op)->tp_finalize) != NULL) { _PyGC_SET_FINALIZED(op); + // Regions: No barrier needed. + // If called from the local GC, op is local. + // If called from the region GC, the region is already open. Py_INCREF(op); finalize(op); assert(!_PyErr_Occurred(tstate)); @@ -1223,6 +1226,9 @@ delete_garbage(PyThreadState *tstate, GCState *gcstate, else { inquiry clear; if ((clear = Py_TYPE(op)->tp_clear) != NULL) { + // Regions: No barrier needed. + // If called from the local GC, op is local. + // If called from the region GC, the region is already open. Py_INCREF(op); // TODO(Immutable): This is only required until we have the SCC support working. _Py_CLEAR_IMMUTABLE(op); @@ -1909,6 +1915,28 @@ region_list_build_dfs(_PyRegionObject *root) } } +/* Create artificial local references to the bridges. */ +static int +region_list_add_local_refs(_PyRegionObject *root) { + _PyRegionObject *revert_end = NULL; + for (_PyRegionObject *curr = root; curr != NULL; curr = curr->next) { + if (PyRegion_AddLocalRef(curr)) { + revert_end = curr; + break; + } + Py_INCREF(curr); + } + if (revert_end == NULL) { + return 0; + } + // AddLocalRef failed, revert the changes. 
+ for (_PyRegionObject *curr = root; curr != revert_end; curr = curr->next) { + PyRegion_RemoveLocalRef(curr); + Py_DECREF(curr); + } + return 1; +} + static void gc_region_list_split(PyGC_Head *list, PyGC_Head *contained) { @@ -2072,8 +2100,9 @@ gc_collect_region_tree(PyThreadState *tstate, * 1. ensure they will not go away, * 2. prevent Python code from sharing them with other interpreters. */ - for (_PyRegionObject *curr = root; curr != NULL; curr = curr->next) { - PyRegion_NewRef(curr); + if (region_list_add_local_refs(root)) { + PyErr_Clear(); + return; } /* This runs Python code, which can change the region topology. @@ -2130,6 +2159,9 @@ do_gc_callback(GCState *gcstate, const char *phase, PyObject *stack[] = {phase_obj, info}; for (Py_ssize_t i=0; icallbacks); i++) { PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); + if (PyRegion_AddLocalRef(cb)) { + continue; + } Py_INCREF(cb); /* make sure cb doesn't go away */ r = PyObject_Vectorcall(cb, stack, 2, NULL); if (r == NULL) { @@ -2137,8 +2169,10 @@ do_gc_callback(GCState *gcstate, const char *phase, "calling GC callback %R", cb); } else { + PyRegion_RemoveLocalRef(r); Py_DECREF(r); } + PyRegion_RemoveLocalRef(cb); Py_DECREF(cb); } Py_DECREF(phase_obj); @@ -2415,6 +2449,8 @@ _PyGC_CollectRegion(PyThreadState *tstate, PyObject *region, _PyGC_Reason reason goto error; } region = value; + // Remove the reference to allow gc_collect_region_tree to + // release the GIL if the region is closed. PyRegion_RemoveLocalRef(value); Py_DECREF(value); } @@ -2800,6 +2836,8 @@ visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen gc_list = &gen->head; for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) { PyObject *op = FROM_GC(gc); + // TODO(regions): If callback moves op into a region, + // this function would start iterating objects in the region. Py_INCREF(op); int res = callback(op, arg); Py_DECREF(op);