diff --git a/src/underworld3/cython/petsc_generic_snes_solvers.pyx b/src/underworld3/cython/petsc_generic_snes_solvers.pyx index 4848c719..ee30c5d1 100644 --- a/src/underworld3/cython/petsc_generic_snes_solvers.pyx +++ b/src/underworld3/cython/petsc_generic_snes_solvers.pyx @@ -1008,6 +1008,8 @@ class SolverBaseClass(uw_object): self._stokes_nullspace = None if hasattr(self, "_stokes_nullspace_basis"): self._stokes_nullspace_basis = () + if hasattr(self, "_velocity_rotation_nullspace"): + self._velocity_rotation_nullspace = None if hasattr(self, "_constant_nullspace_obj"): self._constant_nullspace_obj = None @@ -2168,6 +2170,45 @@ class SNES_Scalar(SolverBaseClass): cdef DS ds = self.dm.getDS() cdef PtrContainer ext = self.compiled_extensions + # TODO(BUG): natural BC on an INTERNAL surface is partition-dependent. + # An interior facet has two support cells; the FE-assembly DM is kept + # non-overlapped on purpose (overlap double-counts volume assembly via + # LocalToGlobal+ADD — see discretisation_mesh.py ~L760). At a partition + # seam ~few interior facets have only ONE local support cell, and PETSc's + # DMPlexComputeBdResidual_Single_Internal attributes the whole per-facet + # elemVec to support[0]'s cell closure (plexfem.c ~L4928) with the facet + # normal oriented outward from support[0] (dmfieldds.c ~L790). When the + # local-only support cell is the OPPOSITE side from the serial support[0], + # the integral is the same value but is scattered through a different + # cell closure; closure DOFs that are non-owned on this rank are added + # locally then dropped at global assembly → the assembled load UNDER-counts + # by ~0.027% (the force *function* integral / RMS is identical to machine + # precision). In an ill-conditioned solve (augmented Stokes_Constrained) + # this amplifies to ~0.1% velocity. + # + # Three cheap workarounds were TESTED and do NOT work: + # * editing the boundary LABEL (owner-only / ghost-strip): no-op — the + # ghost facet copies already contribute nothing (PETSc integrates each + # facet once), so stripping them is bit-identical; + # * a 1-cell partition OVERLAP on the assembly DM: no-op — verified the + # overlap propagates (+ghost cells) yet F0 is bit-identical, so overlap + # does NOT recover the dropped DOFs (and it double-counts volume anyway); + # * assembling the surface load on a codim-1 SUBMESH (extract_surface): + # blocked — UW3 FE on an embedded surface fails at setup because the + # gradient/flux term has cdim components but is reshaped to dim + # (the manifold vector-FE gap). + # A deterministic support[0] cannot be imposed from UW3 (support order + # follows the DMPlex point partition). Genuine fixes, all substantial: + # (a) MANUALLY assemble the boundary load (per-facet ∫ f·φ keyed by GLOBAL + # velocity DOF, each facet once) into a partition-independent vector + # and inject it via a guarded SNES function wrap — reimplements the bd + # residual but sidesteps PETSc's support[0] scatter; + # (b) fix UW3 manifold vector-FE, then assemble the load on the surface + # submesh and map to the parent by coincident-node coordinate; + # (c) a PETSc patch making the interior-facet bd residual scatter to the + # OWNED support cell. + # See planning file (underworld.md, Bugs) and + # project_stokes_constrained_parallel_session. for index,bc in enumerate(self.natural_bcs): components = bc.components @@ -4532,7 +4573,11 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): # interior h DOFs directly in the fine local PetscSection # (_constrain_interior_multipliers_in_section) — lossless (correct # constraint-boundary closure) and refined/FMG-safe (no DMAddBoundary - # ordering restriction). Default ON. + # ordering restriction). Default ON: it is both a correctness-neutral DOF + # reduction AND a conditioning win — leaving the interior h DOFs in place + # keeps the near-singular ε-screened interior block in the solved system, + # which an iterative ([p,h] Schur) solve handles poorly. See that method's + # docstring for why disabling it is ill-conditioned, not "more accurate". self._reduce_interior_multiplier = True self._degree = degree @@ -4654,6 +4699,7 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): self._petsc_velocity_nullspace_basis = () self._stokes_nullspace = None self._stokes_nullspace_basis = () + self._velocity_rotation_nullspace = None # Construct strainrate tensor for future usage. # Grab gradients, and let's switch out to sympy.Matrix notation @@ -5295,6 +5341,10 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): def _reset_stokes_nullspace(self): self._stokes_nullspace = None self._stokes_nullspace_basis = () + # the cached velocity rotation nullspace is built from node coordinates, + # so it must be invalidated whenever the nullspace config / geometry + # changes (e.g. mesh deformation) — see _build_velocity_rotation_nullspace. + self._velocity_rotation_nullspace = None def _pressure_dirichlet_bcs(self): """Return essential boundary conditions applied to the pressure field.""" @@ -5515,6 +5565,79 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): flush=True, ) + def _build_velocity_rotation_nullspace(self): + """Cache a velocity-rotation-only ``MatNullSpace`` (zero in pressure / + multiplier blocks). Used to project the rigid-rotation gauge out of the + converged solution — see ``_remove_velocity_rotation_gauge``. Cached; + the cache is invalidated by ``_reset_stokes_nullspace`` / solver rebuild + because the modes depend on node coordinates.""" + if self._velocity_rotation_nullspace is not None: + return self._velocity_rotation_nullspace + if not self._petsc_velocity_nullspace_basis: + return None + + vecs = [self._build_velocity_nullspace_vector(m) + for m in self._petsc_velocity_nullspace_basis] + orthonormal = [] + for bvec in vecs: + for ovec in orthonormal: + bvec.axpy(-ovec.dot(bvec), ovec) + bnorm = bvec.norm() + if np.isclose(bnorm, 0.0): + bvec.destroy() + continue + bvec.scale(1.0 / bnorm) + orthonormal.append(bvec) + if not orthonormal: + return None + + self._velocity_rotation_nullspace = PETSc.NullSpace().create( + constant=False, vectors=tuple(orthonormal), comm=self.dm.comm) + return self._velocity_rotation_nullspace + + def _remove_velocity_rotation_gauge(self, gvec): + """Project the rigid-body rotation gauge out of the converged solution. + + For a free-slip (non-Dirichlet) velocity, the rigid rotations are a true + nullspace of the velocity block (A_uu·rotation = 0). The monolithic + nullspace attached for the solve is NOT removed inside a fieldsplit/Schur + iteration — the inner velocity KSP has no rotation nullspace, so it leaves + an unconstrained rigid rotation in the velocity. Because the operator is + blind to it, the true residual still converges to machine precision, but + the rotation amplitude is PARTITION-DEPENDENT (the tangential velocity then + differs serial-vs-parallel by ~0.1-1% while the physical / radial flow is + partition-clean to round-off). Since the rotation is a genuine nullspace, + removing it from the converged ``gvec`` does NOT change the residual and + yields the same rotation-free solution on any decomposition. This is the + velocity analogue of the constant-pressure gauge (see set_pressure_gauge). + No-op when there are no velocity rotation modes. + + Why a post-solve projection rather than attaching the rotation nullspace + to the fieldsplit velocity SUB-BLOCK so the inner KSP removes it in-solve: + the in-solve route was implemented and MEASURED, and it does not fix the + gauge. A ``DMSetNullSpaceConstructor(dm, 0, ...)`` (rotations-only, built + with ``DMPlexCreateRigidBody``) DOES attach the rotation nullspace to the + fieldsplit velocity block — verified: the velocity sub-block operator + carries the 1 (2-D) / 3 (3-D) rotation modes after PC setup — yet the + converged GLOBAL velocity still retained the full rotation gauge (rotation + coefficient ~0.18, i.e. unchanged). Removing a nullspace gauge is a + projection on the converged GLOBAL solution; attaching the nullspace to a + sub-block operator only constrains the inner Krylov solves, and with + ``fgmres`` + a variable (fieldsplit/Schur) preconditioner that reintroduces + rotation, the assembled outer solution is not gauge-free. (PETSc also + dispatches ``DMCreateSubDM`` nullspace constructors by SUB-DM-LOCAL field + index, so a field-0 constructor needs a block-sniffing guard to avoid + leaking onto the ``[p,h]`` Schur block — but even with that, the global + gauge persists.) So the explicit post-solve projection is the correct AND + necessary mechanism, not a stopgap: it is mathematically exact (the rotation + is a true nullspace, so it does not change the residual) and robust to + operator rebuild. Building only the rotation modes (zero in the pressure / + multiplier blocks) keeps it from touching the pressure/multiplier gauge, + which is handled by the monolithic nullspace.""" + rot_ns = self._build_velocity_rotation_nullspace() + if rot_ns is not None: + rot_ns.remove(gvec) + ## F0, F1 should be f0 and F1, (pf0 for Saddles can be added here) ## don't add new ones uf0, uF1 are redundant @@ -6224,6 +6347,28 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): appends the local offset of every h-bearing point regardless of constraint, so the copy back into the full-domain ``h`` MeshVariable is size-correct without change. + + Why this is lossless (and why disabling it is NOT a "more accurate" + reference). The interior ``h`` rows are the screening block alone, + ``ε M_ii h_i + ε M_ib h_b = 0`` (no constraint or buoyancy source reaches + the interior), so the interior multiplier is fully determined by the + boundary trace, ``h_i = -M_ii^{-1} M_ib h_b`` — a *bounded* O(h_b) + quantity that feeds back into the boundary row only at O(ε). At ε=1e-6 its + effect on the physical solution is negligible: pinning ``h_i = 0`` instead + moves a converged solve by ~1e-8 in velocity and ~1e-5 in topography + (measured, 2-D annulus). So the pinned DOFs carry no physical signal. + + Disabling the reduction (``_reduce_interior_multiplier = False``) does NOT + recover lost physics; it re-admits ~ndof/3 interior DOFs whose only + coupling is the near-singular ``ε M_ii`` block, enlarging and + ill-conditioning the ``[p,h]`` Schur complement. On a direct/well-converged + solve the answer is essentially unchanged (lossless, as above); on an + iterative grouped-Schur solve (e.g. ``snes_type=ksponly`` on the 3-D shell) + the extra ill-conditioned DOFs degrade convergence and the solve can land + on a different representative — which is the origin of the "answer moves a + few percent / parallel spread worsens when off" behaviour. That is a + conditioning effect, not evidence the knockout drops information. Keep it + ON; it is the recommended, validated path. """ from petsc4py import PETSc import numpy as np @@ -7245,6 +7390,13 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): self._attach_stokes_nullspace() self._snes_solve_with_retries(gvec, divergence_retries, verbose) + # Project the rigid-body rotation gauge out of the converged solution. + # The fieldsplit/Schur inner velocity solve leaves an unconstrained (and + # partition-dependent) rigid rotation that the true residual is blind to; + # removing this genuine velocity nullspace makes the tangential velocity + # partition-independent to round-off without changing the residual. + self._remove_velocity_rotation_gauge(gvec) + # SNESSetUpdate fires only at the START of each iteration, so the final # converged iterate is otherwise un-hooked. Apply the callbacks once more # to it (e.g. so a pressure gauge pins the FINAL pressure, and an diff --git a/src/underworld3/systems/solvers.py b/src/underworld3/systems/solvers.py index 081d552f..abc81502 100644 --- a/src/underworld3/systems/solvers.py +++ b/src/underworld3/systems/solvers.py @@ -2069,8 +2069,19 @@ class SNES_Stokes_Constrained(SNES_Stokes): The constraint is enforced in one coupled solve (no outer iteration). The augmented-Lagrangian term conditions the :math:`[p,h]` Schur complement without biasing the multiplier, and the interior multiplier DOFs are reduced - away so the solved block is boundary-sized. Serial only (the boundary mask is - not yet MPI-decomposed). See + away so the solved block is boundary-sized. + + Runs in parallel: the interior-multiplier reduction is rank-local section + surgery so the global system, velocity solve, and gauge-fixed topography are + partition-independent (validated in + ``tests/parallel/test_1063_constrained_freeslip_parallel.py``). On an + **enclosed** problem (a pressure null space is active) the constant pressure + and constant multiplier are gauge-free, and the solver lands on a + partition-dependent level for each. To keep the raw **pressure** reproducible + across ranks the solver pins the surface-mean pressure automatically (see + :attr:`auto_pressure_gauge`); the raw **multiplier** keeps its own gauge level, + so read dynamic topography through :meth:`topography` with ``reference="mean"`` + (gauge-invariant). See ``docs/developer/design/CONSTRAINED_FREESLIP_MULTIPLIER.md``. See Also @@ -2104,6 +2115,17 @@ def __init__( # ordinary Stokes solve, so the base assembly is unaffected. self._block_constraint_bcs = [] + # Guarded automatic pressure gauge (see _maybe_install_auto_gauge). On an + # enclosed constrained problem the constant pressure and constant + # multiplier are gauge-free; the solver lands on a partition-dependent + # level for each, so the raw fields are not reproducible across ranks. + # When a pressure null space is active we pin the surface-mean pressure, + # making the raw PRESSURE reproducible at zero cost to the velocity (the + # raw multiplier keeps its own gauge — read topography via reference="mean"). + # Set to False to opt out. + self.auto_pressure_gauge = True + self._auto_gauge_callback = None + # Default the grouped [p, lambda] Schur preconditioner to `selfp` (the base # Stokes default is `a11`). The constraint Schur complement # S_lambda = C A^-1 C^T needs a preconditioner built from the actual operator @@ -2114,8 +2136,158 @@ def __init__( # multiplier (= dynamic topography) clean at small augmentation. Override via # `solver.petsc_options["pc_fieldsplit_schur_precondition"] = "a11"` if desired. self.petsc_options["pc_fieldsplit_schur_precondition"] = "selfp" + + # Flexible outer Krylov (fgmres) is REQUIRED, not a tuning choice. The + # grouped [p, lambda] Schur preconditioner runs inner *iterative* sub-solves + # (velocity multigrid + the Schur KSP), so the preconditioner operator + # varies from one outer iteration to the next — a variable/nonlinear + # preconditioner. A non-flexible Krylov method (the base's gmres) is + # invalid for that: the preconditioned residual it minimises false-converges + # (it reported CONVERGED_RTOL in ~2 iterations while the TRUE residual + # ||r||/||b|| blew up to ~10^3), landing on a wrong, partition-dependent + # answer (the ~0.4% serial-vs-parallel velocity spread, and the failure to + # reproduce the Zhong response). fgmres handles the variable preconditioner + # correctly: the true residual converges and the solve is + # partition-independent. Override via petsc_options["ksp_type"] if needed. + self.petsc_options["ksp_type"] = "fgmres" + + # Judge outer convergence on the TRUE (unpreconditioned) residual. With a + # variable preconditioner the preconditioned-residual norm is not a fixed + # inner product and false-converges (it can report convergence while the + # true residual is still large), so the default preconditioned-norm test + # would stop the solve early even with a tight rtol — re-introducing the + # partition-dependent velocity. The unpreconditioned norm costs one extra + # matvec per iteration and makes the stopping test honest. + self.petsc_options["ksp_norm_type"] = "unpreconditioned" + + # Bound the outer iteration count. The well-set-up solve converges in a + # handful of outer iterations (~6 on the 3-D shell at cellSize 1/8); this + # generous cap just guarantees a pathological case fails loudly + # (DIVERGED_ITS) rather than silently grinding. Raise it for genuinely + # hard problems via petsc_options["ksp_max_it"]. + self.petsc_options["ksp_max_it"] = 100 + + # Tighten the Eisenstat-Walker adaptive linear tolerance (the base saddle + # point enables snes_ksp_ew with the PETSc default initial/max rtol 0.3). + # EW's loose-early heuristic is right for a NONLINEAR Newton solve, but the + # constrained solver is normally run linear (snes_type=ksponly), where EW + # then caps the single outer solve at rtol 0.3 — one outer iteration, true + # residual ~1e-5 relative. The augmented saddle point is ill-conditioned + # (augmentation ~1e4), so that loose residual is amplified into a ~0.1% + # partition-dependent velocity (GAMG's per-partition aggregation makes the + # preconditioner partition-dependent, which only cancels once the TRUE + # residual is driven down). Pinning EW's initial = max rtol to the solver + # tolerance makes the outer fgmres iterate until genuinely converged, so + # the velocity is partition-independent to round-off. Kept in sync by the + # `tolerance` setter below. + self.petsc_options["snes_ksp_ew_rtol0"] = self._tolerance * 1.0e-1 + self.petsc_options["snes_ksp_ew_rtolmax"] = self._tolerance * 1.0e-1 return + @property + def tolerance(self): + """Solver tolerance (see :class:`SNES_Stokes_SaddlePt.tolerance`). + + Overridden so that, in addition to ``snes_rtol`` / ``ksp_rtol`` / + ``ksp_atol``, the Eisenstat-Walker initial and max relative tolerances are + pinned to ``tolerance * 0.1`` — otherwise EW's default (0.3) under-solves + the ill-conditioned augmented constrained system on a linear solve and the + velocity becomes partition-dependent (see ``__init__``). + """ + return self._tolerance + + @tolerance.setter + def tolerance(self, value): + self._tolerance = value + self.petsc_options["snes_rtol"] = value + self.petsc_options["ksp_rtol"] = value * 1.0e-1 + self.petsc_options["ksp_atol"] = value * 1.0e-6 + self.petsc_options["snes_ksp_ew_rtol0"] = value * 1.0e-1 + self.petsc_options["snes_ksp_ew_rtolmax"] = value * 1.0e-1 + + def solve(self, *args, **kwargs): + """Solve the constrained Stokes system (see :meth:`SNES_Stokes.solve`). + + Installs the guarded automatic pressure gauge (if eligible) before + delegating to the base solve, so the raw pressure and multiplier are + partition-reproducible by construction on enclosed problems. + """ + self._warn_if_monolithic_direct() + self._maybe_install_auto_gauge() + return super().solve(*args, **kwargs) + + def _warn_if_monolithic_direct(self): + """Warn that a monolithic direct solve of the constrained system is a + serial diagnostic only. + + Setting ``pc_type = lu``/``cholesky`` factorises the whole + :math:`[\\,u, p, h\\,]` saddle point as one block. On the constrained + system this is a KNOWN-BAD path: it does not reproduce the validated + grouped-Schur (``selfp``) velocity response (e.g. surface velocity ~4e-3 + vs the ~1e-2 Zhong reference on the 3-D shell) AND it segfaults in + parallel (the indefinite KKT factorisation is not robust here). The + grouped :math:`\\mathbf{u}\\,|\\,[p,h]` field-split is the supported path; + use ``pc_type = lu`` only as a serial cross-check, knowing the response is + not the benchmark reference. + """ + if not self._block_constraint_bcs: + return + try: + pc_type = self.petsc_options["pc_type"] + except KeyError: + return + if pc_type in ("lu", "cholesky"): + import warnings + warnings.warn( + "Stokes_Constrained: a monolithic direct solve " + f"(pc_type='{pc_type}') of the constrained saddle point is a " + "serial diagnostic only — it does not reproduce the validated " + "grouped-Schur velocity response and segfaults in parallel. Use " + "the default field-split (u | [p,h]) path for production runs.", + stacklevel=2, + ) + + def _maybe_install_auto_gauge(self): + """Pin the (p, h) gauge consistently on an enclosed constrained problem. + + Conservative — installs ``set_pressure_gauge`` on the first constraint + boundary (``reference=0``) only when ALL of the following hold, otherwise + it is a no-op and the solve path is unchanged: + + * ``auto_pressure_gauge`` is left True (the default), + * we have not already installed it (idempotent across re-solves), + * at least one multiplier constraint is registered, + * a pressure null space is active (``petsc_use_nullspace`` / enclosed) — + this is the gauge-freedom flag; with it off the pressure is determined + and must not be shifted, + * no pressure Dirichlet BC already pins the gauge, + * the user has registered no update callback of their own (e.g. an + explicit ``set_pressure_gauge`` or a smoother) — we never clobber it. + + Pinning the surface-mean pressure makes the raw **pressure** field + partition-reproducible (measured: the enclosed-shell mean pressure goes + from a ~10% serial-vs-np8 spread to the assembly floor, ~0.4%). It is + physics-neutral: the velocity is bit-identical with and without the pin. + It does NOT fix the raw **multiplier** ``h`` — the constant multiplier is + an independent gauge freedom the pressure pin does not touch — so dynamic + topography must still be read with ``topography(..., reference="mean")``, + which is gauge-invariant by construction. + """ + if not self.auto_pressure_gauge: + return + if self._auto_gauge_callback is not None: + return + if not self._block_constraint_bcs: + return + if not self._petsc_use_pressure_nullspace: + return + if self._pressure_dirichlet_bcs(): + return + if self._snes_update_callbacks: + return + boundary = self._block_constraint_bcs[0].boundary + self._auto_gauge_callback = self.set_pressure_gauge(boundary, 0.0) + @property def saddle_preconditioner(self): """Not used by ``Stokes_Constrained`` — the Schur preconditioner is built @@ -2194,10 +2366,12 @@ def add_constraint_bc(self, boundary, g=0.0, normal=None, screening=None, # Validated bit-identical at np=1/2/4 (velocity L2 and mean-stripped # boundary topography) in # tests/parallel/test_1063_constrained_freeslip_parallel.py. - # NOTE: on enclosed problems the raw multiplier h carries the [p,λ] gauge - # constant, of which the solver lands on a partition-dependent - # representative — strip its boundary mean for a reproducible topography - # (`topography(..., reference="mean")`). + # NOTE: on enclosed problems the constant pressure and constant multiplier + # are gauge-free and land on a partition-dependent level. The automatic + # pressure gauge (auto_pressure_gauge, see _maybe_install_auto_gauge) pins + # the raw PRESSURE reproducibly, but the raw multiplier h keeps its own + # gauge level — read dynamic topography via topography(..., reference="mean"), + # which is gauge-invariant and partition-reproducible by construction. if not hasattr(self.mesh.boundaries, boundary): raise ValueError( @@ -2284,6 +2458,13 @@ def topography(self, boundary, buoyancy_scale=1.0, reference=None): **no** gauge freedom (e.g. an open boundary), where the mean of :math:`h` is the physical mean traction and must NOT be removed. + Note that the automatic pressure gauge (:attr:`auto_pressure_gauge`) fixes + the raw *pressure* level but NOT the raw *multiplier* level (the constant + multiplier is an independent gauge freedom). So on an enclosed problem the + raw multiplier (``reference=None``) is still partition-dependent — + ``reference="mean"`` is the gauge-invariant, partition-reproducible read + for dynamic topography and is the recommended path. + Parameters ---------- boundary : str diff --git a/tests/parallel/test_1063_constrained_freeslip_parallel.py b/tests/parallel/test_1063_constrained_freeslip_parallel.py index 789e1845..0d1c75b7 100644 --- a/tests/parallel/test_1063_constrained_freeslip_parallel.py +++ b/tests/parallel/test_1063_constrained_freeslip_parallel.py @@ -76,6 +76,76 @@ def _solve_diagnostics(kind): return L2, topo +# Serial (np=1) reference for the gauge-reproducibility test below. The enclosed +# iso problem with an ACTIVE pressure null space exercises the automatic pressure +# gauge (auto_pressure_gauge, default on): the constant pressure and constant +# multiplier are both gauge-free, so without a pin the solver lands on a +# partition-dependent level for each. The auto gauge pins the raw PRESSURE +# reproducibly (the raw multiplier keeps its own gauge freedom — topography is +# read gauge-invariantly via reference="mean"). Velocity is physics-neutral. +# (velocity L2, raw meanP, MEAN-STRIPPED boundary topography). +# Recompute with `python gauge`. +GOLDEN_GAUGE = (6.194547487092e-01, -3.847657609797e-10, 3.781793823254e+01) + + +def _solve_gauge_diagnostics(): + """Enclosed constrained annulus with an active pressure null space (so the + automatic pressure gauge fires); return the gauge-relevant diagnostics.""" + mesh = uw.meshing.Annulus(radiusOuter=1.0, radiusInner=0.5, + cellSize=0.12, qdegree=4) + v = uw.discretisation.MeshVariable("Ug", mesh, 2, degree=2) + p = uw.discretisation.MeshVariable("Pg", mesh, 1, degree=1) + X = mesh.CoordinateSystem.X + unit_r = mesh.CoordinateSystem.unit_e_0 + + st = uw.systems.Stokes_Constrained(mesh, velocityField=v, pressureField=p) + st.constitutive_model = uw.constitutive_models.ViscousFlowModel + st.constitutive_model.Parameters.shear_viscosity_0 = 1.0 + st.tolerance = 1.0e-10 + st.petsc_use_nullspace = True # enclosed -> pressure null space active + st.add_essential_bc((0.0, 0.0), "Lower") # kill the velocity rotation null space + st.add_constraint_bc("Upper", g=0.0, normal=unit_r) + st.bodyforce = 1.0e2 * sympy.sin(3 * sympy.atan2(X[1], X[0])) * unit_r + st.solve(zero_init_guess=True) + assert st._auto_gauge_callback is not None, "auto pressure gauge should have fired" + + L2 = float(np.sqrt(uw.maths.Integral(mesh, v.sym.dot(v.sym)).evaluate())) + vol = float(uw.maths.Integral(mesh, sympy.Integer(1)).evaluate()) + # RAW mean pressure — pinned to ~0 by the auto gauge, partition-reproducible. + meanP = float(uw.maths.Integral(mesh, p.sym[0]).evaluate()) / vol + # MEAN-STRIPPED topography (reference="mean") — gauge-invariant, the supported + # partition-reproducible read of the multiplier. + topo_fn = st.topography("Upper", reference="mean") + topoL2 = float(np.sqrt(uw.maths.BdIntegral( + mesh=mesh, fn=topo_fn ** 2, boundary="Upper").evaluate())) + return L2, meanP, topoL2 + + +def test_constrained_raw_gauge_partition_independent(): + """With the automatic pressure gauge on (default), the RAW mean pressure is + partition-independent (pinned to ~0), the velocity stays bit-identical (the + gauge is physics-neutral), and the mean-stripped topography is reproducible. + The raw multiplier level is NOT asserted reproducible — it keeps an + independent gauge freedom the pressure pin does not touch (use + reference="mean").""" + L2, meanP, topo = _solve_gauge_diagnostics() + L2_ref, meanP_ref, topo_ref = GOLDEN_GAUGE + # Velocity is physics-neutral under the gauge; the residual ~1e-9 spread is + # parallel round-off in the pressure-null-space projection (this enclosed + # config carries the null space, unlike the half-pinned case above), well + # below any real partition effect. + assert np.isclose(L2, L2_ref, rtol=1e-8, atol=0), ( + f"velocity L2 differs serial vs np={uw.mpi.size}: {L2_ref} vs {L2}") + # meanP is pinned to ~0 on the gauge boundary; compare on an absolute scale + # (a relative tolerance is meaningless near machine zero). + assert np.isclose(meanP, meanP_ref, rtol=0, atol=1e-6), ( + f"raw mean pressure differs serial vs np={uw.mpi.size}: " + f"{meanP_ref} vs {meanP}") + assert np.isclose(topo, topo_ref, rtol=1e-6, atol=0), ( + f"mean-stripped topography differs serial vs np={uw.mpi.size}: " + f"{topo_ref} vs {topo}") + + @pytest.mark.parametrize("kind", ["iso", "ti"]) def test_constrained_freeslip_partition_independent(kind): """The parallel solve must reproduce the serial reference: velocity bit- @@ -91,9 +161,14 @@ def test_constrained_freeslip_partition_independent(kind): if __name__ == "__main__": - # Recompute the serial GOLDEN reference: `python {iso,ti}`. + # Recompute the serial GOLDEN reference: `python {iso,ti,gauge}`. import sys _kind = sys.argv[1] if len(sys.argv) > 1 else "iso" - _L2, _topo = _solve_diagnostics(_kind) - if uw.mpi.rank == 0: - print(f"DIAG {_L2:.12e} {_topo:.12e}") + if _kind == "gauge": + _L2, _meanP, _topo = _solve_gauge_diagnostics() + if uw.mpi.rank == 0: + print(f"DIAG_GAUGE {_L2:.12e} {_meanP:.12e} {_topo:.12e}") + else: + _L2, _topo = _solve_diagnostics(_kind) + if uw.mpi.rank == 0: + print(f"DIAG {_L2:.12e} {_topo:.12e}") diff --git a/tests/test_1064_constrained_spherical_shell_response.py b/tests/test_1064_constrained_spherical_shell_response.py index 56acda5a..72af086b 100644 --- a/tests/test_1064_constrained_spherical_shell_response.py +++ b/tests/test_1064_constrained_spherical_shell_response.py @@ -6,9 +6,23 @@ scale for this low-resolution response case; * a direct-LU diagnostic path gives matching Nitsche and constrained responses, but it does not reproduce the validated Nitsche/default response and should - not be treated as the benchmark reference; -* the practical fast grouped-Schur constrained path currently does not reproduce - the Zhong velocity response and remains an expected failure. + not be treated as the benchmark reference. A monolithic direct factorisation + of the *constrained* saddle point is a SERIAL diagnostic only: it gives the + wrong velocity response and segfaults in parallel (the indefinite KKT + factorisation is not robust here), so ``Stokes_Constrained`` now emits a + warning when ``pc_type`` is ``lu``/``cholesky``. Use the grouped + ``u | [p,h]`` field-split for production; +* the DEFAULT constrained field-split path now reproduces the Zhong velocity + response: ``Stokes_Constrained`` defaults to a flexible outer Krylov (fgmres) + with an unpreconditioned (true-residual) convergence test and a tightened + Eisenstat-Walker tolerance — required because the grouped ``u | [p,h]`` Schur + preconditioner uses inner *iterative* sub-solves (a variable preconditioner), + which a non-flexible method false-converged on, giving a wrong, partition- + dependent answer; +* two *alternative* sub-solver choices remain expected failures for this load: + the fast grouped-Schur variant with a ``gasm`` Schur block, and the + direct-LU-sub-solve field-split (LU on the indefinite KKT blocks is not robust + and segfaults in parallel). Run: pixi run -e amr-dev pytest -q tests/test_1064_constrained_spherical_shell_response.py @@ -176,6 +190,24 @@ def test_default_nitsche_matches_zhong_velocity_response(): assert abs(cmb_velocity - ZHONG_CMB_VELOCITY) / ZHONG_CMB_VELOCITY < 0.05 +def test_default_constrained_matches_zhong_velocity_response(): + """The DEFAULT constrained field-split path reproduces the Zhong response. + + This is the headline of the fgmres + unpreconditioned-norm + tightened-EW + solver defaults: the grouped u | [p,h] Schur solve now genuinely converges + (it previously false-converged on the variable preconditioner) and matches + the validated Nitsche/default velocity scale to within the benchmark tol. + """ + surface_velocity, cmb_velocity, snes_reason = solve_response( + "constrained", + "default", + ) + + assert snes_reason > 0 + assert abs(surface_velocity - ZHONG_SURFACE_VELOCITY) / ZHONG_SURFACE_VELOCITY < 0.05 + assert abs(cmb_velocity - ZHONG_CMB_VELOCITY) / ZHONG_CMB_VELOCITY < 0.05 + + def test_direct_lu_diagnostic_constrained_matches_direct_lu_diagnostic_nitsche(): nitsche_surface, nitsche_cmb, nitsche_reason = solve_response( "nitsche", diff --git a/tests/test_1065_rotation_gauge_freeslip.py b/tests/test_1065_rotation_gauge_freeslip.py new file mode 100644 index 00000000..a7ac3706 --- /dev/null +++ b/tests/test_1065_rotation_gauge_freeslip.py @@ -0,0 +1,79 @@ +"""Regression test for the free-slip velocity rigid-rotation gauge fix. + +On a fully free-slip (no Dirichlet velocity BC) domain the rigid-body rotations +are a true nullspace of the velocity block (A_uu·rotation = 0). PETSc's +fieldsplit/Schur inner velocity solve has no rotation nullspace attached, so it +leaves an UNCONSTRAINED rigid rotation in the converged velocity. The operator is +blind to it (the true residual still converges to machine precision), but the +rotation amplitude is partition-dependent — so the tangential velocity differs +serial-vs-parallel even though the physical (radial) flow does not. + +``SNES_Stokes_SaddlePt.solve`` projects this rotation gauge out of the converged +solution (``_remove_velocity_rotation_gauge``). This test pins that behaviour: on +a fully free-slip annulus driven by a PURELY RADIAL body force (which does zero +work against a rigid rotation, so the physical solution carries no net rotation), +the converged velocity's projection onto the rigid-rotation mode must be ~0. + +Without the fix the solve leaves a non-trivial rotation component (~1e-3 of the +flow); with the fix it is at round-off. Serial, fast — no MPI required. +""" + +import numpy as np +import sympy +import pytest + +import underworld3 as uw + +pytestmark = [pytest.mark.timeout(120)] + + +def _freeslip_rotation_coefficient(): + """Solve a fully free-slip annulus with a radial body force and return the + normalised projection of the velocity onto the rigid-rotation mode + t(x,y) = (-y, x): || / (|v| |t|). ~0 iff the rotation gauge is removed.""" + mesh = uw.meshing.Annulus(radiusOuter=1.0, radiusInner=0.5, + cellSize=0.12, qdegree=4) + v = uw.discretisation.MeshVariable("U", mesh, 2, degree=2) + p = uw.discretisation.MeshVariable("P", mesh, 1, degree=1) + X = mesh.CoordinateSystem.X + unit_r = mesh.CoordinateSystem.unit_e_0 + + st = uw.systems.Stokes_Constrained(mesh, velocityField=v, pressureField=p) + st.constitutive_model = uw.constitutive_models.ViscousFlowModel + st.constitutive_model.Parameters.shear_viscosity_0 = 1.0 + st.tolerance = 1.0e-10 + st.petsc_use_nullspace = True # build the rotation (+ pressure) nullspace + # Fully free-slip: BOTH boundaries are constraint BCs, NO essential velocity + # BC -> the rigid-rotation nullspace is ACTIVE (the case the fix targets). + st.add_constraint_bc("Upper", g=0.0, normal=unit_r) + st.add_constraint_bc("Lower", g=0.0, normal=unit_r) + # purely radial drive: does zero work against a rigid rotation, so the + # physical solution has no net rotation component. + st.bodyforce = 1.0e2 * sympy.sin(3 * sympy.atan2(X[1], X[0])) * unit_r + st.solve(zero_init_guess=True) + + # rigid-rotation field t = (-y, x); project the converged velocity onto it. + rot = sympy.Matrix([-X[1], X[0]]) + v_dot_rot = float(uw.maths.Integral(mesh, v.sym.dot(rot)).evaluate()) + rot_dot_rot = float(uw.maths.Integral(mesh, rot.dot(rot)).evaluate()) + v_dot_v = float(uw.maths.Integral(mesh, v.sym.dot(v.sym)).evaluate()) + # normalised rotation coefficient: || / (|v| |t|) + return abs(v_dot_rot) / (np.sqrt(v_dot_v) * np.sqrt(rot_dot_rot)) + + +@pytest.mark.tier_a +@pytest.mark.level_1 +def test_freeslip_velocity_has_no_rotation_gauge(): + """The converged free-slip velocity must carry no rigid-rotation component: + the solver projects the rotation gauge out. Guards + ``_remove_velocity_rotation_gauge`` — without it this coefficient is ~1e-3.""" + coeff = _freeslip_rotation_coefficient() + assert coeff < 1.0e-8, ( + "free-slip velocity retains a rigid-rotation gauge component " + f"(||/(|v||rot|) = {coeff:.3e}); the post-solve rotation " + "projection (_remove_velocity_rotation_gauge) did not run / is ineffective" + ) + + +if __name__ == "__main__": + print(f"rotation coefficient = {_freeslip_rotation_coefficient():.6e}")