From 96afab6f3aa65ce091f9ce7f7f24525ba1ed967b Mon Sep 17 00:00:00 2001 From: lmoresi Date: Thu, 18 Jun 2026 16:55:29 +1000 Subject: [PATCH 1/4] =?UTF-8?q?WIP:=20Jacobian=20consistent-tangent=20fix?= =?UTF-8?q?=20+=20Picard=E2=86=92Newton=20continuation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gated behind consistent_jacobian (default False = bit-identical Picard). - symbolic_keep_constants unwrap mode (constants[]-safe, drift-guarded) - consistent_jacobian: False | True | 'continuation' (alpha-blend via constants[]) - model-owned flux_jacobian hook; Nitsche bd_F1 wired Validated: level_1 tier_a 225/225, units 64/64, constant-visc bit-identical. Underworld development team with AI support from Claude Code --- src/underworld3/constitutive_models.py | 20 ++ .../cython/petsc_generic_snes_solvers.pyx | 299 ++++++++++++++++-- src/underworld3/function/expressions.py | 25 +- 3 files changed, 310 insertions(+), 34 deletions(-) diff --git a/src/underworld3/constitutive_models.py b/src/underworld3/constitutive_models.py index cc75c20b..5e5fb8c1 100644 --- a/src/underworld3/constitutive_models.py +++ b/src/underworld3/constitutive_models.py @@ -573,6 +573,26 @@ def flux_1d(self): return uw.maths.tensor.rank2_to_voigt(flux, dim=self.dim) + @property + def flux_jacobian(self): + """Optional smooth surrogate flux for Jacobian assembly. + + Returns ``None`` by default, meaning the solver differentiates the + exact :attr:`flux` (the Newton fix unwraps it first; a generic Min/Max + kink-smoothing fallback then rounds any remaining yield kink). + + A model whose flux has a non-smooth yield kink (e.g. hard-``Min`` + viscoplasticity) may override this to supply a physically-motivated + *smooth constitutive law for the tangent only*. The residual still uses + the exact :attr:`flux`, so the converged solution satisfies the true + constitutive law — only the Newton search direction is smoothed, giving + a robust, line-search-friendly tangent without changing the answer. + + Shape must match :attr:`flux` (the solver substitutes it for ``F1`` when + forming the velocity-gradient Jacobian blocks). + """ + return None + def _reset(self): """Flags that the expressions in the consitutive tensor need to be refreshed and also that the solver will need to rebuild the stiffness matrix and jacobians""" diff --git a/src/underworld3/cython/petsc_generic_snes_solvers.pyx b/src/underworld3/cython/petsc_generic_snes_solvers.pyx index d64b3c12..e4905f89 100644 --- a/src/underworld3/cython/petsc_generic_snes_solvers.pyx +++ b/src/underworld3/cython/petsc_generic_snes_solvers.pyx @@ -18,6 +18,34 @@ from underworld3.utilities._api_tools import class_or_instance_method from underworld3.function import expression as public_expression expression = lambda *x, **X: public_expression(*x, _unique_name_generation=True, **X) +from underworld3.function.expressions import unwrap_expression as _unwrap_expression + + +def _jacobian_unwrap(expr): + """Expand UWexpressions down to (but NOT including) constant atoms, for use + as the input to a Jacobian derivative (``derive_by_array`` / ``diff``). + + Applied element-wise over a sympy ``Matrix``/``Array`` so atoms embedded in + the residual flux are reached. Non-constant UWexpressions (e.g. the + effective viscosity ``Min(eta0, tau_y/2/eps_II)``) are expanded so the + derivative sees their field / grad-v dependence and forms the full Newton + tangent. Truly-constant atoms (``eta0``, ``tau_y``, ...) are kept as the + *same* symbol object so the JIT ``constants[]`` runtime-update mechanism is + preserved — the keep-constants predicate is shared with + ``getext()._extract_constants`` so the two cannot drift apart. + + This is a no-op for constant-viscosity problems (eta has no grad-v + dependence), so those Jacobians stay bit-identical. + + See ``docs/developer/design/jacobian-unwrap-constants-bug.md``. + """ + f = lambda e: _unwrap_expression(e, mode="symbolic_keep_constants") + if isinstance(expr, sympy.MatrixBase): + return expr.applyfunc(f) + if isinstance(expr, sympy.NDimArray): + return sympy.Array([f(e) for e in expr], expr.shape) + return f(expr) # scalar expression + include "petsc_extras.pxi" @@ -40,6 +68,38 @@ class SolverBaseClass(uw_object): self.compiled_extensions = None self.constants_manifest = [] + # Jacobian tangent selection: False | True | "continuation". + # + # False (default): differentiate the residual flux *as wrapped* — the + # effective viscosity is frozen, giving a Picard / defect-correction + # tangent. BIT-IDENTICAL to the long-standing behaviour. Globally + # robust; load-bearing for the tuned hard-yield viscoplastic paths. + # True: unwrap the flux before differentiation so the tangent captures + # d(eta)/d(grad v) (full Newton). Fast near the solution; its yield + # kink can stall the line search far from it. + # "continuation": Picard -> Newton. Blend J(alpha) = J_picard + + # alpha*(J_newton - J_picard) with alpha a constants[] parameter + # ramped 0 -> 1 by a SNES monitor as the residual drops. Picard + # locates the basin, Newton gives quadratic convergence inside it + # (cf. Spiegelman et al. 2016; ASPECT defect-correction-then-Newton). + # alpha=0 is bit-identical to Picard, so no recompile to switch. + # + # The Newton flux for a model whose flux has a non-smooth yield kink is + # the model's own smooth law (constitutive_model.flux_jacobian) when it + # provides one; otherwise the exact unwrapped flux. + # + # See docs/developer/design/jacobian-unwrap-constants-bug.md. + self.consistent_jacobian = False + # Picard->Newton continuation parameter (constants[]-routed so it can be + # ramped at solve time without a JIT recompile). 0 = Picard, 1 = Newton. + self._newton_alpha = uw.function.expression( + r"\alpha_{N}", sympy.Float(0.0), + "Picard->Newton continuation fraction (0=Picard, 1=Newton)", + ) + # Switch threshold: ramp alpha toward 1 once the relative residual falls + # below this (the basin-of-attraction heuristic for Newton). + self.newton_switch_rtol = 1.0e-2 + # Fine-grained rebuild flags backing the is_setup property. See the # is_setup docstring and _build() for how these are consumed. self._needs_dm_rebuild = True @@ -88,6 +148,86 @@ class SolverBaseClass(uw_object): # smoother / coarse-solver options with the framework FMG bundle. self._pc_user_override = False + def _jacobian_source(self, expr, newton_expr=None): + """Prepare a residual flux for Jacobian differentiation. + + ``expr`` is the exact (Picard) flux; ``newton_expr`` is the consistent + (Newton) flux to use — when None it is the unwrapped ``expr`` (the + derivative then captures d(eta)/d(grad v)). The residual itself is never + passed through here, so the converged solution always satisfies the + exact constitutive law. + + Returns, by ``consistent_jacobian`` mode: + * False -> ``expr`` (frozen viscosity; bit-identical Picard tangent) + * True -> ``newton_expr`` (full consistent Newton tangent) + * "continuation" -> ``expr + alpha*(newton_expr - expr)`` with alpha a + constants[] parameter ramped 0->1 at solve time. Differentiating + this gives ``G_picard + alpha*(G_newton - G_picard)`` because alpha + is constant w.r.t. the unknowns. alpha=0 is bit-identical to Picard. + + No-op for constant viscosity (``newton_expr`` == ``expr``). + """ + mode = self.consistent_jacobian + if not mode: + return expr + if newton_expr is None: + newton_expr = _jacobian_unwrap(expr) + if mode == "continuation": + a = self._newton_alpha + if isinstance(expr, sympy.MatrixBase): + ne = newton_expr if isinstance(newton_expr, sympy.MatrixBase) \ + else sympy.Matrix(newton_expr) + return expr + a * (ne - expr) + if isinstance(expr, sympy.NDimArray): + # flatten to scalars first — iterating an N-d Array yields + # sub-arrays, not elements + ex = expr.reshape(expr._loop_size) + ne = sympy.Array(newton_expr).reshape(expr._loop_size) + return sympy.Array( + [e + a * (n - e) for e, n in zip(ex, ne)], expr.shape + ) + return expr + a * (newton_expr - expr) # scalar + return newton_expr + + def _newton_flux(self, exact_F1): + """Newton (consistent) flux for the bulk ``F1`` Jacobian source. + + Returns the constitutive model's own smooth tangent law + (``constitutive_model.flux_jacobian``) when it supplies one — matched to + the container/shape of ``exact_F1`` — otherwise ``None`` so that + :meth:`_jacobian_source` falls back to the exact flux unwrapped. Lets a + model whose flux has a non-smooth yield kink provide a physically + motivated smooth tangent without changing the residual. + """ + cm = getattr(self, "constitutive_model", None) + smooth = getattr(cm, "flux_jacobian", None) if cm is not None else None + if smooth is None: + return None + try: + sm = sympy.Array(smooth) + ex = sympy.Array(exact_F1) + if sm.shape != ex.shape: + sm = sm.reshape(*ex.shape) + if isinstance(exact_F1, sympy.MatrixBase): + return sympy.Matrix(exact_F1.shape[0], exact_F1.shape[1], list(sm)) + return sm + except Exception: + return None + + def _set_newton_alpha(self, value): + """Set the Picard->Newton continuation fraction and push it to the DS. + + alpha is routed through ``constants[]`` (it appears as a constant atom + in the blended Jacobian), so this updates the live tangent WITHOUT a JIT + recompile. No-op outside ``consistent_jacobian == "continuation"`` (alpha + is then absent from the constants manifest). + """ + self._newton_alpha.sym = sympy.Float(value) + try: + self._update_constants() + except Exception: + pass + @property def preconditioner(self): """Preconditioner selection for the (velocity) block. @@ -767,7 +907,10 @@ class SolverBaseClass(uw_object): verbose : bool, default=False Log each retry on rank 0. """ - self.snes.solve(None, gvec) + if self.consistent_jacobian == "continuation": + self._continuation_solve(gvec, verbose=verbose) + else: + self.snes.solve(None, gvec) if divergence_retries <= 0: return for _r in range(divergence_retries): @@ -782,6 +925,36 @@ class SolverBaseClass(uw_object): ) self.snes.solve(None, gvec) + def _continuation_solve(self, gvec, verbose=False): + """Picard -> Newton continuation via the constants[]-routed alpha. + + Stage 1 solves with the frozen (Picard) tangent (alpha=0) to a loose + tolerance to enter Newton's basin of attraction; stage 2 ramps to the + consistent (Newton) tangent (alpha=1) and warm-starts to the requested + tolerance. alpha is toggled through ``constants[]`` so neither stage + triggers a JIT recompile (cf. Spiegelman et al. 2016; ASPECT). + """ + rtol, atol, stol, max_it = self.snes.getTolerances() + + # Stage 1 — Picard (alpha=0), loose tolerance. + self._set_newton_alpha(0.0) + self.snes.setTolerances(rtol=max(self.newton_switch_rtol, rtol)) + self.snes.solve(None, gvec) + if verbose and uw.mpi.rank == 0: + print(f"continuation Picard: reason={self.snes.getConvergedReason()} " + f"it={self.snes.getIterationNumber()}", flush=True) + + # Stage 2 — Newton (alpha=1), requested tolerance, warm-started. + self._set_newton_alpha(1.0) + self.snes.setTolerances(rtol=rtol, atol=atol, stol=stol, max_it=max_it) # restore + self.snes.solve(None, gvec) + if verbose and uw.mpi.rank == 0: + print(f"continuation Newton: reason={self.snes.getConvergedReason()} " + f"it={self.snes.getIterationNumber()}", flush=True) + + # Restore a clean Picard tangent for any subsequent solve (next step). + self._set_newton_alpha(0.0) + @timing.routine_timer_decorator def _build(self, verbose: bool = False, @@ -2174,8 +2347,8 @@ class SNES_Scalar(SolverBaseClass): # f0 = sympy.Array(uw.function.fn_substitute_expressions(self.F0.sym)).reshape(1).as_immutable() # F1 = sympy.Array(uw.function.fn_substitute_expressions(self.F1.sym)).reshape(dim).as_immutable() - # Don't unwrap here — let getext()'s two-phase unwrap handle it. - # This preserves constant UWexpressions as symbols for the constants[] mechanism. + # RESIDUAL: don't unwrap here — let getext()'s two-phase unwrap handle + # it (preserves constant UWexpressions as symbols for constants[]). f0 = sympy.Array(self.F0.sym).reshape(1).as_immutable() # F1 is the flux vector, which lives in the embedded coordinate # space (cdim components). For volume meshes dim==cdim so this @@ -2191,10 +2364,17 @@ class SNES_Scalar(SolverBaseClass): fns_residual = [self._u_f0, self._u_F1] - G0 = sympy.derive_by_array(f0, U) - G1 = sympy.derive_by_array(f0, L) - G2 = sympy.derive_by_array(F1, U) - G3 = sympy.derive_by_array(F1, L) + # JACOBIAN: unwrap (keep constants) + smooth Min/Max kinks so the + # derivative sees the field-dependence of any nonlinear coefficient + # (full Newton) while the residual keeps the exact form. No-op for + # constant coefficients -> bit-identical. See _jacobian_source. + f0_jac = self._jacobian_source(f0) + F1_jac = self._jacobian_source(F1, self._newton_flux(F1)) + + G0 = sympy.derive_by_array(f0_jac, U) + G1 = sympy.derive_by_array(f0_jac, L) + G2 = sympy.derive_by_array(F1_jac, U) + G3 = sympy.derive_by_array(F1_jac, L) # Re-organise if needed / make hashable @@ -3092,8 +3272,9 @@ class SNES_Vector(SolverBaseClass): # F1 = sympy.Array(uw.function.fn_substitute_expressions(self.F1.sym)).reshape(dim,dim).as_immutable() # Residual piece shapes: f0 is (dim,) per-component, F1 is (dim, dim). - # Don't unwrap here — let getext()'s two-phase unwrap handle it. - # This preserves constant UWexpressions as symbols for the constants[] mechanism. + # RESIDUAL: don't unwrap here — let getext()'s two-phase unwrap handle + # it (preserves constant UWexpressions as symbols for constants[]). The + # Jacobian sources (f0_jac_list / F1_user_jac) are derived below. F0_user = sympy.Matrix(self.F0.sym) F1_user = sympy.Matrix(self.F1.sym) @@ -3126,6 +3307,13 @@ class SNES_Vector(SolverBaseClass): U_list = [self.u.sym[0, c] for c in range(dim)] L = self.Unknowns.L + # JACOBIAN sources: unwrap (keep constants) + smooth Min/Max kinks so + # each sympy.diff below sees the field-dependence of any nonlinear + # coefficient (full Newton) while the residual above stays exact. No-op + # for constant coefficients -> bit-identical. See _jacobian_source. + f0_jac_list = [self._jacobian_source(e) for e in f0_list] + F1_jac = self._jacobian_source(F1_user, self._newton_flux(F1_user)) + # Explicit-index Jacobian construction — writes each entry directly # into PETSc's flat [fc, gc, df, dg] layout via row-major 2D matrices. # See docs/developer/subsystems/petsc-jacobian-layout.md for the @@ -3137,21 +3325,21 @@ class SNES_Vector(SolverBaseClass): G0 = sympy.zeros(Nc, Nc) for fc in range(Nc): for gc in range(Nc): - G0[fc, gc] = sympy.diff(f0_list[fc], U_list[gc]) + G0[fc, gc] = sympy.diff(f0_jac_list[fc], U_list[gc]) # G1[fc*Nc + gc, df] = ∂f0[fc] / ∂L[gc, df] G1 = sympy.zeros(Nc * Nc, dim) for fc in range(Nc): for gc in range(Nc): for df in range(dim): - G1[fc * Nc + gc, df] = sympy.diff(f0_list[fc], L[gc, df]) + G1[fc * Nc + gc, df] = sympy.diff(f0_jac_list[fc], L[gc, df]) # G2[fc*Nc + gc, df] = ∂F1[fc, df] / ∂U[gc] G2 = sympy.zeros(Nc * Nc, dim) for fc in range(Nc): for gc in range(Nc): for df in range(dim): - G2[fc * Nc + gc, df] = sympy.diff(F1_user[fc, df], U_list[gc]) + G2[fc * Nc + gc, df] = sympy.diff(F1_jac[fc, df], U_list[gc]) # G3[fc*Nc + gc, df*dim + dg] = ∂F1[fc, df] / ∂L[gc, dg] G3 = sympy.zeros(Nc * Nc, dim * dim) @@ -3159,7 +3347,7 @@ class SNES_Vector(SolverBaseClass): for gc in range(Nc): for df in range(dim): for dg in range(dim): - G3[fc * Nc + gc, df * dim + dg] = sympy.diff(F1_user[fc, df], L[gc, dg]) + G3[fc * Nc + gc, df * dim + dg] = sympy.diff(F1_jac[fc, df], L[gc, dg]) self._G0 = sympy.ImmutableMatrix(G0) self._G1 = sympy.ImmutableMatrix(G1) @@ -3223,6 +3411,10 @@ class SNES_Vector(SolverBaseClass): bc.fns["u_F1"] = sympy.ImmutableDenseMatrix(bd_F1) fns_bd_residual += [bc.fns["u_F1"]] + # Nitsche gradient-traction Jacobian source (unwrap + smooth + # kinks); residual u_F1 above stays exact. See _jacobian_source. + bd_F1_jac = self._jacobian_source(bd_F1) + # BC G2[fc*Nc + gc, df] = ∂bd_F1[fc, df]/∂U[gc] # BC G3[fc*Nc + gc, df*dim + dg] = ∂bd_F1[fc, df]/∂L[gc, dg] bd_G2 = sympy.zeros(Nc * Nc, dim) @@ -3230,9 +3422,9 @@ class SNES_Vector(SolverBaseClass): for fc in range(Nc): for gc in range(Nc): for df in range(dim): - bd_G2[fc * Nc + gc, df] = sympy.diff(bd_F1[fc, df], U_list[gc]) + bd_G2[fc * Nc + gc, df] = sympy.diff(bd_F1_jac[fc, df], U_list[gc]) for dg in range(dim): - bd_G3[fc * Nc + gc, df * dim + dg] = sympy.diff(bd_F1[fc, df], L[gc, dg]) + bd_G3[fc * Nc + gc, df * dim + dg] = sympy.diff(bd_F1_jac[fc, df], L[gc, dg]) bc.fns["uu_G2"] = sympy.ImmutableMatrix(bd_G2) bc.fns["uu_G3"] = sympy.ImmutableMatrix(bd_G3) @@ -3920,6 +4112,13 @@ class SNES_MultiComponent(SolverBaseClass): U_list = [self.u.sym[0, c] for c in range(Nc)] L = self.Unknowns.L + # JACOBIAN sources: unwrap (keep constants) + smooth Min/Max kinks so + # each sympy.diff below sees the field-dependence of any nonlinear + # coefficient (full Newton) while the residual above stays exact. No-op + # for constant coefficients -> bit-identical. See _jacobian_source. + f0_jac_list = [self._jacobian_source(e) for e in f0_list] + F1_jac = self._jacobian_source(F1_user, self._newton_flux(F1_user)) + # ----- Explicit-differentiation Jacobian construction ----- # PETSc's element-matrix assembly walks the Jacobian arrays in the # order [test_component, trial_component, test_deriv, trial_deriv]. @@ -3937,21 +4136,21 @@ class SNES_MultiComponent(SolverBaseClass): G0 = sympy.zeros(Nc, Nc) for fc in range(Nc): for gc in range(Nc): - G0[fc, gc] = sympy.diff(f0_list[fc], U_list[gc]) + G0[fc, gc] = sympy.diff(f0_jac_list[fc], U_list[gc]) # G1[fc*Nc + gc, df] = ∂f0[fc] / ∂L[gc, df] G1 = sympy.zeros(Nc * Nc, dim) for fc in range(Nc): for gc in range(Nc): for df in range(dim): - G1[fc * Nc + gc, df] = sympy.diff(f0_list[fc], L[gc, df]) + G1[fc * Nc + gc, df] = sympy.diff(f0_jac_list[fc], L[gc, df]) # G2[fc*Nc + gc, df] = ∂F1[fc, df] / ∂U[gc] G2 = sympy.zeros(Nc * Nc, dim) for fc in range(Nc): for gc in range(Nc): for df in range(dim): - G2[fc * Nc + gc, df] = sympy.diff(F1_user[fc, df], U_list[gc]) + G2[fc * Nc + gc, df] = sympy.diff(F1_jac[fc, df], U_list[gc]) # G3[fc*Nc + gc, df*dim + dg] = ∂F1[fc, df] / ∂L[gc, dg] G3 = sympy.zeros(Nc * Nc, dim * dim) @@ -3959,7 +4158,7 @@ class SNES_MultiComponent(SolverBaseClass): for gc in range(Nc): for df in range(dim): for dg in range(dim): - G3[fc * Nc + gc, df * dim + dg] = sympy.diff(F1_user[fc, df], L[gc, dg]) + G3[fc * Nc + gc, df * dim + dg] = sympy.diff(F1_jac[fc, df], L[gc, dg]) self._G0 = sympy.ImmutableMatrix(G0) self._G1 = sympy.ImmutableMatrix(G1) @@ -4014,14 +4213,18 @@ class SNES_MultiComponent(SolverBaseClass): bc.fns["u_F1"] = sympy.ImmutableDenseMatrix(bd_F1) fns_bd_residual += [bc.fns["u_F1"]] + # Nitsche gradient-traction Jacobian source (unwrap + smooth + # kinks); residual u_F1 above stays exact. See _jacobian_source. + bd_F1_jac = self._jacobian_source(bd_F1) + bd_G2 = sympy.zeros(Nc * Nc, dim) bd_G3 = sympy.zeros(Nc * Nc, dim * dim) for fc in range(Nc): for gc in range(Nc): for df in range(dim): - bd_G2[fc * Nc + gc, df] = sympy.diff(bd_F1[fc, df], U_list[gc]) + bd_G2[fc * Nc + gc, df] = sympy.diff(bd_F1_jac[fc, df], U_list[gc]) for dg in range(dim): - bd_G3[fc * Nc + gc, df * dim + dg] = sympy.diff(bd_F1[fc, df], L[gc, dg]) + bd_G3[fc * Nc + gc, df * dim + dg] = sympy.diff(bd_F1_jac[fc, df], L[gc, dg]) bc.fns["uu_G2"] = sympy.ImmutableMatrix(bd_G2) bc.fns["uu_G3"] = sympy.ImmutableMatrix(bd_G3) @@ -5563,8 +5766,10 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): ## and do these one by one as required by PETSc. However, at the moment, this ## is working .. so be careful !! - # Don't unwrap here — let getext()'s two-phase unwrap handle it. - # This preserves constant UWexpressions as symbols for the constants[] mechanism. + # RESIDUAL: don't unwrap here — let getext()'s two-phase unwrap handle + # it (preserves constant UWexpressions as symbols for constants[]). The + # JACOBIAN sources are unwrapped separately below (see _jac_source) so + # the derivative sees through the viscosity — that is the Newton fix. F0 = sympy.Array(self.F0.sym) F1 = sympy.Array(self.F1.sym) PF0 = sympy.Array(self.PF0.sym) @@ -5591,15 +5796,39 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): U = sympy.Array(self.u.sym).reshape(dim) P = sympy.Array(self.p.sym).reshape(1) + # Expand UWexpressions down to (but NOT including) constant atoms, + # element-wise, for the Jacobian derivative ONLY. This exposes the + # field / grad-v dependence of the (effective) viscosity so that + # derive_by_array forms the full Newton tangent (e.g. Min -> Heaviside + # yield switch), instead of freezing eta_eff as an opaque atom and + # silently running a Picard / defect-correction tangent. Truly-constant + # atoms (eta0, tau_y, ...) survive as symbols so the constants[] + # runtime-update mechanism is preserved (the keep-constants predicate + # is shared with getext()'s _extract_constants, so they cannot drift). + # The residual fns above (self._u_F0/_u_F1/_p_F0) are left untouched — + # getext() unwraps those itself. For constant-viscosity problems this + # is a no-op (eta has no grad-v dependence) so the Jacobian is + # bit-identical. See docs/developer/design/jacobian-unwrap-constants-bug.md + # + # (see consistent_jacobian / _jacobian_source: default Picard, bit- + # identical; True -> Newton; "continuation" -> alpha-blended.) + F0_jac = self._jacobian_source(F0) + PF0_jac = self._jacobian_source(PF0) + # Optional override: differentiate an alternative F1 to build the # uu and up Jacobian blocks while leaving the residual F1 # unchanged. Used for inexact Newton (e.g. softmin Jacobian with - # Min residual at a yield kink). When None, autodiff F1 itself. + # Min residual at a yield kink). When None, autodiff F1 itself — + # the Newton flux being the model's smooth law (flux_jacobian) if it + # provides one, else the exact flux unwrapped. F1_jac_src = getattr(self, "_F1_jacobian_source", None) - F1_for_jac = sympy.Array(F1_jac_src) if F1_jac_src is not None else F1 + if F1_jac_src is not None: + F1_for_jac = self._jacobian_source(sympy.Array(F1_jac_src)) + else: + F1_for_jac = self._jacobian_source(F1, self._newton_flux(F1)) - G0 = sympy.derive_by_array(F0, self.u.sym) - G1 = sympy.derive_by_array(F0, self.Unknowns.L) + G0 = sympy.derive_by_array(F0_jac, self.u.sym) + G1 = sympy.derive_by_array(F0_jac, self.Unknowns.L) G2 = sympy.derive_by_array(F1_for_jac, self.u.sym) G3 = sympy.derive_by_array(F1_for_jac, self.Unknowns.L) @@ -5624,8 +5853,8 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): # U/P block (check permutations - hard to validate without a full collection of examples) - G0 = sympy.derive_by_array(F0, self.p.sym) - G1 = sympy.derive_by_array(F0, self._G) + G0 = sympy.derive_by_array(F0_jac, self.p.sym) + G1 = sympy.derive_by_array(F0_jac, self._G) G2 = sympy.derive_by_array(F1_for_jac, self.p.sym) G3 = sympy.derive_by_array(F1_for_jac, self._G) @@ -5638,8 +5867,8 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): # P/U block (check permutations) - G0 = sympy.derive_by_array(PF0, self.u.sym) - G1 = sympy.derive_by_array(PF0, self.Unknowns.L) + G0 = sympy.derive_by_array(PF0_jac, self.u.sym) + G1 = sympy.derive_by_array(PF0_jac, self.Unknowns.L) # G2 = sympy.derive_by_array(FP1, U) # We don't have an FP1 ! # G3 = sympy.derive_by_array(FP1, self.Unknowns.L) @@ -5716,8 +5945,12 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): bc.fns["u_F1"] = sympy.ImmutableDenseMatrix(bd_F1) fns_bd_residual += [bc.fns["u_F1"]] - G2 = sympy.derive_by_array(bd_F1, self.Unknowns.u.sym) - G3 = sympy.derive_by_array(bd_F1, self.Unknowns.L) + # Nitsche gradient-traction depends on eta(grad v); unwrap + + # smooth-kink the Jacobian source so its tangent is Newton- + # consistent (same fix as the bulk). Residual u_F1 stays exact. + bd_F1_jac = self._jacobian_source(bd_F1) + G2 = sympy.derive_by_array(bd_F1_jac, self.Unknowns.u.sym) + G3 = sympy.derive_by_array(bd_F1_jac, self.Unknowns.L) bc.fns["uu_G2"] = sympy.ImmutableMatrix(sympy.permutedims(G2, permutation).reshape(dim*dim, dim)) bc.fns["uu_G3"] = sympy.ImmutableMatrix(sympy.permutedims(G3, permutation).reshape(dim*dim, dim*dim)) fns_bd_jacobian += [bc.fns["uu_G2"], bc.fns["uu_G3"]] diff --git a/src/underworld3/function/expressions.py b/src/underworld3/function/expressions.py index 2a35b631..2ee20a20 100644 --- a/src/underworld3/function/expressions.py +++ b/src/underworld3/function/expressions.py @@ -82,6 +82,12 @@ def _unwrap_atom(atom, mode='nondimensional'): mode: 'nondimensional' - use .data for ND values (JIT/evaluate) 'dimensional' - use .value for display 'symbolic' - use .sym for symbolic substitution + 'symbolic_keep_constants' - like 'symbolic', but stop at truly + constant UWexpressions (eta0, tau_y, ...), leaving them as the + *same* symbol object so the JIT constants[] mechanism still + routes them. Used to unwrap F0/F1 *before* the Jacobian + derivative so field/grad-v dependence of the viscosity is + differentiated (full Newton) while constants stay symbolic. Returns: The unwrapped value (float, sympy.Number, or sympy expression) @@ -112,12 +118,26 @@ def _unwrap_atom(atom, mode='nondimensional'): if isinstance(inner, UWQuantity) and not isinstance(inner, UWexpression): return _unwrap_atom(inner, mode) return inner + elif mode == 'symbolic_keep_constants': + # Expand non-constant UWexpressions one level (reveals the field / + # grad-v dependence of the viscosity); leave truly-constant atoms + # untouched as the SAME object so they survive to constants[]. + # The predicate is shared with _extract_constants() so the set of + # atoms kept symbolic here is exactly the set routed to constants[] + # by getext() — they cannot drift apart. + from underworld3.utilities._jitextension import _is_truly_constant + if _is_truly_constant(atom, UWexpression): + return atom + return atom.sym else: # symbolic return atom.sym # UWQuantity (not wrapped in UWexpression) if isinstance(atom, UWQuantity): - if mode == 'nondimensional': + if mode in ('nondimensional', 'symbolic_keep_constants'): + # A bare UWQuantity is never a constants[] entry (those are + # UWexpression atoms), so for keep-constants we resolve it to a + # value just like nondimensional rather than leaving it unresolved. import underworld3 if underworld3._is_scaling_active() and atom.has_units: try: @@ -189,6 +209,9 @@ def unwrap_expression(expr, mode='nondimensional', depth=None): mode: 'nondimensional' - for JIT compilation and evaluation (uses .data) 'dimensional' - for user display (uses .value) 'symbolic' - just expand .sym structure + 'symbolic_keep_constants' - expand .sym structure but stop at + truly-constant UWexpressions (keep them as symbols for + constants[]). Use before Jacobian differentiation. depth: Maximum expansion depth (None = complete expansion) Returns: From a2d4a75dffe0774a7730d196fdaee38a2cb0be44 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Thu, 18 Jun 2026 18:41:38 +1000 Subject: [PATCH 2/4] Make Picard->Newton alpha lazy (zero default-path perturbation) Creating the continuation alpha UWexpression in every solver __init__ bumped the global unique-name counter, shifting JIT cache keys enough to flip two known-flaky VEP variable-dt yield-lock tests in full-suite runs. Construct alpha lazily (continuation mode only) so the default Picard path creates no extra expression. Verified: level_2 tier_a forked failure set now IDENTICAL to pristine origin/development (4 pre-existing failures). Underworld development team with AI support from Claude Code --- .../cython/petsc_generic_snes_solvers.pyx | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/underworld3/cython/petsc_generic_snes_solvers.pyx b/src/underworld3/cython/petsc_generic_snes_solvers.pyx index e4905f89..ce3ba6a0 100644 --- a/src/underworld3/cython/petsc_generic_snes_solvers.pyx +++ b/src/underworld3/cython/petsc_generic_snes_solvers.pyx @@ -92,10 +92,10 @@ class SolverBaseClass(uw_object): self.consistent_jacobian = False # Picard->Newton continuation parameter (constants[]-routed so it can be # ramped at solve time without a JIT recompile). 0 = Picard, 1 = Newton. - self._newton_alpha = uw.function.expression( - r"\alpha_{N}", sympy.Float(0.0), - "Picard->Newton continuation fraction (0=Picard, 1=Newton)", - ) + # Created LAZILY (see _get_newton_alpha) only when continuation is used, + # so the default path constructs no extra UWexpression and therefore + # cannot perturb global symbol-naming / JIT-cache state. + self._newton_alpha = None # Switch threshold: ramp alpha toward 1 once the relative residual falls # below this (the basin-of-attraction heuristic for Newton). self.newton_switch_rtol = 1.0e-2 @@ -173,7 +173,7 @@ class SolverBaseClass(uw_object): if newton_expr is None: newton_expr = _jacobian_unwrap(expr) if mode == "continuation": - a = self._newton_alpha + a = self._get_newton_alpha() if isinstance(expr, sympy.MatrixBase): ne = newton_expr if isinstance(newton_expr, sympy.MatrixBase) \ else sympy.Matrix(newton_expr) @@ -214,6 +214,20 @@ class SolverBaseClass(uw_object): except Exception: return None + def _get_newton_alpha(self): + """Lazily construct the Picard->Newton continuation parameter. + + Built on first use (continuation mode only) so the default Picard path + creates no extra UWexpression and leaves global symbol-naming / JIT-cache + state byte-identical to historical behaviour. + """ + if self._newton_alpha is None: + self._newton_alpha = uw.function.expression( + r"\alpha_{N}", sympy.Float(0.0), + "Picard->Newton continuation fraction (0=Picard, 1=Newton)", + ) + return self._newton_alpha + def _set_newton_alpha(self, value): """Set the Picard->Newton continuation fraction and push it to the DS. @@ -222,7 +236,7 @@ class SolverBaseClass(uw_object): recompile. No-op outside ``consistent_jacobian == "continuation"`` (alpha is then absent from the constants manifest). """ - self._newton_alpha.sym = sympy.Float(value) + self._get_newton_alpha().sym = sympy.Float(value) try: self._update_constants() except Exception: From da3b1edec6c9b84ecbc31508f5610d1c6e8880b0 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Fri, 19 Jun 2026 11:13:43 +1000 Subject: [PATCH 3/4] VEP flux_jacobian: harmonic smooth tangent for hard-Min yield (model-owned) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ViscoElasticPlasticFlowModel.flux_jacobian returns the flux with Min->harmonic (1/(1/eta_ve+1/eta_pl)) and Max smoothed, for the Jacobian source ONLY (exact Min residual preserved). Pure symbolic substitution on the live flux — no state mutation (fixes the earlier err-77 from a yield_mode toggle hack). Runs clean. Guard _newton_flux on consistent_jacobian so the default (Picard) path never evaluates flux_jacobian — keeps default assembly allocation-free / bit-identical (test_1010 6/6). NOTE: smooth-Jacobian + hard-Min residual is an inconsistent tangent and does NOT improve convergence on the BDF-2 VEP loading test (8/15 vs Picard 3/15); only full harmonic (consistent smooth residual+Jacobian) converges (0/15). The hook is correct, clean, and available; VEP convergence remains a separate issue. Underworld development team with AI support from Claude Code --- src/underworld3/constitutive_models.py | 33 +++++++++++++++++++ .../cython/petsc_generic_snes_solvers.pyx | 5 +++ 2 files changed, 38 insertions(+) diff --git a/src/underworld3/constitutive_models.py b/src/underworld3/constitutive_models.py index 5e5fb8c1..7ef7332b 100644 --- a/src/underworld3/constitutive_models.py +++ b/src/underworld3/constitutive_models.py @@ -1710,6 +1710,39 @@ def viscosity(self): else: return effective_viscosity + @property + def flux_jacobian(self): + r"""Smooth-tangent flux for hard-``Min`` yield (exact residual, smooth Jacobian). + + For ``_yield_mode == "min"`` the flux carries a ``Min(η_ve, η_pl)`` yield + kink whose exact tangent has a ``Heaviside`` jump that can stall the SNES + line search (the well-known hard-yield convergence problem). This returns + the flux with that kink replaced by the model's own harmonic blend + ``1/(1/η_ve + 1/η_pl)`` (and the viscosity-floor ``Max`` smoothed dually), + for use as the Jacobian source ONLY. The residual keeps the exact ``Min`` + so the converged solution still satisfies the true yield surface — only + the Newton search direction is smoothed. + + No model state is mutated (a pure symbolic substitution on the live + flux). Returns ``None`` for the already-smooth yield modes (and when no + ``Min``/``Max`` is present), so the solver then differentiates the exact + flux unchanged. + + Consumed by the solver via ``consistent_jacobian`` (see + ``petsc_generic_snes_solvers``); the unwrap fix differentiates the + returned smooth flux correctly. + """ + if not self.is_viscoplastic or self._yield_mode != "min": + return None + f = self.flux + if not f.has(sympy.Min, sympy.Max): + return None + harmonic = lambda *args: 1 / sum(1 / x for x in args) + smooth_max = lambda *args: sum(args) - harmonic(*args) + return f.applyfunc( + lambda e: e.replace(sympy.Min, harmonic).replace(sympy.Max, smooth_max) + ) + @property def _plastic_effective_viscosity(self): parameters = self.Parameters diff --git a/src/underworld3/cython/petsc_generic_snes_solvers.pyx b/src/underworld3/cython/petsc_generic_snes_solvers.pyx index d17f13c0..f45a1096 100644 --- a/src/underworld3/cython/petsc_generic_snes_solvers.pyx +++ b/src/underworld3/cython/petsc_generic_snes_solvers.pyx @@ -204,6 +204,11 @@ class SolverBaseClass(uw_object): model whose flux has a non-smooth yield kink provide a physically motivated smooth tangent without changing the residual. """ + # Default (Picard) path never needs the Newton flux — short-circuit so + # the (potentially expensive) model.flux_jacobian is not even evaluated, + # keeping the default assembly allocation-free and bit-identical. + if not self.consistent_jacobian: + return None cm = getattr(self, "constitutive_model", None) smooth = getattr(cm, "flux_jacobian", None) if cm is not None else None if smooth is None: From 96f9e3c6423e4518da57cfa318791231b966deb6 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Fri, 19 Jun 2026 20:17:19 +1000 Subject: [PATCH 4/4] Defer VEP harmonic flux_jacobian to follow-up; add design doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The smooth-Jacobian-with-Min-residual tangent is inconsistent (consistent with the harmonic problem, not Min) and converges worse than Picard on hard-yield VEP — so the VEP-specific harmonic flux_jacobian override is deferred to the yield-law / delta-homotopy follow-up PR. The generic Constitutive_Model. flux_jacobian hook (default None) + the _newton_flux guard remain. Add docs/developer/design/jacobian-consistent-tangent.md: the bug, the opt-in/default-off fix, non-regression evidence, the tangent hierarchy, and the delta-homotopy successor work. Underworld development team with AI support from Claude Code --- .../design/jacobian-consistent-tangent.md | 99 +++++++++++++++++++ src/underworld3/constitutive_models.py | 40 ++------ 2 files changed, 107 insertions(+), 32 deletions(-) create mode 100644 docs/developer/design/jacobian-consistent-tangent.md diff --git a/docs/developer/design/jacobian-consistent-tangent.md b/docs/developer/design/jacobian-consistent-tangent.md new file mode 100644 index 00000000..c14477ea --- /dev/null +++ b/docs/developer/design/jacobian-consistent-tangent.md @@ -0,0 +1,99 @@ +# Consistent Jacobian tangent for nonlinear (viscoplastic) solves + +## The bug + +The SNES Jacobian assembly in `src/underworld3/cython/petsc_generic_snes_solvers.pyx` +formed the `G0`–`G3` blocks by calling `sympy.derive_by_array` / `sympy.diff` on the +residual flux `F1` **while the effective viscosity was still a wrapped `UWexpression` +atom**. `derive_by_array` therefore treated the viscosity as a constant, so the term +`∂η/∂(grad v)` was **silently dropped from every Jacobian**. + +Consequence: UW3 viscoplastic Stokes was running an accidental **Picard / +defect-correction** tangent, not full Newton — not by design, but because the unwrap +happened *after* the derivative instead of *before* it. Constant-viscosity problems were +unaffected (η has no `grad v` dependence), which is why the entire constant-viscosity +suite passed bit-identically and the bug stayed hidden behind the "≈20 Picard iterations +is intrinsic" folklore. + +## The fix (this PR — opt-in, default-off) + +A new unwrap mode and a gate, so the default path is **bit-identical** to the historical +(Picard) behaviour and the consistent tangent is **opt-in**. + +- **`symbolic_keep_constants` unwrap mode** (`function/expressions.py`): expands every + `UWexpression` atom down to — but **not including** — truly-constant atoms (η₀, τ_y, …), + which stay as symbols so the `constants[]` runtime-update mechanism survives. The + "stop at constant" predicate is the *same* `_is_truly_constant` that `getext()`'s + `_extract_constants` uses, so the kept-symbolic set and the `constants[]` set provably + cannot drift (drift-guard test). + +- **`solver.consistent_jacobian`** (`SolverBaseClass`), one of: + - `False` (**default**) — differentiate the flux *as wrapped*: frozen viscosity, the + historical Picard tangent. **Bit-identical** to prior behaviour. + - `True` — unwrap before differentiating: full consistent Newton tangent + (`Min → Heaviside` yield switch appears). + - `"continuation"` — Picard → Newton. Blend `J(α) = J_picard + α·(J_newton − J_picard)` + with `α` a `constants[]` parameter ramped 0 → 1 at solve time (`_continuation_solve`): + Picard locates the basin, Newton sharpens inside it. Because `α` lives in `constants[]`, + switching costs **no JIT recompile**, and `α=0` is bit-identical to Picard. + +- **Model-owned smooth-tangent hook** `Constitutive_Model.flux_jacobian` (default `None`): + a model may supply a smooth surrogate flux for the *Jacobian only* while the residual + keeps the exact law. Consumed via `_newton_flux`, which is **guarded** so the default + (Picard) path never even evaluates it. + +The residual is never routed through the new path, so a converged solution always satisfies +the exact constitutive law regardless of the tangent used. + +### Non-regression evidence +- Constant-viscosity Jacobian is **symbolically bit-identical** (0/N blocks change). +- `test_1010`, SolCx `test_1015`, `test_0610`, the asymmetric-Jacobian guard, units (64) all + pass; `level_1 tier_a` 225/225. +- Crash-isolated (`--forked`) `level_2 tier_a` failure set is **identical to pristine + `development`** — the pre-existing reds (`test_1012` gmsh crash + 3 `test_1052` VEP) are + not introduced here. (Creating the continuation `α` lazily was required to avoid a global + symbol-counter perturbation that flipped two flaky VEP variable-dt tests.) + +## Why a smooth Jacobian on a sharp residual is *not* the fix (tangent hierarchy) + +For a hard-`Min` yield residual there is a strict ordering of tangents: + +1. **Picard** (η frozen, `J = η·∂E/∂L`) — not the true Jacobian, but a *contractive* + defect-correction operator whose fixed point **is** the `Min` solution. Slow (linear) + but globally stable. +2. **Consistent Newton** (exact `Heaviside` tangent) — fast near the solution; the kink + discontinuity breaks the line search far from it. +3. **Inconsistent smooth Jacobian** (harmonic tangent + `Min` residual) — the consistent + tangent of a *different* (harmonic) problem; it points at the wrong solution and has + neither Picard's contractivity nor Newton's consistency. **Worst of the three** — it + diverges *more* than Picard on hard-yield VEP (measured 8/15 vs Picard 3/15 on the VEP + loading test). +4. **Full smooth** (smooth residual *and* Jacobian) — consistent and smooth, converges, but + solves a *smoothed* problem (different physics). + +So "exact residual + smooth Jacobian" is option (3) — appealing but mathematically a dead +end. This is why the model-owned harmonic `flux_jacobian` override is **not** in this PR. +The valuable, well-posed part — the consistent tangent and the Picard→Newton continuation — +helps the *non-elastic* viscoplastic Stokes it was built for (the shear box converges via +continuation). + +## Successor work (separate PR): δ-homotopy + yield-law unification + +The robust route for hard-yield convergence is **problem-space** continuation, not tangent +tricks: ramp the *residual* smooth → sharp. + +- Use the softmin softness **δ** as the homotopy parameter. softmin at **δ=0 is identically + `Min`** (`g(f) = max(1, f)` exactly), and is C∞ for δ>0. So ramping δ → 0 gives a + differentiable path whose endpoint has **zero physics change** (exact yield law) — unlike a + yield-stress staircase, which moves the target. +- Empirically: solving smooth-first then warm-starting the sharp solve converges the hard + VEP loading test **0/15** (vs cold 3/15). +- Production form mirrors the `α`-continuation: make δ a `constants[]` parameter and ramp it + within one solve (per-iteration via the `add_update_callback` / `SNESSetUpdate` hook), so + there is one compiled problem, one BDF-history update per step, and no recompile. +- Unify the yield-mode "zoo": collapse `softmin`/`smooth`/`min` into one δ-parameterised law + (δ=0 default = exact `Min`; δ>0 = a *controlled* smooth-min), keep `harmonic` as a distinct + physical model, drop the redundant `smooth` formula. "Smooth-min" then means *incomplete + blending* (final δ>0), not an ad-hoc surrogate with simulation-dependent deviation from τ_y. + +That work depends on this PR's `constants[]`-ramp machinery, so it lands after. diff --git a/src/underworld3/constitutive_models.py b/src/underworld3/constitutive_models.py index 7ef7332b..0206f511 100644 --- a/src/underworld3/constitutive_models.py +++ b/src/underworld3/constitutive_models.py @@ -1710,38 +1710,14 @@ def viscosity(self): else: return effective_viscosity - @property - def flux_jacobian(self): - r"""Smooth-tangent flux for hard-``Min`` yield (exact residual, smooth Jacobian). - - For ``_yield_mode == "min"`` the flux carries a ``Min(η_ve, η_pl)`` yield - kink whose exact tangent has a ``Heaviside`` jump that can stall the SNES - line search (the well-known hard-yield convergence problem). This returns - the flux with that kink replaced by the model's own harmonic blend - ``1/(1/η_ve + 1/η_pl)`` (and the viscosity-floor ``Max`` smoothed dually), - for use as the Jacobian source ONLY. The residual keeps the exact ``Min`` - so the converged solution still satisfies the true yield surface — only - the Newton search direction is smoothed. - - No model state is mutated (a pure symbolic substitution on the live - flux). Returns ``None`` for the already-smooth yield modes (and when no - ``Min``/``Max`` is present), so the solver then differentiates the exact - flux unchanged. - - Consumed by the solver via ``consistent_jacobian`` (see - ``petsc_generic_snes_solvers``); the unwrap fix differentiates the - returned smooth flux correctly. - """ - if not self.is_viscoplastic or self._yield_mode != "min": - return None - f = self.flux - if not f.has(sympy.Min, sympy.Max): - return None - harmonic = lambda *args: 1 / sum(1 / x for x in args) - smooth_max = lambda *args: sum(args) - harmonic(*args) - return f.applyfunc( - lambda e: e.replace(sympy.Min, harmonic).replace(sympy.Max, smooth_max) - ) + # NOTE: a hard-Min smooth-tangent override (flux_jacobian = harmonic) was + # prototyped here but deferred to the yield-law / δ-homotopy follow-up. The + # smooth-Jacobian-with-Min-residual tangent is inconsistent (it is the + # consistent tangent of the *harmonic* problem) and converges WORSE than + # Picard on hard-yield VEP; the robust route is problem-space homotopy + # (ramp the softmin softness δ→0), not a smooth tangent. See the design doc + # docs/developer/design/jacobian-unwrap-constants-bug.md. The generic + # Constitutive_Model.flux_jacobian hook (default None) remains available. @property def _plastic_effective_viscosity(self):