From fe4c1eb782250a9561bc0e1c125242859532e152 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 22 May 2026 08:48:56 +0300 Subject: [PATCH 1/2] fix ordered_logistic higher order ad --- .../prim/prob/ordered_logistic_glm_lpmf.hpp | 17 +- stan/math/prim/prob/ordered_logistic_lpmf.hpp | 17 +- .../prob/ordered_logistic_glm_lpmf_test.cpp | 66 +++++++ .../math/mix/prob/ordered_logistic_test.cpp | 168 ++++++++++++++++++ 4 files changed, 254 insertions(+), 14 deletions(-) diff --git a/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp b/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp index b90b50020c5..24e53962a54 100644 --- a/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp +++ b/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp @@ -162,16 +162,19 @@ inline return_type_t ordered_logistic_glm_lpmf( auto ops_partials = make_partials_propagator(x_ref, beta_ref, cuts_ref); if constexpr (is_any_autodiff_v) { - Array exp_m_cut1 = exp(-cut1); - Array exp_m_cut2 = exp(-cut2); + Array exp_m_abs_cut1 = (-cut1.abs()).exp(); + Array exp_m_abs_cut2 = (-cut2.abs()).exp(); Array exp_cuts_diff = exp(cuts_y2 - cuts_y1); + Array inv_logit_neg_cut2 + = (cut2 > 0).select(exp_m_abs_cut2 / (1 + exp_m_abs_cut2), + 1 / (1 + exp_m_abs_cut2)); + Array inv_logit_neg_cut1 + = (cut1 > 0).select(exp_m_abs_cut1 / (1 + exp_m_abs_cut1), + 1 / (1 + exp_m_abs_cut1)); Array d1 - = (cut2 > 0).select(exp_m_cut2 / (1 + exp_m_cut2), 1 / (1 + exp(cut2))) - - exp_cuts_diff / (exp_cuts_diff - 1); + = inv_logit_neg_cut2 - exp_cuts_diff / (exp_cuts_diff - 1); Array d2 - = 1 / (1 - exp_cuts_diff) - - (cut1 > 0).select(exp_m_cut1 / (1 + exp_m_cut1), - 1 / (1 + exp(cut1))); + = 1 / (1 - exp_cuts_diff) - inv_logit_neg_cut1; if constexpr (is_any_autodiff_v) { Matrix location_derivative = d1 - d2; if constexpr (is_autodiff_v) { diff --git a/stan/math/prim/prob/ordered_logistic_lpmf.hpp b/stan/math/prim/prob/ordered_logistic_lpmf.hpp index dfc550ed883..491550ce60a 100644 --- a/stan/math/prim/prob/ordered_logistic_lpmf.hpp +++ b/stan/math/prim/prob/ordered_logistic_lpmf.hpp @@ -175,16 +175,19 @@ inline return_type_t ordered_logistic_lpmf(const T_y& y, auto ops_partials = make_partials_propagator(lambda_ref, c_ref); if constexpr (is_any_autodiff_v) { - Array exp_m_cut1 = exp(-cut1); - Array exp_m_cut2 = exp(-cut2); + Array exp_m_abs_cut1 = (-cut1.abs()).exp(); + Array exp_m_abs_cut2 = (-cut2.abs()).exp(); Array exp_cuts_diff = exp(cuts_y2 - cuts_y1); + Array inv_logit_neg_cut2 + = (cut2 > 0).select(exp_m_abs_cut2 / (1 + exp_m_abs_cut2), + 1 / (1 + exp_m_abs_cut2)); + Array inv_logit_neg_cut1 + = (cut1 > 0).select(exp_m_abs_cut1 / (1 + exp_m_abs_cut1), + 1 / (1 + exp_m_abs_cut1)); Array d1 - = (cut2 > 0).select(exp_m_cut2 / (1 + exp_m_cut2), 1 / (1 + exp(cut2))) - - exp_cuts_diff / (exp_cuts_diff - 1); + = inv_logit_neg_cut2 - exp_cuts_diff / (exp_cuts_diff - 1); Array d2 - = 1 / (1 - exp_cuts_diff) - - (cut1 > 0).select(exp_m_cut1 / (1 + exp_m_cut1), - 1 / (1 + exp(cut1))); + = 1 / (1 - exp_cuts_diff) - inv_logit_neg_cut1; if constexpr (is_autodiff_v) { partials<0>(ops_partials) = d1 - d2; } diff --git a/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp b/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp index b4791cdbed5..f3b85259f25 100644 --- a/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp +++ b/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp @@ -19,3 +19,69 @@ TEST_F(AgradRev, mathMixScalFun_ordered_logistic_glm_lpmf) { stan::test::expect_ad(f(y), x, beta, cutpoints); stan::test::expect_ad(f(y), x_rowvec, beta, cutpoints); } + +// Regression test for the higher-order-AD NaN bug reported 2026-05-21 +// (mirror of the test in ordered_logistic_test.cpp; same root cause: +// orphan exp(+INF) vari created by the partials block for y == N_classes +// and y == 1 sentinel cases). See that file for the full bug description. +TEST_F(AgradRev, + mathMixScalFun_ordered_logistic_glm_lpmf_top_category_higher_order_ad) { + using stan::math::fvar; + using stan::math::ordered_logistic_glm_lpmf; + using stan::math::var; + + // N_classes = 3 (cutpoints size 2). y contains both 1 and N_classes. + std::vector y{1, 2, 3, 2, 1}; + const int N = 5; + const int D = 2; + + Eigen::Matrix>, Eigen::Dynamic, Eigen::Dynamic> x_ffv(N, D); + Eigen::Matrix>, Eigen::Dynamic, 1> beta_ffv(D); + Eigen::Matrix>, Eigen::Dynamic, 1> cuts_ffv(2); + + x_ffv << 0.5, -0.3, 0.1, 0.7, -0.2, 0.4, 0.8, -0.1, 0.3, 0.2; + beta_ffv << 0.4, -0.6; + cuts_ffv << 0.0, 1.0; + + // Mixed-zero tangent seeds (compute_s2 / third_diff pattern). + for (int i = 0; i < N; i++) { + for (int j = 0; j < D; j++) { + x_ffv(i, j).d_ = (i + j) % 2 == 0 ? 1.0 : 0.0; + x_ffv(i, j).val_.d_ = (i + j) % 3 == 0 ? 1.0 : 0.0; + } + } + for (int j = 0; j < D; j++) { + beta_ffv(j).d_ = 1.0; + beta_ffv(j).val_.d_ = 0.0; + } + for (int j = 0; j < 2; j++) { + cuts_ffv(j).d_ = 0.0; + cuts_ffv(j).val_.d_ = 1.0; + } + + fvar> out_ffv + = ordered_logistic_glm_lpmf(y, x_ffv, beta_ffv, cuts_ffv); + + EXPECT_TRUE(std::isfinite(out_ffv.val_.val_.val())); + EXPECT_TRUE(std::isfinite(out_ffv.d_.val_.val())); + EXPECT_TRUE(std::isfinite(out_ffv.val_.d_.val())); + EXPECT_TRUE(std::isfinite(out_ffv.d_.d_.val())); + + out_ffv.d_.d_.grad(); + + for (int i = 0; i < N; i++) { + for (int j = 0; j < D; j++) { + EXPECT_TRUE(std::isfinite(x_ffv(i, j).val_.val_.adj())) + << "x_ffv(" << i << "," << j + << ").val_.val_.adj() non-finite (y[" << i << "]=" << y[i] << ")"; + } + } + for (int j = 0; j < D; j++) { + EXPECT_TRUE(std::isfinite(beta_ffv(j).val_.val_.adj())) + << "beta_ffv(" << j << ").val_.val_.adj() non-finite"; + } + for (int j = 0; j < 2; j++) { + EXPECT_TRUE(std::isfinite(cuts_ffv(j).val_.val_.adj())) + << "cuts_ffv(" << j << ").val_.val_.adj() non-finite"; + } +} diff --git a/test/unit/math/mix/prob/ordered_logistic_test.cpp b/test/unit/math/mix/prob/ordered_logistic_test.cpp index c749d754223..96753750265 100644 --- a/test/unit/math/mix/prob/ordered_logistic_test.cpp +++ b/test/unit/math/mix/prob/ordered_logistic_test.cpp @@ -554,3 +554,171 @@ TEST_F(AgradRev, ProbDistributionsOrdLog_fv_d_stvec) { EXPECT_FLOAT_EQ(std_c_ffv[3][1].d_.val_.adj(), 0.0); EXPECT_FLOAT_EQ(std_c_ffv[3][2].d_.val_.adj(), -0.704745697998091); } + +// Regression test for the higher-order-AD NaN bug reported 2026-05-21: +// `ordered_logistic_lpmf` returned NaN under `laplace_marginal_tol` when y +// contained the top category K. Root cause: the partials block materialized +// `exp(-cut1)` and `exp(cut1)` (in an unselected branch of Eigen .select), +// which for y == K (cut1 = -INF) and y == 1 (cut2 = +INF) pushed an orphan +// `vari` with val == +INFINITY onto the autodiff stack. `stan::math::grad()` +// then ran the standard `exp_vari::chain` body +// a.adj() += vi.adj() * vi.val(); +// with vi.adj() == 0 for the orphan (e.g. in laplace_marginal_density's +// compute_s2 where most tangent seeds are zero), producing 0 * INF = NaN on +// the val-side adjoint chain of the input. The pre-fix mix tests only +// asserted `.d_.adj()` (tangent-side) with all `.d_` seeds equal to 1, so +// they hit 1 * INF = +INF (finite-ish, escaping the EXPECT_FLOAT_EQ) and +// never inspected the val-side adjoint where the NaN actually lands. +// +// The tests below specifically exercise: +// 1. y contains K (top category): triggers cut1 = -INF. +// 2. y contains 1 (bottom category): triggers cut2 = +INF. +// 3. Some .d_ tangent seeds are zero: triggers 0 * INF = NaN in chain(). +// 4. Val-side adjoints (`.val_.val_.adj()` for fvar>) are +// asserted finite \u2014 these are the adjoints that Laplace reads. +TEST_F(AgradRev, ProbDistributionsOrdLog_top_category_higher_order_ad) { + using stan::math::fvar; + using stan::math::ordered_logistic_lpmf; + using stan::math::var; + using stan::math::vector_ffv; + + // K = 3 (cutpoints has size 2). y contains both 1 (bottom) and K=3 (top). + std::vector y{1, 2, 3, 2, 1}; + + vector_ffv lam_ffv(5); + lam_ffv << -0.5, 0.2, 0.7, 0.1, -0.3; + // Mixed tangent seeds: some zero, some non-zero. This matches Laplace's + // compute_s2 pattern where v(j) = 0 for most indices when + // hessian_block_size > 1. Zero seeds combined with an orphan +INF vari + // produce 0 * INF = NaN in the buggy version. + lam_ffv[0].d_ = 1.0; + lam_ffv[0].val_.d_ = 1.0; + lam_ffv[1].d_ = 0.0; // zero outer tangent + lam_ffv[1].val_.d_ = 1.0; + lam_ffv[2].d_ = 1.0; + lam_ffv[2].val_.d_ = 0.0; // zero inner tangent + lam_ffv[3].d_ = 0.0; + lam_ffv[3].val_.d_ = 0.0; // both zero + lam_ffv[4].d_ = 1.0; + lam_ffv[4].val_.d_ = 1.0; + + vector_ffv c_ffv(2); + c_ffv << 0.0, 1.0; + for (int i = 0; i < 2; i++) { + c_ffv[i].d_ = 1.0; + c_ffv[i].val_.d_ = 1.0; + } + + fvar> out_ffv = ordered_logistic_lpmf(y, lam_ffv, c_ffv); + + // Value must be finite. + EXPECT_TRUE(std::isfinite(out_ffv.val_.val_.val())) + << "Forward value is non-finite when y contains top category K"; + EXPECT_TRUE(std::isfinite(out_ffv.d_.val_.val())) + << "First-order tangent value is non-finite"; + EXPECT_TRUE(std::isfinite(out_ffv.val_.d_.val())) + << "Inner tangent value is non-finite"; + EXPECT_TRUE(std::isfinite(out_ffv.d_.d_.val())) + << "Second-order tangent value is non-finite"; + + // Hessian-style grad: differentiate the second-order tangent. This is the + // exact code path used by laplace_likelihood::compute_s2 / + // laplace_likelihood::third_diff. + out_ffv.d_.d_.grad(); + + for (int i = 0; i < 5; i++) { + // VAL-SIDE adjoints (the ones the pre-fix tests never checked): + EXPECT_TRUE(std::isfinite(lam_ffv[i].val_.val_.adj())) + << "lam_ffv[" << i << "].val_.val_.adj() is NaN/INF -- " + << "regression: orphan exp(+INF) vari injected NaN into val-side " + << "chain. y[i]=" << y[i] << ", lam.d_=" << lam_ffv[i].d_.val_.val() + << ", lam.val_.d_=" << lam_ffv[i].val_.d_.val(); + EXPECT_TRUE(std::isfinite(lam_ffv[i].d_.val_.adj())); + EXPECT_TRUE(std::isfinite(lam_ffv[i].val_.d_.adj())); + EXPECT_TRUE(std::isfinite(lam_ffv[i].d_.d_.adj())); + } + for (int i = 0; i < 2; i++) { + EXPECT_TRUE(std::isfinite(c_ffv[i].val_.val_.adj())) + << "c_ffv[" << i << "].val_.val_.adj() is NaN/INF"; + EXPECT_TRUE(std::isfinite(c_ffv[i].d_.val_.adj())); + EXPECT_TRUE(std::isfinite(c_ffv[i].val_.d_.adj())); + EXPECT_TRUE(std::isfinite(c_ffv[i].d_.d_.adj())); + } +} + +// Same regression test but for scalar y, scalar lambda \u2014 mirrors the +// production failure pattern in mixtureis/test_ordered_logistic_laplace_bug +// where ordered_logistic_lpmf is called once per observation with y == K. +TEST_F(AgradRev, ProbDistributionsOrdLog_scalar_top_category_higher_order_ad) { + using stan::math::fvar; + using stan::math::ordered_logistic_lpmf; + using stan::math::var; + using stan::math::vector_ffv; + + // K = 3, y = K (top category) \u2014 the case that produced NaN. + int y_top = 3; + int y_bot = 1; + + fvar> lam_ffv; + lam_ffv.val_ = 0.7; + lam_ffv.d_ = 0.0; // zero outer tangent (compute_s2 pattern) + lam_ffv.val_.d_ = 1.0; + + vector_ffv c_ffv(2); + c_ffv << 0.0, 1.0; + for (int i = 0; i < 2; i++) { + c_ffv[i].d_ = 0.0; + c_ffv[i].val_.d_ = 1.0; + } + + fvar> out_top = ordered_logistic_lpmf(y_top, lam_ffv, c_ffv); + EXPECT_TRUE(std::isfinite(out_top.val_.val_.val())); + EXPECT_TRUE(std::isfinite(out_top.d_.val_.val())); + EXPECT_TRUE(std::isfinite(out_top.val_.d_.val())); + EXPECT_TRUE(std::isfinite(out_top.d_.d_.val())); + + out_top.d_.d_.grad(); + EXPECT_TRUE(std::isfinite(lam_ffv.val_.val_.adj())) + << "scalar y == K: val-side adjoint of lambda is non-finite"; + EXPECT_TRUE(std::isfinite(c_ffv[0].val_.val_.adj())); + EXPECT_TRUE(std::isfinite(c_ffv[1].val_.val_.adj())); + + // Same for y == 1 (bottom category), which triggers cut2 = +INF. + stan::math::set_zero_all_adjoints(); + fvar> out_bot = ordered_logistic_lpmf(y_bot, lam_ffv, c_ffv); + EXPECT_TRUE(std::isfinite(out_bot.val_.val_.val())); + out_bot.d_.d_.grad(); + EXPECT_TRUE(std::isfinite(lam_ffv.val_.val_.adj())) + << "scalar y == 1: val-side adjoint of lambda is non-finite"; + EXPECT_TRUE(std::isfinite(c_ffv[0].val_.val_.adj())); + EXPECT_TRUE(std::isfinite(c_ffv[1].val_.val_.adj())); +} + +// Numerical-value regression: verify that the value AND first-order +// gradients are unchanged by the fix when y contains K. The reference +// values are computed from the algebraically-equivalent hand-rolled form +// log p(y = K | lambda, c) = log_inv_logit(lambda - c[K-2]) +// and its gradient +// dlog p/dlambda = inv_logit(c[K-2] - lambda) +// dlog p/dc[K-2] = -inv_logit(c[K-2] - lambda) +TEST_F(AgradRev, ProbDistributionsOrdLog_top_category_value_and_gradient) { + using stan::math::ordered_logistic_lpmf; + using stan::math::var; + + // K = 3, y = K. lambda = 0.7, c = (0.0, 1.0). c[K-2] = c[1] = 1.0. + // sigmoid(c[K-2] - lambda) = sigmoid(0.3) = 0.574442516811659 + // log p = log_inv_logit(lambda - c[K-2]) = log_inv_logit(-0.3) + // = -log(1 + exp(0.3)) = -0.854355244468526 + int y = 3; + var lam = 0.7; + Eigen::Matrix c(2); + c << 0.0, 1.0; + + var out = ordered_logistic_lpmf(y, lam, c); + EXPECT_FLOAT_EQ(out.val(), -0.854355244468526); + + out.grad(); + EXPECT_FLOAT_EQ(lam.adj(), 0.574442516811659); + EXPECT_FLOAT_EQ(c[0].adj(), 0.0); + EXPECT_FLOAT_EQ(c[1].adj(), -0.574442516811659); +} From e4c668b99cf3d222d98d26b641ecabcf8988761f Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Fri, 22 May 2026 03:05:46 -0400 Subject: [PATCH 2/2] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp | 10 ++++------ stan/math/prim/prob/ordered_logistic_lpmf.hpp | 10 ++++------ .../math/mix/prob/ordered_logistic_glm_lpmf_test.cpp | 4 ++-- test/unit/math/mix/prob/ordered_logistic_test.cpp | 2 +- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp b/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp index 24e53962a54..e5855241666 100644 --- a/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp +++ b/stan/math/prim/prob/ordered_logistic_glm_lpmf.hpp @@ -165,12 +165,10 @@ inline return_type_t ordered_logistic_glm_lpmf( Array exp_m_abs_cut1 = (-cut1.abs()).exp(); Array exp_m_abs_cut2 = (-cut2.abs()).exp(); Array exp_cuts_diff = exp(cuts_y2 - cuts_y1); - Array inv_logit_neg_cut2 - = (cut2 > 0).select(exp_m_abs_cut2 / (1 + exp_m_abs_cut2), - 1 / (1 + exp_m_abs_cut2)); - Array inv_logit_neg_cut1 - = (cut1 > 0).select(exp_m_abs_cut1 / (1 + exp_m_abs_cut1), - 1 / (1 + exp_m_abs_cut1)); + Array inv_logit_neg_cut2 = (cut2 > 0).select( + exp_m_abs_cut2 / (1 + exp_m_abs_cut2), 1 / (1 + exp_m_abs_cut2)); + Array inv_logit_neg_cut1 = (cut1 > 0).select( + exp_m_abs_cut1 / (1 + exp_m_abs_cut1), 1 / (1 + exp_m_abs_cut1)); Array d1 = inv_logit_neg_cut2 - exp_cuts_diff / (exp_cuts_diff - 1); Array d2 diff --git a/stan/math/prim/prob/ordered_logistic_lpmf.hpp b/stan/math/prim/prob/ordered_logistic_lpmf.hpp index 491550ce60a..db2b7d86d35 100644 --- a/stan/math/prim/prob/ordered_logistic_lpmf.hpp +++ b/stan/math/prim/prob/ordered_logistic_lpmf.hpp @@ -178,12 +178,10 @@ inline return_type_t ordered_logistic_lpmf(const T_y& y, Array exp_m_abs_cut1 = (-cut1.abs()).exp(); Array exp_m_abs_cut2 = (-cut2.abs()).exp(); Array exp_cuts_diff = exp(cuts_y2 - cuts_y1); - Array inv_logit_neg_cut2 - = (cut2 > 0).select(exp_m_abs_cut2 / (1 + exp_m_abs_cut2), - 1 / (1 + exp_m_abs_cut2)); - Array inv_logit_neg_cut1 - = (cut1 > 0).select(exp_m_abs_cut1 / (1 + exp_m_abs_cut1), - 1 / (1 + exp_m_abs_cut1)); + Array inv_logit_neg_cut2 = (cut2 > 0).select( + exp_m_abs_cut2 / (1 + exp_m_abs_cut2), 1 / (1 + exp_m_abs_cut2)); + Array inv_logit_neg_cut1 = (cut1 > 0).select( + exp_m_abs_cut1 / (1 + exp_m_abs_cut1), 1 / (1 + exp_m_abs_cut1)); Array d1 = inv_logit_neg_cut2 - exp_cuts_diff / (exp_cuts_diff - 1); Array d2 diff --git a/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp b/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp index f3b85259f25..649d5cf2f46 100644 --- a/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp +++ b/test/unit/math/mix/prob/ordered_logistic_glm_lpmf_test.cpp @@ -72,8 +72,8 @@ TEST_F(AgradRev, for (int i = 0; i < N; i++) { for (int j = 0; j < D; j++) { EXPECT_TRUE(std::isfinite(x_ffv(i, j).val_.val_.adj())) - << "x_ffv(" << i << "," << j - << ").val_.val_.adj() non-finite (y[" << i << "]=" << y[i] << ")"; + << "x_ffv(" << i << "," << j << ").val_.val_.adj() non-finite (y[" + << i << "]=" << y[i] << ")"; } } for (int j = 0; j < D; j++) { diff --git a/test/unit/math/mix/prob/ordered_logistic_test.cpp b/test/unit/math/mix/prob/ordered_logistic_test.cpp index 96753750265..28d4e5856ff 100644 --- a/test/unit/math/mix/prob/ordered_logistic_test.cpp +++ b/test/unit/math/mix/prob/ordered_logistic_test.cpp @@ -661,7 +661,7 @@ TEST_F(AgradRev, ProbDistributionsOrdLog_scalar_top_category_higher_order_ad) { fvar> lam_ffv; lam_ffv.val_ = 0.7; - lam_ffv.d_ = 0.0; // zero outer tangent (compute_s2 pattern) + lam_ffv.d_ = 0.0; // zero outer tangent (compute_s2 pattern) lam_ffv.val_.d_ = 1.0; vector_ffv c_ffv(2);