Agenta-AI · mmabrouk · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 31, 2026
diff --git a/.github/workflows/41-railway-setup.yml b/.github/workflows/41-railway-setup.yml
@@ -73,10 +73,45 @@ jobs:
           chmod +x hosting/railway/oss/scripts/*.sh
           # shellcheck source=/dev/null
           source hosting/railway/oss/scripts/preview-resolve-env.sh
-          hosting/railway/oss/scripts/bootstrap.sh
+
+          # Persist the full bootstrap output so the "Upload setup log" step can
+          # publish it as an artifact, regardless of live-log truncation.
+          log_file="${GITHUB_WORKSPACE:-$PWD}/railway-setup-${PR_NUMBER:-unknown}.log"
+
+          set +e
+          hosting/railway/oss/scripts/bootstrap.sh 2>&1 | tee "$log_file"
+          setup_status=${PIPESTATUS[0]}
+          set -e
+
           echo "project_name=${RAILWAY_PROJECT_NAME}" >> "$GITHUB_OUTPUT"
           echo "environment_name=${RAILWAY_ENVIRONMENT_NAME}" >> "$GITHUB_OUTPUT"
 
+          if [ "$setup_status" -ne 0 ]; then
+            {
+              echo "### Railway Preview Setup — Failed"
+              echo
+              echo "<details><summary>Setup log (last 100 lines)</summary>"
+              echo
+              echo '```'
+              tail -n 100 "$log_file" 2>/dev/null
+              echo '```'
+              echo "</details>"
+            } >> "$GITHUB_STEP_SUMMARY"
+            exit "$setup_status"
+          fi
+
+      - name: Upload setup log
+        if: always()
+        # Diagnostics only: runs even if setup failed (always()); a failed/duplicate upload must never fail the job.
+        continue-on-error: true
+        uses: actions/upload-artifact@v4
+        with:
+          name: railway-setup-log-${{ inputs.pr_number }}
+          path: railway-setup-*.log
+          if-no-files-found: ignore
+          overwrite: true
+          retention-days: 7
+
       - name: Summary
         run: |
           {

diff --git a/.github/workflows/43-railway-deploy.yml b/.github/workflows/43-railway-deploy.yml
@@ -112,11 +112,10 @@ jobs:
           # shellcheck source=/dev/null
           source hosting/railway/oss/scripts/preview-resolve-env.sh
 
-          log_file="$(mktemp)"
-          cleanup() {
-            rm -f "$log_file"
-          }
-          trap cleanup EXIT
+          # Keep the log in the workspace so the "Upload deploy log" step can
+          # publish it as an artifact. GitHub's live log can truncate streamed
+          # output, so we always persist a full copy.
+          log_file="${GITHUB_WORKSPACE:-$PWD}/railway-deploy-${PR_NUMBER:-unknown}.log"
 
           project="$RAILWAY_PROJECT_NAME"
           environment_name="$RAILWAY_ENVIRONMENT_NAME"
@@ -177,13 +176,58 @@ jobs:
           echo "environment_name=${environment_name}" >> "$GITHUB_OUTPUT"
           echo "railway_logs_url=${railway_logs_url}" >> "$GITHUB_OUTPUT"
 
-          trap - EXIT
-          cleanup
+          # Best-effort diagnostics; never let these change the step outcome.
+          set +e
+          # On failure, pull the tail of the key services' Railway logs into
+          # this job so the root cause (e.g. a Postgres crash-loop) is visible
+          # here instead of only in the Railway dashboard.
+          if [ "$deploy_failed" = "true" ]; then
+            # Tee into the persisted log so the uploaded artifact and the
+            # step-summary tail include the Railway service logs too, not just
+            # the (possibly truncated) live Actions log.
+            dump_railway_logs 2>&1 | tee -a "$log_file"
+          fi
+
+          status_label="Deployed"
+          [ "$deploy_failed" = "true" ] && status_label="Failed"
+          {
+            echo "### Railway Preview Deploy"
+            echo
+            echo "| Item | Value |"
+            echo "| --- | --- |"
+            echo "| PR | \`${PR_NUMBER}\` |"
+            echo "| Image tag | \`${IMAGE_TAG}\` |"
+            echo "| Status | ${status_label} |"
+            [ -n "$url" ] && echo "| Preview URL | ${url} |"
+            [ -n "$railway_logs_url" ] && echo "| Railway logs | [Open logs](${railway_logs_url}) |"
+            if [ "$deploy_failed" = "true" ]; then
+              echo
+              echo "<details><summary>Deploy log (last 100 lines)</summary>"
+              echo
+              echo '```'
+              tail -n 100 "$log_file" 2>/dev/null
+              echo '```'
+              echo "</details>"
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
+          set -e
 
           if [ "$deploy_failed" = "true" ]; then
             exit 1
           fi
 
+      - name: Upload deploy log
+        if: always()
+        # Diagnostics only: runs even if the deploy failed (always()); a failed/duplicate upload must never fail the job.
+        continue-on-error: true
+        uses: actions/upload-artifact@v4
+        with:
+          name: railway-deploy-log-${{ inputs.pr_number }}
+          path: railway-deploy-*.log
+          if-no-files-found: ignore
+          overwrite: true
+          retention-days: 7
+
       - name: Post preview URL as PR comment
         if: inputs.pr_number != '' && steps.deploy.outputs.preview_url != ''
         uses: actions/github-script@v7

diff --git a/hosting/railway/oss/scripts/bootstrap.sh b/hosting/railway/oss/scripts/bootstrap.sh
@@ -7,6 +7,8 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
 # shellcheck source=lib.sh
 source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
 
+install_error_trap
+
 PROJECT_NAME="${RAILWAY_PROJECT_NAME:-agenta-oss-railway}"
 ENV_NAME="${RAILWAY_ENVIRONMENT_NAME:-staging}"
 SOURCE_COMPOSE_FILE="${RAILWAY_SOURCE_COMPOSE_FILE:-$(railway_source_compose_file "$ROOT_DIR")}"

diff --git a/hosting/railway/oss/scripts/configure.sh b/hosting/railway/oss/scripts/configure.sh
@@ -5,6 +5,8 @@ set -euo pipefail
 # shellcheck source=lib.sh
 source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
 
+install_error_trap
+
 PROJECT_NAME="${RAILWAY_PROJECT_NAME:-agenta-oss-railway}"
 ENV_NAME="${RAILWAY_ENVIRONMENT_NAME:-staging}"
 SKIP_UNSETS="${CONFIGURE_SKIP_UNSETS:-false}"

diff --git a/hosting/railway/oss/scripts/deploy-from-images.sh b/hosting/railway/oss/scripts/deploy-from-images.sh
@@ -8,6 +8,8 @@ TMP_DIR="$(mktemp -d)"
 # shellcheck source=lib.sh
 source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"
 
+install_error_trap
+
 cleanup() {
     rm -rf "$TMP_DIR"
 }

diff --git a/hosting/railway/oss/scripts/lib.sh b/hosting/railway/oss/scripts/lib.sh
@@ -91,45 +91,181 @@ require_compose_redis_image() {
     printf "%s" "$image"
 }
 
-# railway_call: Run a railway CLI command, retrying on rate-limit responses.
+# _railway_redact: Mask secret values before they are logged. Reads stdin and
+# writes redacted text to stdout. Masks the value of any KEY=VALUE token whose
+# (uppercase) key contains PASSWORD/TOKEN/SECRET/KEY, plus the password segment
+# of any scheme://user:password@host URL. Keeps diagnostic output safe to print
+# and to upload as a CI artifact even when a failing command echoes its args.
 #
-# Railway returns "You are being ratelimited. Please try again later" on
-# HTTP 429. This wrapper detects that message and backs off with exponential
-# backoff before retrying.
+# Applied only to failure/diagnostic output, never to the success path that
+# callers parse (e.g. `variable list -k` results).
+_railway_redact() {
+    # Values may be bare or quoted ("a b" / 'a b'); mask the whole quoted span.
+    sed -E -e "s/([A-Z0-9_]*(PASSWORD|TOKEN|SECRET|KEY)[A-Z0-9_]*[[:space:]]*=[[:space:]]*)(\"[^\"]*\"|'[^']*'|[^[:space:]]+)/\1***REDACTED***/g" \
+        -e 's#(://[A-Za-z0-9._~-]+:)[^@[:space:]/]+@#\1***REDACTED***@#g'
+}
+
+# railway_call: Run a railway CLI command with smart, context-aware retries.
 #
-# Usage:
-#   railway_call [railway args...]
+# Railway's GraphQL API (backboard.railway.com) intermittently rate-limits
+# (HTTP 429) and, far more often for us, times out write mutations such as
+# `variable set` ("operation timed out" / "error sending request"). This
+# wrapper retries the cases that are safe + useful to retry, and fails fast on
+# the rest:
 #
-# Environment variables:
-#   RAILWAY_RETRY_MAX     Max retry attempts (default: 5)
-#   RAILWAY_RETRY_DELAY   Initial delay in seconds (default: 10)
+#   - rate-limit          -> always retried. A 429 is a clean rejection: the
+#                            request was not processed, so a retry cannot
+#                            duplicate work.
+#   - transient network   -> retried ONLY for idempotent commands. A timed-out
+#     (timeout/5xx/reset)    mutation may have already succeeded server-side, so
+#                            we do NOT blind-retry non-idempotent creates
+#                            (`init`, `add`, `environment new`, `volume add`):
+#                            that risks duplicate projects/services/volumes.
+#                            `variable set` (our actual offender) is an
+#                            idempotent upsert, so it is retried.
+#   - anything else       -> not retried. Deterministic errors ("not found",
+#                            "unauthorized", "No service linked") fail fast
+#                            instead of burning the backoff budget.
 #
+# Backoff is exponential. Failure output is redacted (see _railway_redact).
+#
+# Environment variables:
+#   RAILWAY_RETRY_MAX     Max attempts (default: 5)
+#   RAILWAY_RETRY_DELAY   Initial backoff delay in seconds (default: 10)
 railway_call() {
     local max_attempts="${RAILWAY_RETRY_MAX:-5}"
     local delay="${RAILWAY_RETRY_DELAY:-10}"
     local attempt=1
     local output
     local exit_code
 
+    # A zero, negative or non-numeric RAILWAY_RETRY_MAX would skip the loop and
+    # return success without ever running railway: always attempt at least once.
+    [ "$max_attempts" -ge 1 ] 2>/dev/null || max_attempts=1
+    # Is this a non-idempotent create? If so, an ambiguous timeout must not be
+    # blind-retried (the resource may already exist). Rate-limit retries are
+    # still safe because a 429 is rejected before any work is done.
+    local idempotent=1
+    case "$1" in
+        init | add) idempotent=0 ;;
+        environment) [ "${2:-}" = "new" ] && idempotent=0 ;;
+        volume) [ "${2:-}" = "add" ] && idempotent=0 ;;
+    esac
     while [ "$attempt" -le "$max_attempts" ]; do
-        output="$(railway "$@" 2>&1)" && exit_code=0 || exit_code=$?
-
-        if printf "%s" "$output" | grep -qi "ratelimit\|rate.limit\|rate limit"; then
-            if [ "$attempt" -eq "$max_attempts" ]; then
-                printf "railway %s: rate-limited after %d attempts\n" "$*" "$max_attempts" >&2
-                printf "%s\n" "$output" >&2
-                return 1
-            fi
-            printf "railway %s: rate-limited, retrying in %ds (attempt %d/%d)\n" \
-                "$1" "$delay" "$attempt" "$max_attempts" >&2
+        # `set +Ee` in the subshell: a failing `railway` must neither trip
+        # errexit nor fire an inherited ERR trap; this wrapper reports it.
+        output="$(set +Ee; railway "$@" 2>&1)" && exit_code=0 || exit_code=$?
+
+        # Success: emit on stdout so callers can capture the output.
+        if [ "$exit_code" -eq 0 ]; then
+            printf "%s\n" "$output"
+            return 0
+        fi
+
+        # Classify the failure to decide whether a retry is safe + useful.
+        local kind="error"
+        if printf "%s" "$output" | grep -qiE "ratelimit|rate.?limit"; then
+            kind="rate-limit"
+        elif printf "%s" "$output" | grep -qiE "timed out|error sending request|failed to fetch|error trying to connect|connection (reset|refused|closed|error)|temporarily unavailable|service unavailable|bad gateway|gateway time-?out|broken pipe|unexpected eof|tls handshake"; then
+            kind="transient"
+        fi
+
+        local retryable=0
+        case "$kind" in
+            rate-limit) retryable=1 ;;
+            transient) [ "$idempotent" -eq 1 ] && retryable=1 ;;
+        esac
+
+        if [ "$retryable" -eq 1 ] && [ "$attempt" -lt "$max_attempts" ]; then
+            printf "railway %s: %s, retrying in %ds (attempt %d/%d)\n" \
+                "$1" "$kind" "$delay" "$attempt" "$max_attempts" >&2
             sleep "$delay"
             delay=$((delay * 2))
             attempt=$((attempt + 1))
             continue
         fi
 
-        # Not a rate-limit error. Print output and return the original exit code.
-        printf "%s\n" "$output"
+        # Give up: not retryable, attempts exhausted, or an ambiguous timeout
+        # on a non-idempotent create. Surface the (redacted) railway error.
+        if [ "$retryable" -eq 1 ]; then
+            printf "railway %s: %s — giving up after %d attempts\n" \
+                "$1" "$kind" "$attempt" >&2
+        elif [ "$kind" = "transient" ] && [ "$idempotent" -eq 0 ]; then
+            printf "railway %s: %s on non-idempotent command — not retrying (may have partially succeeded)\n" \
+                "$1" "$kind" >&2
+        fi
+        [ -n "$output" ] && printf "%s\n" "$output" | _railway_redact >&2
         return "$exit_code"
     done
 }
+
+# install_error_trap: Turn a bare "exit 1" into a diagnostic that names the
+# failing command and prints a short call stack. Call once near the top of a
+# script, after sourcing this file. Enables errtrace (set -E) so the trap also
+# fires for failures inside functions.
+#
+# railway_call disables errtrace inside its own command substitution, so
+# tolerated failures (callers using `|| true`) do not reach this trap.
+_railway_on_error() {
+    # The same failure can unwind through several stack frames; report once.
+    [ -n "${_RAILWAY_ERR_HANDLED:-}" ] && return 0
+    _RAILWAY_ERR_HANDLED=1
+
+    # $1 is the failing exit status, $2 the failing command text. Redact it:
+    # $BASH_COMMAND can carry secret-bearing args (`variable set ... KEY=...`).
+    local exit_code="$1" cmd i
+    cmd="$(printf '%s' "$2" | _railway_redact)"
+
+    printf '\n[railway][FAIL] command failed (exit %s): %s\n' "$exit_code" "$cmd" >&2
+
+    for ((i = 1; i < ${#FUNCNAME[@]}; i++)); do
+        printf '    at %s (%s:%s)\n' \
+            "${FUNCNAME[i]}" "${BASH_SOURCE[i]}" "${BASH_LINENO[i - 1]}" >&2
+    done
+
+    # Surface a GitHub Actions annotation when running in CI. Workflow commands
+    # are single-line: escape %, CR and LF so a multi-line command is not cut off.
+    if [ "${GITHUB_ACTIONS:-}" = "true" ]; then
+        cmd=${cmd//%/%25}; cmd=${cmd//$'\r'/%0D}; cmd=${cmd//$'\n'/%0A}
+        printf '::error::[railway] command failed (exit %s): %s\n' "$exit_code" "$cmd" >&2
+    fi
+}
+
+install_error_trap() {
+    trap '_railway_on_error "$?" "$BASH_COMMAND"' ERR
+    set -o errtrace
+}
+
+# dump_railway_logs: Best-effort snapshot of Railway service logs, for CI
+# debugging. Uses --lines (non-streaming) and --latest (works even when a
+# deployment failed or is crash-looping), wrapped in a hard timeout so it can
+# never hang or fail the caller. Requires a linked project/environment.
+#
+# Usage: dump_railway_logs [service ...]   (defaults to the core infra set)
+#
+# Environment variables:
+#   RAILWAY_LOG_TAIL      Lines to fetch per service (default: 50)
+#   RAILWAY_LOG_TIMEOUT   Per-service timeout in seconds (default: 30)
+dump_railway_logs() {
+    local services=("$@")
+    if [ "${#services[@]}" -eq 0 ]; then
+        services=(Postgres redis alembic api supertokens web)
+    fi
+
+    local lines="${RAILWAY_LOG_TAIL:-50}"
+    local timeout_s="${RAILWAY_LOG_TIMEOUT:-30}"
+    local svc
+    local logs
+
+    for svc in "${services[@]}"; do
+        printf '\n===== railway logs (last %s lines): %s =====\n' "$lines" "$svc" >&2
+        # Capture first so the exit status reflects railway/timeout (not the
+        # redactor); print redacted either way (logs and errors may embed DB URIs).
+        if logs="$(timeout "$timeout_s" railway logs --service "$svc" --lines "$lines" --latest 2>&1)"; then
+            printf '%s\n' "$logs" | _railway_redact >&2
+        else
+            printf '(no logs available for service: %s)\n%s' "$svc" "${logs:+$logs$'\n'}" | _railway_redact >&2
+        fi
+    done
+
+    return 0
+}
diff --git a/web/packages/agenta-playground/src/state/controllers/playgroundController.ts b/web/packages/agenta-playground/src/state/controllers/playgroundController.ts
@@ -82,6 +82,7 @@ import {
     newTestcaseCountAtom,
     newTestcaseDataHashAtom,
 } from "../execution/selectors"
+import {pruneDanglingConnections} from "../helpers/connectionGraph"
 import {extractAndLoadChatMessagesAtom} from "../helpers/extractAndLoadChatMessages"
 import {normalizeTestcaseRowsForLoad} from "../helpers/testcaseRowNormalization"
 import type {EntitySelection, PlaygroundNode, RunnableType} from "../types"
@@ -412,8 +413,17 @@ const changePrimaryNodeAtom = atom(null, (get, set, entity: EntitySelection) =>
         label: entity.label,
     }
 
-    set(playgroundNodesAtom, [updatedNode, ...nodes.slice(1)])
-    set(outputConnectionsAtom, [])
+    const nextNodes = [updatedNode, ...nodes.slice(1)]
+    set(playgroundNodesAtom, nextNodes)
+
+    // Preserve downstream connections instead of clearing them. The primary
+    // node is updated in place, so its `id` is unchanged and any chain sourced
+    // from it (e.g. app → evaluator) stays valid. Clearing unconditionally
+    // orphaned the downstream evaluator on app-revision re-selection:
+    // connectDownstreamNode then no-ops because the evaluator node is still
+    // present, so the edge was never recreated and the evaluator silently
+    // stopped running. Only drop connections whose endpoints no longer exist.
+    set(outputConnectionsAtom, pruneDanglingConnections(get(outputConnectionsAtom), nextNodes))
 
     // Update local testset name if not connected to a remote testset
     const currentTestset = get(connectedTestsetAtom)