diff --git a/README.md b/README.md
index 8eeb1cba1..433d498fa 100644
--- a/README.md
+++ b/README.md
@@ -128,8 +128,6 @@ export function HelloTriangle() {
       passEncoder.end();
 
       device.queue.submit([commandEncoder.finish()]);
-
-      context.present();
     };
     helloTriangle();
   }, [ref]);
@@ -174,17 +172,28 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get();
 
 ### Frame Scheduling
 
-In React Native, we want to keep frame presentation as a manual operation as we plan to provide more advanced rendering options that are React Native specific.  
-This means that when you are ready to present a frame, you need to call `present` on the context.
+On the **main JS runtime** and the **Reanimated UI runtime**, frame presentation is automatic: once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). You do not need to call `present()` on these runtimes.
 
 ```tsx
 // draw
 // submit to the queue
 device.queue.submit([commandEncoder.finish()]);
-// This method is React Native only
-context.present();
+// The frame is presented automatically on the next vsync.
 ```
 
+When you render from a **dedicated worklet runtime** (e.g. `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame processor), your code runs on its own thread, where presentation can't be driven automatically. Call `context.present()` yourself after submitting:
+
+```tsx
+const onFrame = () => {
+  "worklet";
+  // draw on the dedicated runtime's thread
+  device.queue.submit([commandEncoder.finish()]);
+  context.present(); // required on dedicated worklet runtimes; a no-op on JS/UI
+};
+```
+
+`present()` is safe to call from a worklet that may run on either the UI runtime or a dedicated runtime: it presents the frame on a dedicated runtime and does nothing on the JS and UI runtimes (which auto-present).
+
 ### Canvas Transparency
 
 On Android, the `alphaMode` property is ignored when configuring the canvas.
@@ -296,7 +305,6 @@ const render = () => {
 
   // Release the surface's access window right after the submit that sampled it.
   externalTexture.destroy();
-  context.present();
 };
 ```
 
@@ -328,7 +336,6 @@ const renderFrame = (device: GPUDevice, context: GPUCanvasContext) => {
   const commandEncoder = device.createCommandEncoder();
   // ... render ...
   device.queue.submit([commandEncoder.finish()]);
-  context.present();
 };
 
 // Initialize WebGPU on main thread, then run on UI thread
diff --git a/apps/example/ios/Podfile.lock b/apps/example/ios/Podfile.lock
index fd5ba968c..b4c5f158a 100644
--- a/apps/example/ios/Podfile.lock
+++ b/apps/example/ios/Podfile.lock
@@ -1924,7 +1924,7 @@ PODS:
     - ReactCommon/turbomodule/core
     - SocketRocket
     - Yoga
-  - react-native-wgpu (0.5.12):
+  - react-native-wgpu (0.5.13):
     - boost
     - DoubleConversion
     - fast_float
@@ -3074,7 +3074,7 @@ SPEC CHECKSUMS:
   React-microtasksnativemodule: 75b6604b667d297292345302cc5bfb6b6aeccc1b
   react-native-safe-area-context: c00143b4823773bba23f2f19f85663ae89ceb460
   react-native-skia: fc73e9bdc46ebb420a98c9c2be29fee80f565e79
-  react-native-wgpu: 274ffec11ee3a082260d9f3d1fb54030a5ca0873
+  react-native-wgpu: 0496e9efeb4c3939ab56371005ede4e1468591d1
   React-NativeModulesApple: 879fbdc5dcff7136abceb7880fe8a2022a1bd7c3
   React-oscompat: 93b5535ea7f7dff46aaee4f78309a70979bdde9d
   React-perflogger: 5536d2df3d18fe0920263466f7b46a56351c0510
diff --git a/apps/example/src/CanvasAPI/CanvasAPI.tsx b/apps/example/src/CanvasAPI/CanvasAPI.tsx
index a9f5c4928..a403c8388 100644
--- a/apps/example/src/CanvasAPI/CanvasAPI.tsx
+++ b/apps/example/src/CanvasAPI/CanvasAPI.tsx
@@ -89,8 +89,6 @@ export const CanvasAPI = () => {
             passEncoder.end();
 
             device.queue.submit([commandEncoder.finish()]);
-
-            context.present();
           })()
         }
         title="check surface"
diff --git a/apps/example/src/ComputeToys/engine/index.ts b/apps/example/src/ComputeToys/engine/index.ts
index f0fa08f07..8db2562ad 100644
--- a/apps/example/src/ComputeToys/engine/index.ts
+++ b/apps/example/src/ComputeToys/engine/index.ts
@@ -398,7 +398,6 @@ fn passSampleLevelBilinearRepeat(pass_index: int, uv: float2, lod: float) -> flo
 
       // Submit command buffer
       this.device.queue.submit([encoder.finish()]);
-      this.surface!.present();
 
       // Update frame counter
       this.bindings!.time.host.frame += 1;
diff --git a/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx b/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx
index f8399ee8a..7c973e03f 100644
--- a/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx
+++ b/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx
@@ -247,7 +247,6 @@ export const ImportExternalTexture = () => {
       // Now that the work sampling it has been submitted, end the external
       // texture's access window so the frame's surface is released promptly.
       externalTex?.destroy();
-      context.present();
       rafRef.current = requestAnimationFrame(render);
     };
     rafRef.current = requestAnimationFrame(render);
diff --git a/apps/example/src/Reanimated/Reanimated.tsx b/apps/example/src/Reanimated/Reanimated.tsx
index 505296565..3761c90f9 100644
--- a/apps/example/src/Reanimated/Reanimated.tsx
+++ b/apps/example/src/Reanimated/Reanimated.tsx
@@ -78,8 +78,10 @@ export const webGPUDemo = (
     passEncoder.end();
 
     device.queue.submit([commandEncoder.finish()]);
-
+    // Needed on a dedicated worklet runtime (DedicatedThread); a no-op on the
+    // UI runtime (UIThread), where present is automatic.
     context.present();
+
     if (runAnimation.value) {
       requestAnimationFrame(frame);
     }
diff --git a/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx b/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx
index b5627cc43..197657460 100644
--- a/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx
+++ b/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx
@@ -268,7 +268,6 @@ export const SharedTextureMemory = () => {
       }
       pass.end();
       device.queue.submit([encoder.finish()]);
-      context.present();
       rafRef.current = requestAnimationFrame(render);
     };
     rafRef.current = requestAnimationFrame(render);
diff --git a/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx b/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx
index 907264638..b1906cf74 100644
--- a/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx
+++ b/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx
@@ -185,8 +185,6 @@ export function StorageBufferVertices() {
 
     const commandBuffer = encoder.finish();
     device.queue.submit([commandBuffer]);
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    (context as any).present();
   });
 
   return (
diff --git a/apps/example/src/ThreeJS/Backdrop.tsx b/apps/example/src/ThreeJS/Backdrop.tsx
index 8ed2a8c91..113325b9d 100644
--- a/apps/example/src/ThreeJS/Backdrop.tsx
+++ b/apps/example/src/ThreeJS/Backdrop.tsx
@@ -150,7 +150,6 @@ export const Backdrop = () => {
       }
 
       renderer.render(scene, camera);
-      context!.present();
     }
     return () => {
       renderer.setAnimationLoop(null);
diff --git a/apps/example/src/ThreeJS/Cube.tsx b/apps/example/src/ThreeJS/Cube.tsx
index d3e9707b5..ea3fe0f23 100644
--- a/apps/example/src/ThreeJS/Cube.tsx
+++ b/apps/example/src/ThreeJS/Cube.tsx
@@ -31,7 +31,6 @@ export const Cube = () => {
       mesh.rotation.y = time / 1000;
 
       renderer.render(scene, camera);
-      context.present();
     }
     renderer.setAnimationLoop(animate);
     return () => {
diff --git a/apps/example/src/ThreeJS/Helmet.tsx b/apps/example/src/ThreeJS/Helmet.tsx
index be7cb626f..70720d360 100644
--- a/apps/example/src/ThreeJS/Helmet.tsx
+++ b/apps/example/src/ThreeJS/Helmet.tsx
@@ -49,7 +49,6 @@ export const Helmet = () => {
     function animate() {
       animateCamera();
       renderer.render(scene, camera);
-      context!.present();
     }
 
     return () => {
diff --git a/apps/example/src/ThreeJS/InstancedMesh.tsx b/apps/example/src/ThreeJS/InstancedMesh.tsx
index 3f60631de..5b7c7ca4d 100644
--- a/apps/example/src/ThreeJS/InstancedMesh.tsx
+++ b/apps/example/src/ThreeJS/InstancedMesh.tsx
@@ -59,7 +59,6 @@ export const InstancedMesh = () => {
 
     function animate() {
       render();
-      context!.present();
     }
 
     function render() {
diff --git a/apps/example/src/ThreeJS/PostProcessing.tsx b/apps/example/src/ThreeJS/PostProcessing.tsx
index d94ef1728..0c2980501 100644
--- a/apps/example/src/ThreeJS/PostProcessing.tsx
+++ b/apps/example/src/ThreeJS/PostProcessing.tsx
@@ -72,7 +72,6 @@ export const PostProcessing = () => {
         mixer.update(delta);
       }
       postProcessing.render();
-      context!.present();
     }
     return () => {
       renderer.setAnimationLoop(null);
diff --git a/apps/example/src/ThreeJS/Retargeting.tsx b/apps/example/src/ThreeJS/Retargeting.tsx
index c25601885..8b8dd9a29 100644
--- a/apps/example/src/ThreeJS/Retargeting.tsx
+++ b/apps/example/src/ThreeJS/Retargeting.tsx
@@ -302,7 +302,6 @@ export const Retargeting = () => {
       source.mixer.update(delta);
       mixer.update(delta);
       renderer.render(scene, camera);
-      context.present();
     });
 
     return () => {
diff --git a/apps/example/src/ThreeJS/components/FiberCanvas.tsx b/apps/example/src/ThreeJS/components/FiberCanvas.tsx
index 91b699553..92b928987 100644
--- a/apps/example/src/ThreeJS/components/FiberCanvas.tsx
+++ b/apps/example/src/ThreeJS/components/FiberCanvas.tsx
@@ -66,7 +66,6 @@ export const FiberCanvas = ({
         const renderFrame = state.gl.render.bind(state.gl);
         state.gl.render = (s: THREE.Scene, c: THREE.Camera) => {
           renderFrame(s, c);
-          context?.present();
         };
       },
     });
diff --git a/apps/example/src/Triangle/HelloTriangle.tsx b/apps/example/src/Triangle/HelloTriangle.tsx
index 3e28d6c12..caeb560b3 100644
--- a/apps/example/src/Triangle/HelloTriangle.tsx
+++ b/apps/example/src/Triangle/HelloTriangle.tsx
@@ -77,8 +77,6 @@ export function HelloTriangle() {
       passEncoder.end();
 
       device.queue.submit([commandEncoder.finish()]);
-
-      context.present();
     })();
   }, [ref]);
 
diff --git a/apps/example/src/Triangle/HelloTriangleMSAA.tsx b/apps/example/src/Triangle/HelloTriangleMSAA.tsx
index 5d66983d5..b9518fbe9 100644
--- a/apps/example/src/Triangle/HelloTriangleMSAA.tsx
+++ b/apps/example/src/Triangle/HelloTriangleMSAA.tsx
@@ -87,7 +87,6 @@ export function HelloTriangleMSAA() {
       }
 
       frame();
-      context.present();
     })();
   }, [ref]);
 
diff --git a/apps/example/src/VisionCamera/VisionCamera.tsx b/apps/example/src/VisionCamera/VisionCamera.tsx
index c4adcfaa0..f6c6c95bd 100644
--- a/apps/example/src/VisionCamera/VisionCamera.tsx
+++ b/apps/example/src/VisionCamera/VisionCamera.tsx
@@ -613,11 +613,13 @@ const CameraView = () => {
           pass.draw(3);
           pass.end();
           device.queue.submit([encoder.finish()]);
+          // Vision Camera frame processors run on a dedicated worklet runtime,
+          // so present explicitly (auto-present only covers the JS/UI runtime).
+          context.present();
           // The work sampling it is submitted, so end the external texture's
           // access window now to release the camera frame's surface promptly
           // (don't wait for GC, which would starve the frame buffer pool).
           externalTex.destroy();
-          context.present();
         } finally {
           videoFrame.release();
         }
diff --git a/apps/example/src/components/Texture.tsx b/apps/example/src/components/Texture.tsx
index d9e689b41..5bd82a911 100644
--- a/apps/example/src/components/Texture.tsx
+++ b/apps/example/src/components/Texture.tsx
@@ -145,7 +145,6 @@ export const Texture = ({ texture, style, device }: GPUTextureProps) => {
     renderPass.end();
 
     device.queue.submit([commandEncoder.finish()]);
-    context.present();
   }, [device, state, texture, ref]);
   return <Canvas ref={ref} style={style} />;
 };
diff --git a/apps/example/src/components/useWebGPU.ts b/apps/example/src/components/useWebGPU.ts
index ac8a631ac..1a399aafe 100644
--- a/apps/example/src/components/useWebGPU.ts
+++ b/apps/example/src/components/useWebGPU.ts
@@ -57,7 +57,6 @@ export const useWebGPU = (scene: Scene) => {
         const render = () => {
           const timestamp = Date.now();
           renderScene(timestamp);
-          context.present();
           animationFrameId.current = requestAnimationFrame(render);
         };
 
diff --git a/docs/refactor-async-present-plan.md b/docs/refactor-async-present-plan.md
deleted file mode 100644
index e69706534..000000000
--- a/docs/refactor-async-present-plan.md
+++ /dev/null
@@ -1,317 +0,0 @@
-# Refactor: event-driven async + auto-present
-
-Status: **Phase 0 complete — all spikes GREEN, ready for Phase 1**
-Branch: `claude/keen-darwin-xeywa`
-
-This document is the handoff for moving the async + present refactor forward. Phase 0
-(spikes) needs a real local machine: installed `node_modules`, a Dawn build, and the
-iOS/Android toolchains. Everything below the "How to resume locally" section is meant to
-be executed on your computer, not in the web container.
-
----
-
-## Goals (locked)
-
-- **Async**: replace the JS-thread polling loop with a **background `WaitAny` GPU thread**
-  (Dawn `TimedWaitAny` is already enabled — `packages/webgpu/cpp/rnwgpu/api/GPU.cpp:17-23`).
-- **Present**: **remove `context.present()` entirely** (breaking) in favor of a **global
-  Choreographer / CADisplayLink-driven auto-present**.
-- **Scope**: first-class for **all runtimes** — main JS, the reanimated UI runtime, and
-  `createWorkletRuntime` dedicated runtimes.
-
----
-
-## What exists today (the two problems)
-
-### Async (polling) — `packages/webgpu/cpp/rnwgpu/async/`
-- Every async op (`requestAdapter`, `requestDevice`, `mapAsync`, `onSubmittedWorkDone`,
-  `createRender/ComputePipelineAsync`, `popErrorScope`) registers a Dawn callback with
-  `CallbackMode::AllowProcessEvents` and calls `AsyncRunner::postTask`.
-- `AsyncRunner::requestTick` (`async/AsyncRunner.cpp:89-177`) schedules `tick()` via
-  `setImmediate` / `setTimeout(4ms)` / `queueMicrotask`; `tick()` calls
-  `_instance.ProcessEvents()` and **re-schedules itself while any task is "pumping"**
-  (`AsyncRunner.cpp:189-191`). This is a busy reschedule loop: wasted CPU when idle, added
-  latency, and `JSIMicrotaskDispatcher`'s `queueMicrotask` dispatch is only thread-safe when
-  called on the runtime's own thread.
-
-### Present (manual, non-standard)
-`api/GPUCanvasContext.cpp:56-65` → `SurfaceRegistry.h:116-121` → `wgpu::Surface::Present()`.
-The user must call `context.present()` after every `queue.submit` (**16 JS/TS call sites**).
-No CADisplayLink/Choreographer exists; RN's `requestAnimationFrame` is the only frame driver.
-On Apple, present also does a blocking `WaitForCommandsToBeScheduled` on the JS thread.
-
----
-
-## Target architecture
-
-Three new pieces:
-
-### A. `RuntimeScheduler` — thread-safe "post to this runtime's JS thread"
-Replaces `AsyncDispatcher` / `JSIMicrotaskDispatcher` (which use non-thread-safe
-`queueMicrotask`).
-- Interface: `void scheduleOnJS(std::function<void(jsi::Runtime&)>)`, callable from any thread.
-- **Main runtime**: wraps `react::CallInvoker::invokeAsync` (already available —
-  `apple/WebGPUModule.mm:70`, `android/cpp/cpp-adapter.cpp:25-29`).
-- **Worklet runtimes**: wraps the worklet runtime's own thread executor from
-  `react-native-worklets` 0.8.3 (**see Phase 0 spike #1**).
-- Stored per-runtime in a `RuntimeContext` (the "per-JS-thread event loop"), created on first
-  WebGPU use, torn down via the existing `RuntimeLifecycleMonitor` / `RuntimeAwareCache`
-  (`cpp/jsi/RuntimeAwareCache.h`).
-
-### B. `GpuEventLoop` — background `WaitAny` thread (no polling)
-One per `wgpu::Instance` (effectively global).
-- All async sites switch `CallbackMode::AllowProcessEvents` → **`CallbackMode::WaitAnyOnly`**,
-  returning a `wgpu::Future`.
-- A **small bounded thread pool**; each pending future is waited via
-  `instance.WaitAny(future, /*timeout*/UINT64_MAX)` on a pool thread → genuinely event-driven,
-  **zero idle CPU**, resolves the instant GPU work completes. No wake/interrupt problem (each
-  thread owns one future). **See Phase 0 spike #2.**
-- On completion the worker marshals the result and calls the owning runtime's
-  `RuntimeScheduler.scheduleOnJS` to settle the JS Promise. `AsyncTaskHandle` / `Promise`
-  settle logic is reused; `AsyncRunner` + its tick loop are deleted.
-- Fallback (if concurrent `WaitAny` on one instance is unsafe): single worker thread waiting on
-  the batched future set with a condition-variable re-arm.
-
-### C. `FrameDriver` — global vsync source for auto-present
-One UI-thread singleton; removes the need for `present()`.
-- **iOS**: `CADisplayLink` on the main run loop. **Android**: NDK
-  `AChoreographer_postFrameCallback` from C++ (API 24+, avoids JNI). **See Phase 0 spike #3.**
-- Lifecycle: started when ≥1 surface is configured, stopped at 0.
-- **Auto-present semantics** (spec-aligned "update the rendering" after rAF):
-  1. `GPUCanvasContext::getCurrentTexture()` marks its `SurfaceInfo` dirty and registers a
-     present request with `FrameDriver`, tagged with the owning runtime.
-  2. Each vsync (UI thread), `FrameDriver` dispatches each dirty context's present onto its
-     **owning runtime's `RuntimeScheduler`** — so `Surface.Present()` + the Apple Metal
-     scheduling wait run on the same thread that did `getCurrentTexture` / `submit`, preserving
-     Dawn surface thread-affinity and guaranteeing present-after-submit ordering (FIFO on that
-     loop). Clear dirty after present.
-- Offscreen path (`SurfaceRegistry` `switchToOffscreen`, `src/Offscreen.ts`) has no surface →
-  present is a no-op; tests keep reading back the CPU texture.
-
----
-
-## Phase 0 — Local spikes (DO THESE FIRST, on your machine)
-
-These de-risk the refactor before any large change. Run from repo root.
-
-```bash
-# 0. install deps (web container can't do this)
-yarn install
-```
-
-### Spike 1 — worklet-runtime scheduler (HIGHEST RISK)
-Goal: obtain a **thread-safe** "schedule this lambda on runtime R's thread" for an arbitrary
-worklet runtime (UI runtime + a `createWorkletRuntime` runtime) using
-`react-native-worklets@0.8.3`.
-
-```bash
-# inspect the worklets native API actually shipped at 0.8.3
-find node_modules/react-native-worklets -name "*.h" | grep -iE "Runtime|Scheduler|Invoker|Queue"
-# look for: WorkletRuntime, RuntimeManager / WorkletsModuleProxy, UIScheduler / JSScheduler,
-# and any per-runtime executor / async queue we can call from a background C++ thread.
-```
-Deliverable: a one-paragraph note on the exact symbol(s) to use (or "not exposed → needs JS
-shim / worklets PR"). This determines whether Phase 3 (first-class worklet runtimes) is cheap
-or needs a workaround.
-
-### Spike 2 — concurrent `WaitAny` on one Dawn instance
-Goal: confirm multiple threads can each call `instance.WaitAny(singleFuture, UINT64_MAX)`
-concurrently on the **same** instance safely. If not, switch `GpuEventLoop` to the
-single-worker + condition-variable fallback.
-- Search Dawn headers/docs in `externals/dawn` (or built `libs/`) for `WaitAny` threading
-  guarantees. A tiny throwaway C++ test against the built Dawn is ideal.
-
-### Spike 3 — Android frame callback
-Goal: confirm NDK `AChoreographer_postFrameCallback` is usable at the project `minSdk`
-(`packages/webgpu/android/build.gradle`). If `minSdk < 24` for that API, plan the Java
-`Choreographer` + JNI bridge instead.
-
----
-
-## Phase 0 — Findings (completed 2026-06-02, branch `claude/keen-darwin-xeywa`)
-
-Environment verified: `node_modules` installed, `externals/dawn` present, RN **0.81.4**,
-`react-native-worklets` **0.8.3**, Android `minSdk` **26**, NDK 26/27 available.
-
-### Spike 1 — worklet-runtime scheduler → **GREEN (symbol exists, thread-safe)**
-`worklets/WorkletRuntime/WorkletRuntime.h` exposes exactly what we need:
-- `WorkletRuntime::schedule(std::function<void(jsi::Runtime &)> job)` — posts `job` onto the
-  runtime's own `AsyncQueue` (`WorkletRuntime.cpp:211-227`). It is **callable from any thread**
-  (the underlying `AsyncQueueImpl` is a mutex+condvar queue; `AsyncQueueUI` forwards to the
-  `UIScheduler`). The job runs on the runtime's event-loop thread, under `runtimeMutex_`, and
-  uses `weak_from_this()` so it is a **safe no-op if the runtime was torn down**. This is a
-  drop-in for `RuntimeScheduler::scheduleOnJS` for worklet runtimes.
-- `WorkletRuntime::getWeakRuntimeFromJSIRuntime(jsi::Runtime &rt)` (RN ≥ 0.81, we have 0.81.4)
-  maps a bare `jsi::Runtime&` → `weak_ptr<WorkletRuntime>`, so the per-runtime
-  `RuntimeContext` can recover the scheduler from any worklet runtime (UI + dedicated
-  `createWorkletRuntime`) with no JS shim.
-
-**Caveat (build wiring, not API):** webgpu does **not** currently link worklets natively
-(no worklets entry in `packages/webgpu/*.podspec` or `android/CMakeLists.txt`; only JS-level
-serialization helpers exist). Phase 3 must add the native dependency:
-- iOS: depend on `RNWorklets` pod (it ships public headers under `worklets/`,
-  `header_dir = "worklets"`).
-- Android: import the worklets **prefab** module `worklets` (`prefabPublishing` is on in
-  `react-native-worklets/android/build.gradle`).
-Worklets is already a `peerDependency`, so this adds no new install. Phase 3 stays cheap; no
-worklets PR or JS shim needed.
-
-### Spike 2 — concurrent `WaitAny` on one instance → **GREEN (designed for it)**
-Dawn's native `EventManager` (`externals/dawn/src/dawn/native/EventManager.{h,cpp}`) is built
-for multi-threaded waits:
-- State is `MutexProtected<EventState>`; `mNextFutureID` is atomic; a code comment
-  (`EventManager.h:78-82`) explicitly notes "another thread can race to complete the event …
-  via a WaitAny call".
-- Each `WaitAny` call with a non-zero timeout creates a **stack-local `Waiter`** with its **own**
-  `MutexCondVarProtected<bool>` (`EventManager.cpp:338`, `:106`), registers it per-FutureID in
-  the shared map, then blocks on its own condvar. `SetFutureReady` signals the registered
-  waiters. → **N threads can each block in `WaitAny` on the same instance concurrently, each
-  owning its own future.** This is exactly the plan's primary "one future per pool thread" model.
-
-**Hard constraint discovered (`EventManager.cpp:341-354`):** within a *single* `WaitAny` call
-with a non-zero timeout, you may **not** mix events from multiple queues, nor a queue event
-together with a non-queue event — it returns `WaitStatus::Error` ("Mixed source waits with
-timeouts are not currently supported"). Note `mapAsync`/`onSubmittedWorkDone` are *queue*
-events while `requestAdapter`/`requestDevice`/`createPipelineAsync`/`popErrorScope` are
-*non-queue* events.
-→ **Implication:** adopt the **per-future-per-thread** design (each pool thread waits on exactly
-one future) — it is single-source and always legal. The plan's stated fallback ("single worker
-waiting on the batched future set") is **not viable** as written, because batching mixed sources
-hits this restriction. If a bounded pool is undesirable, the correct fallback is one
-worker-thread *per future* (still single-source), not one worker for a batched set.
-
-### Spike 3 — Android frame callback → **GREEN (no JNI bridge needed)**
-In `android/choreographer.h`, `AChoreographer_getInstance()` and
-`AChoreographer_postFrameCallback()` are both `__INTRODUCED_IN(24)`; `minSdk` is **26**, so the
-pure-NDK path works with no Java `Choreographer`/JNI bridge.
-- `postFrameCallback` is `__DEPRECATED_IN(29)` in favor of `postFrameCallback64` (API 29) /
-  `postVsyncCallback` (API 33). Recommendation: call `postFrameCallback64` when
-  `android_get_device_api_level() >= 29`, else `postFrameCallback` (works on 26-28). Both are
-  acceptable; the 64-bit variant just avoids the deprecation warning and 32-bit time wrap.
-- `AChoreographer_getInstance()` must be called on a thread with a `Looper` (the main/UI
-  thread) — `FrameDriver` already lives on the UI thread, so this is satisfied.
-
-### Net go/no-go
-All three risks clear. Proceed to Phase 1. Two plan amendments: (1) Phase 3 must add the
-worklets native build dependency (podspec + prefab); (2) `GpuEventLoop` must use
-per-future-per-thread waits (drop the batched-future fallback).
-
-## Implementation phases (after Phase 0)
-
-**Phase 1 — Event-driven async** (no public API change; `present()` untouched) — **DONE**
-- Add `RuntimeScheduler` (+ main-runtime CallInvoker impl) and `GpuEventLoop`.
-- Switch all 7 async sites to `WaitAnyOnly` + `GpuEventLoop.addFuture(...)`:
-  `api/GPU.cpp`, `api/GPUAdapter.cpp`, `api/GPUDevice.cpp` (×3), `api/GPUBuffer.cpp`,
-  `api/GPUQueue.cpp`, `api/GPUShaderModule.cpp`.
-- Delete `async/AsyncRunner.*` polling + `async/JSIMicrotaskDispatcher.*`; keep
-  `AsyncTaskHandle` / `Promise` settle path on the new scheduler.
-
-### Phase 1 — what shipped (branch `claude/keen-darwin-xeywa`)
-New files (`cpp/rnwgpu/async/`):
-- `RuntimeScheduler.h` — interface `scheduleOnJS(std::function<void(jsi::Runtime&)>)`,
-  callable from any thread.
-- `CallInvokerScheduler.{h,cpp}` — main-runtime impl wrapping
-  `react::CallInvoker::invokeAsync(CallFunc&&)` (RN 0.81 delivers the job on the JS thread
-  with the runtime).
-- `GpuEventLoop.{h,cpp}` — background `WaitAny` driver. Lazily-grown bounded worker pool
-  (cap = `clamp(hardware_concurrency, 2, 8)`); each worker does a single-future
-  `instance.WaitAny(future, UINT64_MAX)` (always a legal single-source wait, per Phase 0
-  spike 2). Shared state held behind a `shared_ptr` so detached workers (and the
-  `wgpu::Instance` ref they need) outlive the object safely; teardown sets `running=false`
-  and notifies idle workers without joining in-flight GPU waits.
-
-Deviations from the original plan (intentional):
-1. **`AsyncRunner` was replaced by `RuntimeContext`** (`async/RuntimeContext.{h,cpp}`), the
-   per-runtime coordinator the plan's Target-architecture §A already named. It bundles
-   `{RuntimeScheduler, GpuEventLoop}` and exposes `postTask`; all polling internals
-   (`tick`/`requestTick`/`ProcessEvents`/pump counters) are gone. `AsyncTaskHandle` depends
-   only on `RuntimeScheduler`. The old `AsyncRunner` name/files no longer exist anywhere
-   (the 6 `api/*` classes now hold `std::shared_ptr<async::RuntimeContext> _async`); the dead
-   `GPU::getAsyncRunner()` accessor was deleted.
-2. **`postTask`'s callback now returns a `wgpu::Future`** (the value returned by the Dawn
-   `WaitAnyOnly` call), which `AsyncRunner` hands to `GpuEventLoop.addFuture`. A returned
-   future with `id == 0` means "no event to wait on" and is ignored — used by
-   `GPUDevice::getLost` (resolved synchronously or later via `notifyDeviceLost`). This
-   replaced the old `keepPumping` bool argument, which is gone.
-
-`GPU`'s constructor now takes the `CallInvoker` (threaded through from `RNWebGPUManager`,
-which already held it) to build the `CallInvokerScheduler`. `AsyncDispatcher.h` and
-`JSIMicrotaskDispatcher.{h,cpp}` deleted; `android/CMakeLists.txt` updated (iOS podspec
-globs `cpp/**` so it needs no change).
-
-Validation run locally: all changed + new TUs syntax-check under the Android NDK toolchain;
-the full `react-native-wgpu` native lib **compiles and links** for `arm64-v8a` (ninja);
-`cpplint` clean (project filters); `clang-format` (pinned 15.0.0) applied; `yarn tsc` passes
-(no TS changed). On-device runtime behaviour (frame pacing, zero idle CPU) is Phase 4.
-
-**Phase 2 — Auto-present + remove `present()`**
-- Add `FrameDriver` (iOS `CADisplayLink`, Android `AChoreographer`); wire
-  `getCurrentTexture` → register; vsync → dispatch present to owning runtime.
-- Remove `GPUCanvasContext::present` (`api/GPUCanvasContext.h:50,58`, `.cpp:56-65`) and
-  `SurfaceInfo::present` (`SurfaceRegistry.h:116-121`).
-- JS: drop `present` from `RNCanvasContext` (`src/Canvas.tsx:22-24`, `src/types.ts`).
-- Migrate all 16 example / `useWebGPU` call sites + `README.md` + `packages/webgpu/README.md`.
-
-**Phase 3 — First-class worklet runtimes**
-- Worklet-runtime `RuntimeScheduler` impl (per Spike 1); verify auto-present dispatch on UI +
-  dedicated runtimes; update `apps/example/src/Reanimated/Reanimated.tsx` (drop `present()`,
-  keep its own rAF loop).
-
-**Phase 4 — Validation**
-```bash
-yarn tsc && yarn lint
-yarn workspace react-native-wgpu test         # offscreen readback + demo specs
-yarn build:ios        # or: yarn workspace example ios
-yarn build:android    # or: yarn workspace example android
-```
-Verify: no idle-CPU polling (logging), correct frame pacing, no present-ordering glitches,
-Reanimated UI/Dedicated examples render.
-
----
-
-## 16 `present()` call sites to migrate (Phase 2)
-
-```
-apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx
-apps/example/src/components/useWebGPU.ts
-apps/example/src/components/Texture.tsx
-apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx
-apps/example/src/ThreeJS/Helmet.tsx
-apps/example/src/ComputeToys/engine/index.ts
-apps/example/src/CanvasAPI/CanvasAPI.tsx
-apps/example/src/ThreeJS/PostProcessing.tsx
-apps/example/src/ThreeJS/Cube.tsx
-apps/example/src/Triangle/HelloTriangle.tsx
-apps/example/src/Triangle/HelloTriangleMSAA.tsx
-apps/example/src/ThreeJS/InstancedMesh.tsx
-apps/example/src/ThreeJS/Retargeting.tsx
-apps/example/src/ThreeJS/components/FiberCanvas.tsx
-apps/example/src/Reanimated/Reanimated.tsx
-apps/example/src/ThreeJS/Backdrop.tsx
-```
-Plus `README.md` and `packages/webgpu/README.md`.
-
----
-
-## Risks / open questions
-- **Worklet-runtime scheduler** access in worklets 0.8.3 (Spike 1 — highest risk).
-- **Concurrent `WaitAny`** semantics on one Dawn instance (Spike 2; single-worker fallback ready).
-- **Present timing**: vsync-dispatched-to-owning-loop must land after submit (FIFO on that loop)
-  and before the next `getCurrentTexture`.
-- **Breaking change**: `present()` removed — type, examples, README updated together.
-- **Apple Metal wait** moves into the frame-boundary present task, off the synchronous call path.
-
----
-
-## How to resume locally
-
-```bash
-git fetch origin claude/keen-darwin-xeywa
-git checkout claude/keen-darwin-xeywa
-git pull origin claude/keen-darwin-xeywa
-# open this file and run Phase 0 spikes, then start Claude Code:
-#   claude
-# suggested kickoff prompt:
-#   "Read docs/refactor-async-present-plan.md. Run the Phase 0 spikes and report
-#    findings before implementing. Develop on this branch."
-```
diff --git a/packages/webgpu/README.md b/packages/webgpu/README.md
index 8eeb1cba1..433d498fa 100644
--- a/packages/webgpu/README.md
+++ b/packages/webgpu/README.md
@@ -128,8 +128,6 @@ export function HelloTriangle() {
       passEncoder.end();
 
       device.queue.submit([commandEncoder.finish()]);
-
-      context.present();
     };
     helloTriangle();
   }, [ref]);
@@ -174,17 +172,28 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get();
 
 ### Frame Scheduling
 
-In React Native, we want to keep frame presentation as a manual operation as we plan to provide more advanced rendering options that are React Native specific.  
-This means that when you are ready to present a frame, you need to call `present` on the context.
+On the **main JS runtime** and the **Reanimated UI runtime**, frame presentation is automatic: once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). You do not need to call `present()` on these runtimes.
 
 ```tsx
 // draw
 // submit to the queue
 device.queue.submit([commandEncoder.finish()]);
-// This method is React Native only
-context.present();
+// The frame is presented automatically on the next vsync.
 ```
 
+When you render from a **dedicated worklet runtime** (e.g. `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame processor), your code runs on its own thread, where presentation can't be driven automatically. Call `context.present()` yourself after submitting:
+
+```tsx
+const onFrame = () => {
+  "worklet";
+  // draw on the dedicated runtime's thread
+  device.queue.submit([commandEncoder.finish()]);
+  context.present(); // required on dedicated worklet runtimes; a no-op on JS/UI
+};
+```
+
+`present()` is safe to call from a worklet that runs on either the UI runtime or a dedicated runtime: it presents on the dedicated runtime and does nothing on the JS and UI runtimes (which auto-present).
+
 ### Canvas Transparency
 
 On Android, the `alphaMode` property is ignored when configuring the canvas.
@@ -296,7 +305,6 @@ const render = () => {
 
   // Release the surface's access window right after the submit that sampled it.
   externalTexture.destroy();
-  context.present();
 };
 ```
 
@@ -328,7 +336,6 @@ const renderFrame = (device: GPUDevice, context: GPUCanvasContext) => {
   const commandEncoder = device.createCommandEncoder();
   // ... render ...
   device.queue.submit([commandEncoder.finish()]);
-  context.present();
 };
 
 // Initialize WebGPU on main thread, then run on UI thread
diff --git a/packages/webgpu/android/CMakeLists.txt b/packages/webgpu/android/CMakeLists.txt
index 50756e72e..51005acdc 100644
--- a/packages/webgpu/android/CMakeLists.txt
+++ b/packages/webgpu/android/CMakeLists.txt
@@ -47,6 +47,7 @@ add_library(${PACKAGE_NAME} SHARED
     ../cpp/rnwgpu/api/GPUComputePipeline.cpp
     ../cpp/rnwgpu/api/GPUCanvasContext.cpp
     ../cpp/rnwgpu/RNWebGPUManager.cpp
+    ../cpp/rnwgpu/FrameDriver.cpp
     ../cpp/jsi/Promise.cpp
     ../cpp/jsi/RuntimeLifecycleMonitor.cpp
     ../cpp/jsi/RuntimeAwareCache.cpp
diff --git a/packages/webgpu/android/cpp/cpp-adapter.cpp b/packages/webgpu/android/cpp/cpp-adapter.cpp
index 2a441c218..4f0ba61d3 100644
--- a/packages/webgpu/android/cpp/cpp-adapter.cpp
+++ b/packages/webgpu/android/cpp/cpp-adapter.cpp
@@ -10,6 +10,7 @@
 #include <webgpu/webgpu_cpp.h>
 
 #include "AndroidPlatformContext.h"
+#include "FrameDriver.h"
 #include "GPUCanvasContext.h"
 #include "RNWebGPUManager.h"
 
@@ -17,6 +18,37 @@
 
 std::shared_ptr<rnwgpu::RNWebGPUManager> manager;
 
+// JNI handles for driving the vsync source (com.webgpu.WebGPUFrameDriver),
+// cached on the JNI thread in initializeNative (which has the app classloader).
+static JavaVM *gJavaVM = nullptr;
+static jclass gFrameDriverClass = nullptr;
+static jmethodID gFrameDriverStart = nullptr;
+static jmethodID gFrameDriverStop = nullptr;
+
+static void callFrameDriver(jmethodID method) {
+  if (gJavaVM == nullptr || gFrameDriverClass == nullptr || method == nullptr) {
+    return;
+  }
+  JNIEnv *env = nullptr;
+  bool attached = false;
+  jint res = gJavaVM->GetEnv(reinterpret_cast<void **>(&env), JNI_VERSION_1_6);
+  if (res == JNI_EDETACHED) {
+    if (gJavaVM->AttachCurrentThread(&env, nullptr) != JNI_OK) {
+      return;
+    }
+    attached = true;
+  } else if (res != JNI_OK) {
+    return;
+  }
+  env->CallStaticVoidMethod(gFrameDriverClass, method);
+  if (env->ExceptionCheck()) {
+    env->ExceptionClear();
+  }
+  if (attached) {
+    gJavaVM->DetachCurrentThread();
+  }
+}
+
 extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUModule_initializeNative(
     JNIEnv *env, jobject /* this */, jlong jsRuntime,
     jobject jsCallInvokerHolder, jobject blobModule) {
@@ -31,6 +63,27 @@ extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUModule_initializeNative(
       std::make_shared<rnwgpu::AndroidPlatformContext>(globalBlobModule);
   manager = std::make_shared<rnwgpu::RNWebGPUManager>(runtime, jsCallInvoker,
                                                       platformContext);
+
+  // Cache JNI handles for the Choreographer-based vsync source and register it
+  // with the FrameDriver to drive auto-present (replaces context.present()).
+  env->GetJavaVM(&gJavaVM);
+  jclass localCls = env->FindClass("com/webgpu/WebGPUFrameDriver");
+  if (localCls != nullptr) {
+    gFrameDriverClass = reinterpret_cast<jclass>(env->NewGlobalRef(localCls));
+    gFrameDriverStart =
+        env->GetStaticMethodID(gFrameDriverClass, "start", "()V");
+    gFrameDriverStop = env->GetStaticMethodID(gFrameDriverClass, "stop", "()V");
+    env->DeleteLocalRef(localCls);
+  }
+  rnwgpu::FrameDriver::getInstance().setPlatformVSync(
+      [] { callFrameDriver(gFrameDriverStart); },
+      [] { callFrameDriver(gFrameDriverStop); });
+}
+
+extern "C" JNIEXPORT void JNICALL
+Java_com_webgpu_WebGPUFrameDriver_nativeOnVSync(JNIEnv * /*env*/,
+                                                jclass /*clazz*/) {
+  rnwgpu::FrameDriver::getInstance().onVSync();
 }
 
 extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUView_onSurfaceChanged(
@@ -66,6 +119,7 @@ Java_com_webgpu_WebGPUView_switchToOffscreenSurface(JNIEnv *env, jobject thiz,
 
 extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUView_onSurfaceDestroy(
     JNIEnv *env, jobject thiz, jint contextId) {
+  rnwgpu::FrameDriver::getInstance().cancelPresent(contextId);
   auto &registry = rnwgpu::SurfaceRegistry::getInstance();
   registry.removeSurfaceInfo(contextId);
 }
\ No newline at end of file
diff --git a/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java b/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java
new file mode 100644
index 000000000..03a1d2c29
--- /dev/null
+++ b/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java
@@ -0,0 +1,66 @@
+package com.webgpu;
+
+import android.os.Handler;
+import android.os.Looper;
+import android.view.Choreographer;
+
+/**
+ * Drives WebGPU auto-present from the main-thread {@link Choreographer},
+ * replacing the manual {@code context.present()} call.
+ *
+ * <p>{@link #start()} / {@link #stop()} are invoked from native code (the
+ * callbacks registered via rnwgpu::FrameDriver::setPlatformVSync) on arbitrary
+ * threads; both hop to the main thread. While running, {@link #doFrame(long)}
+ * calls back into native once per vsync, where pending presents are dispatched.
+ */
+public class WebGPUFrameDriver implements Choreographer.FrameCallback {
+  private static final WebGPUFrameDriver INSTANCE = new WebGPUFrameDriver();
+
+  private final Handler mainHandler = new Handler(Looper.getMainLooper());
+  private boolean running = false;
+
+  private WebGPUFrameDriver() {}
+
+  /** Called from native (any thread). */
+  public static void start() {
+    INSTANCE.startInternal();
+  }
+
+  /** Called from native (any thread). */
+  public static void stop() {
+    INSTANCE.stopInternal();
+  }
+
+  private void startInternal() {
+    mainHandler.post(
+        () -> {
+          if (running) {
+            return;
+          }
+          running = true;
+          Choreographer.getInstance().postFrameCallback(this);
+        });
+  }
+
+  private void stopInternal() {
+    mainHandler.post(
+        () -> {
+          if (!running) {
+            return;
+          }
+          running = false;
+          Choreographer.getInstance().removeFrameCallback(this);
+        });
+  }
+
+  @Override
+  public void doFrame(long frameTimeNanos) {
+    if (!running) {
+      return;
+    }
+    nativeOnVSync();
+    Choreographer.getInstance().postFrameCallback(this);
+  }
+
+  private static native void nativeOnVSync();
+}
diff --git a/packages/webgpu/apple/MetalView.mm b/packages/webgpu/apple/MetalView.mm
index ccff1245c..e617da889 100644
--- a/packages/webgpu/apple/MetalView.mm
+++ b/packages/webgpu/apple/MetalView.mm
@@ -1,6 +1,8 @@
 #import "MetalView.h"
 #import "webgpu/webgpu_cpp.h"
 
+#include "FrameDriver.h"
+
 @implementation MetalView {
   BOOL _isConfigured;
 }
@@ -42,6 +44,8 @@ - (void)update {
 }
 
 - (void)dealloc {
+  // Stop any pending auto-present for this surface before it goes away.
+  rnwgpu::FrameDriver::getInstance().cancelPresent([_contextId intValue]);
   auto &registry = rnwgpu::SurfaceRegistry::getInstance();
   // Remove the surface info from the registry
   registry.removeSurfaceInfo([_contextId intValue]);
diff --git a/packages/webgpu/apple/WebGPUFrameDriver.h b/packages/webgpu/apple/WebGPUFrameDriver.h
new file mode 100644
index 000000000..aacae84ee
--- /dev/null
+++ b/packages/webgpu/apple/WebGPUFrameDriver.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#import <Foundation/Foundation.h>
+
+// Objective-C wrapper around the platform vsync source (CADisplayLink) that
+// drives rnwgpu::FrameDriver::onVSync() once per frame. start/stop are invoked
+// by the C++ FrameDriver via setPlatformVSync; both hop to the main thread.
+@interface WebGPUFrameDriver : NSObject
+
++ (void)start;
++ (void)stop;
+
+@end
diff --git a/packages/webgpu/apple/WebGPUFrameDriver.mm b/packages/webgpu/apple/WebGPUFrameDriver.mm
new file mode 100644
index 000000000..1d302e2fa
--- /dev/null
+++ b/packages/webgpu/apple/WebGPUFrameDriver.mm
@@ -0,0 +1,88 @@
+#import "WebGPUFrameDriver.h"
+
+#import "RNWGUIKit.h"
+#import <QuartzCore/QuartzCore.h>
+
+#include "FrameDriver.h"
+
+@implementation WebGPUFrameDriver
+
++ (void)onFrame {
+  rnwgpu::FrameDriver::getInstance().onVSync();
+}
+
+#if !TARGET_OS_OSX
+
+// iOS / tvOS: CADisplayLink on the main run loop, paused/resumed for
+// start/stop.
+static CADisplayLink *sDisplayLink = nil;
+
++ (void)tick:(CADisplayLink *)link {
+  [WebGPUFrameDriver onFrame];
+}
+
++ (void)start {
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if (sDisplayLink == nil) {
+      sDisplayLink = [CADisplayLink displayLinkWithTarget:self
+                                                 selector:@selector(tick:)];
+      [sDisplayLink addToRunLoop:[NSRunLoop mainRunLoop]
+                         forMode:NSRunLoopCommonModes];
+    }
+    sDisplayLink.paused = NO;
+  });
+}
+
++ (void)stop {
+  dispatch_async(dispatch_get_main_queue(), ^{
+    sDisplayLink.paused = YES;
+  });
+}
+
+#else // TARGET_OS_OSX
+
+// macOS: CADisplayLink is available via NSScreen on 14.0+. On older systems we
+// fall back to an NSTimer at ~60Hz (not vsync-aligned, but keeps auto-present
+// working). FrameDriver self-idles cheaply when nothing is rendering.
+static id sDisplayLink = nil;
+
++ (void)tick:(id)sender {
+  [WebGPUFrameDriver onFrame];
+}
+
++ (void)start {
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if (sDisplayLink == nil) {
+      if (@available(macOS 14.0, *)) {
+        CADisplayLink *link =
+            [NSScreen.mainScreen displayLinkWithTarget:self
+                                              selector:@selector(tick:)];
+        [link addToRunLoop:[NSRunLoop mainRunLoop]
+                   forMode:NSRunLoopCommonModes];
+        sDisplayLink = link;
+      } else {
+        sDisplayLink = [NSTimer scheduledTimerWithTimeInterval:1.0 / 60.0
+                                                        target:self
+                                                      selector:@selector(tick:)
+                                                      userInfo:nil
+                                                       repeats:YES];
+      }
+    }
+    if ([sDisplayLink isKindOfClass:[CADisplayLink class]]) {
+      ((CADisplayLink *)sDisplayLink).paused = NO;
+    }
+  });
+}
+
++ (void)stop {
+  dispatch_async(dispatch_get_main_queue(), ^{
+    if ([sDisplayLink isKindOfClass:[CADisplayLink class]]) {
+      ((CADisplayLink *)sDisplayLink).paused = YES;
+    }
+    // NSTimer fallback keeps firing; onVSync is a cheap no-op while idle.
+  });
+}
+
+#endif // TARGET_OS_OSX
+
+@end
diff --git a/packages/webgpu/apple/WebGPUModule.mm b/packages/webgpu/apple/WebGPUModule.mm
index 99580aa14..c4c7224ad 100644
--- a/packages/webgpu/apple/WebGPUModule.mm
+++ b/packages/webgpu/apple/WebGPUModule.mm
@@ -1,6 +1,8 @@
 #import "WebGPUModule.h"
 #include "ApplePlatformContext.h"
+#include "FrameDriver.h"
 #import "GPUCanvasContext.h"
+#import "WebGPUFrameDriver.h"
 
 #import <React/RCTBridge+Private.h>
 #import <React/RCTCallInvoker.h>
@@ -78,6 +80,11 @@ - (void)invalidate {
       std::make_shared<rnwgpu::ApplePlatformContext>();
   webgpuManager = std::make_shared<rnwgpu::RNWebGPUManager>(runtime, jsInvoker,
                                                             platformContext);
+
+  // Drive auto-present from the display's vsync (replaces context.present()).
+  rnwgpu::FrameDriver::getInstance().setPlatformVSync(
+      [] { [WebGPUFrameDriver start]; }, [] { [WebGPUFrameDriver stop]; });
+
   return @true;
 }
 
diff --git a/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp b/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp
new file mode 100644
index 000000000..792940e5e
--- /dev/null
+++ b/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp
@@ -0,0 +1,81 @@
+#include "FrameDriver.h"
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+namespace jsi = facebook::jsi;
+
+namespace rnwgpu {
+
+FrameDriver &FrameDriver::getInstance() {
+  static FrameDriver instance;
+  return instance;
+}
+
+void FrameDriver::setPlatformVSync(std::function<void()> start,
+                                   std::function<void()> stop) {
+  std::lock_guard<std::mutex> lock(_mutex);
+  _start = std::move(start);
+  _stop = std::move(stop);
+}
+
+void FrameDriver::requestPresent(
+    int contextId, std::shared_ptr<SurfaceInfo> surface,
+    std::shared_ptr<async::RuntimeScheduler> scheduler) {
+  if (!surface || !scheduler) {
+    return;
+  }
+
+  std::function<void()> startToCall;
+  {
+    std::lock_guard<std::mutex> lock(_mutex);
+    _pending[contextId] = {std::move(surface), std::move(scheduler)};
+    _idleFrames = 0;
+    if (!_running && _start) {
+      _running = true;
+      startToCall = _start;
+    }
+  }
+
+  // Invoked outside the lock: the platform start hops to the UI thread.
+  if (startToCall) {
+    startToCall();
+  }
+}
+
+void FrameDriver::cancelPresent(int contextId) {
+  std::lock_guard<std::mutex> lock(_mutex);
+  _pending.erase(contextId);
+}
+
+void FrameDriver::onVSync() {
+  // UI-thread vsync tick: dispatch coalesced presents, or idle out the source.
+  std::vector<Pending> toPresent;
+  {
+    std::lock_guard<std::mutex> lock(_mutex);
+    if (!_pending.empty()) {
+      toPresent.reserve(_pending.size());
+      for (auto &entry : _pending) {
+        toPresent.push_back(std::move(entry.second));
+      }
+      _pending.clear();
+      _idleFrames = 0;
+    } else if (_running && ++_idleFrames >= kMaxIdleFrames) {
+      _running = false;
+      // Stop under the lock: called after unlocking, a racing requestPresent
+      // could issue start() first, and this late stop() would then halt vsync
+      // while _running stays true, so no later request would restart it.
+      if (_stop) {
+        _stop();
+      }
+    }
+  }
+  for (auto &pending : toPresent) {
+    auto surface = pending.surface;
+    pending.scheduler->scheduleOnJS(
+        [surface](jsi::Runtime & /*runtime*/) { surface->presentFrame(); });
+  }
+}
+
+} // namespace rnwgpu
diff --git a/packages/webgpu/cpp/rnwgpu/FrameDriver.h b/packages/webgpu/cpp/rnwgpu/FrameDriver.h
new file mode 100644
index 000000000..c16fedabf
--- /dev/null
+++ b/packages/webgpu/cpp/rnwgpu/FrameDriver.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "SurfaceRegistry.h"
+#include "rnwgpu/async/RuntimeScheduler.h"
+
+namespace rnwgpu {
+
+/**
+ * Global vsync-driven auto-present coordinator. Replaces the manual
+ * `context.present()` call.
+ *
+ * Flow:
+ *   - `GPUCanvasContext::getCurrentTexture()` (JS thread) calls
+ *     `requestPresent` for its surface, tagged with the owning runtime's
+ *     RuntimeScheduler.
+ *   - A platform vsync source (iOS CADisplayLink / Android Choreographer) calls
+ *     `onVSync()` on the UI thread once per frame.
+ *   - On each vsync, every surface that requested a present has its present
+ *     dispatched onto its owning runtime's JS thread (so `Surface.Present()`
+ *     and the Apple Metal scheduling wait run on the same thread that did
+ *     getCurrentTexture / submit, preserving Dawn surface thread-affinity and
+ *     present-after-submit ordering via FIFO on that loop).
+ *
+ * The vsync source is request-driven: started on the first present request and
+ * stopped after a few idle frames, so an idle app does no per-frame work (the
+ * pre-14 macOS NSTimer fallback still ticks, but onVSync is then a no-op).
+ */
+class FrameDriver {
+public:
+  static FrameDriver &getInstance();
+
+  /**
+   * Register how to start/stop the platform vsync source. `start`/`stop` are
+   * invoked when presents begin/cease; each implementation is responsible for
+   * hopping to the UI thread as needed. Called once per platform at init.
+   */
+  void setPlatformVSync(std::function<void()> start,
+                        std::function<void()> stop);
+
+  /**
+   * Request that `surface` be presented at the next vsync. Coalesced per
+   * contextId (at most one present per surface per frame). Thread-safe; called
+   * from a JS thread inside getCurrentTexture. Surfaces with no on-screen
+   * `wgpu::Surface` (offscreen) should not be registered.
+   */
+  void requestPresent(int contextId, std::shared_ptr<SurfaceInfo> surface,
+                      std::shared_ptr<async::RuntimeScheduler> scheduler);
+
+  /**
+   * Drop any pending present for a surface (e.g. when its view is torn down).
+   * Thread-safe.
+   */
+  void cancelPresent(int contextId);
+
+  /** Called by the platform vsync source on the UI thread, once per frame. */
+  void onVSync();
+
+private:
+  FrameDriver() = default;
+
+  struct Pending {
+    std::shared_ptr<SurfaceInfo> surface;
+    std::shared_ptr<async::RuntimeScheduler> scheduler;
+  };
+
+  // Number of consecutive empty frames before the vsync source is stopped.
+  // A small grace period avoids start/stop thrash during continuous rendering.
+  static constexpr int kMaxIdleFrames = 3;
+
+  std::mutex _mutex;
+  std::unordered_map<int, Pending> _pending;
+  std::function<void()> _start;
+  std::function<void()> _stop;
+  bool _running = false;
+  int _idleFrames = 0;
+};
+
+} // namespace rnwgpu
diff --git a/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h b/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h
index 110a45d44..ed098896a 100644
--- a/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h
+++ b/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h
@@ -7,6 +7,12 @@
 
 #include "webgpu/webgpu_cpp.h"
 
+#ifdef __APPLE__
+namespace dawn::native::metal {
+void WaitForCommandsToBeScheduled(WGPUDevice device);
+} // namespace dawn::native::metal
+#endif
+
 namespace rnwgpu {
 
 struct NativeInfo {
@@ -113,7 +119,22 @@ class SurfaceInfo {
     height = newHeight;
   }
 
-  void present() {
+  // Present the current surface texture. Called at the frame boundary via
+  // FrameDriver (auto-present), or directly by GPUCanvasContext::present() on
+  // a dedicated worklet runtime. Skips Present() when no surface is attached.
+  void presentFrame() {
+#ifdef __APPLE__
+    // Ensure command buffers are scheduled before presenting. Read the device
+    // under a shared lock, then wait without holding it (the wait can block).
+    wgpu::Device device;
+    {
+      std::shared_lock<std::shared_mutex> lock(_mutex);
+      device = config.device;
+    }
+    if (device) {
+      dawn::native::metal::WaitForCommandsToBeScheduled(device.Get());
+    }
+#endif
     std::unique_lock<std::shared_mutex> lock(_mutex);
     if (surface) {
       surface.Present();
@@ -131,6 +152,12 @@ class SurfaceInfo {
     }
   }
 
+  // True when an on-screen wgpu::Surface is attached (vs offscreen texture).
+  bool hasSurface() {
+    std::shared_lock<std::shared_mutex> lock(_mutex);
+    return surface != nullptr;
+  }
+
   NativeInfo getNativeInfo() {
     std::shared_lock<std::shared_mutex> lock(_mutex);
     return {.nativeSurface = nativeSurface, .width = width, .height = height};
diff --git a/packages/webgpu/cpp/rnwgpu/api/GPU.h b/packages/webgpu/cpp/rnwgpu/api/GPU.h
index e7dc15caf..b2488d4c7 100644
--- a/packages/webgpu/cpp/rnwgpu/api/GPU.h
+++ b/packages/webgpu/cpp/rnwgpu/api/GPU.h
@@ -53,6 +53,7 @@ class GPU : public NativeObject<GPU> {
   }
 
   inline const wgpu::Instance get() { return _instance; }
+  inline std::shared_ptr<async::RuntimeContext> getContext() { return _async; }
 
 private:
   wgpu::Instance _instance;
diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp
index d75eb7b0f..c4390ba6d 100644
--- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp
+++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp
@@ -1,17 +1,33 @@
 #include "GPUCanvasContext.h"
 #include "Convertors.h"
+#include "FrameDriver.h"
 #include "RNWebGPUManager.h"
 #include <memory>
 
-#ifdef __APPLE__
-namespace dawn::native::metal {
-
-void WaitForCommandsToBeScheduled(WGPUDevice device);
+namespace rnwgpu {
 
+namespace {
+// Runtimes whose present is automatic (no ctx.present() needed): the main JS
+// runtime and the Reanimated UI runtime. Both are reached correctly by the
+// global vsync FrameDriver dispatching through the main runtime's scheduler.
+// Dedicated worklet runtimes (createWorkletRuntime, Vision Camera frame
+// processors, …) run on their own thread with no safe scheduler hook, so they
+// present explicitly via ctx.present().
+bool isAutoPresentedRuntime(jsi::Runtime &runtime) {
+  if (async::RuntimeContext::get(runtime) != nullptr) {
+    return true; // main JS runtime
+  }
+  // Worklets tags every runtime with a numeric `__RUNTIME_KIND`
+  // (worklets::RuntimeKind: ReactNative=1, UI=2, Worker=3). Auto-present only
+  // the UI runtime; treat Worker / unknown / untagged as needing ctx.present().
+  auto kind = runtime.global().getProperty(runtime, "__RUNTIME_KIND");
+  if (kind.isNumber()) {
+    constexpr int kRuntimeKindUI = 2;
+    return static_cast<int>(kind.asNumber()) == kRuntimeKindUI;
+  }
+  return false;
 }
-#endif
-
-namespace rnwgpu {
+} // namespace
 
 void GPUCanvasContext::configure(
     std::shared_ptr<GPUCanvasConfiguration> configuration) {
@@ -39,7 +55,10 @@ void GPUCanvasContext::configure(
 
 void GPUCanvasContext::unconfigure() {}
 
-std::shared_ptr<GPUTexture> GPUCanvasContext::getCurrentTexture() {
+jsi::Value GPUCanvasContext::getCurrentTexture(jsi::Runtime &runtime,
+                                               const jsi::Value & /*thisValue*/,
+                                               const jsi::Value * /*args*/,
+                                               size_t /*count*/) {
   auto prevSize = _surfaceInfo->getConfig();
   auto width = _canvas->getWidth();
   auto height = _canvas->getHeight();
@@ -47,21 +66,44 @@ std::shared_ptr<GPUTexture> GPUCanvasContext::getCurrentTexture() {
   if (sizeHasChanged) {
     _surfaceInfo->reconfigure(width, height);
   }
+
   auto texture = _surfaceInfo->getCurrentTexture();
-  // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC
-  // cycles every frame since the canvas texture doesn't own the buffer.
-  return std::make_shared<GPUTexture>(texture, "", false);
-}
 
-void GPUCanvasContext::present() {
-#ifdef __APPLE__
-  dawn::native::metal::WaitForCommandsToBeScheduled(
-      _surfaceInfo->getDevice().Get());
-#endif
   auto size = _surfaceInfo->getSize();
   _canvas->setClientWidth(size.width);
   _canvas->setClientHeight(size.height);
-  _surfaceInfo->present();
+
+  // Auto-present on the JS / UI runtime: acquiring the current texture
+  // schedules a present for this surface at the next vsync (spec-aligned
+  // "update the rendering" after the frame), dispatched through the main
+  // runtime's scheduler. Dedicated worklet runtimes instead call ctx.present()
+  // explicitly on their own thread. Offscreen surfaces have no wgpu::Surface,
+  // so skip them (their texture is read back directly).
+  if (_surfaceInfo->hasSurface() && isAutoPresentedRuntime(runtime)) {
+    FrameDriver::getInstance().requestPresent(_contextId, _surfaceInfo,
+                                              _gpu->getContext()->scheduler());
+  }
+
+  // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC
+  // cycles every frame since the canvas texture doesn't own the buffer.
+  auto gpuTexture = std::make_shared<GPUTexture>(texture, "", false);
+  return JSIConverter<std::shared_ptr<GPUTexture>>::toJSI(runtime, gpuTexture);
+}
+
+jsi::Value GPUCanvasContext::present(jsi::Runtime &runtime,
+                                     const jsi::Value & /*thisValue*/,
+                                     const jsi::Value * /*args*/,
+                                     size_t /*count*/) {
+  // Only meaningful on a dedicated worklet runtime, where present can't be
+  // automated. On the JS / UI runtime present is automatic, so this is a no-op
+  // there — which makes it safe to call from a worklet shared between the UI
+  // runtime and a dedicated runtime. Presents synchronously on the calling
+  // thread (the one that did getCurrentTexture / submit), preserving Dawn
+  // surface thread-affinity.
+  if (!isAutoPresentedRuntime(runtime) && _surfaceInfo->hasSurface()) {
+    _surfaceInfo->presentFrame();
+  }
+  return jsi::Value::undefined();
 }
 
 } // namespace rnwgpu
diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h
index 4b97a7887..a2e80b7cc 100644
--- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h
+++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h
@@ -26,7 +26,7 @@ class GPUCanvasContext : public NativeObject<GPUCanvasContext> {
 
   GPUCanvasContext(std::shared_ptr<GPU> gpu, int contextId, int width,
                    int height)
-      : NativeObject(CLASS_NAME), _gpu(std::move(gpu)) {
+      : NativeObject(CLASS_NAME), _contextId(contextId), _gpu(std::move(gpu)) {
     _canvas = std::make_shared<Canvas>(nullptr, width, height);
     auto &registry = rnwgpu::SurfaceRegistry::getInstance();
     _surfaceInfo =
@@ -54,10 +54,17 @@ class GPUCanvasContext : public NativeObject<GPUCanvasContext> {
   inline const wgpu::Surface get() { return nullptr; }
   void configure(std::shared_ptr<GPUCanvasConfiguration> configuration);
   void unconfigure();
-  std::shared_ptr<GPUTexture> getCurrentTexture();
-  void present();
+  // Full-control signatures so we can learn the *calling* runtime and decide
+  // how this frame is presented (auto on the JS / UI runtime; explicit
+  // ctx.present() on a dedicated worklet runtime).
+  jsi::Value getCurrentTexture(jsi::Runtime &runtime,
+                               const jsi::Value &thisValue,
+                               const jsi::Value *args, size_t count);
+  jsi::Value present(jsi::Runtime &runtime, const jsi::Value &thisValue,
+                     const jsi::Value *args, size_t count);
 
 private:
+  int _contextId;
   std::shared_ptr<Canvas> _canvas;
   std::shared_ptr<SurfaceInfo> _surfaceInfo;
   std::shared_ptr<GPU> _gpu;
diff --git a/packages/webgpu/src/Canvas.tsx b/packages/webgpu/src/Canvas.tsx
index 1030f3e38..43c9621e7 100644
--- a/packages/webgpu/src/Canvas.tsx
+++ b/packages/webgpu/src/Canvas.tsx
@@ -20,6 +20,15 @@ export interface NativeCanvas {
 }
 
 export type RNCanvasContext = GPUCanvasContext & {
+  /**
+   * Present the current frame.
+   *
+   * Only needed when rendering from a **dedicated worklet runtime** (e.g.
+   * `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame
+   * processor), which runs on its own thread. On the main JS runtime and the
+   * Reanimated UI runtime present is automatic (driven by a global vsync), so
+   * calling this there is a no-op. Call it after `queue.submit()`.
+   */
   present: () => void;
 };
 
diff --git a/packages/webgpu/src/Offscreen.ts b/packages/webgpu/src/Offscreen.ts
index c4e460bb2..4deab8a1c 100644
--- a/packages/webgpu/src/Offscreen.ts
+++ b/packages/webgpu/src/Offscreen.ts
@@ -65,7 +65,7 @@ class GPUOffscreenCanvasContext implements GPUCanvasContext {
   }
 
   present() {
-    // Do nothing
+    // Offscreen contexts have nothing to present; readback is via getImageData.
   }
 
   getDevice() {
diff --git a/packages/webgpu/src/WebPolyfillGPUModule.ts b/packages/webgpu/src/WebPolyfillGPUModule.ts
index 9dcc1f1c5..8b629a0c9 100644
--- a/packages/webgpu/src/WebPolyfillGPUModule.ts
+++ b/packages/webgpu/src/WebPolyfillGPUModule.ts
@@ -40,9 +40,9 @@ function makeWebGPUCanvasContext(
   }
 
   const context = canvas.getContext("webgpu")!;
-  return Object.assign(context, {
-    present: () => {},
-  });
+  // On web there is no manual present; expose a no-op so RNCanvasContext's
+  // present() (used on native dedicated worklet runtimes) is callable here too.
+  return Object.assign(context, { present: () => {} });
 }
 
 // @ts-expect-error - polyfill for RNWebGPU native module
diff --git a/packages/webgpu/src/types.ts b/packages/webgpu/src/types.ts
index c03f92b4b..1608a4ff0 100644
--- a/packages/webgpu/src/types.ts
+++ b/packages/webgpu/src/types.ts
@@ -9,6 +9,15 @@ export interface NativeCanvas {
 }
 
 export type RNCanvasContext = GPUCanvasContext & {
+  /**
+   * Present the current frame.
+   *
+   * Only needed when rendering from a **dedicated worklet runtime** (e.g.
+   * `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame
+   * processor), which runs on its own thread. On the main JS runtime and the
+   * Reanimated UI runtime present is automatic (driven by a global vsync), so
+   * calling this there is a no-op. Call it after `queue.submit()`.
+   */
   present: () => void;
 };