diff --git a/README.md b/README.md index 8eeb1cba1..433d498fa 100644 --- a/README.md +++ b/README.md @@ -128,8 +128,6 @@ export function HelloTriangle() { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - context.present(); }; helloTriangle(); }, [ref]); @@ -174,17 +172,28 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get(); ### Frame Scheduling -In React Native, we want to keep frame presentation as a manual operation as we plan to provide more advanced rendering options that are React Native specific. -This means that when you are ready to present a frame, you need to call `present` on the context. +On the **main JS runtime** and the **Reanimated UI runtime**, frame presentation is automatic: once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). You do not need to call `present()` on these runtimes. ```tsx // draw // submit to the queue device.queue.submit([commandEncoder.finish()]); -// This method is React Native only -context.present(); +// The frame is presented automatically on the next vsync. ``` +When you render from a **dedicated worklet runtime** (e.g. `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame processor), your code runs on its own thread, where presentation can't be driven automatically. Call `context.present()` yourself after submitting: + +```tsx +const onFrame = () => { + "worklet"; + // draw on the dedicated runtime's thread + device.queue.submit([commandEncoder.finish()]); + context.present(); // required on dedicated worklet runtimes; a no-op on JS/UI +}; +``` + +`present()` is safe to call from a worklet that runs on either the UI runtime or a dedicated runtime: it presents on the dedicated runtime and does nothing on the JS and UI runtimes (which auto-present). 
+ ### Canvas Transparency On Android, the `alphaMode` property is ignored when configuring the canvas. @@ -296,7 +305,6 @@ const render = () => { // Release the surface's access window right after the submit that sampled it. externalTexture.destroy(); - context.present(); }; ``` @@ -328,7 +336,6 @@ const renderFrame = (device: GPUDevice, context: GPUCanvasContext) => { const commandEncoder = device.createCommandEncoder(); // ... render ... device.queue.submit([commandEncoder.finish()]); - context.present(); }; // Initialize WebGPU on main thread, then run on UI thread diff --git a/apps/example/ios/Podfile.lock b/apps/example/ios/Podfile.lock index fd5ba968c..b4c5f158a 100644 --- a/apps/example/ios/Podfile.lock +++ b/apps/example/ios/Podfile.lock @@ -1924,7 +1924,7 @@ PODS: - ReactCommon/turbomodule/core - SocketRocket - Yoga - - react-native-wgpu (0.5.12): + - react-native-wgpu (0.5.13): - boost - DoubleConversion - fast_float @@ -3074,7 +3074,7 @@ SPEC CHECKSUMS: React-microtasksnativemodule: 75b6604b667d297292345302cc5bfb6b6aeccc1b react-native-safe-area-context: c00143b4823773bba23f2f19f85663ae89ceb460 react-native-skia: fc73e9bdc46ebb420a98c9c2be29fee80f565e79 - react-native-wgpu: 274ffec11ee3a082260d9f3d1fb54030a5ca0873 + react-native-wgpu: 0496e9efeb4c3939ab56371005ede4e1468591d1 React-NativeModulesApple: 879fbdc5dcff7136abceb7880fe8a2022a1bd7c3 React-oscompat: 93b5535ea7f7dff46aaee4f78309a70979bdde9d React-perflogger: 5536d2df3d18fe0920263466f7b46a56351c0510 diff --git a/apps/example/src/CanvasAPI/CanvasAPI.tsx b/apps/example/src/CanvasAPI/CanvasAPI.tsx index a9f5c4928..a403c8388 100644 --- a/apps/example/src/CanvasAPI/CanvasAPI.tsx +++ b/apps/example/src/CanvasAPI/CanvasAPI.tsx @@ -89,8 +89,6 @@ export const CanvasAPI = () => { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - context.present(); })() } title="check surface" diff --git a/apps/example/src/ComputeToys/engine/index.ts b/apps/example/src/ComputeToys/engine/index.ts index 
f0fa08f07..8db2562ad 100644 --- a/apps/example/src/ComputeToys/engine/index.ts +++ b/apps/example/src/ComputeToys/engine/index.ts @@ -398,7 +398,6 @@ fn passSampleLevelBilinearRepeat(pass_index: int, uv: float2, lod: float) -> flo // Submit command buffer this.device.queue.submit([encoder.finish()]); - this.surface!.present(); // Update frame counter this.bindings!.time.host.frame += 1; diff --git a/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx b/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx index f8399ee8a..7c973e03f 100644 --- a/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx +++ b/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx @@ -247,7 +247,6 @@ export const ImportExternalTexture = () => { // Now that the work sampling it has been submitted, end the external // texture's access window so the frame's surface is released promptly. externalTex?.destroy(); - context.present(); rafRef.current = requestAnimationFrame(render); }; rafRef.current = requestAnimationFrame(render); diff --git a/apps/example/src/Reanimated/Reanimated.tsx b/apps/example/src/Reanimated/Reanimated.tsx index 505296565..3761c90f9 100644 --- a/apps/example/src/Reanimated/Reanimated.tsx +++ b/apps/example/src/Reanimated/Reanimated.tsx @@ -78,8 +78,10 @@ export const webGPUDemo = ( passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - + // Needed on a dedicated worklet runtime (DedicatedThread); a no-op on the + // UI runtime (UIThread), where present is automatic. 
context.present(); + if (runAnimation.value) { requestAnimationFrame(frame); } diff --git a/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx b/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx index b5627cc43..197657460 100644 --- a/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx +++ b/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx @@ -268,7 +268,6 @@ export const SharedTextureMemory = () => { } pass.end(); device.queue.submit([encoder.finish()]); - context.present(); rafRef.current = requestAnimationFrame(render); }; rafRef.current = requestAnimationFrame(render); diff --git a/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx b/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx index 907264638..b1906cf74 100644 --- a/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx +++ b/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx @@ -185,8 +185,6 @@ export function StorageBufferVertices() { const commandBuffer = encoder.finish(); device.queue.submit([commandBuffer]); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (context as any).present(); }); return ( diff --git a/apps/example/src/ThreeJS/Backdrop.tsx b/apps/example/src/ThreeJS/Backdrop.tsx index 8ed2a8c91..113325b9d 100644 --- a/apps/example/src/ThreeJS/Backdrop.tsx +++ b/apps/example/src/ThreeJS/Backdrop.tsx @@ -150,7 +150,6 @@ export const Backdrop = () => { } renderer.render(scene, camera); - context!.present(); } return () => { renderer.setAnimationLoop(null); diff --git a/apps/example/src/ThreeJS/Cube.tsx b/apps/example/src/ThreeJS/Cube.tsx index d3e9707b5..ea3fe0f23 100644 --- a/apps/example/src/ThreeJS/Cube.tsx +++ b/apps/example/src/ThreeJS/Cube.tsx @@ -31,7 +31,6 @@ export const Cube = () => { mesh.rotation.y = time / 1000; renderer.render(scene, camera); - context.present(); } renderer.setAnimationLoop(animate); return () => { diff --git a/apps/example/src/ThreeJS/Helmet.tsx 
b/apps/example/src/ThreeJS/Helmet.tsx index be7cb626f..70720d360 100644 --- a/apps/example/src/ThreeJS/Helmet.tsx +++ b/apps/example/src/ThreeJS/Helmet.tsx @@ -49,7 +49,6 @@ export const Helmet = () => { function animate() { animateCamera(); renderer.render(scene, camera); - context!.present(); } return () => { diff --git a/apps/example/src/ThreeJS/InstancedMesh.tsx b/apps/example/src/ThreeJS/InstancedMesh.tsx index 3f60631de..5b7c7ca4d 100644 --- a/apps/example/src/ThreeJS/InstancedMesh.tsx +++ b/apps/example/src/ThreeJS/InstancedMesh.tsx @@ -59,7 +59,6 @@ export const InstancedMesh = () => { function animate() { render(); - context!.present(); } function render() { diff --git a/apps/example/src/ThreeJS/PostProcessing.tsx b/apps/example/src/ThreeJS/PostProcessing.tsx index d94ef1728..0c2980501 100644 --- a/apps/example/src/ThreeJS/PostProcessing.tsx +++ b/apps/example/src/ThreeJS/PostProcessing.tsx @@ -72,7 +72,6 @@ export const PostProcessing = () => { mixer.update(delta); } postProcessing.render(); - context!.present(); } return () => { renderer.setAnimationLoop(null); diff --git a/apps/example/src/ThreeJS/Retargeting.tsx b/apps/example/src/ThreeJS/Retargeting.tsx index c25601885..8b8dd9a29 100644 --- a/apps/example/src/ThreeJS/Retargeting.tsx +++ b/apps/example/src/ThreeJS/Retargeting.tsx @@ -302,7 +302,6 @@ export const Retargeting = () => { source.mixer.update(delta); mixer.update(delta); renderer.render(scene, camera); - context.present(); }); return () => { diff --git a/apps/example/src/ThreeJS/components/FiberCanvas.tsx b/apps/example/src/ThreeJS/components/FiberCanvas.tsx index 91b699553..92b928987 100644 --- a/apps/example/src/ThreeJS/components/FiberCanvas.tsx +++ b/apps/example/src/ThreeJS/components/FiberCanvas.tsx @@ -66,7 +66,6 @@ export const FiberCanvas = ({ const renderFrame = state.gl.render.bind(state.gl); state.gl.render = (s: THREE.Scene, c: THREE.Camera) => { renderFrame(s, c); - context?.present(); }; }, }); diff --git 
a/apps/example/src/Triangle/HelloTriangle.tsx b/apps/example/src/Triangle/HelloTriangle.tsx index 3e28d6c12..caeb560b3 100644 --- a/apps/example/src/Triangle/HelloTriangle.tsx +++ b/apps/example/src/Triangle/HelloTriangle.tsx @@ -77,8 +77,6 @@ export function HelloTriangle() { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - context.present(); })(); }, [ref]); diff --git a/apps/example/src/Triangle/HelloTriangleMSAA.tsx b/apps/example/src/Triangle/HelloTriangleMSAA.tsx index 5d66983d5..b9518fbe9 100644 --- a/apps/example/src/Triangle/HelloTriangleMSAA.tsx +++ b/apps/example/src/Triangle/HelloTriangleMSAA.tsx @@ -87,7 +87,6 @@ export function HelloTriangleMSAA() { } frame(); - context.present(); })(); }, [ref]); diff --git a/apps/example/src/VisionCamera/VisionCamera.tsx b/apps/example/src/VisionCamera/VisionCamera.tsx index c4adcfaa0..f6c6c95bd 100644 --- a/apps/example/src/VisionCamera/VisionCamera.tsx +++ b/apps/example/src/VisionCamera/VisionCamera.tsx @@ -613,11 +613,13 @@ const CameraView = () => { pass.draw(3); pass.end(); device.queue.submit([encoder.finish()]); + // Vision Camera frame processors run on a dedicated worklet runtime, + // so present explicitly (auto-present only covers the JS/UI runtime). + context.present(); // The work sampling it is submitted, so end the external texture's // access window now to release the camera frame's surface promptly // (don't wait for GC, which would starve the frame buffer pool). 
externalTex.destroy(); - context.present(); } finally { videoFrame.release(); } diff --git a/apps/example/src/components/Texture.tsx b/apps/example/src/components/Texture.tsx index d9e689b41..5bd82a911 100644 --- a/apps/example/src/components/Texture.tsx +++ b/apps/example/src/components/Texture.tsx @@ -145,7 +145,6 @@ export const Texture = ({ texture, style, device }: GPUTextureProps) => { renderPass.end(); device.queue.submit([commandEncoder.finish()]); - context.present(); }, [device, state, texture, ref]); return ; }; diff --git a/apps/example/src/components/useWebGPU.ts b/apps/example/src/components/useWebGPU.ts index ac8a631ac..1a399aafe 100644 --- a/apps/example/src/components/useWebGPU.ts +++ b/apps/example/src/components/useWebGPU.ts @@ -57,7 +57,6 @@ export const useWebGPU = (scene: Scene) => { const render = () => { const timestamp = Date.now(); renderScene(timestamp); - context.present(); animationFrameId.current = requestAnimationFrame(render); }; diff --git a/docs/refactor-async-present-plan.md b/docs/refactor-async-present-plan.md deleted file mode 100644 index e69706534..000000000 --- a/docs/refactor-async-present-plan.md +++ /dev/null @@ -1,317 +0,0 @@ -# Refactor: event-driven async + auto-present - -Status: **Phase 0 complete — all spikes GREEN, ready for Phase 1** -Branch: `claude/keen-darwin-xeywa` - -This document is the handoff for moving the async + present refactor forward. Phase 0 -(spikes) needs a real local machine: installed `node_modules`, a Dawn build, and the -iOS/Android toolchains. Everything below the "How to resume locally" section is meant to -be executed on your computer, not in the web container. - ---- - -## Goals (locked) - -- **Async**: replace the JS-thread polling loop with a **background `WaitAny` GPU thread** - (Dawn `TimedWaitAny` is already enabled — `packages/webgpu/cpp/rnwgpu/api/GPU.cpp:17-23`). 
-- **Present**: **remove `context.present()` entirely** (breaking) in favor of a **global - Choreographer / CADisplayLink-driven auto-present**. -- **Scope**: first-class for **all runtimes** — main JS, the reanimated UI runtime, and - `createWorkletRuntime` dedicated runtimes. - ---- - -## What exists today (the two problems) - -### Async (polling) — `packages/webgpu/cpp/rnwgpu/async/` -- Every async op (`requestAdapter`, `requestDevice`, `mapAsync`, `onSubmittedWorkDone`, - `createRender/ComputePipelineAsync`, `popErrorScope`) registers a Dawn callback with - `CallbackMode::AllowProcessEvents` and calls `AsyncRunner::postTask`. -- `AsyncRunner::requestTick` (`async/AsyncRunner.cpp:89-177`) schedules `tick()` via - `setImmediate` / `setTimeout(4ms)` / `queueMicrotask`; `tick()` calls - `_instance.ProcessEvents()` and **re-schedules itself while any task is "pumping"** - (`AsyncRunner.cpp:189-191`). This is a busy reschedule loop: wasted CPU when idle, added - latency, and `JSIMicrotaskDispatcher`'s `queueMicrotask` dispatch is only thread-safe when - called on the runtime's own thread. - -### Present (manual, non-standard) -`api/GPUCanvasContext.cpp:56-65` → `SurfaceRegistry.h:116-121` → `wgpu::Surface::Present()`. -The user must call `context.present()` after every `queue.submit` (**16 JS/TS call sites**). -No CADisplayLink/Choreographer exists; RN's `requestAnimationFrame` is the only frame driver. -On Apple, present also does a blocking `WaitForCommandsToBeScheduled` on the JS thread. - ---- - -## Target architecture - -Three new pieces: - -### A. `RuntimeScheduler` — thread-safe "post to this runtime's JS thread" -Replaces `AsyncDispatcher` / `JSIMicrotaskDispatcher` (which use non-thread-safe -`queueMicrotask`). -- Interface: `void scheduleOnJS(std::function)`, callable from any thread. -- **Main runtime**: wraps `react::CallInvoker::invokeAsync` (already available — - `apple/WebGPUModule.mm:70`, `android/cpp/cpp-adapter.cpp:25-29`). 
-- **Worklet runtimes**: wraps the worklet runtime's own thread executor from - `react-native-worklets` 0.8.3 (**see Phase 0 spike #1**). -- Stored per-runtime in a `RuntimeContext` (the "per-JS-thread event loop"), created on first - WebGPU use, torn down via the existing `RuntimeLifecycleMonitor` / `RuntimeAwareCache` - (`cpp/jsi/RuntimeAwareCache.h`). - -### B. `GpuEventLoop` — background `WaitAny` thread (no polling) -One per `wgpu::Instance` (effectively global). -- All async sites switch `CallbackMode::AllowProcessEvents` → **`CallbackMode::WaitAnyOnly`**, - returning a `wgpu::Future`. -- A **small bounded thread pool**; each pending future is waited via - `instance.WaitAny(future, /*timeout*/UINT64_MAX)` on a pool thread → genuinely event-driven, - **zero idle CPU**, resolves the instant GPU work completes. No wake/interrupt problem (each - thread owns one future). **See Phase 0 spike #2.** -- On completion the worker marshals the result and calls the owning runtime's - `RuntimeScheduler.scheduleOnJS` to settle the JS Promise. `AsyncTaskHandle` / `Promise` - settle logic is reused; `AsyncRunner` + its tick loop are deleted. -- Fallback (if concurrent `WaitAny` on one instance is unsafe): single worker thread waiting on - the batched future set with a condition-variable re-arm. - -### C. `FrameDriver` — global vsync source for auto-present -One UI-thread singleton; removes the need for `present()`. -- **iOS**: `CADisplayLink` on the main run loop. **Android**: NDK - `AChoreographer_postFrameCallback` from C++ (API 24+, avoids JNI). **See Phase 0 spike #3.** -- Lifecycle: started when ≥1 surface is configured, stopped at 0. -- **Auto-present semantics** (spec-aligned "update the rendering" after rAF): - 1. `GPUCanvasContext::getCurrentTexture()` marks its `SurfaceInfo` dirty and registers a - present request with `FrameDriver`, tagged with the owning runtime. - 2. 
Each vsync (UI thread), `FrameDriver` dispatches each dirty context's present onto its - **owning runtime's `RuntimeScheduler`** — so `Surface.Present()` + the Apple Metal - scheduling wait run on the same thread that did `getCurrentTexture` / `submit`, preserving - Dawn surface thread-affinity and guaranteeing present-after-submit ordering (FIFO on that - loop). Clear dirty after present. -- Offscreen path (`SurfaceRegistry` `switchToOffscreen`, `src/Offscreen.ts`) has no surface → - present is a no-op; tests keep reading back the CPU texture. - ---- - -## Phase 0 — Local spikes (DO THESE FIRST, on your machine) - -These de-risk the refactor before any large change. Run from repo root. - -```bash -# 0. install deps (web container can't do this) -yarn install -``` - -### Spike 1 — worklet-runtime scheduler (HIGHEST RISK) -Goal: obtain a **thread-safe** "schedule this lambda on runtime R's thread" for an arbitrary -worklet runtime (UI runtime + a `createWorkletRuntime` runtime) using -`react-native-worklets@0.8.3`. - -```bash -# inspect the worklets native API actually shipped at 0.8.3 -find node_modules/react-native-worklets -name "*.h" | grep -iE "Runtime|Scheduler|Invoker|Queue" -# look for: WorkletRuntime, RuntimeManager / WorkletsModuleProxy, UIScheduler / JSScheduler, -# and any per-runtime executor / async queue we can call from a background C++ thread. -``` -Deliverable: a one-paragraph note on the exact symbol(s) to use (or "not exposed → needs JS -shim / worklets PR"). This determines whether Phase 3 (first-class worklet runtimes) is cheap -or needs a workaround. - -### Spike 2 — concurrent `WaitAny` on one Dawn instance -Goal: confirm multiple threads can each call `instance.WaitAny(singleFuture, UINT64_MAX)` -concurrently on the **same** instance safely. If not, switch `GpuEventLoop` to the -single-worker + condition-variable fallback. -- Search Dawn headers/docs in `externals/dawn` (or built `libs/`) for `WaitAny` threading - guarantees. 
A tiny throwaway C++ test against the built Dawn is ideal. - -### Spike 3 — Android frame callback -Goal: confirm NDK `AChoreographer_postFrameCallback` is usable at the project `minSdk` -(`packages/webgpu/android/build.gradle`). If `minSdk < 24` for that API, plan the Java -`Choreographer` + JNI bridge instead. - ---- - -## Phase 0 — Findings (completed 2026-06-02, branch `claude/keen-darwin-xeywa`) - -Environment verified: `node_modules` installed, `externals/dawn` present, RN **0.81.4**, -`react-native-worklets` **0.8.3**, Android `minSdk` **26**, NDK 26/27 available. - -### Spike 1 — worklet-runtime scheduler → **GREEN (symbol exists, thread-safe)** -`worklets/WorkletRuntime/WorkletRuntime.h` exposes exactly what we need: -- `WorkletRuntime::schedule(std::function job)` — posts `job` onto the - runtime's own `AsyncQueue` (`WorkletRuntime.cpp:211-227`). It is **callable from any thread** - (the underlying `AsyncQueueImpl` is a mutex+condvar queue; `AsyncQueueUI` forwards to the - `UIScheduler`). The job runs on the runtime's event-loop thread, under `runtimeMutex_`, and - uses `weak_from_this()` so it is a **safe no-op if the runtime was torn down**. This is a - drop-in for `RuntimeScheduler::scheduleOnJS` for worklet runtimes. -- `WorkletRuntime::getWeakRuntimeFromJSIRuntime(jsi::Runtime &rt)` (RN ≥ 0.81, we have 0.81.4) - maps a bare `jsi::Runtime&` → `weak_ptr`, so the per-runtime - `RuntimeContext` can recover the scheduler from any worklet runtime (UI + dedicated - `createWorkletRuntime`) with no JS shim. - -**Caveat (build wiring, not API):** webgpu does **not** currently link worklets natively -(no worklets entry in `packages/webgpu/*.podspec` or `android/CMakeLists.txt`; only JS-level -serialization helpers exist). Phase 3 must add the native dependency: -- iOS: depend on `RNWorklets` pod (it ships public headers under `worklets/`, - `header_dir = "worklets"`). 
-- Android: import the worklets **prefab** module `worklets` (`prefabPublishing` is on in - `react-native-worklets/android/build.gradle`). -Worklets is already a `peerDependency`, so this adds no new install. Phase 3 stays cheap; no -worklets PR or JS shim needed. - -### Spike 2 — concurrent `WaitAny` on one instance → **GREEN (designed for it)** -Dawn's native `EventManager` (`externals/dawn/src/dawn/native/EventManager.{h,cpp}`) is built -for multi-threaded waits: -- State is `MutexProtected`; `mNextFutureID` is atomic; a code comment - (`EventManager.h:78-82`) explicitly notes "another thread can race to complete the event … - via a WaitAny call". -- Each `WaitAny` call with a non-zero timeout creates a **stack-local `Waiter`** with its **own** - `MutexCondVarProtected` (`EventManager.cpp:338`, `:106`), registers it per-FutureID in - the shared map, then blocks on its own condvar. `SetFutureReady` signals the registered - waiters. → **N threads can each block in `WaitAny` on the same instance concurrently, each - owning its own future.** This is exactly the plan's primary "one future per pool thread" model. - -**Hard constraint discovered (`EventManager.cpp:341-354`):** within a *single* `WaitAny` call -with a non-zero timeout, you may **not** mix events from multiple queues, nor a queue event -together with a non-queue event — it returns `WaitStatus::Error` ("Mixed source waits with -timeouts are not currently supported"). Note `mapAsync`/`onSubmittedWorkDone` are *queue* -events while `requestAdapter`/`requestDevice`/`createPipelineAsync`/`popErrorScope` are -*non-queue* events. -→ **Implication:** adopt the **per-future-per-thread** design (each pool thread waits on exactly -one future) — it is single-source and always legal. The plan's stated fallback ("single worker -waiting on the batched future set") is **not viable** as written, because batching mixed sources -hits this restriction. 
If a bounded pool is undesirable, the correct fallback is one -worker-thread *per future* (still single-source), not one worker for a batched set. - -### Spike 3 — Android frame callback → **GREEN (no JNI bridge needed)** -In `android/choreographer.h`, `AChoreographer_getInstance()` and -`AChoreographer_postFrameCallback()` are both `__INTRODUCED_IN(24)`; `minSdk` is **26**, so the -pure-NDK path works with no Java `Choreographer`/JNI bridge. -- `postFrameCallback` is `__DEPRECATED_IN(29)` in favor of `postFrameCallback64` (API 29) / - `postVsyncCallback` (API 33). Recommendation: call `postFrameCallback64` when - `android_get_device_api_level() >= 29`, else `postFrameCallback` (works on 26-28). Both are - acceptable; the 64-bit variant just avoids the deprecation warning and 32-bit time wrap. -- `AChoreographer_getInstance()` must be called on a thread with a `Looper` (the main/UI - thread) — `FrameDriver` already lives on the UI thread, so this is satisfied. - -### Net go/no-go -All three risks clear. Proceed to Phase 1. Two plan amendments: (1) Phase 3 must add the -worklets native build dependency (podspec + prefab); (2) `GpuEventLoop` must use -per-future-per-thread waits (drop the batched-future fallback). - -## Implementation phases (after Phase 0) - -**Phase 1 — Event-driven async** (no public API change; `present()` untouched) — **DONE** -- Add `RuntimeScheduler` (+ main-runtime CallInvoker impl) and `GpuEventLoop`. -- Switch all 7 async sites to `WaitAnyOnly` + `GpuEventLoop.addFuture(...)`: - `api/GPU.cpp`, `api/GPUAdapter.cpp`, `api/GPUDevice.cpp` (×3), `api/GPUBuffer.cpp`, - `api/GPUQueue.cpp`, `api/GPUShaderModule.cpp`. -- Delete `async/AsyncRunner.*` polling + `async/JSIMicrotaskDispatcher.*`; keep - `AsyncTaskHandle` / `Promise` settle path on the new scheduler. 
- -### Phase 1 — what shipped (branch `claude/keen-darwin-xeywa`) -New files (`cpp/rnwgpu/async/`): -- `RuntimeScheduler.h` — interface `scheduleOnJS(std::function)`, - callable from any thread. -- `CallInvokerScheduler.{h,cpp}` — main-runtime impl wrapping - `react::CallInvoker::invokeAsync(CallFunc&&)` (RN 0.81 delivers the job on the JS thread - with the runtime). -- `GpuEventLoop.{h,cpp}` — background `WaitAny` driver. Lazily-grown bounded worker pool - (cap = `clamp(hardware_concurrency, 2, 8)`); each worker does a single-future - `instance.WaitAny(future, UINT64_MAX)` (always a legal single-source wait, per Phase 0 - spike 2). Shared state held behind a `shared_ptr` so detached workers (and the - `wgpu::Instance` ref they need) outlive the object safely; teardown sets `running=false` - and notifies idle workers without joining in-flight GPU waits. - -Deviations from the original plan (intentional): -1. **`AsyncRunner` was replaced by `RuntimeContext`** (`async/RuntimeContext.{h,cpp}`), the - per-runtime coordinator the plan's Target-architecture §A already named. It bundles - `{RuntimeScheduler, GpuEventLoop}` and exposes `postTask`; all polling internals - (`tick`/`requestTick`/`ProcessEvents`/pump counters) are gone. `AsyncTaskHandle` depends - only on `RuntimeScheduler`. The old `AsyncRunner` name/files no longer exist anywhere - (the 6 `api/*` classes now hold `std::shared_ptr _async`); the dead - `GPU::getAsyncRunner()` accessor was deleted. -2. **`postTask`'s callback now returns a `wgpu::Future`** (the value returned by the Dawn - `WaitAnyOnly` call), which `AsyncRunner` hands to `GpuEventLoop.addFuture`. A returned - future with `id == 0` means "no event to wait on" and is ignored — used by - `GPUDevice::getLost` (resolved synchronously or later via `notifyDeviceLost`). This - replaced the old `keepPumping` bool argument, which is gone. 
- -`GPU`'s constructor now takes the `CallInvoker` (threaded through from `RNWebGPUManager`, -which already held it) to build the `CallInvokerScheduler`. `AsyncDispatcher.h` and -`JSIMicrotaskDispatcher.{h,cpp}` deleted; `android/CMakeLists.txt` updated (iOS podspec -globs `cpp/**` so it needs no change). - -Validation run locally: all changed + new TUs syntax-check under the Android NDK toolchain; -the full `react-native-wgpu` native lib **compiles and links** for `arm64-v8a` (ninja); -`cpplint` clean (project filters); `clang-format` (pinned 15.0.0) applied; `yarn tsc` passes -(no TS changed). On-device runtime behaviour (frame pacing, zero idle CPU) is Phase 4. - -**Phase 2 — Auto-present + remove `present()`** -- Add `FrameDriver` (iOS `CADisplayLink`, Android `AChoreographer`); wire - `getCurrentTexture` → register; vsync → dispatch present to owning runtime. -- Remove `GPUCanvasContext::present` (`api/GPUCanvasContext.h:50,58`, `.cpp:56-65`) and - `SurfaceInfo::present` (`SurfaceRegistry.h:116-121`). -- JS: drop `present` from `RNCanvasContext` (`src/Canvas.tsx:22-24`, `src/types.ts`). -- Migrate all 16 example / `useWebGPU` call sites + `README.md` + `packages/webgpu/README.md`. - -**Phase 3 — First-class worklet runtimes** -- Worklet-runtime `RuntimeScheduler` impl (per Spike 1); verify auto-present dispatch on UI + - dedicated runtimes; update `apps/example/src/Reanimated/Reanimated.tsx` (drop `present()`, - keep its own rAF loop). - -**Phase 4 — Validation** -```bash -yarn tsc && yarn lint -yarn workspace react-native-wgpu test # offscreen readback + demo specs -yarn build:ios # or: yarn workspace example ios -yarn build:android # or: yarn workspace example android -``` -Verify: no idle-CPU polling (logging), correct frame pacing, no present-ordering glitches, -Reanimated UI/Dedicated examples render. 
- ---- - -## 16 `present()` call sites to migrate (Phase 2) - -``` -apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx -apps/example/src/components/useWebGPU.ts -apps/example/src/components/Texture.tsx -apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx -apps/example/src/ThreeJS/Helmet.tsx -apps/example/src/ComputeToys/engine/index.ts -apps/example/src/CanvasAPI/CanvasAPI.tsx -apps/example/src/ThreeJS/PostProcessing.tsx -apps/example/src/ThreeJS/Cube.tsx -apps/example/src/Triangle/HelloTriangle.tsx -apps/example/src/Triangle/HelloTriangleMSAA.tsx -apps/example/src/ThreeJS/InstancedMesh.tsx -apps/example/src/ThreeJS/Retargeting.tsx -apps/example/src/ThreeJS/components/FiberCanvas.tsx -apps/example/src/Reanimated/Reanimated.tsx -apps/example/src/ThreeJS/Backdrop.tsx -``` -Plus `README.md` and `packages/webgpu/README.md`. - ---- - -## Risks / open questions -- **Worklet-runtime scheduler** access in worklets 0.8.3 (Spike 1 — highest risk). -- **Concurrent `WaitAny`** semantics on one Dawn instance (Spike 2; single-worker fallback ready). -- **Present timing**: vsync-dispatched-to-owning-loop must land after submit (FIFO on that loop) - and before the next `getCurrentTexture`. -- **Breaking change**: `present()` removed — type, examples, README updated together. -- **Apple Metal wait** moves into the frame-boundary present task, off the synchronous call path. - ---- - -## How to resume locally - -```bash -git fetch origin claude/keen-darwin-xeywa -git checkout claude/keen-darwin-xeywa -git pull origin claude/keen-darwin-xeywa -# open this file and run Phase 0 spikes, then start Claude Code: -# claude -# suggested kickoff prompt: -# "Read docs/refactor-async-present-plan.md. Run the Phase 0 spikes and report -# findings before implementing. Develop on this branch." 
-``` diff --git a/packages/webgpu/README.md b/packages/webgpu/README.md index 8eeb1cba1..433d498fa 100644 --- a/packages/webgpu/README.md +++ b/packages/webgpu/README.md @@ -128,8 +128,6 @@ export function HelloTriangle() { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - context.present(); }; helloTriangle(); }, [ref]); @@ -174,17 +172,28 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get(); ### Frame Scheduling -In React Native, we want to keep frame presentation as a manual operation as we plan to provide more advanced rendering options that are React Native specific. -This means that when you are ready to present a frame, you need to call `present` on the context. +On the **main JS runtime** and the **Reanimated UI runtime**, frame presentation is automatic: once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). You do not need to call `present()` on these runtimes. ```tsx // draw // submit to the queue device.queue.submit([commandEncoder.finish()]); -// This method is React Native only -context.present(); +// The frame is presented automatically on the next vsync. ``` +When you render from a **dedicated worklet runtime** (e.g. `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame processor), your code runs on its own thread, where presentation can't be driven automatically. Call `context.present()` yourself after submitting: + +```tsx +const onFrame = () => { + "worklet"; + // draw on the dedicated runtime's thread + device.queue.submit([commandEncoder.finish()]); + context.present(); // required on dedicated worklet runtimes; a no-op on JS/UI +}; +``` + +`present()` is safe to call from a worklet that runs on either the UI runtime or a dedicated runtime: it presents on the dedicated runtime and does nothing on the JS and UI runtimes (which auto-present). 
+ ### Canvas Transparency On Android, the `alphaMode` property is ignored when configuring the canvas. @@ -296,7 +305,6 @@ const render = () => { // Release the surface's access window right after the submit that sampled it. externalTexture.destroy(); - context.present(); }; ``` @@ -328,7 +336,6 @@ const renderFrame = (device: GPUDevice, context: GPUCanvasContext) => { const commandEncoder = device.createCommandEncoder(); // ... render ... device.queue.submit([commandEncoder.finish()]); - context.present(); }; // Initialize WebGPU on main thread, then run on UI thread diff --git a/packages/webgpu/android/CMakeLists.txt b/packages/webgpu/android/CMakeLists.txt index 50756e72e..51005acdc 100644 --- a/packages/webgpu/android/CMakeLists.txt +++ b/packages/webgpu/android/CMakeLists.txt @@ -47,6 +47,7 @@ add_library(${PACKAGE_NAME} SHARED ../cpp/rnwgpu/api/GPUComputePipeline.cpp ../cpp/rnwgpu/api/GPUCanvasContext.cpp ../cpp/rnwgpu/RNWebGPUManager.cpp + ../cpp/rnwgpu/FrameDriver.cpp ../cpp/jsi/Promise.cpp ../cpp/jsi/RuntimeLifecycleMonitor.cpp ../cpp/jsi/RuntimeAwareCache.cpp diff --git a/packages/webgpu/android/cpp/cpp-adapter.cpp b/packages/webgpu/android/cpp/cpp-adapter.cpp index 2a441c218..4f0ba61d3 100644 --- a/packages/webgpu/android/cpp/cpp-adapter.cpp +++ b/packages/webgpu/android/cpp/cpp-adapter.cpp @@ -10,6 +10,7 @@ #include #include "AndroidPlatformContext.h" +#include "FrameDriver.h" #include "GPUCanvasContext.h" #include "RNWebGPUManager.h" @@ -17,6 +18,37 @@ std::shared_ptr manager; +// JNI handles for driving the vsync source (com.webgpu.WebGPUFrameDriver), +// cached on the JNI thread in initializeNative (which has the app classloader). 
+static JavaVM *gJavaVM = nullptr; +static jclass gFrameDriverClass = nullptr; +static jmethodID gFrameDriverStart = nullptr; +static jmethodID gFrameDriverStop = nullptr; + +static void callFrameDriver(jmethodID method) { + if (gJavaVM == nullptr || gFrameDriverClass == nullptr || method == nullptr) { + return; + } + JNIEnv *env = nullptr; + bool attached = false; + jint res = gJavaVM->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_6); + if (res == JNI_EDETACHED) { + if (gJavaVM->AttachCurrentThread(&env, nullptr) != JNI_OK) { + return; + } + attached = true; + } else if (res != JNI_OK) { + return; + } + env->CallStaticVoidMethod(gFrameDriverClass, method); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + } + if (attached) { + gJavaVM->DetachCurrentThread(); + } +} + extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUModule_initializeNative( JNIEnv *env, jobject /* this */, jlong jsRuntime, jobject jsCallInvokerHolder, jobject blobModule) { @@ -31,6 +63,27 @@ extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUModule_initializeNative( std::make_shared(globalBlobModule); manager = std::make_shared(runtime, jsCallInvoker, platformContext); + + // Cache JNI handles for the Choreographer-based vsync source and register it + // with the FrameDriver to drive auto-present (replaces context.present()). 
+ env->GetJavaVM(&gJavaVM); + jclass localCls = env->FindClass("com/webgpu/WebGPUFrameDriver"); + if (localCls != nullptr) { + gFrameDriverClass = reinterpret_cast<jclass>(env->NewGlobalRef(localCls)); + gFrameDriverStart = + env->GetStaticMethodID(gFrameDriverClass, "start", "()V"); + gFrameDriverStop = env->GetStaticMethodID(gFrameDriverClass, "stop", "()V"); + env->DeleteLocalRef(localCls); + } + rnwgpu::FrameDriver::getInstance().setPlatformVSync( + [] { callFrameDriver(gFrameDriverStart); }, + [] { callFrameDriver(gFrameDriverStop); }); +} + +extern "C" JNIEXPORT void JNICALL +Java_com_webgpu_WebGPUFrameDriver_nativeOnVSync(JNIEnv * /*env*/, + jclass /*clazz*/) { + rnwgpu::FrameDriver::getInstance().onVSync(); } extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUView_onSurfaceChanged( @@ -66,6 +119,7 @@ Java_com_webgpu_WebGPUView_switchToOffscreenSurface(JNIEnv *env, jobject thiz, extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUView_onSurfaceDestroy( JNIEnv *env, jobject thiz, jint contextId) { + rnwgpu::FrameDriver::getInstance().cancelPresent(contextId); auto &registry = rnwgpu::SurfaceRegistry::getInstance(); registry.removeSurfaceInfo(contextId); } \ No newline at end of file diff --git a/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java b/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java new file mode 100644 index 000000000..03a1d2c29 --- /dev/null +++ b/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java @@ -0,0 +1,66 @@ +package com.webgpu; + +import android.os.Handler; +import android.os.Looper; +import android.view.Choreographer; + +/** + * Drives WebGPU auto-present from the main-thread {@link Choreographer}, + * replacing the manual {@code context.present()} call. + * + *

{@link #start()} / {@link #stop()} are invoked from native code + * (rnwgpu::FrameDriver::setPlatformVSync) on arbitrary threads; both hop to the + * main thread. While running, {@link #doFrame(long)} calls back into native + * once per vsync, where pending surfaces are presented. + */ +public class WebGPUFrameDriver implements Choreographer.FrameCallback { + private static final WebGPUFrameDriver INSTANCE = new WebGPUFrameDriver(); + + private final Handler mainHandler = new Handler(Looper.getMainLooper()); + private boolean running = false; + + private WebGPUFrameDriver() {} + + /** Called from native (any thread). */ + public static void start() { + INSTANCE.startInternal(); + } + + /** Called from native (any thread). */ + public static void stop() { + INSTANCE.stopInternal(); + } + + private void startInternal() { + mainHandler.post( + () -> { + if (running) { + return; + } + running = true; + Choreographer.getInstance().postFrameCallback(this); + }); + } + + private void stopInternal() { + mainHandler.post( + () -> { + if (!running) { + return; + } + running = false; + Choreographer.getInstance().removeFrameCallback(this); + }); + } + + @Override + public void doFrame(long frameTimeNanos) { + if (!running) { + return; + } + nativeOnVSync(); + Choreographer.getInstance().postFrameCallback(this); + } + + private static native void nativeOnVSync(); +} diff --git a/packages/webgpu/apple/MetalView.mm b/packages/webgpu/apple/MetalView.mm index ccff1245c..e617da889 100644 --- a/packages/webgpu/apple/MetalView.mm +++ b/packages/webgpu/apple/MetalView.mm @@ -1,6 +1,8 @@ #import "MetalView.h" #import "webgpu/webgpu_cpp.h" +#include "FrameDriver.h" + @implementation MetalView { BOOL _isConfigured; } @@ -42,6 +44,8 @@ - (void)update { } - (void)dealloc { + // Stop any pending auto-present for this surface before it goes away. 
+ rnwgpu::FrameDriver::getInstance().cancelPresent([_contextId intValue]); auto &registry = rnwgpu::SurfaceRegistry::getInstance(); // Remove the surface info from the registry registry.removeSurfaceInfo([_contextId intValue]); diff --git a/packages/webgpu/apple/WebGPUFrameDriver.h b/packages/webgpu/apple/WebGPUFrameDriver.h new file mode 100644 index 000000000..aacae84ee --- /dev/null +++ b/packages/webgpu/apple/WebGPUFrameDriver.h @@ -0,0 +1,13 @@ +#pragma once + +#import + +// Objective-C wrapper around the platform vsync source (CADisplayLink) that +// drives rnwgpu::FrameDriver::onVSync() once per frame. start/stop are invoked +// by the C++ FrameDriver via setPlatformVSync; both hop to the main thread. +@interface WebGPUFrameDriver : NSObject + ++ (void)start; ++ (void)stop; + +@end diff --git a/packages/webgpu/apple/WebGPUFrameDriver.mm b/packages/webgpu/apple/WebGPUFrameDriver.mm new file mode 100644 index 000000000..1d302e2fa --- /dev/null +++ b/packages/webgpu/apple/WebGPUFrameDriver.mm @@ -0,0 +1,88 @@ +#import "WebGPUFrameDriver.h" + +#import "RNWGUIKit.h" +#import + +#include "FrameDriver.h" + +@implementation WebGPUFrameDriver + ++ (void)onFrame { + rnwgpu::FrameDriver::getInstance().onVSync(); +} + +#if !TARGET_OS_OSX + +// iOS / tvOS: CADisplayLink on the main run loop, paused/resumed for +// start/stop. +static CADisplayLink *sDisplayLink = nil; + ++ (void)tick:(CADisplayLink *)link { + [WebGPUFrameDriver onFrame]; +} + ++ (void)start { + dispatch_async(dispatch_get_main_queue(), ^{ + if (sDisplayLink == nil) { + sDisplayLink = [CADisplayLink displayLinkWithTarget:self + selector:@selector(tick:)]; + [sDisplayLink addToRunLoop:[NSRunLoop mainRunLoop] + forMode:NSRunLoopCommonModes]; + } + sDisplayLink.paused = NO; + }); +} + ++ (void)stop { + dispatch_async(dispatch_get_main_queue(), ^{ + sDisplayLink.paused = YES; + }); +} + +#else // TARGET_OS_OSX + +// macOS: CADisplayLink is available via NSScreen on 14.0+. 
On older systems we +// fall back to an NSTimer at ~60Hz (not vsync-aligned, but keeps auto-present +// working). FrameDriver self-idles cheaply when nothing is rendering. +static id sDisplayLink = nil; + ++ (void)tick:(id)sender { + [WebGPUFrameDriver onFrame]; +} + ++ (void)start { + dispatch_async(dispatch_get_main_queue(), ^{ + if (sDisplayLink == nil) { + if (@available(macOS 14.0, *)) { + CADisplayLink *link = + [NSScreen.mainScreen displayLinkWithTarget:self + selector:@selector(tick:)]; + [link addToRunLoop:[NSRunLoop mainRunLoop] + forMode:NSRunLoopCommonModes]; + sDisplayLink = link; + } else { + sDisplayLink = [NSTimer scheduledTimerWithTimeInterval:1.0 / 60.0 + target:self + selector:@selector(tick:) + userInfo:nil + repeats:YES]; + } + } + if ([sDisplayLink isKindOfClass:[CADisplayLink class]]) { + ((CADisplayLink *)sDisplayLink).paused = NO; + } + }); +} + ++ (void)stop { + dispatch_async(dispatch_get_main_queue(), ^{ + if ([sDisplayLink isKindOfClass:[CADisplayLink class]]) { + ((CADisplayLink *)sDisplayLink).paused = YES; + } + // NSTimer fallback keeps firing; onVSync is a cheap no-op while idle. + }); +} + +#endif // TARGET_OS_OSX + +@end diff --git a/packages/webgpu/apple/WebGPUModule.mm b/packages/webgpu/apple/WebGPUModule.mm index 99580aa14..c4c7224ad 100644 --- a/packages/webgpu/apple/WebGPUModule.mm +++ b/packages/webgpu/apple/WebGPUModule.mm @@ -1,6 +1,8 @@ #import "WebGPUModule.h" #include "ApplePlatformContext.h" +#include "FrameDriver.h" #import "GPUCanvasContext.h" +#import "WebGPUFrameDriver.h" #import #import @@ -78,6 +80,11 @@ - (void)invalidate { std::make_shared(); webgpuManager = std::make_shared(runtime, jsInvoker, platformContext); + + // Drive auto-present from the display's vsync (replaces context.present()). 
+ rnwgpu::FrameDriver::getInstance().setPlatformVSync( + [] { [WebGPUFrameDriver start]; }, [] { [WebGPUFrameDriver stop]; }); + return @true; } diff --git a/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp b/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp new file mode 100644 index 000000000..792940e5e --- /dev/null +++ b/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp @@ -0,0 +1,81 @@ +#include "FrameDriver.h" + +#include +#include +#include + +namespace jsi = facebook::jsi; + +namespace rnwgpu { + +FrameDriver &FrameDriver::getInstance() { + static FrameDriver instance; + return instance; +} + +void FrameDriver::setPlatformVSync(std::function start, + std::function stop) { + std::lock_guard lock(_mutex); + _start = std::move(start); + _stop = std::move(stop); +} + +void FrameDriver::requestPresent( + int contextId, std::shared_ptr surface, + std::shared_ptr scheduler) { + if (!surface || !scheduler) { + return; + } + + std::function startToCall; + { + std::lock_guard lock(_mutex); + _pending[contextId] = {std::move(surface), std::move(scheduler)}; + _idleFrames = 0; + if (!_running && _start) { + _running = true; + startToCall = _start; + } + } + + // Invoked outside the lock: the platform start hops to the UI thread. 
+ if (startToCall) { + startToCall(); + } +} + +void FrameDriver::cancelPresent(int contextId) { + std::lock_guard lock(_mutex); + _pending.erase(contextId); +} + +void FrameDriver::onVSync() { + std::vector toPresent; + std::function stopToCall; + { + std::lock_guard lock(_mutex); + if (!_pending.empty()) { + toPresent.reserve(_pending.size()); + for (auto &entry : _pending) { + toPresent.push_back(std::move(entry.second)); + } + _pending.clear(); + _idleFrames = 0; + } else if (_running && ++_idleFrames >= kMaxIdleFrames) { + _running = false; + stopToCall = _stop; + } + } + + for (auto &pending : toPresent) { + auto surface = pending.surface; + pending.scheduler->scheduleOnJS( + [surface](jsi::Runtime & /*runtime*/) { surface->presentFrame(); }); + } + + if (stopToCall) { + stopToCall(); + } +} + +} // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/FrameDriver.h b/packages/webgpu/cpp/rnwgpu/FrameDriver.h new file mode 100644 index 000000000..c16fedabf --- /dev/null +++ b/packages/webgpu/cpp/rnwgpu/FrameDriver.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include + +#include "SurfaceRegistry.h" +#include "rnwgpu/async/RuntimeScheduler.h" + +namespace rnwgpu { + +/** + * Global vsync-driven auto-present coordinator. Replaces the manual + * `context.present()` call. + * + * Flow: + * - `GPUCanvasContext::getCurrentTexture()` (JS thread) calls + * `requestPresent` for its surface, tagged with the owning runtime's + * RuntimeScheduler. + * - A platform vsync source (iOS CADisplayLink / Android Choreographer) calls + * `onVSync()` on the UI thread once per frame. + * - On each vsync, every surface that requested a present has its present + * dispatched onto its owning runtime's JS thread (so `Surface.Present()` + * and the Apple Metal scheduling wait run on the same thread that did + * getCurrentTexture / submit, preserving Dawn surface thread-affinity and + * present-after-submit ordering via FIFO on that loop). 
+ * + * The vsync source is request-driven: it is started when the first present is + * requested and stopped after a few idle frames, so an idle (non-rendering) app + * costs zero CPU. + */ +class FrameDriver { +public: + static FrameDriver &getInstance(); + + /** + * Register how to start/stop the platform vsync source. `start`/`stop` are + * invoked when presents begin/cease; each implementation is responsible for + * hopping to the UI thread as needed. Called once per platform at init. + */ + void setPlatformVSync(std::function start, + std::function stop); + + /** + * Request that `surface` be presented at the next vsync. Coalesced per + * contextId (at most one present per surface per frame). Thread-safe; called + * from a JS thread inside getCurrentTexture. Surfaces with no on-screen + * `wgpu::Surface` (offscreen) should not be registered. + */ + void requestPresent(int contextId, std::shared_ptr surface, + std::shared_ptr scheduler); + + /** + * Drop any pending present for a surface (e.g. when its view is torn down). + * Thread-safe. + */ + void cancelPresent(int contextId); + + /** Called by the platform vsync source on the UI thread, once per frame. */ + void onVSync(); + +private: + FrameDriver() = default; + + struct Pending { + std::shared_ptr surface; + std::shared_ptr scheduler; + }; + + // Number of consecutive empty frames before the vsync source is stopped. + // A small grace period avoids start/stop thrash during continuous rendering. 
+ static constexpr int kMaxIdleFrames = 3; + + std::mutex _mutex; + std::unordered_map _pending; + std::function _start; + std::function _stop; + bool _running = false; + int _idleFrames = 0; +}; + +} // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h b/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h index 110a45d44..ed098896a 100644 --- a/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h +++ b/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h @@ -7,6 +7,12 @@ #include "webgpu/webgpu_cpp.h" +#ifdef __APPLE__ +namespace dawn::native::metal { +void WaitForCommandsToBeScheduled(WGPUDevice device); +} // namespace dawn::native::metal +#endif + namespace rnwgpu { struct NativeInfo { @@ -113,7 +119,22 @@ class SurfaceInfo { height = newHeight; } - void present() { + // Present the current surface texture. Called at the frame boundary from the + // owning runtime's JS thread (via FrameDriver), replacing the old manual + // present(). No-op when offscreen / unconfigured (no surface). + void presentFrame() { +#ifdef __APPLE__ + // Ensure command buffers are scheduled before presenting. Read the device + // under a shared lock, then wait without holding it (the wait can block). + wgpu::Device device; + { + std::shared_lock lock(_mutex); + device = config.device; + } + if (device) { + dawn::native::metal::WaitForCommandsToBeScheduled(device.Get()); + } +#endif std::unique_lock lock(_mutex); if (surface) { surface.Present(); @@ -131,6 +152,12 @@ class SurfaceInfo { } } + // True when an on-screen wgpu::Surface is attached (vs offscreen texture). 
+ bool hasSurface() { + std::shared_lock lock(_mutex); + return surface != nullptr; + } + NativeInfo getNativeInfo() { std::shared_lock lock(_mutex); return {.nativeSurface = nativeSurface, .width = width, .height = height}; diff --git a/packages/webgpu/cpp/rnwgpu/api/GPU.h b/packages/webgpu/cpp/rnwgpu/api/GPU.h index e7dc15caf..b2488d4c7 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPU.h +++ b/packages/webgpu/cpp/rnwgpu/api/GPU.h @@ -53,6 +53,7 @@ class GPU : public NativeObject { } inline const wgpu::Instance get() { return _instance; } + inline std::shared_ptr getContext() { return _async; } private: wgpu::Instance _instance; diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp index d75eb7b0f..c4390ba6d 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp @@ -1,17 +1,33 @@ #include "GPUCanvasContext.h" #include "Convertors.h" +#include "FrameDriver.h" #include "RNWebGPUManager.h" #include -#ifdef __APPLE__ -namespace dawn::native::metal { - -void WaitForCommandsToBeScheduled(WGPUDevice device); +namespace rnwgpu { +namespace { +// Runtimes whose present is automatic (no ctx.present() needed): the main JS +// runtime and the Reanimated UI runtime. Both are reached correctly by the +// global vsync FrameDriver dispatching through the main runtime's scheduler. +// Dedicated worklet runtimes (createWorkletRuntime, Vision Camera frame +// processors, …) run on their own thread with no safe scheduler hook, so they +// present explicitly via ctx.present(). +bool isAutoPresentedRuntime(jsi::Runtime &runtime) { + if (async::RuntimeContext::get(runtime) != nullptr) { + return true; // main JS runtime + } + // Worklets tags every runtime with a numeric `__RUNTIME_KIND` + // (worklets::RuntimeKind: ReactNative=1, UI=2, Worker=3). Auto-present only + // the UI runtime; treat Worker / unknown / untagged as needing ctx.present(). 
+ auto kind = runtime.global().getProperty(runtime, "__RUNTIME_KIND"); + if (kind.isNumber()) { + constexpr int kRuntimeKindUI = 2; + return static_cast(kind.asNumber()) == kRuntimeKindUI; + } + return false; } -#endif - -namespace rnwgpu { +} // namespace void GPUCanvasContext::configure( std::shared_ptr configuration) { @@ -39,7 +55,10 @@ void GPUCanvasContext::configure( void GPUCanvasContext::unconfigure() {} -std::shared_ptr GPUCanvasContext::getCurrentTexture() { +jsi::Value GPUCanvasContext::getCurrentTexture(jsi::Runtime &runtime, + const jsi::Value & /*thisValue*/, + const jsi::Value * /*args*/, + size_t /*count*/) { auto prevSize = _surfaceInfo->getConfig(); auto width = _canvas->getWidth(); auto height = _canvas->getHeight(); @@ -47,21 +66,44 @@ std::shared_ptr GPUCanvasContext::getCurrentTexture() { if (sizeHasChanged) { _surfaceInfo->reconfigure(width, height); } + auto texture = _surfaceInfo->getCurrentTexture(); - // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC - // cycles every frame since the canvas texture doesn't own the buffer. - return std::make_shared(texture, "", false); -} -void GPUCanvasContext::present() { -#ifdef __APPLE__ - dawn::native::metal::WaitForCommandsToBeScheduled( - _surfaceInfo->getDevice().Get()); -#endif auto size = _surfaceInfo->getSize(); _canvas->setClientWidth(size.width); _canvas->setClientHeight(size.height); - _surfaceInfo->present(); + + // Auto-present on the JS / UI runtime: acquiring the current texture + // schedules a present for this surface at the next vsync (spec-aligned + // "update the rendering" after the frame), dispatched through the main + // runtime's scheduler. Dedicated worklet runtimes instead call ctx.present() + // explicitly on their own thread. Offscreen surfaces have no wgpu::Surface, + // so skip them (their texture is read back directly). 
+ if (_surfaceInfo->hasSurface() && isAutoPresentedRuntime(runtime)) { + FrameDriver::getInstance().requestPresent(_contextId, _surfaceInfo, + _gpu->getContext()->scheduler()); + } + + // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC + // cycles every frame since the canvas texture doesn't own the buffer. + auto gpuTexture = std::make_shared(texture, "", false); + return JSIConverter>::toJSI(runtime, gpuTexture); +} + +jsi::Value GPUCanvasContext::present(jsi::Runtime &runtime, + const jsi::Value & /*thisValue*/, + const jsi::Value * /*args*/, + size_t /*count*/) { + // Only meaningful on a dedicated worklet runtime, where present can't be + // automated. On the JS / UI runtime present is automatic, so this is a no-op + // there — which makes it safe to call from a worklet shared between the UI + // runtime and a dedicated runtime. Presents synchronously on the calling + // thread (the one that did getCurrentTexture / submit), preserving Dawn + // surface thread-affinity. 
+ if (!isAutoPresentedRuntime(runtime) && _surfaceInfo->hasSurface()) { + _surfaceInfo->presentFrame(); + } + return jsi::Value::undefined(); } } // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h index 4b97a7887..a2e80b7cc 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h @@ -26,7 +26,7 @@ class GPUCanvasContext : public NativeObject { GPUCanvasContext(std::shared_ptr gpu, int contextId, int width, int height) - : NativeObject(CLASS_NAME), _gpu(std::move(gpu)) { + : NativeObject(CLASS_NAME), _contextId(contextId), _gpu(std::move(gpu)) { _canvas = std::make_shared(nullptr, width, height); auto &registry = rnwgpu::SurfaceRegistry::getInstance(); _surfaceInfo = @@ -54,10 +54,17 @@ class GPUCanvasContext : public NativeObject { inline const wgpu::Surface get() { return nullptr; } void configure(std::shared_ptr configuration); void unconfigure(); - std::shared_ptr getCurrentTexture(); - void present(); + // Full-control signatures so we can learn the *calling* runtime and decide + // how this frame is presented (auto on the JS / UI runtime; explicit + // ctx.present() on a dedicated worklet runtime). + jsi::Value getCurrentTexture(jsi::Runtime &runtime, + const jsi::Value &thisValue, + const jsi::Value *args, size_t count); + jsi::Value present(jsi::Runtime &runtime, const jsi::Value &thisValue, + const jsi::Value *args, size_t count); private: + int _contextId; std::shared_ptr _canvas; std::shared_ptr _surfaceInfo; std::shared_ptr _gpu; diff --git a/packages/webgpu/src/Canvas.tsx b/packages/webgpu/src/Canvas.tsx index 1030f3e38..43c9621e7 100644 --- a/packages/webgpu/src/Canvas.tsx +++ b/packages/webgpu/src/Canvas.tsx @@ -20,6 +20,15 @@ export interface NativeCanvas { } export type RNCanvasContext = GPUCanvasContext & { + /** + * Present the current frame. 
+ * + * Only needed when rendering from a **dedicated worklet runtime** (e.g. + * `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame + * processor), which runs on its own thread. On the main JS runtime and the + * Reanimated UI runtime present is automatic (driven by a global vsync), so + * calling this there is a no-op. Call it after `queue.submit()`. + */ present: () => void; }; diff --git a/packages/webgpu/src/Offscreen.ts b/packages/webgpu/src/Offscreen.ts index c4e460bb2..4deab8a1c 100644 --- a/packages/webgpu/src/Offscreen.ts +++ b/packages/webgpu/src/Offscreen.ts @@ -65,7 +65,7 @@ class GPUOffscreenCanvasContext implements GPUCanvasContext { } present() { - // Do nothing + // Offscreen contexts have nothing to present; readback is via getImageData. } getDevice() { diff --git a/packages/webgpu/src/WebPolyfillGPUModule.ts b/packages/webgpu/src/WebPolyfillGPUModule.ts index 9dcc1f1c5..8b629a0c9 100644 --- a/packages/webgpu/src/WebPolyfillGPUModule.ts +++ b/packages/webgpu/src/WebPolyfillGPUModule.ts @@ -40,9 +40,9 @@ function makeWebGPUCanvasContext( } const context = canvas.getContext("webgpu")!; - return Object.assign(context, { - present: () => {}, - }); + // On web there is no manual present; expose a no-op so RNCanvasContext's + // present() (used on native dedicated worklet runtimes) is callable here too. + return Object.assign(context, { present: () => {} }); } // @ts-expect-error - polyfill for RNWebGPU native module diff --git a/packages/webgpu/src/types.ts b/packages/webgpu/src/types.ts index c03f92b4b..1608a4ff0 100644 --- a/packages/webgpu/src/types.ts +++ b/packages/webgpu/src/types.ts @@ -9,6 +9,15 @@ export interface NativeCanvas { } export type RNCanvasContext = GPUCanvasContext & { + /** + * Present the current frame. + * + * Only needed when rendering from a **dedicated worklet runtime** (e.g. + * `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame + * processor), which runs on its own thread. 
On the main JS runtime and the + * Reanimated UI runtime present is automatic (driven by a global vsync), so + * calling this there is a no-op. Call it after `queue.submit()`. + */ present: () => void; };