From c32687360e26eec21f1f5ec0d4fc6f454109b9fd Mon Sep 17 00:00:00 2001 From: William Candillon Date: Tue, 2 Jun 2026 16:11:57 +0200 Subject: [PATCH 1/4] :wrench: --- README.md | 10 +--- apps/example/ios/Podfile.lock | 4 +- apps/example/src/CanvasAPI/CanvasAPI.tsx | 2 - apps/example/src/ComputeToys/engine/index.ts | 1 - .../ImportExternalTexture.tsx | 1 - apps/example/src/Reanimated/Reanimated.tsx | 1 - .../SharedTextureMemory.tsx | 1 - .../StorageBufferVertices.tsx | 2 - apps/example/src/ThreeJS/Backdrop.tsx | 1 - apps/example/src/ThreeJS/Cube.tsx | 1 - apps/example/src/ThreeJS/Helmet.tsx | 1 - apps/example/src/ThreeJS/InstancedMesh.tsx | 1 - apps/example/src/ThreeJS/PostProcessing.tsx | 1 - apps/example/src/ThreeJS/Retargeting.tsx | 1 - .../src/ThreeJS/components/FiberCanvas.tsx | 1 - apps/example/src/Triangle/HelloTriangle.tsx | 2 - .../src/Triangle/HelloTriangleMSAA.tsx | 1 - .../example/src/VisionCamera/VisionCamera.tsx | 1 - apps/example/src/components/Texture.tsx | 1 - apps/example/src/components/useWebGPU.ts | 1 - docs/refactor-async-present-plan.md | 46 +++++++++++++++- packages/webgpu/README.md | 10 +--- packages/webgpu/android/CMakeLists.txt | 1 + packages/webgpu/android/cpp/cpp-adapter.cpp | 54 +++++++++++++++++++ packages/webgpu/apple/MetalView.mm | 4 ++ packages/webgpu/apple/WebGPUModule.mm | 7 +++ packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h | 29 +++++++++- packages/webgpu/cpp/rnwgpu/api/GPU.h | 1 + .../cpp/rnwgpu/api/GPUCanvasContext.cpp | 35 ++++++------ .../webgpu/cpp/rnwgpu/api/GPUCanvasContext.h | 5 +- packages/webgpu/src/Canvas.tsx | 6 +-- packages/webgpu/src/Offscreen.ts | 4 -- packages/webgpu/src/WebPolyfillGPUModule.ts | 5 +- packages/webgpu/src/types.ts | 6 +-- 34 files changed, 172 insertions(+), 76 deletions(-) diff --git a/README.md b/README.md index 8eeb1cba1..d7415053b 100644 --- a/README.md +++ b/README.md @@ -128,8 +128,6 @@ export function HelloTriangle() { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - 
context.present(); }; helloTriangle(); }, [ref]); @@ -174,15 +172,13 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get(); ### Frame Scheduling -In React Native, we want to keep frame presentation as a manual operation as we plan to provide more advanced rendering options that are React Native specific. -This means that when you are ready to present a frame, you need to call `present` on the context. +Frame presentation is automatic. Once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). There is no `present()` call. ```tsx // draw // submit to the queue device.queue.submit([commandEncoder.finish()]); -// This method is React Native only -context.present(); +// The frame is presented automatically on the next vsync. ``` ### Canvas Transparency @@ -296,7 +292,6 @@ const render = () => { // Release the surface's access window right after the submit that sampled it. externalTexture.destroy(); - context.present(); }; ``` @@ -328,7 +323,6 @@ const renderFrame = (device: GPUDevice, context: GPUCanvasContext) => { const commandEncoder = device.createCommandEncoder(); // ... render ... 
device.queue.submit([commandEncoder.finish()]); - context.present(); }; // Initialize WebGPU on main thread, then run on UI thread diff --git a/apps/example/ios/Podfile.lock b/apps/example/ios/Podfile.lock index fd5ba968c..b4c5f158a 100644 --- a/apps/example/ios/Podfile.lock +++ b/apps/example/ios/Podfile.lock @@ -1924,7 +1924,7 @@ PODS: - ReactCommon/turbomodule/core - SocketRocket - Yoga - - react-native-wgpu (0.5.12): + - react-native-wgpu (0.5.13): - boost - DoubleConversion - fast_float @@ -3074,7 +3074,7 @@ SPEC CHECKSUMS: React-microtasksnativemodule: 75b6604b667d297292345302cc5bfb6b6aeccc1b react-native-safe-area-context: c00143b4823773bba23f2f19f85663ae89ceb460 react-native-skia: fc73e9bdc46ebb420a98c9c2be29fee80f565e79 - react-native-wgpu: 274ffec11ee3a082260d9f3d1fb54030a5ca0873 + react-native-wgpu: 0496e9efeb4c3939ab56371005ede4e1468591d1 React-NativeModulesApple: 879fbdc5dcff7136abceb7880fe8a2022a1bd7c3 React-oscompat: 93b5535ea7f7dff46aaee4f78309a70979bdde9d React-perflogger: 5536d2df3d18fe0920263466f7b46a56351c0510 diff --git a/apps/example/src/CanvasAPI/CanvasAPI.tsx b/apps/example/src/CanvasAPI/CanvasAPI.tsx index a9f5c4928..a403c8388 100644 --- a/apps/example/src/CanvasAPI/CanvasAPI.tsx +++ b/apps/example/src/CanvasAPI/CanvasAPI.tsx @@ -89,8 +89,6 @@ export const CanvasAPI = () => { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - context.present(); })() } title="check surface" diff --git a/apps/example/src/ComputeToys/engine/index.ts b/apps/example/src/ComputeToys/engine/index.ts index f0fa08f07..8db2562ad 100644 --- a/apps/example/src/ComputeToys/engine/index.ts +++ b/apps/example/src/ComputeToys/engine/index.ts @@ -398,7 +398,6 @@ fn passSampleLevelBilinearRepeat(pass_index: int, uv: float2, lod: float) -> flo // Submit command buffer this.device.queue.submit([encoder.finish()]); - this.surface!.present(); // Update frame counter this.bindings!.time.host.frame += 1; diff --git 
a/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx b/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx index f8399ee8a..7c973e03f 100644 --- a/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx +++ b/apps/example/src/ImportExternalTexture/ImportExternalTexture.tsx @@ -247,7 +247,6 @@ export const ImportExternalTexture = () => { // Now that the work sampling it has been submitted, end the external // texture's access window so the frame's surface is released promptly. externalTex?.destroy(); - context.present(); rafRef.current = requestAnimationFrame(render); }; rafRef.current = requestAnimationFrame(render); diff --git a/apps/example/src/Reanimated/Reanimated.tsx b/apps/example/src/Reanimated/Reanimated.tsx index 505296565..2f8b5e5cb 100644 --- a/apps/example/src/Reanimated/Reanimated.tsx +++ b/apps/example/src/Reanimated/Reanimated.tsx @@ -79,7 +79,6 @@ export const webGPUDemo = ( device.queue.submit([commandEncoder.finish()]); - context.present(); if (runAnimation.value) { requestAnimationFrame(frame); } diff --git a/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx b/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx index b5627cc43..197657460 100644 --- a/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx +++ b/apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx @@ -268,7 +268,6 @@ export const SharedTextureMemory = () => { } pass.end(); device.queue.submit([encoder.finish()]); - context.present(); rafRef.current = requestAnimationFrame(render); }; rafRef.current = requestAnimationFrame(render); diff --git a/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx b/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx index 907264638..b1906cf74 100644 --- a/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx +++ b/apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx @@ -185,8 +185,6 @@ export function StorageBufferVertices() { const 
commandBuffer = encoder.finish(); device.queue.submit([commandBuffer]); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (context as any).present(); }); return ( diff --git a/apps/example/src/ThreeJS/Backdrop.tsx b/apps/example/src/ThreeJS/Backdrop.tsx index 8ed2a8c91..113325b9d 100644 --- a/apps/example/src/ThreeJS/Backdrop.tsx +++ b/apps/example/src/ThreeJS/Backdrop.tsx @@ -150,7 +150,6 @@ export const Backdrop = () => { } renderer.render(scene, camera); - context!.present(); } return () => { renderer.setAnimationLoop(null); diff --git a/apps/example/src/ThreeJS/Cube.tsx b/apps/example/src/ThreeJS/Cube.tsx index d3e9707b5..ea3fe0f23 100644 --- a/apps/example/src/ThreeJS/Cube.tsx +++ b/apps/example/src/ThreeJS/Cube.tsx @@ -31,7 +31,6 @@ export const Cube = () => { mesh.rotation.y = time / 1000; renderer.render(scene, camera); - context.present(); } renderer.setAnimationLoop(animate); return () => { diff --git a/apps/example/src/ThreeJS/Helmet.tsx b/apps/example/src/ThreeJS/Helmet.tsx index be7cb626f..70720d360 100644 --- a/apps/example/src/ThreeJS/Helmet.tsx +++ b/apps/example/src/ThreeJS/Helmet.tsx @@ -49,7 +49,6 @@ export const Helmet = () => { function animate() { animateCamera(); renderer.render(scene, camera); - context!.present(); } return () => { diff --git a/apps/example/src/ThreeJS/InstancedMesh.tsx b/apps/example/src/ThreeJS/InstancedMesh.tsx index 3f60631de..5b7c7ca4d 100644 --- a/apps/example/src/ThreeJS/InstancedMesh.tsx +++ b/apps/example/src/ThreeJS/InstancedMesh.tsx @@ -59,7 +59,6 @@ export const InstancedMesh = () => { function animate() { render(); - context!.present(); } function render() { diff --git a/apps/example/src/ThreeJS/PostProcessing.tsx b/apps/example/src/ThreeJS/PostProcessing.tsx index d94ef1728..0c2980501 100644 --- a/apps/example/src/ThreeJS/PostProcessing.tsx +++ b/apps/example/src/ThreeJS/PostProcessing.tsx @@ -72,7 +72,6 @@ export const PostProcessing = () => { mixer.update(delta); } postProcessing.render(); - 
context!.present(); } return () => { renderer.setAnimationLoop(null); diff --git a/apps/example/src/ThreeJS/Retargeting.tsx b/apps/example/src/ThreeJS/Retargeting.tsx index c25601885..8b8dd9a29 100644 --- a/apps/example/src/ThreeJS/Retargeting.tsx +++ b/apps/example/src/ThreeJS/Retargeting.tsx @@ -302,7 +302,6 @@ export const Retargeting = () => { source.mixer.update(delta); mixer.update(delta); renderer.render(scene, camera); - context.present(); }); return () => { diff --git a/apps/example/src/ThreeJS/components/FiberCanvas.tsx b/apps/example/src/ThreeJS/components/FiberCanvas.tsx index 91b699553..92b928987 100644 --- a/apps/example/src/ThreeJS/components/FiberCanvas.tsx +++ b/apps/example/src/ThreeJS/components/FiberCanvas.tsx @@ -66,7 +66,6 @@ export const FiberCanvas = ({ const renderFrame = state.gl.render.bind(state.gl); state.gl.render = (s: THREE.Scene, c: THREE.Camera) => { renderFrame(s, c); - context?.present(); }; }, }); diff --git a/apps/example/src/Triangle/HelloTriangle.tsx b/apps/example/src/Triangle/HelloTriangle.tsx index 3e28d6c12..caeb560b3 100644 --- a/apps/example/src/Triangle/HelloTriangle.tsx +++ b/apps/example/src/Triangle/HelloTriangle.tsx @@ -77,8 +77,6 @@ export function HelloTriangle() { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - context.present(); })(); }, [ref]); diff --git a/apps/example/src/Triangle/HelloTriangleMSAA.tsx b/apps/example/src/Triangle/HelloTriangleMSAA.tsx index 5d66983d5..b9518fbe9 100644 --- a/apps/example/src/Triangle/HelloTriangleMSAA.tsx +++ b/apps/example/src/Triangle/HelloTriangleMSAA.tsx @@ -87,7 +87,6 @@ export function HelloTriangleMSAA() { } frame(); - context.present(); })(); }, [ref]); diff --git a/apps/example/src/VisionCamera/VisionCamera.tsx b/apps/example/src/VisionCamera/VisionCamera.tsx index c4adcfaa0..cba2d2948 100644 --- a/apps/example/src/VisionCamera/VisionCamera.tsx +++ b/apps/example/src/VisionCamera/VisionCamera.tsx @@ -617,7 +617,6 @@ const CameraView = () => { 
// access window now to release the camera frame's surface promptly // (don't wait for GC, which would starve the frame buffer pool). externalTex.destroy(); - context.present(); } finally { videoFrame.release(); } diff --git a/apps/example/src/components/Texture.tsx b/apps/example/src/components/Texture.tsx index d9e689b41..5bd82a911 100644 --- a/apps/example/src/components/Texture.tsx +++ b/apps/example/src/components/Texture.tsx @@ -145,7 +145,6 @@ export const Texture = ({ texture, style, device }: GPUTextureProps) => { renderPass.end(); device.queue.submit([commandEncoder.finish()]); - context.present(); }, [device, state, texture, ref]); return ; }; diff --git a/apps/example/src/components/useWebGPU.ts b/apps/example/src/components/useWebGPU.ts index ac8a631ac..1a399aafe 100644 --- a/apps/example/src/components/useWebGPU.ts +++ b/apps/example/src/components/useWebGPU.ts @@ -57,7 +57,6 @@ export const useWebGPU = (scene: Scene) => { const render = () => { const timestamp = Date.now(); renderScene(timestamp); - context.present(); animationFrameId.current = requestAnimationFrame(render); }; diff --git a/docs/refactor-async-present-plan.md b/docs/refactor-async-present-plan.md index e69706534..e4d38b802 100644 --- a/docs/refactor-async-present-plan.md +++ b/docs/refactor-async-present-plan.md @@ -244,7 +244,7 @@ the full `react-native-wgpu` native lib **compiles and links** for `arm64-v8a` ( `cpplint` clean (project filters); `clang-format` (pinned 15.0.0) applied; `yarn tsc` passes (no TS changed). On-device runtime behaviour (frame pacing, zero idle CPU) is Phase 4. -**Phase 2 — Auto-present + remove `present()`** +**Phase 2 — Auto-present + remove `present()`** — **DONE** - Add `FrameDriver` (iOS `CADisplayLink`, Android `AChoreographer`); wire `getCurrentTexture` → register; vsync → dispatch present to owning runtime. 
- Remove `GPUCanvasContext::present` (`api/GPUCanvasContext.h:50,58`, `.cpp:56-65`) and @@ -252,6 +252,50 @@ the full `react-native-wgpu` native lib **compiles and links** for `arm64-v8a` ( - JS: drop `present` from `RNCanvasContext` (`src/Canvas.tsx:22-24`, `src/types.ts`). - Migrate all 16 example / `useWebGPU` call sites + `README.md` + `packages/webgpu/README.md`. +### Phase 2 — what shipped (branch `claude/keen-darwin-xeywa`) +New files: +- `cpp/rnwgpu/FrameDriver.{h,cpp}` — global vsync auto-present coordinator. `requestPresent` + (from `getCurrentTexture`, JS thread) coalesces per `contextId`; `onVSync` (UI thread) + dispatches each pending surface's present onto its owning runtime's `RuntimeScheduler` + (`surface->presentFrame()`). Request-driven: starts the platform vsync on first request, + stops after `kMaxIdleFrames` (3) idle frames → zero idle CPU. +- `apple/WebGPUFrameDriver.{h,mm}` — iOS/tvOS `CADisplayLink` on the main run loop (paused + toggled by start/stop). macOS uses `NSScreen.displayLinkWithTarget:` on 14+, else an + `NSTimer` fallback. Selector → `FrameDriver::onVSync()`. +- `android/.../com/webgpu/WebGPUFrameDriver.java` — main-thread `Choreographer` driver; + `doFrame` → static `nativeOnVSync()` JNI → `FrameDriver::onVSync()`, reposts while running. + +Wiring: +- `SurfaceInfo::present()` → `presentFrame()` (Apple `WaitForCommandsToBeScheduled` + Present, + no-op offscreen); added `SurfaceInfo::hasSurface()`. Metal extern moved to `SurfaceRegistry.h`. +- `GPU::getContext()` re-exposes the per-runtime `RuntimeContext` (so the canvas can reach its + scheduler). `GPUCanvasContext` stores `_contextId`, registers the present in + `getCurrentTexture` (and now sets the canvas client size there), and dropped `present()` + + its JS binding. +- iOS `WebGPUModule install` and Android `initializeNative` register `setPlatformVSync`. View + teardown (`MetalView dealloc`, Android `onSurfaceDestroy`) calls `FrameDriver::cancelPresent`. 
+- JS: `RNCanvasContext` is now just `GPUCanvasContext` (`src/Canvas.tsx`, `src/types.ts`); + removed the no-op `present` from `Offscreen.ts` and `WebPolyfillGPUModule.ts`. 18 example + call sites (the plan's 16 + `VisionCamera`, `ImportExternalTexture`) and both READMEs migrated. + +Decisions / deviations: +1. **Android vsync = Java `Choreographer` + JNI** (not pure NDK `AChoreographer`), chosen for + robustness — pure NDK needs a JNI hop to a Looper thread to bootstrap anyway. Confirmed with + the user. +2. **`present()` hard-removed** (breaking), confirmed with the user. +3. **Owning-runtime caveat (→ Phase 3):** `getCurrentTexture` currently dispatches present via + the **main** runtime's scheduler (`_gpu->getContext()`). Correct for main-JS rendering. The + Reanimated example renders on the **UI (worklet) runtime**, so its present is migrated (call + removed) but auto-present won't target the correct thread until Phase 3 tags the present with + the *calling* runtime and gives worklet runtimes their own `RuntimeScheduler`. Expect the + Reanimated/Dedicated examples to be visually broken between Phase 2 and Phase 3. + +Validation (local): `react-native-wgpu` native lib **compiles and links** for `arm64-v8a` +(ninja, CMake picked up `FrameDriver.cpp`); `cpplint` clean; `clang-format` applied; `yarn tsc` +and `yarn lint` pass for both `packages/webgpu` and `apps/example`. iOS `.mm` and the Java +driver are not compiled locally (no iOS/gradle build run here) — review-only; needs a device +build. On-device frame pacing / zero-idle-CPU verification is Phase 4. 
+ **Phase 3 — First-class worklet runtimes** - Worklet-runtime `RuntimeScheduler` impl (per Spike 1); verify auto-present dispatch on UI + dedicated runtimes; update `apps/example/src/Reanimated/Reanimated.tsx` (drop `present()`, diff --git a/packages/webgpu/README.md b/packages/webgpu/README.md index 8eeb1cba1..d7415053b 100644 --- a/packages/webgpu/README.md +++ b/packages/webgpu/README.md @@ -128,8 +128,6 @@ export function HelloTriangle() { passEncoder.end(); device.queue.submit([commandEncoder.finish()]); - - context.present(); }; helloTriangle(); }, [ref]); @@ -174,15 +172,13 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get(); ### Frame Scheduling -In React Native, we want to keep frame presentation as a manual operation as we plan to provide more advanced rendering options that are React Native specific. -This means that when you are ready to present a frame, you need to call `present` on the context. +Frame presentation is automatic. Once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). There is no `present()` call. ```tsx // draw // submit to the queue device.queue.submit([commandEncoder.finish()]); -// This method is React Native only -context.present(); +// The frame is presented automatically on the next vsync. ``` ### Canvas Transparency @@ -296,7 +292,6 @@ const render = () => { // Release the surface's access window right after the submit that sampled it. externalTexture.destroy(); - context.present(); }; ``` @@ -328,7 +323,6 @@ const renderFrame = (device: GPUDevice, context: GPUCanvasContext) => { const commandEncoder = device.createCommandEncoder(); // ... render ... 
device.queue.submit([commandEncoder.finish()]); - context.present(); }; // Initialize WebGPU on main thread, then run on UI thread diff --git a/packages/webgpu/android/CMakeLists.txt b/packages/webgpu/android/CMakeLists.txt index 50756e72e..51005acdc 100644 --- a/packages/webgpu/android/CMakeLists.txt +++ b/packages/webgpu/android/CMakeLists.txt @@ -47,6 +47,7 @@ add_library(${PACKAGE_NAME} SHARED ../cpp/rnwgpu/api/GPUComputePipeline.cpp ../cpp/rnwgpu/api/GPUCanvasContext.cpp ../cpp/rnwgpu/RNWebGPUManager.cpp + ../cpp/rnwgpu/FrameDriver.cpp ../cpp/jsi/Promise.cpp ../cpp/jsi/RuntimeLifecycleMonitor.cpp ../cpp/jsi/RuntimeAwareCache.cpp diff --git a/packages/webgpu/android/cpp/cpp-adapter.cpp b/packages/webgpu/android/cpp/cpp-adapter.cpp index 2a441c218..4f0ba61d3 100644 --- a/packages/webgpu/android/cpp/cpp-adapter.cpp +++ b/packages/webgpu/android/cpp/cpp-adapter.cpp @@ -10,6 +10,7 @@ #include #include "AndroidPlatformContext.h" +#include "FrameDriver.h" #include "GPUCanvasContext.h" #include "RNWebGPUManager.h" @@ -17,6 +18,37 @@ std::shared_ptr manager; +// JNI handles for driving the vsync source (com.webgpu.WebGPUFrameDriver), +// cached on the JNI thread in initializeNative (which has the app classloader). 
+static JavaVM *gJavaVM = nullptr; +static jclass gFrameDriverClass = nullptr; +static jmethodID gFrameDriverStart = nullptr; +static jmethodID gFrameDriverStop = nullptr; + +static void callFrameDriver(jmethodID method) { + if (gJavaVM == nullptr || gFrameDriverClass == nullptr || method == nullptr) { + return; + } + JNIEnv *env = nullptr; + bool attached = false; + jint res = gJavaVM->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_6); + if (res == JNI_EDETACHED) { + if (gJavaVM->AttachCurrentThread(&env, nullptr) != JNI_OK) { + return; + } + attached = true; + } else if (res != JNI_OK) { + return; + } + env->CallStaticVoidMethod(gFrameDriverClass, method); + if (env->ExceptionCheck()) { + env->ExceptionClear(); + } + if (attached) { + gJavaVM->DetachCurrentThread(); + } +} + extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUModule_initializeNative( JNIEnv *env, jobject /* this */, jlong jsRuntime, jobject jsCallInvokerHolder, jobject blobModule) { @@ -31,6 +63,27 @@ extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUModule_initializeNative( std::make_shared(globalBlobModule); manager = std::make_shared(runtime, jsCallInvoker, platformContext); + + // Cache JNI handles for the Choreographer-based vsync source and register it + // with the FrameDriver to drive auto-present (replaces context.present()). 
+ env->GetJavaVM(&gJavaVM); + jclass localCls = env->FindClass("com/webgpu/WebGPUFrameDriver"); + if (localCls != nullptr) { + gFrameDriverClass = reinterpret_cast<jclass>(env->NewGlobalRef(localCls)); + gFrameDriverStart = + env->GetStaticMethodID(gFrameDriverClass, "start", "()V"); + gFrameDriverStop = env->GetStaticMethodID(gFrameDriverClass, "stop", "()V"); + env->DeleteLocalRef(localCls); + } + rnwgpu::FrameDriver::getInstance().setPlatformVSync( + [] { callFrameDriver(gFrameDriverStart); }, + [] { callFrameDriver(gFrameDriverStop); }); +} + +extern "C" JNIEXPORT void JNICALL +Java_com_webgpu_WebGPUFrameDriver_nativeOnVSync(JNIEnv * /*env*/, + jclass /*clazz*/) { + rnwgpu::FrameDriver::getInstance().onVSync(); } extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUView_onSurfaceChanged( @@ -66,6 +119,7 @@ Java_com_webgpu_WebGPUView_switchToOffscreenSurface(JNIEnv *env, jobject thiz, extern "C" JNIEXPORT void JNICALL Java_com_webgpu_WebGPUView_onSurfaceDestroy( JNIEnv *env, jobject thiz, jint contextId) { + rnwgpu::FrameDriver::getInstance().cancelPresent(contextId); auto &registry = rnwgpu::SurfaceRegistry::getInstance(); registry.removeSurfaceInfo(contextId); } \ No newline at end of file diff --git a/packages/webgpu/apple/MetalView.mm b/packages/webgpu/apple/MetalView.mm index ccff1245c..e617da889 100644 --- a/packages/webgpu/apple/MetalView.mm +++ b/packages/webgpu/apple/MetalView.mm @@ -1,6 +1,8 @@ #import "MetalView.h" #import "webgpu/webgpu_cpp.h" +#include "FrameDriver.h" + @implementation MetalView { BOOL _isConfigured; } @@ -42,6 +44,8 @@ - (void)update { } - (void)dealloc { + // Stop any pending auto-present for this surface before it goes away. 
+ rnwgpu::FrameDriver::getInstance().cancelPresent([_contextId intValue]); auto &registry = rnwgpu::SurfaceRegistry::getInstance(); // Remove the surface info from the registry registry.removeSurfaceInfo([_contextId intValue]); diff --git a/packages/webgpu/apple/WebGPUModule.mm b/packages/webgpu/apple/WebGPUModule.mm index 99580aa14..c4c7224ad 100644 --- a/packages/webgpu/apple/WebGPUModule.mm +++ b/packages/webgpu/apple/WebGPUModule.mm @@ -1,6 +1,8 @@ #import "WebGPUModule.h" #include "ApplePlatformContext.h" +#include "FrameDriver.h" #import "GPUCanvasContext.h" +#import "WebGPUFrameDriver.h" #import #import @@ -78,6 +80,11 @@ - (void)invalidate { std::make_shared(); webgpuManager = std::make_shared(runtime, jsInvoker, platformContext); + + // Drive auto-present from the display's vsync (replaces context.present()). + rnwgpu::FrameDriver::getInstance().setPlatformVSync( + [] { [WebGPUFrameDriver start]; }, [] { [WebGPUFrameDriver stop]; }); + return @true; } diff --git a/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h b/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h index 110a45d44..ed098896a 100644 --- a/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h +++ b/packages/webgpu/cpp/rnwgpu/SurfaceRegistry.h @@ -7,6 +7,12 @@ #include "webgpu/webgpu_cpp.h" +#ifdef __APPLE__ +namespace dawn::native::metal { +void WaitForCommandsToBeScheduled(WGPUDevice device); +} // namespace dawn::native::metal +#endif + namespace rnwgpu { struct NativeInfo { @@ -113,7 +119,22 @@ class SurfaceInfo { height = newHeight; } - void present() { + // Present the current surface texture. Called at the frame boundary from the + // owning runtime's JS thread (via FrameDriver), replacing the old manual + // present(). No-op when offscreen / unconfigured (no surface). + void presentFrame() { +#ifdef __APPLE__ + // Ensure command buffers are scheduled before presenting. Read the device + // under a shared lock, then wait without holding it (the wait can block). 
+ wgpu::Device device; + { + std::shared_lock lock(_mutex); + device = config.device; + } + if (device) { + dawn::native::metal::WaitForCommandsToBeScheduled(device.Get()); + } +#endif std::unique_lock lock(_mutex); if (surface) { surface.Present(); @@ -131,6 +152,12 @@ class SurfaceInfo { } } + // True when an on-screen wgpu::Surface is attached (vs offscreen texture). + bool hasSurface() { + std::shared_lock lock(_mutex); + return surface != nullptr; + } + NativeInfo getNativeInfo() { std::shared_lock lock(_mutex); return {.nativeSurface = nativeSurface, .width = width, .height = height}; diff --git a/packages/webgpu/cpp/rnwgpu/api/GPU.h b/packages/webgpu/cpp/rnwgpu/api/GPU.h index e7dc15caf..b2488d4c7 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPU.h +++ b/packages/webgpu/cpp/rnwgpu/api/GPU.h @@ -53,6 +53,7 @@ class GPU : public NativeObject { } inline const wgpu::Instance get() { return _instance; } + inline std::shared_ptr getContext() { return _async; } private: wgpu::Instance _instance; diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp index d75eb7b0f..7a2c32886 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp @@ -1,16 +1,9 @@ #include "GPUCanvasContext.h" #include "Convertors.h" +#include "FrameDriver.h" #include "RNWebGPUManager.h" #include -#ifdef __APPLE__ -namespace dawn::native::metal { - -void WaitForCommandsToBeScheduled(WGPUDevice device); - -} -#endif - namespace rnwgpu { void GPUCanvasContext::configure( @@ -48,20 +41,26 @@ std::shared_ptr GPUCanvasContext::getCurrentTexture() { _surfaceInfo->reconfigure(width, height); } auto texture = _surfaceInfo->getCurrentTexture(); - // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC - // cycles every frame since the canvas texture doesn't own the buffer. 
- return std::make_shared(texture, "", false); -} -void GPUCanvasContext::present() { -#ifdef __APPLE__ - dawn::native::metal::WaitForCommandsToBeScheduled( - _surfaceInfo->getDevice().Get()); -#endif + // Auto-present: acquiring the current texture schedules a present for this + // surface at the next vsync (spec-aligned "update the rendering" after the + // frame). Replaces the old explicit context.present(). Offscreen surfaces + // have no wgpu::Surface, so skip them (their texture is read back directly). auto size = _surfaceInfo->getSize(); _canvas->setClientWidth(size.width); _canvas->setClientHeight(size.height); - _surfaceInfo->present(); + if (_surfaceInfo->hasSurface()) { + // Phase 2: dispatch the present on the main runtime (the only runtime that + // owns WebGPU rendering today). Phase 3 will tag this with the *calling* + // runtime so worklet-runtime rendering (e.g. the Reanimated example) + // presents on its own JS thread, preserving Dawn surface thread-affinity. + FrameDriver::getInstance().requestPresent(_contextId, _surfaceInfo, + _gpu->getContext()->scheduler()); + } + + // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC + // cycles every frame since the canvas texture doesn't own the buffer. 
+ return std::make_shared(texture, "", false); } } // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h index 4b97a7887..2ab5d69c2 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h @@ -26,7 +26,7 @@ class GPUCanvasContext : public NativeObject { GPUCanvasContext(std::shared_ptr gpu, int contextId, int width, int height) - : NativeObject(CLASS_NAME), _gpu(std::move(gpu)) { + : NativeObject(CLASS_NAME), _contextId(contextId), _gpu(std::move(gpu)) { _canvas = std::make_shared(nullptr, width, height); auto &registry = rnwgpu::SurfaceRegistry::getInstance(); _surfaceInfo = @@ -47,7 +47,6 @@ class GPUCanvasContext : public NativeObject { &GPUCanvasContext::unconfigure); installMethod(runtime, prototype, "getCurrentTexture", &GPUCanvasContext::getCurrentTexture); - installMethod(runtime, prototype, "present", &GPUCanvasContext::present); } // TODO: is this ok? @@ -55,9 +54,9 @@ class GPUCanvasContext : public NativeObject { void configure(std::shared_ptr configuration); void unconfigure(); std::shared_ptr getCurrentTexture(); - void present(); private: + int _contextId; std::shared_ptr _canvas; std::shared_ptr _surfaceInfo; std::shared_ptr _gpu; diff --git a/packages/webgpu/src/Canvas.tsx b/packages/webgpu/src/Canvas.tsx index 1030f3e38..7c2a47a6e 100644 --- a/packages/webgpu/src/Canvas.tsx +++ b/packages/webgpu/src/Canvas.tsx @@ -19,9 +19,9 @@ export interface NativeCanvas { clientHeight: number; } -export type RNCanvasContext = GPUCanvasContext & { - present: () => void; -}; +// Auto-present (a global vsync FrameDriver) replaces the old manual present(); +// the native context is now just a spec GPUCanvasContext. 
+export type RNCanvasContext = GPUCanvasContext; export interface CanvasRef { getContextId: () => number; diff --git a/packages/webgpu/src/Offscreen.ts b/packages/webgpu/src/Offscreen.ts index c4e460bb2..6ce2f589c 100644 --- a/packages/webgpu/src/Offscreen.ts +++ b/packages/webgpu/src/Offscreen.ts @@ -64,10 +64,6 @@ class GPUOffscreenCanvasContext implements GPUCanvasContext { throw new Error("Method not implemented."); } - present() { - // Do nothing - } - getDevice() { if (!this.device) { throw new Error("Device is not configured."); diff --git a/packages/webgpu/src/WebPolyfillGPUModule.ts b/packages/webgpu/src/WebPolyfillGPUModule.ts index 9dcc1f1c5..04229cd05 100644 --- a/packages/webgpu/src/WebPolyfillGPUModule.ts +++ b/packages/webgpu/src/WebPolyfillGPUModule.ts @@ -39,10 +39,7 @@ function makeWebGPUCanvasContext( canvas.setAttribute("height", pixelHeight); } - const context = canvas.getContext("webgpu")!; - return Object.assign(context, { - present: () => {}, - }); + return canvas.getContext("webgpu")!; } // @ts-expect-error - polyfill for RNWebGPU native module diff --git a/packages/webgpu/src/types.ts b/packages/webgpu/src/types.ts index c03f92b4b..0758c73f4 100644 --- a/packages/webgpu/src/types.ts +++ b/packages/webgpu/src/types.ts @@ -8,9 +8,9 @@ export interface NativeCanvas { clientHeight: number; } -export type RNCanvasContext = GPUCanvasContext & { - present: () => void; -}; +// Auto-present (a global vsync FrameDriver) replaces the old manual present(); +// the native context is now just a spec GPUCanvasContext. 
+export type RNCanvasContext = GPUCanvasContext; export interface CanvasRef { getContextId: () => number; From f5bc1c20b2287ff71c1290e27ada6ed0dc5e4e8b Mon Sep 17 00:00:00 2001 From: William Candillon Date: Tue, 2 Jun 2026 17:05:24 +0200 Subject: [PATCH 2/4] :wrench: --- docs/refactor-async-present-plan.md | 74 +++++++++++++++- .../java/com/webgpu/WebGPUFrameDriver.java | 66 ++++++++++++++ packages/webgpu/apple/WebGPUFrameDriver.h | 13 +++ packages/webgpu/apple/WebGPUFrameDriver.mm | 88 +++++++++++++++++++ packages/webgpu/cpp/rnwgpu/FrameDriver.cpp | 81 +++++++++++++++++ packages/webgpu/cpp/rnwgpu/FrameDriver.h | 83 +++++++++++++++++ .../cpp/rnwgpu/api/GPUCanvasContext.cpp | 51 ++++++++--- .../webgpu/cpp/rnwgpu/api/GPUCanvasContext.h | 10 ++- 8 files changed, 450 insertions(+), 16 deletions(-) create mode 100644 packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java create mode 100644 packages/webgpu/apple/WebGPUFrameDriver.h create mode 100644 packages/webgpu/apple/WebGPUFrameDriver.mm create mode 100644 packages/webgpu/cpp/rnwgpu/FrameDriver.cpp create mode 100644 packages/webgpu/cpp/rnwgpu/FrameDriver.h diff --git a/docs/refactor-async-present-plan.md b/docs/refactor-async-present-plan.md index e4d38b802..65490af29 100644 --- a/docs/refactor-async-present-plan.md +++ b/docs/refactor-async-present-plan.md @@ -1,6 +1,6 @@ # Refactor: event-driven async + auto-present -Status: **Phase 0 complete — all spikes GREEN, ready for Phase 1** +Status: **Phases 1–3 complete (local build/lint green). Phase 4 (SurfaceRegistry rework) proposed; Phase 5 = on-device validation.** Branch: `claude/keen-darwin-xeywa` This document is the handoff for moving the async + present refactor forward. Phase 0 @@ -296,12 +296,80 @@ and `yarn lint` pass for both `packages/webgpu` and `apps/example`. iOS `.mm` an driver are not compiled locally (no iOS/gradle build run here) — review-only; needs a device build. On-device frame pacing / zero-idle-CPU verification is Phase 4. 
-**Phase 3 — First-class worklet runtimes** +**Phase 3 — First-class worklet runtimes** — **DONE** - Worklet-runtime `RuntimeScheduler` impl (per Spike 1); verify auto-present dispatch on UI + dedicated runtimes; update `apps/example/src/Reanimated/Reanimated.tsx` (drop `present()`, keep its own rAF loop). -**Phase 4 — Validation** +### Phase 3 — what shipped (branch `claude/keen-darwin-xeywa`) +Observed after Phase 2: the **UI-runtime** Reanimated example worked (the Reanimated UI runtime +executes on the **main thread**, so dispatching its present to the main runtime's scheduler +happened to land on the right thread), but the **dedicated `createWorkletRuntime`** example +(`Reanimated/DedicatedThread.tsx`, `runOnRuntime`) crashed — its render thread is its own, so a +main-thread present violated Dawn surface thread-affinity. + +**Decision (confirmed with the user): self-scheduled present, no native worklets dependency.** +Rather than link `react-native-worklets` natively and have the FrameDriver dispatch via +`WorkletRuntime::schedule` (the original plan / Spike 1 primary), worklet runtimes now schedule +their own present on their own event loop. This avoids a new native build dependency entirely +and is fully buildable/validatable locally (it is Spike 1's documented "JS-scheduling" +contingency). + +Implementation (native only; no JS/build-system changes): +- `GPUCanvasContext::getCurrentTexture` switched to the full-control HostFunction signature + (`jsi::Value(rt, thisVal, args, count)`, same pattern as `RNWebGPU::createImageBitmap`) so it + learns the **calling** runtime. New `schedulePresent(runtime)`: + - **Main runtime** (`RuntimeContext::get(runtime)` is non-null): unchanged — register with the + global vsync `FrameDriver` using that runtime's scheduler. + - **Any worklet runtime** (no `RuntimeContext` — Reanimated UI/dedicated, Vision Camera frame + processors, …): **present-on-next-acquire**. 
`getCurrentTexture` presents the *previous* + frame synchronously (inline, on the calling thread) just before acquiring the next texture; + by then the previous frame's submit has happened, and present runs on the same thread that + rendered it. This is the natural swapchain boundary and needs no scheduler. + + Why not schedule onto the runtime's own loop: two earlier attempts failed. (1) + `queueMicrotask` is **disabled** on worklet runtimes (throws "microtasks are disabled in this + runtime"). (2) `setImmediate`/`setTimeout` exist but route through the runtime's `EventLoop` + `AsyncQueue`, which for **Vision Camera** is a custom `NativeThreadAsyncQueue` that hops back + through JNI (`fbjni Environment::current()`) and **crashes** when pushed from a + non-JVM-attached thread. Present-on-next-acquire avoids the runtime's task queue entirely. + Trade-off: one frame of latency, and a worklet that renders exactly once would not present + its single frame (continuous loops — rAF, camera frames — are unaffected; the main runtime's + one-shot case is covered by the FrameDriver). +- `Reanimated.tsx` already had `present()` removed in Phase 2; `DedicatedThread.tsx` / + `UIThread.tsx` need no changes. + +Known limitation (out of scope, examples don't hit it): **async ops** (`mapAsync`, +`onSubmittedWorkDone`, …) invoked *on a worklet runtime* still settle their Promise via the +object's creation-runtime context (main), not the calling worklet runtime — the example worklets +only do synchronous rendering + present (device/adapter are created on the main runtime). Routing +async settlement to the calling runtime would need the same calling-runtime detection applied to +the 7 async sites; deferred until a use case needs it. + +Validation (local): native lib **compiles + links** for `arm64-v8a`; `cpplint` clean; +`clang-format` applied; `yarn tsc`/`yarn lint` unaffected (no JS changed). On-device +verification of the dedicated-worklet example is for the maintainer. 
+ +**Phase 4 — `SurfaceRegistry` / surface-model rework** (proposed) +The `SurfaceInfo` / `SurfaceRegistry` model (`cpp/rnwgpu/SurfaceRegistry.h`) predates the +event-driven + auto-present work and is now the rough edge. Candidate improvements to scope: +- **Surface thread-affinity.** Surface lifecycle (`configure`/`switchToOnscreen`/ + `switchToOffscreen`/`resize`) runs on the **UI thread** (native view callbacks) while + `getCurrentTexture`/`presentFrame` run on the **owning runtime's render thread**. A single + `shared_mutex` serializes them but they're still cross-thread against a Dawn surface that + prefers single-thread access. Consider routing all surface ops through the owning runtime + (e.g. via the `RuntimeScheduler`), making affinity structural rather than lock-guarded. +- **State clarity.** The on-screen-`surface` vs offscreen-`texture` duality is encoded as + `if (surface) … else …` branches throughout; a small explicit state (Offscreen / Onscreen) + would remove the implicit coupling and the `switchToOnscreen` flush path's validation cost + (its existing `// TODO: faster way without validation?`). +- **Dead/again-evaluated fields.** e.g. the stored `wgpu::Instance gpu` member appears unused; + audit members now that present/`hasSurface` were added. +- **Lifetime vs `contextId`.** Registry keyed by a JS-side incrementing `int`; `FrameDriver` + now also keys pending presents by `contextId`. Confirm teardown ordering (view dealloc → + `cancelPresent` + `removeSurfaceInfo`) is race-free under the new threading. 
+ +**Phase 5 — Validation** ```bash yarn tsc && yarn lint yarn workspace react-native-wgpu test # offscreen readback + demo specs diff --git a/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java b/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java new file mode 100644 index 000000000..03a1d2c29 --- /dev/null +++ b/packages/webgpu/android/src/main/java/com/webgpu/WebGPUFrameDriver.java @@ -0,0 +1,66 @@ +package com.webgpu; + +import android.os.Handler; +import android.os.Looper; +import android.view.Choreographer; + +/** + * Drives WebGPU auto-present from the main-thread {@link Choreographer}, + * replacing the manual {@code context.present()} call. + * + *

{@link #start()} / {@link #stop()} are invoked from native code + * (rnwgpu::FrameDriver::setPlatformVSync) on arbitrary threads; both hop to the + * main thread. While running, {@link #doFrame(long)} calls back into native + * once per vsync, where pending surfaces are presented. + */ +public class WebGPUFrameDriver implements Choreographer.FrameCallback { + private static final WebGPUFrameDriver INSTANCE = new WebGPUFrameDriver(); + + private final Handler mainHandler = new Handler(Looper.getMainLooper()); + private boolean running = false; + + private WebGPUFrameDriver() {} + + /** Called from native (any thread). */ + public static void start() { + INSTANCE.startInternal(); + } + + /** Called from native (any thread). */ + public static void stop() { + INSTANCE.stopInternal(); + } + + private void startInternal() { + mainHandler.post( + () -> { + if (running) { + return; + } + running = true; + Choreographer.getInstance().postFrameCallback(this); + }); + } + + private void stopInternal() { + mainHandler.post( + () -> { + if (!running) { + return; + } + running = false; + Choreographer.getInstance().removeFrameCallback(this); + }); + } + + @Override + public void doFrame(long frameTimeNanos) { + if (!running) { + return; + } + nativeOnVSync(); + Choreographer.getInstance().postFrameCallback(this); + } + + private static native void nativeOnVSync(); +} diff --git a/packages/webgpu/apple/WebGPUFrameDriver.h b/packages/webgpu/apple/WebGPUFrameDriver.h new file mode 100644 index 000000000..aacae84ee --- /dev/null +++ b/packages/webgpu/apple/WebGPUFrameDriver.h @@ -0,0 +1,13 @@ +#pragma once + +#import + +// Objective-C wrapper around the platform vsync source (CADisplayLink) that +// drives rnwgpu::FrameDriver::onVSync() once per frame. start/stop are invoked +// by the C++ FrameDriver via setPlatformVSync; both hop to the main thread. 
+@interface WebGPUFrameDriver : NSObject + ++ (void)start; ++ (void)stop; + +@end diff --git a/packages/webgpu/apple/WebGPUFrameDriver.mm b/packages/webgpu/apple/WebGPUFrameDriver.mm new file mode 100644 index 000000000..1d302e2fa --- /dev/null +++ b/packages/webgpu/apple/WebGPUFrameDriver.mm @@ -0,0 +1,88 @@ +#import "WebGPUFrameDriver.h" + +#import "RNWGUIKit.h" +#import + +#include "FrameDriver.h" + +@implementation WebGPUFrameDriver + ++ (void)onFrame { + rnwgpu::FrameDriver::getInstance().onVSync(); +} + +#if !TARGET_OS_OSX + +// iOS / tvOS: CADisplayLink on the main run loop, paused/resumed for +// start/stop. +static CADisplayLink *sDisplayLink = nil; + ++ (void)tick:(CADisplayLink *)link { + [WebGPUFrameDriver onFrame]; +} + ++ (void)start { + dispatch_async(dispatch_get_main_queue(), ^{ + if (sDisplayLink == nil) { + sDisplayLink = [CADisplayLink displayLinkWithTarget:self + selector:@selector(tick:)]; + [sDisplayLink addToRunLoop:[NSRunLoop mainRunLoop] + forMode:NSRunLoopCommonModes]; + } + sDisplayLink.paused = NO; + }); +} + ++ (void)stop { + dispatch_async(dispatch_get_main_queue(), ^{ + sDisplayLink.paused = YES; + }); +} + +#else // TARGET_OS_OSX + +// macOS: CADisplayLink is available via NSScreen on 14.0+. On older systems we +// fall back to an NSTimer at ~60Hz (not vsync-aligned, but keeps auto-present +// working). FrameDriver self-idles cheaply when nothing is rendering. 
+static id sDisplayLink = nil; + ++ (void)tick:(id)sender { + [WebGPUFrameDriver onFrame]; +} + ++ (void)start { + dispatch_async(dispatch_get_main_queue(), ^{ + if (sDisplayLink == nil) { + if (@available(macOS 14.0, *)) { + CADisplayLink *link = + [NSScreen.mainScreen displayLinkWithTarget:self + selector:@selector(tick:)]; + [link addToRunLoop:[NSRunLoop mainRunLoop] + forMode:NSRunLoopCommonModes]; + sDisplayLink = link; + } else { + sDisplayLink = [NSTimer scheduledTimerWithTimeInterval:1.0 / 60.0 + target:self + selector:@selector(tick:) + userInfo:nil + repeats:YES]; + } + } + if ([sDisplayLink isKindOfClass:[CADisplayLink class]]) { + ((CADisplayLink *)sDisplayLink).paused = NO; + } + }); +} + ++ (void)stop { + dispatch_async(dispatch_get_main_queue(), ^{ + if ([sDisplayLink isKindOfClass:[CADisplayLink class]]) { + ((CADisplayLink *)sDisplayLink).paused = YES; + } + // NSTimer fallback keeps firing; onVSync is a cheap no-op while idle. + }); +} + +#endif // TARGET_OS_OSX + +@end diff --git a/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp b/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp new file mode 100644 index 000000000..792940e5e --- /dev/null +++ b/packages/webgpu/cpp/rnwgpu/FrameDriver.cpp @@ -0,0 +1,81 @@ +#include "FrameDriver.h" + +#include +#include +#include + +namespace jsi = facebook::jsi; + +namespace rnwgpu { + +FrameDriver &FrameDriver::getInstance() { + static FrameDriver instance; + return instance; +} + +void FrameDriver::setPlatformVSync(std::function start, + std::function stop) { + std::lock_guard lock(_mutex); + _start = std::move(start); + _stop = std::move(stop); +} + +void FrameDriver::requestPresent( + int contextId, std::shared_ptr surface, + std::shared_ptr scheduler) { + if (!surface || !scheduler) { + return; + } + + std::function startToCall; + { + std::lock_guard lock(_mutex); + _pending[contextId] = {std::move(surface), std::move(scheduler)}; + _idleFrames = 0; + if (!_running && _start) { + _running = true; + startToCall = _start; 
+ } + } + + // Invoked outside the lock: the platform start hops to the UI thread. + if (startToCall) { + startToCall(); + } +} + +void FrameDriver::cancelPresent(int contextId) { + std::lock_guard lock(_mutex); + _pending.erase(contextId); +} + +void FrameDriver::onVSync() { + std::vector<Pending> toPresent; + std::function<void()> stopToCall; + { + std::lock_guard lock(_mutex); + if (!_pending.empty()) { + toPresent.reserve(_pending.size()); + for (auto &entry : _pending) { + toPresent.push_back(std::move(entry.second)); + } + _pending.clear(); + _idleFrames = 0; + } else if (_running && ++_idleFrames >= kMaxIdleFrames) { + _running = false; + stopToCall = _stop; + } + } + + for (auto &pending : toPresent) { + auto surface = pending.surface; + pending.scheduler->scheduleOnJS( + [surface](jsi::Runtime & /*runtime*/) { surface->presentFrame(); }); + } + + if (stopToCall) { + stopToCall(); + } +} + +} // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/FrameDriver.h b/packages/webgpu/cpp/rnwgpu/FrameDriver.h new file mode 100644 index 000000000..c16fedabf --- /dev/null +++ b/packages/webgpu/cpp/rnwgpu/FrameDriver.h @@ -0,0 +1,83 @@ +#pragma once + +#include <functional> +#include <memory> +#include <mutex> +#include <unordered_map> + +#include "SurfaceRegistry.h" +#include "rnwgpu/async/RuntimeScheduler.h" + +namespace rnwgpu { + +/** + * Global vsync-driven auto-present coordinator. Replaces the manual + * `context.present()` call. + * + * Flow: + * - `GPUCanvasContext::getCurrentTexture()` (JS thread) calls + * `requestPresent` for its surface, tagged with the owning runtime's + * RuntimeScheduler. + * - A platform vsync source (iOS CADisplayLink / Android Choreographer) calls + * `onVSync()` on the UI thread once per frame.
+ * - On each vsync, every surface that requested a present has its present + * dispatched onto its owning runtime's JS thread (so `Surface.Present()` + * and the Apple Metal scheduling wait run on the same thread that did + * getCurrentTexture / submit, preserving Dawn surface thread-affinity and + * present-after-submit ordering via FIFO on that loop). + * + * The vsync source is request-driven: it is started when the first present is + * requested and stopped after a few idle frames, so an idle (non-rendering) app + * costs zero CPU. + */ +class FrameDriver { +public: + static FrameDriver &getInstance(); + + /** + * Register how to start/stop the platform vsync source. `start`/`stop` are + * invoked when presents begin/cease; each implementation is responsible for + * hopping to the UI thread as needed. Called once per platform at init. + */ + void setPlatformVSync(std::function<void()> start, + std::function<void()> stop); + + /** + * Request that `surface` be presented at the next vsync. Coalesced per + * contextId (at most one present per surface per frame). Thread-safe; called + * from a JS thread inside getCurrentTexture. Surfaces with no on-screen + * `wgpu::Surface` (offscreen) should not be registered. + */ + void requestPresent(int contextId, std::shared_ptr<SurfaceInfo> surface, + std::shared_ptr<async::RuntimeScheduler> scheduler); + + /** + * Drop any pending present for a surface (e.g. when its view is torn down). + * Thread-safe. + */ + void cancelPresent(int contextId); + + /** Called by the platform vsync source on the UI thread, once per frame. */ + void onVSync(); + +private: + FrameDriver() = default; + + struct Pending { + std::shared_ptr<SurfaceInfo> surface; + std::shared_ptr<async::RuntimeScheduler> scheduler; + }; + + // Number of consecutive empty frames before the vsync source is stopped. + // A small grace period avoids start/stop thrash during continuous rendering.
+ static constexpr int kMaxIdleFrames = 3; + + std::mutex _mutex; + std::unordered_map<int, Pending> _pending; + std::function<void()> _start; + std::function<void()> _stop; + bool _running = false; + int _idleFrames = 0; +}; + +} // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp index 7a2c32886..2eb76c0b4 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp @@ -32,7 +32,15 @@ void GPUCanvasContext::configure( void GPUCanvasContext::unconfigure() {} -std::shared_ptr<GPUTexture> GPUCanvasContext::getCurrentTexture() { +jsi::Value GPUCanvasContext::getCurrentTexture(jsi::Runtime &runtime, + const jsi::Value & /*thisValue*/, + const jsi::Value * /*args*/, + size_t /*count*/) { + // Main JS runtime owns a RuntimeContext; worklet runtimes (Reanimated UI / + // dedicated, Vision Camera frame processors, …) do not. + auto runtimeContext = async::RuntimeContext::get(runtime); + const bool isMainRuntime = runtimeContext != nullptr; + auto prevSize = _surfaceInfo->getConfig(); auto width = _canvas->getWidth(); auto height = _canvas->getHeight(); @@ -40,27 +48,46 @@ std::shared_ptr<GPUTexture> GPUCanvasContext::getCurrentTexture() { if (sizeHasChanged) { _surfaceInfo->reconfigure(width, height); } + + // Worklet-runtime auto-present: present the PREVIOUS frame synchronously on + // this thread, just before acquiring the next texture. By now that frame's + // submit has already happened (during the previous frame's work), and this + // runs on the same thread that did getCurrentTexture/submit — preserving Dawn + // surface thread-affinity. We can't use the UI-thread FrameDriver here, and + // scheduling onto the worklet runtime's own task queue is unsafe in general + // (e.g. Vision Camera's queue hops through JNI and crashes off the JS + // thread), so we present inline at the natural swapchain boundary instead.
+ if (!isMainRuntime && _hasUnpresentedFrame && _surfaceInfo->hasSurface()) { + _surfaceInfo->presentFrame(); + _hasUnpresentedFrame = false; + } + auto texture = _surfaceInfo->getCurrentTexture(); - // Auto-present: acquiring the current texture schedules a present for this - // surface at the next vsync (spec-aligned "update the rendering" after the - // frame). Replaces the old explicit context.present(). Offscreen surfaces - // have no wgpu::Surface, so skip them (their texture is read back directly). auto size = _surfaceInfo->getSize(); _canvas->setClientWidth(size.width); _canvas->setClientHeight(size.height); + + // Auto-present: acquiring the current texture arranges for this frame to be + // presented (spec-aligned "update the rendering" after the frame). Replaces + // the old explicit context.present(). Offscreen surfaces have no + // wgpu::Surface, so skip them (their texture is read back directly). if (_surfaceInfo->hasSurface()) { - // Phase 2: dispatch the present on the main runtime (the only runtime that - // owns WebGPU rendering today). Phase 3 will tag this with the *calling* - // runtime so worklet-runtime rendering (e.g. the Reanimated example) - // presents on its own JS thread, preserving Dawn surface thread-affinity. - FrameDriver::getInstance().requestPresent(_contextId, _surfaceInfo, - _gpu->getContext()->scheduler()); + if (isMainRuntime) { + // Main runtime: drive present from the global vsync FrameDriver (handles + // one-shot renders too, since it presents the current frame at vsync). + FrameDriver::getInstance().requestPresent(_contextId, _surfaceInfo, + runtimeContext->scheduler()); + } else { + // Worklet runtime: present at the next acquire (see above). + _hasUnpresentedFrame = true; + } } // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC // cycles every frame since the canvas texture doesn't own the buffer. 
- return std::make_shared<GPUTexture>(texture, "", false); + auto gpuTexture = std::make_shared<GPUTexture>(texture, "", false); + return JSIConverter<std::shared_ptr<GPUTexture>>::toJSI(runtime, gpuTexture); } } // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h index 2ab5d69c2..bdf6bee8c 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h @@ -53,13 +53,21 @@ class GPUCanvasContext : public NativeObject<GPUCanvasContext> { inline const wgpu::Surface get() { return nullptr; } void configure(std::shared_ptr<GPUCanvasConfiguration> configuration); void unconfigure(); - std::shared_ptr<GPUTexture> getCurrentTexture(); + // Full-control signature so we can learn the *calling* runtime and route the + // auto-present onto its own thread (main runtime → FrameDriver vsync; worklet + // runtime → presented inline at the next getCurrentTexture). + jsi::Value getCurrentTexture(jsi::Runtime &runtime, + const jsi::Value &thisValue, + const jsi::Value *args, size_t count); private: int _contextId; std::shared_ptr<Canvas> _canvas; std::shared_ptr<SurfaceInfo> _surfaceInfo; std::shared_ptr<GPU> _gpu; + // For worklet-runtime auto-present: true when a frame was acquired on a + // worklet runtime and not yet presented (presented at the next acquire).
+ bool _hasUnpresentedFrame = false; }; } // namespace rnwgpu From ba9efe94ecf8942f9e834a75422447ee027505ad Mon Sep 17 00:00:00 2001 From: William Candillon Date: Tue, 2 Jun 2026 17:33:07 +0200 Subject: [PATCH 3/4] :wrench: --- README.md | 15 +++- apps/example/src/Reanimated/Reanimated.tsx | 3 + .../example/src/VisionCamera/VisionCamera.tsx | 3 + docs/refactor-async-present-plan.md | 65 +++++++++------ packages/webgpu/README.md | 15 +++- .../cpp/rnwgpu/api/GPUCanvasContext.cpp | 80 +++++++++++-------- .../webgpu/cpp/rnwgpu/api/GPUCanvasContext.h | 12 +-- packages/webgpu/src/Canvas.tsx | 15 +++- packages/webgpu/src/Offscreen.ts | 4 + packages/webgpu/src/WebPolyfillGPUModule.ts | 5 +- packages/webgpu/src/types.ts | 15 +++- 11 files changed, 159 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index d7415053b..433d498fa 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get(); ### Frame Scheduling -Frame presentation is automatic. Once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). There is no `present()` call. +On the **main JS runtime** and the **Reanimated UI runtime**, frame presentation is automatic: once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). There is no `present()` call. ```tsx // draw @@ -181,6 +181,19 @@ device.queue.submit([commandEncoder.finish()]); // The frame is presented automatically on the next vsync. ``` +When you render from a **dedicated worklet runtime** (e.g. 
`createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame processor), it runs on its own thread where present can't be driven automatically. Call `context.present()` yourself after submitting: + +```tsx +const onFrame = () => { + "worklet"; + // draw on the dedicated runtime's thread + device.queue.submit([commandEncoder.finish()]); + context.present(); // required on dedicated worklet runtimes; a no-op on JS/UI +}; +``` + +`present()` is safe to call from a worklet that runs on either the UI runtime or a dedicated runtime: it presents on the dedicated runtime and does nothing on the JS/UI runtime (which auto-present). + ### Canvas Transparency On Android, the `alphaMode` property is ignored when configuring the canvas. diff --git a/apps/example/src/Reanimated/Reanimated.tsx b/apps/example/src/Reanimated/Reanimated.tsx index 2f8b5e5cb..3761c90f9 100644 --- a/apps/example/src/Reanimated/Reanimated.tsx +++ b/apps/example/src/Reanimated/Reanimated.tsx @@ -78,6 +78,9 @@ export const webGPUDemo = ( passEncoder.end(); device.queue.submit([commandEncoder.finish()]); + // Needed on a dedicated worklet runtime (DedicatedThread); a no-op on the + // UI runtime (UIThread), where present is automatic. + context.present(); if (runAnimation.value) { requestAnimationFrame(frame); diff --git a/apps/example/src/VisionCamera/VisionCamera.tsx b/apps/example/src/VisionCamera/VisionCamera.tsx index cba2d2948..f6c6c95bd 100644 --- a/apps/example/src/VisionCamera/VisionCamera.tsx +++ b/apps/example/src/VisionCamera/VisionCamera.tsx @@ -613,6 +613,9 @@ const CameraView = () => { pass.draw(3); pass.end(); device.queue.submit([encoder.finish()]); + // Vision Camera frame processors run on a dedicated worklet runtime, + // so present explicitly (auto-present only covers the JS/UI runtime). 
+ context.present(); // The work sampling it is submitted, so end the external texture's // access window now to release the camera frame's surface promptly // (don't wait for GC, which would starve the frame buffer pool). diff --git a/docs/refactor-async-present-plan.md b/docs/refactor-async-present-plan.md index 65490af29..82e0de054 100644 --- a/docs/refactor-async-present-plan.md +++ b/docs/refactor-async-present-plan.md @@ -308,36 +308,49 @@ happened to land on the right thread), but the **dedicated `createWorkletRuntime (`Reanimated/DedicatedThread.tsx`, `runOnRuntime`) crashed — its render thread is its own, so a main-thread present violated Dawn surface thread-affinity. -**Decision (confirmed with the user): self-scheduled present, no native worklets dependency.** -Rather than link `react-native-worklets` natively and have the FrameDriver dispatch via -`WorkletRuntime::schedule` (the original plan / Spike 1 primary), worklet runtimes now schedule -their own present on their own event loop. This avoids a new native build dependency entirely -and is fully buildable/validatable locally (it is Spike 1's documented "JS-scheduling" -contingency). - -Implementation (native only; no JS/build-system changes): +**Decision (confirmed with the user): auto-present on the JS + UI runtimes, explicit +`ctx.present()` on dedicated worklet runtimes. No native worklets dependency.** Rather than link +`react-native-worklets` natively and dispatch via `WorkletRuntime::schedule` (the original plan / +Spike 1 primary), the FrameDriver covers the JS and UI runtimes; dedicated runtimes — which run +on their own thread with no safe scheduler/vsync hook — keep an explicit `present()`. (A +scheduler-free auto path for dedicated runtimes was prototyped but rejected — see below — because +it added one frame of latency and never presented a one-shot frame.) This needs no new native +build dependency and is fully buildable/validatable locally. 
+ +Implementation: - `GPUCanvasContext::getCurrentTexture` switched to the full-control HostFunction signature (`jsi::Value(rt, thisVal, args, count)`, same pattern as `RNWebGPU::createImageBitmap`) so it - learns the **calling** runtime. New `schedulePresent(runtime)`: + learns the **calling** runtime. Present routing: - **Main runtime** (`RuntimeContext::get(runtime)` is non-null): unchanged — register with the global vsync `FrameDriver` using that runtime's scheduler. - - **Any worklet runtime** (no `RuntimeContext` — Reanimated UI/dedicated, Vision Camera frame - processors, …): **present-on-next-acquire**. `getCurrentTexture` presents the *previous* - frame synchronously (inline, on the calling thread) just before acquiring the next texture; - by then the previous frame's submit has happened, and present runs on the same thread that - rendered it. This is the natural swapchain boundary and needs no scheduler. - - Why not schedule onto the runtime's own loop: two earlier attempts failed. (1) - `queueMicrotask` is **disabled** on worklet runtimes (throws "microtasks are disabled in this - runtime"). (2) `setImmediate`/`setTimeout` exist but route through the runtime's `EventLoop` - `AsyncQueue`, which for **Vision Camera** is a custom `NativeThreadAsyncQueue` that hops back - through JNI (`fbjni Environment::current()`) and **crashes** when pushed from a - non-JVM-attached thread. Present-on-next-acquire avoids the runtime's task queue entirely. - Trade-off: one frame of latency, and a worklet that renders exactly once would not present - its single frame (continuous loops — rAF, camera frames — are unaffected; the main runtime's - one-shot case is covered by the FrameDriver). -- `Reanimated.tsx` already had `present()` removed in Phase 2; `DedicatedThread.tsx` / - `UIThread.tsx` need no changes. + - **Reanimated UI runtime** (`globalThis.__RUNTIME_KIND === 2`, worklets' `RuntimeKind::UI`): + also auto-present via the FrameDriver + main scheduler. 
The UI runtime is reached correctly + by this path (Phase 2 confirmed it), so no `present()` is needed. + - **Dedicated worklet runtimes** (`RuntimeKind::Worker`, or any untagged/unknown worklet + runtime — e.g. Vision Camera frame processors): **explicit `ctx.present()`**, kept in the + public API for exactly this case. They run on their own thread with no safe scheduler/vsync + hook, so present is called synchronously by the author after `submit`, on that thread + (preserving Dawn surface thread-affinity). + + `ctx.present()` is a **no-op on the JS / UI runtime** (they auto-present), which makes it safe + to call from a worklet shared between the UI and a dedicated runtime (the example's + `webGPUDemo`). Runtime classification uses `RuntimeContext::get(rt)` (main) and the stable + worklets global `__RUNTIME_KIND` (`ReactNative=1`, `UI=2`, `Worker=3`); no worklets headers + are linked. + + Two scheduler-based approaches were tried and rejected before landing here: (1) + `queueMicrotask` is **disabled** on worklet runtimes (throws); (2) `setImmediate`/`setTimeout` + exist but route through the runtime's `EventLoop` `AsyncQueue`, which for **Vision Camera** is + a custom `NativeThreadAsyncQueue` that hops through JNI (`fbjni Environment::current()`) and + **crashes** when pushed from a non-JVM-attached thread. A scheduler-free + "present-on-next-acquire" fallback worked everywhere but added one frame of latency and never + presented a one-shot frame, so the explicit-`present()`-on-dedicated split was chosen instead. +- JS surface: `present()` re-added to `RNCanvasContext` (`src/Canvas.tsx`, `src/types.ts`, + documented dedicated-only) and as a no-op on `Offscreen.ts` / `WebPolyfillGPUModule.ts`. Native + `GPUCanvasContext::present` re-added (full-control signature; no-op on auto-presented runtimes). 
+- Examples: `present()` re-added to `Reanimated/Reanimated.tsx`'s shared `webGPUDemo` (no-op on + UIThread, real on DedicatedThread) and to `VisionCamera.tsx`'s frame processor. Both READMEs' + "Frame Scheduling" sections document the JS/UI-auto vs dedicated-manual split. Known limitation (out of scope, examples don't hit it): **async ops** (`mapAsync`, `onSubmittedWorkDone`, …) invoked *on a worklet runtime* still settle their Promise via the diff --git a/packages/webgpu/README.md b/packages/webgpu/README.md index d7415053b..433d498fa 100644 --- a/packages/webgpu/README.md +++ b/packages/webgpu/README.md @@ -172,7 +172,7 @@ ctx.canvas.height = ctx.canvas.clientHeight * PixelRatio.get(); ### Frame Scheduling -Frame presentation is automatic. Once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). There is no `present()` call. +On the **main JS runtime** and the **Reanimated UI runtime**, frame presentation is automatic: once you acquire the frame's texture with `context.getCurrentTexture()` and submit your commands, the frame is presented on the next display refresh (driven by a global vsync source: `CADisplayLink` on iOS, `Choreographer` on Android). There is no `present()` call. ```tsx // draw @@ -181,6 +181,19 @@ device.queue.submit([commandEncoder.finish()]); // The frame is presented automatically on the next vsync. ``` +When you render from a **dedicated worklet runtime** (e.g. `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame processor), it runs on its own thread where present can't be driven automatically. 
Call `context.present()` yourself after submitting: + +```tsx +const onFrame = () => { + "worklet"; + // draw on the dedicated runtime's thread + device.queue.submit([commandEncoder.finish()]); + context.present(); // required on dedicated worklet runtimes; a no-op on JS/UI +}; +``` + +`present()` is safe to call from a worklet that runs on either the UI runtime or a dedicated runtime: it presents on the dedicated runtime and does nothing on the JS/UI runtime (which auto-present). + ### Canvas Transparency On Android, the `alphaMode` property is ignored when configuring the canvas. diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp index 2eb76c0b4..c4390ba6d 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.cpp @@ -6,6 +6,29 @@ namespace rnwgpu { +namespace { +// Runtimes whose present is automatic (no ctx.present() needed): the main JS +// runtime and the Reanimated UI runtime. Both are reached correctly by the +// global vsync FrameDriver dispatching through the main runtime's scheduler. +// Dedicated worklet runtimes (createWorkletRuntime, Vision Camera frame +// processors, …) run on their own thread with no safe scheduler hook, so they +// present explicitly via ctx.present(). +bool isAutoPresentedRuntime(jsi::Runtime &runtime) { + if (async::RuntimeContext::get(runtime) != nullptr) { + return true; // main JS runtime + } + // Worklets tags every runtime with a numeric `__RUNTIME_KIND` + // (worklets::RuntimeKind: ReactNative=1, UI=2, Worker=3). Auto-present only + // the UI runtime; treat Worker / unknown / untagged as needing ctx.present(). 
+ auto kind = runtime.global().getProperty(runtime, "__RUNTIME_KIND"); + if (kind.isNumber()) { + constexpr int kRuntimeKindUI = 2; + return static_cast<int>(kind.asNumber()) == kRuntimeKindUI; + } + return false; +} +} // namespace + void GPUCanvasContext::configure( std::shared_ptr configuration) { Convertor conv; @@ -36,11 +59,6 @@ jsi::Value GPUCanvasContext::getCurrentTexture(jsi::Runtime &runtime, const jsi::Value & /*thisValue*/, const jsi::Value * /*args*/, size_t /*count*/) { - // Main JS runtime owns a RuntimeContext; worklet runtimes (Reanimated UI / - // dedicated, Vision Camera frame processors, …) do not. - auto runtimeContext = async::RuntimeContext::get(runtime); - const bool isMainRuntime = runtimeContext != nullptr; - auto prevSize = _surfaceInfo->getConfig(); auto width = _canvas->getWidth(); auto height = _canvas->getHeight(); @@ -49,39 +67,21 @@ jsi::Value GPUCanvasContext::getCurrentTexture(jsi::Runtime &runtime, _surfaceInfo->reconfigure(width, height); } - // Worklet-runtime auto-present: present the PREVIOUS frame synchronously on - // this thread, just before acquiring the next texture. By now that frame's - // submit has already happened (during the previous frame's work), and this - // runs on the same thread that did getCurrentTexture/submit — preserving Dawn - // surface thread-affinity. We can't use the UI-thread FrameDriver here, and - // scheduling onto the worklet runtime's own task queue is unsafe in general - // (e.g. Vision Camera's queue hops through JNI and crashes off the JS - // thread), so we present inline at the natural swapchain boundary instead.
- if (!isMainRuntime && _hasUnpresentedFrame && _surfaceInfo->hasSurface()) { - _surfaceInfo->presentFrame(); - _hasUnpresentedFrame = false; - } - auto texture = _surfaceInfo->getCurrentTexture(); auto size = _surfaceInfo->getSize(); _canvas->setClientWidth(size.width); _canvas->setClientHeight(size.height); - // Auto-present: acquiring the current texture arranges for this frame to be - // presented (spec-aligned "update the rendering" after the frame). Replaces - // the old explicit context.present(). Offscreen surfaces have no - // wgpu::Surface, so skip them (their texture is read back directly). - if (_surfaceInfo->hasSurface()) { - if (isMainRuntime) { - // Main runtime: drive present from the global vsync FrameDriver (handles - // one-shot renders too, since it presents the current frame at vsync). - FrameDriver::getInstance().requestPresent(_contextId, _surfaceInfo, - runtimeContext->scheduler()); - } else { - // Worklet runtime: present at the next acquire (see above). - _hasUnpresentedFrame = true; - } + // Auto-present on the JS / UI runtime: acquiring the current texture + // schedules a present for this surface at the next vsync (spec-aligned + // "update the rendering" after the frame), dispatched through the main + // runtime's scheduler. Dedicated worklet runtimes instead call ctx.present() + // explicitly on their own thread. Offscreen surfaces have no wgpu::Surface, + // so skip them (their texture is read back directly). 
+ if (_surfaceInfo->hasSurface() && isAutoPresentedRuntime(runtime)) { + FrameDriver::getInstance().requestPresent(_contextId, _surfaceInfo, + _gpu->getContext()->scheduler()); } // Pass reportsMemoryPressure=false to avoid triggering spurious Hermes GC @@ -90,4 +90,20 @@ jsi::Value GPUCanvasContext::getCurrentTexture(jsi::Runtime &runtime, return JSIConverter>::toJSI(runtime, gpuTexture); } +jsi::Value GPUCanvasContext::present(jsi::Runtime &runtime, + const jsi::Value & /*thisValue*/, + const jsi::Value * /*args*/, + size_t /*count*/) { + // Only meaningful on a dedicated worklet runtime, where present can't be + // automated. On the JS / UI runtime present is automatic, so this is a no-op + // there — which makes it safe to call from a worklet shared between the UI + // runtime and a dedicated runtime. Presents synchronously on the calling + // thread (the one that did getCurrentTexture / submit), preserving Dawn + // surface thread-affinity. + if (!isAutoPresentedRuntime(runtime) && _surfaceInfo->hasSurface()) { + _surfaceInfo->presentFrame(); + } + return jsi::Value::undefined(); +} + } // namespace rnwgpu diff --git a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h index bdf6bee8c..a2e80b7cc 100644 --- a/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h +++ b/packages/webgpu/cpp/rnwgpu/api/GPUCanvasContext.h @@ -47,27 +47,27 @@ class GPUCanvasContext : public NativeObject { &GPUCanvasContext::unconfigure); installMethod(runtime, prototype, "getCurrentTexture", &GPUCanvasContext::getCurrentTexture); + installMethod(runtime, prototype, "present", &GPUCanvasContext::present); } // TODO: is this ok? 
inline const wgpu::Surface get() { return nullptr; } void configure(std::shared_ptr configuration); void unconfigure(); - // Full-control signature so we can learn the *calling* runtime and route the - // auto-present onto its own thread (main runtime → FrameDriver vsync; worklet - // runtime → presented inline at the next getCurrentTexture). + // Full-control signatures so we can learn the *calling* runtime and decide + // how this frame is presented (auto on the JS / UI runtime; explicit + // ctx.present() on a dedicated worklet runtime). jsi::Value getCurrentTexture(jsi::Runtime &runtime, const jsi::Value &thisValue, const jsi::Value *args, size_t count); + jsi::Value present(jsi::Runtime &runtime, const jsi::Value &thisValue, + const jsi::Value *args, size_t count); private: int _contextId; std::shared_ptr _canvas; std::shared_ptr _surfaceInfo; std::shared_ptr _gpu; - // For worklet-runtime auto-present: true when a frame was acquired on a - // worklet runtime and not yet presented (presented at the next acquire). - bool _hasUnpresentedFrame = false; }; } // namespace rnwgpu diff --git a/packages/webgpu/src/Canvas.tsx b/packages/webgpu/src/Canvas.tsx index 7c2a47a6e..43c9621e7 100644 --- a/packages/webgpu/src/Canvas.tsx +++ b/packages/webgpu/src/Canvas.tsx @@ -19,9 +19,18 @@ export interface NativeCanvas { clientHeight: number; } -// Auto-present (a global vsync FrameDriver) replaces the old manual present(); -// the native context is now just a spec GPUCanvasContext. -export type RNCanvasContext = GPUCanvasContext; +export type RNCanvasContext = GPUCanvasContext & { + /** + * Present the current frame. + * + * Only needed when rendering from a **dedicated worklet runtime** (e.g. + * `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame + * processor), which runs on its own thread. On the main JS runtime and the + * Reanimated UI runtime present is automatic (driven by a global vsync), so + * calling this there is a no-op. 
Call it after `queue.submit()`. + */ + present: () => void; +}; export interface CanvasRef { getContextId: () => number; diff --git a/packages/webgpu/src/Offscreen.ts b/packages/webgpu/src/Offscreen.ts index 6ce2f589c..4deab8a1c 100644 --- a/packages/webgpu/src/Offscreen.ts +++ b/packages/webgpu/src/Offscreen.ts @@ -64,6 +64,10 @@ class GPUOffscreenCanvasContext implements GPUCanvasContext { throw new Error("Method not implemented."); } + present() { + // Offscreen contexts have nothing to present; readback is via getImageData. + } + getDevice() { if (!this.device) { throw new Error("Device is not configured."); diff --git a/packages/webgpu/src/WebPolyfillGPUModule.ts b/packages/webgpu/src/WebPolyfillGPUModule.ts index 04229cd05..8b629a0c9 100644 --- a/packages/webgpu/src/WebPolyfillGPUModule.ts +++ b/packages/webgpu/src/WebPolyfillGPUModule.ts @@ -39,7 +39,10 @@ function makeWebGPUCanvasContext( canvas.setAttribute("height", pixelHeight); } - return canvas.getContext("webgpu")!; + const context = canvas.getContext("webgpu")!; + // On web there is no manual present; expose a no-op so RNCanvasContext's + // present() (used on native dedicated worklet runtimes) is callable here too. + return Object.assign(context, { present: () => {} }); } // @ts-expect-error - polyfill for RNWebGPU native module diff --git a/packages/webgpu/src/types.ts b/packages/webgpu/src/types.ts index 0758c73f4..1608a4ff0 100644 --- a/packages/webgpu/src/types.ts +++ b/packages/webgpu/src/types.ts @@ -8,9 +8,18 @@ export interface NativeCanvas { clientHeight: number; } -// Auto-present (a global vsync FrameDriver) replaces the old manual present(); -// the native context is now just a spec GPUCanvasContext. -export type RNCanvasContext = GPUCanvasContext; +export type RNCanvasContext = GPUCanvasContext & { + /** + * Present the current frame. + * + * Only needed when rendering from a **dedicated worklet runtime** (e.g. 
+ * `createWorkletRuntime` / `runOnRuntime`, or a Vision Camera frame + * processor), which runs on its own thread. On the main JS runtime and the + * Reanimated UI runtime present is automatic (driven by a global vsync), so + * calling this there is a no-op. Call it after `queue.submit()`. + */ + present: () => void; +}; export interface CanvasRef { getContextId: () => number; From 0696aaa049c3ad89bb1b3c51535e442ea52fe82c Mon Sep 17 00:00:00 2001 From: William Candillon Date: Tue, 2 Jun 2026 18:00:49 +0200 Subject: [PATCH 4/4] Delete docs/refactor-async-present-plan.md --- docs/refactor-async-present-plan.md | 442 ---------------------------- 1 file changed, 442 deletions(-) delete mode 100644 docs/refactor-async-present-plan.md diff --git a/docs/refactor-async-present-plan.md b/docs/refactor-async-present-plan.md deleted file mode 100644 index 82e0de054..000000000 --- a/docs/refactor-async-present-plan.md +++ /dev/null @@ -1,442 +0,0 @@ -# Refactor: event-driven async + auto-present - -Status: **Phases 1–3 complete (local build/lint green). Phase 4 (SurfaceRegistry rework) proposed; Phase 5 = on-device validation.** -Branch: `claude/keen-darwin-xeywa` - -This document is the handoff for moving the async + present refactor forward. Phase 0 -(spikes) needs a real local machine: installed `node_modules`, a Dawn build, and the -iOS/Android toolchains. Everything below the "How to resume locally" section is meant to -be executed on your computer, not in the web container. - ---- - -## Goals (locked) - -- **Async**: replace the JS-thread polling loop with a **background `WaitAny` GPU thread** - (Dawn `TimedWaitAny` is already enabled — `packages/webgpu/cpp/rnwgpu/api/GPU.cpp:17-23`). -- **Present**: **remove `context.present()` entirely** (breaking) in favor of a **global - Choreographer / CADisplayLink-driven auto-present**. -- **Scope**: first-class for **all runtimes** — main JS, the reanimated UI runtime, and - `createWorkletRuntime` dedicated runtimes. 
- ---- - -## What exists today (the two problems) - -### Async (polling) — `packages/webgpu/cpp/rnwgpu/async/` -- Every async op (`requestAdapter`, `requestDevice`, `mapAsync`, `onSubmittedWorkDone`, - `createRender/ComputePipelineAsync`, `popErrorScope`) registers a Dawn callback with - `CallbackMode::AllowProcessEvents` and calls `AsyncRunner::postTask`. -- `AsyncRunner::requestTick` (`async/AsyncRunner.cpp:89-177`) schedules `tick()` via - `setImmediate` / `setTimeout(4ms)` / `queueMicrotask`; `tick()` calls - `_instance.ProcessEvents()` and **re-schedules itself while any task is "pumping"** - (`AsyncRunner.cpp:189-191`). This is a busy reschedule loop: wasted CPU when idle, added - latency, and `JSIMicrotaskDispatcher`'s `queueMicrotask` dispatch is only thread-safe when - called on the runtime's own thread. - -### Present (manual, non-standard) -`api/GPUCanvasContext.cpp:56-65` → `SurfaceRegistry.h:116-121` → `wgpu::Surface::Present()`. -The user must call `context.present()` after every `queue.submit` (**16 JS/TS call sites**). -No CADisplayLink/Choreographer exists; RN's `requestAnimationFrame` is the only frame driver. -On Apple, present also does a blocking `WaitForCommandsToBeScheduled` on the JS thread. - ---- - -## Target architecture - -Three new pieces: - -### A. `RuntimeScheduler` — thread-safe "post to this runtime's JS thread" -Replaces `AsyncDispatcher` / `JSIMicrotaskDispatcher` (which use non-thread-safe -`queueMicrotask`). -- Interface: `void scheduleOnJS(std::function)`, callable from any thread. -- **Main runtime**: wraps `react::CallInvoker::invokeAsync` (already available — - `apple/WebGPUModule.mm:70`, `android/cpp/cpp-adapter.cpp:25-29`). -- **Worklet runtimes**: wraps the worklet runtime's own thread executor from - `react-native-worklets` 0.8.3 (**see Phase 0 spike #1**). 
-- Stored per-runtime in a `RuntimeContext` (the "per-JS-thread event loop"), created on first - WebGPU use, torn down via the existing `RuntimeLifecycleMonitor` / `RuntimeAwareCache` - (`cpp/jsi/RuntimeAwareCache.h`). - -### B. `GpuEventLoop` — background `WaitAny` thread (no polling) -One per `wgpu::Instance` (effectively global). -- All async sites switch `CallbackMode::AllowProcessEvents` → **`CallbackMode::WaitAnyOnly`**, - returning a `wgpu::Future`. -- A **small bounded thread pool**; each pending future is waited via - `instance.WaitAny(future, /*timeout*/UINT64_MAX)` on a pool thread → genuinely event-driven, - **zero idle CPU**, resolves the instant GPU work completes. No wake/interrupt problem (each - thread owns one future). **See Phase 0 spike #2.** -- On completion the worker marshals the result and calls the owning runtime's - `RuntimeScheduler.scheduleOnJS` to settle the JS Promise. `AsyncTaskHandle` / `Promise` - settle logic is reused; `AsyncRunner` + its tick loop are deleted. -- Fallback (if concurrent `WaitAny` on one instance is unsafe): single worker thread waiting on - the batched future set with a condition-variable re-arm. - -### C. `FrameDriver` — global vsync source for auto-present -One UI-thread singleton; removes the need for `present()`. -- **iOS**: `CADisplayLink` on the main run loop. **Android**: NDK - `AChoreographer_postFrameCallback` from C++ (API 24+, avoids JNI). **See Phase 0 spike #3.** -- Lifecycle: started when ≥1 surface is configured, stopped at 0. -- **Auto-present semantics** (spec-aligned "update the rendering" after rAF): - 1. `GPUCanvasContext::getCurrentTexture()` marks its `SurfaceInfo` dirty and registers a - present request with `FrameDriver`, tagged with the owning runtime. - 2. 
Each vsync (UI thread), `FrameDriver` dispatches each dirty context's present onto its - **owning runtime's `RuntimeScheduler`** — so `Surface.Present()` + the Apple Metal - scheduling wait run on the same thread that did `getCurrentTexture` / `submit`, preserving - Dawn surface thread-affinity and guaranteeing present-after-submit ordering (FIFO on that - loop). Clear dirty after present. -- Offscreen path (`SurfaceRegistry` `switchToOffscreen`, `src/Offscreen.ts`) has no surface → - present is a no-op; tests keep reading back the CPU texture. - ---- - -## Phase 0 — Local spikes (DO THESE FIRST, on your machine) - -These de-risk the refactor before any large change. Run from repo root. - -```bash -# 0. install deps (web container can't do this) -yarn install -``` - -### Spike 1 — worklet-runtime scheduler (HIGHEST RISK) -Goal: obtain a **thread-safe** "schedule this lambda on runtime R's thread" for an arbitrary -worklet runtime (UI runtime + a `createWorkletRuntime` runtime) using -`react-native-worklets@0.8.3`. - -```bash -# inspect the worklets native API actually shipped at 0.8.3 -find node_modules/react-native-worklets -name "*.h" | grep -iE "Runtime|Scheduler|Invoker|Queue" -# look for: WorkletRuntime, RuntimeManager / WorkletsModuleProxy, UIScheduler / JSScheduler, -# and any per-runtime executor / async queue we can call from a background C++ thread. -``` -Deliverable: a one-paragraph note on the exact symbol(s) to use (or "not exposed → needs JS -shim / worklets PR"). This determines whether Phase 3 (first-class worklet runtimes) is cheap -or needs a workaround. - -### Spike 2 — concurrent `WaitAny` on one Dawn instance -Goal: confirm multiple threads can each call `instance.WaitAny(singleFuture, UINT64_MAX)` -concurrently on the **same** instance safely. If not, switch `GpuEventLoop` to the -single-worker + condition-variable fallback. -- Search Dawn headers/docs in `externals/dawn` (or built `libs/`) for `WaitAny` threading - guarantees. 
A tiny throwaway C++ test against the built Dawn is ideal. - -### Spike 3 — Android frame callback -Goal: confirm NDK `AChoreographer_postFrameCallback` is usable at the project `minSdk` -(`packages/webgpu/android/build.gradle`). If `minSdk < 24` for that API, plan the Java -`Choreographer` + JNI bridge instead. - ---- - -## Phase 0 — Findings (completed 2026-06-02, branch `claude/keen-darwin-xeywa`) - -Environment verified: `node_modules` installed, `externals/dawn` present, RN **0.81.4**, -`react-native-worklets` **0.8.3**, Android `minSdk` **26**, NDK 26/27 available. - -### Spike 1 — worklet-runtime scheduler → **GREEN (symbol exists, thread-safe)** -`worklets/WorkletRuntime/WorkletRuntime.h` exposes exactly what we need: -- `WorkletRuntime::schedule(std::function job)` — posts `job` onto the - runtime's own `AsyncQueue` (`WorkletRuntime.cpp:211-227`). It is **callable from any thread** - (the underlying `AsyncQueueImpl` is a mutex+condvar queue; `AsyncQueueUI` forwards to the - `UIScheduler`). The job runs on the runtime's event-loop thread, under `runtimeMutex_`, and - uses `weak_from_this()` so it is a **safe no-op if the runtime was torn down**. This is a - drop-in for `RuntimeScheduler::scheduleOnJS` for worklet runtimes. -- `WorkletRuntime::getWeakRuntimeFromJSIRuntime(jsi::Runtime &rt)` (RN ≥ 0.81, we have 0.81.4) - maps a bare `jsi::Runtime&` → `weak_ptr`, so the per-runtime - `RuntimeContext` can recover the scheduler from any worklet runtime (UI + dedicated - `createWorkletRuntime`) with no JS shim. - -**Caveat (build wiring, not API):** webgpu does **not** currently link worklets natively -(no worklets entry in `packages/webgpu/*.podspec` or `android/CMakeLists.txt`; only JS-level -serialization helpers exist). Phase 3 must add the native dependency: -- iOS: depend on `RNWorklets` pod (it ships public headers under `worklets/`, - `header_dir = "worklets"`). 
-- Android: import the worklets **prefab** module `worklets` (`prefabPublishing` is on in - `react-native-worklets/android/build.gradle`). -Worklets is already a `peerDependency`, so this adds no new install. Phase 3 stays cheap; no -worklets PR or JS shim needed. - -### Spike 2 — concurrent `WaitAny` on one instance → **GREEN (designed for it)** -Dawn's native `EventManager` (`externals/dawn/src/dawn/native/EventManager.{h,cpp}`) is built -for multi-threaded waits: -- State is `MutexProtected`; `mNextFutureID` is atomic; a code comment - (`EventManager.h:78-82`) explicitly notes "another thread can race to complete the event … - via a WaitAny call". -- Each `WaitAny` call with a non-zero timeout creates a **stack-local `Waiter`** with its **own** - `MutexCondVarProtected` (`EventManager.cpp:338`, `:106`), registers it per-FutureID in - the shared map, then blocks on its own condvar. `SetFutureReady` signals the registered - waiters. → **N threads can each block in `WaitAny` on the same instance concurrently, each - owning its own future.** This is exactly the plan's primary "one future per pool thread" model. - -**Hard constraint discovered (`EventManager.cpp:341-354`):** within a *single* `WaitAny` call -with a non-zero timeout, you may **not** mix events from multiple queues, nor a queue event -together with a non-queue event — it returns `WaitStatus::Error` ("Mixed source waits with -timeouts are not currently supported"). Note `mapAsync`/`onSubmittedWorkDone` are *queue* -events while `requestAdapter`/`requestDevice`/`createPipelineAsync`/`popErrorScope` are -*non-queue* events. -→ **Implication:** adopt the **per-future-per-thread** design (each pool thread waits on exactly -one future) — it is single-source and always legal. The plan's stated fallback ("single worker -waiting on the batched future set") is **not viable** as written, because batching mixed sources -hits this restriction. 
If a bounded pool is undesirable, the correct fallback is one -worker-thread *per future* (still single-source), not one worker for a batched set. - -### Spike 3 — Android frame callback → **GREEN (no JNI bridge needed)** -In `android/choreographer.h`, `AChoreographer_getInstance()` and -`AChoreographer_postFrameCallback()` are both `__INTRODUCED_IN(24)`; `minSdk` is **26**, so the -pure-NDK path works with no Java `Choreographer`/JNI bridge. -- `postFrameCallback` is `__DEPRECATED_IN(29)` in favor of `postFrameCallback64` (API 29) / - `postVsyncCallback` (API 33). Recommendation: call `postFrameCallback64` when - `android_get_device_api_level() >= 29`, else `postFrameCallback` (works on 26-28). Both are - acceptable; the 64-bit variant just avoids the deprecation warning and 32-bit time wrap. -- `AChoreographer_getInstance()` must be called on a thread with a `Looper` (the main/UI - thread) — `FrameDriver` already lives on the UI thread, so this is satisfied. - -### Net go/no-go -All three risks clear. Proceed to Phase 1. Two plan amendments: (1) Phase 3 must add the -worklets native build dependency (podspec + prefab); (2) `GpuEventLoop` must use -per-future-per-thread waits (drop the batched-future fallback). - -## Implementation phases (after Phase 0) - -**Phase 1 — Event-driven async** (no public API change; `present()` untouched) — **DONE** -- Add `RuntimeScheduler` (+ main-runtime CallInvoker impl) and `GpuEventLoop`. -- Switch all 7 async sites to `WaitAnyOnly` + `GpuEventLoop.addFuture(...)`: - `api/GPU.cpp`, `api/GPUAdapter.cpp`, `api/GPUDevice.cpp` (×3), `api/GPUBuffer.cpp`, - `api/GPUQueue.cpp`, `api/GPUShaderModule.cpp`. -- Delete `async/AsyncRunner.*` polling + `async/JSIMicrotaskDispatcher.*`; keep - `AsyncTaskHandle` / `Promise` settle path on the new scheduler. 
- -### Phase 1 — what shipped (branch `claude/keen-darwin-xeywa`) -New files (`cpp/rnwgpu/async/`): -- `RuntimeScheduler.h` — interface `scheduleOnJS(std::function)`, - callable from any thread. -- `CallInvokerScheduler.{h,cpp}` — main-runtime impl wrapping - `react::CallInvoker::invokeAsync(CallFunc&&)` (RN 0.81 delivers the job on the JS thread - with the runtime). -- `GpuEventLoop.{h,cpp}` — background `WaitAny` driver. Lazily-grown bounded worker pool - (cap = `clamp(hardware_concurrency, 2, 8)`); each worker does a single-future - `instance.WaitAny(future, UINT64_MAX)` (always a legal single-source wait, per Phase 0 - spike 2). Shared state held behind a `shared_ptr` so detached workers (and the - `wgpu::Instance` ref they need) outlive the object safely; teardown sets `running=false` - and notifies idle workers without joining in-flight GPU waits. - -Deviations from the original plan (intentional): -1. **`AsyncRunner` was replaced by `RuntimeContext`** (`async/RuntimeContext.{h,cpp}`), the - per-runtime coordinator the plan's Target-architecture §A already named. It bundles - `{RuntimeScheduler, GpuEventLoop}` and exposes `postTask`; all polling internals - (`tick`/`requestTick`/`ProcessEvents`/pump counters) are gone. `AsyncTaskHandle` depends - only on `RuntimeScheduler`. The old `AsyncRunner` name/files no longer exist anywhere - (the 6 `api/*` classes now hold `std::shared_ptr _async`); the dead - `GPU::getAsyncRunner()` accessor was deleted. -2. **`postTask`'s callback now returns a `wgpu::Future`** (the value returned by the Dawn - `WaitAnyOnly` call), which `AsyncRunner` hands to `GpuEventLoop.addFuture`. A returned - future with `id == 0` means "no event to wait on" and is ignored — used by - `GPUDevice::getLost` (resolved synchronously or later via `notifyDeviceLost`). This - replaced the old `keepPumping` bool argument, which is gone. 
- -`GPU`'s constructor now takes the `CallInvoker` (threaded through from `RNWebGPUManager`, -which already held it) to build the `CallInvokerScheduler`. `AsyncDispatcher.h` and -`JSIMicrotaskDispatcher.{h,cpp}` deleted; `android/CMakeLists.txt` updated (iOS podspec -globs `cpp/**` so it needs no change). - -Validation run locally: all changed + new TUs syntax-check under the Android NDK toolchain; -the full `react-native-wgpu` native lib **compiles and links** for `arm64-v8a` (ninja); -`cpplint` clean (project filters); `clang-format` (pinned 15.0.0) applied; `yarn tsc` passes -(no TS changed). On-device runtime behaviour (frame pacing, zero idle CPU) is Phase 4. - -**Phase 2 — Auto-present + remove `present()`** — **DONE** -- Add `FrameDriver` (iOS `CADisplayLink`, Android `AChoreographer`); wire - `getCurrentTexture` → register; vsync → dispatch present to owning runtime. -- Remove `GPUCanvasContext::present` (`api/GPUCanvasContext.h:50,58`, `.cpp:56-65`) and - `SurfaceInfo::present` (`SurfaceRegistry.h:116-121`). -- JS: drop `present` from `RNCanvasContext` (`src/Canvas.tsx:22-24`, `src/types.ts`). -- Migrate all 16 example / `useWebGPU` call sites + `README.md` + `packages/webgpu/README.md`. - -### Phase 2 — what shipped (branch `claude/keen-darwin-xeywa`) -New files: -- `cpp/rnwgpu/FrameDriver.{h,cpp}` — global vsync auto-present coordinator. `requestPresent` - (from `getCurrentTexture`, JS thread) coalesces per `contextId`; `onVSync` (UI thread) - dispatches each pending surface's present onto its owning runtime's `RuntimeScheduler` - (`surface->presentFrame()`). Request-driven: starts the platform vsync on first request, - stops after `kMaxIdleFrames` (3) idle frames → zero idle CPU. -- `apple/WebGPUFrameDriver.{h,mm}` — iOS/tvOS `CADisplayLink` on the main run loop (paused - toggled by start/stop). macOS uses `NSScreen.displayLinkWithTarget:` on 14+, else an - `NSTimer` fallback. Selector → `FrameDriver::onVSync()`. 
-- `android/.../com/webgpu/WebGPUFrameDriver.java` — main-thread `Choreographer` driver; - `doFrame` → static `nativeOnVSync()` JNI → `FrameDriver::onVSync()`, reposts while running. - -Wiring: -- `SurfaceInfo::present()` → `presentFrame()` (Apple `WaitForCommandsToBeScheduled` + Present, - no-op offscreen); added `SurfaceInfo::hasSurface()`. Metal extern moved to `SurfaceRegistry.h`. -- `GPU::getContext()` re-exposes the per-runtime `RuntimeContext` (so the canvas can reach its - scheduler). `GPUCanvasContext` stores `_contextId`, registers the present in - `getCurrentTexture` (and now sets the canvas client size there), and dropped `present()` + - its JS binding. -- iOS `WebGPUModule install` and Android `initializeNative` register `setPlatformVSync`. View - teardown (`MetalView dealloc`, Android `onSurfaceDestroy`) calls `FrameDriver::cancelPresent`. -- JS: `RNCanvasContext` is now just `GPUCanvasContext` (`src/Canvas.tsx`, `src/types.ts`); - removed the no-op `present` from `Offscreen.ts` and `WebPolyfillGPUModule.ts`. 18 example - call sites (the plan's 16 + `VisionCamera`, `ImportExternalTexture`) and both READMEs migrated. - -Decisions / deviations: -1. **Android vsync = Java `Choreographer` + JNI** (not pure NDK `AChoreographer`), chosen for - robustness — pure NDK needs a JNI hop to a Looper thread to bootstrap anyway. Confirmed with - the user. -2. **`present()` hard-removed** (breaking), confirmed with the user. -3. **Owning-runtime caveat (→ Phase 3):** `getCurrentTexture` currently dispatches present via - the **main** runtime's scheduler (`_gpu->getContext()`). Correct for main-JS rendering. The - Reanimated example renders on the **UI (worklet) runtime**, so its present is migrated (call - removed) but auto-present won't target the correct thread until Phase 3 tags the present with - the *calling* runtime and gives worklet runtimes their own `RuntimeScheduler`. 
Expect the - Reanimated/Dedicated examples to be visually broken between Phase 2 and Phase 3. - -Validation (local): `react-native-wgpu` native lib **compiles and links** for `arm64-v8a` -(ninja, CMake picked up `FrameDriver.cpp`); `cpplint` clean; `clang-format` applied; `yarn tsc` -and `yarn lint` pass for both `packages/webgpu` and `apps/example`. iOS `.mm` and the Java -driver are not compiled locally (no iOS/gradle build run here) — review-only; needs a device -build. On-device frame pacing / zero-idle-CPU verification is Phase 4. - -**Phase 3 — First-class worklet runtimes** — **DONE** -- Worklet-runtime `RuntimeScheduler` impl (per Spike 1); verify auto-present dispatch on UI + - dedicated runtimes; update `apps/example/src/Reanimated/Reanimated.tsx` (drop `present()`, - keep its own rAF loop). - -### Phase 3 — what shipped (branch `claude/keen-darwin-xeywa`) -Observed after Phase 2: the **UI-runtime** Reanimated example worked (the Reanimated UI runtime -executes on the **main thread**, so dispatching its present to the main runtime's scheduler -happened to land on the right thread), but the **dedicated `createWorkletRuntime`** example -(`Reanimated/DedicatedThread.tsx`, `runOnRuntime`) crashed — its render thread is its own, so a -main-thread present violated Dawn surface thread-affinity. - -**Decision (confirmed with the user): auto-present on the JS + UI runtimes, explicit -`ctx.present()` on dedicated worklet runtimes. No native worklets dependency.** Rather than link -`react-native-worklets` natively and dispatch via `WorkletRuntime::schedule` (the original plan / -Spike 1 primary), the FrameDriver covers the JS and UI runtimes; dedicated runtimes — which run -on their own thread with no safe scheduler/vsync hook — keep an explicit `present()`. (A -scheduler-free auto path for dedicated runtimes was prototyped but rejected — see below — because -it added one frame of latency and never presented a one-shot frame.) 
This needs no new native -build dependency and is fully buildable/validatable locally. - -Implementation: -- `GPUCanvasContext::getCurrentTexture` switched to the full-control HostFunction signature - (`jsi::Value(rt, thisVal, args, count)`, same pattern as `RNWebGPU::createImageBitmap`) so it - learns the **calling** runtime. Present routing: - - **Main runtime** (`RuntimeContext::get(runtime)` is non-null): unchanged — register with the - global vsync `FrameDriver` using that runtime's scheduler. - - **Reanimated UI runtime** (`globalThis.__RUNTIME_KIND === 2`, worklets' `RuntimeKind::UI`): - also auto-present via the FrameDriver + main scheduler. The UI runtime is reached correctly - by this path (Phase 2 confirmed it), so no `present()` is needed. - - **Dedicated worklet runtimes** (`RuntimeKind::Worker`, or any untagged/unknown worklet - runtime — e.g. Vision Camera frame processors): **explicit `ctx.present()`**, kept in the - public API for exactly this case. They run on their own thread with no safe scheduler/vsync - hook, so present is called synchronously by the author after `submit`, on that thread - (preserving Dawn surface thread-affinity). - - `ctx.present()` is a **no-op on the JS / UI runtime** (they auto-present), which makes it safe - to call from a worklet shared between the UI and a dedicated runtime (the example's - `webGPUDemo`). Runtime classification uses `RuntimeContext::get(rt)` (main) and the stable - worklets global `__RUNTIME_KIND` (`ReactNative=1`, `UI=2`, `Worker=3`); no worklets headers - are linked. - - Two scheduler-based approaches were tried and rejected before landing here: (1) - `queueMicrotask` is **disabled** on worklet runtimes (throws); (2) `setImmediate`/`setTimeout` - exist but route through the runtime's `EventLoop` `AsyncQueue`, which for **Vision Camera** is - a custom `NativeThreadAsyncQueue` that hops through JNI (`fbjni Environment::current()`) and - **crashes** when pushed from a non-JVM-attached thread. 
A scheduler-free - "present-on-next-acquire" fallback worked everywhere but added one frame of latency and never - presented a one-shot frame, so the explicit-`present()`-on-dedicated split was chosen instead. -- JS surface: `present()` re-added to `RNCanvasContext` (`src/Canvas.tsx`, `src/types.ts`, - documented dedicated-only) and as a no-op on `Offscreen.ts` / `WebPolyfillGPUModule.ts`. Native - `GPUCanvasContext::present` re-added (full-control signature; no-op on auto-presented runtimes). -- Examples: `present()` re-added to `Reanimated/Reanimated.tsx`'s shared `webGPUDemo` (no-op on - UIThread, real on DedicatedThread) and to `VisionCamera.tsx`'s frame processor. Both READMEs' - "Frame Scheduling" sections document the JS/UI-auto vs dedicated-manual split. - -Known limitation (out of scope, examples don't hit it): **async ops** (`mapAsync`, -`onSubmittedWorkDone`, …) invoked *on a worklet runtime* still settle their Promise via the -object's creation-runtime context (main), not the calling worklet runtime — the example worklets -only do synchronous rendering + present (device/adapter are created on the main runtime). Routing -async settlement to the calling runtime would need the same calling-runtime detection applied to -the 7 async sites; deferred until a use case needs it. - -Validation (local): native lib **compiles + links** for `arm64-v8a`; `cpplint` clean; -`clang-format` applied; `yarn tsc`/`yarn lint` unaffected (no JS changed). On-device -verification of the dedicated-worklet example is for the maintainer. - -**Phase 4 — `SurfaceRegistry` / surface-model rework** (proposed) -The `SurfaceInfo` / `SurfaceRegistry` model (`cpp/rnwgpu/SurfaceRegistry.h`) predates the -event-driven + auto-present work and is now the rough edge. 
Candidate improvements to scope: -- **Surface thread-affinity.** Surface lifecycle (`configure`/`switchToOnscreen`/ - `switchToOffscreen`/`resize`) runs on the **UI thread** (native view callbacks) while - `getCurrentTexture`/`presentFrame` run on the **owning runtime's render thread**. A single - `shared_mutex` serializes them but they're still cross-thread against a Dawn surface that - prefers single-thread access. Consider routing all surface ops through the owning runtime - (e.g. via the `RuntimeScheduler`), making affinity structural rather than lock-guarded. -- **State clarity.** The on-screen-`surface` vs offscreen-`texture` duality is encoded as - `if (surface) … else …` branches throughout; a small explicit state (Offscreen / Onscreen) - would remove the implicit coupling and the `switchToOnscreen` flush path's validation cost - (its existing `// TODO: faster way without validation?`). -- **Dead/again-evaluated fields.** e.g. the stored `wgpu::Instance gpu` member appears unused; - audit members now that present/`hasSurface` were added. -- **Lifetime vs `contextId`.** Registry keyed by a JS-side incrementing `int`; `FrameDriver` - now also keys pending presents by `contextId`. Confirm teardown ordering (view dealloc → - `cancelPresent` + `removeSurfaceInfo`) is race-free under the new threading. - -**Phase 5 — Validation** -```bash -yarn tsc && yarn lint -yarn workspace react-native-wgpu test # offscreen readback + demo specs -yarn build:ios # or: yarn workspace example ios -yarn build:android # or: yarn workspace example android -``` -Verify: no idle-CPU polling (logging), correct frame pacing, no present-ordering glitches, -Reanimated UI/Dedicated examples render. 
- ---- - -## 16 `present()` call sites to migrate (Phase 2) - -``` -apps/example/src/StorageBufferVertices/StorageBufferVertices.tsx -apps/example/src/components/useWebGPU.ts -apps/example/src/components/Texture.tsx -apps/example/src/SharedTextureMemory/SharedTextureMemory.tsx -apps/example/src/ThreeJS/Helmet.tsx -apps/example/src/ComputeToys/engine/index.ts -apps/example/src/CanvasAPI/CanvasAPI.tsx -apps/example/src/ThreeJS/PostProcessing.tsx -apps/example/src/ThreeJS/Cube.tsx -apps/example/src/Triangle/HelloTriangle.tsx -apps/example/src/Triangle/HelloTriangleMSAA.tsx -apps/example/src/ThreeJS/InstancedMesh.tsx -apps/example/src/ThreeJS/Retargeting.tsx -apps/example/src/ThreeJS/components/FiberCanvas.tsx -apps/example/src/Reanimated/Reanimated.tsx -apps/example/src/ThreeJS/Backdrop.tsx -``` -Plus `README.md` and `packages/webgpu/README.md`. - ---- - -## Risks / open questions -- **Worklet-runtime scheduler** access in worklets 0.8.3 (Spike 1 — highest risk). -- **Concurrent `WaitAny`** semantics on one Dawn instance (Spike 2; single-worker fallback ready). -- **Present timing**: vsync-dispatched-to-owning-loop must land after submit (FIFO on that loop) - and before the next `getCurrentTexture`. -- **Breaking change**: `present()` removed — type, examples, README updated together. -- **Apple Metal wait** moves into the frame-boundary present task, off the synchronous call path. - ---- - -## How to resume locally - -```bash -git fetch origin claude/keen-darwin-xeywa -git checkout claude/keen-darwin-xeywa -git pull origin claude/keen-darwin-xeywa -# open this file and run Phase 0 spikes, then start Claude Code: -# claude -# suggested kickoff prompt: -# "Read docs/refactor-async-present-plan.md. Run the Phase 0 spikes and report -# findings before implementing. Develop on this branch." -```