diff --git a/.github/workflows/samples-integration-test.yml b/.github/workflows/samples-integration-test.yml
index 7ca9192f2..9ed3f8a2e 100644
--- a/.github/workflows/samples-integration-test.yml
+++ b/.github/workflows/samples-integration-test.yml
@@ -36,48 +36,40 @@ jobs:
         with:
           python-version: '3.12'
 
-      - name: Configure pip for Azure Artifacts
+      - name: Configure pip for Azure Artifacts (ORT-Nightly feed for onnxruntime deps)
         run: |
           pip config set global.index-url https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
           pip config set global.extra-index-url https://pypi.org/simple/
           pip config set global.pre true
 
-      - name: Build and install SDK from source
-        working-directory: sdk/python
-        shell: pwsh
-        run: |
-          python -m pip install build
-          echo '__version__ = "0.0.0-dev"' > src/version.py
-          python -m build --wheel --outdir dist/
-          $wheel = (Get-ChildItem dist/*.whl | Select-Object -First 1).FullName
-          pip install $wheel
-
-      - name: Install sample dependencies
-        shell: pwsh
-        run: |
-          Get-ChildItem samples/python/*/requirements.txt -ErrorAction SilentlyContinue | ForEach-Object {
-            Write-Host "Installing dependencies for $($_.Directory.Name)..."
-            pip install -r $_.FullName
-          }
-
-      - name: Syntax check Python samples
+      # Samples consume the SDK from local source (tracking main) via an editable
+      # install declared in each sample's requirements.txt: `-e ../../../sdk/python`.
+      # We install from inside each sample directory so that relative path resolves.
+      - name: Install samples (SDK from local source) and syntax check
         shell: pwsh
         run: |
           $failed = @()
-          $samples = Get-ChildItem samples/python/*/src/app.py -ErrorAction SilentlyContinue
-          foreach ($sample in $samples) {
-            $name = $sample.Directory.Parent.Name
-            Write-Host "=== Checking: $name ==="
-            python -m py_compile $sample.FullName
+          $samples = Get-ChildItem samples/python -Directory -ErrorAction SilentlyContinue
+          foreach ($dir in $samples) {
+            $app = Join-Path $dir.FullName 'src/app.py'
+            if (-not (Test-Path $app)) { continue }
+            Write-Host "=== $($dir.Name) ==="
+            Push-Location $dir.FullName
+            if (Test-Path requirements.txt) {
+              pip install -r requirements.txt
+              if ($LASTEXITCODE -ne 0) { Write-Host "INSTALL FAILED: $($dir.Name)"; $failed += $dir.Name; Pop-Location; continue }
+            }
+            Pop-Location
+            python -m py_compile $app
             if ($LASTEXITCODE -ne 0) {
-              Write-Host "FAILED: $name"
-              $failed += $name
+              Write-Host "FAILED: $($dir.Name)"
+              $failed += $dir.Name
             } else {
-              Write-Host "OK: $name"
+              Write-Host "OK: $($dir.Name)"
             }
           }
           if ($failed.Count -gt 0) {
-            Write-Error "Failed syntax checks: $($failed -join ', ')"
+            Write-Error "Failed: $($failed -join ', ')"
             exit 1
           }
 
@@ -134,13 +126,14 @@ jobs:
         run: |
           npm install
           npm run build
-          npm link
 
-      - name: Syntax check JS samples
+      # Samples consume the SDK from local source (tracking main) via a
+      # `file:../../../sdk/js` dependency in each sample's package.json, so a plain
+      # `npm install` inside the sample resolves the locally built SDK (no npm link).
+      - name: Install and syntax check JS samples
         shell: pwsh
         run: |
           $failed = @()
-          # Find all sample app.js files (either in root or src/)
           $samples = @()
           $samples += Get-ChildItem samples/js/*/app.js -ErrorAction SilentlyContinue
           $samples += Get-ChildItem samples/js/*/src/app.js -ErrorAction SilentlyContinue
@@ -148,12 +141,13 @@ jobs:
             $dir = if ($sample.Directory.Name -eq 'src') { $sample.Directory.Parent } else { $sample.Directory }
             $name = $dir.Name
             Write-Host "=== Checking: $name ==="
-            # Link SDK and install dependencies
             Push-Location $dir.FullName
-            npm link foundry-local-sdk 2>$null
-            if (Test-Path "package.json") { npm install 2>$null }
+            # pwsh does not abort the step when a native command fails, so record npm's exit
+            # code here and report a broken install as a failure for this sample.
+            $installExit = 0
+            if (Test-Path "package.json") { npm install; $installExit = $LASTEXITCODE }
             Pop-Location
-            # Syntax check
+            if ($installExit -ne 0) { Write-Host "INSTALL FAILED: $name"; $failed += "$name (npm install)" }
             node --check $sample.FullName 2>&1
             if ($LASTEXITCODE -ne 0) {
               Write-Host "FAILED: $name"
@@ -188,35 +178,13 @@ jobs:
             8.0.x
             10.0.x
 
-      - name: Build SDK from source
-        shell: pwsh
-        run: |
-          # Build cross-platform SDK package
-          # Note: /p:TreatWarningsAsErrors=false avoids failing on SDK doc warnings
-          dotnet pack sdk/cs/src/Microsoft.AI.Foundry.Local.csproj `
-            -o local-packages `
-            /p:Version=0.9.0-dev `
-            /p:IsPacking=true `
-            /p:TreatWarningsAsErrors=false `
-            --configuration Release
-          
-          # Build WinML SDK package (Windows only)
-          if ($IsWindows) {
-            dotnet pack sdk/cs/src/Microsoft.AI.Foundry.Local.csproj `
-              -o local-packages `
-              /p:Version=0.9.0-dev-20260324 `
-              /p:UseWinML=true `
-              /p:IsPacking=true `
-              /p:TreatWarningsAsErrors=false `
-              --configuration Release
-          }
-          
-          Write-Host "Local packages:"
-          Get-ChildItem local-packages/*.nupkg | ForEach-Object { Write-Host "  $($_.Name)" }
-
-      - name: Build C# samples
+      - name: Build C# samples (SDK via ProjectReference to sdk/cs source)
         shell: pwsh
         run: |
+          # Samples consume the SDK from local source (tracking main) via a
+          # ProjectReference to sdk/cs/src/Microsoft.AI.Foundry.Local.csproj.
+          # Building each sample compiles the SDK from source and restores its
+          # transitive Microsoft.AI.Foundry.Local.Core + third-party packages from nuget.org.
           $failed = @()
           $projects = Get-ChildItem samples/cs -Recurse -Filter "*.csproj"
           foreach ($proj in $projects) {
@@ -282,3 +250,14 @@ jobs:
       - name: Clippy check
         working-directory: samples/rust
         run: cargo clippy --workspace -- -D warnings
+
+  # ── C++ Samples ─────────────────────────────────────────────────────
+  # Intentionally not built here. The C++ samples consume the C++ SDK
+  # (sdk_v2/cpp), which is built only on dedicated build agents with vcpkg + ONNX
+  # Runtime provisioned (see .pipelines/) — there is no GitHub-hosted-runner recipe,
+  # and building it on every samples PR would be slow and flaky. The samples
+  # reference a *locally built* SDK via samples/cpp/cmake/FoundryLocalSDK.cmake,
+  # which fails fast with clear guidance if the SDK has not been built. Validate
+  # locally after `python sdk_v2/cpp/build.py`:
+  #   cmake -S samples/cpp/<sample> -B samples/cpp/<sample>/build
+  #   cmake --build samples/cpp/<sample>/build
diff --git a/README.md b/README.md
index 405f45d93..9a66bb4b0 100644
--- a/README.md
+++ b/README.md
@@ -156,18 +156,25 @@ await whisperModel.unload();
 ```
 
 > [!TIP]
-> A single `FoundryLocalManager` can manage both chat and audio models simultaneously. See the [chat-and-audio sample](samples/js/chat-and-audio-foundry-local/) for a complete example.
+> A single `FoundryLocalManager` can manage both chat and audio models simultaneously.
 
 ## 📦 Samples
 
-Explore complete working examples in the [`samples/`](samples/) folder:
+Explore complete working examples in the [`samples/`](samples/) folder. These samples
+track **`main`** and build the SDK from local source in this repo, so they always reflect
+the latest changes:
 
 | Language | Samples | Highlights |
 |----------|---------|------------|
-| [**C#**](samples/cs/) | 12 | Native chat, audio transcription, tool calling, model management, web server, tutorials |
-| [**JavaScript**](samples/js/) | 12 | Native chat, audio, Electron app, Copilot SDK, LangChain, tool calling, tutorials |
-| [**Python**](samples/python/) | 9 | Chat completions, audio transcription, LangChain, tool calling, tutorials |
-| [**Rust**](samples/rust/) | 8 | Native chat, audio transcription, tool calling, web server, tutorials |
+| [**C#**](samples/cs/) | 4 | Chat (native + web server), embeddings, audio (live + file), responses (vision) |
+| [**JavaScript**](samples/js/) | 4 | Chat (native + web server), embeddings, audio (live + file), responses (vision) |
+| [**Python**](samples/python/) | 4 | Chat (native + web server), embeddings, audio (live + file), responses (vision) |
+| [**Rust**](samples/rust/) | 4 | Chat (native + web server), embeddings, audio (live + file), responses (vision) |
+| [**C++**](samples/cpp/) | 4 | Chat (native + web server), embeddings, audio (live + file), responses (vision) |
+
+> [!TIP]
+> Looking for comprehensive, version-pinned samples (used across Microsoft Learn)? See
+> [microsoft-foundry/foundry-samples](https://github.com/microsoft-foundry/foundry-samples/).
 
 ## 🖥️ CLI
 
diff --git a/docs/README.md b/docs/README.md
index 5fa298a0f..b805d623e 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -21,9 +21,14 @@ Foundry Local is a unified local AI runtime that supports both **text generation
 
 ## Samples
 
-- [JavaScript: Native Chat Completions](../samples/js/native-chat-completions/) — Chat completions using the native SDK API
-- [JavaScript: Audio Transcription](../samples/js/audio-transcription-example/) — Speech-to-text with Whisper
-- [JavaScript: Chat + Audio](../samples/js/chat-and-audio-foundry-local/) — Unified chat and audio in one app
-- [JavaScript: Tool Calling](../samples/js/tool-calling-foundry-local/) — Function calling with local models
-- [JavaScript: Electron Chat App](../samples/js/electron-chat-application/) — Desktop chat application
-- [C#: Getting Started](../samples/cs/GettingStarted/) — C# SDK examples including audio transcription
+Working examples for each language live in the [`samples/`](../samples/) folder. Each
+language provides the same four samples, built from local SDK source (tracking `main`):
+
+- **chat-completion** — native inference plus the local OpenAI-compatible web server (`/v1/chat/completions`)
+- **embeddings** — text embeddings (single and batch)
+- **audio** — live microphone transcription (Nemotron) and file-based transcription (Whisper)
+- **responses-api** — vision via the local web server Responses API (`/v1/responses`)
+
+See [`samples/README.md`](../samples/README.md) for details. For comprehensive, version-pinned
+samples (used across Microsoft Learn), see
+[microsoft-foundry/foundry-samples](https://github.com/microsoft-foundry/foundry-samples/).
diff --git a/samples/README.md b/samples/README.md
index a9d680412..3aed00308 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -1,15 +1,61 @@
 # Foundry Local Samples
 
-Explore complete working examples that demonstrate how to use Foundry Local — an end-to-end local AI solution that runs entirely on-device. These samples cover chat completions, embeddings, audio transcription, tool calling, LangChain integration, and more.
+A small, focused set of working examples for [Foundry Local](https://learn.microsoft.com/azure/foundry-local/) — an end-to-end local AI solution that runs entirely on-device.
 
 > **New to Foundry Local?** Check out the [main README](../README.md) for an overview and quickstart, or visit the [Foundry Local documentation](https://learn.microsoft.com/azure/foundry-local/) on Microsoft Learn.
 
+## These samples track `main`
+
+Every sample here **consumes the SDK from local source in this repository** and therefore
+reflects the current state of `main` — they are intentionally **not pinned** to a published
+package version. Concretely:
+
+| Language | How the SDK is consumed | Built from |
+|----------|-------------------------|------------|
+| **C#** | `ProjectReference` to `sdk/cs/src/Microsoft.AI.Foundry.Local.csproj` | `sdk/cs` |
+| **JavaScript** | `file:` dependency on the local SDK (`foundry-local-sdk`) | `sdk/js` |
+| **Python** | editable install (`-e ../../../sdk/python`) in `requirements.txt` | `sdk/python` |
+| **Rust** | `path` dependency (`foundry-local-sdk = { path = "../../../sdk/rust" }`) | `sdk/rust` |
+| **C++** | links the locally built `foundry_local_cpp` library | `sdk_v2/cpp` |
+
+> Build the relevant SDK first (see each sample's README), then build/run the sample. Because
+> the samples reference local source, there is no version to bump — they always use the code
+> currently checked out.
+
+> **What "local source" means precisely:** the Foundry Local **SDK binding** always resolves to
+> the in-repo source above — never a published PyPI/npm/crates/NuGet release. Only the prebuilt
+> **native runtime** packages (ONNX Runtime, GenAI, and Foundry Local Core) are fetched from
+> public package feeds, exactly as each SDK itself obtains them.
+
+> **Hardware acceleration (WinML):** for simplicity and consistency, these samples use the
+> standard cross-platform SDK on **all** platforms (Windows, macOS, Linux). Windows hardware
+> acceleration via WinML is a capability of the SDK itself, not wired into these samples — see
+> the [main README](../README.md) quickstart to enable it in your own app.
+
+## Want version-pinned or comprehensive samples?
+
+For a broader catalog of samples pinned to specific released versions — including the examples
+referenced from **Microsoft Learn** content — see:
+
+> 👉 **[microsoft-foundry/foundry-samples](https://github.com/microsoft-foundry/foundry-samples/)**
+
+## What's included
+
+Each language provides the same four samples:
+
+| Sample | Description |
+|--------|-------------|
+| **chat** (`chat-completion`) | Runs a prompt through **native in-process inference**, then the **same prompt over the local web server** (OpenAI-compatible `/v1/chat/completions`). |
+| **embeddings** (`embeddings`) | Generates text embeddings (single and batch) using the native SDK. |
+| **audio** (`audio`) | **Live** microphone streaming transcription (Nemotron ASR) by default, plus **file-based** transcription (Whisper) via `--file <path>`. |
+| **responses** (`responses-api`) | Vision (image understanding) via the local web server **Responses API** (`/v1/responses`). |
+
 ## Samples by Language
 
-| Language | Samples | Description |
-|----------|---------|-------------|
-| [**C#**](cs/) | 14 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, vision via Responses API, tutorials, and WinML EP verification. Uses WinML on Windows for hardware acceleration. |
-| [**JavaScript**](js/) | 16 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, vision via Responses API, tutorials, and WinML EP verification. |
-| [**Python**](python/) | 14 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, Responses API, tutorials, and WinML EP verification. |
-| [**Rust**](rust/) | 12 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, vision via Responses API, tutorials, and WinML EP verification. |
-| [**C++**](cpp/) | 1 | C++ sample for live audio transcription. |
+| Language | Folder | Notes |
+|----------|--------|-------|
+| **C#** | [`cs/`](cs/) | .NET SDK. |
+| **JavaScript** | [`js/`](js/) | Node.js SDK. |
+| **Python** | [`python/`](python/) | Python SDK (OpenAI-compatible API for web-server samples). |
+| **Rust** | [`rust/`](rust/) | Rust SDK (Cargo workspace). |
+| **C++** | [`cpp/`](cpp/) | C++ SDK (`sdk_v2/cpp`); build the SDK with `python sdk_v2/cpp/build.py` first. |
diff --git a/samples/cpp/.gitignore b/samples/cpp/.gitignore
new file mode 100644
index 000000000..d26e46380
--- /dev/null
+++ b/samples/cpp/.gitignore
@@ -0,0 +1,2 @@
+# Sample build trees
+build/
diff --git a/samples/cpp/README.md b/samples/cpp/README.md
new file mode 100644
index 000000000..1aa4e02f2
--- /dev/null
+++ b/samples/cpp/README.md
@@ -0,0 +1,43 @@
+# Foundry Local — C++ Samples
+
+Self-contained C++ samples for the **`sdk_v2/cpp`** SDK (the C++ rewrite, public
+header `foundry_local/foundry_local_cpp.h`).
+
+These samples track **`main`**: they build against your **local** `sdk_v2/cpp`
+build, not a pinned SDK release.
+
+## Build the SDK first
+
+Every sample links the locally-built SDK shared library, so build it once:
+
+```bash
+python ../../sdk_v2/cpp/build.py
+```
+
+This produces `sdk_v2/cpp/build/<platform>/<config>/` (default config
+`RelWithDebInfo`). The shared `cmake/FoundryLocalSDK.cmake` module locates that
+output automatically; override it with `-DFOUNDRY_LOCAL_BUILD_CONFIG=...`,
+`-DFOUNDRY_LOCAL_SDK_DIR=...`, or `-DFOUNDRY_LOCAL_BUILD_DIR=...` if needed.
+
+## Samples
+
+| Sample                              | What it shows                                                                 |
+|-------------------------------------|-------------------------------------------------------------------------------|
+| [`chat-completion`](chat-completion)| One chat prompt, run natively in-process **and** over the local web server (`POST /v1/chat/completions`), including streaming. |
+| [`embeddings`](embeddings)          | Native single and batch text embeddings.                                       |
+| [`audio`](audio)                    | Streaming ASR transcription from live mic (optional PortAudio) or a WAV file.   |
+| [`responses-api`](responses-api)    | Vision / image understanding over the local web server (`POST /v1/responses`).  |
+
+## Build and run a sample
+
+Each sample is standalone:
+
+```bash
+cd chat-completion          # or embeddings, audio, responses-api
+cmake -S . -B build
+cmake --build build
+./build/<target>            # see the sample's README for the exact target/args
+```
+
+Shared, header-only helpers (`common/`) and the build-wiring module (`cmake/`) are
+reused across samples.
diff --git a/samples/cpp/audio/CMakeLists.txt b/samples/cpp/audio/CMakeLists.txt
new file mode 100644
index 000000000..b02f4ad53
--- /dev/null
+++ b/samples/cpp/audio/CMakeLists.txt
@@ -0,0 +1,50 @@
+# Copyright (c) Microsoft. All rights reserved.
+#
+# Standalone build for the Foundry Local C++ audio transcription sample.
+# Build the SDK first:  python ../../../sdk_v2/cpp/build.py
+# Then:                 cmake -S . -B build && cmake --build build
+#
+# PortAudio is OPTIONAL: when found, live microphone capture is enabled
+# (HAS_PORTAUDIO). Without it, the sample still builds and transcribes files /
+# synthetic audio.
+
+cmake_minimum_required(VERSION 3.20)
+project(foundry_local_audio_sample CXX)
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Locate the locally-built SDK and define the foundry_local_cpp target.
+include(${CMAKE_CURRENT_LIST_DIR}/../cmake/FoundryLocalSDK.cmake)
+
+add_executable(audio main.cc)
+find_package(Threads REQUIRED)  # main.cc spawns a std::thread; older Linux toolchains need explicit pthread linkage
+target_link_libraries(audio PRIVATE foundry_local_cpp Threads::Threads)
+
+# Absolute path to this sample dir so the bundled Recording.wav is found
+# regardless of where the executable runs from.
+target_compile_definitions(audio PRIVATE SAMPLE_SOURCE_DIR="${CMAKE_CURRENT_LIST_DIR}")
+
+# --- Optional PortAudio for live microphone capture -------------------------
+find_package(PkgConfig QUIET)
+if(PkgConfig_FOUND)
+  pkg_check_modules(PORTAUDIO QUIET portaudio-2.0)
+endif()
+
+find_path(PORTAUDIO_INCLUDE_DIR portaudio.h
+    HINTS ${PORTAUDIO_INCLUDE_DIRS} /opt/homebrew/include /usr/local/include)
+find_library(PORTAUDIO_LIBRARY NAMES portaudio
+    HINTS ${PORTAUDIO_LIBRARY_DIRS} /opt/homebrew/lib /usr/local/lib)
+
+if(PORTAUDIO_INCLUDE_DIR AND PORTAUDIO_LIBRARY)
+  message(STATUS "PortAudio found — live microphone capture enabled")
+  target_compile_definitions(audio PRIVATE HAS_PORTAUDIO)
+  target_include_directories(audio PRIVATE ${PORTAUDIO_INCLUDE_DIR})
+  target_link_libraries(audio PRIVATE ${PORTAUDIO_LIBRARY})
+else()
+  message(STATUS "PortAudio not found — building file/synthetic-only (no live mic)")
+endif()
+
+# Bake in the rpath so the executable finds the SDK shared library at runtime.
+foundry_local_configure_sample(audio)
diff --git a/samples/cpp/audio/README.md b/samples/cpp/audio/README.md
new file mode 100644
index 000000000..dec0064b2
--- /dev/null
+++ b/samples/cpp/audio/README.md
@@ -0,0 +1,77 @@
+# Audio Transcription (C++)
+
+Transcribes audio with Foundry Local using two model paths — matching the
+other-language `audio` samples:
+
+- **Live microphone → Nemotron streaming ASR** (`nemotron-speech-streaming-en-0.6b`):
+  incremental, real-time transcription, the same flow as
+  `sdk_v2/cpp/examples/realtime_audio`.
+- **File → Whisper** (`whisper-tiny`): whole-file, non-streaming transcription.
+
+This sample tracks **`main`** — it builds against your **local** `sdk_v2/cpp` build,
+not a pinned SDK release.
+
+## What it does
+
+- **Live path** uses a streaming `AudioSession`: a `Request` carries a `pcm` format
+  descriptor plus an `ItemQueue`, a background producer pushes PCM chunks into the
+  queue, and a streaming callback prints transcribed text as it arrives.
+- **File path** uses a non-streaming `AudioSession`: a single `Item::AudioFromUri(path)`
+  drives Whisper, and the transcript is read from the response's text item. The SDK
+  reads and decodes the file, so no manual PCM handling is needed.
+
+### Modes
+
+| Invocation            | Model                | Source                                          |
+|-----------------------|----------------------|-------------------------------------------------|
+| *(default)*           | Nemotron (streaming) | Live microphone via PortAudio; falls back to Whisper transcription of the bundled WAV. |
+| `--file [path]`       | Whisper              | An audio file. With no path, uses the bundled `Recording.wav`. |
+| `--synth`             | Nemotron (streaming) | A generated 440 Hz sine tone (no mic, no file). |
+
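+The Nemotron streaming model expects **16 kHz mono** PCM, which the live-mic and `--synth` paths
+produce; for `--file`, the SDK decodes the audio itself (`Recording.wav` is already 16 kHz mono).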
+
+### Live microphone is optional
+
+Live capture uses [PortAudio](http://www.portaudio.com/) and is enabled **only when
+PortAudio is found at configure time** (the build defines `HAS_PORTAUDIO` and links
+it). Without PortAudio the sample still builds and runs — the default mode falls back
+to Whisper transcription of the bundled WAV, and `--file` / `--synth` work as usual.
+
+Install PortAudio for live mic capture:
+
+```bash
+# macOS
+brew install portaudio
+# Debian/Ubuntu
+sudo apt-get install portaudio19-dev
+```
+
+## Prerequisites
+
+```bash
+python ../../../sdk_v2/cpp/build.py
+```
+
+## Build
+
+```bash
+cmake -S . -B build
+cmake --build build
+```
+
+Override the SDK config/location if needed:
+`-DFOUNDRY_LOCAL_BUILD_CONFIG=Debug`, `-DFOUNDRY_LOCAL_SDK_DIR=...`,
+`-DFOUNDRY_LOCAL_BUILD_DIR=...`.
+
+## Run
+
+```bash
+./build/audio              # live mic (Nemotron); falls back to Whisper file if no mic
+./build/audio --file       # bundled Recording.wav (Whisper)
+./build/audio --file /path/to/audio.wav
+./build/audio --synth      # generated sine tone (Nemotron streaming)
+```
+
+Press `Ctrl+C` to stop live capture gracefully.
+
diff --git a/samples/cpp/audio/Recording.wav b/samples/cpp/audio/Recording.wav
new file mode 100644
index 000000000..3b0b08a94
Binary files /dev/null and b/samples/cpp/audio/Recording.wav differ
diff --git a/samples/cpp/audio/main.cc b/samples/cpp/audio/main.cc
new file mode 100644
index 000000000..da3752b68
--- /dev/null
+++ b/samples/cpp/audio/main.cc
@@ -0,0 +1,402 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// Sample: Live / file audio transcription with the Foundry Local C++ SDK (sdk_v2/cpp).
+//
+// Two transcription paths, matching the other-language audio samples:
+//   * Live microphone  -> Nemotron streaming ASR (incremental, real-time).
+//   * File             -> Whisper (whole-file, non-streaming).
+//
+// The live path mirrors sdk_v2/cpp/examples/realtime_audio: a streaming AudioSession
+// receives PCM through an ItemQueue and emits incremental text via a streaming
+// callback. The file path submits a single AUDIO item (file URI) and reads the
+// transcript from the response.
+//
+// Modes:
+//   (default)      Live microphone capture via PortAudio (compile-time optional,
+//                  behind HAS_PORTAUDIO). Falls back to Whisper transcription of the
+//                  bundled WAV if PortAudio is unavailable.
+//   --file [path]  Transcribe an audio file with Whisper. With no path, uses the
+//                  bundled Recording.wav.
+//   --synth        Stream a generated 440 Hz sine tone through the Nemotron model.
+//
+// The Nemotron streaming model expects 16 kHz mono PCM.
+
+#include <foundry_local/foundry_local_cpp.h>
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <csignal>
+#include <cstdint>
+#include <deque>
+#include <filesystem>
+#include <functional>
+#include <iostream>
+#include <mutex>
+#include <stdexcept>
+#include <string>
+#include <thread>
+#include <vector>
+
+// PortAudio is optional: the CMake build defines HAS_PORTAUDIO and links the
+// library only when it is found, so the sample also builds without a mic stack.
+#ifdef HAS_PORTAUDIO
+#include <portaudio.h>
+#endif
+
+using namespace foundry_local;
+
+namespace {
+
+constexpr const char* kStreamingModel = "nemotron-speech-streaming-en-0.6b";  // live mic / synthetic PCM
+constexpr const char* kWhisperModel = "whisper-tiny";                          // file-based transcription
+constexpr int kSampleRate = 16000;
+constexpr int kChannels = 1;
+
+// Set to false by Ctrl+C to request a graceful stop of live capture.
+std::atomic<bool> g_running{true};
+
+void HandleSigint(int /*signum*/) {
+  g_running = false;
+}
+
+/// Resolve `alias` in the catalog, download the model if it is not cached, load it if it is not loaded, and return it.
+std::unique_ptr<IModel> LoadModel(Manager& manager, const std::string& alias) {
+  auto model = manager.GetCatalog().GetModel(alias);
+  if (!model) {  // a null result is reported as "alias not in catalog"
+    throw std::runtime_error("Model '" + alias + "' not found in catalog.");
+  }
+
+  std::cout << "Using model: " << model->GetInfo().Name() << "\n";
+
+  if (!model->IsCached()) {
+    std::cout << "Downloading...\n";
+    model->Download([](float progress) -> int {
+      std::cout << "\r  " << static_cast<int>(progress) << "%" << std::flush;  // '\r' redraws the same console line
+      return 0;  // return non-zero to cancel
+    });
+    std::cout << "\n";  // finish the in-place progress line
+  }
+
+  if (!model->IsLoaded()) {
+    std::cout << "Loading model...\n";
+    model->Load();
+  }
+
+  return model;
+}
+
+/// A producer pushes PCM into the session's ItemQueue, then returns.
+/// RunSession marks the queue finished once the producer returns.
+using Producer = std::function<void(ItemQueue&)>;
+
+/// Run one streaming transcription: `produce` feeds PCM into the session's input queue on a background thread while
+/// TEXT items are printed as they arrive; returns after the producer thread is joined and token usage is printed.
+void RunSession(IModel& model, int sample_rate, int channels, const Producer& produce) {
+  AudioSession session(model);
+
+  // The streaming callback receives one item per invocation; print TEXT items as they arrive.
+  session.SetStreamingCallback([](flStreamingCallbackData event) -> int {
+    const auto* item_api = detail::item_api();
+
+    flItem* raw_item = nullptr;
+    if (item_api->ItemQueue_TryPop(event.item_queue, &raw_item)) {
+      Item item(*raw_item);
+      if (item.GetType() == FOUNDRY_LOCAL_ITEM_TEXT) {  // items of any other type are ignored
+        std::cout << item.GetText().text << std::flush;
+      }
+    }
+
+    return 0;  // return non-zero to cancel
+  });
+
+  // Queue that carries streamed audio chunks. Added to the request without
+  // transferring ownership so the producer thread can keep pushing into it.
+  ItemQueue audio_input;
+
+  Request request;
+  request.AddItem(Item::AudioFromData("pcm", nullptr, 0, sample_rate, channels));  // format descriptor: no sample data
+  request.AddItem(audio_input, /*take_ownership*/ false);
+
+  std::thread producer([&] {
+    try {
+      produce(audio_input);
+    } catch (const std::exception& ex) {
+      std::cerr << "\nAudio producer error: " << ex.what() << "\n";
+    }
+
+    audio_input.MarkFinished();  // reached after a std::exception from `produce` too, so the queue is still closed
+  });
+
+  std::cout << "Transcription: ";
+  Response response = [&]() -> Response {  // immediately-invoked lambda: lets `response` be initialised inside a try
+    try {
+      return session.ProcessRequest(request);
+    } catch (...) {
+      // Signal + join the producer before propagating so it never outlives `audio_input`.
+      audio_input.MarkFinished();
+      if (producer.joinable()) {
+        producer.join();
+      }
+
+      throw;
+    }
+  }();
+  std::cout << "\n";
+
+  producer.join();  // normal path: wait for the producer before `audio_input` goes out of scope
+
+  const flUsage usage = response.GetUsage();
+  std::cout << "Tokens — prompt: " << usage.prompt_tokens << ", completion: " << usage.completion_tokens
+            << ", total: " << usage.total_tokens << "\n";
+}
+
+/// Build a Producer that streams `pcm` in fixed-size, non-owning slices (file / synthetic modes).
+/// `pcm` must outlive the RunSession call — BYTES items point into its storage rather than copying it.
+Producer StreamBuffer(const std::vector<uint8_t>& pcm) {
+  return [&pcm](ItemQueue& queue) {
+    constexpr size_t kChunkSize = 4096;
+    for (size_t offset = 0; offset < pcm.size() && g_running;) {
+      const size_t remaining = pcm.size() - offset;
+      const size_t chunk_size = std::min(kChunkSize, remaining);  // the final slice may be shorter
+      queue.Push(Item::Bytes(FOUNDRY_LOCAL_ITEM_BYTES, pcm.data() + offset, chunk_size));
+      offset += chunk_size;
+
+      // Throttle pushes so the stream arrives at roughly real-time pace, like live audio.
+      std::this_thread::sleep_for(std::chrono::milliseconds(125));
+    }
+  };
+}
+
+/// Generate `seconds` of little-endian, signed 16-bit mono PCM for a half-scale sine tone at `frequency_hz`.
+std::vector<uint8_t> GenerateSinePcm(int sample_rate, int seconds, double frequency_hz) {
+  constexpr double kTwoPi = 6.283185307179586;  // spelled out: M_PI is not standard C++ (MSVC needs _USE_MATH_DEFINES)
+  const auto total_samples = static_cast<size_t>(sample_rate) * static_cast<size_t>(seconds);
+  std::vector<uint8_t> pcm(total_samples * 2, 0);  // two bytes per 16-bit sample
+  for (size_t i = 0; i < total_samples; ++i) {
+    const double t = static_cast<double>(i) / sample_rate;  // time of sample i, in seconds
+    const auto sample = static_cast<int16_t>(0.5 * INT16_MAX * std::sin(kTwoPi * frequency_hz * t));
+    pcm[i * 2] = static_cast<uint8_t>(static_cast<uint16_t>(sample) & 0xFF);             // low byte first
+    pcm[i * 2 + 1] = static_cast<uint8_t>((static_cast<uint16_t>(sample) >> 8) & 0xFF);  // then high byte
+  }
+
+  return pcm;
+}
+
+#ifdef HAS_PORTAUDIO
+
+/// Mutex-guarded FIFO of captured PCM chunks, capped at kMaxSize entries; pushing when full evicts the oldest chunk.
+class CaptureQueue {
+ public:
+  void Push(std::vector<uint8_t> chunk) {  // appends `chunk`, evicting the oldest entry when at capacity
+    const std::scoped_lock guard(mutex_);
+    if (queue_.size() >= kMaxSize) {
+      queue_.pop_front();
+    }
+
+    queue_.push_back(std::move(chunk));
+  }
+
+  bool TryPop(std::vector<uint8_t>& out) {  // moves the oldest chunk into `out`; false (out untouched) when empty
+    const std::scoped_lock guard(mutex_);
+    if (!queue_.empty()) {
+      out = std::move(queue_.front());
+      queue_.pop_front();
+      return true;
+    }
+
+    return false;
+  }
+
+ private:
+  static constexpr size_t kMaxSize = 100;  // capacity in chunks, not bytes
+  std::deque<std::vector<uint8_t>> queue_;
+  std::mutex mutex_;
+};
+
+/// PortAudio stream callback: copies each captured buffer of 16-bit PCM into the CaptureQueue passed as `user_data`.
+int PaCapture(const void* input, void* /*output*/, unsigned long frame_count,
+              const PaStreamCallbackTimeInfo* /*time_info*/, PaStreamCallbackFlags /*flags*/, void* user_data) {
+  // A null input buffer is skipped; the continue/complete decision below is still reported.
+  if (const auto* first = static_cast<const uint8_t*>(input); first != nullptr) {
+    constexpr size_t kBytesPerFrame = sizeof(int16_t) * kChannels;  // 16-bit samples, kChannels (mono) per frame
+    const auto* last = first + static_cast<size_t>(frame_count) * kBytesPerFrame;
+    static_cast<CaptureQueue*>(user_data)->Push(std::vector<uint8_t>(first, last));
+  }
+
+  return g_running ? paContinue : paComplete;
+}
+
+/// Producer that captures live microphone PCM and streams it into the session.
+/// Returns false if the microphone could not be opened (so the caller can fall back).
+/// PortAudio is shut down on every exit path, including exceptions thrown by RunSession.
+bool TryRunMic(IModel& model) {
+  if (Pa_Initialize() != paNoError) {
+    return false;
+  }
+
+  // Declared before the cleanup guard so it outlives the stream whose callback writes to it.
+  CaptureQueue capture;
+
+  // Stops/closes the stream (if one was opened) and terminates PortAudio when the scope
+  // exits, so early returns and exceptions cannot leave the device open.
+  struct PortAudioCleanup {
+    PaStream* stream = nullptr;
+    bool started = false;
+
+    ~PortAudioCleanup() {
+      if (stream != nullptr) {
+        if (started) {
+          Pa_StopStream(stream);
+        }
+
+        Pa_CloseStream(stream);
+      }
+
+      Pa_Terminate();
+    }
+  } audio;
+
+  PaStreamParameters input_params{};
+  input_params.device = Pa_GetDefaultInputDevice();
+  if (input_params.device == paNoDevice) {
+    return false;
+  }
+
+  // Pa_GetDeviceInfo returns null for an invalid device index; don't dereference blindly.
+  const PaDeviceInfo* device_info = Pa_GetDeviceInfo(input_params.device);
+  if (device_info == nullptr) {
+    return false;
+  }
+
+  input_params.channelCount = kChannels;
+  input_params.sampleFormat = paInt16;
+  input_params.suggestedLatency = device_info->defaultLowInputLatency;
+  input_params.hostApiSpecificStreamInfo = nullptr;
+
+  // Number of frames PortAudio hands to PaCapture per callback.
+  constexpr unsigned long kFramesPerBuffer = 3200;
+
+  PaError err = Pa_OpenStream(&audio.stream, &input_params, nullptr, kSampleRate, kFramesPerBuffer, paClipOff,
+                              PaCapture, &capture);
+  if (err == paNoError) {
+    err = Pa_StartStream(audio.stream);
+    audio.started = (err == paNoError);
+  }
+
+  if (err != paNoError) {
+    return false;
+  }
+
+  std::cout << "\n=== LIVE TRANSCRIPTION ACTIVE — speak into your microphone (Ctrl+C to stop) ===\n";
+
+  // Each captured chunk is moved into an owning BYTES item: the item holds the
+  // buffer (via a deleter) for as long as the session needs it, so chunks can
+  // arrive dynamically without a single long-lived backing buffer.
+  RunSession(model, kSampleRate, kChannels, [&capture](ItemQueue& queue) {
+    while (g_running) {
+      std::vector<uint8_t> chunk;
+      if (capture.TryPop(chunk)) {
+        auto* held = new std::vector<uint8_t>(std::move(chunk));
+        queue.Push(Item::Bytes(FOUNDRY_LOCAL_ITEM_BYTES, held->data(), held->size(),
+                               [held](const flBytesData*) { delete held; }));
+      } else {
+        // Nothing captured yet; back off briefly instead of spinning.
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+      }
+    }
+  });
+
+  return true;
+}
+
+#endif  // HAS_PORTAUDIO
+
+/// Transcribe an audio file with the Whisper model in one non-streaming request.
+/// The request carries a single AUDIO item holding the file URI; the SDK reads and
+/// decodes the file and returns the full transcript as a TEXT item. Prints the
+/// transcript and the token usage, then unloads the model.
+void RunFile(Manager& manager, const std::string& path) {
+  std::cout << "\n=== FILE TRANSCRIPTION (Whisper) ===\n";
+  auto whisper = LoadModel(manager, kWhisperModel);
+
+  AudioSession session(*whisper);
+
+  Request request;
+  request.AddItem(Item::AudioFromUri(path));
+
+  std::cout << "Transcribing: " << path << "\n";
+  Response result = session.ProcessRequest(request);
+
+  std::cout << "Transcription: ";
+  for (const auto& entry : result.GetItems()) {
+    // Only TEXT items carry transcript text; skip everything else.
+    if (entry.GetType() != FOUNDRY_LOCAL_ITEM_TEXT) {
+      continue;
+    }
+
+    std::cout << entry.GetText().text;
+  }
+  std::cout << "\n";
+
+  const flUsage tokens = result.GetUsage();
+  std::cout << "Tokens — prompt: " << tokens.prompt_tokens;
+  std::cout << ", completion: " << tokens.completion_tokens;
+  std::cout << ", total: " << tokens.total_tokens << "\n";
+
+  whisper->Unload();
+}
+
+/// Load the Nemotron streaming model, feed it a generated 2-second 440 Hz sine tone
+/// through RunSession, and unload the model afterwards.
+void RunSynth(Manager& manager) {
+  constexpr int kToneSeconds = 2;
+  constexpr double kToneHz = 440.0;
+
+  std::cout << "\n=== SYNTHETIC TONE (Nemotron streaming) ===\n";
+  auto streaming_model = LoadModel(manager, kStreamingModel);
+
+  std::cout << "Synthetic 440 Hz sine tone (2 s).\n";
+  // Kept in a named local so the buffer stays alive for the whole RunSession call.
+  const std::vector<uint8_t> tone = GenerateSinePcm(kSampleRate, kToneSeconds, kToneHz);
+  RunSession(*streaming_model, kSampleRate, kChannels, StreamBuffer(tone));
+
+  streaming_model->Unload();
+}
+
+/// Command-line switches recognised by the sample.
+struct Options {
+  bool use_file = false;   // `--file` was given
+  bool use_synth = false;  // `--synth` was given
+  std::string file_path;   // token following `--file`; empty when none was supplied
+};
+
+/// Scan argv for `--file [path]` and `--synth`; every other argument is ignored.
+/// The token after `--file` is consumed as the path unless it starts with '-'.
+Options ParseArgs(int argc, char* argv[]) {
+  Options parsed;
+  int index = 1;
+  while (index < argc) {
+    const std::string current = argv[index];
+    if (current == "--synth") {
+      parsed.use_synth = true;
+    } else if (current == "--file") {
+      parsed.use_file = true;
+      const int next = index + 1;
+      if (next < argc && argv[next][0] != '-') {
+        parsed.file_path = argv[next];
+        index = next;  // the path token has been consumed
+      }
+    }
+
+    ++index;
+  }
+
+  return parsed;
+}
+
+}  // namespace
+
+/// Entry point: picks a transcription mode from the command line and runs it.
+/// Returns 0 on success, or 1 after printing the message of a caught exception.
+int main(int argc, char* argv[]) {
+  const Options opts = ParseArgs(argc, argv);
+  // Default input for file mode: Recording.wav under the SAMPLE_SOURCE_DIR path.
+  const std::string bundled_wav = (std::filesystem::path(SAMPLE_SOURCE_DIR) / "Recording.wav").string();
+
+  // Install the SIGINT handler before any session is started.
+  std::signal(SIGINT, HandleSigint);
+
+  try {
+    Configuration config("foundry_local_samples");
+    Manager manager(std::move(config));
+
+    // Mode selection: explicit --synth / --file win; otherwise try the live mic,
+    // falling back to Whisper transcription of the bundled WAV.
+    if (opts.use_synth) {
+      RunSynth(manager);
+    } else if (opts.use_file) {
+      // `--file` without a path transcribes the bundled recording.
+      RunFile(manager, opts.file_path.empty() ? bundled_wav : opts.file_path);
+    } else {
+#ifdef HAS_PORTAUDIO
+      auto model = LoadModel(manager, kStreamingModel);
+      const bool mic_ran = TryRunMic(*model);
+      // Unload the streaming model whether or not the microphone ran; the fallback
+      // below loads its own model inside RunFile.
+      model->Unload();
+
+      if (!mic_ran) {
+        std::cout << "Microphone unavailable — falling back to Whisper file transcription.\n";
+        RunFile(manager, bundled_wav);
+      }
+#else
+      // No microphone support compiled in: go straight to file transcription.
+      std::cout << "Built without PortAudio — transcribing the bundled WAV with Whisper.\n";
+      std::cout << "(Pass --file <path> for another file, or --synth for a generated tone.)\n";
+      RunFile(manager, bundled_wav);
+#endif
+    }
+  } catch (const Error& ex) {
+    // SDK errors expose a code in addition to the message.
+    std::cerr << "Foundry Local error [" << ex.Code() << "]: " << ex.what() << "\n";
+    return 1;
+  } catch (const std::exception& ex) {
+    std::cerr << "Error: " << ex.what() << "\n";
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/samples/cpp/chat-completion/CMakeLists.txt b/samples/cpp/chat-completion/CMakeLists.txt
new file mode 100644
index 000000000..b64b7221e
--- /dev/null
+++ b/samples/cpp/chat-completion/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright (c) Microsoft. All rights reserved.
+#
+# Standalone build for the Foundry Local C++ "chat-completion" sample.
+# Build the SDK first:  python ../../../sdk_v2/cpp/build.py
+# Then:                 cmake -S . -B build && cmake --build build
+
+cmake_minimum_required(VERSION 3.20)
+project(foundry_local_chat_completion_sample CXX)
+
+# Strict ISO C++20 without compiler extensions; main.cc relies on C++20
+# designated initializers when it builds the request struct.
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Locate the locally-built SDK and define the foundry_local_cpp target.
+include(${CMAKE_CURRENT_LIST_DIR}/../cmake/FoundryLocalSDK.cmake)
+
+add_executable(chat_completion main.cc)
+
+# Shared sample helpers (the minimal localhost HTTP client) live in samples/cpp/common.
+# Adding the parent directory lets main.cc include "common/local_http_client.h".
+target_include_directories(chat_completion PRIVATE ${CMAKE_CURRENT_LIST_DIR}/..)
+target_link_libraries(chat_completion PRIVATE foundry_local_cpp)
+
+# Bake in the rpath so the executable finds the SDK shared library at runtime.
+# (On Windows the helper copies the runtime DLLs next to the executable instead.)
+foundry_local_configure_sample(chat_completion)
diff --git a/samples/cpp/chat-completion/README.md b/samples/cpp/chat-completion/README.md
new file mode 100644
index 000000000..da27d7919
--- /dev/null
+++ b/samples/cpp/chat-completion/README.md
@@ -0,0 +1,55 @@
+# Chat Completion (C++)
+
+Runs the **same chat prompt** through the Foundry Local C++ SDK (`sdk_v2/cpp`) in
+three ways so you can see the two execution surfaces side by side:
+
+1. **Native, in-process (non-streaming)** — `ChatSession::ProcessRequest`.
+2. **Native, in-process (streaming)** — incremental tokens via a streaming callback.
+3. **Local web server** — host the embedded OpenAI-compatible service with
+   `AddWebServiceEndpoint` + `StartWebService`, then `POST /v1/chat/completions`
+   over loopback using a tiny built-in HTTP client (no third-party HTTP dependency).
+
+The same loaded model backs all three paths — the web service reuses the model the
+SDK already loaded in-process.
+
+This sample tracks **`main`** — it builds against your **local** `sdk_v2/cpp` build,
+not a pinned SDK release.
+
+## What it does
+
+1. Creates a `Manager` with an embedded web service endpoint
+   (`http://127.0.0.1:0` — an ephemeral port chosen by the OS).
+2. Resolves the `qwen2.5-0.5b` chat model, downloading + loading it if needed.
+3. Runs the prompt natively (non-streaming, then streaming).
+4. Starts the web service, discovers the bound URL via `GetWebServiceEndpoints()`,
+   and POSTs the same prompt to `/v1/chat/completions` (the request body is built
+   from typed structs serialized with `nlohmann/json`).
+
+> The web service resolves models by their full **variant id** (e.g.
+> `qwen2.5-0.5b-instruct-generic-cpu`), which the sample reads from
+> `ModelInfo::Id()` — not the short alias.
+
+## Prerequisites
+
+```bash
+python ../../../sdk_v2/cpp/build.py
+```
+
+## Build
+
+```bash
+cmake -S . -B build
+cmake --build build
+```
+
+Override the SDK config/location if needed:
+`-DFOUNDRY_LOCAL_BUILD_CONFIG=Debug`, `-DFOUNDRY_LOCAL_SDK_DIR=...`,
+`-DFOUNDRY_LOCAL_BUILD_DIR=...`.
+
+## Run
+
+```bash
+./build/chat_completion     # Windows: .\build\chat_completion.exe
+```
+
+The first run downloads the model; later runs use the cache.
diff --git a/samples/cpp/chat-completion/main.cc b/samples/cpp/chat-completion/main.cc
new file mode 100644
index 000000000..97efaf9b8
--- /dev/null
+++ b/samples/cpp/chat-completion/main.cc
@@ -0,0 +1,201 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// Sample: Chat completions with the Foundry Local C++ SDK (sdk_v2/cpp), shown two
+// ways with the *same* prompt so you can compare them:
+//
+//   1. Native, in-process inference via ChatSession (non-streaming + streaming).
+//   2. The embedded OpenAI-compatible web service: host it with
+//      AddWebServiceEndpoint + StartWebService, then POST /v1/chat/completions.
+//
+// Both paths use the same loaded model — the web service reuses the in-process
+// model the SDK already loaded.
+
+#include <foundry_local/foundry_local_cpp.h>
+
+#include <nlohmann/json.hpp>
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "common/local_http_client.h"
+
+using namespace foundry_local;
+using json = nlohmann::json;
+
+namespace {
+
+constexpr const char* kModelAlias = "qwen2.5-0.5b";
+constexpr const char* kPrompt = "What is the capital of France?";
+
+// ---------------------------------------------------------------------------
+// JSON contract types for POST /v1/chat/completions (OpenAI shape).
+// ---------------------------------------------------------------------------
+
+/// One chat turn in the OpenAI message shape.
+struct ChatMessage {
+  std::string role;     // serialized as "role"
+  std::string content;  // serialized as "content"
+};
+
+/// Request body for POST /v1/chat/completions.
+struct ChatCompletionRequest {
+  std::string model;                  // serialized as "model"
+  std::vector<ChatMessage> messages;  // serialized as "messages"
+  bool stream = false;                // serialized as "stream"
+};
+
+/// nlohmann/json serializer hook for ChatMessage.
+void to_json(json& j, const ChatMessage& m) {
+  j = json::object();
+  j["role"] = m.role;
+  j["content"] = m.content;
+}
+
+/// nlohmann/json serializer hook for ChatCompletionRequest; each message is
+/// serialized through the ChatMessage hook above.
+void to_json(json& j, const ChatCompletionRequest& r) {
+  j = json::object();
+  j["model"] = r.model;
+  j["messages"] = r.messages;
+  j["stream"] = r.stream;
+}
+
+// ---------------------------------------------------------------------------
+// Native, in-process inference.
+// ---------------------------------------------------------------------------
+
+/// Send kPrompt through a ChatSession as one blocking request, then print every
+/// MESSAGE item of the response followed by the token usage.
+void NativeNonStreaming(IModel& model) {
+  ChatSession chat(model);
+
+  Request prompt{UserMessage(kPrompt)};
+  Response reply = chat.ProcessRequest(prompt);
+
+  for (const auto& entry : reply.GetItems()) {
+    // Anything that is not a MESSAGE item is ignored.
+    if (entry.GetType() != FOUNDRY_LOCAL_ITEM_MESSAGE) {
+      continue;
+    }
+
+    std::cout << "Assistant: " << entry.GetMessage().GetSimpleText() << "\n";
+  }
+
+  const flUsage tokens = reply.GetUsage();
+  std::cout << "Tokens — prompt: " << tokens.prompt_tokens;
+  std::cout << ", completion: " << tokens.completion_tokens;
+  std::cout << ", total: " << tokens.total_tokens << "\n";
+}
+
+/// Run kPrompt through a ChatSession with a streaming callback registered, printing
+/// TEXT items as the callback receives them. The Response returned by ProcessRequest
+/// is not used here; all output comes from the callback.
+void NativeStreaming(IModel& model) {
+  ChatSession session(model);
+
+  // Each callback delivers exactly one item from the queue; we print TEXT items as they arrive.
+  session.SetStreamingCallback([](flStreamingCallbackData event) -> int {
+    const auto* item_api = detail::item_api();
+
+    flItem* raw_item = nullptr;
+    if (item_api->ItemQueue_TryPop(event.item_queue, &raw_item)) {
+      // NOTE(review): Item is constructed from the popped raw item; confirm whether this
+      // wrapper takes ownership of raw_item or whether it must be released separately.
+      Item item(*raw_item);
+      if (item.GetType() == FOUNDRY_LOCAL_ITEM_TEXT) {
+        // Flush after every fragment so partial output is visible immediately.
+        std::cout << item.GetText().text << std::flush;
+      }
+    }
+
+    return 0;  // return non-zero to cancel
+  });
+
+  Request request{UserMessage(kPrompt)};
+
+  // Print the label first; the callback appends the streamed text to the same line.
+  std::cout << "Assistant: ";
+  session.ProcessRequest(request);
+  std::cout << "\n";
+}
+
+// ---------------------------------------------------------------------------
+// Web service inference: POST the same prompt to /v1/chat/completions.
+// ---------------------------------------------------------------------------
+
+/// Start the embedded web service, POST kPrompt to /v1/chat/completions on the first
+/// reported endpoint, and print the assistant reply plus token usage.
+/// The web service is stopped again on every exit path: normal completion, the
+/// early-return error paths, and exceptions (which are rethrown to the caller).
+void WebServiceChat(Manager& manager, IModel& model) {
+  manager.StartWebService();
+
+  // Everything that needs the running service lives in this lambda so the single
+  // StopWebService() below covers each of its return paths.
+  const auto exchange = [&manager, &model]() {
+    const std::vector<std::string> endpoints = manager.GetWebServiceEndpoints();
+    if (endpoints.empty()) {
+      std::cerr << "Web service did not report any endpoints.\n";
+      return;
+    }
+
+    const sample::http::Url url = sample::http::ParseUrl(endpoints[0]);
+    std::cout << "Web service listening at " << endpoints[0] << "\n";
+
+    // The web service resolves models by their full variant id, not the alias.
+    ChatCompletionRequest request{.model = std::string(model.GetInfo().Id()),
+                                  .messages = {{"user", kPrompt}},
+                                  .stream = false};
+
+    const json body = request;
+    const sample::http::Response response =
+        sample::http::Post(url.host, url.port, "/v1/chat/completions", body.dump());
+
+    if (response.status != 200) {
+      std::cerr << "HTTP " << response.status << ": " << response.body << "\n";
+      return;
+    }
+
+    const json parsed = json::parse(response.body);
+
+    // operator[] on a const json must not be used with a missing key or index,
+    // so validate the shape before reaching into choices[0].message.
+    const auto choices = parsed.find("choices");
+    if (choices == parsed.end() || !choices->is_array() || choices->empty() ||
+        !(*choices)[0].contains("message")) {
+      std::cerr << "Unexpected response shape: " << response.body << "\n";
+      return;
+    }
+
+    // A missing or non-string "content" is printed as an empty reply.
+    const json& message = (*choices)[0]["message"];
+    std::string content;
+    if (const auto it = message.find("content"); it != message.end() && it->is_string()) {
+      content = it->get<std::string>();
+    }
+
+    std::cout << "Assistant: " << content << "\n";
+
+    if (parsed.contains("usage")) {
+      const auto& usage = parsed["usage"];
+      std::cout << "Tokens — prompt: " << usage.value("prompt_tokens", 0)
+                << ", completion: " << usage.value("completion_tokens", 0)
+                << ", total: " << usage.value("total_tokens", 0) << "\n";
+    }
+  };
+
+  try {
+    exchange();
+  } catch (...) {
+    // Don't leave the service running when the exchange fails. The original error is
+    // the interesting one, so a secondary failure while stopping is ignored.
+    try {
+      manager.StopWebService();
+    } catch (...) {
+    }
+
+    throw;
+  }
+
+  manager.StopWebService();
+}
+
+}  // namespace
+
+/// Entry point: prepares the chat model, then runs the same prompt natively
+/// (non-streaming and streaming) and through the embedded web service.
+/// Returns 0 on success, or 1 if the model is missing or an exception is caught.
+int main() {
+  try {
+    // 1. Configure the SDK and request an embedded web service endpoint
+    //    (ephemeral port — the bound URL is reported by GetWebServiceEndpoints()).
+    Configuration config("foundry_local_samples");
+    config.AddWebServiceEndpoint("http://127.0.0.1:0");
+
+    Manager manager(std::move(config));
+
+    // 2. Resolve the chat model and prepare it.
+    auto& catalog = manager.GetCatalog();
+    auto model = catalog.GetModel(kModelAlias);
+    if (!model) {
+      std::cerr << "Model '" << kModelAlias << "' not found in catalog.\n";
+      return 1;
+    }
+
+    std::cout << "Using model: " << model->GetInfo().Name() << "\n";
+
+    // Download only when the model is not already cached.
+    if (!model->IsCached()) {
+      std::cout << "Downloading...\n";
+      model->Download([](float progress) -> int {
+        // "\r" rewrites the same console line on each progress update.
+        std::cout << "\r  " << static_cast<int>(progress) << "%" << std::flush;
+        return 0;  // return non-zero to cancel
+      });
+      std::cout << "\n";
+    }
+
+    if (!model->IsLoaded()) {
+      std::cout << "Loading model...\n";
+      model->Load();
+    }
+
+    // 3. Run the same prompt three ways.
+    std::cout << "\n=== Native in-process (non-streaming) ===\n";
+    NativeNonStreaming(*model);
+
+    std::cout << "\n=== Native in-process (streaming) ===\n";
+    NativeStreaming(*model);
+
+    std::cout << "\n=== Local web server (POST /v1/chat/completions) ===\n";
+    WebServiceChat(manager, *model);
+
+    // Reached only when all three runs completed without throwing.
+    model->Unload();
+  } catch (const Error& ex) {
+    // SDK errors expose a code in addition to the message.
+    std::cerr << "Foundry Local error [" << ex.Code() << "]: " << ex.what() << "\n";
+    return 1;
+  } catch (const std::exception& ex) {
+    std::cerr << "Error: " << ex.what() << "\n";
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/samples/cpp/cmake/FoundryLocalSDK.cmake b/samples/cpp/cmake/FoundryLocalSDK.cmake
new file mode 100644
index 000000000..5cb70dcef
--- /dev/null
+++ b/samples/cpp/cmake/FoundryLocalSDK.cmake
@@ -0,0 +1,139 @@
+# Copyright (c) Microsoft. All rights reserved.
+#
+# FoundryLocalSDK.cmake — wires a sample against a *locally built* copy of the
+# Foundry Local C++ SDK (sdk_v2/cpp).
+#
+# The SDK does not install/export a CMake package, and re-building it via
+# add_subdirectory() would require the full vcpkg toolchain. Instead, after a
+# user runs `python sdk_v2/cpp/build.py`, this module references that build tree
+# directly: it picks up the public headers from sdk_v2/cpp/include, the bundled
+# third-party headers vcpkg produced (gsl/span — required by the C++ wrapper —
+# and nlohmann/json, used by the web-service samples), and the built shared
+# library. It then defines an INTERFACE target `foundry_local_cpp` so each
+# sample links it exactly like the in-tree SDK examples do.
+#
+# Override points (cache variables):
+#   FOUNDRY_LOCAL_SDK_DIR       Path to sdk_v2/cpp        (default: repo layout)
+#   FOUNDRY_LOCAL_BUILD_CONFIG  SDK build config          (default: RelWithDebInfo)
+#   FOUNDRY_LOCAL_BUILD_DIR     SDK build output dir      (default: derived)
+
+# Include guard: if the target already exists this module was processed before,
+# so skip everything below.
+if(TARGET foundry_local_cpp)
+  return()
+endif()
+
+# --- Locate the SDK source tree ---------------------------------------------
+get_filename_component(_fl_default_sdk_dir "${CMAKE_CURRENT_LIST_DIR}/../../../sdk_v2/cpp" ABSOLUTE)
+set(FOUNDRY_LOCAL_SDK_DIR "${_fl_default_sdk_dir}" CACHE PATH "Path to the sdk_v2/cpp source tree")
+
+# Fail early with a hint if the public header is not where we expect it.
+set(_fl_include_dir "${FOUNDRY_LOCAL_SDK_DIR}/include")
+if(NOT EXISTS "${_fl_include_dir}/foundry_local/foundry_local_cpp.h")
+  message(FATAL_ERROR
+    "Foundry Local public header not found under '${_fl_include_dir}'.\n"
+    "Set -DFOUNDRY_LOCAL_SDK_DIR=<path-to>/sdk_v2/cpp.")
+endif()
+
+# --- Derive the build output directory (mirrors build.py's layout) ----------
+# build.py writes to build/<Windows|Linux|macOS>/<Config>.
+if(WIN32)
+  set(_fl_platform "Windows")
+elseif(APPLE)
+  set(_fl_platform "macOS")
+else()
+  set(_fl_platform "Linux")
+endif()
+
+# Note: both values are cache entries, and set(... CACHE ...) does not overwrite an
+# entry that already exists. FOUNDRY_LOCAL_BUILD_DIR is therefore derived only on the
+# first configure; if FOUNDRY_LOCAL_BUILD_CONFIG or FOUNDRY_LOCAL_SDK_DIR is changed
+# later, pass -DFOUNDRY_LOCAL_BUILD_DIR explicitly or start from a fresh build tree.
+set(FOUNDRY_LOCAL_BUILD_CONFIG "RelWithDebInfo"
+    CACHE STRING "SDK build configuration produced by build.py (Debug/Release/RelWithDebInfo/MinSizeRel)")
+set(FOUNDRY_LOCAL_BUILD_DIR "${FOUNDRY_LOCAL_SDK_DIR}/build/${_fl_platform}/${FOUNDRY_LOCAL_BUILD_CONFIG}"
+    CACHE PATH "SDK build output directory")
+
+if(NOT EXISTS "${FOUNDRY_LOCAL_BUILD_DIR}")
+  message(FATAL_ERROR
+    "SDK build directory '${FOUNDRY_LOCAL_BUILD_DIR}' does not exist.\n"
+    "Build the SDK first:  python ${FOUNDRY_LOCAL_SDK_DIR}/build.py --config ${FOUNDRY_LOCAL_BUILD_CONFIG}\n"
+    "Or point -DFOUNDRY_LOCAL_BUILD_DIR=<your build dir>.")
+endif()
+
+# --- Bundled third-party headers (gsl, nlohmann/json) -----------------------
+# The C++ wrapper includes <gsl/span>, so every TU that includes it needs the
+# GSL headers. vcpkg dropped them under build/.../vcpkg_installed/<triplet>/include.
+# The triplet name is not known here, so glob for it and take the first include
+# directory that actually contains gsl/span.
+file(GLOB _fl_vcpkg_includes "${FOUNDRY_LOCAL_BUILD_DIR}/vcpkg_installed/*/include")
+set(_fl_thirdparty_include "")
+foreach(_inc ${_fl_vcpkg_includes})
+  if(EXISTS "${_inc}/gsl/span")
+    set(_fl_thirdparty_include "${_inc}")
+    break()
+  endif()
+endforeach()
+
+if(_fl_thirdparty_include STREQUAL "")
+  message(FATAL_ERROR
+    "Could not find the bundled GSL headers (gsl/span) under "
+    "'${FOUNDRY_LOCAL_BUILD_DIR}/vcpkg_installed/*/include'.\n"
+    "Re-run the SDK build:  python ${FOUNDRY_LOCAL_SDK_DIR}/build.py")
+endif()
+
+# --- Locate the shared library ----------------------------------------------
+# Unix single-config: build/.../bin. Windows multi-config: build/.../bin/<Config>.
+set(_fl_bin_candidates
+    "${FOUNDRY_LOCAL_BUILD_DIR}/bin"
+    "${FOUNDRY_LOCAL_BUILD_DIR}/bin/${FOUNDRY_LOCAL_BUILD_CONFIG}")
+
+# NO_DEFAULT_PATH restricts the search to the SDK build tree. The result is stored
+# in the cache, so a later configure reuses the library found the first time.
+find_library(FOUNDRY_LOCAL_LINK_LIB
+    NAMES foundry_local
+    PATHS ${_fl_bin_candidates} "${FOUNDRY_LOCAL_BUILD_DIR}/${FOUNDRY_LOCAL_BUILD_CONFIG}"
+    NO_DEFAULT_PATH)
+
+if(NOT FOUNDRY_LOCAL_LINK_LIB)
+  message(FATAL_ERROR
+    "Could not find the foundry_local library under '${FOUNDRY_LOCAL_BUILD_DIR}'.\n"
+    "Build the SDK first:  python ${FOUNDRY_LOCAL_SDK_DIR}/build.py --config ${FOUNDRY_LOCAL_BUILD_CONFIG}")
+endif()
+
+# Runtime directory that holds the shared library + co-located ORT/GenAI libs.
+# On Windows the import .lib may sit elsewhere, so locate the .dll explicitly.
+if(WIN32)
+  find_file(FOUNDRY_LOCAL_DLL
+      NAMES foundry_local.dll
+      PATHS ${_fl_bin_candidates}
+      NO_DEFAULT_PATH)
+  if(FOUNDRY_LOCAL_DLL)
+    get_filename_component(FOUNDRY_LOCAL_BIN_DIR "${FOUNDRY_LOCAL_DLL}" DIRECTORY)
+  else()
+    # DLL not found: fall back to the multi-config output directory.
+    set(FOUNDRY_LOCAL_BIN_DIR "${FOUNDRY_LOCAL_BUILD_DIR}/bin/${FOUNDRY_LOCAL_BUILD_CONFIG}")
+  endif()
+else()
+  get_filename_component(FOUNDRY_LOCAL_BIN_DIR "${FOUNDRY_LOCAL_LINK_LIB}" DIRECTORY)
+endif()
+
+# --- The consumable INTERFACE target ----------------------------------------
+# Consumers inherit the include paths, the link library and the C++20 requirement.
+add_library(foundry_local_cpp INTERFACE)
+target_include_directories(foundry_local_cpp INTERFACE
+    "${_fl_include_dir}"
+    "${_fl_thirdparty_include}")
+target_link_libraries(foundry_local_cpp INTERFACE "${FOUNDRY_LOCAL_LINK_LIB}")
+target_compile_features(foundry_local_cpp INTERFACE cxx_std_20)
+
+message(STATUS "Foundry Local SDK library: ${FOUNDRY_LOCAL_LINK_LIB}")
+message(STATUS "Foundry Local SDK headers: ${_fl_include_dir}")
+
+# --- Per-target finalizer ----------------------------------------------------
+# foundry_local_configure_sample(<target>)
+# Lets the sample executable locate the SDK shared library at runtime.
+# The shared library bakes in @loader_path/$ORIGIN, so co-located ORT/GenAI libs
+# resolve automatically once the executable can find libfoundry_local itself.
+function(foundry_local_configure_sample _target)
+  if(APPLE OR UNIX)
+    # macOS and other Unix platforms: point the build-tree rpath at the SDK bin dir.
+    set_property(TARGET ${_target} PROPERTY BUILD_RPATH "${FOUNDRY_LOCAL_BIN_DIR}")
+    if(NOT APPLE)
+      # --disable-new-dtags forces RPATH (not RUNPATH) so it propagates to GenAI's
+      # internal dlopen("libonnxruntime.so") — same treatment as the SDK examples.
+      target_link_options(${_target} PRIVATE -Wl,--disable-new-dtags)
+    endif()
+  elseif(WIN32)
+    # Windows has no rpath: copy every runtime DLL next to the executable.
+    file(GLOB _fl_dlls_to_copy "${FOUNDRY_LOCAL_BIN_DIR}/*.dll")
+    foreach(_runtime_dll IN LISTS _fl_dlls_to_copy)
+      add_custom_command(TARGET ${_target} POST_BUILD
+          COMMAND ${CMAKE_COMMAND} -E copy_if_different "${_runtime_dll}" "$<TARGET_FILE_DIR:${_target}>")
+    endforeach()
+  endif()
+endfunction()
diff --git a/samples/cpp/common/base64.h b/samples/cpp/common/base64.h
new file mode 100644
index 000000000..c59b1d1d2
--- /dev/null
+++ b/samples/cpp/common/base64.h
@@ -0,0 +1,50 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// Minimal standard Base64 encoder used to embed image bytes in a Responses API
+// `data:` URL. Header-only and dependency-free.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace sample {
+
+/// Encode `size` bytes starting at `data` as standard (RFC 4648) Base64 with '=' padding.
+/// An empty input produces an empty string.
+inline std::string Base64Encode(const uint8_t* data, size_t size) {
+  static constexpr char kAlphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+  std::string encoded;
+  encoded.reserve(((size + 2) / 3) * 4);
+
+  // Complete 3-byte groups map to exactly four output characters.
+  const size_t tail_len = size % 3;
+  const size_t full_end = size - tail_len;
+  for (size_t pos = 0; pos < full_end; pos += 3) {
+    const uint32_t bits =
+        (uint32_t{data[pos]} << 16) | (uint32_t{data[pos + 1]} << 8) | uint32_t{data[pos + 2]};
+    encoded += kAlphabet[(bits >> 18) & 0x3F];
+    encoded += kAlphabet[(bits >> 12) & 0x3F];
+    encoded += kAlphabet[(bits >> 6) & 0x3F];
+    encoded += kAlphabet[bits & 0x3F];
+  }
+
+  // A trailing partial group is zero-extended and its unused positions become '='.
+  if (tail_len == 1) {
+    const uint32_t bits = uint32_t{data[full_end]} << 16;
+    encoded += kAlphabet[(bits >> 18) & 0x3F];
+    encoded += kAlphabet[(bits >> 12) & 0x3F];
+    encoded += "==";
+  } else if (tail_len == 2) {
+    const uint32_t bits = (uint32_t{data[full_end]} << 16) | (uint32_t{data[full_end + 1]} << 8);
+    encoded += kAlphabet[(bits >> 18) & 0x3F];
+    encoded += kAlphabet[(bits >> 12) & 0x3F];
+    encoded += kAlphabet[(bits >> 6) & 0x3F];
+    encoded += '=';
+  }
+
+  return encoded;
+}
+
+/// Convenience overload: Base64-encode the full contents of a byte vector.
+inline std::string Base64Encode(const std::vector<uint8_t>& data) {
+  const uint8_t* first = data.data();
+  return Base64Encode(first, data.size());
+}
+
+}  // namespace sample
diff --git a/samples/cpp/common/local_http_client.h b/samples/cpp/common/local_http_client.h
new file mode 100644
index 000000000..2e2a5decc
--- /dev/null
+++ b/samples/cpp/common/local_http_client.h
@@ -0,0 +1,268 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// Minimal, dependency-free HTTP/1.1 client for talking to the Foundry Local
+// embedded web service on localhost. It is intentionally tiny: a single blocking
+// POST that returns the full response. The Foundry Local web service is reached
+// over loopback, so we don't need TLS, redirects, proxies, or keep-alive — we
+// send `Connection: close` and read the body until the server closes the socket.
+//
+// Header-only so every sample can include it without extra build wiring.
+
+#pragma once
+
+#include <cctype>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#pragma comment(lib, "ws2_32.lib")
+#else
+#include <netdb.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+namespace sample::http {
+
+/// Parsed components of an "http://host:port/path" URL.
+struct Url {
+  std::string host;         // host name or address; IPv6 literals are stored without brackets
+  std::string port = "80";  // kept as text because it is handed to getaddrinfo as the service
+  std::string path = "/";   // everything from the first '/' after the authority
+};
+
+/// Parse "http://127.0.0.1:5273/v1" into {host, port, path}. Only the http scheme is supported.
+///
+/// Bracketed IPv6 literals ("http://[::1]:5273/v1") are accepted: the brackets are
+/// removed from `host` so the value can be resolved directly. A missing or empty
+/// port keeps the default "80"; a missing path keeps the default "/".
+inline Url ParseUrl(const std::string& url) {
+  std::string rest = url;
+
+  // Drop the scheme ("http://") if one is present.
+  if (const auto scheme = rest.find("://"); scheme != std::string::npos) {
+    rest = rest.substr(scheme + 3);
+  }
+
+  Url out;
+  std::string host_port = rest;
+  if (const auto slash = rest.find('/'); slash != std::string::npos) {
+    host_port = rest.substr(0, slash);
+    out.path = rest.substr(slash);
+  }
+
+  // IPv6 literal: the colons inside the brackets belong to the address, so only a
+  // ':' directly after the closing bracket introduces the port.
+  if (!host_port.empty() && host_port.front() == '[') {
+    if (const auto close = host_port.find(']'); close != std::string::npos) {
+      out.host = host_port.substr(1, close - 1);
+      if (close + 2 < host_port.size() && host_port[close + 1] == ':') {
+        out.port = host_port.substr(close + 2);
+      }
+
+      return out;
+    }
+  }
+
+  out.host = host_port;
+  if (const auto colon = host_port.rfind(':'); colon != std::string::npos) {
+    out.host = host_port.substr(0, colon);
+    // "host:" has no usable port; keep the default instead of storing an empty string.
+    if (colon + 1 < host_port.size()) {
+      out.port = host_port.substr(colon + 1);
+    }
+  }
+
+  return out;
+}
+
+/// Outcome of an HTTP exchange.
+struct Response {
+  int status{0};     // numeric status code; 0 until one has been parsed
+  std::string body;  // raw response body
+};
+
+namespace detail {
+
+// Platform shim: gives both platforms the same socket_t / kInvalidSocket /
+// CloseSocket / EnsureWinsock vocabulary so the code below needs no #ifdefs.
+#ifdef _WIN32
+using socket_t = SOCKET;
+constexpr socket_t kInvalidSocket = INVALID_SOCKET;
+
+/// Close a socket handle (Winsock sockets are closed with closesocket()).
+inline void CloseSocket(socket_t s) { ::closesocket(s); }
+
+/// Initialize Winsock once per process via a function-local static.
+/// Throws std::runtime_error on every call if that one-time WSAStartup failed.
+inline void EnsureWinsock() {
+  static const bool ok = [] {
+    WSADATA data;
+    return ::WSAStartup(MAKEWORD(2, 2), &data) == 0;
+  }();
+
+  if (!ok) {
+    throw std::runtime_error("WSAStartup failed");
+  }
+}
+#else
+using socket_t = int;
+constexpr socket_t kInvalidSocket = -1;
+
+/// Close a socket file descriptor.
+inline void CloseSocket(socket_t s) { ::close(s); }
+/// No-op counterpart of the Windows initializer so callers stay platform-neutral.
+inline void EnsureWinsock() {}
+#endif
+
+/// Owning handle for a socket: the destructor closes it (unless it holds
+/// kInvalidSocket), so the socket is released even when an exception unwinds.
+/// Copying is disabled so two objects can never close the same socket.
+class Socket {
+ public:
+  explicit Socket(socket_t fd) : fd_{fd} {}
+
+  Socket(const Socket&) = delete;
+  Socket& operator=(const Socket&) = delete;
+
+  ~Socket() {
+    if (fd_ == kInvalidSocket) {
+      return;
+    }
+
+    CloseSocket(fd_);
+  }
+
+  /// The wrapped socket; ownership stays with this object.
+  socket_t get() const noexcept { return fd_; }
+
+ private:
+  socket_t fd_;
+};
+
+/// Resolve host:port and return a connected TCP socket.
+/// Each resolved address is tried in order until one connects.
+/// Throws std::runtime_error if resolution fails or no address accepts the connection.
+inline socket_t Connect(const std::string& host, const std::string& port) {
+  EnsureWinsock();
+
+  addrinfo query{};
+  query.ai_family = AF_UNSPEC;  // accept both IPv4 and IPv6 results
+  query.ai_socktype = SOCK_STREAM;
+
+  addrinfo* resolved = nullptr;
+  const int rc = ::getaddrinfo(host.c_str(), port.c_str(), &query, &resolved);
+  if (rc != 0 || resolved == nullptr) {
+    throw std::runtime_error("Failed to resolve " + host + ":" + port);
+  }
+
+  socket_t connected = kInvalidSocket;
+  for (const addrinfo* candidate = resolved; candidate != nullptr && connected == kInvalidSocket;
+       candidate = candidate->ai_next) {
+    const socket_t attempt = ::socket(candidate->ai_family, candidate->ai_socktype, candidate->ai_protocol);
+    if (attempt == kInvalidSocket) {
+      continue;
+    }
+
+    if (::connect(attempt, candidate->ai_addr, static_cast<int>(candidate->ai_addrlen)) == 0) {
+      connected = attempt;  // ends the loop via its condition
+    } else {
+      CloseSocket(attempt);
+    }
+  }
+
+  ::freeaddrinfo(resolved);
+
+  if (connected == kInvalidSocket) {
+    throw std::runtime_error("Failed to connect to " + host + ":" + port);
+  }
+
+  return connected;
+}
+
+/// Write all of `data` to the socket, retrying after partial sends.
+/// Throws std::runtime_error if send() reports an error or writes nothing.
+inline void SendAll(socket_t fd, const std::string& data) {
+  const char* cursor = data.data();
+  size_t remaining = data.size();
+  while (remaining > 0) {
+    const auto written = ::send(fd, cursor, static_cast<int>(remaining), 0);
+    if (written <= 0) {
+      throw std::runtime_error("Socket send failed");
+    }
+
+    cursor += written;
+    remaining -= static_cast<size_t>(written);
+  }
+}
+
+/// Accumulate everything the peer sends until it closes the connection.
+/// Throws std::runtime_error if recv() reports an error.
+inline std::string ReadAll(socket_t fd) {
+  std::string received;
+  char chunk[8192];
+  for (;;) {
+    const auto count = ::recv(fd, chunk, sizeof(chunk), 0);
+    if (count == 0) {
+      return received;  // peer closed
+    }
+
+    if (count < 0) {
+      throw std::runtime_error("Socket recv failed");
+    }
+
+    received.append(chunk, static_cast<size_t>(count));
+  }
+}
+
+/// Break a raw HTTP response into its status code and body.
+/// A body sent with "Transfer-Encoding: chunked" is decoded; decoding stops quietly
+/// at the terminating zero-size chunk or at the first malformed/truncated chunk.
+/// Throws std::runtime_error when the header terminator (blank line) is missing.
+inline Response ParseHttpResponse(const std::string& raw) {
+  const auto split = raw.find("\r\n\r\n");
+  if (split == std::string::npos) {
+    throw std::runtime_error("Malformed HTTP response (no header terminator)");
+  }
+
+  const std::string head = raw.substr(0, split);
+  const std::string payload = raw.substr(split + 4);
+
+  Response result;
+
+  // Status line: "HTTP/1.1 200 OK" — the code follows the first space.
+  const auto first_space = head.find(' ');
+  if (first_space != std::string::npos) {
+    result.status = std::atoi(head.c_str() + first_space + 1);
+  }
+
+  // Header names are case-insensitive, so search a lower-cased copy.
+  std::string folded = head;
+  for (size_t i = 0; i < folded.size(); ++i) {
+    folded[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(folded[i])));
+  }
+
+  const bool chunked = folded.find("transfer-encoding: chunked") != std::string::npos;
+  if (!chunked) {
+    result.body = payload;
+    return result;
+  }
+
+  // Chunked framing: "<hex size>\r\n<data>\r\n" repeated, ended by a zero-size chunk.
+  size_t cursor = 0;
+  while (cursor < payload.size()) {
+    const auto size_line_end = payload.find("\r\n", cursor);
+    if (size_line_end == std::string::npos) {
+      break;
+    }
+
+    const std::string size_text = payload.substr(cursor, size_line_end - cursor);
+    const size_t length = std::strtoul(size_text.c_str(), nullptr, 16);
+    const size_t chunk_begin = size_line_end + 2;
+    if (length == 0 || chunk_begin + length > payload.size()) {
+      break;
+    }
+
+    result.body.append(payload, chunk_begin, length);
+    cursor = chunk_begin + length + 2;  // skip chunk data + trailing CRLF
+  }
+
+  return result;
+}
+
+}  // namespace detail
+
+/// POST a body to host:port/path and return the full response.
+/// `extra_headers` entries are sent verbatim (e.g. {"Accept: application/json"}).
+///
+/// The request is sent with `Connection: close` and the response is read until the
+/// server closes the socket. `host` may be a name, an IPv4 address, or an IPv6
+/// literal with or without brackets; an unbracketed IPv6 literal is bracketed in
+/// the Host header. Throws std::runtime_error on connection, send, receive or
+/// response-parsing failures.
+inline Response Post(const std::string& host, const std::string& port, const std::string& path,
+                     const std::string& body, const std::string& content_type = "application/json",
+                     const std::vector<std::string>& extra_headers = {}) {
+  detail::Socket sock(detail::Connect(host, port));
+
+  // An IPv6 literal in an authority must be bracketed, otherwise its colons are
+  // indistinguishable from the port separator.
+  const bool bare_ipv6 = host.find(':') != std::string::npos && host.front() != '[';
+  const std::string authority_host = bare_ipv6 ? "[" + host + "]" : host;
+
+  std::string request;
+  request += "POST " + path + " HTTP/1.1\r\n";
+  request += "Host: " + authority_host + ":" + port + "\r\n";
+  request += "Content-Type: " + content_type + "\r\n";
+  request += "Content-Length: " + std::to_string(body.size()) + "\r\n";
+  request += "Connection: close\r\n";
+  for (const auto& header : extra_headers) {
+    request += header + "\r\n";
+  }
+
+  // Blank line ends the header block; the body follows immediately.
+  request += "\r\n";
+  request += body;
+
+  detail::SendAll(sock.get(), request);
+
+  return detail::ParseHttpResponse(detail::ReadAll(sock.get()));
+}
+
+}  // namespace sample::http
diff --git a/samples/cpp/embeddings/CMakeLists.txt b/samples/cpp/embeddings/CMakeLists.txt
new file mode 100644
index 000000000..8c2618358
--- /dev/null
+++ b/samples/cpp/embeddings/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Copyright (c) Microsoft. All rights reserved.
+#
+# Standalone build for the Foundry Local C++ "embeddings" sample.
+# Build the SDK first:  python ../../../sdk_v2/cpp/build.py
+# Then:                 cmake -S . -B build && cmake --build build
+
+cmake_minimum_required(VERSION 3.20)
+project(foundry_local_embeddings_sample CXX)
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Locate the locally-built SDK and define the foundry_local_cpp target.
+include(${CMAKE_CURRENT_LIST_DIR}/../cmake/FoundryLocalSDK.cmake)
+
+add_executable(embeddings main.cc)
+target_link_libraries(embeddings PRIVATE foundry_local_cpp)
+
+# Bake in the rpath so the executable finds the SDK shared library at runtime.
+foundry_local_configure_sample(embeddings)
diff --git a/samples/cpp/embeddings/README.md b/samples/cpp/embeddings/README.md
new file mode 100644
index 000000000..12462139b
--- /dev/null
+++ b/samples/cpp/embeddings/README.md
@@ -0,0 +1,55 @@
+# Embeddings (C++)
+
+Generates text embeddings **natively, in-process** with the Foundry Local C++ SDK
+(`sdk_v2/cpp`) — no web server involved. It embeds a single sentence, then a batch
+of sentences, and prints the cosine similarity between every pair so you can see
+that semantically related sentences score higher.
+
+This sample tracks **`main`** — it builds against your **local** `sdk_v2/cpp` build,
+not a pinned SDK release.
+
+## What it does
+
+1. Creates a `Manager` and finds the first `embeddings` model in the catalog
+   (e.g. `qwen3-embedding-0.6b`).
+2. Downloads the model if it isn't cached, then loads it.
+3. Uses an `EmbeddingsSession` to:
+   - embed a single string and print its dimensionality + first few values;
+   - embed a batch of strings and print pairwise cosine similarities.
+
+## Prerequisites
+
+Build the SDK once so the shared library and headers exist:
+
+```bash
+python ../../../sdk_v2/cpp/build.py
+```
+
+This produces `sdk_v2/cpp/build/<macOS|Linux|Windows>/RelWithDebInfo/`, which the
+sample's CMake locates automatically.
+
+## Build
+
+```bash
+cmake -S . -B build
+cmake --build build
+```
+
+If you built the SDK with a different configuration, pass it through:
+
+```bash
+cmake -S . -B build -DFOUNDRY_LOCAL_BUILD_CONFIG=Debug
+```
+
+You can also point at a non-default SDK location with
+`-DFOUNDRY_LOCAL_SDK_DIR=<path-to>/sdk_v2/cpp` or
+`-DFOUNDRY_LOCAL_BUILD_DIR=<path-to-build-output>`.
+
+## Run
+
+```bash
+./build/embeddings          # Windows: .\build\embeddings.exe
+```
+
+The first run downloads the embeddings model (a few hundred MB); subsequent runs
+use the cache.
diff --git a/samples/cpp/embeddings/main.cc b/samples/cpp/embeddings/main.cc
new file mode 100644
index 000000000..c4d5bdc02
--- /dev/null
+++ b/samples/cpp/embeddings/main.cc
@@ -0,0 +1,146 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// Sample: Text embeddings with the Foundry Local C++ SDK (sdk_v2/cpp).
+// Demonstrates native, in-process embedding generation for a single input and a
+// batch, then computes cosine similarity between the batch vectors.
+
+#include <foundry_local/foundry_local_cpp.h>
+
+#include <cmath>
+#include <cstddef>
+#include <exception>
+#include <iostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace foundry_local;
+
+namespace {
+
+/// Cosine similarity between two vectors. Returns 0 if either is a zero vector.
+/// If the lengths differ, only the overlapping leading elements are compared.
+float CosineSimilarity(const std::vector<float>& a, const std::vector<float>& b) {
+  float dot = 0.0f;
+  float norm_a = 0.0f;
+  float norm_b = 0.0f;
+  for (size_t i = 0; i < a.size() && i < b.size(); ++i) {
+    dot += a[i] * b[i];
+    norm_a += a[i] * a[i];
+    norm_b += b[i] * b[i];
+  }
+
+  if (norm_a == 0.0f || norm_b == 0.0f) {
+    return 0.0f;
+  }
+
+  return dot / (std::sqrt(norm_a) * std::sqrt(norm_b));
+}
+
+/// Find the first embeddings model in the catalog and return its alias, or "" if none exist.
+std::string FindEmbeddingsAlias(ICatalog& catalog) {
+  ModelList all_models = catalog.GetModels();
+  for (const auto& model : all_models.Models()) {
+    if (model->GetInfo().Task() == "embeddings") {
+      return std::string(model->GetInfo().Alias());
+    }
+  }
+
+  return "";
+}
+
+}  // namespace
+
+int main() {
+  try {
+    // 1. Create a configuration and manager (long-lived; keep it alive while using the SDK).
+    Configuration config("foundry_local_samples");
+    Manager manager(std::move(config));
+
+    // 2. Locate an embeddings model in the catalog.
+    auto& catalog = manager.GetCatalog();
+    const std::string alias = FindEmbeddingsAlias(catalog);
+    if (alias.empty()) {
+      std::cerr << "No embeddings model found in the catalog.\n";
+      return 1;
+    }
+
+    auto model = catalog.GetModel(alias);
+    if (!model) {
+      std::cerr << "Failed to retrieve embeddings model '" << alias << "'.\n";
+      return 1;
+    }
+
+    ModelInfo info = model->GetInfo();
+    std::cout << "Using model: " << info.Name() << " (alias: " << info.Alias() << ")\n";
+
+    // 3. Download if not already cached.
+    if (!model->IsCached()) {
+      std::cout << "Downloading...\n";
+      model->Download([](float progress) -> int {
+        std::cout << "\r  " << static_cast<int>(progress) << "%" << std::flush;
+        return 0;  // return non-zero to cancel
+      });
+      std::cout << "\n";
+    }
+
+    // 4. Load the model into memory.
+    if (!model->IsLoaded()) {
+      std::cout << "Loading model...\n";
+      model->Load();
+    }
+
+    // 5. Create an embeddings session and generate vectors.
+    {
+      EmbeddingsSession session(*model);
+
+      std::cout << "\n=== Single embedding ===\n";
+      std::vector<float> embedding = session.Embed("The quick brown fox jumps over the lazy dog.");
+      std::cout << "Dimensions: " << embedding.size() << "\n";
+      std::cout << "First 5 values: [";
+      for (size_t i = 0; i < 5 && i < embedding.size(); ++i) {
+        std::cout << (i > 0 ? ", " : "") << embedding[i];
+      }
+      std::cout << "]\n";
+
+      std::cout << "\n=== Batch embeddings + cosine similarity ===\n";
+      const std::vector<std::string> sentences = {
+          "The cat sat on the mat.",
+          "A kitten rested on the rug.",
+          "The stock market crashed yesterday.",
+      };
+
+      std::vector<std::vector<float>> embeddings = session.Embed(sentences);
+      // The pairwise loop below indexes `embeddings` by sentence position, so
+      // require exactly one vector per input sentence before going further.
+      if (embeddings.size() != sentences.size()) {
+        std::cerr << "Expected " << sentences.size() << " embeddings for the batch input but received "
+                  << embeddings.size() << ".\n";
+        return 1;
+      }
+
+      std::cout << "Generated " << embeddings.size() << " embeddings of dimension " << embeddings[0].size() << "\n\n";
+
+      // Compare every pair: semantically similar sentences should score higher.
+      for (size_t i = 0; i < sentences.size(); ++i) {
+        for (size_t j = i + 1; j < sentences.size(); ++j) {
+          const float similarity = CosineSimilarity(embeddings[i], embeddings[j]);
+          std::cout << "  similarity(\"" << sentences[i] << "\",\n"
+                    << "             \"" << sentences[j] << "\") = " << similarity << "\n\n";
+        }
+      }
+    }  // session destroyed before unload
+
+    // 6. Unload when done (the destructor would also handle this).
+    model->Unload();
+  } catch (const Error& ex) {
+    std::cerr << "Foundry Local error [" << ex.Code() << "]: " << ex.what() << "\n";
+    return 1;
+  } catch (const std::exception& ex) {
+    std::cerr << "Error: " << ex.what() << "\n";
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/samples/cpp/live-audio-transcription/README.md b/samples/cpp/live-audio-transcription/README.md
deleted file mode 100644
index 3e8b8e8d6..000000000
--- a/samples/cpp/live-audio-transcription/README.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Live Audio Transcription Example (C++)
-
-Demonstrates real-time microphone-to-text using the Foundry Local C++ SDK.
-
-Uses [PortAudio](http://www.portaudio.com/) for cross-platform microphone capture
-(the C/C++ equivalent of `naudiodon2` used by the JS sample). If PortAudio is not
-available, falls back to synthetic PCM audio.
-
-
-## Build
-
-```bash
-# With PortAudio (live microphone)
-g++ -std=c++20 -DHAS_PORTAUDIO main.cpp -lfoundry_local -lportaudio -o live-audio-transcription-example
-
-# Without PortAudio (synthetic audio only)
-g++ -std=c++20 main.cpp -lfoundry_local -o live-audio-transcription-example
-```
-
-## Run
-
-```bash
-# Live microphone (requires PortAudio)
-./live-audio-transcription-example
-
-# Synthetic 440Hz sine wave (no microphone needed)
-./live-audio-transcription-example --synth
-```
-
-Press `Ctrl+C` to request a graceful stop. The sample passes that signal to
-execution-provider and model downloads so long-running downloads can be
-cancelled before transcription starts.
diff --git a/samples/cpp/live-audio-transcription/main.cpp b/samples/cpp/live-audio-transcription/main.cpp
deleted file mode 100644
index 9068a46c3..000000000
--- a/samples/cpp/live-audio-transcription/main.cpp
+++ /dev/null
@@ -1,281 +0,0 @@
-// Live Audio Transcription — Foundry Local C++ SDK Example
-//
-// Demonstrates real-time microphone-to-text using the C++ SDK.
-// Uses PortAudio for cross-platform mic capture (like naudiodon2 in the JS sample).
-// Falls back to synthetic PCM if PortAudio is unavailable.
-//
-// Requires: PortAudio (libportaudio), Foundry Local C++ SDK
-//
-// Usage: ./live-audio-transcription-example [--synth]
-
-#include <algorithm>
-#include <atomic>
-#include <chrono>
-#include <climits>
-#include <cmath>
-#include <csignal>
-#include <cstdint>
-#include <deque>
-#include <iostream>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <vector>
-
-#include "foundry_local.h"
-
-// PortAudio is optional — compile with -DHAS_PORTAUDIO and link -lportaudio
-// to enable live microphone capture.
-#ifdef HAS_PORTAUDIO
-#include <portaudio.h>
-#endif
-
-namespace {
-
-// Global flag for Ctrl+C graceful shutdown (mirrors JS process.on('SIGINT'))
-std::atomic<bool> g_running{true};
-
-void SignalHandler(int /*signum*/) {
-    g_running = false;
-}
-
-// Bounded audio queue (mirrors JS appendQueue with cap of 100)
-class AudioQueue {
-public:
-    void Push(std::vector<uint8_t> chunk) {
-        std::lock_guard<std::mutex> lock(mu_);
-        if (queue_.size() >= kMaxSize) {
-            queue_.pop_front();
-            if (!warnedDrop_) {
-                warnedDrop_ = true;
-                std::cerr << "Audio append queue overflow; dropping oldest chunk to keep stream alive." << std::endl;
-            }
-        }
-        queue_.push_back(std::move(chunk));
-    }
-
-    bool TryPop(std::vector<uint8_t>& out) {
-        std::lock_guard<std::mutex> lock(mu_);
-        if (queue_.empty()) return false;
-        out = std::move(queue_.front());
-        queue_.pop_front();
-        return true;
-    }
-
-private:
-    static constexpr size_t kMaxSize = 100;
-    std::deque<std::vector<uint8_t>> queue_;
-    std::mutex mu_;
-    bool warnedDrop_ = false;
-};
-
-std::vector<uint8_t> GenerateSineWavePcm(int sampleRate, int durationSeconds, double frequencyHz) {
-    const auto totalSamples = static_cast<size_t>(sampleRate * durationSeconds);
-    std::vector<uint8_t> pcm(totalSamples * 2, 0); // 16-bit mono, little-endian
-
-    for (size_t i = 0; i < totalSamples; ++i) {
-        const double t = static_cast<double>(i) / static_cast<double>(sampleRate);
-        const auto sample = static_cast<int16_t>(
-            static_cast<double>(INT16_MAX) * 0.5 * std::sin(2.0 * 3.14159265358979323846 * frequencyHz * t));
-        const auto encodedSample = static_cast<uint16_t>(sample);
-        pcm[i * 2] = static_cast<uint8_t>(encodedSample & 0xFF);
-        pcm[i * 2 + 1] = static_cast<uint8_t>((encodedSample >> 8) & 0xFF);
-    }
-    return pcm;
-}
-
-#ifdef HAS_PORTAUDIO
-// PortAudio callback — captures 16-bit mono PCM and pushes to the queue
-int PaCallback(const void* input, void* /*output*/,
-               unsigned long frameCount,
-               const PaStreamCallbackTimeInfo* /*timeInfo*/,
-               PaStreamCallbackFlags /*statusFlags*/,
-               void* userData) {
-    auto* queue = static_cast<AudioQueue*>(userData);
-    const auto* pcm = static_cast<const uint8_t*>(input);
-    const size_t byteCount = frameCount * 2; // 16-bit mono = 2 bytes per frame
-    std::vector<uint8_t> chunk(pcm, pcm + byteCount);
-    queue->Push(std::move(chunk));
-    return g_running ? paContinue : paComplete;
-}
-#endif
-
-} // namespace
-
-int main(int argc, char* argv[]) {
-    bool useSynth = false;
-    for (int i = 1; i < argc; ++i) {
-        if (std::string(argv[i]) == "--synth") useSynth = true;
-    }
-
-    // Install Ctrl+C handler (mirrors JS process.on('SIGINT'))
-    std::signal(SIGINT, SignalHandler);
-
-    try {
-        std::cout << "===========================================================" << std::endl;
-        std::cout << "   Foundry Local -- Live Audio Transcription Demo (C++)" << std::endl;
-        std::cout << "===========================================================" << std::endl;
-        std::cout << std::endl;
-
-        foundry_local::Configuration config;
-        config.appName = "foundry_local_samples";
-
-        foundry_local::Manager::Create(config);
-        auto& manager = foundry_local::Manager::Instance();
-        auto isCancellationRequested = [] { return !g_running.load(); };
-        manager.DownloadAndRegisterEps(nullptr, isCancellationRequested);
-
-        auto& catalog = manager.GetCatalog();
-        // English-only:
-        const char* modelAlias = "nemotron-speech-streaming-en-0.6b";
-        // Multi-lingual (supports 30+ languages including auto-detect):
-        // const char* modelAlias = "nemotron-3.5-asr-streaming-0.6b";
-        auto* model = catalog.GetModel(modelAlias);
-        if (!model) {
-            throw std::runtime_error(std::string("Model \"") + modelAlias + "\" not found in catalog");
-        }
-
-        std::cout << "Downloading model (if needed)..." << std::endl;
-        model->Download(
-            [](float pct) {
-                std::cout << "\rDownloading: " << pct << "%   " << std::flush;
-                return true;
-            },
-            isCancellationRequested);
-        std::cout << std::endl;
-        std::cout << "Loading model..." << std::endl;
-        model->Load();
-        std::cout << "Model loaded" << std::endl;
-
-        // NOTE: CreateLiveTranscriptionSession() is not yet available in the C++ SDK.
-        // The audio client and session code below is forward-looking.
-        foundry_local::OpenAIAudioClient audioClient(*model);
-        auto session = audioClient.CreateLiveTranscriptionSession();
-
-        session->Settings().sample_rate = 16000;
-        session->Settings().channels = 1;
-        session->Settings().bits_per_sample = 16;
-        session->Settings().language = "en";                  // English (default)
-        // Multi-lingual examples:
-        // session->Settings().language = "de";     // German
-        // session->Settings().language = "zh-CN";  // Chinese (Simplified)
-        // session->Settings().language = "auto";   // Auto-detect language
-        session->Start();
-        std::cout << "Session started" << std::endl;
-
-        // Read transcription results in a background thread (mirrors JS readPromise)
-        std::thread readThread([&session]() {
-            foundry_local::LiveAudioTranscriptionResponse result;
-            while (g_running) {
-                const auto status = session->TryGetNext(result, std::chrono::milliseconds(500));
-                if (status == foundry_local::TranscriptionStatus::Result) {
-                    if (result.is_final) {
-                        std::cout << "\n  [FINAL] " << result.text << std::endl;
-                    } else if (!result.text.empty()) {
-                        std::cout << result.text << std::flush;
-                    }
-                } else if (status == foundry_local::TranscriptionStatus::Closed) {
-                    break;
-                } else if (status == foundry_local::TranscriptionStatus::Timeout) {
-                    continue;
-                } else {
-                    std::cerr << "Transcription stream error: " << session->GetErrorMessage() << std::endl;
-                    break;
-                }
-            }
-        });
-
-        // --- Microphone capture (mirrors JS naudiodon2 section) ---
-        // Uses PortAudio for cross-platform audio capture. If PortAudio is not
-        // available or --synth is passed, falls back to synthetic PCM.
-
-        bool micActive = false;
-
-#ifdef HAS_PORTAUDIO
-        PaStream* paStream = nullptr;
-        AudioQueue audioQueue;
-
-        if (!useSynth) {
-            PaError err = Pa_Initialize();
-            if (err == paNoError) {
-                PaStreamParameters inputParams{};
-                inputParams.device = Pa_GetDefaultInputDevice();
-                if (inputParams.device != paNoDevice) {
-                    inputParams.channelCount = 1;
-                    inputParams.sampleFormat = paInt16;
-                    inputParams.suggestedLatency =
-                        Pa_GetDeviceInfo(inputParams.device)->defaultLowInputLatency;
-                    inputParams.hostApiSpecificStreamInfo = nullptr;
-
-                    // framesPerBuffer=3200 matches JS framesPerBuffer setting
-                    err = Pa_OpenStream(&paStream, &inputParams, nullptr,
-                                        16000, 3200, paClipOff,
-                                        PaCallback, &audioQueue);
-                    if (err == paNoError) {
-                        err = Pa_StartStream(paStream);
-                    }
-                }
-
-                if (err == paNoError && paStream) {
-                    micActive = true;
-                    std::cout << std::endl;
-                    std::cout << "===========================================================" << std::endl;
-                    std::cout << "  LIVE TRANSCRIPTION ACTIVE" << std::endl;
-                    std::cout << "  Speak into your microphone." << std::endl;
-                    std::cout << "  Press Ctrl+C to stop." << std::endl;
-                    std::cout << "===========================================================" << std::endl;
-                    std::cout << std::endl;
-
-                    // Pump audio from the queue to the session (mirrors JS pumpAudio)
-                    while (g_running) {
-                        std::vector<uint8_t> chunk;
-                        if (audioQueue.TryPop(chunk)) {
-                            session->Append(chunk.data(), chunk.size());
-                        } else {
-                            std::this_thread::sleep_for(std::chrono::milliseconds(10));
-                        }
-                    }
-
-                    Pa_StopStream(paStream);
-                    Pa_CloseStream(paStream);
-                } else {
-                    std::cerr << "Could not initialize microphone: "
-                              << Pa_GetErrorText(err) << std::endl;
-                    std::cerr << "Falling back to synthetic audio test..." << std::endl;
-                    std::cerr << std::endl;
-                }
-                Pa_Terminate();
-            }
-        }
-#endif
-
-        // Fallback: push synthetic PCM (440Hz sine wave) — mirrors JS catch block
-        if (!micActive) {
-            std::cout << "Pushing synthetic audio (440Hz sine, 2s)..." << std::endl;
-            const auto pcm = GenerateSineWavePcm(16000, 2, 440.0);
-            const size_t chunkSize = static_cast<size_t>(16000 / 10 * 2); // 100ms
-            for (size_t offset = 0; offset < pcm.size() && g_running; offset += chunkSize) {
-                const size_t len = std::min(chunkSize, pcm.size() - offset);
-                session->Append(pcm.data() + offset, len);
-                std::this_thread::sleep_for(std::chrono::milliseconds(100));
-            }
-            std::cout << "Synthetic audio pushed" << std::endl;
-
-            // Wait briefly for remaining transcription results
-            std::this_thread::sleep_for(std::chrono::seconds(3));
-        }
-
-        // Graceful shutdown (mirrors JS SIGINT handler)
-        std::cout << "\n\nStopping..." << std::endl;
-        session->Stop();
-        readThread.join();
-        model->Unload();
-        foundry_local::Manager::Destroy();
-        std::cout << "Done" << std::endl;
-        return 0;
-    } catch (const std::exception& ex) {
-        std::cerr << "Error: " << ex.what() << std::endl;
-        foundry_local::Manager::Destroy();
-        return 1;
-    }
-}
diff --git a/samples/cpp/responses-api/CMakeLists.txt b/samples/cpp/responses-api/CMakeLists.txt
new file mode 100644
index 000000000..55ba58c71
--- /dev/null
+++ b/samples/cpp/responses-api/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) Microsoft. All rights reserved.
+#
+# Standalone build for the Foundry Local C++ "responses-api" (vision) sample.
+# Build the SDK first:  python ../../../sdk_v2/cpp/build.py
+# Then:                 cmake -S . -B build && cmake --build build
+
+cmake_minimum_required(VERSION 3.20)
+project(foundry_local_responses_api_sample CXX)
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Locate the locally-built SDK and define the foundry_local_cpp target.
+include(${CMAKE_CURRENT_LIST_DIR}/../cmake/FoundryLocalSDK.cmake)
+
+add_executable(responses_api main.cc)
+
+# Shared sample helpers (HTTP client, base64) live in samples/cpp/common.
+target_include_directories(responses_api PRIVATE ${CMAKE_CURRENT_LIST_DIR}/..)
+target_link_libraries(responses_api PRIVATE foundry_local_cpp)
+
+# Absolute path to this sample dir so the bundled test image is found regardless
+# of where the executable runs from.
+target_compile_definitions(responses_api PRIVATE SAMPLE_SOURCE_DIR="${CMAKE_CURRENT_LIST_DIR}")
+
+# Bake in the rpath so the executable finds the SDK shared library at runtime.
+foundry_local_configure_sample(responses_api)
diff --git a/samples/cpp/responses-api/README.md b/samples/cpp/responses-api/README.md
new file mode 100644
index 000000000..95ac34739
--- /dev/null
+++ b/samples/cpp/responses-api/README.md
@@ -0,0 +1,65 @@
+# Responses API — Vision (C++)
+
+Demonstrates **image understanding** with the Foundry Local C++ SDK (`sdk_v2/cpp`)
+through the OpenAI-compatible **Responses API** (`POST /v1/responses`).
+
+The Responses API is exposed by the embedded **web service**, so this sample hosts
+that service in-process with `AddWebServiceEndpoint` + `StartWebService`, then sends
+an image + prompt over loopback HTTP and prints the model's description.
+
+This sample tracks **`main`** — it builds against your **local** `sdk_v2/cpp` build,
+not a pinned SDK release.
+
+## What it does
+
+1. Creates a `Manager` with an embedded web service endpoint
+   (`http://127.0.0.1:0` — an ephemeral port).
+2. Resolves a vision-capable model (default: `qwen3.5-0.8b`), downloading and
+   loading it if needed.
+3. Starts the web service and discovers the bound URL via `GetWebServiceEndpoints()`.
+4. Base64-encodes a bundled image into a `data:image/jpeg;base64,...` URL.
+5. POSTs a Responses API request whose message has an `input_text` part and an
+   `input_image` part, then prints the response's `output_text`.
+
+A small default image (`test_image.jpg`, 256×256) ships with the sample so it runs
+out of the box.
+
+> **Image input format.** The sdk_v2 Responses API expects `input_image.image_url`
+> to be either a `data:` URL (used here) or a local file path — remote `http(s)`
+> image URLs are not supported. This differs from the v1 sample, which used a
+> separate `image_data` + `media_type` shape.
+>
+> **Model id vs alias.** The web service resolves models by their full **variant
+> id** (from `ModelInfo::Id()`), not the short alias.
+
+## Prerequisites
+
+```bash
+python ../../../sdk_v2/cpp/build.py
+```
+
+## Build
+
+```bash
+cmake -S . -B build
+cmake --build build
+```
+
+Override the SDK config/location if needed:
+`-DFOUNDRY_LOCAL_BUILD_CONFIG=Debug`, `-DFOUNDRY_LOCAL_SDK_DIR=...`,
+`-DFOUNDRY_LOCAL_BUILD_DIR=...`.
+
+## Run
+
+```bash
+# Default vision model + bundled image:
+./build/responses_api                          # Windows: .\build\responses_api.exe
+
+# Custom vision model:
+./build/responses_api qwen3.5-0.8b
+
+# Custom model + custom image:
+./build/responses_api qwen3.5-0.8b /path/to/image.jpg
+```
+
+The first run downloads the model; later runs use the cache.
diff --git a/samples/cpp/responses-api/main.cc b/samples/cpp/responses-api/main.cc
new file mode 100644
index 000000000..6ec24b7a2
--- /dev/null
+++ b/samples/cpp/responses-api/main.cc
@@ -0,0 +1,214 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// Sample: Vision / image understanding via the Foundry Local embedded web service
+// and the OpenAI Responses API (POST /v1/responses).
+//
+// Flow:
+//   1. Host the web service (AddWebServiceEndpoint + StartWebService).
+//   2. Load a vision-capable model.
+//   3. Base64-encode a local image into a `data:` URL.
+//   4. POST /v1/responses with an `input_text` + `input_image` message.
+//   5. Print the model's description from the response's `output_text`.
+//
+// The Responses API is only exposed over the web service, so — unlike the chat
+// and embeddings samples — vision here goes through HTTP rather than a native
+// in-process session.
+
+#include <foundry_local/foundry_local_cpp.h>
+
+#include <nlohmann/json.hpp>
+
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/base64.h"
+#include "common/local_http_client.h"
+
+using namespace foundry_local;
+using json = nlohmann::json;
+
+namespace {
+
+// Default vision model alias (overridable on the command line), matching the
+// other-language responses-vision samples.
+constexpr const char* kDefaultModelAlias = "qwen3.5-0.8b";
+
+/// Read an entire file into a byte buffer. Throws std::runtime_error if it can't be opened.
+std::vector<uint8_t> ReadFileBytes(const std::filesystem::path& path) {
+  std::ifstream file(path, std::ios::binary);
+  if (!file) {
+    throw std::runtime_error("Failed to open image: " + path.string());
+  }
+
+  return std::vector<uint8_t>((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
+}
+
+/// Pick the data-URL MIME type from the file extension (compared case-insensitively).
+/// Unrecognized or missing extensions fall back to "image/jpeg".
+std::string GuessImageMimeType(const std::filesystem::path& path) {
+  std::string extension = path.extension().string();
+  std::transform(extension.begin(), extension.end(), extension.begin(),
+                 [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
+
+  if (extension == ".png") {
+    return "image/png";
+  }
+
+  if (extension == ".gif") {
+    return "image/gif";
+  }
+
+  if (extension == ".webp") {
+    return "image/webp";
+  }
+
+  return "image/jpeg";
+}
+
+/// Build the /v1/responses request body: one user message with text + image content.
+json BuildVisionRequest(const std::string& model_id, const std::string& data_url, const std::string& prompt) {
+  return json{
+      {"model", model_id},
+      {"stream", false},
+      {"input",
+       json::array({{{"type", "message"},
+                     {"role", "user"},
+                     {"content", json::array({{{"type", "input_text"}, {"text", prompt}},
+                                              {{"type", "input_image"}, {"image_url", data_url}}})}}})}};
+}
+
+/// Return the string stored under `key` in `value`, or "" if it is absent or not a string.
+std::string StringField(const json& value, const char* key) {
+  const auto it = value.find(key);
+  return (it != value.end() && it->is_string()) ? it->get<std::string>() : std::string();
+}
+
+/// Extract the assistant text from a parsed /v1/responses reply.
+/// Prefers a non-empty top-level `output_text` string; otherwise concatenates the
+/// `text` of every content part of type `output_text` found under the `output` array.
+std::string ExtractOutputText(const json& response) {
+  std::string text = StringField(response, "output_text");
+  if (!text.empty()) {
+    return text;
+  }
+
+  const auto output = response.find("output");
+  if (output == response.end() || !output->is_array()) {
+    return text;
+  }
+
+  for (const auto& item : *output) {
+    const auto content = item.find("content");
+    if (content == item.end() || !content->is_array()) {
+      continue;
+    }
+
+    for (const auto& part : *content) {
+      if (StringField(part, "type") == "output_text") {
+        text += StringField(part, "text");
+      }
+    }
+  }
+
+  return text;
+}
+
+}  // namespace
+
+int main(int argc, char* argv[]) {
+  const std::string model_alias = argc > 1 ? argv[1] : kDefaultModelAlias;
+
+  // Default to the bundled test image; allow an override as the second argument.
+  std::filesystem::path image_path =
+      argc > 2 ? std::filesystem::path(argv[2]) : std::filesystem::path(SAMPLE_SOURCE_DIR) / "test_image.jpg";
+
+  try {
+    // 1. Configure the SDK with an embedded web service endpoint.
+    Configuration config("foundry_local_samples");
+    config.AddWebServiceEndpoint("http://127.0.0.1:0");
+
+    Manager manager(std::move(config));
+
+    // 2. Resolve and prepare the vision model.
+    auto& catalog = manager.GetCatalog();
+    auto model = catalog.GetModel(model_alias);
+    if (!model) {
+      std::cerr << "Model '" << model_alias << "' not found in catalog.\n";
+      return 1;
+    }
+
+    std::cout << "Using model: " << model->GetInfo().Name() << "\n";
+
+    if (!model->IsCached()) {
+      std::cout << "Downloading...\n";
+      model->Download([](float progress) -> int {
+        std::cout << "\r  " << static_cast<int>(progress) << "%" << std::flush;
+        return 0;  // return non-zero to cancel
+      });
+      std::cout << "\n";
+    }
+
+    if (!model->IsLoaded()) {
+      std::cout << "Loading model...\n";
+      model->Load();
+    }
+
+    // 3. Start the web service and discover its bound URL.
+    std::cout << "\n=== Starting web service ===\n";
+    manager.StartWebService();
+
+    const std::vector<std::string> endpoints = manager.GetWebServiceEndpoints();
+    if (endpoints.empty()) {
+      std::cerr << "Web service did not report any endpoints.\n";
+      return 1;
+    }
+
+    const sample::http::Url url = sample::http::ParseUrl(endpoints[0]);
+    std::cout << "Web service listening at " << endpoints[0] << "\n";
+
+    // 4. Encode the image as a base64 data URL (the Responses API requires a
+    //    `data:<mime>;base64,<payload>` URL or a local file path for input_image).
+    std::cout << "\n=== Vision request (POST /v1/responses) ===\n";
+    std::cout << "Image: " << image_path.string() << "\n";
+
+    const std::vector<uint8_t> image_bytes = ReadFileBytes(image_path);
+    const std::string data_url =
+        "data:" + GuessImageMimeType(image_path) + ";base64," + sample::Base64Encode(image_bytes);
+
+    const json body = BuildVisionRequest(std::string(model->GetInfo().Id()), data_url, "Describe this image in detail.");
+
+    const sample::http::Response response = sample::http::Post(url.host, url.port, "/v1/responses", body.dump());
+
+    if (response.status != 200) {
+      std::cerr << "HTTP " << response.status << ": " << response.body << "\n";
+      manager.StopWebService();
+      return 1;
+    }
+
+    // 5. Print the assistant's description.
+    const json parsed = json::parse(response.body);
+    std::cout << "\nAssistant: " << ExtractOutputText(parsed) << "\n";
+
+    manager.StopWebService();
+    model->Unload();
+  } catch (const Error& ex) {
+    std::cerr << "Foundry Local error [" << ex.Code() << "]: " << ex.what() << "\n";
+    return 1;
+  } catch (const std::exception& ex) {
+    std::cerr << "Error: " << ex.what() << "\n";
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/samples/cs/foundry-local-web-server-responses-vision/test_image.jpg b/samples/cpp/responses-api/test_image.jpg
similarity index 100%
rename from samples/cs/foundry-local-web-server-responses-vision/test_image.jpg
rename to samples/cpp/responses-api/test_image.jpg
diff --git a/samples/cs/Directory.Packages.props b/samples/cs/Directory.Packages.props
index 77b68c4cc..f8d39f79d 100644
--- a/samples/cs/Directory.Packages.props
+++ b/samples/cs/Directory.Packages.props
@@ -1,12 +1,14 @@
 <Project>
   <PropertyGroup>
     <ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
-    <CentralPackageFloatingVersionsEnabled>true</CentralPackageFloatingVersionsEnabled>
   </PropertyGroup>
   <ItemGroup>
-    <PackageVersion Include="Microsoft.AI.Foundry.Local" Version="*-*" />
-    <PackageVersion Include="Microsoft.AI.Foundry.Local.WinML" Version="*-*" />
-    <PackageVersion Include="Betalgo.Ranul.OpenAI" Version="9.2.0" />
+    <!--
+      The Foundry Local SDK is consumed from local source via a ProjectReference
+      (see each sample's .csproj). These samples track 'main' and are not version-pinned.
+      The Microsoft.AI.Foundry.Local.Core* packages and native runtime assets flow
+      transitively from the SDK project and restore from nuget.org.
+    -->
     <PackageVersion Include="Microsoft.Extensions.Logging" Version="9.0.15" />
     <PackageVersion Include="Microsoft.Extensions.Logging.Console" Version="9.0.15" />
     <PackageVersion Include="NAudio" Version="2.2.1" />
diff --git a/samples/cs/README.md b/samples/cs/README.md
index fb594717e..1de9aa441 100644
--- a/samples/cs/README.md
+++ b/samples/cs/README.md
@@ -1,29 +1,20 @@
 # 🚀 Foundry Local C# Samples
 
-These samples demonstrate how to use the Foundry Local C# SDK. Each sample uses a **unified project file** that automatically detects your operating system and selects the optimal NuGet package:
+These samples demonstrate how to use the Foundry Local C# SDK.
 
-- **Windows**: Uses `Microsoft.AI.Foundry.Local.WinML` for hardware acceleration via Windows ML.
-- **macOS / Linux**: Uses `Microsoft.AI.Foundry.Local` for cross-platform support.
-
-Both packages provide the same APIs, so the same source code works on all platforms.
+They **track `main`** and consume the SDK **from local source** via a `ProjectReference` to
+`sdk/cs/src/Microsoft.AI.Foundry.Local.csproj` — they are **not** pinned to a published package
+version. The `Microsoft.AI.Foundry.Local.Core*` packages and native runtime assets flow
+transitively from that SDK project and restore from nuget.org, along with any third-party packages.
 
 ## Samples
 
 | Sample | Description |
 |---|---|
-| [native-chat-completions](native-chat-completions/) | Initialize the SDK, download a model, and run chat completions. |
+| [chat-completion](chat-completion/) | Run the same chat prompt two ways: native in-process inference **and** the local OpenAI-compatible web server (`/v1/chat/completions`). |
 | [embeddings](embeddings/) | Generate single and batch text embeddings using the Foundry Local SDK. |
-| [audio-transcription-example](audio-transcription-example/) | Transcribe audio files using the Foundry Local SDK. |
-| [foundry-local-web-server](foundry-local-web-server/) | Set up a local OpenAI-compliant web server. |
-| [foundry-local-web-server-responses-vision](foundry-local-web-server-responses-vision/) | Stream a vision (image understanding) response from the local web server using the Responses API. |
-| [tool-calling-foundry-local-sdk](tool-calling-foundry-local-sdk/) | Use tool calling with native chat completions. |
-| [tool-calling-foundry-local-web-server](tool-calling-foundry-local-web-server/) | Use tool calling with the local web server. |
-| [model-management-example](model-management-example/) | Manage models, variant selection, and updates. |
-| [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive chat assistant (tutorial). |
-| [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). |
-| [tutorial-tool-calling](tutorial-tool-calling/) | Create a tool-calling assistant (tutorial). |
-| [tutorial-voice-to-text](tutorial-voice-to-text/) | Transcribe and summarize audio (tutorial). |
-
+| [audio](audio/) | Live microphone streaming transcription (Nemotron ASR) **and** file-based transcription (Whisper) via `--file [path]`. |
+| [responses-api](responses-api/) | Stream a vision (image understanding) response from the local web server using the Responses API. |
 
 ## Running a sample
 
@@ -33,9 +24,10 @@ Both packages provide the same APIs, so the same source code works on all platfo
    cd Foundry-Local/samples/cs
    ```
 
-2. Open and run a sample:
+2. Build and run a sample (the SDK is resolved from `sdk/cs` source via the project reference;
+   `Microsoft.AI.Foundry.Local.Core` and third-party packages restore from nuget.org):
    ```bash
-   cd native-chat-completions
+   cd chat-completion
+   dotnet build
    dotnet run
    ```
-
diff --git a/samples/cs/audio-transcription-example/AudioTranscriptionExample.sln b/samples/cs/audio-transcription-example/AudioTranscriptionExample.sln
deleted file mode 100644
index 46fb73d98..000000000
--- a/samples/cs/audio-transcription-example/AudioTranscriptionExample.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AudioTranscriptionExample", "AudioTranscriptionExample.csproj", "{11616852-BB4F-4B60-9FAC-D94E2688BB30}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x64.ActiveCfg = Debug|x64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x64.Build.0 = Debug|x64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x86.ActiveCfg = Debug|ARM64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x86.Build.0 = Debug|ARM64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|Any CPU.Build.0 = Release|ARM64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x64.ActiveCfg = Release|x64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x64.Build.0 = Release|x64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x86.ActiveCfg = Release|ARM64
-		{11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/audio-transcription-example/Program.cs b/samples/cs/audio-transcription-example/Program.cs
deleted file mode 100644
index 10047421a..000000000
--- a/samples/cs/audio-transcription-example/Program.cs
+++ /dev/null
@@ -1,87 +0,0 @@
-﻿// <complete_code>
-// <imports>
-using Microsoft.AI.Foundry.Local;
-// </imports>
-
-// <init>
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-
-// Initialize the singleton instance.
-await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
-var mgr = FoundryLocalManager.Instance;
-
-
-// Ensure that any Execution Provider (EP) downloads run and are completed.
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-// </init>
-
-
-// <model_setup>
-// Get the model catalog
-var catalog = await mgr.GetCatalogAsync();
-
-
-// Get a model using an alias and select the CPU model variant
-var model = await catalog.GetModelAsync("whisper-tiny") ?? throw new System.Exception("Model not found");
-var modelVariant = model.Variants.First(v => v.Info.Runtime?.DeviceType == DeviceType.CPU);
-model.SelectVariant(modelVariant);
-
-
-// Download the model (the method skips download if already cached)
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f)
-    {
-        Console.WriteLine();
-    }
-});
-
-
-// Load the model
-Console.Write($"Loading model {model.Id}...");
-await model.LoadAsync();
-Console.WriteLine("done.");
-// </model_setup>
-
-
-// <transcription>
-// Get an audio client
-var audioClient = await model.GetAudioClientAsync();
-audioClient.Settings.Language = "en";
-
-// Get a transcription with streaming outputs
-var audioFile = args.Length > 0 ? args[0] : Path.Combine(AppContext.BaseDirectory, "Recording.mp3");
-Console.WriteLine($"Transcribing audio with streaming output: {Path.GetFileName(audioFile)}");
-var response = audioClient.TranscribeAudioStreamingAsync(audioFile, CancellationToken.None);
-await foreach (var chunk in response)
-{
-    Console.Write(chunk.Text);
-    Console.Out.Flush();
-}
-
-Console.WriteLine();
-// </transcription>
-
-
-// <cleanup>
-// Tidy up - unload the model
-await model.UnloadAsync();
-// </cleanup>
-// </complete_code>
\ No newline at end of file
diff --git a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj b/samples/cs/audio/Audio.csproj
similarity index 63%
rename from samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj
rename to samples/cs/audio/Audio.csproj
index 4a0eed349..c15692090 100644
--- a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj
+++ b/samples/cs/audio/Audio.csproj
@@ -11,14 +11,11 @@
     <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
   </PropertyGroup>
 
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
+  <!-- Foundry Local SDK, consumed from local source (tracks 'main', not version-pinned).
+       The Microsoft.AI.Foundry.Local.Core* packages and native runtime assets flow
+       transitively from this project reference. -->
+  <ItemGroup>
+    <ProjectReference Include="../../../sdk/cs/src/Microsoft.AI.Foundry.Local.csproj" />
   </ItemGroup>
 
   <!-- Linux GPU support -->
@@ -27,6 +24,13 @@
     <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
   </ItemGroup>
 
+  <!-- Bundled default audio file used by the file-based (Whisper) mode; copied next to the binary -->
+  <ItemGroup>
+    <None Include="Recording.mp3">
+      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+    </None>
+  </ItemGroup>
+
   <!-- Shared utilities -->
   <ItemGroup>
     <Compile Include="../Shared/*.cs" />
diff --git a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.sln b/samples/cs/audio/Audio.sln
similarity index 90%
rename from samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.sln
rename to samples/cs/audio/Audio.sln
index f8c882849..4d80abe67 100644
--- a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.sln
+++ b/samples/cs/audio/Audio.sln
@@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio Version 17
 VisualStudioVersion = 17.0.31903.59
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LiveAudioTranscriptionExample", "LiveAudioTranscriptionExample.csproj", "{A2B3C4D5-E6F7-4A8B-9C0D-1E2F3A4B5C6D}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Audio", "Audio.csproj", "{A2B3C4D5-E6F7-4A8B-9C0D-1E2F3A4B5C6D}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
diff --git a/samples/cs/live-audio-transcription/Program.cs b/samples/cs/audio/Program.cs
similarity index 68%
rename from samples/cs/live-audio-transcription/Program.cs
rename to samples/cs/audio/Program.cs
index 9caa2569d..810ff8f16 100644
--- a/samples/cs/live-audio-transcription/Program.cs
+++ b/samples/cs/audio/Program.cs
@@ -1,13 +1,30 @@
-// Live Audio Transcription — Foundry Local SDK Example
+// Audio Transcription — Foundry Local SDK Example
 //
-// NAudio's WaveInEvent is Windows-only. On non-Windows platforms, the sample
-// falls back to synthetic PCM audio.
+// Two modes:
+//   * Default (no --file): live microphone streaming transcription with Nemotron ASR.
+//       NAudio's WaveInEvent is Windows-only. On non-Windows platforms (or with --synth)
+//       the sample falls back to synthetic PCM audio.
+//   * --file [path]: file-based transcription with Whisper. Uses the bundled Recording.mp3
+//       when no path is supplied.
 
 using Microsoft.AI.Foundry.Local;
 using NAudio.Wave;
 
+// Parse CLI options.
+int fileFlagIndex = Array.IndexOf(args, "--file");
+bool fileMode = fileFlagIndex >= 0;
+bool useSynth = args.Contains("--synth");
+
+string defaultAudioFile = Path.Combine(AppContext.BaseDirectory, "Recording.mp3");
+string audioFile = defaultAudioFile;
+if (fileMode && fileFlagIndex + 1 < args.Length && !args[fileFlagIndex + 1].StartsWith("--", StringComparison.Ordinal))
+{
+    audioFile = args[fileFlagIndex + 1];
+}
+
 Console.WriteLine("===========================================================");
-Console.WriteLine("   Foundry Local -- Live Audio Transcription Demo");
+Console.WriteLine("   Foundry Local -- Audio Transcription Demo");
+Console.WriteLine($"   Mode: {(fileMode ? "file (Whisper)" : "live microphone (Nemotron ASR)")}");
 Console.WriteLine("===========================================================");
 Console.WriteLine();
 
@@ -24,6 +41,52 @@
 
 var catalog = await mgr.GetCatalogAsync();
 
+if (fileMode)
+{
+    // ===== File-based transcription (Whisper) =====
+    if (!File.Exists(audioFile))
+    {
+        Console.Error.WriteLine($"Audio file not found: {audioFile}");
+        return 1;
+    }
+
+    // Get the Whisper model and select the CPU variant.
+    var fileModel = await catalog.GetModelAsync("whisper-tiny") ?? throw new Exception("Model \"whisper-tiny\" not found in catalog");
+    var modelVariant = fileModel.Variants.First(v => v.Info.Runtime?.DeviceType == DeviceType.CPU);
+    fileModel.SelectVariant(modelVariant);
+
+    await fileModel.DownloadAsync(progress =>
+    {
+        Console.Write($"\rDownloading model: {progress:F2}%");
+        if (progress >= 100f)
+        {
+            Console.WriteLine();
+        }
+    });
+
+    Console.Write($"Loading model {fileModel.Id}...");
+    await fileModel.LoadAsync();
+    Console.WriteLine("done.");
+
+    var fileAudioClient = await fileModel.GetAudioClientAsync();
+    fileAudioClient.Settings.Language = "en";
+
+    Console.WriteLine($"Transcribing audio file: {Path.GetFileName(audioFile)}");
+    Console.Write("[TRANSCRIPT]: ");
+    var fileResponse = fileAudioClient.TranscribeAudioStreamingAsync(audioFile, CancellationToken.None);
+    await foreach (var chunk in fileResponse)
+    {
+        Console.Write(chunk.Text);
+        Console.Out.Flush();
+    }
+    Console.WriteLine();
+
+    await fileModel.UnloadAsync();
+    return 0;
+}
+
+// ===== Live microphone transcription (Nemotron ASR) =====
+
 // English-only:
 var modelAlias = "nemotron-speech-streaming-en-0.6b";
 // Multi-lingual (supports 30+ languages including auto-detect):
@@ -82,8 +145,6 @@ await model.DownloadAsync(progress =>
     catch (OperationCanceledException) { }
 });
 
-bool useSynth = args.Contains("--synth");
-
 // NAudio WaveInEvent is Windows-only. On other platforms, fall back to synthetic audio.
 if (!useSynth && OperatingSystem.IsWindows())
 {
@@ -141,6 +202,7 @@ await model.DownloadAsync(progress =>
     if (!OperatingSystem.IsWindows() && !useSynth)
     {
         Console.WriteLine("NAudio mic capture is Windows-only. Falling back to synthetic audio...");
+        Console.WriteLine("(Use --file [path] for file-based Whisper transcription instead.)");
     }
 
     // Synthetic PCM fallback: 440Hz sine wave, 2 seconds
@@ -173,3 +235,4 @@ await model.DownloadAsync(progress =>
 await readTask;
 
 await model.UnloadAsync();
+return 0;
diff --git a/samples/cs/audio/README.md b/samples/cs/audio/README.md
new file mode 100644
index 000000000..e52f63ce4
--- /dev/null
+++ b/samples/cs/audio/README.md
@@ -0,0 +1,104 @@
+# Audio Transcription Example
+
+One sample, two transcription modes against Foundry Local:
+
+- **Live microphone streaming** (default) with **Nemotron ASR** — real-time speech-to-text.
+- **File-based transcription** with **Whisper** via the `--file [path]` option.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- .NET 9 SDK
+- A microphone for live mode (optional — falls back to synthetic audio on non-Windows or with `--synth`)
+
+> **Note:** Microphone capture uses [NAudio](https://github.com/naudio/NAudio) and is Windows-only.
+> On other platforms the live mode falls back to synthetic audio; use `--file` for real
+> transcription of an audio file.
+
+## SDK consumption
+
+This sample tracks `main`: it consumes the Foundry Local C# SDK **from local source** via a
+`ProjectReference` to `sdk/cs/src/Microsoft.AI.Foundry.Local.csproj`. It is **not** version-pinned
+to a published package. The `Microsoft.AI.Foundry.Local.Core*` packages and native runtime assets
+flow transitively from that project and restore from nuget.org, along with the third-party `NAudio`
+and `Microsoft.Extensions.Logging` packages.
+
+## Build & run
+
+```bash
+# from this directory
+dotnet build
+```
+
+`dotnet build` resolves the SDK from `sdk/cs` source via the project reference and restores
+`Microsoft.AI.Foundry.Local.Core` plus third-party packages from nuget.org.
+
+### Live microphone transcription (default — Nemotron ASR)
+
+```bash
+dotnet run
+```
+
+Speak into your microphone. Transcription appears in real time (cyan text). Press `ENTER` to stop.
+
+To force synthetic audio (e.g., for CI or non-Windows):
+
+```bash
+dotnet run -- --synth
+```
+
+### File-based transcription (Whisper)
+
+```bash
+# transcribe the bundled Recording.mp3
+dotnet run -- --file
+
+# transcribe your own file
+dotnet run -- --file /path/to/audio.wav
+```
+
+A small `Recording.mp3` is bundled and used as the default when no path is given.
+
+## How it works
+
+### Live mode (Nemotron ASR)
+
+1. Loads the Nemotron streaming ASR model.
+2. Creates a live transcription session (`audioClient.CreateLiveTranscriptionSession()`) with
+   16kHz / 16-bit / mono PCM settings.
+3. Captures microphone audio via `NAudio.WaveInEvent` (or generates synthetic audio as a fallback).
+4. Pushes PCM chunks via `session.AppendAsync()` through a bounded channel for backpressure.
+5. Reads results via `await foreach (var result in session.GetStream())`.
+
+### File mode (Whisper)
+
+1. Loads the `whisper-tiny` model and selects its CPU variant.
+2. Streams the transcript via `audioClient.TranscribeAudioStreamingAsync(path)`.
+
+## API
+
+```csharp
+// Live streaming
+var audioClient = await model.GetAudioClientAsync();
+var session = audioClient.CreateLiveTranscriptionSession();
+session.Settings.SampleRate = 16000;
+session.Settings.Channels = 1;
+session.Settings.Language = "en";
+
+await session.StartAsync();
+await session.AppendAsync(pcmBytes);          // push audio
+await foreach (var result in session.GetStream())
+{
+    Console.WriteLine(result.Content[0].Text); // transcribed text
+    Console.WriteLine(result.IsFinal);         // true for final results
+}
+await session.StopAsync();
+
+// File transcription (here `model` is the loaded Whisper model, not the Nemotron one)
+var fileAudioClient = await model.GetAudioClientAsync();
+fileAudioClient.Settings.Language = "en";
+await foreach (var chunk in fileAudioClient.TranscribeAudioStreamingAsync(path))
+{
+    Console.Write(chunk.Text);
+}
+```
diff --git a/samples/cs/audio-transcription-example/Recording.mp3 b/samples/cs/audio/Recording.mp3
similarity index 100%
rename from samples/cs/audio-transcription-example/Recording.mp3
rename to samples/cs/audio/Recording.mp3
diff --git a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj b/samples/cs/chat-completion/ChatCompletion.csproj
similarity index 66%
rename from samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj
rename to samples/cs/chat-completion/ChatCompletion.csproj
index 77fc929d6..612b951d8 100644
--- a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj
+++ b/samples/cs/chat-completion/ChatCompletion.csproj
@@ -11,14 +11,11 @@
     <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
   </PropertyGroup>
 
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
+  <!-- Foundry Local SDK, consumed from local source (tracks 'main', not version-pinned).
+       The Microsoft.AI.Foundry.Local.Core* packages and native runtime assets flow
+       transitively from this project reference. -->
+  <ItemGroup>
+    <ProjectReference Include="../../../sdk/cs/src/Microsoft.AI.Foundry.Local.csproj" />
   </ItemGroup>
 
   <!-- Linux GPU support -->
@@ -27,6 +24,7 @@
     <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
   </ItemGroup>
 
+  <!-- OpenAI client, used for the web server (/v1/chat/completions) demonstration -->
   <ItemGroup>
     <PackageReference Include="OpenAI" />
   </ItemGroup>
diff --git a/samples/cs/native-chat-completions/NativeChatCompletions.sln b/samples/cs/chat-completion/ChatCompletion.sln
similarity index 91%
rename from samples/cs/native-chat-completions/NativeChatCompletions.sln
rename to samples/cs/chat-completion/ChatCompletion.sln
index a127bfba6..3bb56f391 100644
--- a/samples/cs/native-chat-completions/NativeChatCompletions.sln
+++ b/samples/cs/chat-completion/ChatCompletion.sln
@@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio Version 17
 VisualStudioVersion = 17.0.31903.59
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NativeChatCompletions", "NativeChatCompletions.csproj", "{A53372CE-F7E1-4F09-B186-77F76E388659}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChatCompletion", "ChatCompletion.csproj", "{A53372CE-F7E1-4F09-B186-77F76E388659}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
diff --git a/samples/cs/native-chat-completions/Program.cs b/samples/cs/chat-completion/Program.cs
similarity index 57%
rename from samples/cs/native-chat-completions/Program.cs
rename to samples/cs/chat-completion/Program.cs
index 033786b1f..9f011b0c0 100644
--- a/samples/cs/native-chat-completions/Program.cs
+++ b/samples/cs/chat-completion/Program.cs
@@ -1,16 +1,30 @@
 ﻿// <complete_code>
+// This sample demonstrates two ways to run the same chat prompt against Foundry Local:
+//   1. Native, in-process inference via the SDK's chat client.
+//   2. The local OpenAI-compatible web server (/v1/chat/completions) via the OpenAI SDK.
+//
 // <imports>
 using Microsoft.AI.Foundry.Local;
 using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
+using OpenAI;
+using System.ClientModel;
 // </imports>
 
+// The same prompt is used for both the native and web-server demonstrations.
+const string prompt = "Why is the sky blue?";
+
 // <init>
 CancellationToken ct = new CancellationToken();
 
 var config = new Configuration
 {
     AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
+    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information,
+    // The web server is started later for the second demonstration.
+    Web = new Configuration.WebService
+    {
+        Urls = "http://127.0.0.1:52495"
+    }
 };
 
 
@@ -83,18 +97,20 @@ await model.DownloadAsync(progress =>
 Console.WriteLine("done.");
 // </model_setup>
 
-// <chat_completion>
-// Get a chat client
+// <native_inference>
+// === Native inference ===
+// Run the prompt with the SDK's in-process chat client (no web server involved).
+Console.WriteLine();
+Console.WriteLine("=== Native inference ===");
+
 var chatClient = await model.GetChatClientAsync();
 
-// Create a chat message
 List<ChatMessage> messages = new()
 {
-    new ChatMessage { Role = "user", Content = "Why is the sky blue?" }
+    new ChatMessage { Role = "user", Content = prompt }
 };
 
-// Get a streaming chat completion response
-Console.WriteLine("Chat completion response:");
+Console.Write("[ASSISTANT]: ");
 var streamingResponse = chatClient.CompleteChatStreamingAsync(messages, ct);
 await foreach (var chunk in streamingResponse)
 {
@@ -102,10 +118,44 @@ await model.DownloadAsync(progress =>
     Console.Out.Flush();
 }
 Console.WriteLine();
-// </chat_completion>
+// </native_inference>
+
+// <web_server>
+// === Web server (/v1/chat/completions) ===
+// Run the same prompt against the local OpenAI-compatible web server using the OpenAI SDK.
+Console.WriteLine();
+Console.WriteLine("=== Web server (/v1/chat/completions) ===");
+
+Console.Write($"Starting web service on {config.Web.Urls}...");
+await mgr.StartWebServiceAsync();
+Console.WriteLine("done.");
+
+// Use the OpenAI SDK to call the local Foundry web service.
+ApiKeyCredential key = new ApiKeyCredential("notneeded");
+OpenAIClient client = new OpenAIClient(key, new OpenAIClientOptions
+{
+    Endpoint = new Uri(config.Web.Urls + "/v1"),
+});
+
+var webChatClient = client.GetChatClient(model.Id);
+var completionUpdates = webChatClient.CompleteChatStreaming(prompt);
+
+Console.Write("[ASSISTANT]: ");
+foreach (var completionUpdate in completionUpdates)
+{
+    if (completionUpdate.ContentUpdate.Count > 0)
+    {
+        Console.Write(completionUpdate.ContentUpdate[0].Text);
+    }
+}
+Console.WriteLine();
+
+// Stop the web service.
+await mgr.StopWebServiceAsync();
+// </web_server>
 
 // <cleanup>
 // Tidy up - unload the model
 await model.UnloadAsync();
 // </cleanup>
-// </complete_code>
\ No newline at end of file
+// </complete_code>
diff --git a/samples/cs/chat-completion/README.md b/samples/cs/chat-completion/README.md
new file mode 100644
index 000000000..5747a4db8
--- /dev/null
+++ b/samples/cs/chat-completion/README.md
@@ -0,0 +1,48 @@
+# Chat Completion Example
+
+Run the same chat prompt against Foundry Local two ways from a single program:
+
+1. **Native, in-process inference** via the SDK's chat client (no web server involved).
+2. **The local OpenAI-compatible web server** (`/v1/chat/completions`) via the [OpenAI .NET SDK](https://www.nuget.org/packages/OpenAI).
+
+The program prints clear section headers so you can compare the two paths:
+
+```
+=== Native inference ===
+[ASSISTANT]: ...
+
+=== Web server (/v1/chat/completions) ===
+[ASSISTANT]: ...
+```
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- .NET 9 SDK
+
+## SDK consumption
+
+This sample tracks `main`: it consumes the Foundry Local C# SDK **from local source** via a
+`ProjectReference` to `sdk/cs/src/Microsoft.AI.Foundry.Local.csproj`. It is **not** version-pinned
+to a published package. The `Microsoft.AI.Foundry.Local.Core*` packages and native runtime assets
+flow transitively from that project and restore from nuget.org, along with the third-party
+`OpenAI` package.
+
+## Build & run
+
+```bash
+# from this directory
+dotnet build
+dotnet run
+```
+
+`dotnet build` resolves the SDK from `sdk/cs` source via the project reference and restores
+`Microsoft.AI.Foundry.Local.Core` plus third-party packages from nuget.org.
+
+## What it does
+
+1. Initializes the SDK and downloads/registers execution providers.
+2. Downloads and loads the `qwen2.5-0.5b` model.
+3. Streams the prompt through the native chat client.
+4. Starts the local web server and streams the same prompt through the OpenAI SDK.
+5. Stops the web server and unloads the model.
diff --git a/samples/cs/embeddings/Embeddings.csproj b/samples/cs/embeddings/Embeddings.csproj
index 97cb8ef34..964248cba 100644
--- a/samples/cs/embeddings/Embeddings.csproj
+++ b/samples/cs/embeddings/Embeddings.csproj
@@ -11,14 +11,11 @@
     <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
   </PropertyGroup>
 
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
+  <!-- Foundry Local SDK, consumed from local source (tracks 'main', not version-pinned).
+       The Microsoft.AI.Foundry.Local.Core* packages and native runtime assets flow
+       transitively from this project reference. -->
+  <ItemGroup>
+    <ProjectReference Include="../../../sdk/cs/src/Microsoft.AI.Foundry.Local.csproj" />
   </ItemGroup>
 
   <!-- Linux GPU support -->
diff --git a/samples/cs/embeddings/README.md b/samples/cs/embeddings/README.md
new file mode 100644
index 000000000..5522f375b
--- /dev/null
+++ b/samples/cs/embeddings/README.md
@@ -0,0 +1,33 @@
+# Embeddings Example
+
+Generate single and batch text embeddings natively (in-process) with the Foundry Local C# SDK.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- .NET 9 SDK
+
+## SDK consumption
+
+This sample tracks `main`: it consumes the Foundry Local C# SDK **from local source** via a
+`ProjectReference` to `sdk/cs/src/Microsoft.AI.Foundry.Local.csproj`. It is **not** version-pinned
+to a published package. The `Microsoft.AI.Foundry.Local.Core*` packages and native runtime assets
+flow transitively from that project and restore from nuget.org.
+
+## Build & run
+
+```bash
+# from this directory
+dotnet build
+dotnet run
+```
+
+`dotnet build` resolves the SDK from `sdk/cs` source via the project reference and restores
+`Microsoft.AI.Foundry.Local.Core` from nuget.org.
+
+## What it does
+
+1. Initializes the SDK and downloads/loads the `qwen3-embedding-0.6b` model.
+2. Generates a single embedding and prints its dimensions and first values.
+3. Generates a batch of embeddings and prints the dimensions for each.
+4. Unloads the model.
diff --git a/samples/cs/foundry-local-web-server-responses-vision/FoundryLocalWebServerResponsesVision.csproj b/samples/cs/foundry-local-web-server-responses-vision/FoundryLocalWebServerResponsesVision.csproj
deleted file mode 100644
index 06e29a5d2..000000000
--- a/samples/cs/foundry-local-web-server-responses-vision/FoundryLocalWebServerResponsesVision.csproj
+++ /dev/null
@@ -1,54 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-  </PropertyGroup>
-
-  <!-- Windows: target Windows SDK for WinML hardware acceleration -->
-  <PropertyGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <TargetFramework>net9.0-windows10.0.18362.0</TargetFramework>
-    <Platforms>ARM64;x64</Platforms>
-    <WindowsPackageType>None</WindowsPackageType>
-    <EnableCoreMrtTooling>false</EnableCoreMrtTooling>
-  </PropertyGroup>
-
-  <!-- Non-Windows: standard .NET -->
-  <PropertyGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Bundled test image is copied next to the binary so the default path resolves at runtime -->
-  <ItemGroup>
-    <None Update="test_image.jpg">
-      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-    </None>
-  </ItemGroup>
-
-  <!-- Shared utilities -->
-  <ItemGroup>
-    <Compile Include="../Shared/*.cs" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj
deleted file mode 100644
index 77fc929d6..000000000
--- a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj
+++ /dev/null
@@ -1,39 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <ItemGroup>
-    <PackageReference Include="OpenAI" />
-  </ItemGroup>
-
-  <!-- Shared utilities -->
-  <ItemGroup>
-    <Compile Include="../Shared/*.cs" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.sln b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.sln
deleted file mode 100644
index 91d7e9536..000000000
--- a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FoundryLocalWebServer", "FoundryLocalWebServer.csproj", "{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x64.ActiveCfg = Debug|x64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x64.Build.0 = Debug|x64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x86.ActiveCfg = Debug|ARM64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x86.Build.0 = Debug|ARM64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|Any CPU.Build.0 = Release|ARM64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x64.ActiveCfg = Release|x64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x64.Build.0 = Release|x64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x86.ActiveCfg = Release|ARM64
-		{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/foundry-local-web-server/Program.cs b/samples/cs/foundry-local-web-server/Program.cs
deleted file mode 100644
index eb88e4b39..000000000
--- a/samples/cs/foundry-local-web-server/Program.cs
+++ /dev/null
@@ -1,100 +0,0 @@
-﻿// <complete_code>
-// <imports>
-using Microsoft.AI.Foundry.Local;
-using OpenAI;
-using System.ClientModel;
-// </imports>
-
-// <init>
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information,
-    Web = new Configuration.WebService
-    {
-        Urls = "http://127.0.0.1:52495"
-    }
-};
-
-
-// Initialize the singleton instance.
-await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
-var mgr = FoundryLocalManager.Instance;
-
-
-// Ensure that any Execution Provider (EP) downloads run and are completed.
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-// </init>
-
-
-// <model_setup>
-// Get the model catalog
-var catalog = await mgr.GetCatalogAsync();
-
-
-// Get a model using an alias
-var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found");
-// Download the model (the method skips download if already cached)
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f)
-    {
-        Console.WriteLine();
-    }
-});
-
-
-// Load the model
-Console.Write($"Loading model {model.Id}...");
-await model.LoadAsync();
-Console.WriteLine("done.");
-// </model_setup>
-
-
-// <server_setup>
-// Start the web service
-Console.Write($"Starting web service on {config.Web.Urls}...");
-await mgr.StartWebServiceAsync();
-Console.WriteLine("done.");
-
-// <<<<<< OPEN AI SDK USAGE >>>>>>
-// Use the OpenAI SDK to call the local Foundry web service
-
-ApiKeyCredential key = new ApiKeyCredential("notneeded");
-OpenAIClient client = new OpenAIClient(key, new OpenAIClientOptions
-{
-    Endpoint = new Uri(config.Web.Urls + "/v1"),
-});
-
-var chatClient = client.GetChatClient(model.Id);
-var completionUpdates = chatClient.CompleteChatStreaming("Why is the sky blue?");
-
-Console.Write($"[ASSISTANT]: ");
-foreach (var completionUpdate in completionUpdates)
-{
-    if (completionUpdate.ContentUpdate.Count > 0)
-    {
-        Console.Write(completionUpdate.ContentUpdate[0].Text);
-    }
-}
-Console.WriteLine();
-// <<<<<< END OPEN AI SDK USAGE >>>>>>
-
-// Tidy up
-// Stop the web service and unload model
-await mgr.StopWebServiceAsync();
-await model.UnloadAsync();
-// </server_setup>
-// </complete_code>
\ No newline at end of file
diff --git a/samples/cs/live-audio-transcription/README.md b/samples/cs/live-audio-transcription/README.md
deleted file mode 100644
index 2e59eeb43..000000000
--- a/samples/cs/live-audio-transcription/README.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# Live Audio Transcription Example
-
-Real-time microphone-to-text transcription using the Foundry Local C# SDK with Nemotron ASR.
-
-## Prerequisites
-
-- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
-- .NET 9 SDK
-- A microphone (optional — falls back to synthetic audio on non-Windows or with `--synth`)
-
-## Setup
-
-```bash
-dotnet restore
-```
-
-> **Note:** Microphone capture uses [NAudio](https://github.com/naudio/NAudio) and is Windows-only. On other platforms, the sample falls back to synthetic audio for testing.
-
-## Run
-
-```bash
-dotnet run
-```
-
-Speak into your microphone. Transcription appears in real-time (cyan text). Press `ENTER` to stop recording.
-
-To force synthetic audio (e.g., for CI or non-Windows):
-
-```bash
-dotnet run -- --synth
-```
-
-## How it works
-
-1. Initializes the Foundry Local SDK and loads the Nemotron ASR model
-2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
-3. Captures microphone audio via `NAudio.WaveInEvent` (or generates synthetic audio as fallback)
-4. Pushes PCM chunks to the SDK via `session.AppendAsync()` through a bounded channel for backpressure
-5. Reads transcription results via `await foreach (var result in session.GetStream())`
-6. Access text via `result.Content[0].Text` (OpenAI Realtime ConversationItem pattern)
-
-## API
-
-```csharp
-var audioClient = await model.GetAudioClientAsync();
-var session = audioClient.CreateLiveTranscriptionSession();
-session.Settings.SampleRate = 16000;
-session.Settings.Channels = 1;
-session.Settings.Language = "en";
-
-await session.StartAsync();
-
-// Push audio
-await session.AppendAsync(pcmBytes);
-
-// Read results
-await foreach (var result in session.GetStream())
-{
-    Console.WriteLine(result.Content[0].Text);       // transcribed text
-    Console.WriteLine(result.Content[0].Transcript); // alias (OpenAI compat)
-    Console.WriteLine(result.IsFinal);               // true for final results
-}
-
-await session.StopAsync();
-```
diff --git a/samples/cs/model-management-example/ModelManagementExample.csproj b/samples/cs/model-management-example/ModelManagementExample.csproj
deleted file mode 100644
index 97cb8ef34..000000000
--- a/samples/cs/model-management-example/ModelManagementExample.csproj
+++ /dev/null
@@ -1,35 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Shared utilities -->
-  <ItemGroup>
-    <Compile Include="../Shared/*.cs" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/model-management-example/ModelManagementExample.sln b/samples/cs/model-management-example/ModelManagementExample.sln
deleted file mode 100644
index f255391b2..000000000
--- a/samples/cs/model-management-example/ModelManagementExample.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ModelManagementExample", "ModelManagementExample.csproj", "{9316B939-946C-4956-A4E7-9410017FD319}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{9316B939-946C-4956-A4E7-9410017FD319}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Debug|x64.ActiveCfg = Debug|x64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Debug|x64.Build.0 = Debug|x64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Debug|x86.ActiveCfg = Debug|ARM64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Debug|x86.Build.0 = Debug|ARM64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Release|Any CPU.Build.0 = Release|ARM64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Release|x64.ActiveCfg = Release|x64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Release|x64.Build.0 = Release|x64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Release|x86.ActiveCfg = Release|ARM64
-		{9316B939-946C-4956-A4E7-9410017FD319}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/model-management-example/Program.cs b/samples/cs/model-management-example/Program.cs
deleted file mode 100644
index 76beb89ff..000000000
--- a/samples/cs/model-management-example/Program.cs
+++ /dev/null
@@ -1,155 +0,0 @@
-using Microsoft.AI.Foundry.Local;
-using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
-using System.Diagnostics;
-
-CancellationToken ct = new CancellationToken();
-
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-
-// Initialize the singleton instance.
-await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
-var mgr = FoundryLocalManager.Instance;
-
-
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-
-
-// Model catalog operations
-// In this section of the code we demonstrate the various model catalog operations
-// Get the model catalog object
-var catalog = await mgr.GetCatalogAsync();
-
-// List available models
-Console.WriteLine("Available models for your hardware:");
-var models = await catalog.ListModelsAsync();
-foreach (var availableModel in models)
-{
-    foreach (var variant in availableModel.Variants)
-    {
-        Console.WriteLine($"  - Alias: {variant.Alias} (Id: {string.Join(", ", variant.Id)})");
-    }
-}
-
-// List cached models (i.e. downloaded models) from the catalog
-var cachedModels = await catalog.GetCachedModelsAsync();
-Console.WriteLine("\nCached models:");
-foreach (var cachedModel in cachedModels)
-{
-    Console.WriteLine($"- {cachedModel.Alias} ({cachedModel.Id})");
-}
-
-
-// Get a model using an alias from the catalog
-var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found");
-
-// Models in Model.Variants are ordered by priority, with the highest priority first.
-// The first downloaded model is selected by default.
-// The highest priority is selected if no models have been downloaded.
-// If the selected variant is not the highest priority, it means that Foundry Local
-// has found a locally cached variant for you to improve performance (remove need to download).
-Console.WriteLine("\nThe default selected model variant is: " + model.Id);
-if (model.Id != model.Variants.First().Id)
-{
-    Debug.Assert(await model.IsCachedAsync());
-    Console.WriteLine("The model variant was selected due to being locally cached.");
-}
-
-
-// OPTIONAL: `model` can be used directly with its currently selected variant.
-//           You can explicitly select (`model.SelectVariant`) or use a specific variant from `model.Variants`
-//           if you want more control over the device and/or execution provider used.
-//
-// Choices:
-//   - Use a model variant directly from the catalog if you know the variant Id
-//     - `var modelVariant = await catalog.GetModelVariantAsync("qwen2.5-0.5b-instruct-generic-gpu:3")`
-//
-//   - Get the model variant from IModel.Variants
-//     - `var modelVariant = model.Variants.First(v => v.Id == "qwen2.5-0.5b-instruct-generic-cpu:4")`
-//     - `var modelVariant = model.Variants.First(v => v.Info.Runtime?.DeviceType == DeviceType.GPU)`
-//       - optional: update selected variant in `model` using `model.SelectVariant(modelVariant);` if you wish to use
-//                   `model` in your code.
-
-// For this example we explicitly select the CPU variant, and call SelectVariant so all the following example code
-// uses the `model` instance. It would be equally valid to use `modelVariant` directly.
-Console.WriteLine("Selecting CPU variant of model");
-var modelVariant = model.Variants.First(v => v.Info.Runtime?.DeviceType == DeviceType.CPU);
-model.SelectVariant(modelVariant);
-
-
-// Download the model (the method skips download if already cached)
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f)
-    {
-        Console.WriteLine();
-    }
-});
-
-// Load the model
-await model.LoadAsync();
-
-
-// List loaded models (i.e. in memory) from the catalog
-var loadedModels = await catalog.GetLoadedModelsAsync();
-Console.WriteLine("\nLoaded models:");
-foreach (var loadedModel in loadedModels)
-{
-    Console.WriteLine($"- {loadedModel.Alias} ({loadedModel.Id})");
-}
-Console.WriteLine();
-
-
-// Get a chat client
-var chatClient = await model.GetChatClientAsync();
-
-// Create a chat message
-List<ChatMessage> messages = new()
-{
-    new ChatMessage { Role = "user", Content = "Why is the sky blue?" }
-};
-
-// You can adjust settings on the chat client
-chatClient.Settings.Temperature = 0.7f;
-chatClient.Settings.MaxTokens = 512;
-
-Console.WriteLine("Chat completion response:");
-var streamingResponse = chatClient.CompleteChatStreamingAsync(messages, ct);
-await foreach (var chunk in streamingResponse)
-{
-    Console.Write(chunk.Choices[0].Message.Content);
-    Console.Out.Flush();
-}
-Console.WriteLine();
-Console.WriteLine();
-
-// Tidy up - unload the model
-Console.WriteLine($"Unloading model {model.Id}...");
-await model.UnloadAsync();
-Console.WriteLine("Model unloaded.");
-
-// Show loaded models from the catalog after unload
-loadedModels = await catalog.GetLoadedModelsAsync();
-Console.WriteLine("\nLoaded models after unload (will be empty):");
-foreach (var loadedModel in loadedModels)
-{
-    Console.WriteLine($"- {loadedModel.Alias} ({loadedModel.Id})");
-}
-Console.WriteLine();
-Console.WriteLine("Sample complete.");
\ No newline at end of file
diff --git a/samples/cs/native-chat-completions/NativeChatCompletions.csproj b/samples/cs/native-chat-completions/NativeChatCompletions.csproj
deleted file mode 100644
index 97cb8ef34..000000000
--- a/samples/cs/native-chat-completions/NativeChatCompletions.csproj
+++ /dev/null
@@ -1,35 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Shared utilities -->
-  <ItemGroup>
-    <Compile Include="../Shared/*.cs" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/nuget.config b/samples/cs/nuget.config
index 63954b2fb..534427902 100644
--- a/samples/cs/nuget.config
+++ b/samples/cs/nuget.config
@@ -2,18 +2,14 @@
 <configuration>
   <packageSources>
     <clear />
+    <!-- The Foundry Local SDK is built from local source (sdk/cs) via ProjectReference.
+         Its transitive Microsoft.AI.Foundry.Local.Core* packages and all third-party
+         packages restore from nuget.org. -->
     <add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
-    <!-- CI builds the SDK to local-packages/ before building samples.
-         For local dev, run: dotnet pack sdk/cs/src -o local-packages /p:Version=0.9.0-dev -->
-    <add key="local-sdk" value="../../local-packages" />
   </packageSources>
   <packageSourceMapping>
     <packageSource key="nuget.org">
       <package pattern="*" />
-      <package pattern="Microsoft.AI.Foundry.Local.Core*" />
-    </packageSource>
-    <packageSource key="local-sdk">
-      <package pattern="Microsoft.AI.Foundry.Local*" />
     </packageSource>
   </packageSourceMapping>
-</configuration>
\ No newline at end of file
+</configuration>
diff --git a/samples/cs/foundry-local-web-server-responses-vision/Program.cs b/samples/cs/responses-api/Program.cs
similarity index 100%
rename from samples/cs/foundry-local-web-server-responses-vision/Program.cs
rename to samples/cs/responses-api/Program.cs
diff --git a/samples/cs/responses-api/README.md b/samples/cs/responses-api/README.md
new file mode 100644
index 000000000..3ac706bc6
--- /dev/null
+++ b/samples/cs/responses-api/README.md
@@ -0,0 +1,46 @@
+# Vision via Web Server (Responses API) Example
+
+Stream a vision (image understanding) response from the local Foundry web server using the
+OpenAI-compatible **Responses API** (`/v1/responses`).
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- .NET 9 SDK
+- A vision-capable model (e.g. `qwen2.5-vl-3b`)
+
+## SDK consumption
+
+This sample tracks `main`: it consumes the Foundry Local C# SDK **from local source** via a
+`ProjectReference` to `sdk/cs/src/Microsoft.AI.Foundry.Local.csproj`. It is **not** version-pinned
+to a published package. The `Microsoft.AI.Foundry.Local.Core*` packages and native runtime assets
+flow transitively from that project and restore from nuget.org.
+
+## Build & run
+
+```bash
+# from this directory
+dotnet build
+
+# describe the bundled test image with a vision model
+dotnet run -- <model_alias_or_id>
+
+# describe your own image
+dotnet run -- <model_alias_or_id> /path/to/image.jpg
+
+# list vision models in the catalog
+dotnet run -- --list-models
+```
+
+`dotnet build` builds the SDK from the `sdk/cs` source via the project reference and restores
+the transitive `Microsoft.AI.Foundry.Local.Core*` packages from nuget.org.
+
+## What it does
+
+1. Initializes the SDK and downloads/registers execution providers.
+2. Downloads and loads the requested vision model.
+3. Starts the local web server.
+4. Base64-encodes the image and POSTs a streaming request to `/v1/responses` with
+   `input_text` + `input_image` content parts.
+5. Streams `response.output_text.delta` events to the console.
+6. Stops the web server and unloads the model.
diff --git a/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj b/samples/cs/responses-api/ResponsesApi.csproj
similarity index 63%
rename from samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj
rename to samples/cs/responses-api/ResponsesApi.csproj
index ce8a65f04..4ee58566a 100644
--- a/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj
+++ b/samples/cs/responses-api/ResponsesApi.csproj
@@ -4,6 +4,9 @@
     <OutputType>Exe</OutputType>
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <PropertyGroup>
     <TargetFramework>net9.0</TargetFramework>
   </PropertyGroup>
 
@@ -11,14 +14,11 @@
     <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
   </PropertyGroup>
 
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
+  <!-- Foundry Local SDK, consumed from local source (tracks 'main', not version-pinned).
+       The Microsoft.AI.Foundry.Local.Core* packages and native runtime assets flow
+       transitively from this project reference. -->
+  <ItemGroup>
+    <ProjectReference Include="../../../sdk/cs/src/Microsoft.AI.Foundry.Local.csproj" />
   </ItemGroup>
 
   <!-- Linux GPU support -->
@@ -27,9 +27,9 @@
     <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
   </ItemGroup>
 
-  <!-- Include audio file in output directory -->
+  <!-- Bundled test image is copied next to the binary so the default path resolves at runtime -->
   <ItemGroup>
-    <None Include="Recording.mp3">
+    <None Update="test_image.jpg">
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </None>
   </ItemGroup>
diff --git a/samples/cs/foundry-local-web-server-responses-vision/FoundryLocalWebServerResponsesVision.sln b/samples/cs/responses-api/ResponsesApi.sln
similarity index 89%
rename from samples/cs/foundry-local-web-server-responses-vision/FoundryLocalWebServerResponsesVision.sln
rename to samples/cs/responses-api/ResponsesApi.sln
index ac1df4ebb..185fb4009 100644
--- a/samples/cs/foundry-local-web-server-responses-vision/FoundryLocalWebServerResponsesVision.sln
+++ b/samples/cs/responses-api/ResponsesApi.sln
@@ -3,7 +3,7 @@ Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio Version 17
 VisualStudioVersion = 17.0.31903.59
 MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FoundryLocalWebServerResponsesVision", "FoundryLocalWebServerResponsesVision.csproj", "{8B4D2C97-2B5D-4A4E-9D31-7C8A6E6F3F11}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ResponsesApi", "ResponsesApi.csproj", "{8B4D2C97-2B5D-4A4E-9D31-7C8A6E6F3F11}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
diff --git a/samples/js/web-server-responses-vision-example/test_image.jpg b/samples/cs/responses-api/test_image.jpg
similarity index 100%
rename from samples/js/web-server-responses-vision-example/test_image.jpg
rename to samples/cs/responses-api/test_image.jpg
diff --git a/samples/cs/tool-calling-foundry-local-sdk/Program.cs b/samples/cs/tool-calling-foundry-local-sdk/Program.cs
deleted file mode 100644
index a40742331..000000000
--- a/samples/cs/tool-calling-foundry-local-sdk/Program.cs
+++ /dev/null
@@ -1,179 +0,0 @@
-﻿// <complete_code>
-// <imports>
-using Microsoft.AI.Foundry.Local;
-using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
-using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
-using Betalgo.Ranul.OpenAI.ObjectModels.SharedModels;
-using System.Text.Json;
-// </imports>
-
-// <init>
-CancellationToken ct = new CancellationToken();
-
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-
-// Initialize the singleton instance.
-await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
-var mgr = FoundryLocalManager.Instance;
-
-
-// Ensure that any Execution Provider (EP) downloads run and are completed.
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-// </init>
-
-
-// <model_setup>
-// Get the model catalog
-var catalog = await mgr.GetCatalogAsync();
-
-
-// Get a model using an alias.
-var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found");
-
-
-// Download the model (the method skips download if already cached)
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f)
-    {
-        Console.WriteLine();
-    }
-});
-
-
-// Load the model
-Console.Write($"Loading model {model.Id}...");
-await model.LoadAsync();
-Console.WriteLine("done.");
-// </model_setup>
-
-
-// Get a chat client
-var chatClient = await model.GetChatClientAsync();
-chatClient.Settings.ToolChoice = ToolChoice.Required; // Force the model to make a tool call
-
-
-// Prepare messages
-List<ChatMessage> messages =
-[
-    new ChatMessage { Role = "system", Content = "You are a helpful AI assistant. If necessary, you can use any provided tools to answer the question." },
-    new ChatMessage { Role = "user", Content = "What is the answer to 7 multiplied by 6?" }
-];
-
-
-// <tool_definitions>
-// Prepare tools
-List<ToolDefinition> tools =
-[
-    new ToolDefinition
-    {
-        Type = "function",
-        Function = new FunctionDefinition()
-        {
-            Name = "multiply_numbers",
-            Description = "A tool for multiplying two numbers.",
-            Parameters = new PropertyDefinition()
-            {
-                Type = "object",
-                Properties = new Dictionary<string, PropertyDefinition>()
-                {
-                    { "first", new PropertyDefinition() { Type = "integer", Description = "The first number in the operation" } },
-                    { "second", new PropertyDefinition() { Type = "integer", Description = "The second number in the operation" } }
-                },
-                Required = ["first", "second"]
-            }
-        }
-    }
-];
-// </tool_definitions>
-
-
-// <tool_loop>
-// Get a streaming chat completion response
-var toolCallResponses = new List<ChatCompletionCreateResponse>();
-Console.WriteLine("Chat completion response:");
-var streamingResponse = chatClient.CompleteChatStreamingAsync(messages, tools, ct);
-await foreach (var chunk in streamingResponse)
-{
-    var content = chunk.Choices[0].Message.Content;
-    Console.Write(content);
-    Console.Out.Flush();
-
-    if (chunk.Choices[0].FinishReason == "tool_calls")
-    {
-        toolCallResponses.Add(chunk);
-    }
-}
-Console.WriteLine();
-
-
-// Invoke tools called and append responses to the chat
-foreach (var chunk in toolCallResponses)
-{
-    var call = chunk?.Choices[0].Message.ToolCalls?[0].FunctionCall;
-    if (call?.Name == "multiply_numbers")
-    {
-        var arguments = JsonSerializer.Deserialize<Dictionary<string, int>>(call.Arguments!)!;
-        var first = arguments["first"];
-        var second = arguments["second"];
-
-        Console.WriteLine($"\nInvoking tool: {call?.Name} with arguments {first} and {second}");
-        var result = Utils.MultiplyNumbers(first, second);
-        Console.WriteLine($"Tool response: {result.ToString()}");
-
-        var response = new ChatMessage
-        {
-            Role = "tool",
-            ToolCallId = chunk!.Choices[0].Message.ToolCalls![0].Id,
-            Content = result.ToString(),
-        };
-        messages.Add(response);
-    }
-}
-Console.WriteLine("\nTool calls completed. Prompting model to continue conversation...\n");
-
-
-// Prompt the model to continue the conversation after the tool call
-messages.Add(new ChatMessage { Role = "system", Content = "Respond only with the answer generated by the tool." });
-
-
-// Set tool calling back to auto so that the model can decide whether to call
-// the tool again or continue the conversation based on the new user prompt
-chatClient.Settings.ToolChoice = ToolChoice.Auto;
-
-
-// Run the next turn of the conversation
-Console.WriteLine("Chat completion response:");
-streamingResponse = chatClient.CompleteChatStreamingAsync(messages, tools, ct);
-await foreach (var chunk in streamingResponse)
-{
-    var content = chunk.Choices[0].Message.Content;
-    Console.Write(content);
-    Console.Out.Flush();
-}
-Console.WriteLine();
-// </tool_loop>
-
-
-// <cleanup>
-// Tidy up - unload the model
-await model.UnloadAsync();
-// </cleanup>
-// </complete_code>
\ No newline at end of file
diff --git a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj
deleted file mode 100644
index 97cb8ef34..000000000
--- a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj
+++ /dev/null
@@ -1,35 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Shared utilities -->
-  <ItemGroup>
-    <Compile Include="../Shared/*.cs" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.sln b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.sln
deleted file mode 100644
index adbf5ea23..000000000
--- a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalSdk", "ToolCallingFoundryLocalSdk.csproj", "{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x64.ActiveCfg = Debug|x64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x64.Build.0 = Debug|x64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x86.ActiveCfg = Debug|ARM64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x86.Build.0 = Debug|ARM64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|Any CPU.Build.0 = Release|ARM64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x64.ActiveCfg = Release|x64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x64.Build.0 = Release|x64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x86.ActiveCfg = Release|ARM64
-		{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/tool-calling-foundry-local-web-server/Program.cs b/samples/cs/tool-calling-foundry-local-web-server/Program.cs
deleted file mode 100644
index 6644a438b..000000000
--- a/samples/cs/tool-calling-foundry-local-web-server/Program.cs
+++ /dev/null
@@ -1,190 +0,0 @@
-﻿// <complete_code>
-using Microsoft.AI.Foundry.Local;
-using OpenAI;
-using OpenAI.Chat;
-using System.ClientModel;
-using System.Text.Json;
-
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information,
-    Web = new Configuration.WebService
-    {
-        Urls = "http://127.0.0.1:52495"
-    }
-};
-
-
-// Initialize the singleton instance.
-await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
-var mgr = FoundryLocalManager.Instance;
-
-
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-
-
-// Get the model catalog
-var catalog = await mgr.GetCatalogAsync();
-
-
-// Get a model using an alias
-var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found");
-// Download the model (the method skips download if already cached)
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f)
-    {
-        Console.WriteLine();
-    }
-});
-
-
-// Load the model
-Console.Write($"Loading model {model.Id}...");
-await model.LoadAsync();
-Console.WriteLine("done.");
-
-
-// Start the web service
-Console.Write($"Starting web service on {config.Web.Urls}...");
-await mgr.StartWebServiceAsync();
-Console.WriteLine("done.");
-
-
-// <<<<<< OPEN AI SDK USAGE >>>>>>
-// Use the OpenAI SDK to call the local Foundry web service
-
-ApiKeyCredential key = new ApiKeyCredential("notneeded");
-OpenAIClient client = new OpenAIClient(key, new OpenAIClientOptions
-{
-    Endpoint = new Uri(config.Web.Urls + "/v1"),
-});
-
-
-// Get chat client
-var chatClient = client.GetChatClient(model.Id);
-
-
-// Prepare messages
-var messages = new List<ChatMessage>
-{
-    ChatMessage.CreateSystemMessage("You are a helpful AI assistant. If necessary, you can use any provided tools to answer the question."),
-    ChatMessage.CreateUserMessage("What is the answer to 7 multiplied by 6?")
-};
-
-
-// Prepare tools
-var tools = new List<ChatTool>
-{
-    ChatTool.CreateFunctionTool(
-        functionName: "multiply_numbers",
-        functionDescription: "A tool for multiplying two numbers.",
-        functionParameters: BinaryData.FromString("""
-        {
-            "type": "object",
-            "properties": {
-                "first": { "type": "number", "description": "The first number in the operation" },
-                "second": { "type": "number", "description": "The second number in the operation" }
-            },
-            "required": ["first", "second"]
-        }
-        """)
-    )
-};
-
-
-// Prepare chat completion options
-var options = new ChatCompletionOptions
-{
-    ToolChoice = ChatToolChoice.CreateRequiredChoice()  // Force the model to make a tool call
-};
-foreach (var tool in tools)
-{
-    options.Tools.Add(tool);
-}
-
-
-// Get a streaming chat completion response
-var completionUpdates = chatClient.CompleteChatStreaming(messages, options);
-var toolCalls = new List<StreamingChatToolCallUpdate>();
-Console.Write($"[ASSISTANT]: ");
-foreach (var completionUpdate in completionUpdates)
-{
-    if (completionUpdate.ContentUpdate.Count > 0)
-    {
-        Console.Write(completionUpdate.ContentUpdate[0].Text);
-    }
-
-    if (completionUpdate.FinishReason == ChatFinishReason.ToolCalls)
-    {
-        foreach (var toolCall in completionUpdate.ToolCallUpdates)
-        {
-            toolCalls.Add(toolCall);
-        }
-    }
-}
-Console.WriteLine();
-
-
-// Invoke tools called and append responses to the chat
-foreach (var toolCall in toolCalls)
-{
-    if (toolCall.FunctionName == "multiply_numbers")
-    {
-        var arguments = JsonDocument.Parse(toolCall.FunctionArgumentsUpdate.ToString()).RootElement;
-        var first = arguments.GetProperty("first").GetInt32();
-        var second = arguments.GetProperty("second").GetInt32();
-
-        Console.WriteLine($"\nInvoking tool: {toolCall.FunctionName} with arguments {first} and {second}");
-        var result = Utils.MultiplyNumbers(first, second);
-        Console.WriteLine($"Tool response: {result.ToString()}");
-
-        messages.Add(ChatMessage.CreateToolMessage(toolCallId: "abcd1234", content: result.ToString()));
-    }
-}
-Console.WriteLine("\nTool calls completed. Prompting model to continue conversation...\n");
-
-
-// Prompt the model to continue the conversation after the tool call
-messages.Add(ChatMessage.CreateSystemMessage("Respond only with the answer generated by the tool."));
-
-
-// Set tool calling back to auto so that the model can decide whether to call
-// the tool again or continue the conversation based on the new user prompt
-options.ToolChoice = ChatToolChoice.CreateAutoChoice();
-
-
-// Run the next turn of the conversation
-Console.WriteLine("Chat completion response:");
-completionUpdates = chatClient.CompleteChatStreaming(messages, options);
-Console.Write($"[ASSISTANT]: ");
-foreach (var completionUpdate in completionUpdates)
-{
-    if (completionUpdate.ContentUpdate.Count > 0)
-    {
-        Console.Write(completionUpdate.ContentUpdate[0].Text);
-    }
-}
-Console.WriteLine();
-
-// <<<<<< END OPEN AI SDK USAGE >>>>>>
-
-
-// Tidy up
-// Stop the web service and unload model
-await mgr.StopWebServiceAsync();
-await model.UnloadAsync();
-// </complete_code>
\ No newline at end of file
diff --git a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.sln b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.sln
deleted file mode 100644
index 7d1568e18..000000000
--- a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalWebServer", "ToolCallingFoundryLocalWebServer.csproj", "{F9BD2479-A235-4BBF-A722-DF180A076143}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x64.ActiveCfg = Debug|x64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x64.Build.0 = Debug|x64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x86.ActiveCfg = Debug|ARM64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x86.Build.0 = Debug|ARM64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Release|Any CPU.Build.0 = Release|ARM64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x64.ActiveCfg = Release|x64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x64.Build.0 = Release|x64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x86.ActiveCfg = Release|ARM64
-		{F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/tutorial-chat-assistant/Program.cs b/samples/cs/tutorial-chat-assistant/Program.cs
deleted file mode 100644
index d06de6a5e..000000000
--- a/samples/cs/tutorial-chat-assistant/Program.cs
+++ /dev/null
@@ -1,114 +0,0 @@
-// <complete_code>
-// <imports>
-using Microsoft.AI.Foundry.Local;
-using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
-using Microsoft.Extensions.Logging;
-// </imports>
-
-// <init>
-CancellationToken ct = CancellationToken.None;
-
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-using var loggerFactory = LoggerFactory.Create(builder =>
-{
-    builder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information);
-});
-var logger = loggerFactory.CreateLogger<Program>();
-
-// Initialize the singleton instance
-await FoundryLocalManager.CreateAsync(config, logger);
-var mgr = FoundryLocalManager.Instance;
-
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-
-// Select and load a model from the catalog
-var catalog = await mgr.GetCatalogAsync();
-var model = await catalog.GetModelAsync("qwen2.5-0.5b")
-    ?? throw new Exception("Model not found");
-
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f) Console.WriteLine();
-});
-
-await model.LoadAsync();
-Console.WriteLine("Model loaded and ready.");
-
-// Get a chat client
-var chatClient = await model.GetChatClientAsync();
-// </init>
-
-// <system_prompt>
-// Start the conversation with a system prompt
-var messages = new List<ChatMessage>
-{
-    new ChatMessage
-    {
-        Role = "system",
-        Content = "You are a helpful, friendly assistant. Keep your responses " +
-                  "concise and conversational. If you don't know something, say so."
-    }
-};
-// </system_prompt>
-
-Console.WriteLine("\nChat assistant ready! Type 'quit' to exit.\n");
-
-// <conversation_loop>
-while (true)
-{
-    Console.Write("You: ");
-    var userInput = Console.ReadLine();
-    if (string.IsNullOrWhiteSpace(userInput) ||
-        userInput.Equals("quit", StringComparison.OrdinalIgnoreCase) ||
-        userInput.Equals("exit", StringComparison.OrdinalIgnoreCase))
-    {
-        break;
-    }
-
-    // Add the user's message to conversation history
-    messages.Add(new ChatMessage { Role = "user", Content = userInput });
-
-    // <streaming>
-    // Stream the response token by token
-    Console.Write("Assistant: ");
-    var fullResponse = string.Empty;
-    var streamingResponse = chatClient.CompleteChatStreamingAsync(messages, ct);
-    await foreach (var chunk in streamingResponse)
-    {
-        var content = chunk.Choices[0].Message.Content;
-        if (!string.IsNullOrEmpty(content))
-        {
-            Console.Write(content);
-            Console.Out.Flush();
-            fullResponse += content;
-        }
-    }
-    Console.WriteLine("\n");
-    // </streaming>
-
-    // Add the complete response to conversation history
-    messages.Add(new ChatMessage { Role = "assistant", Content = fullResponse });
-}
-// </conversation_loop>
-
-// Clean up - unload the model
-await model.UnloadAsync();
-Console.WriteLine("Model unloaded. Goodbye!");
-// </complete_code>
diff --git a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj
deleted file mode 100644
index fcc9257da..000000000
--- a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj
+++ /dev/null
@@ -1,37 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Packages -->
-  <ItemGroup>
-    <PackageReference Include="Betalgo.Ranul.OpenAI" />
-    <PackageReference Include="Microsoft.Extensions.Logging" />
-    <PackageReference Include="Microsoft.Extensions.Logging.Console" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.sln b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.sln
deleted file mode 100644
index a9c77e164..000000000
--- a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialChatAssistant", "TutorialChatAssistant.csproj", "{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x64.ActiveCfg = Debug|x64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x64.Build.0 = Debug|x64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x86.ActiveCfg = Debug|ARM64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x86.Build.0 = Debug|ARM64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|Any CPU.Build.0 = Release|ARM64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x64.ActiveCfg = Release|x64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x64.Build.0 = Release|x64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x86.ActiveCfg = Release|ARM64
-		{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/tutorial-document-summarizer/Program.cs b/samples/cs/tutorial-document-summarizer/Program.cs
deleted file mode 100644
index 333d5c964..000000000
--- a/samples/cs/tutorial-document-summarizer/Program.cs
+++ /dev/null
@@ -1,122 +0,0 @@
-// <complete_code>
-// <imports>
-using Microsoft.AI.Foundry.Local;
-using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
-using Microsoft.Extensions.Logging;
-// </imports>
-
-// <init>
-CancellationToken ct = CancellationToken.None;
-
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-using var loggerFactory = LoggerFactory.Create(builder =>
-{
-    builder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information);
-});
-var logger = loggerFactory.CreateLogger<Program>();
-
-// Initialize the singleton instance
-await FoundryLocalManager.CreateAsync(config, logger);
-var mgr = FoundryLocalManager.Instance;
-
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-
-// Select and load a model from the catalog
-var catalog = await mgr.GetCatalogAsync();
-var model = await catalog.GetModelAsync("qwen2.5-0.5b")
-    ?? throw new Exception("Model not found");
-
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f) Console.WriteLine();
-});
-
-await model.LoadAsync();
-Console.WriteLine("Model loaded and ready.\n");
-
-// Get a chat client
-var chatClient = await model.GetChatClientAsync();
-// </init>
-
-// <summarization>
-var systemPrompt =
-    "Summarize the following document into concise bullet points. " +
-    "Focus on the key points and main ideas.";
-
-// <file_reading>
-var target = args.Length > 0 ? args[0] : "document.txt";
-// </file_reading>
-
-if (Directory.Exists(target))
-{
-    await SummarizeDirectoryAsync(chatClient, target, systemPrompt, ct);
-}
-else
-{
-    Console.WriteLine($"--- {Path.GetFileName(target)} ---");
-    await SummarizeFileAsync(chatClient, target, systemPrompt, ct);
-}
-// </summarization>
-
-// Clean up
-await model.UnloadAsync();
-Console.WriteLine("\nModel unloaded. Done!");
-
-async Task SummarizeFileAsync(
-    dynamic client,
-    string filePath,
-    string prompt,
-    CancellationToken token)
-{
-    var fileContent = await File.ReadAllTextAsync(filePath, token);
-    var messages = new List<ChatMessage>
-    {
-        new ChatMessage { Role = "system", Content = prompt },
-        new ChatMessage { Role = "user", Content = fileContent }
-    };
-
-    var response = await client.CompleteChatAsync(messages, token);
-    Console.WriteLine(response.Choices[0].Message.Content);
-}
-
-async Task SummarizeDirectoryAsync(
-    dynamic client,
-    string directory,
-    string prompt,
-    CancellationToken token)
-{
-    var txtFiles = Directory.GetFiles(directory, "*.txt")
-        .OrderBy(f => f)
-        .ToArray();
-
-    if (txtFiles.Length == 0)
-    {
-        Console.WriteLine($"No .txt files found in {directory}");
-        return;
-    }
-
-    foreach (var txtFile in txtFiles)
-    {
-        Console.WriteLine($"--- {Path.GetFileName(txtFile)} ---");
-        await SummarizeFileAsync(client, txtFile, prompt, token);
-        Console.WriteLine();
-    }
-}
-// </complete_code>
diff --git a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj
deleted file mode 100644
index fcc9257da..000000000
--- a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj
+++ /dev/null
@@ -1,37 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Packages -->
-  <ItemGroup>
-    <PackageReference Include="Betalgo.Ranul.OpenAI" />
-    <PackageReference Include="Microsoft.Extensions.Logging" />
-    <PackageReference Include="Microsoft.Extensions.Logging.Console" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.sln b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.sln
deleted file mode 100644
index 7d7a0fc99..000000000
--- a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialDocumentSummarizer", "TutorialDocumentSummarizer.csproj", "{6868D03F-BD8E-46ED-9A5B-95346A3810A4}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x64.ActiveCfg = Debug|x64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x64.Build.0 = Debug|x64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x86.ActiveCfg = Debug|ARM64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x86.Build.0 = Debug|ARM64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|Any CPU.Build.0 = Release|ARM64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x64.ActiveCfg = Release|x64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x64.Build.0 = Release|x64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x86.ActiveCfg = Release|ARM64
-		{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/tutorial-tool-calling/Program.cs b/samples/cs/tutorial-tool-calling/Program.cs
deleted file mode 100644
index 5ae60419a..000000000
--- a/samples/cs/tutorial-tool-calling/Program.cs
+++ /dev/null
@@ -1,241 +0,0 @@
-// <complete_code>
-// <imports>
-using System.Text.Json;
-using Microsoft.AI.Foundry.Local;
-using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
-using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
-using Betalgo.Ranul.OpenAI.ObjectModels.SharedModels;
-using Microsoft.Extensions.Logging;
-// </imports>
-
-CancellationToken ct = CancellationToken.None;
-
-// <tool_definitions>
-// --- Tool definitions ---
-List<ToolDefinition> tools =
-[
-    new ToolDefinition
-    {
-        Type = "function",
-        Function = new FunctionDefinition()
-        {
-            Name = "get_weather",
-            Description = "Get the current weather for a location",
-            Parameters = new PropertyDefinition()
-            {
-                Type = "object",
-                Properties = new Dictionary<string, PropertyDefinition>()
-                {
-                    { "location", new PropertyDefinition() { Type = "string", Description = "The city or location" } },
-                    { "unit", new PropertyDefinition() { Type = "string", Description = "Temperature unit (celsius or fahrenheit)" } }
-                },
-                Required = ["location"]
-            }
-        }
-    },
-    new ToolDefinition
-    {
-        Type = "function",
-        Function = new FunctionDefinition()
-        {
-            Name = "calculate",
-            Description = "Perform a math calculation",
-            Parameters = new PropertyDefinition()
-            {
-                Type = "object",
-                Properties = new Dictionary<string, PropertyDefinition>()
-                {
-                    { "expression", new PropertyDefinition() { Type = "string", Description = "The math expression to evaluate" } }
-                },
-                Required = ["expression"]
-            }
-        }
-    }
-];
-
-// --- Tool implementations ---
-string ExecuteTool(string functionName, JsonElement arguments)
-{
-    switch (functionName)
-    {
-        case "get_weather":
-            var location = arguments.GetProperty("location")
-                .GetString() ?? "unknown";
-            var unit = arguments.TryGetProperty("unit", out var u)
-                ? u.GetString() ?? "celsius"
-                : "celsius";
-            var temp = unit == "celsius" ? 22 : 72;
-            return JsonSerializer.Serialize(new
-            {
-                location,
-                temperature = temp,
-                unit,
-                condition = "Sunny"
-            });
-
-        case "calculate":
-            var expression = arguments.GetProperty("expression")
-                .GetString() ?? "";
-            try
-            {
-                var result = new System.Data.DataTable()
-                    .Compute(expression, null);
-                return JsonSerializer.Serialize(new
-                {
-                    expression,
-                    result = result?.ToString()
-                });
-            }
-            catch (Exception ex)
-            {
-                return JsonSerializer.Serialize(new
-                {
-                    error = ex.Message
-                });
-            }
-
-        default:
-            return JsonSerializer.Serialize(new
-            {
-                error = $"Unknown function: {functionName}"
-            });
-    }
-}
-// </tool_definitions>
-
-// <init>
-// --- Main application ---
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-using var loggerFactory = LoggerFactory.Create(builder =>
-{
-    builder.SetMinimumLevel(
-        Microsoft.Extensions.Logging.LogLevel.Information
-    );
-});
-var logger = loggerFactory.CreateLogger<Program>();
-
-await FoundryLocalManager.CreateAsync(config, logger);
-var mgr = FoundryLocalManager.Instance;
-
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-
-var catalog = await mgr.GetCatalogAsync();
-var model = await catalog.GetModelAsync("qwen2.5-0.5b")
-    ?? throw new Exception("Model not found");
-
-await model.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading model: {progress:F2}%");
-    if (progress >= 100f) Console.WriteLine();
-});
-
-await model.LoadAsync();
-Console.WriteLine("Model loaded and ready.");
-
-var chatClient = await model.GetChatClientAsync();
-chatClient.Settings.ToolChoice = ToolChoice.Auto;
-
-var messages = new List<ChatMessage>
-{
-    new ChatMessage
-    {
-        Role = "system",
-        Content = "You are a helpful assistant with access to tools. " +
-                  "Use them when needed to answer questions accurately."
-    }
-};
-// </init>
-
-// <tool_loop>
-Console.WriteLine("\nTool-calling assistant ready! Type 'quit' to exit.\n");
-
-while (true)
-{
-    Console.Write("You: ");
-    var userInput = Console.ReadLine();
-    if (string.IsNullOrWhiteSpace(userInput) ||
-        userInput.Equals("quit", StringComparison.OrdinalIgnoreCase) ||
-        userInput.Equals("exit", StringComparison.OrdinalIgnoreCase))
-    {
-        break;
-    }
-
-    messages.Add(new ChatMessage
-    {
-        Role = "user",
-        Content = userInput
-    });
-
-    var response = await chatClient.CompleteChatAsync(
-        messages, tools, ct
-    );
-
-    var choice = response.Choices[0].Message;
-
-    if (choice.ToolCalls is { Count: > 0 })
-    {
-        messages.Add(choice);
-
-        foreach (var toolCall in choice.ToolCalls)
-        {
-            var toolArgs = JsonDocument.Parse(
-                toolCall.FunctionCall.Arguments
-            ).RootElement;
-            Console.WriteLine(
-                $"  Tool call: {toolCall.FunctionCall.Name}({toolArgs})"
-            );
-
-            var result = ExecuteTool(
-                toolCall.FunctionCall.Name, toolArgs
-            );
-            messages.Add(new ChatMessage
-            {
-                Role = "tool",
-                ToolCallId = toolCall.Id,
-                Content = result
-            });
-        }
-
-        var finalResponse = await chatClient.CompleteChatAsync(
-            messages, tools, ct
-        );
-        var answer = finalResponse.Choices[0].Message.Content ?? "";
-        messages.Add(new ChatMessage
-        {
-            Role = "assistant",
-            Content = answer
-        });
-        Console.WriteLine($"Assistant: {answer}\n");
-    }
-    else
-    {
-        var answer = choice.Content ?? "";
-        messages.Add(new ChatMessage
-        {
-            Role = "assistant",
-            Content = answer
-        });
-        Console.WriteLine($"Assistant: {answer}\n");
-    }
-}
-
-await model.UnloadAsync();
-Console.WriteLine("Model unloaded. Goodbye!");
-// </tool_loop>
-// </complete_code>
diff --git a/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj b/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj
deleted file mode 100644
index fcc9257da..000000000
--- a/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj
+++ /dev/null
@@ -1,37 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Packages -->
-  <ItemGroup>
-    <PackageReference Include="Betalgo.Ranul.OpenAI" />
-    <PackageReference Include="Microsoft.Extensions.Logging" />
-    <PackageReference Include="Microsoft.Extensions.Logging.Console" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/tutorial-tool-calling/TutorialToolCalling.sln b/samples/cs/tutorial-tool-calling/TutorialToolCalling.sln
deleted file mode 100644
index 6a86331bb..000000000
--- a/samples/cs/tutorial-tool-calling/TutorialToolCalling.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialToolCalling", "TutorialToolCalling.csproj", "{155923AB-A0C6-447D-A46A-7C8318D31596}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x64.ActiveCfg = Debug|x64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x64.Build.0 = Debug|x64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x86.ActiveCfg = Debug|ARM64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x86.Build.0 = Debug|ARM64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Release|Any CPU.Build.0 = Release|ARM64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x64.ActiveCfg = Release|x64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x64.Build.0 = Release|x64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x86.ActiveCfg = Release|ARM64
-		{155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/tutorial-voice-to-text/Program.cs b/samples/cs/tutorial-voice-to-text/Program.cs
deleted file mode 100644
index 9a1a36c33..000000000
--- a/samples/cs/tutorial-voice-to-text/Program.cs
+++ /dev/null
@@ -1,118 +0,0 @@
-// <complete_code>
-// <imports>
-using Microsoft.AI.Foundry.Local;
-using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
-using Microsoft.Extensions.Logging;
-using System.Text;
-// </imports>
-
-// <init>
-CancellationToken ct = CancellationToken.None;
-
-var config = new Configuration
-{
-    AppName = "foundry_local_samples",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-using var loggerFactory = LoggerFactory.Create(builder =>
-{
-    builder.SetMinimumLevel(
-        Microsoft.Extensions.Logging.LogLevel.Information
-    );
-});
-var logger = loggerFactory.CreateLogger<Program>();
-
-// Initialize the singleton instance
-await FoundryLocalManager.CreateAsync(config, logger);
-var mgr = FoundryLocalManager.Instance;
-
-// Download and register all execution providers.
-var currentEp = "";
-await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
-{
-    if (epName != currentEp)
-    {
-        if (currentEp != "") Console.WriteLine();
-        currentEp = epName;
-    }
-    Console.Write($"\r  {epName.PadRight(30)}  {percent,6:F1}%");
-});
-if (currentEp != "") Console.WriteLine();
-
-var catalog = await mgr.GetCatalogAsync();
-// </init>
-
-// <transcription>
-// Load the speech-to-text model
-var speechModel = await catalog.GetModelAsync("whisper-tiny")
-    ?? throw new Exception("Speech model not found");
-
-await speechModel.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading speech model: {progress:F2}%");
-    if (progress >= 100f) Console.WriteLine();
-});
-
-await speechModel.LoadAsync();
-Console.WriteLine("Speech model loaded.");
-
-// Transcribe the audio file
-var audioClient = await speechModel.GetAudioClientAsync();
-var transcriptionText = new StringBuilder();
-
-Console.WriteLine("\nTranscription:");
-var audioResponse = audioClient
-    .TranscribeAudioStreamingAsync("meeting-notes.wav", ct);
-await foreach (var chunk in audioResponse)
-{
-    Console.Write(chunk.Text);
-    transcriptionText.Append(chunk.Text);
-}
-Console.WriteLine();
-
-// Unload the speech model to free memory
-await speechModel.UnloadAsync();
-// </transcription>
-
-// <summarization>
-// Load the chat model for summarization
-var chatModel = await catalog.GetModelAsync("qwen2.5-0.5b")
-    ?? throw new Exception("Chat model not found");
-
-await chatModel.DownloadAsync(progress =>
-{
-    Console.Write($"\rDownloading chat model: {progress:F2}%");
-    if (progress >= 100f) Console.WriteLine();
-});
-
-await chatModel.LoadAsync();
-Console.WriteLine("Chat model loaded.");
-
-// Summarize the transcription into organized notes
-var chatClient = await chatModel.GetChatClientAsync();
-var messages = new List<ChatMessage>
-{
-    new ChatMessage
-    {
-        Role = "system",
-        Content = "You are a note-taking assistant. Summarize " +
-                  "the following transcription into organized, " +
-                  "concise notes with bullet points."
-    },
-    new ChatMessage
-    {
-        Role = "user",
-        Content = transcriptionText.ToString()
-    }
-};
-
-var chatResponse = await chatClient.CompleteChatAsync(messages, ct);
-var summary = chatResponse.Choices[0].Message.Content;
-Console.WriteLine($"\nSummary:\n{summary}");
-
-// Clean up
-await chatModel.UnloadAsync();
-Console.WriteLine("\nDone. Models unloaded.");
-// </summarization>
-// </complete_code>
diff --git a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj
deleted file mode 100644
index fcc9257da..000000000
--- a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj
+++ /dev/null
@@ -1,37 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <!-- Windows: WinML for hardware acceleration -->
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <!-- Non-Windows: standard SDK -->
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <!-- Linux GPU support -->
-  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
-  </ItemGroup>
-
-  <!-- Packages -->
-  <ItemGroup>
-    <PackageReference Include="Betalgo.Ranul.OpenAI" />
-    <PackageReference Include="Microsoft.Extensions.Logging" />
-    <PackageReference Include="Microsoft.Extensions.Logging.Console" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.sln b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.sln
deleted file mode 100644
index ae2a2b396..000000000
--- a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.sln
+++ /dev/null
@@ -1,34 +0,0 @@
-﻿
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio Version 17
-VisualStudioVersion = 17.0.31903.59
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialVoiceToText", "TutorialVoiceToText.csproj", "{C12663C3-AB3F-4652-BC43-A92E43602ACC}"
-EndProject
-Global
-	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|Any CPU = Debug|Any CPU
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|Any CPU = Release|Any CPU
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
-	EndGlobalSection
-	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|Any CPU.ActiveCfg = Debug|ARM64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|Any CPU.Build.0 = Debug|ARM64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x64.ActiveCfg = Debug|x64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x64.Build.0 = Debug|x64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x86.ActiveCfg = Debug|ARM64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x86.Build.0 = Debug|ARM64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|Any CPU.ActiveCfg = Release|ARM64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|Any CPU.Build.0 = Release|ARM64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x64.ActiveCfg = Release|x64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x64.Build.0 = Release|x64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x86.ActiveCfg = Release|ARM64
-		{C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x86.Build.0 = Release|ARM64
-	EndGlobalSection
-	GlobalSection(SolutionProperties) = preSolution
-		HideSolutionNode = FALSE
-	EndGlobalSection
-EndGlobal
diff --git a/samples/cs/verify-winml/Program.cs b/samples/cs/verify-winml/Program.cs
deleted file mode 100644
index 27a141296..000000000
--- a/samples/cs/verify-winml/Program.cs
+++ /dev/null
@@ -1,278 +0,0 @@
-/// <summary>
-/// Foundry Local SDK - WinML 2.0 EP Verification (C#)
-///
-/// Verifies:
-///   1. Execution providers are discovered and registered
-///   2. Accelerated models appear in catalog after EP registration
-///   3. Streaming chat completions work on an accelerated model
-/// </summary>
-
-using Microsoft.AI.Foundry.Local;
-using Microsoft.Extensions.Logging;
-using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
-
-const string PASS = "\x1b[92m[PASS]\x1b[0m";
-const string FAIL = "\x1b[91m[FAIL]\x1b[0m";
-const string INFO = "\x1b[94m[INFO]\x1b[0m";
-const string WARN = "\x1b[93m[WARN]\x1b[0m";
-
-var results = new List<(string Name, bool Passed)>();
-
-void LogResult(string testName, bool passed, string detail = "")
-{
-    var status = passed ? PASS : FAIL;
-    var msg = string.IsNullOrEmpty(detail) ? $"{status} {testName}" : $"{status} {testName} - {detail}";
-    Console.WriteLine(msg);
-    results.Add((testName, passed));
-}
-
-void PrintSeparator(string title)
-{
-    Console.WriteLine($"\n{new string('=', 60)}");
-    Console.WriteLine($"  {title}");
-    Console.WriteLine($"{new string('=', 60)}\n");
-}
-
-void PrintSummary()
-{
-    PrintSeparator("Summary");
-    var passed = results.Count(r => r.Passed);
-    foreach (var (name, p) in results)
-    {
-        Console.WriteLine($"  {(p ? "✓" : "✗")} {name}");
-    }
-
-    Console.WriteLine($"\n  {passed}/{results.Count} tests passed");
-}
-
-bool IsAcceleratedVariant(IModel model)
-{
-    var runtime = model.Info?.Runtime;
-    return runtime != null && (runtime.DeviceType == DeviceType.GPU || runtime.DeviceType == DeviceType.NPU);
-}
-
-CancellationToken ct = CancellationToken.None;
-
-// ── 0. Initialize FoundryLocalManager ──────────────────────
-PrintSeparator("Initialization");
-var config = new Configuration
-{
-    AppName = "verify_winml",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
-};
-
-using var loggerFactory = LoggerFactory.Create(builder =>
-    builder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information));
-var logger = loggerFactory.CreateLogger<Program>();
-
-await FoundryLocalManager.CreateAsync(config, logger);
-var mgr = FoundryLocalManager.Instance;
-Console.WriteLine($"{INFO} FoundryLocalManager initialized.");
-
-// ── 1. Discover & Register EPs ────────────────────────────
-PrintSeparator("Step 1: Discover & Register Execution Providers");
-EpInfo[] eps = [];
-try
-{
-    eps = mgr.DiscoverEps();
-    Console.WriteLine($"{INFO} Discovered {eps.Length} execution providers:");
-    foreach (var ep in eps)
-    {
-        Console.WriteLine($"  - {ep.Name,-40}  Registered: {ep.IsRegistered}");
-    }
-
-    LogResult("EP Discovery", true, $"{eps.Length} EP(s) found");
-}
-catch (Exception e)
-{
-    LogResult("EP Discovery", false, e.Message);
-}
-
-if (eps.Length == 0)
-{
-    var detail = "No execution providers discovered on this machine";
-    LogResult("EP Download & Registration", false, detail);
-    Console.WriteLine($"\n{FAIL} {detail}.");
-    PrintSummary();
-    return;
-}
-
-try
-{
-    string? currentProgressEp = null;
-    var currentProgressPercent = -1d;
-
-    var epResult = await mgr.DownloadAndRegisterEpsAsync(
-        new Action<string, double>((epName, percent) =>
-        {
-            if (currentProgressEp != null &&
-                (!epName.Equals(currentProgressEp, StringComparison.OrdinalIgnoreCase) || percent < currentProgressPercent))
-            {
-                Console.WriteLine();
-            }
-
-            currentProgressEp = epName;
-            currentProgressPercent = percent;
-            Console.Write($"\r  Downloading {epName}: {percent:F1}%");
-        }),
-        ct);
-
-    if (currentProgressEp != null)
-    {
-        Console.WriteLine();
-    }
-
-    Console.WriteLine($"{INFO} EP registration: success={epResult.Success}, status={epResult.Status}");
-    if (epResult.RegisteredEps?.Any() == true)
-    {
-        Console.WriteLine($"  Registered: {string.Join(", ", epResult.RegisteredEps)}");
-    }
-
-    if (epResult.FailedEps?.Any() == true)
-    {
-        Console.WriteLine($"  Failed:     {string.Join(", ", epResult.FailedEps)}");
-    }
-
-    var downloadOk = epResult.Success;
-    var detail = downloadOk && epResult.RegisteredEps?.Any() == true
-        ? $"{epResult.RegisteredEps.Length} EP(s) registered"
-        : epResult.Status;
-    LogResult("EP Download & Registration", downloadOk, detail);
-    if (!downloadOk)
-    {
-        PrintSummary();
-        return;
-    }
-}
-catch (Exception e)
-{
-    Console.WriteLine();
-    LogResult("EP Download & Registration", false, e.Message);
-    PrintSummary();
-    return;
-}
-
-// ── 2. List Models & Find Accelerated Variants ────────────
-PrintSeparator("Step 2: Model Catalog - Accelerated Models");
-var catalog = await mgr.GetCatalogAsync();
-var models = await catalog.ListModelsAsync();
-Console.WriteLine($"{INFO} Total models in catalog: {models.Count}");
-
-var acceleratedVariants = new List<IModel>();
-foreach (var model in models)
-{
-    foreach (var variant in model.Variants)
-    {
-        if (IsAcceleratedVariant(variant))
-        {
-            acceleratedVariants.Add(variant);
-            var runtime = variant.Info?.Runtime;
-            Console.WriteLine($"  - {variant.Id,-50}  Device: {runtime?.DeviceType,-3}  EP: {runtime?.ExecutionProvider ?? "?"}");
-        }
-    }
-}
-
-LogResult("Catalog - Accelerated models found", acceleratedVariants.Count > 0,
-    acceleratedVariants.Count > 0 ? $"{acceleratedVariants.Count} accelerated variant(s)" : "No accelerated model variants");
-
-if (acceleratedVariants.Count == 0)
-{
-    Console.WriteLine($"\n{FAIL} No accelerated model variants are available.");
-    Console.WriteLine($"{WARN} Ensure the system has a compatible accelerator and matching model variants installed.");
-    PrintSummary();
-    return;
-}
-
-// ── 3. Download & Load Model ──────────────────────────────
-PrintSeparator("Step 3: Download & Load Model");
-IModel? chosen = null;
-Exception? lastLoadError = null;
-var downloadedAny = false;
-
-foreach (var candidate in acceleratedVariants)
-{
-    var ep = candidate.Info?.Runtime?.ExecutionProvider ?? "unknown";
-    Console.WriteLine($"\n{INFO} Trying model: {candidate.Id} (EP: {ep})");
-
-    try
-    {
-        await candidate.DownloadAsync(progress =>
-            Console.Write($"\r  Downloading model: {progress:F1}%"));
-        Console.WriteLine();
-        downloadedAny = true;
-    }
-    catch (Exception e)
-    {
-        Console.WriteLine();
-        Console.WriteLine($"{WARN} Skipping {candidate.Id}: download failed: {e.Message}");
-        lastLoadError = e;
-        continue;
-    }
-
-    try
-    {
-        await candidate.LoadAsync();
-        chosen = candidate;
-        break;
-    }
-    catch (Exception e)
-    {
-        Console.WriteLine($"{WARN} Skipping {candidate.Id}: load failed: {e.Message}");
-        lastLoadError = e;
-    }
-}
-
-LogResult("Model Download", downloadedAny,
-    downloadedAny ? "At least one accelerated variant downloaded" : lastLoadError?.Message ?? "No accelerated variant could be downloaded");
-
-if (chosen == null)
-{
-    LogResult("Model Load", false,
-        lastLoadError?.Message ?? "No accelerated variant could be loaded on this machine");
-    PrintSummary();
-    return;
-}
-
-LogResult("Model Load", true, $"Loaded {chosen.Id}");
-
-// ── 4. Streaming Chat Completions (Native SDK) ────────────
-PrintSeparator("Step 4: Streaming Chat Completions (Native)");
-try
-{
-    var chatClient = await chosen.GetChatClientAsync();
-    chatClient.Settings.Temperature = 0;
-    chatClient.Settings.MaxTokens = 16;
-    var messages = new List<ChatMessage>
-    {
-        new() { Role = "system", Content = "You are a helpful assistant." },
-        new() { Role = "user", Content = "What is 2 + 2? Reply with just the number." },
-    };
-
-    var fullResponse = "";
-    var start = DateTime.UtcNow;
-    await foreach (var chunk in chatClient.CompleteChatStreamingAsync(messages, ct))
-    {
-        var content = chunk.Choices?.FirstOrDefault()?.Message?.Content;
-        if (!string.IsNullOrEmpty(content))
-        {
-            Console.Write(content);
-            Console.Out.Flush();
-            fullResponse += content;
-        }
-    }
-
-    var elapsed = (DateTime.UtcNow - start).TotalSeconds;
-    Console.WriteLine();
-    LogResult("Streaming Chat (Native)", fullResponse.Length > 0,
-        $"{fullResponse.Length} chars in {elapsed:F2}s");
-}
-catch (Exception e)
-{
-    LogResult("Streaming Chat (Native)", false, e.Message);
-}
-
-// ── Summary ──────────────────────────────────────────────
-PrintSummary();
-
-await chosen.UnloadAsync();
-Console.WriteLine("Model unloaded. Done!");
diff --git a/samples/cs/verify-winml/README.md b/samples/cs/verify-winml/README.md
deleted file mode 100644
index 88540fbc8..000000000
--- a/samples/cs/verify-winml/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Verify WinML 2.0 Execution Providers (C#)
-
-This sample verifies that WinML 2.0 execution providers are correctly discovered,
-downloaded, and registered using the Foundry Local C# SDK. It uses registered WinML
-EP-backed model variants and finishes with one native streaming chat check.
-
-## Prerequisites
-
-- Windows with a compatible GPU
-- .NET 9.0 SDK
-
-## Build & Run
-
-This sample uses the `Microsoft.AI.Foundry.Local.WinML` SDK package selected by
-the shared central package versions. The SDK package owns its native
-`Microsoft.AI.Foundry.Local.Core.WinML` dependency, so it restores the matching
-Core package transitively.
-
-```bash
-dotnet run
-```
diff --git a/samples/cs/verify-winml/VerifyWinML.csproj b/samples/cs/verify-winml/VerifyWinML.csproj
deleted file mode 100644
index 860aa6740..000000000
--- a/samples/cs/verify-winml/VerifyWinML.csproj
+++ /dev/null
@@ -1,28 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-    <TargetFramework>net9.0</TargetFramework>
-  </PropertyGroup>
-
-  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
-    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
-  </PropertyGroup>
-
-  <ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
-  </ItemGroup>
-
-  <ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
-    <PackageReference Include="Microsoft.AI.Foundry.Local" />
-  </ItemGroup>
-
-  <ItemGroup>
-    <PackageReference Include="Betalgo.Ranul.OpenAI" />
-    <PackageReference Include="Microsoft.Extensions.Logging" />
-    <PackageReference Include="Microsoft.Extensions.Logging.Console" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/js/README.md b/samples/js/README.md
index 0b7d677c4..2636a88c0 100644
--- a/samples/js/README.md
+++ b/samples/js/README.md
@@ -2,6 +2,9 @@
 
 These samples demonstrate how to use the Foundry Local JavaScript SDK (`foundry-local-sdk`) with Node.js.
 
+The samples consume the SDK **directly from local source** (the in-repo `sdk/js`, referenced via a
+`file:` dependency), so they always track `main` rather than a published npm version.
+
 ## Prerequisites
 
 - [Node.js](https://nodejs.org/) (v18 or later recommended)
@@ -10,20 +13,10 @@ These samples demonstrate how to use the Foundry Local JavaScript SDK (`foundry-
 
 | Sample | Description |
 |--------|-------------|
-| [native-chat-completions](native-chat-completions/) | Initialize the SDK, download a model, and run non-streaming and streaming chat completions. |
-| [embeddings](embeddings/) | Generate single and batch text embeddings using the Foundry Local SDK. |
-| [audio-transcription-example](audio-transcription-example/) | Transcribe audio files using the Whisper model with streaming output. |
-| [chat-and-audio-foundry-local](chat-and-audio-foundry-local/) | Unified sample demonstrating both chat and audio transcription in one application. |
-| [electron-chat-application](electron-chat-application/) | Full-featured Electron desktop chat app with voice transcription and model management. |
-| [copilot-sdk-foundry-local](copilot-sdk-foundry-local/) | GitHub Copilot SDK integration with Foundry Local for agentic AI workflows. |
-| [langchain-integration-example](langchain-integration-example/) | LangChain.js integration for building text generation chains. |
-| [tool-calling-foundry-local](tool-calling-foundry-local/) | Tool calling with custom function definitions and streaming responses. |
-| [web-server-example](web-server-example/) | Start a local OpenAI-compatible web server and call it with the OpenAI SDK. |
-| [web-server-responses-vision-example](web-server-responses-vision-example/) | Stream a vision (image understanding) response from the local web server using the Responses API. |
-| [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). |
-| [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). |
-| [tutorial-tool-calling](tutorial-tool-calling/) | Create a tool-calling assistant (tutorial). |
-| [tutorial-voice-to-text](tutorial-voice-to-text/) | Transcribe and summarize audio (tutorial). |
+| [chat-completion](chat-completion/) | Run the same chat prompt two ways: native in-process inference (streaming and non-streaming) **and** the local OpenAI-compatible web server (`/v1/chat/completions`). |
+| [embeddings](embeddings/) | Generate single and batch text embeddings using native inference. |
+| [audio](audio/) | Live microphone streaming (Nemotron ASR) **and** file-based transcription (`--file <path>`, Whisper) in one app. |
+| [responses-api](responses-api/) | Stream a vision (image understanding) response from the local web server using the Responses API. |
 
 ## Running a Sample
 
@@ -37,7 +30,7 @@ These samples demonstrate how to use the Foundry Local JavaScript SDK (`foundry-
 1. Navigate to a sample and install dependencies:
 
    ```bash
-   cd native-chat-completions
+   cd chat-completion
    npm install
    ```
 
@@ -48,4 +41,8 @@ These samples demonstrate how to use the Foundry Local JavaScript SDK (`foundry-
    ```
 
 > [!TIP]
-> Each sample's `package.json` includes `foundry-local-sdk` as a dependency and `foundry-local-sdk-winml` as an optional dependency. On **Windows**, the WinML variant installs automatically for broader hardware acceleration. On **macOS and Linux**, the standard SDK is used. Just run `npm install` — platform detection is handled for you.
+> Each sample's `package.json` references the SDK via `"foundry-local-sdk": "file:../../../sdk/js"`,
+> so `npm install` builds against the in-repo SDK. The SDK ships a prebuilt `dist/` and downloads its
+> native runtime on install. If you've changed the SDK source, rebuild it first with
+> `npm install && npm run build` (and `npm run build:native` to rebuild the native addon) inside
+> `sdk/js`.
diff --git a/samples/js/audio-transcription-example/app.js b/samples/js/audio-transcription-example/app.js
deleted file mode 100644
index 51b69f990..000000000
--- a/samples/js/audio-transcription-example/app.js
+++ /dev/null
@@ -1,77 +0,0 @@
-// <complete_code>
-// <imports>
-import { FoundryLocalManager } from 'foundry-local-sdk';
-// </imports>
-
-// Initialize the Foundry Local SDK
-console.log('Initializing Foundry Local SDK...');
-
-// <init>
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info'
-});
-// </init>
-console.log('✓ SDK initialized successfully');
-
-// Download and register all execution providers.
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
-
-// <model_setup>
-// Get the model object
-const modelAlias = 'whisper-tiny'; // Using an available model from the list above
-let model = await manager.catalog.getModel(modelAlias);
-console.log(`Using model: ${model.id}`);
-
-// Download the model
-console.log(`\nDownloading model ${modelAlias}...`);
-await model.download((progress) => {
-    process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
-});
-console.log('\n✓ Model downloaded');
-
-// Load the model
-console.log(`\nLoading model ${modelAlias}...`);
-await model.load();
-console.log('✓ Model loaded');
-// </model_setup>
-
-// <transcription>
-// Create audio client
-console.log('\nCreating audio client...');
-const audioClient = model.createAudioClient();
-console.log('✓ Audio client created');
-
-// Example audio transcription
-const audioFile = process.argv[2] || './Recording.mp3';
-console.log(`\nTranscribing ${audioFile}...`);
-const transcription = await audioClient.transcribe(audioFile);
-
-console.log('\nAudio transcription result:');
-console.log(transcription.text);
-console.log('✓ Audio transcription completed');
-
-// Same example but with streaming transcription using async iteration
-console.log('\nTesting streaming audio transcription...');
-for await (const result of audioClient.transcribeStreaming(audioFile)) {
-    // Output the intermediate transcription results as they are received without line ending
-    process.stdout.write(result.text);
-}
-console.log('\n✓ Streaming transcription completed');
-// </transcription>
-
-// <cleanup>
-// Unload the model
-console.log('Unloading model...');
-await model.unload();
-console.log(`✓ Model unloaded`);
-// </cleanup>
-// </complete_code>
diff --git a/samples/js/audio-transcription-example/package.json b/samples/js/audio-transcription-example/package.json
deleted file mode 100644
index 14a2aafab..000000000
--- a/samples/js/audio-transcription-example/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "audio-transcription-example",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "start": "node app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/audio/README.md b/samples/js/audio/README.md
new file mode 100644
index 000000000..fcb440828
--- /dev/null
+++ b/samples/js/audio/README.md
@@ -0,0 +1,102 @@
+# Audio Transcription
+
+A single app with **two transcription modes** using the Foundry Local JS SDK:
+
+- **Live microphone streaming (default)** — real-time mic-to-text using the **Nemotron** streaming ASR model.
+- **File-based transcription** — transcribe an audio file via `--file <path>` using the **Whisper** model. A bundled `Recording.mp3` is used when no path is supplied.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- [Node.js](https://nodejs.org/) v18 or later
+- A microphone (live mode only — falls back to synthetic audio if `naudiodon2` is unavailable)
+
+## Install
+
+This sample consumes the JS SDK **directly from local source** (`sdk/js`) so it always tracks
+`main` rather than a published npm version. It is **not** pinned to a registry release. The
+dependency in `package.json` is:
+
+```json
+"foundry-local-sdk": "file:../../../sdk/js"
+```
+
+Install dependencies:
+
+```bash
+npm install
+```
+
+> **Building the SDK:** `npm install` resolves `foundry-local-sdk` from `sdk/js`. The SDK ships a
+> prebuilt `dist/` and downloads its native runtime on install. If the local SDK has not been built
+> (or you've changed its source), build it first:
+>
+> ```bash
+> cd ../../../sdk/js
+> npm install
+> npm run build          # compile TypeScript -> dist/
+> npm run build:native   # (re)build the native addon if needed
+> ```
+
+> **Note:** `naudiodon2` is an **optional** dependency that provides cross-platform microphone
+> capture for live mode. Without it, live mode falls back to synthetic audio for testing. File
+> mode does not require it.
+
+## Run
+
+```bash
+# Live microphone streaming (Nemotron) — default
+npm start
+# or
+node app.js
+
+# Transcribe the bundled Recording.mp3 (Whisper)
+node app.js --file
+
+# Transcribe a specific audio file (Whisper)
+node app.js --file ./my-audio.mp3
+```
+
+In live mode, speak into your microphone — transcription appears in real-time. Press `Ctrl+C` to stop.
+
+## How it works
+
+### Live mode (Nemotron streaming ASR)
+
+1. Loads the Nemotron streaming ASR model and creates a `LiveAudioTranscriptionSession`
+   (16kHz / 16-bit / mono PCM).
+2. Captures microphone audio via `naudiodon2` (or generates synthetic audio as a fallback).
+3. Pushes PCM chunks to the SDK via `session.append()`.
+4. Reads results via `for await (const result of session.getStream())`.
+5. Accesses text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern).
+
+### File mode (Whisper)
+
+1. Downloads and registers execution providers, then loads the `whisper-tiny` model.
+2. Creates an audio client.
+3. Runs non-streaming transcription via `audioClient.transcribe(file)`.
+4. Runs streaming transcription via `for await (const result of audioClient.transcribeStreaming(file))`.
+
+## API (live mode)
+
+```javascript
+const audioClient = model.createAudioClient();
+const session = audioClient.createLiveTranscriptionSession();
+session.settings.sampleRate = 16000;
+session.settings.channels = 1;
+session.settings.language = 'en';
+
+await session.start();
+
+// Push audio
+await session.append(pcmBytes);
+
+// Read results
+for await (const result of session.getStream()) {
+    console.log(result.content[0].text);       // transcribed text
+    console.log(result.content[0].transcript); // alias (OpenAI compat)
+    console.log(result.is_final);              // true for final results
+}
+
+await session.stop();
+```
diff --git a/samples/js/audio-transcription-example/Recording.mp3 b/samples/js/audio/Recording.mp3
similarity index 100%
rename from samples/js/audio-transcription-example/Recording.mp3
rename to samples/js/audio/Recording.mp3
diff --git a/samples/js/audio/app.js b/samples/js/audio/app.js
new file mode 100644
index 000000000..6f23ac93c
--- /dev/null
+++ b/samples/js/audio/app.js
@@ -0,0 +1,293 @@
+// Audio Transcription Example — Foundry Local JS SDK
+//
+// Two modes in a single app:
+//   • Live microphone streaming (default) using Nemotron streaming ASR.
+//   • File-based transcription via `--file <path>` using Whisper (defaults to
+//     the bundled Recording.mp3 when no path is given).
+//
+// Live capture requires: npm install naudiodon2
+//
+// Usage:
+//   node app.js                 # live mic streaming (Nemotron)
+//   node app.js --file          # transcribe bundled Recording.mp3 (Whisper)
+//   node app.js --file <path>   # transcribe a specific audio file (Whisper)
+
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { FoundryLocalManager } from 'foundry-local-sdk';
+
+const __filename = fileURLToPath(import.meta.url);  // derive this module's file path from its URL
+const __dirname = path.dirname(__filename);
+
+// --- Parse CLI args ---
+// `--file` selects file-based transcription. The argv entry right after it is used as the
+// audio path unless it is missing or starts with `--`; then the bundled Recording.mp3 is used.
+const fileFlagIndex = process.argv.indexOf('--file');
+const fileMode = fileFlagIndex !== -1;
+const fileArg = fileMode ? process.argv[fileFlagIndex + 1] : undefined;  // undefined when `--file` is the last arg
+const audioFilePath = fileMode
+    ? (fileArg && !fileArg.startsWith('--') ? fileArg : path.join(__dirname, 'Recording.mp3'))
+    : undefined;  // live mode takes no file path
+
+console.log('╔══════════════════════════════════════════════════════════╗');
+console.log('║   Foundry Local — Audio Transcription (JS SDK)          ║');
+console.log('╚══════════════════════════════════════════════════════════╝');
+console.log();
+
+// Initialize the Foundry Local SDK (one manager instance is shared by both modes)
+console.log('Initializing Foundry Local SDK...');
+const manager = FoundryLocalManager.create({
+    appName: 'foundry_local_samples',
+    logLevel: 'info'
+});
+console.log('✓ SDK initialized');
+
+if (fileMode) {  // the run* functions are declared further down; function declarations are hoisted
+    await runFileTranscription(manager, audioFilePath);
+} else {
+    await runLiveTranscription(manager);
+}
+
+// --- File-based transcription (Whisper) ---
+async function runFileTranscription(manager, audioFile) {
+    console.log(`\nMode: file-based transcription (${audioFile})`);
+
+    // Download and register all execution providers.
+    let currentEp = '';
+    await manager.downloadAndRegisterEps((epName, percent) => {
+        if (epName !== currentEp) {
+            if (currentEp !== '') process.stdout.write('\n');
+            currentEp = epName;
+        }
+        process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
+    });
+    if (currentEp !== '') process.stdout.write('\n');
+
+    // Get the Whisper model
+    const modelAlias = 'whisper-tiny';
+    const model = await manager.catalog.getModel(modelAlias);
+    console.log(`Using model: ${model.id}`);
+
+    console.log(`\nDownloading model ${modelAlias}...`);
+    await model.download((progress) => {
+        process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
+    });
+    console.log('\n✓ Model downloaded');
+
+    console.log(`\nLoading model ${modelAlias}...`);
+    await model.load();
+    console.log('✓ Model loaded');
+
+    // Create audio client
+    console.log('\nCreating audio client...');
+    const audioClient = model.createAudioClient();
+    console.log('✓ Audio client created');
+
+    // Non-streaming transcription
+    console.log(`\nTranscribing ${audioFile}...`);
+    const transcription = await audioClient.transcribe(audioFile);
+    console.log('\nAudio transcription result:');
+    console.log(transcription.text);
+    console.log('✓ Audio transcription completed');
+
+    // Streaming transcription using async iteration
+    console.log('\nTesting streaming audio transcription...');
+    for await (const result of audioClient.transcribeStreaming(audioFile)) {
+        // Output intermediate transcription results as they arrive (no line ending).
+        process.stdout.write(result.text);
+    }
+    console.log('\n✓ Streaming transcription completed');
+
+    // Unload the model
+    console.log('\nUnloading model...');
+    await model.unload();
+    console.log('✓ Model unloaded');
+}
+
+// --- Live microphone transcription (Nemotron streaming ASR) ---
+async function runLiveTranscription(manager) {
+    console.log('\nMode: live microphone streaming');
+
+    // Get and load the nemotron model
+    // English-only:
+    const modelAlias = 'nemotron-speech-streaming-en-0.6b';
+    // Multi-lingual (supports 30+ languages including auto-detect):
+    // const modelAlias = 'nemotron-3.5-asr-streaming-0.6b';
+    const model = await manager.catalog.getModel(modelAlias);
+    if (!model) {
+        console.error(`ERROR: Model "${modelAlias}" not found in catalog.`);
+        process.exit(1);
+    }
+
+    console.log(`Found model: ${model.id}`);
+    console.log('Downloading model (if needed)...');
+    await model.download((progress) => {
+        process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
+    });
+    console.log('\n✓ Model downloaded');
+
+    console.log('Loading model...');
+    await model.load();
+    console.log('✓ Model loaded');
+
+    // Create live transcription session (same pattern as C# sample).
+    const audioClient = model.createAudioClient();
+    const session = audioClient.createLiveTranscriptionSession();
+
+    session.settings.sampleRate = 16000;  // Default is 16000; shown here for clarity
+    session.settings.channels = 1;
+    session.settings.bitsPerSample = 16;
+    session.settings.language = 'en';                  // English (default)
+    // Multi-lingual examples:
+    // session.settings.language = 'de';     // German
+    // session.settings.language = 'zh-CN';  // Chinese (Simplified)
+    // session.settings.language = 'auto';   // Auto-detect language
+
+    console.log('Starting streaming session...');
+    await session.start();
+    console.log('✓ Session started');
+
+    // Read transcription results in background
+    const readPromise = (async () => {
+        try {
+            for await (const result of session.getStream()) {
+                const text = result.content?.[0]?.text;
+                if (!text) continue;
+
+                // `is_final` is a transcript-state marker only. It should not stop the app.
+                if (result.is_final) {
+                    process.stdout.write(`\n  [FINAL] ${text}\n`);
+                } else {
+                    process.stdout.write(text);
+                }
+            }
+        } catch (err) {
+            if (err.name !== 'AbortError') {
+                console.error('Stream error:', err.message);
+            }
+        }
+    })();
+
+    // --- Microphone capture ---
+    // This example uses naudiodon2 for cross-platform audio capture.
+    // Install with: npm install naudiodon2
+    //
+    // If you prefer a different audio library, just push PCM bytes
+    // (16-bit signed LE, mono, 16kHz) via session.append().
+
+    let audioInput;
+    try {
+        const { default: portAudio } = await import('naudiodon2');
+
+        audioInput = portAudio.AudioIO({
+            inOptions: {
+                channelCount: session.settings.channels,
+                sampleFormat: session.settings.bitsPerSample === 16
+                    ? portAudio.SampleFormat16Bit
+                    : portAudio.SampleFormat32Bit,
+                sampleRate: session.settings.sampleRate,
+                // Larger chunk size lowers callback frequency and reduces overflow risk.
+                framesPerBuffer: 3200,
+                // Allow deeper native queue during occasional event-loop stalls.
+                maxQueue: 64
+            }
+        });
+
+        const appendQueue = [];
+        let pumping = false;
+        let warnedQueueDrop = false;
+
+        const pumpAudio = async () => {
+            if (pumping) return;
+            pumping = true;
+            try {
+                while (appendQueue.length > 0) {
+                    const pcm = appendQueue.shift();
+                    await session.append(pcm);
+                }
+            } catch (err) {
+                console.error('append error:', err.message);
+            } finally {
+                pumping = false;
+                // Handle race where new data arrived after loop exit.
+                if (appendQueue.length > 0) {
+                    void pumpAudio();
+                }
+            }
+        };
+
+        audioInput.on('data', (buffer) => {
+            // Single copy: slice the underlying ArrayBuffer to get an independent Uint8Array.
+            const copy = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength).slice();
+
+            // Keep a bounded queue to avoid unbounded memory growth.
+            if (appendQueue.length >= 100) {
+                appendQueue.shift();
+                if (!warnedQueueDrop) {
+                    warnedQueueDrop = true;
+                    console.warn('Audio append queue overflow; dropping oldest chunk to keep stream alive.');
+                }
+            }
+
+            appendQueue.push(copy);
+            void pumpAudio();
+        });
+
+        console.log();
+        console.log('════════════════════════════════════════════════════════════');
+        console.log('  LIVE TRANSCRIPTION ACTIVE');
+        console.log('  Speak into your microphone.');
+        console.log('  Press Ctrl+C to stop.');
+        console.log('════════════════════════════════════════════════════════════');
+        console.log();
+
+        audioInput.start();
+    } catch (err) {
+        console.warn('⚠ Could not initialize microphone (naudiodon2 may not be installed).');
+        console.warn('  Install with: npm install naudiodon2');
+        console.warn('  Or transcribe a file instead: node app.js --file');
+        console.warn('  Falling back to synthetic audio test...');
+        console.warn();
+
+        // Fallback: push 2 seconds of synthetic PCM (440Hz sine wave)
+        const sampleRate = session.settings.sampleRate;
+        const duration = 2;
+        const totalSamples = sampleRate * duration;
+        const pcmBytes = new Uint8Array(totalSamples * 2);
+        for (let i = 0; i < totalSamples; i++) {
+            const t = i / sampleRate;
+            const sample = Math.round(32767 * 0.5 * Math.sin(2 * Math.PI * 440 * t));
+            pcmBytes[i * 2] = sample & 0xFF;
+            pcmBytes[i * 2 + 1] = (sample >> 8) & 0xFF;
+        }
+
+        // Push in 100ms chunks
+        const chunkSize = (sampleRate / 10) * 2;
+        for (let offset = 0; offset < pcmBytes.length; offset += chunkSize) {
+            const len = Math.min(chunkSize, pcmBytes.length - offset);
+            await session.append(pcmBytes.slice(offset, offset + len));
+        }
+
+        console.log('✓ Synthetic audio pushed');
+        console.log('Waiting briefly for final transcription results...');
+        await new Promise((resolve) => setTimeout(resolve, 3000));
+        await session.stop();
+        await readPromise;
+        await model.unload();
+        console.log('✓ Done');
+        process.exit(0);
+    }
+
+    // Handle graceful shutdown
+    process.on('SIGINT', async () => {
+        console.log('\n\nStopping...');
+        if (audioInput) {
+            audioInput.quit();
+        }
+        await session.stop();
+        await readPromise;
+        await model.unload();
+        console.log('✓ Done');
+        process.exit(0);
+    });
+}
diff --git a/samples/js/live-audio-transcription/package.json b/samples/js/audio/package.json
similarity index 54%
rename from samples/js/live-audio-transcription/package.json
rename to samples/js/audio/package.json
index d3e9d4cfe..a30ad3475 100644
--- a/samples/js/live-audio-transcription/package.json
+++ b/samples/js/audio/package.json
@@ -1,14 +1,14 @@
 {
-  "name": "live-audio-transcription-example",
+  "name": "audio",
   "version": "1.0.0",
   "type": "module",
-  "description": "Live audio transcription example using the Foundry Local JS SDK",
+  "description": "Audio transcription example (live mic + file) using the Foundry Local JS SDK",
   "main": "app.js",
   "scripts": {
     "start": "node app.js"
   },
   "dependencies": {
-    "foundry-local-sdk": "latest"
+    "foundry-local-sdk": "file:../../../sdk/js"
   },
   "optionalDependencies": {
     "naudiodon2": "latest"
diff --git a/samples/js/chat-and-audio-foundry-local/.npmrc b/samples/js/chat-and-audio-foundry-local/.npmrc
deleted file mode 100644
index 114ea2a42..000000000
--- a/samples/js/chat-and-audio-foundry-local/.npmrc
+++ /dev/null
@@ -1,2 +0,0 @@
-registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/
-always-auth=true
diff --git a/samples/js/chat-and-audio-foundry-local/package.json b/samples/js/chat-and-audio-foundry-local/package.json
deleted file mode 100644
index 7404589ec..000000000
--- a/samples/js/chat-and-audio-foundry-local/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "chat-and-audio-foundry-local",
-  "version": "1.0.0",
-  "type": "module",
-  "description": "Unified chat + audio transcription sample using Foundry Local",
-  "scripts": {
-    "start": "node src/app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/chat-and-audio-foundry-local/src/app.js b/samples/js/chat-and-audio-foundry-local/src/app.js
deleted file mode 100644
index 12ddabb9d..000000000
--- a/samples/js/chat-and-audio-foundry-local/src/app.js
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-import { FoundryLocalManager } from "foundry-local-sdk";
-import path from "path";
-
-// Model aliases
-const CHAT_MODEL = "phi-3.5-mini";
-const WHISPER_MODEL = "whisper-tiny";
-
-async function main() {
-  console.log("Initializing Foundry Local SDK...");
-  const manager = FoundryLocalManager.create({
-    appName: "foundry_local_samples",
-    logLevel: "info",
-  });
-
-  // Download and register all execution providers.
-  let currentEp = '';
-  await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-      if (currentEp !== '') process.stdout.write('\n');
-      currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-  });
-  if (currentEp !== '') process.stdout.write('\n');
-
-  const catalog = manager.catalog;
-
-  // --- Load both models ---
-  console.log("\n--- Loading models ---");
-
-  const chatModel = await catalog.getModel(CHAT_MODEL);
-  if (!chatModel) {
-    throw new Error(
-      `Chat model "${CHAT_MODEL}" not found. Run "foundry model list" to see available models.`
-    );
-  }
-
-  const whisperModel = await catalog.getModel(WHISPER_MODEL);
-  if (!whisperModel) {
-    throw new Error(
-      `Whisper model "${WHISPER_MODEL}" not found. Run "foundry model list" to see available models.`
-    );
-  }
-
-  // Download models if not cached
-  if (!chatModel.isCached) {
-    console.log(`Downloading ${CHAT_MODEL}...`);
-    await chatModel.download((progress) => {
-      process.stdout.write(`\r  ${CHAT_MODEL}: ${progress.toFixed(1)}%`);
-    });
-    console.log();
-  }
-
-  if (!whisperModel.isCached) {
-    console.log(`Downloading ${WHISPER_MODEL}...`);
-    await whisperModel.download((progress) => {
-      process.stdout.write(`\r  ${WHISPER_MODEL}: ${progress.toFixed(1)}%`);
-    });
-    console.log();
-  }
-
-  // Load both models into memory
-  console.log(`Loading ${CHAT_MODEL}...`);
-  await chatModel.load();
-  console.log(`Loading ${WHISPER_MODEL}...`);
-  await whisperModel.load();
-  console.log("Both models loaded.\n");
-
-  // --- Step 1: Transcribe audio ---
-  console.log("=== Step 1: Audio Transcription ===");
-  const audioClient = whisperModel.createAudioClient();
-  audioClient.settings.language = "en";
-
-  // Update this path to point to your audio file
-  const audioFilePath = path.resolve("recording.mp3");
-  const transcription = await audioClient.transcribe(audioFilePath);
-  console.log("You said:", transcription.text);
-
-  // --- Step 2: Analyze with chat model ---
-  console.log("\n=== Step 2: AI Analysis ===");
-  const chatClient = chatModel.createChatClient();
-  chatClient.settings.temperature = 0.7;
-  chatClient.settings.maxTokens = 500;
-
-  // Summarize the transcription
-  console.log("Generating summary...\n");
-  for await (const chunk of chatClient.completeStreamingChat([
-    {
-      role: "system",
-      content:
-        "You are a helpful assistant. Summarize the following transcribed audio and extract key themes and action items.",
-    },
-    { role: "user", content: transcription.text },
-  ])) {
-    const content = chunk.choices?.[0]?.delta?.content;
-    if (content) {
-      process.stdout.write(content);
-    }
-  }
-  console.log("\n");
-
-  // --- Clean up ---
-  await chatModel.unload();
-  await whisperModel.unload();
-  console.log("Done.");
-}
-
-main().catch(console.error);
diff --git a/samples/js/chat-completion/README.md b/samples/js/chat-completion/README.md
new file mode 100644
index 000000000..deabe6db2
--- /dev/null
+++ b/samples/js/chat-completion/README.md
@@ -0,0 +1,58 @@
+# Chat Completion (Native + Local Web Server)
+
+Runs the **same chat prompt two ways** against Foundry Local from a single app:
+
+1. **Native in-process inference** via the SDK's chat client (non-streaming *and* streaming).
+2. The **local OpenAI-compatible web server** (`/v1/chat/completions`), called with the `openai` client.
+
+The output is split into clearly labelled sections so you can compare the two paths.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- [Node.js](https://nodejs.org/) v18 or later
+
+## Install
+
+This sample consumes the JS SDK **directly from local source** (`sdk/js`) so it always tracks
+`main` rather than a published npm version. It is **not** pinned to a registry release. The
+dependency in `package.json` is:
+
+```json
+"foundry-local-sdk": "file:../../../sdk/js"
+```
+
+Install dependencies:
+
+```bash
+npm install
+```
+
+> **Building the SDK:** `npm install` resolves `foundry-local-sdk` from `sdk/js`. The SDK ships a
+> prebuilt `dist/` and downloads its native runtime on install. If the local SDK has not been built
+> (or you've changed its source), build it first:
+>
+> ```bash
+> cd ../../../sdk/js
+> npm install
+> npm run build          # compile TypeScript -> dist/
+> npm run build:native   # (re)build the native addon if needed
+> ```
+
+## Run
+
+```bash
+npm start
+# or
+node app.js
+```
+
+## What it does
+
+1. Initializes the SDK (with `webServiceUrls` so the local web server has a known endpoint).
+2. Discovers, downloads, and registers execution providers.
+3. Downloads and loads the `qwen2.5-0.5b` model.
+4. **Native inference** — runs `completeChat` and `completeStreamingChat`.
+5. **Web server** — starts the local web service and sends the same prompt through the
+   OpenAI-compatible `/v1/chat/completions` endpoint.
+6. Unloads the model and stops the web service.
diff --git a/samples/js/native-chat-completions/app.js b/samples/js/chat-completion/app.js
similarity index 60%
rename from samples/js/native-chat-completions/app.js
rename to samples/js/chat-completion/app.js
index 2ecc43560..eef596387 100644
--- a/samples/js/native-chat-completions/app.js
+++ b/samples/js/chat-completion/app.js
@@ -1,15 +1,30 @@
 // <complete_code>
 // <imports>
 import { FoundryLocalManager } from 'foundry-local-sdk';
+import { OpenAI } from 'openai';
 // </imports>
 
+// This sample runs the SAME chat prompt two ways against Foundry Local:
+//   1. Native in-process inference via the SDK's chat client.
+//   2. The local OpenAI-compatible web server (/v1/chat/completions).
+const endpointUrl = 'http://localhost:5764';
+const prompt = 'Why is the sky blue?';
+
+function section(title) {
+    console.log(`\n${'═'.repeat(60)}`);
+    console.log(`  ${title}`);
+    console.log('═'.repeat(60));
+}
+
 // Initialize the Foundry Local SDK
 console.log('Initializing Foundry Local SDK...');
 
 // <init>
+// `webServiceUrls` is supplied so the local web server starts on a known endpoint.
 const manager = FoundryLocalManager.create({
     appName: 'foundry_local_samples',
-    logLevel: 'info'
+    logLevel: 'info',
+    webServiceUrls: endpointUrl
 });
 // </init>
 console.log('✓ SDK initialized successfully');
@@ -62,25 +77,25 @@ await model.load();
 console.log('✓ Model loaded');
 // </model_setup>
 
-// <chat_completion>
+// <native_inference>
+section('NATIVE IN-PROCESS INFERENCE');
+
 // Create chat client
-console.log('\nCreating chat client...');
+console.log('Creating chat client...');
 const chatClient = model.createChatClient();
 console.log('✓ Chat client created');
 
 // Example chat completion
-console.log('\nTesting chat completion...');
+console.log(`\nPrompt: ${prompt}`);
 const completion = await chatClient.completeChat([
-    { role: 'user', content: 'Why is the sky blue?' }
+    { role: 'user', content: prompt }
 ]);
 
-console.log('\nChat completion result:');
+console.log('\nResponse:');
 console.log(completion.choices[0]?.message?.content);
-// </chat_completion>
 
-// <streaming>
 // Example streaming completion
-console.log('\nTesting streaming completion...');
+console.log('\nStreaming a second prompt...');
 for await (const chunk of chatClient.completeStreamingChat(
     [{ role: 'user', content: 'Write a short poem about programming.' }]
 )) {
@@ -90,13 +105,38 @@ for await (const chunk of chatClient.completeStreamingChat(
     }
 }
 console.log('\n');
-// </streaming>
+// </native_inference>
+
+// <web_server>
+section('LOCAL WEB SERVER (OpenAI-compatible /v1/chat/completions)');
+
+// Start the web service and call it with the same prompt using the OpenAI client.
+console.log('Starting web service...');
+manager.startWebService();
+console.log('✓ Web service started');
+
+const openai = new OpenAI({
+    baseURL: endpointUrl + '/v1',
+    apiKey: 'notneeded',
+});
+
+console.log(`\nPrompt: ${prompt}`);
+const response = await openai.chat.completions.create({
+    model: model.id,
+    messages: [
+        { role: 'user', content: prompt },
+    ],
+});
+
+console.log('\nResponse:');
+console.log(response.choices[0].message.content);
+// </web_server>
 
 // <cleanup>
-// Unload the model
-console.log('Unloading model...');
+// Unload the model and stop the web service
+console.log('\nUnloading model and stopping web service...');
 await model.unload();
-console.log(`✓ Model unloaded`);
+manager.stopWebService();
+console.log('✓ Model unloaded and web service stopped');
 // </cleanup>
 // </complete_code>
-    
\ No newline at end of file
diff --git a/samples/js/web-server-example/package.json b/samples/js/chat-completion/package.json
similarity index 53%
rename from samples/js/web-server-example/package.json
rename to samples/js/chat-completion/package.json
index 336705142..93bf08de5 100644
--- a/samples/js/web-server-example/package.json
+++ b/samples/js/chat-completion/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "web-server-example",
+  "name": "chat-completion",
   "version": "1.0.0",
   "type": "module",
   "main": "app.js",
@@ -7,10 +7,7 @@
     "start": "node app.js"
   },
   "dependencies": {
-    "foundry-local-sdk": "latest",
+    "foundry-local-sdk": "file:../../../sdk/js",
     "openai": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
   }
 }
diff --git a/samples/js/copilot-sdk-foundry-local/.npmrc b/samples/js/copilot-sdk-foundry-local/.npmrc
deleted file mode 100644
index 114ea2a42..000000000
--- a/samples/js/copilot-sdk-foundry-local/.npmrc
+++ /dev/null
@@ -1,2 +0,0 @@
-registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/
-always-auth=true
diff --git a/samples/js/copilot-sdk-foundry-local/package.json b/samples/js/copilot-sdk-foundry-local/package.json
deleted file mode 100644
index b2457d9aa..000000000
--- a/samples/js/copilot-sdk-foundry-local/package.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "name": "copilot-sdk-foundry-local-sample",
-  "version": "1.0.0",
-  "description": "Sample: Using GitHub Copilot SDK with Foundry Local for agentic workflows",
-  "type": "module",
-  "scripts": {
-    "start": "npx tsx src/app.ts",
-    "tools": "npx tsx src/tool-calling.ts"
-  },
-  "dependencies": {
-    "@github/copilot-sdk": "latest",
-    "foundry-local-sdk": "latest",
-    "zod": "^3.0.0"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  },
-  "devDependencies": {
-    "tsx": "^4.0.0",
-    "typescript": "^5.0.0"
-  }
-}
diff --git a/samples/js/copilot-sdk-foundry-local/src/app.ts b/samples/js/copilot-sdk-foundry-local/src/app.ts
deleted file mode 100644
index 4c2013517..000000000
--- a/samples/js/copilot-sdk-foundry-local/src/app.ts
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-/**
- * Basic Example — Copilot SDK + Foundry Local
- *
- * Demonstrates:
- *   - Bootstrapping Foundry Local (download, load, start web service)
- *   - Creating a BYOK session via Copilot SDK
- *   - Using Copilot's built-in tools (file reading) with a local model
- *   - Streaming responses and multi-turn conversation
- *
- * The app asks the local model to read its own source code using Copilot's
- * built-in `view` tool, then explain what it does — showing agentic tool
- * use powered entirely by on-device inference.
- *
- * Run:  npm start
- */
-
-import { CopilotClient, approveAll } from "@github/copilot-sdk";
-import { FoundryLocalManager } from "foundry-local-sdk";
-
-const alias = "phi-4-mini";
-const endpointUrl = "http://localhost:6543";
-
-// Timeout for each model turn (ms).  Override with FOUNDRY_TIMEOUT_MS env var.
-// Local models on CPU can be slow — increase this on less powerful hardware.
-const TIMEOUT_MS = Number(process.env.FOUNDRY_TIMEOUT_MS) || 120_000;
-
-type Model = Awaited<ReturnType<FoundryLocalManager["catalog"]["getModel"]>>;
-
-// ---------------------------------------------------------------------------
-// Helper: send a message and wait for the assistant's full reply.
-// ---------------------------------------------------------------------------
-async function sendMessage(
-    session: Awaited<ReturnType<CopilotClient["createSession"]>>,
-    prompt: string,
-    timeoutMs = TIMEOUT_MS,
-) {
-    try {
-        await session.sendAndWait({ prompt }, timeoutMs);
-    } catch (err: any) {
-        // Foundry Local streaming may omit finish_reason, causing a
-        // session.error that rejects sendAndWait. Treat as non-fatal.
-        console.error(`\n[sendMessage error: ${err?.message ?? err}]`);
-    }
-}
-
-async function main() {
-    let manager: FoundryLocalManager | undefined;
-    let model: Model | undefined;
-    let client: CopilotClient | undefined;
-    let session: Awaited<ReturnType<CopilotClient["createSession"]>> | undefined;
-
-    try {
-        // --- Initialize Foundry Local ---
-        console.log("Initializing Foundry Local...");
-        manager = FoundryLocalManager.create({
-            appName: "foundry_local_samples",
-            webServiceUrls: endpointUrl,
-        });
-
-        // Download and register all execution providers.
-        let currentEp = '';
-        await manager.downloadAndRegisterEps((epName, percent) => {
-            if (epName !== currentEp) {
-                if (currentEp !== '') process.stdout.write('\n');
-                currentEp = epName;
-            }
-            process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-        });
-        if (currentEp !== '') process.stdout.write('\n');
-
-        model = await manager.catalog.getModel(alias);
-        await model.download();
-        await model.load();
-        console.log(`Model: ${model.id}`);
-
-        manager.startWebService();
-        const endpoint = endpointUrl + "/v1";
-        console.log(`Endpoint: ${endpoint}\n`);
-
-        // --- Create a BYOK session with Copilot's built-in tools ---
-        client = new CopilotClient();
-
-        session = await client.createSession({
-            onPermissionRequest: approveAll,
-            model: model.id,
-            provider: {
-                type: "openai",
-                baseUrl: endpoint,
-                apiKey: "local",
-                wireApi: "completions",
-            },
-            streaming: true,
-            workingDirectory: process.cwd(),
-            systemMessage: {
-                content:
-                    "You are a helpful AI assistant running locally via Foundry Local. You can use your tools to read files and answer questions about them.",
-            },
-        });
-
-        // print out current directory
-        console.log("Current working directory:", process.cwd());
-
-        // Stream assistant text to stdout
-        session.on("assistant.message_delta", (event) => {
-            process.stdout.write(event.data.deltaContent);
-        });
-        session.on("tool.execution_start", (event) => {
-            console.log(`\n  [Tool: ${(event as any).data?.toolName ?? "unknown"}]`);
-        });
-
-        // --- Turn 1: Ask the model to read and explain its own source ---
-        console.log("--- Turn 1: Read and explain this app ---\n");
-        process.stdout.write("Assistant: ");
-        await sendMessage(
-            session,
-            "Read src/app.ts, then explain what this application does in a few sentences.",
-        );
-        console.log("\n");
-
-        // --- Turn 2: Follow-up leveraging conversation context ---
-        console.log("--- Turn 2: What technologies does it use? ---\n");
-        process.stdout.write("Assistant: ");
-        await sendMessage(session, "What key technologies and patterns does it demonstrate?");
-        console.log("\n");
-
-        console.log("Done!");
-    } finally {
-        // Clean up resources in reverse order of creation
-        if (session) {
-            await session.destroy().catch(() => {});
-        }
-        if (client) {
-            await client.stop().catch(() => {});
-        }
-        if (model) {
-            console.log("Unloading model...");
-            await model.unload().catch((e) => {
-                console.warn("Warning: failed to unload model:", e);
-            });
-        }
-        if (manager) {
-            console.log("Stopping web service...");
-            try {
-                manager.stopWebService();
-            } catch (e) {
-                console.warn("Warning: failed to stop web service:", e);
-            }
-        }
-    }
-}
-
-main().catch(console.error);
diff --git a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts
deleted file mode 100644
index 3651b5271..000000000
--- a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts
+++ /dev/null
@@ -1,250 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-/**
- * Tool Calling Example — Copilot SDK + Foundry Local
- *
- * Demonstrates multiple custom tools that the model can invoke:
- *   - calculate: Evaluate math expressions
- *   - get_system_info: Return local system details
- *   - lookup_definition: Look up programming term definitions
- *
- * Run:  npm run tools
- */
-
-import { CopilotClient, defineTool, approveAll } from "@github/copilot-sdk";
-import { FoundryLocalManager } from "foundry-local-sdk";
-import { z } from "zod";
-import * as os from "os";
-
-const alias = "phi-4-mini";
-const endpointUrl = "http://localhost:6543";
-
-// Timeout for each model turn (ms).  Override with FOUNDRY_TIMEOUT_MS env var.
-// Local models on CPU can be slow — increase this on less powerful hardware.
-const TIMEOUT_MS = Number(process.env.FOUNDRY_TIMEOUT_MS) || 120_000;
-
-// ---------------------------------------------------------------------------
-// Helper: send a message and wait for the assistant's full reply.
-// Uses sendAndWait with a fallback: if the session emits an error (e.g.
-// missing finish_reason from the local model), we catch it and continue.
-// ---------------------------------------------------------------------------
-async function sendMessage(
-    session: Awaited<ReturnType<CopilotClient["createSession"]>>,
-    prompt: string,
-    timeoutMs = TIMEOUT_MS,
-) {
-    try {
-        await session.sendAndWait({ prompt }, timeoutMs);
-    } catch (err: any) {
-        // Foundry Local streaming may omit finish_reason, causing a
-        // session.error that rejects sendAndWait. Treat as non-fatal.
-        console.error(`\n[sendMessage error: ${err?.message ?? err}]`);
-    }
-}
-
-type Model = Awaited<ReturnType<FoundryLocalManager["catalog"]["getModel"]>>;
-
-// ---------------------------------------------------------------------------
-// Tool definitions
-// ---------------------------------------------------------------------------
-
-function defineCalculateTool() {
-    return defineTool("calculate", {
-        description:
-            "Evaluate a math expression and return the numeric result. " +
-            "Supports +, -, *, /, parentheses, and Math.* functions like Math.sqrt, Math.pow.",
-        parameters: z.object({
-            expression: z.string().describe('Math expression to evaluate, e.g. "2 + 2" or "Math.sqrt(144)"'),
-        }),
-        handler: async (args) => {
-            try {
-                // Only allow safe math characters and Math.* calls
-                const sanitized = args.expression.replace(/[^0-9+\-*/().,%\s]|Math\.\w+/g, (m) =>
-                    m.startsWith("Math.") ? m : "",
-                );
-                const result = new Function(`"use strict"; return (${sanitized})`)();
-                console.log(`\n    → calculate("${args.expression}") = ${result}`);
-                return { expression: args.expression, result: Number(result) };
-            } catch {
-                return { expression: args.expression, error: "Could not evaluate expression" };
-            }
-        },
-    });
-}
-
-function defineLookupTool() {
-    const glossary: Record<string, string> = {
-        "byok": "Bring Your Own Key — a pattern where you supply your own API credentials to route requests to a custom endpoint instead of the default provider.",
-        "onnx": "Open Neural Network Exchange — an open format for representing machine learning models, enabling interoperability between frameworks.",
-        "rag": "Retrieval-Augmented Generation — a technique that combines a retrieval system with a generative model so responses are grounded in external documents.",
-        "json-rpc": "JSON Remote Procedure Call — a lightweight protocol for calling methods on a remote server using JSON-encoded messages.",
-        "streaming": "A technique where the server sends response tokens incrementally as they are generated, rather than waiting for the full response.",
-    };
-
-    return defineTool("lookup_definition", {
-        description:
-            "Look up the definition of a programming or AI term. " +
-            "Available terms: " + Object.keys(glossary).join(", "),
-        parameters: z.object({
-            term: z.string().describe("The term to look up (case-insensitive)"),
-        }),
-        handler: async (args) => {
-            const key = args.term.toLowerCase().trim();
-            const definition = glossary[key];
-            console.log(`\n    → lookup_definition("${args.term}") → ${definition ? "found" : "not found"}`);
-            if (definition) {
-                return { term: args.term, definition };
-            }
-            return { term: args.term, error: `Term not found. Available: ${Object.keys(glossary).join(", ")}` };
-        },
-    });
-}
-
-function defineSystemInfoTool(modelId: string, endpoint: string) {
-    return defineTool("get_system_info", {
-        description: "Get information about the local system: OS, architecture, memory, CPU count, and the running model.",
-        parameters: z.object({}),
-        handler: async () => {
-            const info = {
-                platform: os.platform(),
-                arch: os.arch(),
-                cpus: os.cpus().length,
-                totalMemory: `${Math.round(os.totalmem() / 1024 ** 3)} GB`,
-                freeMemory: `${Math.round(os.freemem() / 1024 ** 3)} GB`,
-                nodeVersion: process.version,
-                model: modelId,
-                endpoint,
-            };
-            console.log(`\n    → get_system_info() → ${JSON.stringify(info)}`);
-            return info;
-        },
-    });
-}
-
-// ---------------------------------------------------------------------------
-// Main
-// ---------------------------------------------------------------------------
-
-async function main() {
-    let manager: FoundryLocalManager | undefined;
-    let model: Model | undefined;
-    let client: CopilotClient | undefined;
-    let session: Awaited<ReturnType<CopilotClient["createSession"]>> | undefined;
-
-    try {
-        console.log("Initializing Foundry Local...");
-        manager = FoundryLocalManager.create({
-            appName: "foundry_local_samples",
-            webServiceUrls: endpointUrl,
-        });
-
-        // Download and register all execution providers.
-        let currentEp = '';
-        await manager.downloadAndRegisterEps((epName, percent) => {
-            if (epName !== currentEp) {
-                if (currentEp !== '') process.stdout.write('\n');
-                currentEp = epName;
-            }
-            process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-        });
-        if (currentEp !== '') process.stdout.write('\n');
-
-        model = await manager.catalog.getModel(alias);
-        await model.download();
-        await model.load();
-        console.log(`Model: ${model.id}`);
-
-        manager.startWebService();
-        const endpoint = endpointUrl + "/v1";
-        console.log(`Endpoint: ${endpoint}\n`);
-
-        const calculate = defineCalculateTool();
-        const lookupDefinition = defineLookupTool();
-        const getSystemInfo = defineSystemInfoTool(model.id, endpoint);
-
-        client = new CopilotClient();
-
-        session = await client.createSession({
-            onPermissionRequest: approveAll,
-            model: model.id,
-            provider: {
-                type: "openai",
-                baseUrl: endpoint,
-                apiKey: "local",
-                wireApi: "completions",
-            },
-            streaming: true,
-            tools: [calculate, lookupDefinition, getSystemInfo],
-            systemMessage: {
-                content:
-                    "You are a helpful AI assistant running locally via Foundry Local. " +
-                    "You have access to tools. ALWAYS use the appropriate tool when the user asks you to " +
-                    "calculate something, look up a term, or get system information. " +
-                    "Do not guess — call the tool and report its result. " +
-                    "Keep responses concise.",
-            },
-        });
-
-        // Stream assistant text to stdout
-        session.on("assistant.message_delta", (event) => {
-            process.stdout.write(event.data.deltaContent);
-        });
-        session.on("tool.execution_start", (event) => {
-            console.log(`\n  [Tool called: ${(event as any).data?.toolName ?? "unknown"}]`);
-        });
-
-        // --- Turn 1: Calculator tool ---
-        console.log("=== Turn 1: Calculator ===\n");
-        process.stdout.write("User: What is the square root of 144 plus 8 times 3?\n\nAssistant: ");
-        await sendMessage(
-            session,
-            "Use the calculate tool to compute: Math.sqrt(144) + 8 * 3",
-        );
-        console.log("\n");
-
-        // --- Turn 2: Glossary lookup tool ---
-        console.log("=== Turn 2: Glossary Lookup ===\n");
-        process.stdout.write("User: What does BYOK mean? And what about RAG?\n\nAssistant: ");
-        await sendMessage(
-            session,
-            "Use the lookup_definition tool to look up 'byok' and 'rag', then explain both.",
-        );
-        console.log("\n");
-
-        // --- Turn 3: System info tool ---
-        console.log("=== Turn 3: System Info ===\n");
-        process.stdout.write("User: What system am I running on?\n\nAssistant: ");
-        await sendMessage(
-            session,
-            "Use the get_system_info tool to check what system this is running on, then summarize.",
-        );
-        console.log("\n");
-
-        console.log("Done!");
-    } finally {
-        // Clean up resources in reverse order of creation
-        if (session) {
-            await session.destroy().catch(() => {});
-        }
-        if (client) {
-            await client.stop().catch(() => {});
-        }
-        if (model) {
-            console.log("Unloading model...");
-            await model.unload().catch((e) => {
-                console.warn("Warning: failed to unload model:", e);
-            });
-        }
-        if (manager) {
-            console.log("Stopping web service...");
-            try {
-                manager.stopWebService();
-            } catch (e) {
-                console.warn("Warning: failed to stop web service:", e);
-            }
-        }
-    }
-}
-
-main().catch(console.error);
diff --git a/samples/js/electron-chat-application/foundry_local_color.svg b/samples/js/electron-chat-application/foundry_local_color.svg
deleted file mode 100644
index 412a6fb70..000000000
--- a/samples/js/electron-chat-application/foundry_local_color.svg
+++ /dev/null
@@ -1,40 +0,0 @@
-<svg width="256" height="256" viewBox="0 0 256 256" fill="none" xmlns="http://www.w3.org/2000/svg">
-<g clip-path="url(#clip0_1197_17983)">
-<rect width="256" height="256" fill="white"/>
-<path d="M154.903 33.0322H35.785C34.2618 33.0322 33.0323 34.2618 33.0323 35.7849V63.3118C33.0323 64.835 34.2618 66.0645 35.785 66.0645H181.677C186.238 66.0645 189.936 69.7623 189.936 74.3225V70.9C189.936 51.3101 175.319 33.0322 154.903 33.0322Z" fill="url(#paint0_linear_1197_17983)"/>
-<path d="M178.255 42.7031C185.733 50.1996 189.936 60.3112 189.936 70.8998V220.215C189.936 221.738 191.165 222.967 192.688 222.967H220.215C221.738 222.967 222.968 221.738 222.968 220.215V101.106C222.968 92.3433 219.49 83.9476 213.297 77.7449L178.255 42.7031Z" fill="url(#paint1_linear_1197_17983)"/>
-<path d="M113.613 74.3228H35.785C34.2618 74.3228 33.0323 75.5523 33.0323 77.0754V104.602C33.0323 106.125 34.2618 107.355 35.785 107.355H140.387C144.947 107.355 148.645 111.053 148.645 115.613V112.191C148.645 92.6006 134.028 74.3228 113.613 74.3228Z" fill="url(#paint2_linear_1197_17983)"/>
-<path d="M136.965 83.9937C144.443 91.4901 148.645 101.602 148.645 112.19V220.215C148.645 221.738 149.875 222.968 151.398 222.968H178.925C180.448 222.968 181.677 221.738 181.677 220.215V142.397C181.677 133.634 178.2 125.238 172.006 119.035L136.965 83.9937Z" fill="url(#paint3_linear_1197_17983)"/>
-<path d="M72.3223 115.613H35.785C34.2618 115.613 33.0323 116.842 33.0323 118.365V145.892C33.0323 147.416 34.2618 148.645 35.785 148.645H99.0968C103.657 148.645 107.355 152.343 107.355 156.903V153.481C107.355 133.891 92.7381 115.613 72.3223 115.613Z" fill="url(#paint4_linear_1197_17983)"/>
-<path d="M95.6743 125.284C103.152 132.781 107.355 142.892 107.355 153.481V220.215C107.355 221.738 108.584 222.968 110.108 222.968H137.634C139.158 222.968 140.387 221.738 140.387 220.215V183.687C140.387 174.924 136.91 166.529 130.716 160.326L95.6743 125.284Z" fill="url(#paint5_linear_1197_17983)"/>
-</g>
-<defs>
-<linearGradient id="paint0_linear_1197_17983" x1="189.936" y1="70.4672" x2="33.0323" y2="70.4672" gradientUnits="userSpaceOnUse">
-<stop stop-color="#2C08AC"/>
-<stop offset="0.8" stop-color="#4F42FD"/>
-</linearGradient>
-<linearGradient id="paint1_linear_1197_17983" x1="201.96" y1="42.7031" x2="311.288" y2="175.174" gradientUnits="userSpaceOnUse">
-<stop offset="0.3" stop-color="#7274FF"/>
-<stop offset="1" stop-color="#4F42FD"/>
-</linearGradient>
-<linearGradient id="paint2_linear_1197_17983" x1="148.645" y1="111.758" x2="33.0323" y2="111.758" gradientUnits="userSpaceOnUse">
-<stop stop-color="#2C08AC"/>
-<stop offset="0.8" stop-color="#4F42FD"/>
-</linearGradient>
-<linearGradient id="paint3_linear_1197_17983" x1="160.67" y1="83.9937" x2="238.429" y2="206.207" gradientUnits="userSpaceOnUse">
-<stop offset="0.3" stop-color="#7274FF"/>
-<stop offset="1" stop-color="#4F42FD"/>
-</linearGradient>
-<linearGradient id="paint4_linear_1197_17983" x1="107.355" y1="150.688" x2="33.0323" y2="150.688" gradientUnits="userSpaceOnUse">
-<stop stop-color="#2C08AC"/>
-<stop offset="0.8" stop-color="#4F42FD"/>
-</linearGradient>
-<linearGradient id="paint5_linear_1197_17983" x1="119.38" y1="125.284" x2="164.354" y2="225.849" gradientUnits="userSpaceOnUse">
-<stop offset="0.3" stop-color="#7274FF"/>
-<stop offset="1" stop-color="#4F42FD"/>
-</linearGradient>
-<clipPath id="clip0_1197_17983">
-<rect width="256" height="256" fill="white"/>
-</clipPath>
-</defs>
-</svg>
diff --git a/samples/js/electron-chat-application/index.html b/samples/js/electron-chat-application/index.html
deleted file mode 100644
index 5d6bd306f..000000000
--- a/samples/js/electron-chat-application/index.html
+++ /dev/null
@@ -1,174 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <meta http-equiv="Content-Security-Policy" content="default-src 'self'; style-src 'self' 'unsafe-inline'; script-src 'self'">
-  <title>Foundry Local Chat</title>
-  <link rel="stylesheet" href="styles.css">
-</head>
-<body>
-  <div class="app-container">
-    <!-- Sidebar -->
-    <aside class="sidebar" id="sidebar">
-      <div class="sidebar-resize-handle" id="sidebarResizeHandle"></div>
-      <div class="sidebar-header">
-        <div class="logo">
-          <img src="foundry_local_color.svg" alt="Foundry Local" width="24" height="24">
-          <span>Foundry Local</span>
-        </div>
-        <button class="sidebar-toggle" id="sidebarToggle" aria-label="Toggle sidebar">
-          <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-            <path d="M15 18l-6-6 6-6"/>
-          </svg>
-        </button>
-      </div>
-      
-      <div class="sidebar-content">
-        <div class="section-header">
-          <h3>Models</h3>
-          <button class="refresh-btn" id="refreshModels" title="Refresh models">
-            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-              <path d="M23 4v6h-6M1 20v-6h6"/>
-              <path d="M3.51 9a9 9 0 0114.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0020.49 15"/>
-            </svg>
-          </button>
-        </div>
-        
-        <div class="model-list" id="modelList">
-          <div class="loading-spinner">
-            <div class="spinner"></div>
-            <span>Loading models...</span>
-          </div>
-        </div>
-      </div>
-    </aside>
-    
-    <!-- Main Chat Area -->
-    <main class="chat-area">
-      <div class="chat-header">
-        <button class="mobile-menu-btn" id="mobileMenuBtn" aria-label="Open menu">
-          <svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-            <line x1="3" y1="12" x2="21" y2="12"/>
-            <line x1="3" y1="6" x2="21" y2="6"/>
-            <line x1="3" y1="18" x2="21" y2="18"/>
-          </svg>
-        </button>
-        <div class="chat-title">
-          <h1>Chat</h1>
-          <span class="model-badge" id="modelBadge">Select a model to start</span>
-        </div>
-        <button class="new-chat-btn" id="newChatBtn" title="New chat">
-          <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-            <line x1="12" y1="5" x2="12" y2="19"/>
-            <line x1="5" y1="12" x2="19" y2="12"/>
-          </svg>
-        </button>
-      </div>
-      
-      <div class="chat-messages" id="chatMessages">
-        <div class="welcome-message">
-          <div class="welcome-icon">
-            <svg width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
-              <path d="M21 15a2 2 0 01-2 2H7l-4 4V5a2 2 0 012-2h14a2 2 0 012 2v10z"/>
-            </svg>
-          </div>
-          <h2>Welcome to Foundry Local Chat</h2>
-          <p>Select a model from the sidebar to start chatting with AI running locally on your machine.</p>
-          <div class="feature-highlights">
-            <div class="feature">
-              <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-                <path d="M12 22s8-4 8-10V5l-8-3-8 3v7c0 6 8 10 8 10z"/>
-              </svg>
-              <span>100% Private</span>
-            </div>
-            <div class="feature">
-              <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-                <circle cx="12" cy="12" r="10"/>
-                <polyline points="12 6 12 12 16 14"/>
-              </svg>
-              <span>Low Latency</span>
-            </div>
-            <div class="feature">
-              <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-                <rect x="2" y="3" width="20" height="14" rx="2" ry="2"/>
-                <line x1="8" y1="21" x2="16" y2="21"/>
-                <line x1="12" y1="17" x2="12" y2="21"/>
-              </svg>
-              <span>Runs Locally</span>
-            </div>
-          </div>
-        </div>
-      </div>
-      
-      <div class="chat-input-container">
-        <form class="chat-input-form" id="chatForm">
-          <div class="input-wrapper">
-            <textarea 
-              id="messageInput" 
-              placeholder="Type your message..." 
-              rows="1"
-              disabled
-            ></textarea>
-            <button type="button" class="record-btn" id="recordBtn" title="Record voice">
-              <svg class="mic-icon" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
-                <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
-                <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
-                <line x1="12" y1="19" x2="12" y2="23"></line>
-                <line x1="8" y1="23" x2="16" y2="23"></line>
-              </svg>
-              <svg class="stop-icon" width="20" height="20" viewBox="0 0 24 24" fill="currentColor" stroke="none">
-                <rect x="6" y="6" width="12" height="12" rx="2"></rect>
-              </svg>
-            </button>
-            <button type="submit" class="send-btn" id="sendBtn" disabled title="Send message">
-              <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-                <line x1="22" y1="2" x2="11" y2="13"/>
-                <polygon points="22 2 15 22 11 13 2 9 22 2"/>
-              </svg>
-            </button>
-          </div>
-          <div class="input-hint">
-            Press <kbd>Enter</kbd> to send, <kbd>Shift+Enter</kbd> for new line
-            <span class="hint-separator">•</span>
-            <button type="button" class="transcription-settings-link" id="transcriptionSettingsBtn">Voice settings</button>
-          </div>
-          <div class="context-usage" id="contextUsage">
-            <span class="context-label-text">Context</span>
-            <div class="context-bar">
-              <div class="context-fill" id="contextFill"></div>
-            </div>
-            <span class="context-label" id="contextLabel">0%</span>
-          </div>
-        </form>
-      </div>
-    </main>
-  </div>
-  
-  <!-- Whisper Model Modal -->
-  <div class="modal-overlay" id="whisperModal">
-    <div class="modal">
-      <h3>Transcription Settings</h3>
-      <p>Select a Whisper model for voice transcription.</p>
-      <div class="current-whisper-model" id="currentWhisperModel">
-        <span class="label">Current model:</span>
-        <span class="model-name">None selected</span>
-      </div>
-      <div class="whisper-models" id="whisperModelList">
-        <div class="loading-spinner">
-          <div class="spinner"></div>
-          <span>Loading models...</span>
-        </div>
-      </div>
-      <div class="modal-actions">
-        <button class="modal-btn secondary" id="whisperModalCancel">Close</button>
-      </div>
-    </div>
-  </div>
-  
-  <!-- Toast Container -->
-  <div class="toast-container" id="toastContainer"></div>
-  
-  <script src="renderer.js"></script>
-</body>
-</html>
diff --git a/samples/js/electron-chat-application/main.js b/samples/js/electron-chat-application/main.js
deleted file mode 100644
index 92473846a..000000000
--- a/samples/js/electron-chat-application/main.js
+++ /dev/null
@@ -1,376 +0,0 @@
-const { app, BrowserWindow, ipcMain } = require('electron');
-const path = require('path');
-const fs = require('fs');
-const os = require('os');
-
-let mainWindow;
-
-function createWindow() {
-  mainWindow = new BrowserWindow({
-    width: 1200,
-    height: 800,
-    minWidth: 800,
-    minHeight: 600,
-    webPreferences: {
-      preload: path.join(__dirname, 'preload.js'),
-      contextIsolation: true,
-      nodeIntegration: false
-    },
-    titleBarStyle: 'hiddenInset',
-    backgroundColor: '#1a1a2e'
-  });
-
-  mainWindow.loadFile('index.html');
-  
-  // Open DevTools in development
-  if (process.argv.includes('--enable-logging')) {
-    mainWindow.webContents.openDevTools();
-  }
-}
-
-app.whenReady().then(createWindow);
-
-app.on('window-all-closed', () => {
-  if (process.platform !== 'darwin') {
-    app.quit();
-  }
-});
-
-app.on('activate', () => {
-  if (BrowserWindow.getAllWindows().length === 0) {
-    createWindow();
-  }
-});
-
-// SDK Management
-let manager = null;
-let currentModel = null;
-let chatClient = null;
-let webServiceStarted = false;
-const SERVICE_PORT = 47392;
-const SERVICE_URL = `http://127.0.0.1:${SERVICE_PORT}`;
-
-let initPromise = null;
-
-async function initializeSDK() {
-  if (initPromise) return initPromise;
-  
-  initPromise = (async () => {
-    const { FoundryLocalManager } = await import('foundry-local-sdk');
-    manager = FoundryLocalManager.create({
-      appName: 'foundry_local_samples',
-      logLevel: 'info',
-      webServiceUrls: SERVICE_URL
-    });
-
-    // Download and register all execution providers.
-    let currentEp = '';
-    await manager.downloadAndRegisterEps((epName, percent) => {
-      if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-      }
-      process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-    });
-    if (currentEp !== '') process.stdout.write('\n');
-    
-    return manager;
-  })();
-  
-  return initPromise;
-}
-
-function ensureWebServiceStarted() {
-  if (!webServiceStarted && manager) {
-    manager.startWebService();
-    webServiceStarted = true;
-  }
-}
-
-// IPC Handlers
-ipcMain.handle('get-models', async () => {
-  try {
-    console.log('get-models: initializing SDK...');
-    await initializeSDK();
-    
-    console.log('get-models: fetching models from catalog...');
-    const models = await manager.catalog.getModels();
-    console.log(`get-models: found ${models.length} models`);
-    
-    const cachedVariants = await manager.catalog.getCachedModels();
-    const cachedIds = new Set(cachedVariants.map(v => v.id));
-    console.log(`get-models: ${cachedVariants.length} cached models`);
-    
-    const result = models.map(m => ({
-      id: m.id,
-      alias: m.alias,
-      isCached: m.isCached,
-      variants: m.variants.map(v => ({
-        id: v.id,
-        alias: v.alias,
-        displayName: v.modelInfo.displayName || v.alias,
-        isCached: cachedIds.has(v.id),
-        fileSizeMb: v.modelInfo.fileSizeMb,
-        modelType: v.modelInfo.modelType,
-        publisher: v.modelInfo.publisher
-      }))
-    }));
-    
-    console.log('get-models: returning', result.length, 'models');
-    return result;
-  } catch (error) {
-    console.error('Error getting models:', error);
-    throw error;
-  }
-});
-
-ipcMain.handle('download-model', async (event, modelAlias) => {
-  try {
-    await initializeSDK();
-    const model = await manager.catalog.getModel(modelAlias);
-    if (!model) throw new Error(`Model ${modelAlias} not found`);
-    
-    await model.download();
-    return { success: true };
-  } catch (error) {
-    console.error('Error downloading model:', error);
-    throw error;
-  }
-});
-
-ipcMain.handle('load-model', async (event, modelAlias) => {
-  try {
-    await initializeSDK();
-    
-    // Start web service for HTTP streaming (only once)
-    ensureWebServiceStarted();
-    
-    // Unload current model if any
-    if (currentModel) {
-      try {
-        await currentModel.unload();
-      } catch (e) {
-        // Ignore unload errors
-      }
-      chatClient = null;
-    }
-    
-    const model = await manager.catalog.getModel(modelAlias);
-    if (!model) throw new Error(`Model ${modelAlias} not found`);
-    
-    // Download if not cached
-    if (!model.isCached) {
-      await model.download();
-    }
-    
-    await model.load();
-    
-    // Wait for model to be fully loaded before creating chat client
-    while (!(await model.isLoaded())) {
-      await new Promise(resolve => setTimeout(resolve, 100));
-    }
-    
-    currentModel = model;
-    chatClient = model.createChatClient();
-    
-    return { success: true, modelId: model.id };
-  } catch (error) {
-    console.error('Error loading model:', error);
-    throw error;
-  }
-});
-
-ipcMain.handle('unload-model', async () => {
-  try {
-    if (currentModel) {
-      await currentModel.unload();
-      currentModel = null;
-      chatClient = null;
-    }
-    return { success: true };
-  } catch (error) {
-    console.error('Error unloading model:', error);
-    throw error;
-  }
-});
-
-ipcMain.handle('delete-model', async (event, modelAlias) => {
-  try {
-    await initializeSDK();
-    const model = await manager.catalog.getModel(modelAlias);
-    if (!model) throw new Error(`Model ${modelAlias} not found`);
-    
-    // Unload if currently loaded
-    if (currentModel && currentModel.alias === modelAlias) {
-      await currentModel.unload();
-      currentModel = null;
-      chatClient = null;
-    }
-    
-    model.removeFromCache();
-    return { success: true };
-  } catch (error) {
-    console.error('Error deleting model:', error);
-    throw error;
-  }
-});
-
-ipcMain.handle('chat', async (event, messages) => {
-  if (!currentModel) throw new Error('No model loaded');
-  
-  const startTime = performance.now();
-  let firstTokenTime = null;
-  let tokenCount = 0;
-  let fullContent = '';
-  
-  // Use HTTP streaming to avoid koffi callback issues with Electron
-  const response = await fetch(`${SERVICE_URL}/v1/chat/completions`, {
-    method: 'POST',
-    headers: { 'Content-Type': 'application/json' },
-    body: JSON.stringify({
-      model: currentModel.id,
-      messages,
-      stream: true
-    })
-  });
-  
-  if (!response.ok) {
-    throw new Error(`HTTP error: ${response.status}`);
-  }
-  
-  const reader = response.body.getReader();
-  const decoder = new TextDecoder();
-  
-  while (true) {
-    const { done, value } = await reader.read();
-    if (done) break;
-    
-    const chunk = decoder.decode(value, { stream: true });
-    const lines = chunk.split('\n').filter(line => line.startsWith('data: '));
-    
-    for (const line of lines) {
-      const data = line.slice(6); // Remove 'data: ' prefix
-      if (data === '[DONE]') continue;
-      
-      try {
-        const parsed = JSON.parse(data);
-        const content = parsed.choices?.[0]?.delta?.content;
-        if (content) {
-          if (firstTokenTime === null) {
-            firstTokenTime = performance.now();
-          }
-          tokenCount++;
-          fullContent += content;
-          
-          mainWindow.webContents.send('chat-chunk', {
-            content,
-            tokenCount,
-            timeToFirstToken: firstTokenTime ? (firstTokenTime - startTime) : null
-          });
-        }
-      } catch (e) {
-        // Skip invalid JSON chunks
-      }
-    }
-  }
-  
-  const endTime = performance.now();
-  const totalTime = endTime - startTime;
-  const tokensPerSecond = tokenCount > 0 ? (tokenCount / (totalTime / 1000)).toFixed(2) : 0;
-  
-  return {
-    content: fullContent,
-    stats: {
-      tokenCount,
-      timeToFirstToken: firstTokenTime ? Math.round(firstTokenTime - startTime) : 0,
-      totalTime: Math.round(totalTime),
-      tokensPerSecond: parseFloat(tokensPerSecond)
-    }
-  };
-});
-
-ipcMain.handle('get-loaded-model', async () => {
-  if (!currentModel) return null;
-  return {
-    id: currentModel.id,
-    alias: currentModel.alias
-  };
-});
-
-// Transcription handlers
-ipcMain.handle('get-whisper-models', async () => {
-  await initializeSDK();
-  const models = await manager.catalog.getModels();
-  return models
-    .filter(m => m.alias.toLowerCase().includes('whisper'))
-    .map(m => ({
-      alias: m.alias,
-      isCached: m.isCached,
-      fileSizeMb: m.variants[0]?.modelInfo?.fileSizeMb
-    }));
-});
-
-ipcMain.handle('download-whisper-model', async (event, modelAlias) => {
-  await initializeSDK();
-  const model = await manager.catalog.getModel(modelAlias);
-  if (!model) throw new Error(`Model ${modelAlias} not found`);
-  await model.download();
-  return { success: true };
-});
-
-ipcMain.handle('transcribe-audio', async (event, audioFilePath, base64Data) => {
-  await initializeSDK();
-  ensureWebServiceStarted();
-  
-  // Use OS temp directory
-  const tempDir = os.tmpdir();
-  const tempFilePath = path.join(tempDir, `foundry_audio_${Date.now()}.wav`);
-  
-  // Write audio data to temp file
-  const audioBuffer = Buffer.from(base64Data, 'base64');
-  fs.writeFileSync(tempFilePath, audioBuffer);
-  
-  try {
-    // Find a cached whisper model
-    const models = await manager.catalog.getModels();
-    const whisperModels = models.filter(m => 
-      m.alias.toLowerCase().includes('whisper') && m.isCached
-    );
-    
-    if (whisperModels.length === 0) {
-      throw new Error('No whisper model downloaded');
-    }
-    
-    // Use the smallest cached whisper model
-    const selectedModel = whisperModels.sort((a, b) => {
-      const sizeA = a.variants[0]?.modelInfo?.fileSizeMb || 0;
-      const sizeB = b.variants[0]?.modelInfo?.fileSizeMb || 0;
-      return sizeA - sizeB;
-    })[0];
-    
-    // Load whisper model
-    const whisperModel = await manager.catalog.getModel(selectedModel.alias);
-    await whisperModel.load();
-    
-    // Wait for model to be loaded
-    while (!(await whisperModel.isLoaded())) {
-      await new Promise(resolve => setTimeout(resolve, 100));
-    }
-    
-    // Create audio client and transcribe
-    const audioClient = whisperModel.createAudioClient();
-    const result = await audioClient.transcribe(tempFilePath);
-    
-    // Unload whisper model
-    await whisperModel.unload();
-    
-    return result;
-  } finally {
-    // Clean up temp file
-    try {
-      fs.unlinkSync(tempFilePath);
-    } catch (e) {
-      // Ignore cleanup errors
-    }
-  }
-});
diff --git a/samples/js/electron-chat-application/package.json b/samples/js/electron-chat-application/package.json
deleted file mode 100644
index 3609b2eea..000000000
--- a/samples/js/electron-chat-application/package.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "name": "foundry-local-chat",
-  "version": "1.0.0",
-  "description": "A modern chat application using Foundry Local SDK",
-  "main": "main.js",
-  "scripts": {
-    "start": "electron .",
-    "dev": "electron . --enable-logging"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest",
-    "highlight.js": "^11.11.1",
-    "marked": "^15.0.6"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  },
-  "devDependencies": {
-    "electron": "^42.3.3"
-  }
-}
diff --git a/samples/js/electron-chat-application/preload.js b/samples/js/electron-chat-application/preload.js
deleted file mode 100644
index 7026b0b27..000000000
--- a/samples/js/electron-chat-application/preload.js
+++ /dev/null
@@ -1,20 +0,0 @@
-const { contextBridge, ipcRenderer } = require('electron');
-
-// Exposes `window.foundryAPI` to the renderer. Every method forwards to
-// ipcRenderer.invoke() on the channel named in its body and returns the
-// resulting promise; only the listed arguments are passed through.
-contextBridge.exposeInMainWorld('foundryAPI', {
-  // Model management
-  getModels: () => ipcRenderer.invoke('get-models'),
-  downloadModel: (modelAlias) => ipcRenderer.invoke('download-model', modelAlias),
-  loadModel: (modelAlias) => ipcRenderer.invoke('load-model', modelAlias),
-  unloadModel: () => ipcRenderer.invoke('unload-model'),
-  deleteModel: (modelAlias) => ipcRenderer.invoke('delete-model', modelAlias),
-  // Chat
-  chat: (messages) => ipcRenderer.invoke('chat', messages),
-  getLoadedModel: () => ipcRenderer.invoke('get-loaded-model'),
-  // Subscribes `callback` to 'chat-chunk' messages. The callback receives only
-  // the payload (the IPC event object is dropped). Returns a function that
-  // removes this subscription.
-  onChatChunk: (callback) => {
-    const handler = (event, data) => callback(data);
-    ipcRenderer.on('chat-chunk', handler);
-    return () => ipcRenderer.removeListener('chat-chunk', handler);
-  },
-  // Transcription (whisper model listing/download and audio transcription)
-  getWhisperModels: () => ipcRenderer.invoke('get-whisper-models'),
-  downloadWhisperModel: (modelAlias) => ipcRenderer.invoke('download-whisper-model', modelAlias),
-  transcribeAudio: (filePath, base64Data) => ipcRenderer.invoke('transcribe-audio', filePath, base64Data)
-});
diff --git a/samples/js/electron-chat-application/renderer.js b/samples/js/electron-chat-application/renderer.js
deleted file mode 100644
index 86b840399..000000000
--- a/samples/js/electron-chat-application/renderer.js
+++ /dev/null
@@ -1,1066 +0,0 @@
-// =====================================================
-// Foundry Local Chat - Renderer Process
-// =====================================================
-
-// Simple markdown parser with code block handling
-const SimpleMarkdown = {
-  parse(text) {
-    if (!text) return '';
-    
-    // Extract code blocks first to protect them from other processing
-    const codeBlocks = [];
-    let html = text.replace(/```(\w*)\n([\s\S]*?)```/g, (match, lang, code) => {
-      const placeholder = `__CODE_BLOCK_${codeBlocks.length}__`;
-      codeBlocks.push({ lang, code });
-      return placeholder;
-    });
-    
-    // Extract inline code
-    const inlineCodes = [];
-    html = html.replace(/`([^`]+)`/g, (match, code) => {
-      const placeholder = `__INLINE_CODE_${inlineCodes.length}__`;
-      inlineCodes.push(code);
-      return placeholder;
-    });
-    
-    // Now escape HTML on the remaining text
-    html = this.escapeHtml(html);
-    
-    // Headings (### before ## before #)
-    html = html.replace(/^### (.+)$/gm, '<h4>$1</h4>');
-    html = html.replace(/^## (.+)$/gm, '<h3>$1</h3>');
-    html = html.replace(/^# (.+)$/gm, '<h2>$1</h2>');
-    
-    // Unordered lists
-    html = html.replace(/^- (.+)$/gm, '<li>$1</li>');
-    html = html.replace(/(<li>.*<\/li>\n?)+/g, '<ul>$&</ul>');
-    
-    // Bold
-    html = html.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
-    
-    // Italic
-    html = html.replace(/\*([^*]+)\*/g, '<em>$1</em>');
-    
-    // Links
-    html = html.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2" target="_blank">$1</a>');
-    
-    // Line breaks (but not inside block elements)
-    html = html.replace(/\n/g, '<br>');
-    
-    // Clean up extra <br> around block elements
-    html = html.replace(/<br>(<h[234]>)/g, '$1');
-    html = html.replace(/(<\/h[234]>)<br>/g, '$1');
-    html = html.replace(/<br>(<ul>)/g, '$1');
-    html = html.replace(/(<\/ul>)<br>/g, '$1');
-    
-    // Restore inline code
-    inlineCodes.forEach((code, i) => {
-      html = html.replace(`__INLINE_CODE_${i}__`, `<code>${this.escapeHtml(code)}</code>`);
-    });
-    
-    // Restore code blocks
-    codeBlocks.forEach((block, i) => {
-      const codeHtml = `<div class="code-block-wrapper">
-        <button class="code-copy-btn" data-copy="true" title="Copy code">
-          <span class="copy-icon">⧉</span>
-          <span class="check-icon">✓</span>
-        </button>
-        <pre><code class="language-${block.lang || 'plaintext'}">${this.escapeHtml(block.code.trim())}</code></pre>
-      </div>`;
-      html = html.replace(`__CODE_BLOCK_${i}__`, codeHtml);
-    });
-    
-    return html;
-  },
-  
-  escapeHtml(text) {
-    const div = document.createElement('div');
-    div.textContent = text;
-    return div.innerHTML;
-  }
-};
-
-// Copy code to clipboard - use event delegation
-document.addEventListener('click', async (e) => {
-  const button = e.target.closest('.code-copy-btn');
-  if (!button) return;
-  
-  const codeBlock = button.closest('.code-block-wrapper').querySelector('code');
-  const text = codeBlock.textContent;
-  
-  try {
-    await navigator.clipboard.writeText(text);
-    button.classList.add('copied');
-    setTimeout(() => button.classList.remove('copied'), 2000);
-  } catch (err) {
-    console.error('Failed to copy:', err);
-  }
-});
-
-// Estimate tokens from text (rough approximation: ~4 chars per token)
-function estimateTokens(text) {
-  return Math.ceil(text.length / 4);
-}
-
-// Calculate total context tokens from all messages
-function calculateContextTokens() {
-  return messages.reduce((total, msg) => total + estimateTokens(msg.content), 0);
-}
-
-// Update context usage display
-function updateContextUsage() {
-  contextTokens = calculateContextTokens();
-  const percentage = Math.min(100, Math.round((contextTokens / CONTEXT_LIMIT) * 100));
-  
-  contextFill.style.width = `${percentage}%`;
-  contextLabel.textContent = `${percentage}%`;
-  
-  // Update color based on usage
-  contextFill.classList.remove('warning', 'danger');
-  if (percentage >= 90) {
-    contextFill.classList.add('danger');
-  } else if (percentage >= 70) {
-    contextFill.classList.add('warning');
-  }
-  
-  // Update tooltip
-  contextUsage.title = `Context: ${contextTokens.toLocaleString()} / ${CONTEXT_LIMIT.toLocaleString()} tokens (~${percentage}%)`;
-}
-
-// State
-// Conversation history as { role, content } objects, in send order.
-let messages = [];
-// Alias of the currently loaded chat model, or null when none is loaded.
-let currentModelAlias = null;
-// True while a chat request is in flight.
-let isGenerating = false;
-// Estimated token count of `messages`; refreshed by updateContextUsage().
-let contextTokens = 0;
-// Context window size used for the usage meter. Declared const, so it is a
-// fixed value and is not adjusted per model.
-const CONTEXT_LIMIT = 8192;
-
-// DOM Elements (looked up once by id when the script is evaluated)
-const sidebar = document.getElementById('sidebar');
-const sidebarToggle = document.getElementById('sidebarToggle');
-const mobileMenuBtn = document.getElementById('mobileMenuBtn');
-const modelList = document.getElementById('modelList');
-const refreshModels = document.getElementById('refreshModels');
-const modelBadge = document.getElementById('modelBadge');
-const chatMessages = document.getElementById('chatMessages');
-const chatForm = document.getElementById('chatForm');
-const messageInput = document.getElementById('messageInput');
-const sendBtn = document.getElementById('sendBtn');
-const newChatBtn = document.getElementById('newChatBtn');
-const toastContainer = document.getElementById('toastContainer');
-const recordBtn = document.getElementById('recordBtn');
-const transcriptionSettingsBtn = document.getElementById('transcriptionSettingsBtn');
-const whisperModal = document.getElementById('whisperModal');
-const whisperModelList = document.getElementById('whisperModelList');
-const whisperModalCancel = document.getElementById('whisperModalCancel');
-const currentWhisperModelEl = document.getElementById('currentWhisperModel');
-const contextFill = document.getElementById('contextFill');
-const contextLabel = document.getElementById('contextLabel');
-const contextUsage = document.getElementById('contextUsage');
-
-// Recording state
-// Active MediaRecorder instance, or null before the first recording.
-let mediaRecorder = null;
-// Data chunks collected from the recorder for the current recording.
-let audioChunks = [];
-// True between startRecording() succeeding and stopRecording().
-let isRecording = false;
-// Alias chosen in the whisper modal, or null when nothing was chosen.
-let selectedWhisperModel = null;
-
-// Initialize
-document.addEventListener('DOMContentLoaded', async () => {
-  setupEventListeners();
-  setupSidebarResize();
-  setupRecordButton();
-  updateContextUsage();
-  await loadModels();
-  setupChatChunkListener();
-});
-
-function setupSidebarResize() {
-  const resizeHandle = document.getElementById('sidebarResizeHandle');
-  let isResizing = false;
-  
-  resizeHandle.addEventListener('mousedown', (e) => {
-    isResizing = true;
-    resizeHandle.classList.add('dragging');
-    document.body.style.cursor = 'col-resize';
-    document.body.style.userSelect = 'none';
-  });
-  
-  document.addEventListener('mousemove', (e) => {
-    if (!isResizing) return;
-    const newWidth = Math.min(Math.max(e.clientX, 240), 480);
-    sidebar.style.width = newWidth + 'px';
-  });
-  
-  document.addEventListener('mouseup', () => {
-    if (isResizing) {
-      isResizing = false;
-      resizeHandle.classList.remove('dragging');
-      document.body.style.cursor = '';
-      document.body.style.userSelect = '';
-    }
-  });
-}
-
-function setupRecordButton() {
-  recordBtn.addEventListener('click', handleRecordClick);
-  transcriptionSettingsBtn.addEventListener('click', openTranscriptionSettings);
-  whisperModalCancel.addEventListener('click', () => {
-    whisperModal.classList.remove('visible');
-  });
-}
-
-async function openTranscriptionSettings() {
-  const whisperModels = await window.foundryAPI.getWhisperModels();
-  showWhisperModal(whisperModels, true);
-}
-
-async function handleRecordClick() {
-  if (isRecording) {
-    // Stop recording
-    stopRecording();
-  } else {
-    // Check if whisper model is available
-    const whisperModels = await window.foundryAPI.getWhisperModels();
-    const cachedModels = whisperModels.filter(m => m.isCached);
-    
-    if (cachedModels.length === 0) {
-      // Show modal to download whisper model
-      showWhisperModal(whisperModels, false);
-    } else {
-      // Start recording
-      startRecording();
-    }
-  }
-}
-
-function showWhisperModal(models, isSettings = false) {
-  // Update current model display
-  const cachedModels = models.filter(m => m.isCached);
-  const modelNameEl = currentWhisperModelEl.querySelector('.model-name');
-  if (cachedModels.length > 0) {
-    const current = selectedWhisperModel || cachedModels.sort((a, b) => (a.fileSizeMb || 0) - (b.fileSizeMb || 0))[0].alias;
-    modelNameEl.textContent = current;
-  } else {
-    modelNameEl.textContent = 'None - download a model below';
-  }
-  
-  whisperModelList.innerHTML = '';
-  
-  models.forEach(model => {
-    const sizeStr = model.fileSizeMb ? `${(model.fileSizeMb / 1024).toFixed(1)} GB` : '';
-    const isSelected = selectedWhisperModel === model.alias;
-    const item = document.createElement('div');
-    item.className = 'whisper-model-item' + (isSelected ? ' selected' : '');
-    item.innerHTML = `
-      <div class="model-info">
-        <span class="model-name">${model.alias}</span>
-        <span class="model-size">${sizeStr}</span>
-      </div>
-      <div class="model-actions">
-        ${model.isCached 
-          ? `<button class="use-btn">${isSelected ? '✓ Selected' : 'Select'}</button>
-             <button class="delete-btn" title="Delete from cache">
-               <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
-                 <polyline points="3 6 5 6 21 6"></polyline>
-                 <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"></path>
-               </svg>
-             </button>`
-          : '<button class="download-btn">Download</button>'
-        }
-      </div>
-    `;
-    
-    if (model.isCached) {
-      const useBtn = item.querySelector('.use-btn');
-      useBtn.addEventListener('click', () => {
-        selectedWhisperModel = model.alias;
-        showToast(`Selected ${model.alias} for transcription`, 'success');
-        // Refresh modal to show selection
-        showWhisperModal(models, true);
-      });
-      
-      const deleteBtn = item.querySelector('.delete-btn');
-      deleteBtn.addEventListener('click', async () => {
-        if (confirm(`Delete ${model.alias} from cache?`)) {
-          try {
-            await window.foundryAPI.deleteModel(model.alias);
-            if (selectedWhisperModel === model.alias) {
-              selectedWhisperModel = null;
-            }
-            showToast(`Deleted ${model.alias}`, 'success');
-            const updatedModels = await window.foundryAPI.getWhisperModels();
-            showWhisperModal(updatedModels, true);
-          } catch (error) {
-            showToast('Delete failed: ' + error.message, 'error');
-          }
-        }
-      });
-    } else {
-      const downloadBtn = item.querySelector('.download-btn');
-      downloadBtn.addEventListener('click', async () => {
-        downloadBtn.textContent = 'Downloading...';
-        downloadBtn.disabled = true;
-        try {
-          await window.foundryAPI.downloadWhisperModel(model.alias);
-          showToast(`Downloaded ${model.alias}`, 'success');
-          selectedWhisperModel = model.alias;
-          const updatedModels = await window.foundryAPI.getWhisperModels();
-          showWhisperModal(updatedModels, true);
-        } catch (error) {
-          showToast('Download failed: ' + error.message, 'error');
-          downloadBtn.textContent = 'Download';
-          downloadBtn.disabled = false;
-        }
-      });
-    }
-    
-    whisperModelList.appendChild(item);
-  });
-  
-  whisperModal.classList.add('visible');
-}
-
-async function startRecording() {
-  try {
-    // Request 16kHz mono audio for Whisper compatibility
-    const stream = await navigator.mediaDevices.getUserMedia({ 
-      audio: {
-        sampleRate: 16000,
-        channelCount: 1,
-        echoCancellation: true,
-        noiseSuppression: true
-      } 
-    });
-    
-    mediaRecorder = new MediaRecorder(stream);
-    audioChunks = [];
-    
-    mediaRecorder.ondataavailable = (e) => {
-      audioChunks.push(e.data);
-    };
-    
-    mediaRecorder.onstop = async () => {
-      // Stop all tracks
-      stream.getTracks().forEach(track => track.stop());
-      
-      // Create audio blob
-      const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType });
-      await transcribeAudio(audioBlob);
-    };
-    
-    mediaRecorder.start();
-    isRecording = true;
-    recordBtn.classList.add('recording');
-    showToast('Recording... Click stop when done', 'warning');
-  } catch (error) {
-    console.error('Failed to start recording:', error);
-    showToast('Failed to access microphone', 'error');
-  }
-}
-
-function stopRecording() {
-  if (mediaRecorder && isRecording) {
-    mediaRecorder.stop();
-    isRecording = false;
-    recordBtn.classList.remove('recording');
-    recordBtn.classList.add('transcribing');
-  }
-}
-
-// Convert audio blob to 16kHz mono WAV format for Whisper
-async function convertToWav(audioBlob) {
-  const audioContext = new AudioContext();
-  try {
-    const arrayBuffer = await audioBlob.arrayBuffer();
-    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
-    
-    // Resample to 16kHz mono
-    const targetSampleRate = 16000;
-    const offlineContext = new OfflineAudioContext(1, audioBuffer.duration * targetSampleRate, targetSampleRate);
-    
-    const source = offlineContext.createBufferSource();
-    source.buffer = audioBuffer;
-    source.connect(offlineContext.destination);
-    source.start(0);
-    
-    const resampledBuffer = await offlineContext.startRendering();
-    
-    // Convert to WAV
-    const wavBuffer = audioBufferToWav(resampledBuffer);
-    return new Blob([wavBuffer], { type: 'audio/wav' });
-  } finally {
-    await audioContext.close();
-  }
-}
-
-// Encode AudioBuffer to 16-bit PCM WAV format
-function audioBufferToWav(buffer) {
-  const numChannels = 1; // Force mono
-  const sampleRate = buffer.sampleRate;
-  const bitDepth = 16;
-  
-  const bytesPerSample = bitDepth / 8;
-  const blockAlign = numChannels * bytesPerSample;
-  
-  // Get mono channel (mix down if stereo)
-  let monoData;
-  if (buffer.numberOfChannels === 1) {
-    monoData = buffer.getChannelData(0);
-  } else {
-    // Mix stereo to mono
-    const left = buffer.getChannelData(0);
-    const right = buffer.getChannelData(1);
-    monoData = new Float32Array(left.length);
-    for (let i = 0; i < left.length; i++) {
-      monoData[i] = (left[i] + right[i]) / 2;
-    }
-  }
-  
-  const samples = monoData.length;
-  const dataSize = samples * blockAlign;
-  const bufferSize = 44 + dataSize;
-  
-  const arrayBuffer = new ArrayBuffer(bufferSize);
-  const view = new DataView(arrayBuffer);
-  
-  // RIFF header
-  writeString(view, 0, 'RIFF');
-  view.setUint32(4, 36 + dataSize, true);
-  writeString(view, 8, 'WAVE');
-  
-  // fmt chunk
-  writeString(view, 12, 'fmt ');
-  view.setUint32(16, 16, true); // chunk size
-  view.setUint16(20, 1, true);  // PCM format
-  view.setUint16(22, numChannels, true);
-  view.setUint32(24, sampleRate, true);
-  view.setUint32(28, sampleRate * blockAlign, true);
-  view.setUint16(32, blockAlign, true);
-  view.setUint16(34, bitDepth, true);
-  
-  // data chunk
-  writeString(view, 36, 'data');
-  view.setUint32(40, dataSize, true);
-  
-  // Write audio data as 16-bit PCM
-  let offset = 44;
-  for (let i = 0; i < samples; i++) {
-    const sample = Math.max(-1, Math.min(1, monoData[i]));
-    const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
-    view.setInt16(offset, intSample, true);
-    offset += 2;
-  }
-  
-  return arrayBuffer;
-}
-
-function writeString(view, offset, string) {
-  for (let i = 0; i < string.length; i++) {
-    view.setUint8(offset + i, string.charCodeAt(i));
-  }
-}
-
-async function transcribeAudio(audioBlob) {
-  try {
-    showToast('Converting audio...', 'warning');
-    
-    // Convert to 16kHz mono WAV format for Whisper compatibility
-    let wavBlob;
-    try {
-      wavBlob = await convertToWav(audioBlob);
-    } catch (e) {
-      console.error('WAV conversion failed:', e);
-      showToast('Audio conversion failed: ' + e.message, 'error');
-      recordBtn.classList.remove('transcribing');
-      return;
-    }
-    
-    showToast('Transcribing audio...', 'warning');
-    
-    // Convert blob to base64
-    const arrayBuffer = await wavBlob.arrayBuffer();
-    const uint8Array = new Uint8Array(arrayBuffer);
-    
-    // Use chunked base64 encoding for large arrays
-    let base64 = '';
-    const chunkSize = 32768;
-    for (let i = 0; i < uint8Array.length; i += chunkSize) {
-      const chunk = uint8Array.subarray(i, i + chunkSize);
-      base64 += String.fromCharCode.apply(null, chunk);
-    }
-    base64 = btoa(base64);
-    
-    const tempPath = `/tmp/foundry_audio_${Date.now()}.wav`;
-    
-    const result = await window.foundryAPI.transcribeAudio(tempPath, base64);
-    
-    // Insert transcribed text into input
-    const text = result.text || result.Text || '';
-    if (text) {
-      messageInput.value += text;
-      messageInput.dispatchEvent(new Event('input'));
-      showToast('Transcription complete', 'success');
-    } else {
-      showToast('No speech detected', 'warning');
-    }
-  } catch (error) {
-    console.error('Transcription failed:', error);
-    showToast('Transcription failed: ' + error.message, 'error');
-  } finally {
-    recordBtn.classList.remove('transcribing');
-  }
-}
-
-function setupEventListeners() {
-  // Sidebar toggle
-  sidebarToggle.addEventListener('click', () => {
-    sidebar.classList.toggle('collapsed');
-  });
-  
-  mobileMenuBtn.addEventListener('click', () => {
-    sidebar.classList.toggle('open');
-  });
-  
-  // Refresh models
-  refreshModels.addEventListener('click', async () => {
-    refreshModels.classList.add('spinning');
-    await loadModels();
-    refreshModels.classList.remove('spinning');
-  });
-  
-  // Chat form
-  chatForm.addEventListener('submit', handleSendMessage);
-  
-  // Textarea auto-resize
-  messageInput.addEventListener('input', () => {
-    messageInput.style.height = 'auto';
-    messageInput.style.height = Math.min(messageInput.scrollHeight, 150) + 'px';
-  });
-  
-  // Enter to send, Shift+Enter for new line
-  messageInput.addEventListener('keydown', (e) => {
-    if (e.key === 'Enter' && !e.shiftKey) {
-      e.preventDefault();
-      chatForm.dispatchEvent(new Event('submit'));
-    }
-  });
-  
-  // New chat
-  newChatBtn.addEventListener('click', clearChat);
-  
-  // Close sidebar on outside click (mobile)
-  document.addEventListener('click', (e) => {
-    if (window.innerWidth <= 768 && 
-        sidebar.classList.contains('open') &&
-        !sidebar.contains(e.target) &&
-        !mobileMenuBtn.contains(e.target)) {
-      sidebar.classList.remove('open');
-    }
-  });
-}
-
-function setupChatChunkListener() {
-  window.foundryAPI.onChatChunk((data) => {
-    if (data.content) {
-      appendToLastAssistantMessage(data.content);
-    }
-  });
-}
-
-// Model Management
-async function loadModels() {
-  modelList.innerHTML = `
-    <div class="loading-spinner">
-      <div class="spinner"></div>
-      <span>Loading models...</span>
-    </div>
-  `;
-  
-  try {
-    const models = await window.foundryAPI.getModels();
-    
-    if (!models || models.length === 0) {
-      modelList.innerHTML = `
-        <div class="loading-spinner">
-          <span>No models found</span>
-        </div>
-      `;
-      return;
-    }
-    
-    // Filter out whisper/audio models - only show chat models
-    const chatModels = models.filter(m => {
-      const alias = m.alias.toLowerCase();
-      // Exclude whisper and other audio models
-      if (alias.includes('whisper')) return false;
-      return true;
-    });
-    
-    const displayModels = chatModels;
-    
-    // Sort: cached first, then by name
-    displayModels.sort((a, b) => {
-      if (a.isCached && !b.isCached) return -1;
-      if (!a.isCached && b.isCached) return 1;
-      return a.alias.localeCompare(b.alias);
-    });
-    
-    // Group by cached status
-    const cachedModels = displayModels.filter(m => m.isCached);
-    const availableModels = displayModels.filter(m => !m.isCached);
-    
-    modelList.innerHTML = '';
-    
-    if (cachedModels.length > 0) {
-      const cachedGroup = document.createElement('div');
-      cachedGroup.className = 'model-group';
-      cachedGroup.innerHTML = `
-        <div class="model-group-header">
-          <div class="status-dot cached"></div>
-          <span>Downloaded</span>
-        </div>
-      `;
-      cachedModels.forEach(model => {
-        cachedGroup.appendChild(createModelItem(model));
-      });
-      modelList.appendChild(cachedGroup);
-    }
-    
-    if (availableModels.length > 0) {
-      const availableGroup = document.createElement('div');
-      availableGroup.className = 'model-group';
-      availableGroup.innerHTML = `
-        <div class="model-group-header">
-          <div class="status-dot"></div>
-          <span>Available</span>
-        </div>
-      `;
-      availableModels.forEach(model => {
-        availableGroup.appendChild(createModelItem(model));
-      });
-      modelList.appendChild(availableGroup);
-    }
-    
-    if (displayModels.length === 0) {
-      modelList.innerHTML = `
-        <div class="loading-spinner">
-          <span>No models available</span>
-        </div>
-      `;
-    }
-  } catch (error) {
-    console.error('Failed to load models:', error);
-    modelList.innerHTML = `
-      <div class="loading-spinner">
-        <span>Failed to load models</span>
-        <span style="font-size: 11px; color: var(--error);">${error.message || error}</span>
-      </div>
-    `;
-    showToast('Failed to load models: ' + error.message, 'error');
-  }
-}
-
-function createModelItem(model) {
-  const variant = model.variants[0];
-  const item = document.createElement('div');
-  item.className = 'model-item';
-  const isActive = model.alias === currentModelAlias;
-  if (isActive) {
-    item.classList.add('active');
-  }
-  
-  const sizeMb = variant?.fileSizeMb;
-  const sizeStr = sizeMb ? `${(sizeMb / 1024).toFixed(1)} GB` : '';
-  
-  let statusHtml;
-  if (isActive) {
-    statusHtml = `
-      <button class="unload-btn">Unload</button>
-    `;
-  } else if (model.isCached) {
-    statusHtml = `
-      <button class="delete-model-btn" title="Delete from cache">
-        <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
-          <polyline points="3 6 5 6 21 6"></polyline>
-          <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"></path>
-          <line x1="10" y1="11" x2="10" y2="17"></line>
-          <line x1="14" y1="11" x2="14" y2="17"></line>
-        </svg>
-      </button>
-      <button class="load-btn">Load</button>
-    `;
-  } else {
-    statusHtml = '<button class="download-btn">Download</button>';
-  }
-  
-  item.innerHTML = `
-    <div class="model-icon">
-      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-        <path d="M21 16V8a2 2 0 00-1-1.73l-7-4a2 2 0 00-2 0l-7 4A2 2 0 003 8v8a2 2 0 001 1.73l7 4a2 2 0 002 0l7-4A2 2 0 0021 16z"/>
-        <polyline points="3.27 6.96 12 12.01 20.73 6.96"/>
-        <line x1="12" y1="22.08" x2="12" y2="12"/>
-      </svg>
-    </div>
-    <div class="model-info">
-      <div class="model-name">${model.alias}</div>
-      <div class="model-size">${sizeStr}</div>
-    </div>
-    <div class="model-status">
-      ${statusHtml}
-    </div>
-  `;
-  
-  // Handle click events
-  if (isActive) {
-    const unloadBtn = item.querySelector('.unload-btn');
-    unloadBtn.addEventListener('click', async (e) => {
-      e.stopPropagation();
-      await unloadModel();
-    });
-  } else if (model.isCached) {
-    const loadBtn = item.querySelector('.load-btn');
-    loadBtn.addEventListener('click', async (e) => {
-      e.stopPropagation();
-      await loadModel(model.alias);
-    });
-    
-    const deleteBtn = item.querySelector('.delete-model-btn');
-    deleteBtn.addEventListener('click', async (e) => {
-      e.stopPropagation();
-      if (confirm(`Delete ${model.alias} from cache?`)) {
-        try {
-          await window.foundryAPI.deleteModel(model.alias);
-          showToast(`Deleted ${model.alias}`, 'success');
-          await loadModels();
-        } catch (error) {
-          showToast('Delete failed: ' + error.message, 'error');
-        }
-      }
-    });
-  } else {
-    const downloadBtn = item.querySelector('.download-btn');
-    downloadBtn.addEventListener('click', async (e) => {
-      e.stopPropagation();
-      await downloadModel(model.alias, item);
-    });
-  }
-  
-  return item;
-}
-
-async function downloadModel(alias, itemElement) {
-  const statusEl = itemElement.querySelector('.model-status');
-  statusEl.innerHTML = '<div class="status-indicator loading"></div>';
-  
-  try {
-    showToast(`Downloading ${alias}...`, 'warning');
-    await window.foundryAPI.downloadModel(alias);
-    showToast(`Downloaded ${alias}. Loading...`, 'success');
-    await loadModels();
-    // Auto-load the model after download
-    await loadModel(alias);
-  } catch (error) {
-    console.error('Download failed:', error);
-    showToast('Download failed: ' + error.message, 'error');
-    await loadModels();
-  }
-}
-
-async function loadModel(alias) {
-  if (isGenerating) {
-    showToast('Please wait for the current response to finish', 'warning');
-    return;
-  }
-  
-  // Update UI to show loading
-  const items = modelList.querySelectorAll('.model-item');
-  items.forEach(item => {
-    item.classList.remove('active');
-    const nameEl = item.querySelector('.model-name');
-    if (nameEl.textContent.includes(alias) || item.dataset.alias === alias) {
-      item.classList.add('loading');
-    }
-  });
-  
-  try {
-    showToast(`Loading ${alias}...`, 'warning');
-    await window.foundryAPI.loadModel(alias);
-    currentModelAlias = alias;
-    
-    // Update UI
-    updateCurrentModelDisplay(alias);
-    enableChat();
-    showToast(`Model ${alias} loaded`, 'success');
-    
-    // Refresh model list to update active state
-    await loadModels();
-  } catch (error) {
-    console.error('Failed to load model:', error);
-    showToast('Failed to load model: ' + error.message, 'error');
-    await loadModels();
-  }
-}
-
-async function unloadModel() {
-  if (isGenerating) {
-    showToast('Please wait for the current response to finish', 'warning');
-    return;
-  }
-  
-  try {
-    showToast('Unloading model...', 'warning');
-    await window.foundryAPI.unloadModel();
-    currentModelAlias = null;
-    
-    // Update UI
-    modelBadge.textContent = 'Select a model to start';
-    disableChat();
-    showToast('Model unloaded', 'success');
-    
-    // Refresh model list
-    await loadModels();
-  } catch (error) {
-    console.error('Failed to unload model:', error);
-    showToast('Failed to unload model: ' + error.message, 'error');
-  }
-}
-
-function updateCurrentModelDisplay(alias) {
-  modelBadge.textContent = alias;
-}
-
-function enableChat() {
-  messageInput.disabled = false;
-  sendBtn.disabled = false;
-  messageInput.placeholder = 'Type your message...';
-  messageInput.focus();
-}
-
-function disableChat() {
-  messageInput.disabled = true;
-  sendBtn.disabled = true;
-  messageInput.placeholder = 'Select a model to start chatting...';
-}
-
-// Chat Management
-async function handleSendMessage(e) {
-  e.preventDefault();
-  
-  const content = messageInput.value.trim();
-  if (!content || isGenerating || !currentModelAlias) return;
-  
-  // Clear welcome message if present
-  const welcomeMessage = chatMessages.querySelector('.welcome-message');
-  if (welcomeMessage) {
-    welcomeMessage.remove();
-  }
-  
-  // Add user message
-  messages.push({ role: 'user', content });
-  addMessageToChat('user', content);
-  updateContextUsage();
-  
-  // Clear input
-  messageInput.value = '';
-  messageInput.style.height = 'auto';
-  
-  // Disable send button
-  isGenerating = true;
-  sendBtn.disabled = true;
-  
-  // Add typing indicator
-  const typingEl = addTypingIndicator();
-  
-  try {
-    // Make API call
-    const result = await window.foundryAPI.chat(messages);
-    
-    // Remove typing indicator
-    typingEl.remove();
-    
-    // Add assistant message (content was already streamed, just add stats)
-    messages.push({ role: 'assistant', content: result.content });
-    updateLastAssistantMessageStats(result.stats);
-    updateContextUsage();
-    
-  } catch (error) {
-    console.error('Chat error:', error);
-    typingEl.remove();
-    showToast('Chat error: ' + error.message, 'error');
-  } finally {
-    isGenerating = false;
-    sendBtn.disabled = false;
-    messageInput.focus();
-  }
-}
-
-function addMessageToChat(role, content) {
-  const messageEl = document.createElement('div');
-  messageEl.className = `message ${role}`;
-  
-  const avatar = role === 'user' ? 'U' : 
-    `<svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-      <path d="M21 16V8a2 2 0 00-1-1.73l-7-4a2 2 0 00-2 0l-7 4A2 2 0 003 8v8a2 2 0 001 1.73l7 4a2 2 0 002 0l7-4A2 2 0 0021 16z"/>
-    </svg>`;
-  
-  messageEl.innerHTML = `
-    <div class="message-avatar">${avatar}</div>
-    <div class="message-content">
-      <div class="message-bubble">${role === 'user' ? SimpleMarkdown.escapeHtml(content) : SimpleMarkdown.parse(content)}</div>
-      ${role === 'assistant' ? '<div class="message-stats"></div>' : ''}
-    </div>
-  `;
-  
-  chatMessages.appendChild(messageEl);
-  scrollToBottom();
-  
-  return messageEl;
-}
-
-function addTypingIndicator() {
-  const typingEl = document.createElement('div');
-  typingEl.className = 'message assistant';
-  typingEl.id = 'typing-indicator';
-  typingEl.innerHTML = `
-    <div class="message-avatar">
-      <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-        <path d="M21 16V8a2 2 0 00-1-1.73l-7-4a2 2 0 00-2 0l-7 4A2 2 0 003 8v8a2 2 0 001 1.73l7 4a2 2 0 002 0l7-4A2 2 0 0021 16z"/>
-      </svg>
-    </div>
-    <div class="message-content">
-      <div class="typing-indicator">
-        <span></span>
-        <span></span>
-        <span></span>
-      </div>
-    </div>
-  `;
-  chatMessages.appendChild(typingEl);
-  scrollToBottom();
-  return typingEl;
-}
-
-let currentAssistantMessage = null;
-let currentAssistantContent = '';
-
-function appendToLastAssistantMessage(content) {
-  // If there's a typing indicator, replace it with actual message
-  const typingIndicator = document.getElementById('typing-indicator');
-  if (typingIndicator) {
-    typingIndicator.remove();
-    currentAssistantMessage = addMessageToChat('assistant', '');
-    currentAssistantContent = '';
-  }
-  
-  if (!currentAssistantMessage) {
-    currentAssistantMessage = addMessageToChat('assistant', '');
-    currentAssistantContent = '';
-  }
-  
-  currentAssistantContent += content;
-  const bubble = currentAssistantMessage.querySelector('.message-bubble');
-  bubble.innerHTML = SimpleMarkdown.parse(currentAssistantContent);
-  scrollToBottom();
-}
-
-function updateLastAssistantMessageStats(stats) {
-  if (!currentAssistantMessage) return;
-  
-  const statsEl = currentAssistantMessage.querySelector('.message-stats');
-  if (statsEl && stats) {
-    statsEl.innerHTML = `
-      <div class="stat-item">
-        <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-          <circle cx="12" cy="12" r="10"/>
-          <polyline points="12 6 12 12 16 14"/>
-        </svg>
-        <span>TTFT: ${stats.timeToFirstToken}ms</span>
-      </div>
-      <div class="stat-item">
-        <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-          <polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/>
-        </svg>
-        <span>${stats.tokensPerSecond} tok/s</span>
-      </div>
-      <div class="stat-item">
-        <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-          <path d="M22 11.08V12a10 10 0 11-5.93-9.14"/>
-          <polyline points="22 4 12 14.01 9 11.01"/>
-        </svg>
-        <span>${stats.tokenCount} tokens</span>
-      </div>
-    `;
-  }
-  
-  // Reset for next message
-  currentAssistantMessage = null;
-  currentAssistantContent = '';
-}
-
-function clearChat() {
-  messages = [];
-  currentAssistantMessage = null;
-  currentAssistantContent = '';
-  updateContextUsage();
-  
-  chatMessages.innerHTML = `
-    <div class="welcome-message">
-      <div class="welcome-icon">
-        <svg width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
-          <path d="M21 15a2 2 0 01-2 2H7l-4 4V5a2 2 0 012-2h14a2 2 0 012 2v10z"/>
-        </svg>
-      </div>
-      <h2>Welcome to Foundry Local Chat</h2>
-      <p>Select a model from the sidebar to start chatting with AI running locally on your machine.</p>
-      <div class="feature-highlights">
-        <div class="feature">
-          <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-            <path d="M12 22s8-4 8-10V5l-8-3-8 3v7c0 6 8 10 8 10z"/>
-          </svg>
-          <span>100% Private</span>
-        </div>
-        <div class="feature">
-          <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-            <circle cx="12" cy="12" r="10"/>
-            <polyline points="12 6 12 12 16 14"/>
-          </svg>
-          <span>Low Latency</span>
-        </div>
-        <div class="feature">
-          <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-            <rect x="2" y="3" width="20" height="14" rx="2" ry="2"/>
-            <line x1="8" y1="21" x2="16" y2="21"/>
-            <line x1="12" y1="17" x2="12" y2="21"/>
-          </svg>
-          <span>Runs Locally</span>
-        </div>
-      </div>
-    </div>
-  `;
-}
-
-function scrollToBottom() {
-  chatMessages.scrollTop = chatMessages.scrollHeight;
-}
-
-// Toast Notifications
-function showToast(message, type = 'info') {
-  const toast = document.createElement('div');
-  toast.className = `toast ${type}`;
-  toast.innerHTML = `
-    <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-      ${type === 'success' ? '<polyline points="20 6 9 17 4 12"/>' :
-        type === 'error' ? '<circle cx="12" cy="12" r="10"/><line x1="15" y1="9" x2="9" y2="15"/><line x1="9" y1="9" x2="15" y2="15"/>' :
-        type === 'warning' ? '<path d="M10.29 3.86L1.82 18a2 2 0 001.71 3h16.94a2 2 0 001.71-3L13.71 3.86a2 2 0 00-3.42 0z"/><line x1="12" y1="9" x2="12" y2="13"/><line x1="12" y1="17" x2="12.01" y2="17"/>' :
-        '<circle cx="12" cy="12" r="10"/><line x1="12" y1="16" x2="12" y2="12"/><line x1="12" y1="8" x2="12.01" y2="8"/>'
-      }
-    </svg>
-    <span>${message}</span>
-  `;
-  
-  toastContainer.appendChild(toast);
-  
-  setTimeout(() => {
-    toast.style.animation = 'slideIn 0.3s ease reverse';
-    setTimeout(() => toast.remove(), 300);
-  }, 3000);
-}
diff --git a/samples/js/electron-chat-application/screenshots/electron-description-of-functions.png b/samples/js/electron-chat-application/screenshots/electron-description-of-functions.png
deleted file mode 100644
index ee46f8be8..000000000
Binary files a/samples/js/electron-chat-application/screenshots/electron-description-of-functions.png and /dev/null differ
diff --git a/samples/js/electron-chat-application/screenshots/electron-transcription.png b/samples/js/electron-chat-application/screenshots/electron-transcription.png
deleted file mode 100644
index 32295ac10..000000000
Binary files a/samples/js/electron-chat-application/screenshots/electron-transcription.png and /dev/null differ
diff --git a/samples/js/electron-chat-application/styles.css b/samples/js/electron-chat-application/styles.css
deleted file mode 100644
index 1f0e2fc2d..000000000
--- a/samples/js/electron-chat-application/styles.css
+++ /dev/null
@@ -1,1348 +0,0 @@
-/* =====================================================
-   Foundry Local Chat - Modern Chat Interface Styles
-   ===================================================== */
-
-:root {
-  /* Color Palette - Dark Theme */
-  --bg-primary: #0f0f1a;
-  --bg-secondary: #1a1a2e;
-  --bg-tertiary: #16213e;
-  --bg-hover: #1f2b4d;
-  --bg-active: #2a3a5f;
-  
-  --text-primary: #e8e8e8;
-  --text-secondary: #a0a0b0;
-  --text-muted: #6b6b80;
-  
-  --accent-primary: #6366f1;
-  --accent-secondary: #818cf8;
-  --accent-gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
-  
-  --success: #22c55e;
-  --warning: #f59e0b;
-  --error: #ef4444;
-  
-  --border-color: #2a2a4a;
-  --border-subtle: rgba(255, 255, 255, 0.06);
-  
-  /* Sizing */
-  --sidebar-width: 320px;
-  --sidebar-min-width: 240px;
-  --sidebar-max-width: 480px;
-  --header-height: 60px;
-  --input-height: 56px;
-  
-  /* Spacing */
-  --space-xs: 4px;
-  --space-sm: 8px;
-  --space-md: 16px;
-  --space-lg: 24px;
-  --space-xl: 32px;
-  
-  /* Typography */
-  --font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
-  --font-mono: 'SF Mono', 'Fira Code', 'Consolas', monospace;
-  
-  /* Transitions */
-  --transition-fast: 150ms ease;
-  --transition-normal: 250ms ease;
-  
-  /* Shadows */
-  --shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.3);
-  --shadow-md: 0 4px 6px rgba(0, 0, 0, 0.3);
-  --shadow-lg: 0 10px 25px rgba(0, 0, 0, 0.4);
-  
-  /* Border Radius */
-  --radius-sm: 6px;
-  --radius-md: 10px;
-  --radius-lg: 16px;
-  --radius-full: 9999px;
-}
-
-/* =====================================================
-   Reset & Base Styles
-   ===================================================== */
-
-*, *::before, *::after {
-  box-sizing: border-box;
-  margin: 0;
-  padding: 0;
-}
-
-html, body {
-  height: 100%;
-  overflow: hidden;
-}
-
-body {
-  font-family: var(--font-family);
-  font-size: 14px;
-  line-height: 1.5;
-  color: var(--text-primary);
-  background: var(--bg-primary);
-  -webkit-font-smoothing: antialiased;
-  -moz-osx-font-smoothing: grayscale;
-}
-
-/* =====================================================
-   App Container
-   ===================================================== */
-
-.app-container {
-  display: flex;
-  height: 100vh;
-  width: 100vw;
-  overflow: hidden;
-}
-
-/* =====================================================
-   Sidebar
-   ===================================================== */
-
-.sidebar {
-  width: var(--sidebar-width);
-  min-width: var(--sidebar-min-width);
-  max-width: var(--sidebar-max-width);
-  height: 100%;
-  background: var(--bg-secondary);
-  border-right: 1px solid var(--border-color);
-  display: flex;
-  flex-direction: column;
-  transition: transform var(--transition-normal);
-  z-index: 100;
-  position: relative;
-}
-
-.sidebar-resize-handle {
-  position: absolute;
-  top: 0;
-  right: 0;
-  width: 4px;
-  height: 100%;
-  cursor: col-resize;
-  background: transparent;
-  transition: background var(--transition-fast);
-  z-index: 10;
-}
-
-.sidebar-resize-handle:hover,
-.sidebar-resize-handle.dragging {
-  background: var(--accent-primary);
-}
-
-.sidebar.collapsed {
-  width: 0;
-  min-width: 0;
-  transform: translateX(-100%);
-}
-
-.sidebar-header {
-  padding: var(--space-md);
-  padding-top: 40px; /* Account for macOS title bar */
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  border-bottom: 1px solid var(--border-color);
-  -webkit-app-region: drag;
-}
-
-.logo {
-  display: flex;
-  align-items: center;
-  gap: var(--space-sm);
-  font-weight: 600;
-  font-size: 16px;
-  color: var(--text-primary);
-}
-
-.logo svg {
-  color: var(--accent-primary);
-}
-
-.sidebar-toggle {
-  background: transparent;
-  border: none;
-  color: var(--text-secondary);
-  cursor: pointer;
-  padding: var(--space-xs);
-  border-radius: var(--radius-sm);
-  transition: all var(--transition-fast);
-  -webkit-app-region: no-drag;
-}
-
-.sidebar-toggle:hover {
-  background: var(--bg-hover);
-  color: var(--text-primary);
-}
-
-.sidebar-content {
-  flex: 1;
-  overflow-y: auto;
-  padding: var(--space-md);
-}
-
-.section-header {
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  margin-bottom: var(--space-md);
-}
-
-.section-header h3 {
-  font-size: 12px;
-  font-weight: 600;
-  text-transform: uppercase;
-  letter-spacing: 0.5px;
-  color: var(--text-muted);
-}
-
-.refresh-btn {
-  background: transparent;
-  border: none;
-  color: var(--text-muted);
-  cursor: pointer;
-  padding: var(--space-xs);
-  border-radius: var(--radius-sm);
-  transition: all var(--transition-fast);
-}
-
-.refresh-btn:hover {
-  background: var(--bg-hover);
-  color: var(--text-primary);
-}
-
-.refresh-btn.spinning svg {
-  animation: spin 1s linear infinite;
-}
-
-@keyframes spin {
-  from { transform: rotate(0deg); }
-  to { transform: rotate(360deg); }
-}
-
-/* Model List */
-.model-list {
-  display: flex;
-  flex-direction: column;
-  gap: var(--space-sm);
-}
-
-.model-group {
-  margin-bottom: var(--space-md);
-}
-
-.model-group-header {
-  display: flex;
-  align-items: center;
-  gap: var(--space-sm);
-  padding: var(--space-xs) 0;
-  margin-bottom: var(--space-xs);
-}
-
-.model-group-header .status-dot {
-  width: 8px;
-  height: 8px;
-  border-radius: 50%;
-  background: var(--text-muted);
-}
-
-.model-group-header .status-dot.cached {
-  background: var(--success);
-}
-
-.model-group-header .status-dot.loaded {
-  background: var(--success);
-}
-
-.model-group-header span {
-  font-size: 11px;
-  font-weight: 500;
-  text-transform: uppercase;
-  letter-spacing: 0.5px;
-  color: var(--text-muted);
-}
-
-.model-item {
-  display: flex;
-  align-items: center;
-  padding: var(--space-sm) var(--space-md);
-  background: var(--bg-tertiary);
-  border: 1px solid var(--border-subtle);
-  border-radius: var(--radius-md);
-  cursor: pointer;
-  transition: all var(--transition-fast);
-  gap: var(--space-sm);
-}
-
-.model-item:hover {
-  background: var(--bg-hover);
-  border-color: var(--border-color);
-}
-
-.model-item.active {
-  background: rgba(16, 185, 129, 0.15);
-  border-color: var(--success);
-}
-
-.model-item.active .model-name,
-.model-item.active .model-size {
-  color: var(--text-primary);
-}
-
-.model-item.loading {
-  pointer-events: none;
-  opacity: 0.7;
-}
-
-.model-icon {
-  width: 32px;
-  height: 32px;
-  border-radius: var(--radius-sm);
-  background: var(--bg-primary);
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  flex-shrink: 0;
-}
-
-.model-icon svg {
-  width: 18px;
-  height: 18px;
-  color: var(--accent-secondary);
-}
-
-.model-info {
-  flex: 1;
-  min-width: 0;
-}
-
-.model-name {
-  font-weight: 500;
-  font-size: 13px;
-  color: var(--text-primary);
-  white-space: nowrap;
-  overflow: hidden;
-  text-overflow: ellipsis;
-}
-
-.model-size {
-  font-size: 11px;
-  color: var(--text-muted);
-}
-
-.model-status {
-  display: flex;
-  align-items: center;
-  gap: var(--space-xs);
-}
-
-.status-indicator {
-  width: 8px;
-  height: 8px;
-  border-radius: 50%;
-  background: var(--text-muted);
-}
-
-.status-indicator.cached {
-  background: var(--warning);
-}
-
-.status-indicator.loaded {
-  background: var(--success);
-}
-
-.status-indicator.loading {
-  background: var(--warning);
-  animation: pulse 1.5s ease-in-out infinite;
-}
-
-@keyframes pulse {
-  0%, 100% { opacity: 1; }
-  50% { opacity: 0.5; }
-}
-
-.download-btn,
-.load-btn {
-  padding: var(--space-xs) var(--space-sm);
-  font-size: 11px;
-  font-weight: 500;
-  background: var(--accent-primary);
-  color: white;
-  border: none;
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  transition: all var(--transition-fast);
-}
-
-.download-btn:hover,
-.load-btn:hover {
-  background: var(--accent-secondary);
-}
-
-.unload-btn {
-  padding: var(--space-xs) var(--space-sm);
-  font-size: 11px;
-  font-weight: 500;
-  background: rgba(239, 68, 68, 0.15);
-  color: var(--error);
-  border: none;
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  transition: all var(--transition-fast);
-}
-
-.unload-btn:hover {
-  background: var(--error);
-  color: white;
-}
-
-.delete-model-btn {
-  width: 24px;
-  height: 24px;
-  font-size: 12px;
-  background: transparent;
-  color: var(--text-muted);
-  border: none;
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  transition: all var(--transition-fast);
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  opacity: 0;
-}
-
-.model-item:hover .delete-model-btn {
-  opacity: 1;
-}
-
-.delete-model-btn:hover {
-  color: var(--error);
-}
-
-/* Loading Spinner */
-.loading-spinner {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-  justify-content: center;
-  padding: var(--space-xl);
-  gap: var(--space-md);
-  color: var(--text-muted);
-}
-
-.spinner {
-  width: 24px;
-  height: 24px;
-  border: 2px solid var(--border-color);
-  border-top-color: var(--accent-primary);
-  border-radius: 50%;
-  animation: spin 0.8s linear infinite;
-}
-
-/* =====================================================
-   Chat Area
-   ===================================================== */
-
-.chat-area {
-  flex: 1;
-  display: flex;
-  flex-direction: column;
-  min-width: 0;
-  background: var(--bg-primary);
-}
-
-.chat-header {
-  height: var(--header-height);
-  padding: 0 var(--space-lg);
-  padding-top: 20px; /* Account for macOS title bar */
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  border-bottom: 1px solid var(--border-color);
-  background: var(--bg-secondary);
-  -webkit-app-region: drag;
-}
-
-.mobile-menu-btn {
-  display: none;
-  background: transparent;
-  border: none;
-  color: var(--text-secondary);
-  cursor: pointer;
-  padding: var(--space-xs);
-  border-radius: var(--radius-sm);
-  -webkit-app-region: no-drag;
-}
-
-.chat-title {
-  display: flex;
-  align-items: center;
-  gap: var(--space-md);
-}
-
-.chat-title h1 {
-  font-size: 18px;
-  font-weight: 600;
-}
-
-.model-badge {
-  font-size: 12px;
-  padding: var(--space-xs) var(--space-sm);
-  background: var(--bg-tertiary);
-  border-radius: var(--radius-full);
-  color: var(--text-secondary);
-}
-
-.new-chat-btn {
-  background: transparent;
-  border: none;
-  color: var(--text-secondary);
-  cursor: pointer;
-  padding: var(--space-sm);
-  border-radius: var(--radius-sm);
-  transition: all var(--transition-fast);
-  -webkit-app-region: no-drag;
-}
-
-.new-chat-btn:hover {
-  background: var(--bg-hover);
-  color: var(--text-primary);
-}
-
-/* Chat Messages */
-.chat-messages {
-  flex: 1;
-  overflow-y: auto;
-  padding: var(--space-lg);
-  display: flex;
-  flex-direction: column;
-  gap: var(--space-lg);
-}
-
-/* Welcome Message */
-.welcome-message {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-  justify-content: center;
-  text-align: center;
-  padding: var(--space-xl);
-  margin: auto;
-  max-width: 500px;
-}
-
-.welcome-icon {
-  width: 80px;
-  height: 80px;
-  background: var(--accent-gradient);
-  border-radius: var(--radius-lg);
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  margin-bottom: var(--space-lg);
-  color: white;
-}
-
-.welcome-message h2 {
-  font-size: 24px;
-  font-weight: 600;
-  margin-bottom: var(--space-sm);
-}
-
-.welcome-message p {
-  color: var(--text-secondary);
-  margin-bottom: var(--space-lg);
-}
-
-.feature-highlights {
-  display: flex;
-  gap: var(--space-lg);
-  flex-wrap: wrap;
-  justify-content: center;
-}
-
-.feature {
-  display: flex;
-  align-items: center;
-  gap: var(--space-sm);
-  color: var(--text-secondary);
-  font-size: 13px;
-}
-
-.feature svg {
-  color: var(--accent-secondary);
-}
-
-/* Message Bubbles */
-.message {
-  display: flex;
-  gap: var(--space-md);
-  max-width: 85%;
-  animation: fadeIn 0.3s ease;
-}
-
-@keyframes fadeIn {
-  from {
-    opacity: 0;
-    transform: translateY(10px);
-  }
-  to {
-    opacity: 1;
-    transform: translateY(0);
-  }
-}
-
-.message.user {
-  margin-left: auto;
-  flex-direction: row-reverse;
-}
-
-.message-avatar {
-  width: 36px;
-  height: 36px;
-  border-radius: var(--radius-md);
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  flex-shrink: 0;
-  font-weight: 600;
-  font-size: 14px;
-}
-
-.message.user .message-avatar {
-  background: var(--accent-gradient);
-  color: white;
-}
-
-.message.assistant .message-avatar {
-  background: var(--bg-tertiary);
-  color: var(--accent-secondary);
-}
-
-.message-content {
-  display: flex;
-  flex-direction: column;
-  gap: var(--space-xs);
-}
-
-.message-bubble {
-  padding: var(--space-md);
-  border-radius: var(--radius-lg);
-  line-height: 1.6;
-}
-
-.message.user .message-bubble {
-  background: var(--accent-gradient);
-  color: white;
-  border-bottom-right-radius: var(--radius-sm);
-}
-
-.message.assistant .message-bubble {
-  background: var(--bg-secondary);
-  border: 1px solid var(--border-color);
-  border-bottom-left-radius: var(--radius-sm);
-}
-
-/* Message Stats */
-.message-stats {
-  display: flex;
-  gap: var(--space-md);
-  font-size: 11px;
-  color: var(--text-muted);
-  padding: 0 var(--space-sm);
-}
-
-.stat-item {
-  display: flex;
-  align-items: center;
-  gap: var(--space-xs);
-}
-
-/* Code Blocks */
-.message-bubble pre {
-  margin: var(--space-sm) 0;
-  padding: var(--space-md);
-  background: var(--bg-primary);
-  border-radius: var(--radius-md);
-  overflow-x: auto;
-  position: relative;
-}
-
-.message-bubble code {
-  font-family: var(--font-mono);
-  font-size: 13px;
-}
-
-.message-bubble pre code {
-  display: block;
-}
-
-.message-bubble :not(pre) > code {
-  background: var(--bg-tertiary);
-  padding: 2px 6px;
-  border-radius: var(--radius-sm);
-}
-
-.code-block-wrapper {
-  position: relative;
-  margin: var(--space-sm) 0;
-}
-
-.code-block-wrapper pre {
-  margin: 0;
-  border-radius: var(--radius-md);
-}
-
-.code-copy-btn {
-  position: absolute;
-  top: 8px;
-  right: 8px;
-  width: 28px;
-  height: 28px;
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  background: rgba(30, 30, 50, 0.9);
-  border: 1px solid var(--border-color);
-  border-radius: var(--radius-sm);
-  color: var(--text-secondary);
-  cursor: pointer;
-  opacity: 0;
-  transition: all var(--transition-fast);
-  font-size: 14px;
-  line-height: 1;
-}
-
-.code-block-wrapper:hover .code-copy-btn {
-  opacity: 1;
-}
-
-.code-copy-btn:hover {
-  background: var(--bg-hover);
-  color: var(--text-primary);
-  border-color: var(--text-muted);
-}
-
-.code-copy-btn .copy-icon {
-  display: inline;
-}
-
-.code-copy-btn .check-icon {
-  display: none;
-}
-
-.code-copy-btn.copied {
-  border-color: var(--success);
-  background: rgba(34, 197, 94, 0.2);
-  color: var(--success);
-}
-
-.code-copy-btn.copied .copy-icon {
-  display: none;
-}
-
-.code-copy-btn.copied .check-icon {
-  display: inline;
-}
-
-/* Headings in messages */
-.message-bubble h2 {
-  font-size: 1.3em;
-  font-weight: 600;
-  margin: var(--space-md) 0 var(--space-sm) 0;
-  color: var(--text-primary);
-}
-
-.message-bubble h3 {
-  font-size: 1.15em;
-  font-weight: 600;
-  margin: var(--space-md) 0 var(--space-sm) 0;
-  color: var(--text-primary);
-}
-
-.message-bubble h4 {
-  font-size: 1.05em;
-  font-weight: 600;
-  margin: var(--space-sm) 0 var(--space-xs) 0;
-  color: var(--text-primary);
-}
-
-.message-bubble h2:first-child,
-.message-bubble h3:first-child,
-.message-bubble h4:first-child {
-  margin-top: 0;
-}
-
-/* Lists in messages */
-.message-bubble ul {
-  margin: var(--space-sm) 0;
-  padding-left: var(--space-lg);
-}
-
-.message-bubble li {
-  margin: var(--space-xs) 0;
-}
-
-/* Typing Indicator */
-.typing-indicator {
-  display: flex;
-  gap: 4px;
-  padding: var(--space-md);
-  background: var(--bg-secondary);
-  border: 1px solid var(--border-color);
-  border-radius: var(--radius-lg);
-  border-bottom-left-radius: var(--radius-sm);
-  width: fit-content;
-}
-
-.typing-indicator span {
-  width: 8px;
-  height: 8px;
-  background: var(--text-muted);
-  border-radius: 50%;
-  animation: typing 1.4s ease-in-out infinite;
-}
-
-.typing-indicator span:nth-child(2) {
-  animation-delay: 0.2s;
-}
-
-.typing-indicator span:nth-child(3) {
-  animation-delay: 0.4s;
-}
-
-@keyframes typing {
-  0%, 100% { transform: translateY(0); opacity: 0.5; }
-  50% { transform: translateY(-4px); opacity: 1; }
-}
-
-/* =====================================================
-   Chat Input
-   ===================================================== */
-
-.chat-input-container {
-  padding: var(--space-md) var(--space-lg) var(--space-lg);
-  background: var(--bg-primary);
-}
-
-.chat-input-form {
-  max-width: 900px;
-  margin: 0 auto;
-}
-
-.input-wrapper {
-  display: flex;
-  align-items: flex-end;
-  gap: var(--space-sm);
-  padding: var(--space-sm);
-  background: var(--bg-secondary);
-  border: 1px solid var(--border-color);
-  border-radius: var(--radius-lg);
-  transition: all var(--transition-fast);
-}
-
-.input-wrapper:focus-within {
-  border-color: var(--accent-primary);
-  box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
-}
-
-.input-wrapper textarea {
-  flex: 1;
-  background: transparent;
-  border: none;
-  outline: none;
-  color: var(--text-primary);
-  font-family: inherit;
-  font-size: 14px;
-  line-height: 1.5;
-  resize: none;
-  max-height: 150px;
-  padding: var(--space-sm);
-}
-
-.input-wrapper textarea::placeholder {
-  color: var(--text-muted);
-}
-
-.input-wrapper textarea:disabled {
-  cursor: not-allowed;
-}
-
-.send-btn {
-  width: 40px;
-  height: 40px;
-  background: var(--accent-gradient);
-  border: none;
-  border-radius: var(--radius-md);
-  color: white;
-  cursor: pointer;
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  transition: all var(--transition-fast);
-  flex-shrink: 0;
-}
-
-.send-btn:hover:not(:disabled) {
-  transform: scale(1.05);
-}
-
-.send-btn:active:not(:disabled) {
-  transform: scale(0.95);
-}
-
-.send-btn:disabled {
-  opacity: 0.5;
-  cursor: not-allowed;
-}
-
-/* Record Button */
-.record-btn {
-  width: 40px;
-  height: 40px;
-  background: var(--bg-tertiary);
-  border: 1px solid var(--border-color);
-  border-radius: var(--radius-md);
-  cursor: pointer;
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  transition: all var(--transition-fast);
-  flex-shrink: 0;
-  color: var(--text-secondary);
-}
-
-.record-btn:hover {
-  background: var(--bg-hover);
-  border-color: var(--text-muted);
-  color: var(--text-primary);
-}
-
-.record-btn .stop-icon {
-  display: none;
-}
-
-.record-btn.recording {
-  background: var(--error);
-  border-color: var(--error);
-  color: white;
-  animation: pulse-recording 1.5s ease-in-out infinite;
-}
-
-.record-btn.recording .mic-icon {
-  display: none;
-}
-
-.record-btn.recording .stop-icon {
-  display: block;
-}
-
-@keyframes pulse-recording {
-  0%, 100% { opacity: 1; }
-  50% { opacity: 0.7; }
-}
-
-.record-btn.transcribing {
-  pointer-events: none;
-  opacity: 0.7;
-}
-
-.input-hint {
-  text-align: center;
-  font-size: 11px;
-  color: var(--text-muted);
-  margin-top: var(--space-sm);
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  gap: var(--space-xs);
-}
-
-.hint-separator {
-  opacity: 0.5;
-}
-
-.transcription-settings-link {
-  background: none;
-  border: none;
-  color: var(--text-muted);
-  font-size: 11px;
-  cursor: pointer;
-  padding: 0;
-  transition: color var(--transition-fast);
-}
-
-.transcription-settings-link:hover {
-  color: var(--accent-secondary);
-  text-decoration: underline;
-}
-
-/* Context Usage Indicator */
-.context-usage {
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  gap: var(--space-sm);
-  margin-top: var(--space-xs);
-}
-
-.context-label-text {
-  font-size: 11px;
-  color: var(--text-muted);
-}
-
-.context-bar {
-  width: 100px;
-  height: 4px;
-  background: var(--bg-tertiary);
-  border-radius: var(--radius-full);
-  overflow: hidden;
-}
-
-.context-fill {
-  height: 100%;
-  width: 0%;
-  background: var(--success);
-  border-radius: var(--radius-full);
-  transition: width 0.3s ease, background 0.3s ease;
-}
-
-.context-fill.warning {
-  background: var(--warning);
-}
-
-.context-fill.danger {
-  background: var(--error);
-}
-
-.context-label {
-  font-size: 11px;
-  color: var(--text-muted);
-  min-width: 28px;
-}
-
-.input-hint kbd {
-  padding: 2px 6px;
-  background: var(--bg-secondary);
-  border: 1px solid var(--border-color);
-  border-radius: var(--radius-sm);
-  font-family: var(--font-mono);
-  font-size: 10px;
-}
-
-/* =====================================================
-   Toast Notifications
-   ===================================================== */
-
-.toast-container {
-  position: fixed;
-  top: var(--space-lg);
-  right: var(--space-lg);
-  display: flex;
-  flex-direction: column;
-  gap: var(--space-sm);
-  z-index: 1000;
-}
-
-.toast {
-  padding: var(--space-md) var(--space-lg);
-  background: var(--bg-secondary);
-  border: 1px solid var(--border-color);
-  border-radius: var(--radius-md);
-  box-shadow: var(--shadow-lg);
-  display: flex;
-  align-items: center;
-  gap: var(--space-sm);
-  animation: slideIn 0.3s ease;
-  max-width: 350px;
-}
-
-@keyframes slideIn {
-  from {
-    opacity: 0;
-    transform: translateX(100%);
-  }
-  to {
-    opacity: 1;
-    transform: translateX(0);
-  }
-}
-
-.toast.success {
-  border-left: 3px solid var(--success);
-}
-
-.toast.error {
-  border-left: 3px solid var(--error);
-}
-
-.toast.warning {
-  border-left: 3px solid var(--warning);
-}
-
-/* =====================================================
-   Scrollbar
-   ===================================================== */
-
-::-webkit-scrollbar {
-  width: 8px;
-  height: 8px;
-}
-
-::-webkit-scrollbar-track {
-  background: transparent;
-}
-
-::-webkit-scrollbar-thumb {
-  background: var(--border-color);
-  border-radius: var(--radius-full);
-}
-
-::-webkit-scrollbar-thumb:hover {
-  background: var(--text-muted);
-}
-
-/* =====================================================
-   Responsive
-   ===================================================== */
-
-@media (max-width: 768px) {
-  .sidebar {
-    position: fixed;
-    left: 0;
-    top: 0;
-    bottom: 0;
-    transform: translateX(-100%);
-    box-shadow: var(--shadow-lg);
-  }
-  
-  .sidebar.open {
-    transform: translateX(0);
-  }
-  
-  .mobile-menu-btn {
-    display: block;
-  }
-  
-  .message {
-    max-width: 95%;
-  }
-  
-  .feature-highlights {
-    flex-direction: column;
-    align-items: center;
-  }
-}
-
-/* =====================================================
-   Syntax Highlighting (Basic)
-   ===================================================== */
-
-.hljs-keyword,
-.hljs-selector-tag,
-.hljs-built_in {
-  color: #c792ea;
-}
-
-.hljs-string,
-.hljs-attr {
-  color: #c3e88d;
-}
-
-.hljs-number,
-.hljs-literal {
-  color: #f78c6c;
-}
-
-.hljs-comment {
-  color: #546e7a;
-  font-style: italic;
-}
-
-.hljs-function,
-.hljs-title {
-  color: #82aaff;
-}
-
-.hljs-variable,
-.hljs-params {
-  color: #f07178;
-}
-
-/* =====================================================
-   Modal
-   ===================================================== */
-
-.modal-overlay {
-  position: fixed;
-  top: 0;
-  left: 0;
-  right: 0;
-  bottom: 0;
-  background: rgba(0, 0, 0, 0.7);
-  display: none;
-  align-items: center;
-  justify-content: center;
-  z-index: 2000;
-}
-
-.modal-overlay.visible {
-  display: flex;
-}
-
-.modal {
-  background: var(--bg-secondary);
-  border: 1px solid var(--border-color);
-  border-radius: var(--radius-lg);
-  padding: var(--space-lg);
-  max-width: 400px;
-  width: 90%;
-  box-shadow: var(--shadow-lg);
-}
-
-.modal h3 {
-  margin-bottom: var(--space-sm);
-  font-size: 18px;
-}
-
-.modal p {
-  color: var(--text-secondary);
-  margin-bottom: var(--space-md);
-  font-size: 14px;
-}
-
-.whisper-models {
-  display: flex;
-  flex-direction: column;
-  gap: var(--space-sm);
-  margin-bottom: var(--space-lg);
-  max-height: 200px;
-  overflow-y: auto;
-}
-
-.whisper-model-item {
-  display: flex;
-  align-items: center;
-  justify-content: space-between;
-  padding: var(--space-sm) var(--space-md);
-  background: var(--bg-tertiary);
-  border: 1px solid var(--border-subtle);
-  border-radius: var(--radius-md);
-}
-
-.whisper-model-item.selected {
-  border-color: var(--success);
-  background: rgba(34, 197, 94, 0.1);
-}
-
-.whisper-model-item .model-info {
-  display: flex;
-  flex-direction: column;
-}
-
-.whisper-model-item .model-name {
-  font-weight: 500;
-  font-size: 13px;
-}
-
-.whisper-model-item .model-size {
-  font-size: 11px;
-  color: var(--text-muted);
-}
-
-.whisper-model-item .model-actions {
-  display: flex;
-  align-items: center;
-  gap: var(--space-xs);
-}
-
-.whisper-model-item .download-btn,
-.whisper-model-item .use-btn {
-  padding: var(--space-xs) var(--space-sm);
-  font-size: 11px;
-  font-weight: 500;
-  border: none;
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  transition: all var(--transition-fast);
-}
-
-.whisper-model-item .download-btn {
-  background: var(--accent-primary);
-  color: white;
-}
-
-.whisper-model-item .download-btn:hover {
-  background: var(--accent-secondary);
-}
-
-.whisper-model-item .use-btn {
-  background: var(--success);
-  color: white;
-}
-
-.whisper-model-item .use-btn:hover {
-  opacity: 0.9;
-}
-
-.whisper-model-item .delete-btn {
-  padding: var(--space-xs);
-  font-size: 12px;
-  background: transparent;
-  color: var(--text-muted);
-  border: none;
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  transition: all var(--transition-fast);
-}
-
-.whisper-model-item .delete-btn:hover {
-  color: var(--error);
-}
-
-.current-whisper-model {
-  padding: var(--space-sm) var(--space-md);
-  background: var(--bg-tertiary);
-  border-radius: var(--radius-md);
-  margin-bottom: var(--space-md);
-  display: flex;
-  align-items: center;
-  gap: var(--space-sm);
-}
-
-.current-whisper-model .label {
-  font-size: 12px;
-  color: var(--text-secondary);
-}
-
-.current-whisper-model .model-name {
-  font-weight: 500;
-  color: var(--accent-secondary);
-}
-
-.modal-actions {
-  display: flex;
-  justify-content: flex-end;
-  gap: var(--space-sm);
-}
-
-.modal-btn {
-  padding: var(--space-sm) var(--space-md);
-  font-size: 13px;
-  font-weight: 500;
-  border: none;
-  border-radius: var(--radius-sm);
-  cursor: pointer;
-  transition: all var(--transition-fast);
-}
-
-.modal-btn.secondary {
-  background: var(--bg-tertiary);
-  color: var(--text-secondary);
-}
-
-.modal-btn.secondary:hover {
-  background: var(--bg-hover);
-  color: var(--text-primary);
-}
diff --git a/samples/js/embeddings/README.md b/samples/js/embeddings/README.md
new file mode 100644
index 000000000..372405268
--- /dev/null
+++ b/samples/js/embeddings/README.md
@@ -0,0 +1,52 @@
+# Embeddings
+
+Generates **single** and **batch** text embeddings using native in-process inference with the
+Foundry Local JS SDK.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- [Node.js](https://nodejs.org/) v18 or later
+
+## Install
+
+This sample consumes the JS SDK **directly from local source** (`sdk/js`) so it always tracks
+`main` rather than a published npm version. It is **not** pinned to a registry release. The
+dependency in `package.json` is:
+
+```json
+"foundry-local-sdk": "file:../../../sdk/js"
+```
+
+Install dependencies:
+
+```bash
+npm install
+```
+
+> **Building the SDK:** `npm install` resolves `foundry-local-sdk` from `sdk/js`. The SDK ships a
+> prebuilt `dist/` and downloads its native runtime on install. If the local SDK has not been built
+> (or you've changed its source), build it first:
+>
+> ```bash
+> cd ../../../sdk/js
+> npm install
+> npm run build          # compile TypeScript -> dist/
+> npm run build:native   # (re)build the native addon if needed
+> ```
+
+## Run
+
+```bash
+npm start
+# or
+node app.js
+```
+
+## What it does
+
+1. Initializes the SDK and loads the `qwen3-embedding-0.6b` embedding model.
+2. Creates an embedding client.
+3. Generates a **single** embedding and prints its dimensionality and first few values.
+4. Generates a **batch** of embeddings for multiple inputs.
+5. Unloads the model.
diff --git a/samples/js/embeddings/package.json b/samples/js/embeddings/package.json
index 8353cb655..99b79c011 100644
--- a/samples/js/embeddings/package.json
+++ b/samples/js/embeddings/package.json
@@ -7,9 +7,6 @@
     "start": "node app.js"
   },
   "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
+    "foundry-local-sdk": "file:../../../sdk/js"
   }
 }
diff --git a/samples/js/langchain-integration-example/app.js b/samples/js/langchain-integration-example/app.js
deleted file mode 100644
index 8b2e74e39..000000000
--- a/samples/js/langchain-integration-example/app.js
+++ /dev/null
@@ -1,105 +0,0 @@
-// <complete_code>
-// <imports>
-import { ChatOpenAI } from "@langchain/openai";
-import { ChatPromptTemplate } from "@langchain/core/prompts";
-import { FoundryLocalManager } from 'foundry-local-sdk';
-// </imports>
-
-// Initialize the Foundry Local SDK
-console.log('Initializing Foundry Local SDK...');
-
-const endpointUrl = 'http://localhost:5764';
-
-// <init>
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info',
-    webServiceUrls: endpointUrl
-});
-// </init>
-console.log('✓ SDK initialized successfully');
-
-// Download and register all execution providers.
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
-
-// Get the model object
-const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above
-const model = await manager.catalog.getModel(modelAlias);
-
-// Download the model
-console.log(`\nDownloading model ${modelAlias}...`);
-await model.download((progress) => {
-    process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
-});
-console.log('\n✓ Model downloaded');
-
-// Load the model
-console.log(`\nLoading model ${modelAlias}...`);
-await model.load();
-console.log('✓ Model loaded');
-
-// Start the web service
-console.log('\nStarting web service...');
-manager.startWebService();
-console.log('✓ Web service started');
-
-// <langchain_setup>
-
-// Configure ChatOpenAI to use your locally-running model
-const llm = new ChatOpenAI({
-    model: model.id,
-    configuration: {
-        baseURL: endpointUrl + '/v1',
-        apiKey: 'notneeded'
-    },
-    temperature: 0.6,
-    streaming: false
-});
-
-// Create a translation prompt template
-const prompt = ChatPromptTemplate.fromMessages([
-    {
-        role: "system",
-        content: "You are a helpful assistant that translates {input_language} to {output_language}."
-    },
-    {
-        role: "user",
-        content: "{input}"
-    }
-]);
-
-// Build a simple chain by connecting the prompt to the language model
-const chain = prompt.pipe(llm);
-// </langchain_setup>
-
-// <chat_completion>
-const input = "I love to code.";
-console.log(`Translating '${input}' to French...`);
-
-// Run the chain with your inputs
-await chain.invoke({
-    input_language: "English",
-    output_language: "French",
-    input: input
-}).then(aiMsg => {
-    // Print the result content
-    console.log(`Response: ${aiMsg.content}`);
-}).catch(err => {
-    console.error("Error:", err);
-});
-// </chat_completion>
-
-// Tidy up
-console.log('Unloading model and stopping web service...');
-await model.unload();
-manager.stopWebService();
-console.log(`✓ Model unloaded and web service stopped`);
-// </complete_code>
\ No newline at end of file
diff --git a/samples/js/langchain-integration-example/package.json b/samples/js/langchain-integration-example/package.json
deleted file mode 100644
index bb5fb635e..000000000
--- a/samples/js/langchain-integration-example/package.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-  "name": "langchain-integration-example",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "start": "node app.js"
-  },
-  "dependencies": {
-    "@langchain/core": "latest",
-    "@langchain/openai": "latest",
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/live-audio-transcription/README.md b/samples/js/live-audio-transcription/README.md
deleted file mode 100644
index 88bede37d..000000000
--- a/samples/js/live-audio-transcription/README.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# Live Audio Transcription Example
-
-Real-time microphone-to-text transcription using the Foundry Local JS SDK with Nemotron ASR.
-
-## Prerequisites
-
-- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
-- Node.js 18+
-- A microphone (optional — falls back to synthetic audio)
-
-## Setup
-
-```bash
-npm install
-```
-
-> **Note:** `naudiodon2` is optional — provides cross-platform microphone capture. Without it, the example falls back to synthetic audio for testing.
-
-## Run
-
-```bash
-node app.js
-```
-
-Speak into your microphone. Transcription appears in real-time. Press `Ctrl+C` to stop.
-
-## How it works
-
-1. Initializes the Foundry Local SDK and loads the Nemotron ASR model
-2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
-3. Captures microphone audio via `naudiodon2` (or generates synthetic audio as fallback)
-4. Pushes PCM chunks to the SDK via `session.append()`
-5. Reads transcription results via `for await (const result of session.getStream())`
-6. Access text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern)
-
-## API
-
-```javascript
-const audioClient = model.createAudioClient();
-const session = audioClient.createLiveTranscriptionSession();
-session.settings.sampleRate = 16000;
-session.settings.channels = 1;
-session.settings.language = 'en';
-
-await session.start();
-
-// Push audio
-await session.append(pcmBytes);
-
-// Read results
-for await (const result of session.getStream()) {
-    console.log(result.content[0].text);       // transcribed text
-    console.log(result.content[0].transcript); // alias (OpenAI compat)
-    console.log(result.is_final);              // true for final results
-}
-
-await session.stop();
-```
diff --git a/samples/js/live-audio-transcription/app.js b/samples/js/live-audio-transcription/app.js
deleted file mode 100644
index e7672cdd2..000000000
--- a/samples/js/live-audio-transcription/app.js
+++ /dev/null
@@ -1,203 +0,0 @@
-// Live Audio Transcription Example — Foundry Local JS SDK
-//
-// Demonstrates real-time microphone-to-text using the JS SDK.
-// Requires: npm install foundry-local-sdk naudiodon2
-//
-// Usage: node app.js
-
-import { FoundryLocalManager } from 'foundry-local-sdk';
-
-console.log('╔══════════════════════════════════════════════════════════╗');
-console.log('║   Foundry Local — Live Audio Transcription (JS SDK)      ║');
-console.log('╚══════════════════════════════════════════════════════════╝');
-console.log();
-
-// Initialize the Foundry Local SDK
-console.log('Initializing Foundry Local SDK...');
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info'
-});
-console.log('✓ SDK initialized');
-
-// Get and load the nemotron model
-// English-only:
-const modelAlias = 'nemotron-speech-streaming-en-0.6b';
-// Multi-lingual (supports 30+ languages including auto-detect):
-// const modelAlias = 'nemotron-3.5-asr-streaming-0.6b';
-let model = await manager.catalog.getModel(modelAlias);
-if (!model) {
-    console.error(`ERROR: Model "${modelAlias}" not found in catalog.`);
-    process.exit(1);
-}
-
-console.log(`Found model: ${model.id}`);
-console.log('Downloading model (if needed)...');
-await model.download((progress) => {
-    process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
-});
-console.log('\n✓ Model downloaded');
-
-console.log('Loading model...');
-await model.load();
-console.log('✓ Model loaded');
-
-// Create live transcription session (same pattern as C# sample).
-const audioClient = model.createAudioClient();
-const session = audioClient.createLiveTranscriptionSession();
-
-session.settings.sampleRate = 16000;  // Default is 16000; shown here for clarity
-session.settings.channels = 1;
-session.settings.bitsPerSample = 16;
-session.settings.language = 'en';                  // English (default)
-// Multi-lingual examples:
-// session.settings.language = 'de';     // German
-// session.settings.language = 'zh-CN';  // Chinese (Simplified)
-// session.settings.language = 'auto';   // Auto-detect language
-
-console.log('Starting streaming session...');
-await session.start();
-console.log('✓ Session started');
-
-// Read transcription results in background
-const readPromise = (async () => {
-    try {
-        for await (const result of session.getStream()) {
-            const text = result.content?.[0]?.text;
-            if (!text) continue;
-
-            // `is_final` is a transcript-state marker only. It should not stop the app.
-            if (result.is_final) {
-                process.stdout.write(`\n  [FINAL] ${text}\n`);
-            } else {
-                process.stdout.write(text);
-            }
-        }
-    } catch (err) {
-        if (err.name !== 'AbortError') {
-            console.error('Stream error:', err.message);
-        }
-    }
-})();
-
-// --- Microphone capture ---
-// This example uses naudiodon2 for cross-platform audio capture.
-// Install with: npm install naudiodon2
-//
-// If you prefer a different audio library, just push PCM bytes
-// (16-bit signed LE, mono, 16kHz) via session.append().
-
-let audioInput;
-try {
-    const { default: portAudio } = await import('naudiodon2');
-
-    audioInput = portAudio.AudioIO({
-        inOptions: {
-            channelCount: session.settings.channels,
-            sampleFormat: session.settings.bitsPerSample === 16
-                ? portAudio.SampleFormat16Bit
-                : portAudio.SampleFormat32Bit,
-            sampleRate: session.settings.sampleRate,
-            // Larger chunk size lowers callback frequency and reduces overflow risk.
-            framesPerBuffer: 3200,
-            // Allow deeper native queue during occasional event-loop stalls.
-            maxQueue: 64
-        }
-    });
-
-    const appendQueue = [];
-    let pumping = false;
-    let warnedQueueDrop = false;
-
-    const pumpAudio = async () => {
-        if (pumping) return;
-        pumping = true;
-        try {
-            while (appendQueue.length > 0) {
-                const pcm = appendQueue.shift();
-                await session.append(pcm);
-            }
-        } catch (err) {
-            console.error('append error:', err.message);
-        } finally {
-            pumping = false;
-            // Handle race where new data arrived after loop exit.
-            if (appendQueue.length > 0) {
-                void pumpAudio();
-            }
-        }
-    };
-
-    audioInput.on('data', (buffer) => {
-        // Single copy: slice the underlying ArrayBuffer to get an independent Uint8Array.
-        const copy = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength).slice();
-
-        // Keep a bounded queue to avoid unbounded memory growth.
-        if (appendQueue.length >= 100) {
-            appendQueue.shift();
-            if (!warnedQueueDrop) {
-                warnedQueueDrop = true;
-                console.warn('Audio append queue overflow; dropping oldest chunk to keep stream alive.');
-            }
-        }
-
-        appendQueue.push(copy);
-        void pumpAudio();
-    });
-
-    console.log();
-    console.log('════════════════════════════════════════════════════════════');
-    console.log('  LIVE TRANSCRIPTION ACTIVE');
-    console.log('  Speak into your microphone.');
-    console.log('  Press Ctrl+C to stop.');
-    console.log('════════════════════════════════════════════════════════════');
-    console.log();
-
-    audioInput.start();
-} catch (err) {
-    console.warn('⚠ Could not initialize microphone (naudiodon2 may not be installed).');
-    console.warn('  Install with: npm install naudiodon2');
-    console.warn('  Falling back to synthetic audio test...');
-    console.warn();
-
-    // Fallback: push 2 seconds of synthetic PCM (440Hz sine wave)
-    const sampleRate = session.settings.sampleRate;
-    const duration = 2;
-    const totalSamples = sampleRate * duration;
-    const pcmBytes = new Uint8Array(totalSamples * 2);
-    for (let i = 0; i < totalSamples; i++) {
-        const t = i / sampleRate;
-        const sample = Math.round(32767 * 0.5 * Math.sin(2 * Math.PI * 440 * t));
-        pcmBytes[i * 2] = sample & 0xFF;
-        pcmBytes[i * 2 + 1] = (sample >> 8) & 0xFF;
-    }
-
-    // Push in 100ms chunks
-    const chunkSize = (sampleRate / 10) * 2;
-    for (let offset = 0; offset < pcmBytes.length; offset += chunkSize) {
-        const len = Math.min(chunkSize, pcmBytes.length - offset);
-        await session.append(pcmBytes.slice(offset, offset + len));
-    }
-
-    console.log('✓ Synthetic audio pushed');
-    console.log('Waiting briefly for final transcription results...');
-    await new Promise((resolve) => setTimeout(resolve, 3000));
-    await session.stop();
-    await readPromise;
-    await model.unload();
-    console.log('✓ Done');
-    process.exit(0);
-}
-
-// Handle graceful shutdown
-process.on('SIGINT', async () => {
-    console.log('\n\nStopping...');
-    if (audioInput) {
-        audioInput.quit();
-    }
-    await session.stop();
-    await readPromise;
-    await model.unload();
-    console.log('✓ Done');
-    process.exit(0);
-});
diff --git a/samples/js/native-chat-completions/package.json b/samples/js/native-chat-completions/package.json
deleted file mode 100644
index eeba0acd1..000000000
--- a/samples/js/native-chat-completions/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "native-chat-completions",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "start": "node app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/responses-api/README.md b/samples/js/responses-api/README.md
new file mode 100644
index 000000000..bdedb16c6
--- /dev/null
+++ b/samples/js/responses-api/README.md
@@ -0,0 +1,63 @@
+# Web Server Responses — Vision Example
+
+Streams a **vision (image understanding)** response from the Foundry Local **local web server**
+using the OpenAI-compatible **Responses API** (`/v1/responses`). A bundled `test_image.jpg` is
+used by default.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- [Node.js](https://nodejs.org/) v18 or later
+
+## Install
+
+This sample consumes the JS SDK **directly from local source** (`sdk/js`) so it always tracks
+`main` rather than a published npm version. It is **not** pinned to a registry release. The
+dependency in `package.json` is:
+
+```json
+"foundry-local-sdk": "file:../../../sdk/js"
+```
+
+Install dependencies:
+
+```bash
+npm install
+```
+
+> **Building the SDK:** `npm install` resolves `foundry-local-sdk` from `sdk/js`. The SDK ships a
+> prebuilt `dist/` and downloads its native runtime on install. If the local SDK has not been built
+> (or you've changed its source), build it first:
+>
+> ```bash
+> cd ../../../sdk/js
+> npm install
+> npm run build          # compile TypeScript -> dist/
+> npm run build:native   # (re)build the native addon if needed
+> ```
+
+## Run
+
+```bash
+# Use a vision model alias with the bundled test image
+node app.js qwen3.5-0.8b
+
+# Use a specific image
+node app.js qwen3.5-0.8b ./my-image.jpg
+
+# Use a specific variant id
+node app.js Qwen2.5-VL-7B-Instruct-generic-cpu
+
+# List vision models (and variants) available in the catalog
+node app.js --list-models
+```
+
+## What it does
+
+1. Initializes the SDK (with `webServiceUrls` so the local web server has a known endpoint).
+2. Downloads and registers execution providers.
+3. Resolves the requested vision model (by alias or variant id), downloads, and loads it.
+4. Starts the local web service.
+5. Base64-encodes the image and POSTs it to `/v1/responses` with `input_text` + `input_image`
+   content parts, streaming the assistant's description back via Server-Sent Events.
+6. Unloads the model and stops the web service.
diff --git a/samples/js/web-server-responses-vision-example/app.js b/samples/js/responses-api/app.js
similarity index 100%
rename from samples/js/web-server-responses-vision-example/app.js
rename to samples/js/responses-api/app.js
diff --git a/samples/js/web-server-responses-vision-example/package.json b/samples/js/responses-api/package.json
similarity index 62%
rename from samples/js/web-server-responses-vision-example/package.json
rename to samples/js/responses-api/package.json
index e02dde17e..171158365 100644
--- a/samples/js/web-server-responses-vision-example/package.json
+++ b/samples/js/responses-api/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "web-server-responses-vision-example",
+  "name": "responses-api",
   "version": "1.0.0",
   "type": "module",
   "main": "app.js",
@@ -7,6 +7,6 @@
     "start": "node app.js"
   },
   "dependencies": {
-    "foundry-local-sdk": "latest"
+    "foundry-local-sdk": "file:../../../sdk/js"
   }
 }
diff --git a/samples/python/web-server-responses-vision/src/test_image.jpg b/samples/js/responses-api/test_image.jpg
similarity index 100%
rename from samples/python/web-server-responses-vision/src/test_image.jpg
rename to samples/js/responses-api/test_image.jpg
diff --git a/samples/js/tool-calling-foundry-local/.npmrc b/samples/js/tool-calling-foundry-local/.npmrc
deleted file mode 100644
index 114ea2a42..000000000
--- a/samples/js/tool-calling-foundry-local/.npmrc
+++ /dev/null
@@ -1,2 +0,0 @@
-registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/
-always-auth=true
diff --git a/samples/js/tool-calling-foundry-local/package.json b/samples/js/tool-calling-foundry-local/package.json
deleted file mode 100644
index 6ae9c0322..000000000
--- a/samples/js/tool-calling-foundry-local/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "tool-calling-foundry-local",
-  "version": "1.0.0",
-  "type": "module",
-  "scripts": {
-    "start": "node src/app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest",
-    "openai": "^6.25.0"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js
deleted file mode 100644
index cb06466b2..000000000
--- a/samples/js/tool-calling-foundry-local/src/app.js
+++ /dev/null
@@ -1,206 +0,0 @@
-// <complete_code>
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <imports>
-import { OpenAI } from "openai";
-import { FoundryLocalManager } from "foundry-local-sdk";
-// </imports>
-
-// By using an alias, the most suitable model will be downloaded 
-// to your end-user's device.
-// TIP: You can find a list of available models by running the 
-// following command in your terminal: `foundry model list`.
-const alias = "qwen2.5-0.5b";
-
-// <tool_definitions>
-function multiplyNumbers(first, second) {
-  return first * second;
-}
-// </tool_definitions>
-
-async function runToolCallingExample() {
-  let manager = null;
-  let model = null;
-
-  try {
-    // <init>
-    console.log("Initializing Foundry Local SDK...");
-    manager = FoundryLocalManager.create({
-      appName: "foundry_local_samples",
-      serviceEndpoint: "http://localhost:5000",
-      logLevel: "info"
-    });
-    // </init>
-
-    // Download and register all execution providers.
-    let currentEp = '';
-    await manager.downloadAndRegisterEps((epName, percent) => {
-      if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-      }
-      process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-    });
-    if (currentEp !== '') process.stdout.write('\n');
-
-    // <model_setup>
-    const catalog = manager.catalog;
-    model = await catalog.getModel(alias);
-    if (!model) {
-      throw new Error(`Model ${alias} not found`);
-    }
-
-    console.log(`Loading model ${model.id}...`);
-    await model.download();
-    await model.load();
-    console.log('✓ Model loaded');
-
-    manager.startWebService();
-    const endpoint = manager.urls[0];
-    if (!endpoint) {
-      throw new Error("Foundry Local web service did not return an endpoint.");
-    }
-
-    const openai = new OpenAI({
-      baseURL: `${endpoint.replace(/\/$/, "")}/v1`,
-      apiKey: "local"
-    });
-    // </model_setup>
-
-    // <tool_loop>
-    // Prepare messages
-    const messages = [
-      {
-        role: "system",
-        content: "You are a helpful AI assistant. If necessary, you can use any provided tools to answer the question."
-      },
-      { role: "user", content: "What is the answer to 7 multiplied by 6?" }
-    ];
-
-    // Prepare tools
-    const tools = [
-      {
-        type: "function",
-        function: {
-          name: "multiply_numbers",
-          description: "A tool for multiplying two numbers.",
-          parameters: {
-            type: "object",
-            properties: {
-              first: {
-                type: "integer",
-                description: "The first number in the operation"
-              },
-              second: {
-                type: "integer",
-                description: "The second number in the operation"
-              }
-            },
-            required: ["first", "second"]
-          }
-        }
-      }
-    ];
-
-    // Start the conversation
-    console.log("Chat completion response:");
-    const toolCallResponses = [];
-
-    const firstStream = await openai.chat.completions.create({
-      model: model.id,
-      messages,
-      tools,
-      tool_choice: "required",
-      stream: true
-    });
-
-    for await (const chunk of firstStream) {
-      const content = chunk.choices?.[0]?.delta?.content;
-      if (content) {
-        process.stdout.write(content);
-      }
-
-      if (chunk.choices?.[0]?.finish_reason === "tool_calls") {
-        toolCallResponses.push(chunk);
-      }
-    }
-    console.log();
-
-    // Invoke tools called and append responses to the chat
-    for (const chunk of toolCallResponses) {
-      const toolCalls = chunk.choices?.[0]?.message?.tool_calls ?? chunk.choices?.[0]?.delta?.tool_calls ?? [];
-      for (const toolCall of toolCalls) {
-        if (toolCall.function?.name === "multiply_numbers") {
-          const args = JSON.parse(toolCall.function.arguments || "{}");
-          const first = args.first;
-          const second = args.second;
-
-          console.log(`\nInvoking tool: ${toolCall.function.name} with arguments ${first} and ${second}`);
-          const result = multiplyNumbers(first, second);
-          console.log(`Tool response: ${result}`);
-
-          messages.push({
-            role: "tool",
-            tool_call_id: toolCall.id,
-            content: result.toString()
-          });
-        }
-      }
-    }
-
-    console.log("\nTool calls completed. Prompting model to continue conversation...\n");
-
-    // Prompt the model to continue the conversation after the tool call
-    messages.push({
-      role: "system",
-      content: "Respond only with the answer generated by the tool."
-    });
-
-    // Run the next turn of the conversation
-    console.log("Chat completion response:");
-    const secondStream = await openai.chat.completions.create({
-      model: model.id,
-      messages,
-      tools,
-      tool_choice: "auto",
-      stream: true
-    });
-
-    for await (const chunk of secondStream) {
-      const content = chunk.choices?.[0]?.delta?.content;
-      if (content) {
-        process.stdout.write(content);
-      }
-    }
-
-    console.log();
-    // </tool_loop>
-  } finally {
-    // <cleanup>
-    if (model) {
-      try {
-        if (await model.isLoaded()) {
-          await model.unload();
-        }
-      } catch (cleanupError) {
-        console.warn("Cleanup warning while unloading model:", cleanupError);
-      }
-    }
-
-    if (manager) {
-      try {
-        manager.stopWebService();
-      } catch (cleanupError) {
-        console.warn("Cleanup warning while stopping service:", cleanupError);
-      }
-    }
-    // </cleanup>
-  }
-}
-
-await runToolCallingExample().catch((error) => {
-  console.error("Error running sample:", error);
-  process.exitCode = 1;
-});
-// </complete_code>
diff --git a/samples/js/tutorial-chat-assistant/app.js b/samples/js/tutorial-chat-assistant/app.js
deleted file mode 100644
index 842db0581..000000000
--- a/samples/js/tutorial-chat-assistant/app.js
+++ /dev/null
@@ -1,95 +0,0 @@
-// <complete_code>
-// <imports>
-import { FoundryLocalManager } from 'foundry-local-sdk';
-import * as readline from 'readline';
-// </imports>
-
-// <init>
-// Initialize the Foundry Local SDK
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info'
-});
-
-// Download and register all execution providers.
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
-
-// Select and load a model from the catalog
-const model = await manager.catalog.getModel('qwen2.5-0.5b');
-
-await model.download((progress) => {
-    process.stdout.write(`\rDownloading model: ${progress.toFixed(2)}%`);
-});
-console.log('\nModel downloaded.');
-
-await model.load();
-console.log('Model loaded and ready.');
-
-// Create a chat client
-const chatClient = model.createChatClient();
-// </init>
-
-// <system_prompt>
-// Start the conversation with a system prompt
-const messages = [
-    {
-        role: 'system',
-        content: 'You are a helpful, friendly assistant. Keep your responses ' +
-                 'concise and conversational. If you don\'t know something, say so.'
-    }
-];
-// </system_prompt>
-
-// Set up readline for console input
-const rl = readline.createInterface({
-    input: process.stdin,
-    output: process.stdout
-});
-
-const askQuestion = (prompt) => new Promise((resolve) => rl.question(prompt, resolve));
-
-console.log('\nChat assistant ready! Type \'quit\' to exit.\n');
-
-// <conversation_loop>
-while (true) {
-    const userInput = await askQuestion('You: ');
-    if (userInput.trim().toLowerCase() === 'quit' ||
-        userInput.trim().toLowerCase() === 'exit') {
-        break;
-    }
-
-    // Add the user's message to conversation history
-    messages.push({ role: 'user', content: userInput });
-
-    // <streaming>
-    // Stream the response token by token
-    process.stdout.write('Assistant: ');
-    let fullResponse = '';
-    for await (const chunk of chatClient.completeStreamingChat(messages)) {
-        const content = chunk.choices?.[0]?.delta?.content;
-        if (content) {
-            process.stdout.write(content);
-            fullResponse += content;
-        }
-    }
-    console.log('\n');
-    // </streaming>
-
-    // Add the complete response to conversation history
-    messages.push({ role: 'assistant', content: fullResponse });
-}
-// </conversation_loop>
-
-// Clean up - unload the model
-await model.unload();
-console.log('Model unloaded. Goodbye!');
-rl.close();
-// </complete_code>
diff --git a/samples/js/tutorial-chat-assistant/package.json b/samples/js/tutorial-chat-assistant/package.json
deleted file mode 100644
index 8a36a2884..000000000
--- a/samples/js/tutorial-chat-assistant/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "tutorial-chat-assistant",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "start": "node app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/tutorial-document-summarizer/app.js b/samples/js/tutorial-document-summarizer/app.js
deleted file mode 100644
index 436b626b7..000000000
--- a/samples/js/tutorial-document-summarizer/app.js
+++ /dev/null
@@ -1,95 +0,0 @@
-// <complete_code>
-// <imports>
-import { FoundryLocalManager } from 'foundry-local-sdk';
-import { readFileSync, readdirSync, statSync } from 'fs';
-import { join, basename } from 'path';
-// </imports>
-
-async function summarizeFile(chatClient, filePath, systemPrompt) {
-    const content = readFileSync(filePath, 'utf-8');
-    const messages = [
-        { role: 'system', content: systemPrompt },
-        { role: 'user', content: content }
-    ];
-
-    const response = await chatClient.completeChat(messages);
-    console.log(response.choices[0]?.message?.content);
-}
-
-async function summarizeDirectory(chatClient, directory, systemPrompt) {
-    const txtFiles = readdirSync(directory)
-        .filter(f => f.endsWith('.txt'))
-        .sort();
-
-    if (txtFiles.length === 0) {
-        console.log(`No .txt files found in ${directory}`);
-        return;
-    }
-
-    for (const fileName of txtFiles) {
-        console.log(`--- ${fileName} ---`);
-        await summarizeFile(chatClient, join(directory, fileName), systemPrompt);
-        console.log();
-    }
-}
-
-// <init>
-// Initialize the Foundry Local SDK
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info'
-});
-
-// Download and register all execution providers.
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
-
-// Select and load a model from the catalog
-const model = await manager.catalog.getModel('qwen2.5-0.5b');
-
-await model.download((progress) => {
-    process.stdout.write(`\rDownloading model: ${progress.toFixed(2)}%`);
-});
-console.log('\nModel downloaded.');
-
-await model.load();
-console.log('Model loaded and ready.\n');
-
-// Create a chat client
-const chatClient = model.createChatClient();
-// </init>
-
-// <summarization>
-const systemPrompt =
-    'Summarize the following document into concise bullet points. ' +
-    'Focus on the key points and main ideas.';
-
-// <file_reading>
-const target = process.argv[2] || 'document.txt';
-// </file_reading>
-
-try {
-    const stats = statSync(target);
-    if (stats.isDirectory()) {
-        await summarizeDirectory(chatClient, target, systemPrompt);
-    } else {
-        console.log(`--- ${basename(target)} ---`);
-        await summarizeFile(chatClient, target, systemPrompt);
-    }
-} catch {
-    console.log(`--- ${basename(target)} ---`);
-    await summarizeFile(chatClient, target, systemPrompt);
-}
-// </summarization>
-
-// Clean up
-await model.unload();
-console.log('\nModel unloaded. Done!');
-// </complete_code>
diff --git a/samples/js/tutorial-document-summarizer/package.json b/samples/js/tutorial-document-summarizer/package.json
deleted file mode 100644
index c97e416fb..000000000
--- a/samples/js/tutorial-document-summarizer/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "tutorial-document-summarizer",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "start": "node app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/tutorial-tool-calling/app.js b/samples/js/tutorial-tool-calling/app.js
deleted file mode 100644
index b3b15d0ae..000000000
--- a/samples/js/tutorial-tool-calling/app.js
+++ /dev/null
@@ -1,197 +0,0 @@
-// <complete_code>
-// <imports>
-import { FoundryLocalManager } from 'foundry-local-sdk';
-import * as readline from 'readline';
-// </imports>
-
-// <tool_definitions>
-// --- Tool definitions ---
-const tools = [
-    {
-        type: 'function',
-        function: {
-            name: 'get_weather',
-            description: 'Get the current weather for a location',
-            parameters: {
-                type: 'object',
-                properties: {
-                    location: {
-                        type: 'string',
-                        description: 'The city or location'
-                    },
-                    unit: {
-                        type: 'string',
-                        enum: ['celsius', 'fahrenheit'],
-                        description: 'Temperature unit'
-                    }
-                },
-                required: ['location']
-            }
-        }
-    },
-    {
-        type: 'function',
-        function: {
-            name: 'calculate',
-            description: 'Perform a math calculation',
-            parameters: {
-                type: 'object',
-                properties: {
-                    expression: {
-                        type: 'string',
-                        description:
-                            'The math expression to evaluate'
-                    }
-                },
-                required: ['expression']
-            }
-        }
-    }
-];
-
-// --- Tool implementations ---
-function getWeather(location, unit = 'celsius') {
-    return {
-        location,
-        temperature: unit === 'celsius' ? 22 : 72,
-        unit,
-        condition: 'Sunny'
-    };
-}
-
-function calculate(expression) {
-    // Input is validated against a strict allowlist of numeric/math characters,
-    // making this safe from code injection in this tutorial context.
-    const allowed = /^[0-9+\-*/(). ]+$/;
-    if (!allowed.test(expression)) {
-        return { error: 'Invalid expression' };
-    }
-    try {
-        const result = Function(
-            `"use strict"; return (${expression})`
-        )();
-        return { expression, result };
-    } catch (err) {
-        return { error: err.message };
-    }
-}
-
-const toolFunctions = {
-    get_weather: (args) => getWeather(args.location, args.unit),
-    calculate: (args) => calculate(args.expression)
-};
-// </tool_definitions>
-
-// <tool_loop>
-async function processToolCalls(messages, response, chatClient) {
-    let choice = response.choices[0]?.message;
-
-    while (choice?.tool_calls?.length > 0) {
-        messages.push(choice);
-
-        for (const toolCall of choice.tool_calls) {
-            const functionName = toolCall.function.name;
-            const args = JSON.parse(toolCall.function.arguments);
-            console.log(
-                `  Tool call: ${functionName}` +
-                `(${JSON.stringify(args)})`
-            );
-
-            const result = toolFunctions[functionName](args);
-            messages.push({
-                role: 'tool',
-                tool_call_id: toolCall.id,
-                content: JSON.stringify(result)
-            });
-        }
-
-        response = await chatClient.completeChat(
-            messages, { tools }
-        );
-        choice = response.choices[0]?.message;
-    }
-
-    return choice?.content ?? '';
-}
-// </tool_loop>
-
-// <init>
-// --- Main application ---
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info'
-});
-
-// Download and register all execution providers.
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
-
-const model = await manager.catalog.getModel('qwen2.5-0.5b');
-
-await model.download((progress) => {
-    process.stdout.write(
-        `\rDownloading model: ${progress.toFixed(2)}%`
-    );
-});
-console.log('\nModel downloaded.');
-
-await model.load();
-console.log('Model loaded and ready.');
-
-const chatClient = model.createChatClient();
-
-const messages = [
-    {
-        role: 'system',
-        content:
-            'You are a helpful assistant with access to tools. ' +
-            'Use them when needed to answer questions accurately.'
-    }
-];
-
-const rl = readline.createInterface({
-    input: process.stdin,
-    output: process.stdout
-});
-
-const askQuestion = (prompt) =>
-    new Promise((resolve) => rl.question(prompt, resolve));
-
-console.log(
-    '\nTool-calling assistant ready! Type \'quit\' to exit.\n'
-);
-
-while (true) {
-    const userInput = await askQuestion('You: ');
-    if (
-        userInput.trim().toLowerCase() === 'quit' ||
-        userInput.trim().toLowerCase() === 'exit'
-    ) {
-        break;
-    }
-
-    messages.push({ role: 'user', content: userInput });
-
-    const response = await chatClient.completeChat(
-        messages, { tools }
-    );
-    const answer = await processToolCalls(
-        messages, response, chatClient
-    );
-
-    messages.push({ role: 'assistant', content: answer });
-    console.log(`Assistant: ${answer}\n`);
-}
-
-await model.unload();
-console.log('Model unloaded. Goodbye!');
-rl.close();
-// </init>
-// </complete_code>
diff --git a/samples/js/tutorial-tool-calling/package.json b/samples/js/tutorial-tool-calling/package.json
deleted file mode 100644
index ab7f62d6c..000000000
--- a/samples/js/tutorial-tool-calling/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "tutorial-tool-calling",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "start": "node app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/tutorial-voice-to-text/app.js b/samples/js/tutorial-voice-to-text/app.js
deleted file mode 100644
index 60057e069..000000000
--- a/samples/js/tutorial-voice-to-text/app.js
+++ /dev/null
@@ -1,89 +0,0 @@
-// <complete_code>
-// <imports>
-import { FoundryLocalManager } from 'foundry-local-sdk';
-import { fileURLToPath } from 'url';
-import path from 'path';
-// </imports>
-
-const __dirname = path.dirname(fileURLToPath(import.meta.url));
-
-// <init>
-// Initialize the Foundry Local SDK
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info'
-});
-// </init>
-
-// Download and register all execution providers.
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
-
-// <transcription>
-// Load the speech-to-text model
-const speechModel = await manager.catalog.getModel('whisper-tiny');
-await speechModel.download((progress) => {
-    process.stdout.write(
-        `\rDownloading speech model: ${progress.toFixed(2)}%`
-    );
-});
-console.log('\nSpeech model downloaded.');
-
-await speechModel.load();
-console.log('Speech model loaded.');
-
-// Transcribe the audio file
-const audioClient = speechModel.createAudioClient();
-const transcription = await audioClient.transcribe(
-    path.join(__dirname, 'meeting-notes.wav')
-);
-console.log(`\nTranscription:\n${transcription.text}`);
-
-// Unload the speech model to free memory
-await speechModel.unload();
-// </transcription>
-
-// <summarization>
-// Load the chat model for summarization
-const chatModel = await manager.catalog.getModel('qwen2.5-0.5b');
-await chatModel.download((progress) => {
-    process.stdout.write(
-        `\rDownloading chat model: ${progress.toFixed(2)}%`
-    );
-});
-console.log('\nChat model downloaded.');
-
-await chatModel.load();
-console.log('Chat model loaded.');
-
-// Summarize the transcription into organized notes
-const chatClient = chatModel.createChatClient();
-const messages = [
-    {
-        role: 'system',
-        content: 'You are a note-taking assistant. Summarize ' +
-                 'the following transcription into organized, ' +
-                 'concise notes with bullet points.'
-    },
-    {
-        role: 'user',
-        content: transcription.text
-    }
-];
-
-const response = await chatClient.completeChat(messages);
-const summary = response.choices[0]?.message?.content;
-console.log(`\nSummary:\n${summary}`);
-
-// Clean up
-await chatModel.unload();
-console.log('\nDone. Models unloaded.');
-// </summarization>
-// </complete_code>
diff --git a/samples/js/tutorial-voice-to-text/package.json b/samples/js/tutorial-voice-to-text/package.json
deleted file mode 100644
index 3efb0d4bd..000000000
--- a/samples/js/tutorial-voice-to-text/package.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "name": "tutorial-voice-to-text",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "start": "node app.js"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
-  }
-}
diff --git a/samples/js/verify-winml/README.md b/samples/js/verify-winml/README.md
deleted file mode 100644
index a5e52921a..000000000
--- a/samples/js/verify-winml/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Verify WinML 2.0 Execution Providers (JavaScript)
-
-This sample verifies that WinML 2.0 execution providers are correctly discovered,
-downloaded, and registered using the Foundry Local JavaScript SDK. It uses registered
-WinML EP-backed model variants and finishes with one native streaming chat check.
-
-## Prerequisites
-
-- Windows with a compatible GPU
-- Node.js 20+
-
-## Setup
-
-`package.json` installs the repo-local `foundry-local-sdk` package and then
-runs its WinML installer script, so the sample always uses the current
-branch's WinML artifact pins:
-
-```bash
-npm install
-```
-
-## Run
-
-```bash
-node app.js
-```
diff --git a/samples/js/verify-winml/app.js b/samples/js/verify-winml/app.js
deleted file mode 100644
index e7f947774..000000000
--- a/samples/js/verify-winml/app.js
+++ /dev/null
@@ -1,240 +0,0 @@
-/**
- * Foundry Local SDK - WinML 2.0 EP Verification Script (JavaScript)
- *
- * Verifies:
- *   1. Execution providers are discovered and registered
- *   2. Accelerated models appear in catalog after EP registration
- *   3. Streaming chat completions work on an accelerated model
- */
-
-import { FoundryLocalManager } from "foundry-local-sdk";
-
-const PASS = "\x1b[92m[PASS]\x1b[0m";
-const FAIL = "\x1b[91m[FAIL]\x1b[0m";
-const INFO = "\x1b[94m[INFO]\x1b[0m";
-const WARN = "\x1b[93m[WARN]\x1b[0m";
-
-const results = [];
-
-function logResult(testName, passed, detail = "") {
-  const status = passed ? PASS : FAIL;
-  const msg = detail ? `${status} ${testName} - ${detail}` : `${status} ${testName}`;
-  console.log(msg);
-  results.push({ testName, passed });
-}
-
-function printSeparator(title) {
-  console.log(`\n${"=".repeat(60)}`);
-  console.log(`  ${title}`);
-  console.log(`${"=".repeat(60)}\n`);
-}
-
-function isAcceleratedVariant(variant) {
-  const runtime = variant.info?.runtime;
-  return Boolean(runtime && ["GPU", "NPU"].includes(runtime.deviceType));
-}
-
-async function main() {
-  // ── 0. Initialize FoundryLocalManager ──────────────────────
-  printSeparator("Initialization");
-  const manager = FoundryLocalManager.create({
-    appName: "verify_winml",
-    logLevel: "info",
-  });
-  console.log(`${INFO} FoundryLocalManager initialized.`);
-
-  // ── 1. Discover & Register EPs ────────────────────────────
-  printSeparator("Step 1: Discover & Register Execution Providers");
-  let eps = [];
-  try {
-    eps = manager.discoverEps();
-    console.log(`${INFO} Discovered ${eps.length} execution providers:`);
-    for (const ep of eps) {
-      console.log(`  - ${ep.name.padEnd(40)}  Registered: ${ep.isRegistered}`);
-    }
-    logResult("EP Discovery", true, `${eps.length} EP(s) found`);
-  } catch (e) {
-    logResult("EP Discovery", false, e.message);
-  }
-
-  if (!eps.length) {
-    const detail = "No execution providers discovered on this machine";
-    logResult("EP Download & Registration", false, detail);
-    console.log(`\n${FAIL} ${detail}.`);
-    printSummary();
-    return;
-  }
-
-  try {
-    let lastProgressEp = null;
-    let lastProgressPercent = -1;
-    const result = await manager.downloadAndRegisterEps((epName, percent) => {
-      if (lastProgressEp && (lastProgressEp !== epName || percent < lastProgressPercent)) {
-        process.stdout.write("\n");
-      }
-      lastProgressEp = epName;
-      lastProgressPercent = percent;
-      process.stdout.write(`\r  Downloading ${epName}: ${percent.toFixed(1)}%`);
-    });
-    if (lastProgressEp) {
-      console.log();
-    }
-
-    console.log(`${INFO} EP registration result: success=${result.success}, status=${result.status}`);
-    if (result.registeredEps?.length) {
-      console.log(`  Registered: ${result.registeredEps.join(", ")}`);
-    }
-    if (result.failedEps?.length) {
-      console.log(`  Failed:     ${result.failedEps.join(", ")}`);
-    }
-
-    const downloadOk = result.success;
-    const detail = downloadOk && result.registeredEps?.length
-      ? `${result.registeredEps.length} EP(s) registered`
-      : result.status;
-    logResult("EP Download & Registration", downloadOk, detail);
-    if (!downloadOk) {
-      printSummary();
-      return;
-    }
-  } catch (e) {
-    console.log();
-    logResult("EP Download & Registration", false, e.message);
-    printSummary();
-    return;
-  }
-
-  // ── 2. List Models & Find Accelerated Variants ────────────
-  printSeparator("Step 2: Model Catalog - Accelerated Models");
-  const models = await manager.catalog.getModels();
-  console.log(`${INFO} Total models in catalog: ${models.length}`);
-
-  const acceleratedVariants = [];
-
-  for (const model of models) {
-    for (const variant of model.variants) {
-      if (isAcceleratedVariant(variant)) {
-        acceleratedVariants.push(variant);
-      }
-    }
-  }
-
-  console.log(`${INFO} Accelerated model variants: ${acceleratedVariants.length}`);
-  for (const variant of acceleratedVariants) {
-    const runtime = variant.info?.runtime;
-    const ep = runtime?.executionProvider || "?";
-    const device = runtime?.deviceType || "?";
-    console.log(`  - ${variant.id.padEnd(50)}  Device: ${String(device).padEnd(3)}  EP: ${ep}`);
-  }
-
-  logResult(
-    "Catalog - Accelerated models found",
-    acceleratedVariants.length > 0,
-    `${acceleratedVariants.length} accelerated variant(s)`,
-  );
-
-  if (!acceleratedVariants.length) {
-    console.log(`\n${FAIL} No accelerated model variants are available.`);
-    console.log(`${WARN} Ensure the system has a compatible accelerator and matching model variants installed.`);
-    printSummary();
-    process.exit(1);
-  }
-
-  // ── 3. Download & Load Model ──────────────────────────────
-  printSeparator("Step 3: Download & Load Model");
-
-  let chosen = null;
-  let downloadedAny = false;
-  let lastLoadError = null;
-  for (const candidate of acceleratedVariants) {
-    const ep = candidate.info?.runtime?.executionProvider || "unknown";
-    console.log(`\n${INFO} Trying model: ${candidate.id} (EP: ${ep})`);
-
-    try {
-      await candidate.download((percent) => {
-        process.stdout.write(`\r  Downloading model: ${percent.toFixed(1)}%`);
-      });
-      console.log();
-      downloadedAny = true;
-    } catch (e) {
-      console.log();
-      console.log(`${WARN} Skipping ${candidate.id}: download failed: ${e.message}`);
-      lastLoadError = e;
-      continue;
-    }
-
-    try {
-      await candidate.load();
-      chosen = candidate;
-      break;
-    } catch (e) {
-      console.log(`${WARN} Skipping ${candidate.id}: load failed: ${e.message}`);
-      lastLoadError = e;
-    }
-  }
-
-  logResult(
-    "Model Download",
-    downloadedAny,
-    downloadedAny ? "At least one accelerated variant downloaded" : lastLoadError?.message || "No accelerated variant could be downloaded",
-  );
-
-  if (!chosen) {
-    logResult("Model Load", false, lastLoadError?.message || "No accelerated variant could be loaded on this machine");
-    printSummary();
-    process.exit(1);
-  }
-
-  logResult("Model Load", true, `Loaded ${chosen.id}`);
-
-  // ── 4. Streaming Chat Completions (Native SDK) ────────────
-  printSeparator("Step 4: Streaming Chat Completions (Native)");
-  const messages = [
-    { role: "system", content: "You are a helpful assistant." },
-    { role: "user", content: "What is 2 + 2? Reply with just the number." },
-  ];
-
-  try {
-    const client = chosen.createChatClient();
-    client.settings.temperature = 0;
-    client.settings.maxTokens = 16;
-    let responseText = "";
-    const start = Date.now();
-    for await (const chunk of client.completeStreamingChat(messages)) {
-      const content = chunk?.choices?.[0]?.delta?.content;
-      if (content) {
-        responseText += content;
-        process.stdout.write(content);
-      }
-    }
-    const elapsed = ((Date.now() - start) / 1000).toFixed(2);
-    console.log();
-    logResult("Streaming Chat (Native)", responseText.length > 0, `${responseText.length} chars in ${elapsed}s`);
-  } catch (e) {
-    logResult("Streaming Chat (Native)", false, e.message);
-  }
-
-  try {
-    await chosen.unload();
-    console.log(`${INFO} Model unloaded.`);
-  } catch (e) {
-    console.warn(`${WARN} Failed to unload model: ${e.message}`);
-  }
-
-  printSummary();
-}
-
-function printSummary() {
-  printSeparator("Summary");
-  const passed = results.filter((r) => r.passed).length;
-  for (const { testName, passed: p } of results) {
-    console.log(`  ${p ? "✓" : "✗"} ${testName}`);
-  }
-  console.log(`\n  ${passed}/${results.length} tests passed`);
-  if (passed < results.length) process.exit(1);
-}
-
-main().catch((e) => {
-  console.error(e);
-  process.exit(1);
-});
diff --git a/samples/js/verify-winml/package.json b/samples/js/verify-winml/package.json
deleted file mode 100644
index f8ba84ad9..000000000
--- a/samples/js/verify-winml/package.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-  "name": "verify-winml",
-  "version": "1.0.0",
-  "type": "module",
-  "main": "app.js",
-  "scripts": {
-    "postinstall": "node node_modules/foundry-local-sdk/script/install-winml.cjs"
-  },
-  "dependencies": {
-    "foundry-local-sdk": "file:../../../sdk/js"
-  }
-}
diff --git a/samples/js/web-server-example/app.js b/samples/js/web-server-example/app.js
deleted file mode 100644
index c9a1e5ce1..000000000
--- a/samples/js/web-server-example/app.js
+++ /dev/null
@@ -1,81 +0,0 @@
-// <complete_code>
-// <imports>
-import { FoundryLocalManager } from 'foundry-local-sdk';
-import { OpenAI } from 'openai';
-// </imports>
-
-// Initialize the Foundry Local SDK
-console.log('Initializing Foundry Local SDK...');
-
-const endpointUrl = 'http://localhost:5764';
-
-// <init>
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info',
-    webServiceUrls: endpointUrl
-});
-// </init>
-console.log('✓ SDK initialized successfully');
-
-// Download and register all execution providers.
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
-
-// <model_setup>
-// Get the model object
-const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above
-const model = await manager.catalog.getModel(modelAlias);
-
-// Download the model
-console.log(`\nDownloading model ${modelAlias}...`);
-await model.download((progress) => {
-    process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
-});
-console.log('\n✓ Model downloaded');
-
-// Load the model
-console.log(`\nLoading model ${modelAlias}...`);
-await model.load();
-console.log('✓ Model loaded');
-// </model_setup>
-
-// <server_setup>
-// Start the web service
-console.log('\nStarting web service...');
-manager.startWebService();
-console.log('✓ Web service started');
-
-const openai = new OpenAI({
-    baseURL: endpointUrl + '/v1',
-    apiKey: 'notneeded',
-});
-
-// Example chat completion
-console.log('\nTesting chat completion with OpenAI client...');
-const response = await openai.chat.completions.create({
-    model: model.id,
-    messages: [
-    {
-        role: "user",
-        content: "What is the golden ratio?",
-    },
-    ],
-});
-
-console.log(response.choices[0].message.content);
-// </server_setup>
-
-// Tidy up
-console.log('Unloading model and stopping web service...');
-await model.unload();
-manager.stopWebService();
-console.log(`✓ Model unloaded and web service stopped`);
-// </complete_code>
diff --git a/samples/python/README.md b/samples/python/README.md
index 49e99c8a6..51b85d8d0 100644
--- a/samples/python/README.md
+++ b/samples/python/README.md
@@ -2,6 +2,9 @@
 
 These samples demonstrate how to use Foundry Local with Python.
 
+They consume the SDK **from local repo source** (an editable install of
+`sdk/python`), so they always track `main` rather than a published PyPI release.
+
 ## Prerequisites
 
 - [Python](https://www.python.org/) 3.11 or later
@@ -10,17 +13,10 @@ These samples demonstrate how to use Foundry Local with Python.
 
 | Sample | Description |
 |--------|-------------|
-| [native-chat-completions](native-chat-completions/) | Initialize the SDK, start the local service, and run streaming chat completions. |
+| [chat-completion](chat-completion/) | Run the same chat prompt two ways: native in-process inference **and** the local OpenAI-compatible web server (`/v1/chat/completions`). |
 | [embeddings](embeddings/) | Generate single and batch text embeddings using the Foundry Local SDK. |
-| [audio-transcription](audio-transcription/) | Transcribe audio files using the Whisper model. |
-| [web-server](web-server/) | Start a local OpenAI-compatible web server and call it with the OpenAI Python SDK. |
-| [web-server-responses](web-server-responses/) | Call a running local OpenAI-compatible web server with the Responses API, including streaming and tool calling. |
-| [tool-calling](tool-calling/) | Tool calling with custom function definitions (get_weather, calculate). |
-| [langchain-integration](langchain-integration/) | LangChain integration for building translation and text generation chains. |
-| [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). |
-| [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). |
-| [tutorial-tool-calling](tutorial-tool-calling/) | Create a tool-calling assistant (tutorial). |
-| [tutorial-voice-to-text](tutorial-voice-to-text/) | Transcribe and summarize audio (tutorial). |
+| [audio](audio/) | Transcribe audio two ways: live microphone streaming with Nemotron ASR (default) **and** file-based transcription with Whisper via `--file`. |
+| [responses-api](responses-api/) | Vision (image understanding) via the local web server using the OpenAI Responses API. |
 
 ## Running a Sample
 
@@ -31,10 +27,11 @@ These samples demonstrate how to use Foundry Local with Python.
    cd Foundry-Local/samples/python
    ```
 
-2. Navigate to a sample and install dependencies:
+2. Navigate to a sample and install dependencies (this installs the SDK from
+   `sdk/python` source via an editable install):
 
    ```bash
-   cd native-chat-completions
+   cd chat-completion
    pip install -r requirements.txt
    ```
 
@@ -45,4 +42,6 @@ These samples demonstrate how to use Foundry Local with Python.
    ```
 
 > [!TIP]
-> Each sample's `requirements.txt` uses environment markers to automatically install the right SDK for your platform. On **Windows**, `foundry-local-sdk-winml` is installed for broader hardware acceleration. On **macOS and Linux**, the standard `foundry-local-sdk` is used. Just run `pip install -r requirements.txt` — platform detection is handled for you.
+> Each sample's `requirements.txt` installs the base SDK from local source with
+> `-e ../../../sdk/python`, so the samples track `main` and are **not**
+> version-pinned to PyPI.
diff --git a/samples/python/audio-transcription/requirements.txt b/samples/python/audio-transcription/requirements.txt
deleted file mode 100644
index 7602a48b7..000000000
--- a/samples/python/audio-transcription/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
diff --git a/samples/python/audio-transcription/src/app.py b/samples/python/audio-transcription/src/app.py
deleted file mode 100644
index ca06fb280..000000000
--- a/samples/python/audio-transcription/src/app.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# <complete_code>
-# <imports>
-import sys
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-
-# <init>
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
-    print()
-
-# Load the whisper model for speech-to-text
-model = manager.catalog.get_model("whisper-tiny")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
-    )
-)
-print()
-model.load()
-print("Model loaded.")
-# </init>
-
-# <transcription>
-# Get the audio client and transcribe
-audio_client = model.get_audio_client()
-audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
-result = audio_client.transcribe(audio_file)
-print("Transcription:")
-print(result.text)
-# </transcription>
-
-# Clean up
-model.unload()
-# </complete_code>
diff --git a/samples/python/audio/README.md b/samples/python/audio/README.md
new file mode 100644
index 000000000..6d083f5a8
--- /dev/null
+++ b/samples/python/audio/README.md
@@ -0,0 +1,106 @@
+# Audio Transcription Example (Live + File)
+
+Transcribe audio two ways with the Foundry Local Python SDK:
+
+- **Live microphone** streaming with **Nemotron ASR** (default).
+- **File-based** transcription with **Whisper** via `--file [path]`.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- Python 3.11+
+- A microphone (live mode only; synthetic audio is used instead with `--synth`
+  or when PyAudio is unavailable). File mode needs no microphone.
+
+## Setup
+
+This sample installs the Foundry Local SDK **from local repo source** (an
+editable install of `sdk/python`), so it always tracks `main` rather than a
+published PyPI release:
+
+```bash
+cd samples/python/audio
+pip install -r requirements.txt
+```
+
+That installs:
+
+- `foundry-local-sdk` (editable, from `../../../sdk/python`)
+
+> **`pyaudio` is optional** — it provides cross-platform microphone capture for
+> live mode. Without it, live mode falls back to synthetic audio, and `--file`
+> transcription works regardless. Install manually if needed:
+>
+> ```bash
+> pip install pyaudio
+> ```
+
+## Run
+
+### Live microphone (default — Nemotron ASR)
+
+```bash
+python src/app.py
+```
+
+Speak into your microphone. Transcription appears in real-time. Press `Ctrl+C`
+to stop.
+
+To force synthetic audio (e.g., for CI or when no microphone is available):
+
+```bash
+python src/app.py --synth
+```
+
+### File-based (Whisper)
+
+Transcribe the bundled `src/Recording.mp3`:
+
+```bash
+python src/app.py --file
+```
+
+Or transcribe a specific file:
+
+```bash
+python src/app.py --file path/to/audio.wav
+```
+
+## How it works
+
+**Live mode (Nemotron ASR):**
+
+1. Initializes the SDK and loads the Nemotron streaming ASR model.
+2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings.
+3. Captures microphone audio via `pyaudio` (or generates synthetic audio).
+4. Pushes PCM chunks to the SDK via `session.append()`.
+5. Reads transcription results in a background thread via
+   `for result in session.get_stream()`.
+
+**File mode (Whisper):**
+
+1. Initializes the SDK and loads the `whisper-tiny` model.
+2. Calls `audio_client.transcribe(audio_file)` and prints the result text.
+
+## API
+
+```python
+# Live streaming
+audio_client = model.get_audio_client()
+session = audio_client.create_live_transcription_session()
+session.settings.sample_rate = 16000
+session.settings.channels = 1
+session.settings.language = "en"
+
+session.start()
+session.append(pcm_bytes)                 # push audio
+for result in session.get_stream():       # read results (background thread)
+    print(result.content[0].text)         # transcribed text
+    print(result.is_final)                # True for final results
+session.stop()
+
+# File transcription
+audio_client = model.get_audio_client()
+result = audio_client.transcribe("Recording.mp3")
+print(result.text)
+```
diff --git a/samples/python/audio/requirements.txt b/samples/python/audio/requirements.txt
new file mode 100644
index 000000000..65794ea6c
--- /dev/null
+++ b/samples/python/audio/requirements.txt
@@ -0,0 +1,7 @@
+# Installs the Foundry Local SDK from local repo source (tracks `main`, not PyPI).
+# Run pip from this sample directory so the relative `-e` path below resolves.
+-e ../../../sdk/python
+# pyaudio is optional — only needed for live microphone capture.
+# Install manually: pip install pyaudio
+# The sample falls back to synthetic audio if pyaudio is unavailable,
+# and --file transcription needs no microphone at all.
diff --git a/samples/python/audio-transcription/Recording.mp3 b/samples/python/audio/src/Recording.mp3
similarity index 100%
rename from samples/python/audio-transcription/Recording.mp3
rename to samples/python/audio/src/Recording.mp3
diff --git a/samples/python/audio/src/app.py b/samples/python/audio/src/app.py
new file mode 100644
index 000000000..01df9092d
--- /dev/null
+++ b/samples/python/audio/src/app.py
@@ -0,0 +1,254 @@
+# Audio Transcription — Foundry Local SDK Example (Python)
+#
+# Two modes:
+#   * Live microphone streaming with Nemotron ASR (default).
+#   * File-based transcription with Whisper via --file [path].
+#
+# Live mode tries PyAudio mic capture first; falls back to synthetic PCM if
+# unavailable.
+#
+# Usage:
+#   pip install -r requirements.txt
+#   python src/app.py                      # Live microphone (Nemotron)
+#   python src/app.py --synth              # Synthetic 440Hz sine wave (Nemotron)
+#   python src/app.py --file               # Transcribe bundled Recording.mp3 (Whisper)
+#   python src/app.py --file path/to.wav   # Transcribe a specific file (Whisper)
+
+import math
+import os
+import signal
+import struct
+import sys
+import threading
+import time
+
+from foundry_local_sdk import Configuration, FoundryLocalManager
+
+
+def init_manager():
+    """Initialize the Foundry Local SDK and return its manager with EPs registered."""
+    # `instance` is read only after initialize() has been called with the config.
+    FoundryLocalManager.initialize(Configuration(app_name="foundry_local_samples"))
+    sdk_manager = FoundryLocalManager.instance
+    sdk_manager.download_and_register_eps()
+    return sdk_manager
+
+
+def parse_file_arg(args):
+    """Resolve --file: None when the flag is absent, otherwise the path to transcribe.
+
+    A bare `--file` (last argument, or followed by another flag) selects the
+    Recording.mp3 that sits beside this script."""
+    if "--file" not in args:
+        return None
+    value_pos = args.index("--file") + 1
+    candidate = args[value_pos] if value_pos < len(args) else None
+    if candidate is not None and not candidate.startswith("-"):
+        return candidate
+    return os.path.join(os.path.dirname(__file__), "Recording.mp3")
+
+
+def transcribe_file(manager, audio_file):
+    """Transcribe `audio_file` with the `whisper-tiny` model and print the text."""
+    print("===========================================================")
+    print("   Foundry Local -- File Audio Transcription (Python)")
+    print("===========================================================")
+    print()
+
+    model = manager.catalog.get_model("whisper-tiny")
+    if model is None:
+        raise RuntimeError('Model "whisper-tiny" not found in catalog')
+
+    model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%", end="", flush=True
+        )
+    )
+    print()
+    print(f"Loading model {model.id}...", end="", flush=True)
+    model.load()
+    print("done.")
+    # Release the model even when transcription raises (e.g. unreadable file).
+    try:
+        print(f"\nTranscribing: {audio_file}")
+        result = model.get_audio_client().transcribe(audio_file)
+        print("\nTranscription:")
+        print(result.text)
+    finally:
+        model.unload()
+
+
+def transcribe_live(manager, use_synth):
+    """Stream live mic (or synthetic) audio to Nemotron ASR, then clean up and exit."""
+    print("===========================================================")
+    print("   Foundry Local -- Live Audio Transcription Demo (Python)")
+    print("===========================================================")
+    print()
+
+    # English-only:
+    model_alias = "nemotron-speech-streaming-en-0.6b"
+    # Multi-lingual (supports 30+ languages including auto-detect):
+    # model_alias = "nemotron-3.5-asr-streaming-0.6b"
+    model = manager.catalog.get_model(model_alias)
+    if model is None:
+        raise RuntimeError(f'Model "{model_alias}" not found in catalog')
+
+    model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%", end="", flush=True
+        )
+    )
+    print()
+    print(f"Loading model {model.id}...", end="", flush=True)
+    model.load()
+    print("done.")
+
+    audio_client = model.get_audio_client()
+    session = audio_client.create_live_transcription_session()
+    session.settings.sample_rate = 16000
+    session.settings.channels = 1
+    session.settings.language = "en"                  # English (default)
+    # Multi-lingual examples:
+    # session.settings.language = "de"     # German
+    # session.settings.language = "zh-CN"  # Chinese (Simplified)
+    # session.settings.language = "auto"   # Auto-detect language
+
+    session.start()
+    print("✓ Session started")
+
+    # --- Background thread reads transcription results (mirrors JS readPromise) ---
+
+    def read_results():
+        for result in session.get_stream():
+            text = result.content[0].text if result.content else ""
+            if result.is_final:
+                print()
+                print(f"  [FINAL] {text}")
+            elif text:
+                print(text, end="", flush=True)
+
+    read_thread = threading.Thread(target=read_results, daemon=True)
+    read_thread.start()
+
+    # --- Microphone capture (mirrors JS naudiodon2 / C++ PortAudio) ---
+    # Try PyAudio for mic input; fall back to synthetic PCM on failure.
+
+    RATE = 16000
+    CHANNELS = 1
+    CHUNK = RATE // 10  # 100ms of audio = 1600 frames
+
+    stop_event = threading.Event()
+    mic_active = False
+    pa = None
+    stream = None
+
+    if not use_synth:
+        try:
+            import pyaudio
+
+            pa = pyaudio.PyAudio()
+            stream = pa.open(
+                format=pyaudio.paInt16,
+                channels=CHANNELS,
+                rate=RATE,
+                input=True,
+                frames_per_buffer=CHUNK,
+            )
+            mic_active = True
+
+            print()
+            print("===========================================================")
+            print("  LIVE TRANSCRIPTION ACTIVE")
+            print("  Speak into your microphone.")
+            print("  Press Ctrl+C to stop.")
+            print("===========================================================")
+            print()
+
+            def capture_mic():
+                while not stop_event.is_set():
+                    try:
+                        pcm_data = stream.read(CHUNK, exception_on_overflow=False)
+                        if pcm_data:
+                            session.append(pcm_data)
+                    except Exception as e:
+                        print(f"\n[ERROR] Microphone capture failed: {e}")
+                        stop_event.set()
+                        break
+
+            capture_thread = threading.Thread(target=capture_mic, daemon=True)
+            capture_thread.start()
+
+        except Exception as e:
+            print(f"Could not initialize microphone: {e}")
+            print("Falling back to synthetic audio test...")
+            print()
+            mic_active = False
+            if stream:
+                stream.close()
+            if pa:
+                pa.terminate()
+            pa = None
+            stream = None
+
+    # Fallback: push synthetic PCM (440Hz sine wave) — mirrors JS catch block
+    if not mic_active:
+        print("Pushing synthetic audio (440Hz sine, 2s)...")
+        duration = 2
+        total_samples = RATE * duration
+        pcm_bytes = bytearray(total_samples * 2)
+        for i in range(total_samples):
+            t = i / RATE
+            sample = int(32767 * 0.5 * math.sin(2 * math.pi * 440 * t))
+            struct.pack_into("<h", pcm_bytes, i * 2, sample)
+
+        chunk_size = (RATE // 10) * 2  # 100ms
+        for offset in range(0, len(pcm_bytes), chunk_size):
+            end = min(offset + chunk_size, len(pcm_bytes))
+            session.append(bytes(pcm_bytes[offset:end]))
+            time.sleep(0.1)
+
+        print("✓ Synthetic audio pushed")
+        time.sleep(3)  # Wait for remaining transcription results
+
+    # --- Graceful shutdown (mirrors JS SIGINT handler / C++ SignalHandler) ---
+
+    def shutdown(*_args):
+        print("\n\nStopping...")
+        stop_event.set()
+
+        if stream:
+            stream.stop_stream()
+            stream.close()
+        if pa:
+            pa.terminate()
+
+        session.stop()
+        read_thread.join(timeout=5)
+        model.unload()
+        print("✓ Done")
+        sys.exit(0)
+
+    signal.signal(signal.SIGINT, lambda *a: shutdown())
+
+    if mic_active:
+        # Block until Ctrl+C, or until capture_mic() fails and sets stop_event.
+        stop_event.wait()
+    # Synthetic run finished, or mic capture failed: release everything and exit.
+    shutdown()
+
+
+def main():
+    """Dispatch to file transcription (--file) or live transcription."""
+    cli_args = sys.argv[1:]
+    file_path = parse_file_arg(cli_args)
+    manager = init_manager()
+
+    if file_path is None:
+        # No --file: stream from the microphone, or synthetic audio with --synth.
+        transcribe_live(manager, "--synth" in cli_args)
+    else:
+        transcribe_file(manager, file_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/samples/python/chat-completion/README.md b/samples/python/chat-completion/README.md
new file mode 100644
index 000000000..e27ec4ea1
--- /dev/null
+++ b/samples/python/chat-completion/README.md
@@ -0,0 +1,50 @@
+# Chat Completions Example (Native + Web Server)
+
+Run the **same chat prompt two ways** with the Foundry Local Python SDK:
+
+1. **Native inference** — in-process streaming chat completions through the SDK
+   chat client (`model.get_chat_client()`), no web server involved.
+2. **Web server** — the local OpenAI-compatible REST endpoint
+   (`/v1/chat/completions`) called with the official `openai` Python client.
+
+The program prints clear section headers (`=== Native inference ===` and
+`=== Web server (/v1/chat/completions) ===`) so you can compare the two paths.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- Python 3.11+
+
+## Setup
+
+This sample installs the Foundry Local SDK **from local repo source** (an
+editable install of `sdk/python`), so it always tracks `main` rather than a
+published PyPI release:
+
+```bash
+cd samples/python/chat-completion
+pip install -r requirements.txt
+```
+
+That installs:
+
+- `foundry-local-sdk` (editable, from `../../../sdk/python`)
+- `openai` (for the local web-server flow)
+
+## Run
+
+```bash
+python src/app.py
+```
+
+You will see the model download and load once, then the same prompt answered
+first by native inference and then by the local web server.
+
+## How it works
+
+1. Initializes the SDK and registers execution providers.
+2. Downloads and loads `qwen2.5-0.5b` from the catalog.
+3. Streams a response with the in-process chat client (native inference).
+4. Starts the local web service and sends the same messages through the
+   `openai` client against `http://localhost:<port>/v1`.
+5. Stops the web service and unloads the model.
diff --git a/samples/python/chat-completion/requirements.txt b/samples/python/chat-completion/requirements.txt
new file mode 100644
index 000000000..b664aa60c
--- /dev/null
+++ b/samples/python/chat-completion/requirements.txt
@@ -0,0 +1,5 @@
+# Install the Foundry Local SDK from local repo source so the sample tracks
+# `main` instead of a published PyPI release.
+-e ../../../sdk/python
+# Third-party dependency: OpenAI client for the local web-server flow.
+openai
diff --git a/samples/python/native-chat-completions/src/app.py b/samples/python/chat-completion/src/app.py
similarity index 54%
rename from samples/python/native-chat-completions/src/app.py
rename to samples/python/chat-completion/src/app.py
index eba9df41a..6880238b8 100644
--- a/samples/python/native-chat-completions/src/app.py
+++ b/samples/python/chat-completion/src/app.py
@@ -1,5 +1,7 @@
 # <complete_code>
 # <imports>
+import openai
+
 from foundry_local_sdk import Configuration, FoundryLocalManager
 # </imports>
 
@@ -37,29 +39,57 @@ def ep_progress(ep_name: str, percent: float):
     print()
     model.load()
     print("Model loaded and ready.")
-
-    # Get a chat client
-    client = model.get_chat_client()
     # </init>
 
-    # <streaming>
-    # Create the conversation messages
+    # The same prompt is answered two ways below: native in-process inference
+    # and the local OpenAI-compatible web server.
     messages = [
-        {"role": "user", "content": "What is the golden ratio?"}
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What is the golden ratio?"},
     ]
 
-    # Stream the response token by token
+    # <native_inference>
+    # === Native inference ===
+    # Run the prompt with the in-process chat client (no web server involved).
+    print("\n=== Native inference ===")
+    client = model.get_chat_client()
+
     print("Assistant: ", end="", flush=True)
     for chunk in client.complete_streaming_chat(messages):
         content = chunk.choices[0].delta.content
         if content:
             print(content, end="", flush=True)
     print()
-    # </streaming>
+    # </native_inference>
+
+    # <web_server>
+    # === Web server (/v1/chat/completions) ===
+    # Start the local OpenAI-compatible web server and send the same prompt
+    # through the OpenAI Python client.
+    print("\n=== Web server (/v1/chat/completions) ===")
+    manager.start_web_service()
+    base_url = f"{manager.urls[0]}/v1"
+
+    # Use the OpenAI SDK to connect to the local REST endpoint
+    openai_client = openai.OpenAI(base_url=base_url, api_key="none")
+
+    print("Assistant: ", end="", flush=True)
+    response = openai_client.chat.completions.create(
+        model=model.id,
+        messages=messages,
+        stream=True,
+    )
+    for chunk in response:
+        if chunk.choices[0].delta.content is not None:
+            print(chunk.choices[0].delta.content, end="", flush=True)
+    print()
+
+    manager.stop_web_service()
+    # </web_server>
 
     # Clean up
     model.unload()
-    print("Model unloaded.")
+    print("\nModel unloaded.")
 
 
 if __name__ == "__main__":
diff --git a/samples/python/embeddings/README.md b/samples/python/embeddings/README.md
new file mode 100644
index 000000000..4f5993704
--- /dev/null
+++ b/samples/python/embeddings/README.md
@@ -0,0 +1,36 @@
+# Embeddings Example
+
+Generate single and batch text embeddings with the Foundry Local Python SDK.
+
+## Prerequisites
+
+- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
+- Python 3.11+
+
+## Setup
+
+This sample installs the Foundry Local SDK **from local repo source** (an
+editable install of `sdk/python`), so it always tracks `main` rather than a
+published PyPI release:
+
+```bash
+cd samples/python/embeddings
+pip install -r requirements.txt
+```
+
+That installs:
+
+- `foundry-local-sdk` (editable, from `../../../sdk/python`)
+
+## Run
+
+```bash
+python src/app.py
+```
+
+## How it works
+
+1. Initializes the SDK and loads the `qwen3-embedding-0.6b` model.
+2. Generates a single embedding and prints its dimensions and first values.
+3. Generates a batch of embeddings and prints the dimensions of each.
+4. Unloads the model.
diff --git a/samples/python/embeddings/requirements.txt b/samples/python/embeddings/requirements.txt
index 7602a48b7..0f6903427 100644
--- a/samples/python/embeddings/requirements.txt
+++ b/samples/python/embeddings/requirements.txt
@@ -1,2 +1,3 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
+# Install the Foundry Local SDK from local repo source so the sample tracks
+# `main` instead of a published PyPI release.
+-e ../../../sdk/python
diff --git a/samples/python/langchain-integration/requirements.txt b/samples/python/langchain-integration/requirements.txt
deleted file mode 100644
index 9a6b61817..000000000
--- a/samples/python/langchain-integration/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
-openai
-langchain-openai
-langchain-core
diff --git a/samples/python/langchain-integration/src/app.py b/samples/python/langchain-integration/src/app.py
deleted file mode 100644
index 4f8661cdc..000000000
--- a/samples/python/langchain-integration/src/app.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# <complete_code>
-# <imports>
-from foundry_local_sdk import Configuration, FoundryLocalManager
-from langchain_openai import ChatOpenAI
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.output_parsers import StrOutputParser
-# </imports>
-
-# <init>
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
-    print()
-
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
-    )
-)
-print()
-model.load()
-print("Model loaded.")
-
-# Start the web service to expose an OpenAI-compatible endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-# </init>
-
-# <langchain_setup>
-# Create a LangChain ChatOpenAI instance pointing to the local endpoint
-llm = ChatOpenAI(
-    base_url=base_url,
-    api_key="none",
-    model=model.id,
-)
-# </langchain_setup>
-
-# <chat_completion>
-# Create a translation chain
-prompt = ChatPromptTemplate.from_messages([
-    ("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
-    ("user", "{text}")
-])
-
-chain = prompt | llm | StrOutputParser()
-
-# Run the chain
-result = chain.invoke({"language": "Spanish", "text": "Hello, how are you today?"})
-print(f"Translation: {result}")
-# </chat_completion>
-
-# Clean up
-model.unload()
-manager.stop_web_service()
-# </complete_code>
diff --git a/samples/python/live-audio-transcription/README.md b/samples/python/live-audio-transcription/README.md
deleted file mode 100644
index dc8a263ad..000000000
--- a/samples/python/live-audio-transcription/README.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Live Audio Transcription Example
-
-Real-time microphone-to-text transcription using the Foundry Local Python SDK with Nemotron ASR.
-
-## Prerequisites
-
-- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed
-- Python 3.9+
-- A microphone (optional — falls back to synthetic audio with `--synth` or if PyAudio is unavailable)
-
-## Setup
-
-```bash
-pip install -r requirements.txt
-```
-
-> **Note:** `pyaudio` is **optional** — it provides cross-platform microphone capture. Without it, the example falls back to synthetic audio for testing.
->
-> Install manually if needed:
-> ```bash
-> pip install pyaudio
-> ```
-
-## Run
-
-```bash
-python src/app.py
-```
-
-Speak into your microphone. Transcription appears in real-time. Press `Ctrl+C` to stop.
-
-To force synthetic audio (e.g., for CI or when no microphone is available):
-
-```bash
-python src/app.py --synth
-```
-
-## How it works
-
-1. Initializes the Foundry Local SDK and loads the Nemotron ASR model
-2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
-3. Captures microphone audio via `pyaudio` (or generates synthetic audio as fallback)
-4. Pushes PCM chunks to the SDK via `session.append()`
-5. Reads transcription results in a background thread via `for result in session.get_stream()`
-6. Access text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern)
-
-## API
-
-```python
-audio_client = model.get_audio_client()
-session = audio_client.create_live_transcription_session()
-session.settings.sample_rate = 16000
-session.settings.channels = 1
-session.settings.language = "en"
-
-session.start()
-
-# Push audio
-session.append(pcm_bytes)
-
-# Read results (typically on a background thread)
-for result in session.get_stream():
-    print(result.content[0].text)        # transcribed text
-    print(result.content[0].transcript)  # alias (OpenAI compat)
-    print(result.is_final)               # True for final results
-
-session.stop()
-```
diff --git a/samples/python/live-audio-transcription/requirements.txt b/samples/python/live-audio-transcription/requirements.txt
deleted file mode 100644
index 6677976f6..000000000
--- a/samples/python/live-audio-transcription/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
-# pyaudio is optional — only needed for live microphone capture.
-# Install manually: pip install pyaudio
-# The sample falls back to synthetic audio if pyaudio is unavailable.
diff --git a/samples/python/live-audio-transcription/src/app.py b/samples/python/live-audio-transcription/src/app.py
deleted file mode 100644
index 07bbc12c8..000000000
--- a/samples/python/live-audio-transcription/src/app.py
+++ /dev/null
@@ -1,182 +0,0 @@
-# Live Audio Transcription — Foundry Local SDK Example (Python)
-#
-# Tries PyAudio mic capture first; falls back to synthetic PCM if unavailable.
-#
-# Usage:
-#   pip install -r requirements.txt
-#   python src/app.py              # Live microphone
-#   python src/app.py --synth      # Synthetic 440Hz sine wave
-
-import math
-import signal
-import struct
-import sys
-import threading
-import time
-
-from foundry_local_sdk import Configuration, FoundryLocalManager
-
-use_synth = "--synth" in sys.argv
-
-print("===========================================================")
-print("   Foundry Local -- Live Audio Transcription Demo (Python)")
-print("===========================================================")
-print()
-
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-manager.download_and_register_eps()
-
-# English-only:
-model_alias = "nemotron-speech-streaming-en-0.6b"
-# Multi-lingual (supports 30+ languages including auto-detect):
-# model_alias = "nemotron-3.5-asr-streaming-0.6b"
-model = manager.catalog.get_model(model_alias)
-if model is None:
-    raise RuntimeError(f'Model "{model_alias}" not found in catalog')
-
-model.download(
-    lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True)
-)
-print()
-print(f"Loading model {model.id}...", end="")
-model.load()
-print("done.")
-
-audio_client = model.get_audio_client()
-session = audio_client.create_live_transcription_session()
-session.settings.sample_rate = 16000
-session.settings.channels = 1
-session.settings.language = "en"                  # English (default)
-# Multi-lingual examples:
-# session.settings.language = "de"     # German
-# session.settings.language = "zh-CN"  # Chinese (Simplified)
-# session.settings.language = "auto"   # Auto-detect language
-
-session.start()
-print("✓ Session started")
-
-# --- Background thread reads transcription results (mirrors JS readPromise) ---
-
-def read_results():
-    for result in session.get_stream():
-        text = result.content[0].text if result.content else ""
-        if result.is_final:
-            print()
-            print(f"  [FINAL] {text}")
-        elif text:
-            print(text, end="", flush=True)
-
-
-read_thread = threading.Thread(target=read_results, daemon=True)
-read_thread.start()
-
-# --- Microphone capture (mirrors JS naudiodon2 / C++ PortAudio) ---
-# Try PyAudio for mic input; fall back to synthetic PCM on failure.
-
-RATE = 16000
-CHANNELS = 1
-CHUNK = RATE // 10  # 100ms of audio = 1600 frames
-
-stop_event = threading.Event()
-mic_active = False
-pa = None
-stream = None
-
-if not use_synth:
-    try:
-        import pyaudio
-
-        pa = pyaudio.PyAudio()
-        stream = pa.open(
-            format=pyaudio.paInt16,
-            channels=CHANNELS,
-            rate=RATE,
-            input=True,
-            frames_per_buffer=CHUNK,
-        )
-        mic_active = True
-
-        print()
-        print("===========================================================")
-        print("  LIVE TRANSCRIPTION ACTIVE")
-        print("  Speak into your microphone.")
-        print("  Press Ctrl+C to stop.")
-        print("===========================================================")
-        print()
-
-        def capture_mic():
-            while not stop_event.is_set():
-                try:
-                    pcm_data = stream.read(CHUNK, exception_on_overflow=False)
-                    if pcm_data:
-                        session.append(pcm_data)
-                except Exception as e:
-                    print(f"\n[ERROR] Microphone capture failed: {e}")
-                    stop_event.set()
-                    break
-
-        capture_thread = threading.Thread(target=capture_mic, daemon=True)
-        capture_thread.start()
-
-    except Exception as e:
-        print(f"Could not initialize microphone: {e}")
-        print("Falling back to synthetic audio test...")
-        print()
-        mic_active = False
-        if stream:
-            stream.close()
-        if pa:
-            pa.terminate()
-        pa = None
-        stream = None
-
-# Fallback: push synthetic PCM (440Hz sine wave) — mirrors JS catch block
-if not mic_active:
-    print("Pushing synthetic audio (440Hz sine, 2s)...")
-    duration = 2
-    total_samples = RATE * duration
-    pcm_bytes = bytearray(total_samples * 2)
-    for i in range(total_samples):
-        t = i / RATE
-        sample = int(32767 * 0.5 * math.sin(2 * math.pi * 440 * t))
-        struct.pack_into("<h", pcm_bytes, i * 2, sample)
-
-    chunk_size = (RATE // 10) * 2  # 100ms
-    for offset in range(0, len(pcm_bytes), chunk_size):
-        end = min(offset + chunk_size, len(pcm_bytes))
-        session.append(bytes(pcm_bytes[offset:end]))
-        time.sleep(0.1)
-
-    print("✓ Synthetic audio pushed")
-    time.sleep(3)  # Wait for remaining transcription results
-
-
-# --- Graceful shutdown (mirrors JS SIGINT handler / C++ SignalHandler) ---
-
-def shutdown(*_args):
-    print("\n\nStopping...")
-    stop_event.set()
-
-    if stream:
-        stream.stop_stream()
-        stream.close()
-    if pa:
-        pa.terminate()
-
-    session.stop()
-    read_thread.join(timeout=5)
-    model.unload()
-    print("✓ Done")
-    sys.exit(0)
-
-
-signal.signal(signal.SIGINT, lambda *a: shutdown())
-
-if mic_active:
-    # Block until Ctrl+C
-    stop_event.wait()
-else:
-    shutdown()
diff --git a/samples/python/native-chat-completions/requirements.txt b/samples/python/native-chat-completions/requirements.txt
deleted file mode 100644
index 7602a48b7..000000000
--- a/samples/python/native-chat-completions/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
diff --git a/samples/python/web-server-responses-vision/README.md b/samples/python/responses-api/README.md
similarity index 77%
rename from samples/python/web-server-responses-vision/README.md
rename to samples/python/responses-api/README.md
index 75e16950a..bb0286b2d 100644
--- a/samples/python/web-server-responses-vision/README.md
+++ b/samples/python/responses-api/README.md
@@ -9,7 +9,9 @@ It demonstrates:
 
 ## What gets installed
 
-Install the sample dependencies from `requirements.txt`:
+This sample installs the Foundry Local SDK **from local repo source** (an
+editable install of `sdk/python`), so it always tracks `main` rather than a
+published PyPI release:
 
 ```bash
 pip install -r requirements.txt
@@ -17,9 +19,9 @@ pip install -r requirements.txt
 
 That installs:
 
-- `foundry-local-sdk`
+- `foundry-local-sdk` (editable, from `../../../sdk/python`)
 - `openai`
-- `Pillow` (for image resizing)
+- `Pillow` (for image handling)
 
 The sample downloads the specified model the first time it runs (skips if already cached).
 
@@ -28,6 +30,7 @@ The sample downloads the specified model the first time it runs (skips if alread
 From this directory:
 
 ```bash
+cd samples/python/responses-api
 python -m venv .venv
 .\.venv\Scripts\activate
 pip install -r requirements.txt
diff --git a/samples/python/responses-api/requirements.txt b/samples/python/responses-api/requirements.txt
new file mode 100644
index 000000000..5ed06fa65
--- /dev/null
+++ b/samples/python/responses-api/requirements.txt
@@ -0,0 +1,5 @@
+# Install the Foundry Local SDK from local repo source so the sample tracks
+# `main` instead of a published PyPI release.
+-e ../../../sdk/python
+pillow
+openai
diff --git a/samples/python/web-server-responses-vision/src/app.py b/samples/python/responses-api/src/app.py
similarity index 100%
rename from samples/python/web-server-responses-vision/src/app.py
rename to samples/python/responses-api/src/app.py
diff --git a/samples/rust/foundry-local-webserver-responses-vision/test_image.jpg b/samples/python/responses-api/src/test_image.jpg
similarity index 100%
rename from samples/rust/foundry-local-webserver-responses-vision/test_image.jpg
rename to samples/python/responses-api/src/test_image.jpg
diff --git a/samples/python/tool-calling/requirements.txt b/samples/python/tool-calling/requirements.txt
deleted file mode 100644
index 7602a48b7..000000000
--- a/samples/python/tool-calling/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
diff --git a/samples/python/tool-calling/src/app.py b/samples/python/tool-calling/src/app.py
deleted file mode 100644
index db619550c..000000000
--- a/samples/python/tool-calling/src/app.py
+++ /dev/null
@@ -1,195 +0,0 @@
-# <complete_code>
-# <imports>
-import json
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-
-# <tool_definitions>
-# --- Tool definitions ---
-tools = [
-    {
-        "type": "function",
-        "function": {
-            "name": "get_weather",
-            "description": "Get the current weather for a location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city or location"
-                    },
-                    "unit": {
-                        "type": "string",
-                        "enum": ["celsius", "fahrenheit"],
-                        "description": "Temperature unit"
-                    }
-                },
-                "required": ["location"]
-            }
-        }
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "calculate",
-            "description": "Perform a math calculation",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "expression": {
-                        "type": "string",
-                        "description": (
-                            "The math expression to evaluate"
-                        )
-                    }
-                },
-                "required": ["expression"]
-            }
-        }
-    }
-]
-
-
-# --- Tool implementations ---
-def get_weather(location, unit="celsius"):
-    """Simulate a weather lookup."""
-    return {
-        "location": location,
-        "temperature": 22 if unit == "celsius" else 72,
-        "unit": unit,
-        "condition": "Sunny"
-    }
-
-
-def calculate(expression):
-    """Evaluate a math expression safely."""
-    allowed = set("0123456789+-*/(). ")
-    if not all(c in allowed for c in expression):
-        return {"error": "Invalid expression"}
-    try:
-        result = eval(expression)
-        return {"expression": expression, "result": result}
-    except Exception as e:
-        return {"error": str(e)}
-
-
-tool_functions = {
-    "get_weather": get_weather,
-    "calculate": calculate
-}
-# </tool_definitions>
-
-
-# <tool_loop>
-def process_tool_calls(messages, response, client):
-    """Handle tool calls in a loop until the model produces a final answer."""
-    choice = response.choices[0].message
-
-    while choice.tool_calls:
-        # Convert the assistant message to a dict for the SDK
-        assistant_msg = {
-            "role": "assistant",
-            "content": choice.content,
-            "tool_calls": [
-                {
-                    "id": tc.id,
-                    "type": tc.type,
-                    "function": {
-                        "name": tc.function.name,
-                        "arguments": tc.function.arguments,
-                    },
-                }
-                for tc in choice.tool_calls
-            ],
-        }
-        messages.append(assistant_msg)
-
-        for tool_call in choice.tool_calls:
-            function_name = tool_call.function.name
-            arguments = json.loads(tool_call.function.arguments)
-            print(f"  Tool call: {function_name}({arguments})")
-
-            # Execute the function and add the result
-            func = tool_functions[function_name]
-            result = func(**arguments)
-            messages.append({
-                "role": "tool",
-                "tool_call_id": tool_call.id,
-                "content": json.dumps(result)
-            })
-
-        # Send the updated conversation back
-        response = client.complete_chat(messages, tools=tools)
-        choice = response.choices[0].message
-
-    return choice.content
-# </tool_loop>
-
-
-# <init>
-def main():
-    # Initialize the Foundry Local SDK
-    config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
-    manager = FoundryLocalManager.instance
-
-    # Download and register all execution providers.
-    current_ep = ""
-    def ep_progress(ep_name: str, percent: float):
-        nonlocal current_ep
-        if ep_name != current_ep:
-            if current_ep:
-                print()
-            current_ep = ep_name
-        print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-    manager.download_and_register_eps(progress_callback=ep_progress)
-    if current_ep:
-        print()
-
-    # Select and load a model
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
-        lambda progress: print(
-            f"\rDownloading model: {progress:.2f}%",
-            end="",
-            flush=True
-        )
-    )
-    print()
-    model.load()
-    print("Model loaded and ready.")
-
-    # Get a chat client
-    client = model.get_chat_client()
-
-    # Conversation with a system prompt
-    messages = [
-        {
-            "role": "system",
-            "content": "You are a helpful assistant with access to tools. "
-                       "Use them when needed to answer questions accurately."
-        },
-        {
-            "role": "user",
-            "content": "What is the weather in Seattle and what is 42 * 17?"
-        }
-    ]
-
-    print("Sending request with tools...")
-    response = client.complete_chat(messages, tools=tools)
-    answer = process_tool_calls(messages, response, client)
-
-    print(f"\nAssistant: {answer}")
-
-    # Clean up
-    model.unload()
-    print("Model unloaded.")
-# </init>
-
-
-if __name__ == "__main__":
-    main()
-# </complete_code>
diff --git a/samples/python/tutorial-chat-assistant/requirements.txt b/samples/python/tutorial-chat-assistant/requirements.txt
deleted file mode 100644
index 7602a48b7..000000000
--- a/samples/python/tutorial-chat-assistant/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
diff --git a/samples/python/tutorial-chat-assistant/src/app.py b/samples/python/tutorial-chat-assistant/src/app.py
deleted file mode 100644
index 13f1c500a..000000000
--- a/samples/python/tutorial-chat-assistant/src/app.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# <complete_code>
-# <imports>
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-
-def main():
-    # <init>
-    # Initialize the Foundry Local SDK
-    config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
-    manager = FoundryLocalManager.instance
-
-    # Download and register all execution providers.
-    current_ep = ""
-    def ep_progress(ep_name: str, percent: float):
-        nonlocal current_ep
-        if ep_name != current_ep:
-            if current_ep:
-                print()
-            current_ep = ep_name
-        print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-    manager.download_and_register_eps(progress_callback=ep_progress)
-    if current_ep:
-        print()
-
-    # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True))
-    print()
-    model.load()
-    print("Model loaded and ready.")
-
-    # Get a chat client
-    client = model.get_chat_client()
-    # </init>
-
-    # <system_prompt>
-    # Start the conversation with a system prompt
-    messages = [
-        {
-            "role": "system",
-            "content": "You are a helpful, friendly assistant. Keep your responses "
-                       "concise and conversational. If you don't know something, say so."
-        }
-    ]
-    # </system_prompt>
-
-    print("\nChat assistant ready! Type 'quit' to exit.\n")
-
-    # <conversation_loop>
-    while True:
-        user_input = input("You: ")
-        if user_input.strip().lower() in ("quit", "exit"):
-            break
-
-        # Add the user's message to conversation history
-        messages.append({"role": "user", "content": user_input})
-
-        # <streaming>
-        # Stream the response token by token
-        print("Assistant: ", end="", flush=True)
-        full_response = ""
-        for chunk in client.complete_streaming_chat(messages):
-            content = chunk.choices[0].delta.content
-            if content:
-                print(content, end="", flush=True)
-                full_response += content
-        print("\n")
-        # </streaming>
-
-        # Add the complete response to conversation history
-        messages.append({"role": "assistant", "content": full_response})
-    # </conversation_loop>
-
-    # Clean up - unload the model
-    model.unload()
-    print("Model unloaded. Goodbye!")
-
-
-if __name__ == "__main__":
-    main()
-# </complete_code>
diff --git a/samples/python/tutorial-document-summarizer/requirements.txt b/samples/python/tutorial-document-summarizer/requirements.txt
deleted file mode 100644
index 7602a48b7..000000000
--- a/samples/python/tutorial-document-summarizer/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
diff --git a/samples/python/tutorial-document-summarizer/src/app.py b/samples/python/tutorial-document-summarizer/src/app.py
deleted file mode 100644
index 055bb9924..000000000
--- a/samples/python/tutorial-document-summarizer/src/app.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# <complete_code>
-# <imports>
-import sys
-from pathlib import Path
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-
-def summarize_file(client, file_path, system_prompt):
-    """Summarize a single file and print the result."""
-    content = Path(file_path).read_text(encoding="utf-8")
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": content}
-    ]
-    response = client.complete_chat(messages)
-    print(response.choices[0].message.content)
-
-
-def summarize_directory(client, directory, system_prompt):
-    """Summarize all .txt files in a directory."""
-    txt_files = sorted(Path(directory).glob("*.txt"))
-
-    if not txt_files:
-        print(f"No .txt files found in {directory}")
-        return
-
-    for txt_file in txt_files:
-        print(f"--- {txt_file.name} ---")
-        summarize_file(client, txt_file, system_prompt)
-        print()
-
-
-def main():
-    # <init>
-    # Initialize the Foundry Local SDK
-    config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
-    manager = FoundryLocalManager.instance
-
-    # Download and register all execution providers.
-    current_ep = ""
-    def ep_progress(ep_name: str, percent: float):
-        nonlocal current_ep
-        if ep_name != current_ep:
-            if current_ep:
-                print()
-            current_ep = ep_name
-        print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-    manager.download_and_register_eps(progress_callback=ep_progress)
-    if current_ep:
-        print()
-
-    # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True))
-    print()
-    model.load()
-    print("Model loaded and ready.\n")
-
-    # Get a chat client
-    client = model.get_chat_client()
-    # </init>
-
-    # <summarization>
-    system_prompt = (
-        "Summarize the following document into concise bullet points. "
-        "Focus on the key points and main ideas."
-    )
-
-    # <file_reading>
-    target = sys.argv[1] if len(sys.argv) > 1 else "document.txt"
-    target_path = Path(target)
-    # </file_reading>
-
-    if target_path.is_dir():
-        summarize_directory(client, target_path, system_prompt)
-    else:
-        print(f"--- {target_path.name} ---")
-        summarize_file(client, target_path, system_prompt)
-    # </summarization>
-
-    # Clean up
-    model.unload()
-    print("\nModel unloaded. Done!")
-
-
-if __name__ == "__main__":
-    main()
-# </complete_code>
diff --git a/samples/python/tutorial-tool-calling/requirements.txt b/samples/python/tutorial-tool-calling/requirements.txt
deleted file mode 100644
index 7602a48b7..000000000
--- a/samples/python/tutorial-tool-calling/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
diff --git a/samples/python/tutorial-tool-calling/src/app.py b/samples/python/tutorial-tool-calling/src/app.py
deleted file mode 100644
index bb22bfe0b..000000000
--- a/samples/python/tutorial-tool-calling/src/app.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# <complete_code>
-# <imports>
-import json
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-
-# <tool_definitions>
-# --- Tool definitions ---
-tools = [
-    {
-        "type": "function",
-        "function": {
-            "name": "get_weather",
-            "description": "Get the current weather for a location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city or location"
-                    },
-                    "unit": {
-                        "type": "string",
-                        "enum": ["celsius", "fahrenheit"],
-                        "description": "Temperature unit"
-                    }
-                },
-                "required": ["location"]
-            }
-        }
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "calculate",
-            "description": "Perform a math calculation",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "expression": {
-                        "type": "string",
-                        "description": (
-                            "The math expression to evaluate"
-                        )
-                    }
-                },
-                "required": ["expression"]
-            }
-        }
-    }
-]
-
-
-# --- Tool implementations ---
-def get_weather(location, unit="celsius"):
-    """Simulate a weather lookup."""
-    return {
-        "location": location,
-        "temperature": 22 if unit == "celsius" else 72,
-        "unit": unit,
-        "condition": "Sunny"
-    }
-
-
-def calculate(expression):
-    """Evaluate a math expression safely."""
-    allowed = set("0123456789+-*/(). ")
-    if not all(c in allowed for c in expression):
-        return {"error": "Invalid expression"}
-    try:
-        result = eval(expression)
-        return {"expression": expression, "result": result}
-    except Exception as e:
-        return {"error": str(e)}
-
-
-tool_functions = {
-    "get_weather": get_weather,
-    "calculate": calculate
-}
-# </tool_definitions>
-
-
-# <tool_loop>
-def process_tool_calls(messages, response, client):
-    """Handle tool calls in a loop until the model produces a final answer."""
-    choice = response.choices[0].message
-
-    while choice.tool_calls:
-        # Convert the assistant message to a dict for the SDK
-        assistant_msg = {
-            "role": "assistant",
-            "content": choice.content,
-            "tool_calls": [
-                {
-                    "id": tc.id,
-                    "type": tc.type,
-                    "function": {
-                        "name": tc.function.name,
-                        "arguments": tc.function.arguments,
-                    },
-                }
-                for tc in choice.tool_calls
-            ],
-        }
-        messages.append(assistant_msg)
-
-        for tool_call in choice.tool_calls:
-            function_name = tool_call.function.name
-            arguments = json.loads(tool_call.function.arguments)
-            print(f"  Tool call: {function_name}({arguments})")
-
-            # Execute the function and add the result
-            func = tool_functions[function_name]
-            result = func(**arguments)
-            messages.append({
-                "role": "tool",
-                "tool_call_id": tool_call.id,
-                "content": json.dumps(result)
-            })
-
-        # Send the updated conversation back
-        response = client.complete_chat(messages, tools=tools)
-        choice = response.choices[0].message
-
-    return choice.content
-# </tool_loop>
-
-
-# <init>
-def main():
-    # Initialize the Foundry Local SDK
-    config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
-    manager = FoundryLocalManager.instance
-
-    # Download and register all execution providers.
-    current_ep = ""
-    def ep_progress(ep_name: str, percent: float):
-        nonlocal current_ep
-        if ep_name != current_ep:
-            if current_ep:
-                print()
-            current_ep = ep_name
-        print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-    manager.download_and_register_eps(progress_callback=ep_progress)
-    if current_ep:
-        print()
-
-    # Select and load a model
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
-        lambda progress: print(
-            f"\rDownloading model: {progress:.2f}%",
-            end="",
-            flush=True
-        )
-    )
-    print()
-    model.load()
-    print("Model loaded and ready.")
-
-    # Get a chat client
-    client = model.get_chat_client()
-
-    # Conversation with a system prompt
-    messages = [
-        {
-            "role": "system",
-            "content": "You are a helpful assistant with access to tools. "
-                       "Use them when needed to answer questions accurately."
-        }
-    ]
-
-    print("\nTool-calling assistant ready! Type 'quit' to exit.\n")
-
-    while True:
-        user_input = input("You: ")
-        if user_input.strip().lower() in ("quit", "exit"):
-            break
-
-        messages.append({"role": "user", "content": user_input})
-
-        response = client.complete_chat(messages, tools=tools)
-        answer = process_tool_calls(messages, response, client)
-
-        messages.append({"role": "assistant", "content": answer})
-        print(f"Assistant: {answer}\n")
-
-    # Clean up
-    model.unload()
-    print("Model unloaded. Goodbye!")
-# </init>
-
-
-if __name__ == "__main__":
-    main()
-# </complete_code>
diff --git a/samples/python/tutorial-voice-to-text/requirements.txt b/samples/python/tutorial-voice-to-text/requirements.txt
deleted file mode 100644
index 7602a48b7..000000000
--- a/samples/python/tutorial-voice-to-text/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
diff --git a/samples/python/tutorial-voice-to-text/src/app.py b/samples/python/tutorial-voice-to-text/src/app.py
deleted file mode 100644
index 8ebbba1bc..000000000
--- a/samples/python/tutorial-voice-to-text/src/app.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# <complete_code>
-# <imports>
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-
-def main():
-    # <init>
-    # Initialize the Foundry Local SDK
-    config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
-    manager = FoundryLocalManager.instance
-    # </init>
-
-    # Download and register all execution providers.
-    current_ep = ""
-    def ep_progress(ep_name: str, percent: float):
-        nonlocal current_ep
-        if ep_name != current_ep:
-            if current_ep:
-                print()
-            current_ep = ep_name
-        print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-    manager.download_and_register_eps(progress_callback=ep_progress)
-    if current_ep:
-        print()
-
-    # <transcription>
-    # Load the speech-to-text model
-    speech_model = manager.catalog.get_model("whisper-tiny")
-    speech_model.download(
-        lambda progress: print(
-            f"\rDownloading speech model: {progress:.2f}%",
-            end="",
-            flush=True,
-        )
-    )
-    print()
-    speech_model.load()
-    print("Speech model loaded.")
-
-    # Transcribe the audio file
-    audio_client = speech_model.get_audio_client()
-    transcription = audio_client.transcribe("meeting-notes.wav")
-    print(f"\nTranscription:\n{transcription.text}")
-
-    # Unload the speech model to free memory
-    speech_model.unload()
-    # </transcription>
-
-    # <summarization>
-    # Load the chat model for summarization
-    chat_model = manager.catalog.get_model("qwen2.5-0.5b")
-    chat_model.download(
-        lambda progress: print(
-            f"\rDownloading chat model: {progress:.2f}%",
-            end="",
-            flush=True,
-        )
-    )
-    print()
-    chat_model.load()
-    print("Chat model loaded.")
-
-    # Summarize the transcription into organized notes
-    client = chat_model.get_chat_client()
-    messages = [
-        {
-            "role": "system",
-            "content": "You are a note-taking assistant. "
-                       "Summarize the following transcription "
-                       "into organized, concise notes with "
-                       "bullet points.",
-        },
-        {"role": "user", "content": transcription.text},
-    ]
-
-    response = client.complete_chat(messages)
-    summary = response.choices[0].message.content
-    print(f"\nSummary:\n{summary}")
-
-    # Clean up
-    chat_model.unload()
-    print("\nDone. Models unloaded.")
-    # </summarization>
-
-
-if __name__ == "__main__":
-    main()
-# </complete_code>
diff --git a/samples/python/verify-winml/README.md b/samples/python/verify-winml/README.md
deleted file mode 100644
index eabfd7201..000000000
--- a/samples/python/verify-winml/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# Verify WinML 2.0 Execution Providers
-
-This sample verifies that WinML 2.0 execution providers are correctly discovered,
-downloaded, and registered. It then runs inference on a model variant backed by a
-registered WinML EP. It finishes with one native streaming chat check.
-
-## Prerequisites
-
-- Windows with a compatible GPU
-- Python 3.11+
-
-## Setup
-
-Use a fresh virtual environment for the cleanest setup.
-
-If you want to reuse your existing Python environment instead, delete that
-environment's `Lib\site-packages\foundry_local_core` directory before
-reinstalling so stale native files are not left behind.
-
-`requirements.txt` installs the WinML SDK variant, which brings the matching
-WinML native package transitively. Either install path is enough:
-
-```bash
-python -m venv .venv
-.venv\Scripts\Activate.ps1
-pip install --upgrade -r requirements.txt
-```
-
-Or, after removing `Lib\site-packages\foundry_local_core` from your existing
-Python environment:
-
-```bash
-pip install --upgrade -r requirements.txt
-```
-
-## Run
-
-```bash
-python src/app.py
-```
-
-## What it tests
-
-1. **EP Discovery** — Lists all available execution providers
-2. **EP Download & Registration** — Downloads only the WinML EPs relevant to the machine
-3. **Model Catalog** — Lists model variants backed by the registered WinML EPs
-4. **Streaming Chat** — Runs streaming chat completion on a WinML EP-backed model via native SDK
diff --git a/samples/python/verify-winml/requirements.txt b/samples/python/verify-winml/requirements.txt
deleted file mode 100644
index 481d9dc46..000000000
--- a/samples/python/verify-winml/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-foundry-local-sdk-winml
diff --git a/samples/python/verify-winml/src/app.py b/samples/python/verify-winml/src/app.py
deleted file mode 100644
index a03417b1b..000000000
--- a/samples/python/verify-winml/src/app.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""
-Foundry Local SDK - WinML 2.0 EP Verification Script
-
-Verifies:
-  1. Execution providers are discovered and registered
-  2. Accelerated models appear in catalog after EP registration
-  3. Streaming chat completions work on an accelerated model
-"""
-
-import sys
-import time
-from foundry_local_sdk import Configuration, FoundryLocalManager
-
-
-PASS = "\033[92m[PASS]\033[0m"
-FAIL = "\033[91m[FAIL]\033[0m"
-INFO = "\033[94m[INFO]\033[0m"
-WARN = "\033[93m[WARN]\033[0m"
-
-results = []
-
-
-def log_result(test_name: str, passed: bool, detail: str = ""):
-    status = PASS if passed else FAIL
-    msg = f"{status} {test_name}"
-    if detail:
-        msg += f" - {detail}"
-    print(msg)
-    results.append((test_name, passed))
-
-
-def print_separator(title: str):
-    print(f"\n{'=' * 60}")
-    print(f"  {title}")
-    print(f"{'=' * 60}\n")
-
-
-def is_accelerated_variant(variant) -> bool:
-    rt = variant.info.runtime
-    return rt is not None and rt.device_type in ("GPU", "NPU")
-
-
-def main():
-    # ── 0. Initialize FoundryLocalManager ──────────────────────
-    print_separator("Initialization")
-    config = Configuration(app_name="verify_winml")
-    FoundryLocalManager.initialize(config)
-    manager = FoundryLocalManager.instance
-    print(f"{INFO} FoundryLocalManager initialized.")
-
-    # ── 1. Discover & Register EPs ────────────────────────────
-    print_separator("Step 1: Discover & Register Execution Providers")
-    eps = []
-    try:
-        eps = manager.discover_eps()
-        print(f"{INFO} Discovered {len(eps)} execution providers:")
-        for ep in eps:
-            print(f"  - {ep.name:40s}  Registered: {ep.is_registered}")
-        log_result("EP Discovery", True, f"{len(eps)} EP(s) found")
-    except Exception as e:
-        log_result("EP Discovery", False, str(e))
-
-    if not eps:
-        detail = "No execution providers discovered on this machine"
-        log_result("EP Download & Registration", False, detail)
-        print(f"\n{FAIL} {detail}.")
-        _print_summary()
-        return
-
-    try:
-        progress_state = {"ep": None, "percent": -1.0}
-
-        def ep_progress(ep_name: str, percent: float):
-            if progress_state["ep"] is not None and (
-                progress_state["ep"] != ep_name or percent < progress_state["percent"]
-            ):
-                print()
-            progress_state["ep"] = ep_name
-            progress_state["percent"] = percent
-            print(f"\r  Downloading {ep_name}: {percent:.1f}%", end="", flush=True)
-
-        result = manager.download_and_register_eps(progress_callback=ep_progress)
-        if progress_state["ep"] is not None:
-            print()
-
-        print(f"{INFO} EP registration result: success={result.success}, status={result.status}")
-        if result.registered_eps:
-            print(f"  Registered: {', '.join(result.registered_eps)}")
-        if result.failed_eps:
-            print(f"  Failed:     {', '.join(result.failed_eps)}")
-        download_ok = result.success
-        detail = (
-            f"{len(result.registered_eps)} EP(s) registered"
-            if download_ok and result.registered_eps
-            else result.status
-        )
-        log_result("EP Download & Registration", download_ok, detail)
-        if not download_ok:
-            _print_summary()
-            return
-    except Exception as e:
-        print()
-        log_result("EP Download & Registration", False, str(e))
-        _print_summary()
-        return
-
-    # ── 2. List Models & Find Accelerated Variants ─────────────
-    print_separator("Step 2: Model Catalog - Accelerated Models")
-    catalog = manager.catalog
-    models = catalog.list_models()
-    print(f"{INFO} Total models in catalog: {len(models)}")
-
-    accelerated_variants = []
-
-    for model in models:
-        for variant in model.variants:
-            if is_accelerated_variant(variant):
-                accelerated_variants.append(variant)
-
-    print(f"{INFO} Accelerated model variants: {len(accelerated_variants)}")
-    for v in accelerated_variants:
-        rt = v.info.runtime
-        ep = rt.execution_provider if rt else "?"
-        device = rt.device_type if rt else "?"
-        print(f"  - {v.id:50s}  Device: {device:3s}  EP: {ep}")
-
-    log_result("Catalog - Accelerated models found", len(accelerated_variants) > 0,
-               f"{len(accelerated_variants)} accelerated variant(s)")
-
-    if not accelerated_variants:
-        print(f"\n{FAIL} No accelerated model variants are available.")
-        print(f"{WARN} Ensure the system has a compatible accelerator and matching model variants installed.")
-        _print_summary()
-        return
-
-    # ── 3. Download & Load Model ──────────────────────────────
-    print_separator("Step 3: Download & Load Model")
-
-    chosen = None
-    downloaded_any = False
-    last_load_error = None
-    for candidate in accelerated_variants:
-        chosen_ep = candidate.info.runtime.execution_provider if candidate.info.runtime else "unknown"
-        print(f"\n{INFO} Trying model: {candidate.id} (EP: {chosen_ep})")
-
-        try:
-            def dl_progress(percent):
-                print(f"\r  Downloading model: {percent:.1f}%", end="", flush=True)
-
-            candidate.download(progress_callback=dl_progress)
-            print()
-            downloaded_any = True
-        except Exception as e:
-            print()
-            print(f"{WARN} Skipping {candidate.id}: download failed: {e}")
-            last_load_error = e
-            continue
-
-        try:
-            candidate.load()
-            chosen = candidate
-            break
-        except Exception as e:
-            print(f"{WARN} Skipping {candidate.id}: load failed: {e}")
-            last_load_error = e
-
-    log_result("Model Download", downloaded_any,
-               "At least one accelerated variant downloaded" if downloaded_any
-               else str(last_load_error) if last_load_error else "No accelerated variant could be downloaded")
-
-    if chosen is None:
-        log_result("Model Load", False,
-                   str(last_load_error) if last_load_error else "No accelerated variant could be loaded on this machine")
-        _print_summary()
-        return
-
-    log_result("Model Load", True, f"Loaded {chosen.id}")
-
-    # ── 4. Streaming Chat Completions (Native SDK) ────────────
-    print_separator("Step 4: Streaming Chat Completions (Native)")
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "What is 2 + 2? Reply with just the number."},
-    ]
-
-    try:
-        client = chosen.get_chat_client()
-        client.settings.temperature = 0
-        client.settings.max_tokens = 16
-        response_text = ""
-        start = time.time()
-        for chunk in client.complete_streaming_chat(messages):
-            choices = getattr(chunk, "choices", None)
-            content = choices[0].delta.content if choices and len(choices) > 0 else None
-            if content:
-                response_text += content
-                print(content, end="", flush=True)
-        elapsed = time.time() - start
-        print()
-        log_result("Streaming Chat (Native)", len(response_text) > 0,
-                   f"{len(response_text)} chars in {elapsed:.2f}s")
-    except Exception as e:
-        log_result("Streaming Chat (Native)", False, str(e))
-
-    try:
-        chosen.unload()
-        print(f"{INFO} Model unloaded.")
-    except Exception as e:
-        print(f"{WARN} Failed to unload model: {e}")
-
-    _print_summary()
-
-
-def _print_summary():
-    print_separator("Summary")
-    passed = sum(1 for _, p in results if p)
-    total = len(results)
-    for name, p in results:
-        print(f"  {'PASS' if p else 'FAIL'} {name}")
-    print(f"\n  {passed}/{total} tests passed")
-    if passed < total:
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/samples/python/web-server-responses-vision/requirements.txt b/samples/python/web-server-responses-vision/requirements.txt
deleted file mode 100644
index a92641753..000000000
--- a/samples/python/web-server-responses-vision/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-foundry-local-sdk
-pillow
-openai
diff --git a/samples/python/web-server-responses/README.md b/samples/python/web-server-responses/README.md
deleted file mode 100644
index 95666d910..000000000
--- a/samples/python/web-server-responses/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# Foundry Local Python Responses Web-Service Sample
-
-This sample starts the Foundry Local OpenAI-compatible web service, then calls the Responses API with the official OpenAI Python client.
-
-It demonstrates:
-
-- A non-streaming `/v1/responses` call
-- A streaming `/v1/responses` call
-- A function/tool-calling round trip using `previous_response_id`
-
-## What gets installed
-
-Install the sample dependencies from `requirements.txt`:
-
-```bash
-pip install -r requirements.txt
-```
-
-That installs:
-
-- `foundry-local-sdk` on non-Windows platforms
-- `foundry-local-sdk-winml` on Windows
-- `openai`
-
-The sample downloads/registers Foundry Local execution providers and downloads the `qwen2.5-0.5b` model the first time it runs.
-
-## Run the sample
-
-From this directory:
-
-```bash
-python -m venv .venv
-.\.venv\Scripts\activate
-pip install -r requirements.txt
-python src\app.py
-```
-
-On macOS or Linux, activate the virtual environment with:
-
-```bash
-source .venv/bin/activate
-```
-
-The sample starts the local web service, sends Responses API requests to `http://localhost:<port>/v1`, prints the model output, and then unloads the model and stops the web service.
diff --git a/samples/python/web-server-responses/requirements.txt b/samples/python/web-server-responses/requirements.txt
deleted file mode 100644
index db870f608..000000000
--- a/samples/python/web-server-responses/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
-openai
diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py
deleted file mode 100644
index 6f186a2a6..000000000
--- a/samples/python/web-server-responses/src/app.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# <complete_code>
-# <imports>
-import json
-from typing import Any
-
-from openai import OpenAI
-
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-
-def get_response_text(response: Any) -> str:
-    if isinstance(getattr(response, "output_text", None), str):
-        return response.output_text
-    return "".join(
-        getattr(part, "text", "")
-        for item in getattr(response, "output", []) or []
-        for part in getattr(item, "content", []) or []
-        if getattr(part, "type", None) == "output_text"
-    )
-
-
-# <init>
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-
-
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
-    print()
-# </init>
-
-# <model_setup>
-model_alias = "qwen2.5-0.5b"
-model = manager.catalog.get_model(model_alias)
-
-print(f"\nDownloading model {model_alias}...")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
-    )
-)
-print("\nModel downloaded")
-
-print("\nLoading model...")
-model.load()
-print("Model loaded")
-# </model_setup>
-
-# <server_setup>
-print("\nStarting web service...")
-manager.start_web_service()
-base_url = manager.urls[0].rstrip("/") + "/v1"
-print("Web service started")
-
-# <<<<<< OPENAI SDK USAGE >>>>>>
-# Use the OpenAI SDK to call the local Foundry web service Responses API
-openai = OpenAI(
-    base_url=base_url,
-    api_key="notneeded",
-)
-# </server_setup>
-
-try:
-    print("\nTesting a non-streaming Responses call...")
-    response = openai.responses.create(
-        model=model.id,
-        input="Reply with one short sentence about local AI.",
-    )
-    print(f"[ASSISTANT]: {get_response_text(response)}")
-
-    print("\nTesting a streaming Responses call...")
-    stream = openai.responses.create(
-        model=model.id,
-        input="Count from one to three.",
-        stream=True,
-    )
-
-    print("[ASSISTANT STREAM]: ", end="", flush=True)
-    for event in stream:
-        if getattr(event, "type", None) == "response.output_text.delta":
-            print(getattr(event, "delta", ""), end="", flush=True)
-    print()
-
-    print("\nTesting Responses tool calling...")
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get the current weather. This sample always returns Seattle weather.",
-            "parameters": {
-                "type": "object",
-                "properties": {},
-                "additionalProperties": False,
-            },
-        },
-    ]
-
-    tool_response = openai.responses.create(
-        model=model.id,
-        input="Use the get_weather tool and then answer with the weather.",
-        tools=tools,
-        tool_choice="required",
-        store=True,
-    )
-
-    function_call = next(
-        (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"),
-        None,
-    )
-    if function_call is None:
-        raise RuntimeError("Expected the model to call get_weather.")
-
-    print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})")
-
-    final_response = openai.responses.create(
-        model=model.id,
-        previous_response_id=tool_response.id,
-        input=[
-            {
-                "type": "function_call_output",
-                "call_id": function_call.call_id,
-                "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}),
-            }
-        ],
-        tools=tools,
-    )
-
-    print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}")
-    # <<<<<< END OPENAI SDK USAGE >>>>>>
-finally:
-    # Tidy up
-    openai.close()
-    manager.stop_web_service()
-    model.unload()
-# </complete_code>
diff --git a/samples/python/web-server/requirements.txt b/samples/python/web-server/requirements.txt
deleted file mode 100644
index db870f608..000000000
--- a/samples/python/web-server/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-foundry-local-sdk; sys_platform != "win32"
-foundry-local-sdk-winml; sys_platform == "win32"
-openai
diff --git a/samples/python/web-server/src/app.py b/samples/python/web-server/src/app.py
deleted file mode 100644
index 67117029a..000000000
--- a/samples/python/web-server/src/app.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# <complete_code>
-# <imports>
-import openai
-from foundry_local_sdk import Configuration, FoundryLocalManager
-# </imports>
-
-# <init>
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r  {ep_name:<30}  {percent:5.1f}%", end="", flush=True)
-
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
-    print()
-
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
-    )
-)
-print()
-model.load()
-print("Model loaded.")
-
-# Start the web service to expose an OpenAI-compatible REST endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-# </init>
-
-# <rest_client>
-# Use the OpenAI SDK to connect to the local REST endpoint
-client = openai.OpenAI(
-    base_url=base_url,
-    api_key="none",
-)
-# </rest_client>
-
-# <chat_completion>
-# Make a chat completion request via the REST API
-response = client.chat.completions.create(
-    model=model.id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "What is the golden ratio?"}
-    ],
-    stream=True,
-)
-
-for chunk in response:
-    if chunk.choices[0].delta.content is not None:
-        print(chunk.choices[0].delta.content, end="", flush=True)
-print()
-# </chat_completion>
-
-# Clean up
-model.unload()
-manager.stop_web_service()
-# </complete_code>
diff --git a/samples/rust/Cargo.toml b/samples/rust/Cargo.toml
index 7d528e0a1..cf9a0aec4 100644
--- a/samples/rust/Cargo.toml
+++ b/samples/rust/Cargo.toml
@@ -1,16 +1,8 @@
 [workspace]
 members = [
-    "foundry-local-webserver",
-    "foundry-local-webserver-responses-vision",
-    "tool-calling-foundry-local",
-    "native-chat-completions",
-    "audio-transcription-example",
-    "live-audio-transcription",
     "embeddings",
-    "tutorial-chat-assistant",
-    "tutorial-document-summarizer",
-    "tutorial-tool-calling",
-    "tutorial-voice-to-text",
-    "verify-winml",
+    "chat-completion",
+    "audio",
+    "responses-api",
 ]
 resolver = "2"
diff --git a/samples/rust/README.md b/samples/rust/README.md
index f260df7d9..c00cfd2bd 100644
--- a/samples/rust/README.md
+++ b/samples/rust/README.md
@@ -1,6 +1,9 @@
 # 🚀 Foundry Local Rust Samples
 
 These samples demonstrate how to use the Rust binding for Foundry Local.
+Each sample consumes the SDK from this repository with
+`foundry-local-sdk = { path = "../../../sdk/rust" }`, so the samples track
+`main` instead of pinning to a published crate version.
 
 ## Prerequisites
 
@@ -10,17 +13,10 @@ These samples demonstrate how to use the Rust binding for Foundry Local.
 
 | Sample | Description |
 |--------|-------------|
-| [native-chat-completions](native-chat-completions/) | Non-streaming and streaming chat completions using the native chat client. |
 | [embeddings](embeddings/) | Generate single and batch text embeddings using the native embedding client. |
-| [audio-transcription-example](audio-transcription-example/) | Audio transcription (non-streaming and streaming) using the Whisper model. |
-| [foundry-local-webserver](foundry-local-webserver/) | Start a local OpenAI-compatible web server and call it with a standard HTTP client. |
-| [foundry-local-webserver-responses-vision](foundry-local-webserver-responses-vision/) | Stream a vision (image understanding) response from the local web server using the Responses API. |
-| [tool-calling-foundry-local](tool-calling-foundry-local/) | Tool calling with streaming responses, multi-turn conversation, and local tool execution. |
-| [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). |
-| [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). |
-| [tutorial-tool-calling](tutorial-tool-calling/) | Create a tool-calling assistant (tutorial). |
-| [tutorial-voice-to-text](tutorial-voice-to-text/) | Transcribe and summarize audio (tutorial). |
-| [live-audio-transcription-example](live-audio-transcription-example/) | Real-time microphone transcription using the `nemotron` model. *(Requires SDK live-transcription API — not yet available.)* |
+| [chat-completion](chat-completion/) | Chat completions with native in-process inference and the local OpenAI-compatible web server. |
+| [audio](audio/) | Live microphone streaming with Nemotron ASR plus file-based Whisper transcription. |
+| [responses-api](responses-api/) | Stream a vision (image understanding) response from the local web server using the Responses API. |
 
 ## Running a Sample
 
@@ -34,15 +30,12 @@ These samples demonstrate how to use the Rust binding for Foundry Local.
 2. Run a sample:
 
    ```bash
-   cargo run -p native-chat-completions
+   cargo run -p chat-completion
    ```
 
    Or navigate to a sample directory and run directly:
 
    ```bash
-   cd native-chat-completions
+   cd chat-completion
    cargo run
    ```
-
-> [!TIP]
-> Each sample's `Cargo.toml` uses `[target.'cfg(windows)'.dependencies]` to automatically enable the `winml` feature on Windows for broader hardware acceleration. On macOS and Linux, the standard SDK is used. No manual configuration needed.
diff --git a/samples/rust/audio-transcription-example/Cargo.toml b/samples/rust/audio-transcription-example/Cargo.toml
deleted file mode 100644
index 1305170fa..000000000
--- a/samples/rust/audio-transcription-example/Cargo.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[package]
-name = "audio-transcription-example"
-version = "0.1.0"
-edition = "2021"
-description = "Audio transcription example using the Foundry Local Rust SDK"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
-tokio-stream = "0.1"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/audio-transcription-example/src/main.rs b/samples/rust/audio-transcription-example/src/main.rs
deleted file mode 100644
index 70150546f..000000000
--- a/samples/rust/audio-transcription-example/src/main.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-// <complete_code>
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <imports>
-use std::env;
-use std::io::{self, Write};
-
-use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
-use tokio_stream::StreamExt;
-// </imports>
-
-const ALIAS: &str = "whisper-tiny";
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("Audio Transcription Example");
-    println!("===========================\n");
-
-    // Accept an optional audio file path as a CLI argument, defaulting to Recording.mp3.
-    let audio_path = env::args()
-        .nth(1)
-        .unwrap_or_else(|| "Recording.mp3".to_string());
-
-    // ── 1. Initialise the manager ────────────────────────────────────────
-    // <init>
-    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
-    // </init>
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // ── 2. Pick the whispermodel and ensure it is downloaded ────────────
-    // <model_setup>
-    let model = manager.catalog().get_model(ALIAS).await?;
-    println!("Model: {} (id: {})", model.alias(), model.id());
-
-    if !model.is_cached().await? {
-        println!("Downloading model...");
-        model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    println!("Loading model...");
-    model.load().await?;
-    println!("✓ Model loaded\n");
-    // </model_setup>
-
-    // <transcription>
-    // ── 3. Create an audio client────────────────────────────────────────
-    let audio_client = model.create_audio_client();
-
-    // ── 4. Non-streaming transcription ───────────────────────────────────
-    println!("--- Non-streaming transcription ---");
-    let result = audio_client.transcribe(&audio_path).await?;
-    println!("Transcription: {}", result.text);
-
-    // ── 5. Streaming transcription ───────────────────────────────────────
-    println!("--- Streaming transcription ---");
-    print!("Transcription: ");
-    let mut stream = audio_client.transcribe_streaming(&audio_path).await?;
-    while let Some(chunk) = stream.next().await {
-        let chunk = chunk?;
-        print!("{}", chunk.text);
-        io::stdout().flush().ok();
-    }
-    println!("\n");
-    // </transcription>
-
-    // ── 6. Unload the model──────────────────────────────────────────────
-    // <cleanup>
-    println!("Unloading model...");
-    model.unload().await?;
-    println!("Done.");
-    // </cleanup>
-
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/live-audio-transcription/Cargo.toml b/samples/rust/audio/Cargo.toml
similarity index 54%
rename from samples/rust/live-audio-transcription/Cargo.toml
rename to samples/rust/audio/Cargo.toml
index ca7324567..6a787dc21 100644
--- a/samples/rust/live-audio-transcription/Cargo.toml
+++ b/samples/rust/audio/Cargo.toml
@@ -1,8 +1,8 @@
 [package]
-name = "live-audio-transcription-example"
+name = "audio"
 version = "0.1.0"
 edition = "2021"
-description = "Live audio transcription (streaming) example using the Foundry Local Rust SDK"
+description = "Live microphone and file audio transcription using the Foundry Local Rust SDK"
 
 [dependencies]
 foundry-local-sdk = { path = "../../../sdk/rust" }
@@ -10,6 +10,3 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
 tokio-stream = "0.1"
 cpal = "0.15"
 ctrlc = "3"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/audio/README.md b/samples/rust/audio/README.md
new file mode 100644
index 000000000..4d2390528
--- /dev/null
+++ b/samples/rust/audio/README.md
@@ -0,0 +1,28 @@
+# Audio Transcription: Live Microphone and File (Rust)
+
+Demonstrates both audio transcription paths in one sample:
+
+- live microphone streaming with the Nemotron ASR model (default)
+- file-based Whisper transcription with `--file`
+
+The sample tracks the Rust SDK from this repository via
+`foundry-local-sdk = { path = "../../../sdk/rust" }`, so it follows `main`
+instead of pinning to a published crate version.
+
+## Run
+
+```bash
+cd samples/rust
+
+# Live microphone (press Ctrl+C to stop)
+cargo run -p audio
+
+# Synthetic 440Hz sine wave (no microphone needed)
+cargo run -p audio -- --synth
+
+# Transcribe the bundled Recording.mp3 with Whisper
+cargo run -p audio -- --file
+
+# Transcribe a custom audio file with Whisper
+cargo run -p audio -- --file path/to/audio.mp3
+```
diff --git a/samples/rust/audio-transcription-example/Recording.mp3 b/samples/rust/audio/Recording.mp3
similarity index 100%
rename from samples/rust/audio-transcription-example/Recording.mp3
rename to samples/rust/audio/Recording.mp3
diff --git a/samples/rust/live-audio-transcription/src/main.rs b/samples/rust/audio/src/main.rs
similarity index 59%
rename from samples/rust/live-audio-transcription/src/main.rs
rename to samples/rust/audio/src/main.rs
index 57ef15ed1..fc67bbbf8 100644
--- a/samples/rust/live-audio-transcription/src/main.rs
+++ b/samples/rust/audio/src/main.rs
@@ -1,13 +1,15 @@
-// Live Audio Transcription — Foundry Local Rust SDK Example
-//
-// Tries CPAL mic capture first; falls back to synthetic PCM if unavailable.
+// Live and file audio transcription — Foundry Local Rust SDK Example
 //
 // Usage:
 //   cargo run                  # Live microphone (press Ctrl+C to stop)
-//   cargo run -- --synth       # Synthetic 440Hz sine wave
+//   cargo run -- --synth       # Synthetic 440Hz sine wave for live transcription
+//   cargo run -- --file        # Transcribe bundled Recording.mp3 with Whisper
+//   cargo run -- --file <path> # Transcribe a custom audio file with Whisper
 
 use std::env;
+use std::error::Error;
 use std::io::{self, Write};
+use std::path::{Path, PathBuf};
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
 
@@ -15,19 +17,108 @@ use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
 use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager, LiveAudioTranscriptionSession};
 use tokio_stream::StreamExt;
 
-// English-only:
-const ALIAS: &str = "nemotron-speech-streaming-en-0.6b";
+const LIVE_ALIAS: &str = "nemotron-speech-streaming-en-0.6b"; // English-only
 // Multi-lingual (supports 30+ languages including auto-detect):
-// const ALIAS: &str = "nemotron-3.5-asr-streaming-0.6b";
+// const LIVE_ALIAS: &str = "nemotron-3.5-asr-streaming-0.6b";
+const FILE_ALIAS: &str = "whisper-tiny";
+const DEFAULT_AUDIO_FILE: &str = "Recording.mp3";
 
-// Global flag for Ctrl+C graceful shutdown (mirrors JS process.on('SIGINT'))
+// Global flag for Ctrl+C graceful shutdown (mirrors JS process.on('SIGINT')).
 static RUNNING: AtomicBool = AtomicBool::new(true);
 
+enum RunMode {
+    Help,
+    Live { use_synth: bool },
+    File { path: PathBuf },
+}
+
 #[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let use_synth = env::args().any(|a| a == "--synth");
+async fn main() -> Result<(), Box<dyn Error>> {
+    match parse_args() {
+        Ok(RunMode::Help) => {
+            print_usage();
+            Ok(())
+        }
+        Ok(RunMode::Live { use_synth }) => run_live_transcription(use_synth).await,
+        Ok(RunMode::File { path }) => run_file_transcription(&path).await,
+        Err(message) => {
+            eprintln!("{message}\n");
+            print_usage();
+            std::process::exit(2);
+        }
+    }
+}
+
+fn parse_args() -> Result<RunMode, String> {
+    let args: Vec<String> = env::args().skip(1).collect();
+    if args.iter().any(|arg| arg == "--help" || arg == "-h") {
+        return Ok(RunMode::Help);
+    }
+
+    let mut use_synth = false;
+    let mut file_path = None;
+    let mut index = 0;
 
-    // Install Ctrl+C handler (mirrors JS SIGINT / C++ SignalHandler)
+    while index < args.len() {
+        match args[index].as_str() {
+            "--synth" => {
+                use_synth = true;
+                index += 1;
+            }
+            "--file" => {
+                if file_path.is_some() {
+                    return Err("--file can only be specified once".to_string());
+                }
+
+                if let Some(next) = args.get(index + 1).filter(|arg| !arg.starts_with("--")) {
+                    file_path = Some(PathBuf::from(next));
+                    index += 2;
+                } else {
+                    file_path = Some(default_audio_path());
+                    index += 1;
+                }
+            }
+            other => {
+                if let Some(path) = other.strip_prefix("--file=") {
+                    if path.is_empty() {
+                        return Err("--file= requires a path".to_string());
+                    }
+                    if file_path.is_some() {
+                        return Err("--file can only be specified once".to_string());
+                    }
+                    file_path = Some(PathBuf::from(path));
+                    index += 1;
+                } else {
+                    return Err(format!("Unknown argument: {other}"));
+                }
+            }
+        }
+    }
+
+    if file_path.is_some() && use_synth {
+        return Err("--synth cannot be combined with --file".to_string());
+    }
+
+    if let Some(path) = file_path {
+        Ok(RunMode::File { path })
+    } else {
+        Ok(RunMode::Live { use_synth })
+    }
+}
+
+fn print_usage() {
+    eprintln!("Usage:");
+    eprintln!("  cargo run -p audio");
+    eprintln!("  cargo run -p audio -- --synth");
+    eprintln!("  cargo run -p audio -- --file [audio_path]");
+    eprintln!("  cargo run -p audio -- --file=audio_path");
+}
+
+fn default_audio_path() -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(DEFAULT_AUDIO_FILE)
+}
+
+async fn run_live_transcription(use_synth: bool) -> Result<(), Box<dyn Error>> {
     let running = Arc::new(AtomicBool::new(true));
     let running_for_signal = running.clone();
     ctrlc::set_handler(move || {
@@ -41,7 +132,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!();
 
     let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
-    let model = manager.catalog().get_model(ALIAS).await?;
+    download_execution_providers(manager).await?;
+
+    let model = manager.catalog().get_model(LIVE_ALIAS).await?;
     println!("Model: {} (id: {})", model.alias(), model.id());
 
     if !model.is_cached().await? {
@@ -61,15 +154,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     let audio_client = model.create_audio_client();
     let mut session = audio_client.create_live_transcription_session();
-    session.settings.language = Some("en".into());    // English (default)
-    // session.settings.language = Some("de".into());    // German
-    // session.settings.language = Some("zh-CN".into()); // Chinese (Simplified)
-    // session.settings.language = Some("auto".into());  // Auto-detect language
+    session.settings.language = Some("en".into()); // English (default)
+    // session.settings.language = Some("de".into()); // German
+    // session.settings.language = Some("zh-CN".into()); // Chinese (Simplified)
+    // session.settings.language = Some("auto".into()); // Auto-detect language
     let session = Arc::new(session);
     session.start(None).await?;
     println!("✓ Session started\n");
 
-    // --- Background task reads transcription results (mirrors JS readPromise) ---
     let mut stream = session.get_stream().await?;
     let read_task = tokio::spawn(async move {
         while let Some(result) = stream.next().await {
@@ -94,9 +186,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
     });
 
-    // --- Microphone capture (mirrors JS naudiodon2 / C++ PortAudio / Python PyAudio) ---
-    // Try CPAL for mic input; fall back to synthetic PCM on failure.
-
     let mut mic_active = false;
 
     if !use_synth {
@@ -111,11 +200,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
     }
 
-    // Fallback: push synthetic PCM (440Hz sine wave) — mirrors JS catch block
     if !mic_active {
         println!("Pushing synthetic audio (440Hz sine, 2s)...");
         let pcm_data = generate_sine_wave_pcm(16000, 2, 440.0);
-        let chunk_size = 16000 / 10 * 2; // 100ms
+        let chunk_size = 16000 / 10 * 2; // 100 ms of audio per chunk
         let chunk_interval = std::time::Duration::from_millis(100);
         for offset in (0..pcm_data.len()).step_by(chunk_size) {
             if !running.load(Ordering::SeqCst) {
@@ -127,11 +215,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
         println!("✓ Synthetic audio pushed");
 
-        // Wait for remaining transcription results
         tokio::time::sleep(std::time::Duration::from_secs(3)).await;
     }
 
-    // Graceful shutdown (mirrors JS SIGINT handler)
     println!("\n\nStopping...");
     session.stop(None).await?;
     read_task.await?;
@@ -140,12 +226,89 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     Ok(())
 }
 
+async fn run_file_transcription(path: &Path) -> Result<(), Box<dyn Error>> {
+    if !path.exists() {
+        return Err(format!("Audio file not found: {}", path.display()).into());
+    }
+
+    println!("===========================================================");
+    println!("   Foundry Local -- File Audio Transcription Demo (Rust)");
+    println!("===========================================================");
+    println!();
+    println!("Audio file: {}", path.display());
+
+    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
+    download_execution_providers(manager).await?;
+
+    let model = manager.catalog().get_model(FILE_ALIAS).await?;
+    println!("Model: {} (id: {})", model.alias(), model.id());
+
+    if !model.is_cached().await? {
+        println!("Downloading model...");
+        model
+            .download(Some(|progress: f64| {
+                print!("\r  {progress:.1}%");
+                io::stdout().flush().ok();
+            }))
+            .await?;
+        println!();
+    }
+
+    println!("Loading model...");
+    model.load().await?;
+    println!("✓ Model loaded\n");
+
+    let audio_client = model.create_audio_client();
+    let audio_path = path.to_string_lossy();
+
+    println!("--- Non-streaming transcription ---");
+    let result = audio_client.transcribe(audio_path.as_ref()).await?;
+    println!("Transcription: {}", result.text);
+
+    println!("\n--- Streaming transcription ---");
+    print!("Transcription: ");
+    let mut stream = audio_client
+        .transcribe_streaming(audio_path.as_ref())
+        .await?;
+    while let Some(chunk) = stream.next().await {
+        let chunk = chunk?;
+        print!("{}", chunk.text);
+        io::stdout().flush().ok();
+    }
+    println!("\n");
+
+    println!("Unloading model...");
+    model.unload().await?;
+    println!("Done.");
+    Ok(())
+}
+
+async fn download_execution_providers(manager: &FoundryLocalManager) -> Result<(), Box<dyn Error>> {
+    manager
+        .download_and_register_eps_with_progress(None, {
+            let mut current_ep = String::new();
+            move |ep_name: &str, percent: f64| {
+                if ep_name != current_ep {
+                    if !current_ep.is_empty() {
+                        println!();
+                    }
+                    current_ep = ep_name.to_string();
+                }
+                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
+                io::stdout().flush().ok();
+            }
+        })
+        .await?;
+    println!();
+    Ok(())
+}
+
 /// Try to open the default microphone with CPAL and forward PCM to the session.
 /// Blocks until Ctrl+C is pressed.
 async fn try_start_mic(
     session: &Arc<LiveAudioTranscriptionSession>,
     running: &Arc<AtomicBool>,
-) -> Result<(), Box<dyn std::error::Error>> {
+) -> Result<(), Box<dyn Error>> {
     let host = cpal::default_host();
     let device = host
         .default_input_device()
@@ -161,12 +324,9 @@ async fn try_start_mic(
         buffer_size: cpal::BufferSize::Default,
     };
 
-    // Bounded channel (cap=100) mirrors JS appendQueue / C++ AudioQueue
     let (audio_tx, mut audio_rx) = tokio::sync::mpsc::channel::<Vec<u8>>(100);
     let err_fn = |err| eprintln!("Microphone stream error: {err}");
 
-    // CPAL may deliver f32, i16, or u16 depending on the device/host. Convert
-    // each supported sample format to f32 in [-1.0, 1.0] before resampling.
     let input_stream = match sample_format {
         cpal::SampleFormat::F32 => {
             let tx = audio_tx.clone();
@@ -187,10 +347,8 @@ async fn try_start_mic(
             device.build_input_stream(
                 &mic_config,
                 move |data: &[i16], _: &cpal::InputCallbackInfo| {
-                    let samples: Vec<f32> = data
-                        .iter()
-                        .map(|&s| s as f32 / i16::MAX as f32)
-                        .collect();
+                    let samples: Vec<f32> =
+                        data.iter().map(|&s| s as f32 / i16::MAX as f32).collect();
                     let bytes = convert_audio(&samples, device_channels, device_rate);
                     if !bytes.is_empty() {
                         let _ = tx.try_send(bytes);
@@ -233,7 +391,6 @@ async fn try_start_mic(
     println!("===========================================================");
     println!();
 
-    // Pump audio from channel to session (mirrors JS pumpAudio / C++ pump loop)
     let session_clone = Arc::clone(session);
     let forward_task = tokio::spawn(async move {
         while let Some(bytes) = audio_rx.recv().await {
@@ -244,7 +401,6 @@ async fn try_start_mic(
         }
     });
 
-    // Block until Ctrl+C
     while running.load(Ordering::SeqCst) {
         tokio::time::sleep(std::time::Duration::from_millis(100)).await;
     }
diff --git a/samples/rust/foundry-local-webserver/Cargo.toml b/samples/rust/chat-completion/Cargo.toml
similarity index 50%
rename from samples/rust/foundry-local-webserver/Cargo.toml
rename to samples/rust/chat-completion/Cargo.toml
index 1671ea4c9..c74272c86 100644
--- a/samples/rust/foundry-local-webserver/Cargo.toml
+++ b/samples/rust/chat-completion/Cargo.toml
@@ -1,14 +1,11 @@
 [package]
-name = "foundry-local-webserver"
+name = "chat-completion"
 version = "0.1.0"
 edition = "2021"
-description = "Example of using the Foundry Local SDK with a local OpenAI-compatible web server"
+description = "Native and local web server chat completions using the Foundry Local Rust SDK"
 
 [dependencies]
 foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
-serde_json = "1"
 reqwest = { version = "0.12", features = ["json"] }
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
+serde_json = "1"
+tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
diff --git a/samples/rust/chat-completion/README.md b/samples/rust/chat-completion/README.md
new file mode 100644
index 000000000..e27e38fd8
--- /dev/null
+++ b/samples/rust/chat-completion/README.md
@@ -0,0 +1,17 @@
+# Native + Web Server Chat Completions (Rust)
+
+Runs the same prompt two ways:
+
+1. native in-process chat completion with `model.create_chat_client()`
+2. OpenAI-compatible `/v1/chat/completions` through the local Foundry web server
+
+The sample tracks the Rust SDK from this repository via
+`foundry-local-sdk = { path = "../../../sdk/rust" }`, so it follows `main`
+instead of pinning to a published crate version.
+
+## Run
+
+```bash
+cd samples/rust
+cargo run -p chat-completion
+```
diff --git a/samples/rust/chat-completion/src/main.rs b/samples/rust/chat-completion/src/main.rs
new file mode 100644
index 000000000..b01387dc4
--- /dev/null
+++ b/samples/rust/chat-completion/src/main.rs
@@ -0,0 +1,182 @@
+// <complete_code>
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <imports>
+use std::io::{self, Write};
+
+use serde_json::json;
+
+use foundry_local_sdk::{
+    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
+    ChatCompletionRequestUserMessage, FoundryLocalConfig, FoundryLocalManager,
+};
+// </imports>
+
+const ALIAS: &str = "qwen2.5-0.5b";
+const SYSTEM_PROMPT: &str = "You are a helpful assistant.";
+const USER_PROMPT: &str = "What is Rust's ownership model?";
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("Native + Web Server Chat Completions");
+    println!("====================================\n");
+
+    // ── 1. Initialise the manager ────────────────────────────────────────
+    // <init>
+    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
+    // </init>
+
+    // Download and register all execution providers.
+    manager
+        .download_and_register_eps_with_progress(None, {
+            let mut current_ep = String::new();
+            move |ep_name: &str, percent: f64| {
+                if ep_name != current_ep {
+                    if !current_ep.is_empty() {
+                        println!();
+                    }
+                    current_ep = ep_name.to_string();
+                }
+                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
+                io::stdout().flush().ok();
+            }
+        })
+        .await?;
+    println!();
+
+    // ── 2. Pick a model and ensure it is downloaded ─────────────────────
+    // <model_setup>
+    let model = manager.catalog().get_model(ALIAS).await?;
+    println!("Model: {} (id: {})", model.alias(), model.id());
+
+    if !model.is_cached().await? {
+        println!("Downloading model...");
+        model
+            .download(Some(|progress: f64| {
+                print!("\r  {progress:.1}%");
+                io::stdout().flush().ok();
+            }))
+            .await?;
+        println!();
+    }
+
+    println!("Loading model...");
+    model.load().await?;
+    println!("✓ Model loaded\n");
+    // </model_setup>
+
+    let messages: Vec<ChatCompletionRequestMessage> = vec![
+        ChatCompletionRequestSystemMessage::from(SYSTEM_PROMPT).into(),
+        ChatCompletionRequestUserMessage::from(USER_PROMPT).into(),
+    ];
+
+    println!("==================================================");
+    println!("  1. Native in-process chat completion");
+    println!("==================================================");
+    println!("Prompt: {USER_PROMPT}\n");
+
+    // ── 3. Create a chat client ─────────────────────────────────────────
+    // <chat_client>
+    let client = model.create_chat_client().temperature(0.7).max_tokens(256);
+    // </chat_client>
+
+    // ── 4. Native chat completion ───────────────────────────────────────
+    // <chat_completion>
+    let response = client.complete_chat(&messages, None).await?;
+    if let Some(choice) = response.choices.first() {
+        if let Some(ref content) = choice.message.content {
+            println!("Assistant: {content}");
+        }
+    }
+    // </chat_completion>
+
+    println!("\n==================================================");
+    println!("  2. Local web server /v1/chat/completions");
+    println!("==================================================");
+    println!("Prompt: {USER_PROMPT}\n");
+
+    // <server_setup>
+    print!("Starting web service...");
+    manager.start_web_service().await?;
+    println!("done.");
+    // Keep the result instead of using `?` so the cleanup below always runs.
+    let web_result = run_web_chat_completion(manager, model.id()).await;
+    // </server_setup>
+    println!("\nStopping web service...");
+    let stop_result = manager.stop_web_service().await;
+
+    // ── 5. Unload the model ─────────────────────────────────────────────
+    // <cleanup>
+    println!("Unloading model...");
+    let unload_result = model.unload().await;
+    // </cleanup>
+    // Report failures only after every cleanup step has been attempted.
+    web_result?;
+    stop_result?;
+    unload_result?;
+
+    println!("Done.");
+    Ok(())
+}
+
+/// Streams a chat completion from the local web service and prints each delta.
+///
+/// Returns an error instead of panicking when no endpoint is reported, so the
+/// caller's cleanup (stop web service, unload model) still runs.
+async fn run_web_chat_completion(
+    manager: &FoundryLocalManager,
+    model_id: &str,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let urls = manager.urls()?;
+    let endpoint = urls.first().ok_or("Web service did not return an endpoint")?;
+    let base_url = endpoint.trim_end_matches('/');
+    println!("Web service listening on: {base_url}");
+
+    let mut response = reqwest::Client::new()
+        .post(format!("{base_url}/v1/chat/completions"))
+        .json(&json!({
+            "model": model_id,
+            "messages": [
+                { "role": "system", "content": SYSTEM_PROMPT },
+                { "role": "user", "content": USER_PROMPT }
+            ],
+            "stream": true
+        }))
+        .send()
+        .await?
+        .error_for_status()?;
+
+    print!("Assistant: ");
+    io::stdout().flush().ok();
+
+    // Buffer raw bytes: a chunk boundary may fall inside a multi-byte UTF-8 character.
+    let mut buffer: Vec<u8> = Vec::new();
+    while let Some(chunk) = response.chunk().await? {
+        buffer.extend_from_slice(&chunk);
+        while let Some(newline) = buffer.iter().position(|&byte| byte == b'\n') {
+            let raw: Vec<u8> = buffer.drain(..=newline).collect();
+            let line = String::from_utf8_lossy(&raw);
+            let Some(data) = line.trim_end().strip_prefix("data: ") else {
+                continue;
+            };
+            if data == "[DONE]" {
+                println!();
+                return Ok(());
+            }
+            if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(data) {
+                if let Some(content) = parsed
+                    .pointer("/choices/0/delta/content")
+                    .and_then(|value| value.as_str())
+                {
+                    print!("{content}");
+                    io::stdout().flush().ok();
+                }
+            }
+        }
+    }
+
+    println!();
+    Ok(())
+}
+// </complete_code>
diff --git a/samples/rust/embeddings/Cargo.toml b/samples/rust/embeddings/Cargo.toml
index ebaa21be4..64faf27cf 100644
--- a/samples/rust/embeddings/Cargo.toml
+++ b/samples/rust/embeddings/Cargo.toml
@@ -7,6 +7,3 @@ description = "Native SDK embeddings (single and batch) using the Foundry Local
 [dependencies]
 foundry-local-sdk = { path = "../../../sdk/rust" }
 tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/embeddings/README.md b/samples/rust/embeddings/README.md
new file mode 100644
index 000000000..ac19a966a
--- /dev/null
+++ b/samples/rust/embeddings/README.md
@@ -0,0 +1,15 @@
+# Embeddings (Rust)
+
+Generates single and batch text embeddings with the native Foundry Local Rust
+SDK embedding client.
+
+The sample tracks the Rust SDK from this repository via
+`foundry-local-sdk = { path = "../../../sdk/rust" }`, so it follows `main`
+instead of pinning to a published crate version.
+
+## Run
+
+```bash
+cd samples/rust
+cargo run -p embeddings
+```
diff --git a/samples/rust/foundry-local-webserver/src/main.rs b/samples/rust/foundry-local-webserver/src/main.rs
deleted file mode 100644
index d36581e9c..000000000
--- a/samples/rust/foundry-local-webserver/src/main.rs
+++ /dev/null
@@ -1,131 +0,0 @@
-// <complete_code>
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-//! Foundry Local Web Server example.
-//!
-//! Demonstrates how to start a local OpenAI-compatible web server using the
-//! Foundry Local SDK, then call it with a standard HTTP client. This is useful
-//! when you want to use the OpenAI REST API directly or integrate with tools
-//! that expect an OpenAI-compatible endpoint.
-
-// <imports>
-use std::io::{self, Write};
-
-use serde_json::json;
-
-use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
-// </imports>
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // ── 1. Initialise the SDK ────────────────────────────────────────────
-    // <init>
-    println!("Initializing Foundry Local SDK...");
-    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
-    println!("✓ SDK initialized");
-    // </init>
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // ── 2. Download and load a model ─────────────────────────────────────
-    // <model_setup>
-    let model_alias = "qwen2.5-0.5b";
-    let model = manager.catalog().get_model(model_alias).await?;
-
-    if !model.is_cached().await? {
-        print!("Downloading model {model_alias}...");
-        model
-            .download(Some(move |progress: f64| {
-                print!("\rDownloading model... {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    print!("Loading model {model_alias}...");
-    model.load().await?;
-    println!("done.");
-    // </model_setup>
-
-    // <server_setup>
-    // ── 3. Start the web service─────────────────────────────────────────
-    print!("Starting web service...");
-    manager.start_web_service().await?;
-    println!("done.");
-
-    let urls = manager.urls()?;
-    let endpoint = urls
-        .first()
-        .expect("Web service did not return an endpoint");
-    println!("Web service listening on: {endpoint}");
-
-    // ── 4. Use the OpenAI-compatible REST API with streaming ────────────
-    // Any HTTP client (or OpenAI SDK) can now talk to this endpoint.
-    let client = reqwest::Client::new();
-    let base_url = endpoint.trim_end_matches('/');
-
-    let mut response = client
-        .post(format!("{base_url}/v1/chat/completions"))
-        .json(&json!({
-            "model": model.id(),
-            "messages": [
-                { "role": "user", "content": "Why is the sky blue?" }
-            ],
-            "stream": true
-        }))
-        .send()
-        .await?;
-
-    print!("[ASSISTANT]: ");
-    while let Some(chunk) = response.chunk().await? {
-        let text = String::from_utf8_lossy(&chunk);
-        for line in text.lines() {
-            let line = line.trim();
-            if let Some(data) = line.strip_prefix("data: ") {
-                if data == "[DONE]" {
-                    break;
-                }
-                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(data) {
-                    if let Some(content) = parsed
-                        .pointer("/choices/0/delta/content")
-                        .and_then(|v| v.as_str())
-                    {
-                        print!("{content}");
-                        io::stdout().flush().ok();
-                    }
-                }
-            }
-        }
-    }
-    println!();
-    // </server_setup>
-
-    // ── 5. Clean up ──────────────────────────────────────────────────────
-    println!("\nStopping web service...");
-    manager.stop_web_service().await?;
-
-    println!("Unloading model...");
-    model.unload().await?;
-
-    println!("✓ Done.");
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/live-audio-transcription/README.md b/samples/rust/live-audio-transcription/README.md
deleted file mode 100644
index a9f90f3dc..000000000
--- a/samples/rust/live-audio-transcription/README.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Live Audio Transcription Example (Rust)
-
-Demonstrates real-time microphone-to-text using the Foundry Local Rust SDK:
-
-**Microphone (CPAL) → SDK (FoundryLocalManager) → Core (NativeAOT DLL)**
-
-Uses [CPAL](https://crates.io/crates/cpal) for cross-platform microphone capture
-(the Rust equivalent of `naudiodon2` in JS / `PortAudio` in C++ / `PyAudio` in Python).
-If CPAL cannot open a microphone, falls back to synthetic PCM audio.
-
-## Run (once the API is available)
-
-```bash
-cd samples/rust/live-audio-transcription-example
-
-# Live microphone (press Ctrl+C to stop)
-cargo run
-
-# Synthetic 440Hz sine wave (no microphone needed)
-cargo run -- --synth
-```
diff --git a/samples/rust/native-chat-completions/Cargo.toml b/samples/rust/native-chat-completions/Cargo.toml
deleted file mode 100644
index 349eaf025..000000000
--- a/samples/rust/native-chat-completions/Cargo.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[package]
-name = "native-chat-completions"
-version = "0.1.0"
-edition = "2021"
-description = "Native SDK chat completions (non-streaming and streaming) using the Foundry Local Rust SDK"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
-tokio-stream = "0.1"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/native-chat-completions/src/main.rs b/samples/rust/native-chat-completions/src/main.rs
deleted file mode 100644
index bee7147e4..000000000
--- a/samples/rust/native-chat-completions/src/main.rs
+++ /dev/null
@@ -1,123 +0,0 @@
-// <complete_code>
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <imports>
-use std::io::{self, Write};
-
-use foundry_local_sdk::{
-    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestUserMessage, FoundryLocalConfig, FoundryLocalManager,
-};
-use tokio_stream::StreamExt;
-// </imports>
-
-const ALIAS: &str = "qwen2.5-0.5b";
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("Native Chat Completions");
-    println!("=======================\n");
-
-    // ── 1. Initialise the manager ────────────────────────────────────────
-    // <init>
-    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
-    // </init>
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // ── 2. Pick a modeland ensure it is downloaded ──────────────────────
-    // <model_setup>
-    let model = manager.catalog().get_model(ALIAS).await?;
-    println!("Model: {} (id: {})", model.alias(), model.id());
-
-    if !model.is_cached().await? {
-        println!("Downloading model...");
-        model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    println!("Loading model...");
-    model.load().await?;
-    println!("✓ Model loaded\n");
-    // </model_setup>
-
-    // ── 3. Create a chat client──────────────────────────────────────────
-    // <chat_client>
-    let client = model.create_chat_client()
-        .temperature(0.7)
-        .max_tokens(256);
-    // </chat_client>
-
-    // ── 4. Non-streamingchat completion ─────────────────────────────────
-    // <chat_completion>
-    let messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from("You are a helpful assistant.").into(),
-        ChatCompletionRequestUserMessage::from("What is Rust's ownership model?").into(),
-    ];
-
-    println!("--- Non-streaming completion ---");
-    let response = client.complete_chat(&messages, None).await?;
-    if let Some(choice) = response.choices.first() {
-        if let Some(ref content) = choice.message.content {
-            println!("Assistant: {content}");
-        }
-    }
-    // </chat_completion>
-
-    // ── 5. Streamingchat completion ─────────────────────────────────────
-    // <streaming>
-    let stream_messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from("You are a helpful assistant.").into(),
-        ChatCompletionRequestUserMessage::from("Explain the borrow checker in two sentences.")
-            .into(),
-    ];
-
-    println!("\n--- Streaming completion ---");
-    print!("Assistant: ");
-    let mut stream = client
-        .complete_streaming_chat(&stream_messages, None)
-        .await?;
-    while let Some(chunk) = stream.next().await {
-        let chunk = chunk?;
-        if let Some(choice) = chunk.choices.first() {
-            if let Some(ref content) = choice.delta.content {
-                print!("{content}");
-                io::stdout().flush().ok();
-            }
-        }
-    }
-    println!("\n");
-    // </streaming>
-
-    // ── 6. Unloadthe model──────────────────────────────────────────────
-    // <cleanup>
-    println!("Unloading model...");
-    model.unload().await?;
-    println!("Done.");
-    // </cleanup>
-
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/foundry-local-webserver-responses-vision/Cargo.toml b/samples/rust/responses-api/Cargo.toml
similarity index 72%
rename from samples/rust/foundry-local-webserver-responses-vision/Cargo.toml
rename to samples/rust/responses-api/Cargo.toml
index 290022f2e..0fa41ce8d 100644
--- a/samples/rust/foundry-local-webserver-responses-vision/Cargo.toml
+++ b/samples/rust/responses-api/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "foundry-local-webserver-responses-vision"
+name = "responses-api"
 version = "0.1.0"
 edition = "2021"
 description = "Vision (image understanding) example using the Foundry Local web service and the OpenAI Responses API"
@@ -11,6 +11,3 @@ serde_json = "1"
 reqwest = { version = "0.12", features = ["json", "stream"] }
 base64 = "0.22"
 futures-util = "0.3"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/responses-api/README.md b/samples/rust/responses-api/README.md
new file mode 100644
index 000000000..7138d123d
--- /dev/null
+++ b/samples/rust/responses-api/README.md
@@ -0,0 +1,21 @@
+# Foundry Local Web Server Responses Vision (Rust)
+
+Starts the local Foundry web server and streams a vision response from the
+OpenAI-compatible Responses API (`/v1/responses`) using a bundled test image by
+default.
+
+The sample tracks the Rust SDK from this repository via
+`foundry-local-sdk = { path = "../../../sdk/rust" }`, so it follows `main`
+instead of pinning to a published crate version.
+
+## Run
+
+```bash
+cd samples/rust
+
+# List vision models and variants
+cargo run -p responses-api -- --list-models
+
+# Run with a model alias or variant id; omit image_path to use test_image.jpg
+cargo run -p responses-api -- qwen3.5-0.8b [image_path]
+```
diff --git a/samples/rust/foundry-local-webserver-responses-vision/src/main.rs b/samples/rust/responses-api/src/main.rs
similarity index 97%
rename from samples/rust/foundry-local-webserver-responses-vision/src/main.rs
rename to samples/rust/responses-api/src/main.rs
index b0c4f831c..0f2ba7753 100644
--- a/samples/rust/foundry-local-webserver-responses-vision/src/main.rs
+++ b/samples/rust/responses-api/src/main.rs
@@ -4,7 +4,7 @@
 
 //! Foundry Local Web Server vision example (Responses API).
 //!
-//! Mirrors `samples/python/web-server-responses-vision`. Starts the local
+//! Mirrors `samples/python/responses-api`. Starts the local
 //! Foundry web service, posts a multimodal request to `/v1/responses` with a
 //! base64-encoded image, and streams the SSE response, printing each
 //! `response.output_text.delta` event.
@@ -24,8 +24,8 @@ const DEFAULT_MODEL_ALIAS: &str = "qwen3.5-0.8b";
 const DEFAULT_MAX_OUTPUT_TOKENS: u64 = 8192;
 
 fn print_usage() {
-    eprintln!("Usage: cargo run -p foundry-local-webserver-responses-vision -- <model_alias_or_id> [image_path]");
-    eprintln!("         cargo run -p foundry-local-webserver-responses-vision -- --list-models");
+    eprintln!("Usage: cargo run -p responses-api -- <model_alias_or_id> [image_path]");
+    eprintln!("         cargo run -p responses-api -- --list-models");
     eprintln!("  Example: ... -- {DEFAULT_MODEL_ALIAS}");
     eprintln!("  Example: ... -- Qwen2.5-VL-7B-Instruct-generic-cpu");
 }
diff --git a/samples/rust/responses-api/test_image.jpg b/samples/rust/responses-api/test_image.jpg
new file mode 100644
index 000000000..73a4e8004
Binary files /dev/null and b/samples/rust/responses-api/test_image.jpg differ
diff --git a/samples/rust/tool-calling-foundry-local/Cargo.toml b/samples/rust/tool-calling-foundry-local/Cargo.toml
deleted file mode 100644
index 8411a2b28..000000000
--- a/samples/rust/tool-calling-foundry-local/Cargo.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-[package]
-name = "tool-calling-foundry-local"
-version = "0.1.0"
-edition = "2021"
-description = "Tool calling example using the Foundry Local Rust SDK"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
-tokio-stream = "0.1"
-serde_json = "1"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/tool-calling-foundry-local/src/main.rs b/samples/rust/tool-calling-foundry-local/src/main.rs
deleted file mode 100644
index 7b96333af..000000000
--- a/samples/rust/tool-calling-foundry-local/src/main.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-// <complete_code>
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <imports>
-use std::io::{self, Write};
-
-use serde_json::{json, Value};
-use tokio_stream::StreamExt;
-
-use foundry_local_sdk::{
-    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestToolMessage, ChatCompletionRequestUserMessage, ChatCompletionTools,
-    ChatToolChoice, FinishReason, FoundryLocalConfig, FoundryLocalManager,
-};
-// </imports>
-
-// By using an alias, the most suitable model variant will be downloaded
-// to your end-user's device.
-const ALIAS: &str = "qwen2.5-0.5b";
-
-// <tool_implementations>
-/// A simple tool that multiplies two numbers.
-fn multiply_numbers(first: f64, second: f64) -> f64 {
-    first * second
-}
-
-/// Dispatch a tool call by name and parsed arguments.
-fn invoke_tool(name: &str, args: &Value) -> String {
-    match name {
-        "multiply_numbers" => {
-            let first = args.get("first").and_then(|v| v.as_f64()).unwrap_or(0.0);
-            let second = args.get("second").and_then(|v| v.as_f64()).unwrap_or(0.0);
-            let result = multiply_numbers(first, second);
-            result.to_string()
-        }
-        _ => format!("Unknown tool: {name}"),
-    }
-}
-// </tool_implementations>
-
-/// Accumulated state from a streaming response that contains tool calls.
-#[derive(Default)]
-struct ToolCallState {
-    tool_calls: Vec<Value>,
-    current_tool_id: String,
-    current_tool_name: String,
-    current_tool_args: String,
-}
-
-#[tokio::main]
-async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("Tool Calling with Foundry Local");
-    println!("===============================\n");
-
-    // ── 1. Initialise the manager ────────────────────────────────────────
-    // <init>
-    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
-    // </init>
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // ── 2. Load a model──────────────────────────────────────────────────
-    // <model_setup>
-    let model = manager.catalog().get_model(ALIAS).await?;
-    println!("Model: {} (id: {})", model.alias(), model.id());
-
-    if !model.is_cached().await? {
-        println!("Downloading model...");
-        model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    println!("Loading model...");
-    model.load().await?;
-    println!("✓ Model loaded\n");
-    // </model_setup>
-
-    // ── 3. Create a chat clientwith tool_choice = required ──────────────
-    let client = model.create_chat_client()
-        .max_tokens(512)
-        .tool_choice(ChatToolChoice::Required);
-
-    // <tool_definitions>
-    // Define the multiply_numbers tool.
-    let tools: Vec<ChatCompletionTools> = serde_json::from_value(json!([{
-        "type": "function",
-        "function": {
-            "name": "multiply_numbers",
-            "description": "A tool for multiplying two numbers.",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "first": {
-                        "type": "integer",
-                        "description": "The first number in the operation"
-                    },
-                    "second": {
-                        "type": "integer",
-                        "description": "The second number in the operation"
-                    }
-                },
-                "required": ["first", "second"]
-            }
-        }
-    }]))?;
-    // </tool_definitions>
-
-    // <tool_loop>
-    // Prepare the initial conversation.
-    let mut messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from(
-            "You are a helpful AI assistant. If necessary, you can use any provided tools to answer the question.",
-        )
-        .into(),
-        ChatCompletionRequestUserMessage::from("What is the answer to 7 multiplied by 6?").into(),
-    ];
-
-    // ── 4. First streaming call – expect tool_calls ──────────────────────
-    println!("Chat completion response:");
-
-    let mut state = ToolCallState::default();
-    let mut stream = client
-        .complete_streaming_chat(&messages, Some(&tools))
-        .await?;
-
-    while let Some(chunk) = stream.next().await {
-        let chunk = chunk?;
-        if let Some(choice) = chunk.choices.first() {
-            // Accumulate streamed content (if any).
-            if let Some(ref content) = choice.delta.content {
-                print!("{content}");
-                io::stdout().flush().ok();
-            }
-
-            // Accumulate tool call fragments.
-            if let Some(ref tool_calls) = choice.delta.tool_calls {
-                for tc in tool_calls {
-                    if let Some(ref id) = tc.id {
-                        state.current_tool_id = id.clone();
-                    }
-                    if let Some(ref func) = tc.function {
-                        if let Some(ref name) = func.name {
-                            state.current_tool_name = name.clone();
-                        }
-                        if let Some(ref args) = func.arguments {
-                            state.current_tool_args.push_str(args);
-                        }
-                    }
-                }
-            }
-
-            // When the model signals finish_reason = ToolCalls, finalise.
-            if choice.finish_reason == Some(FinishReason::ToolCalls) {
-                let tc = json!({
-                    "id": state.current_tool_id.clone(),
-                    "type": "function",
-                    "function": {
-                        "name": state.current_tool_name.clone(),
-                        "arguments": state.current_tool_args.clone(),
-                    }
-                });
-                state.tool_calls.push(tc);
-            }
-        }
-    }
-    println!();
-
-    // ── 5. Execute the tool(s)and append results ────────────────────────
-    for tc in &state.tool_calls {
-        let func = &tc["function"];
-        let name = func["name"].as_str().unwrap_or_default();
-        let args_str = func["arguments"].as_str().unwrap_or("{}");
-        let args: Value = serde_json::from_str(args_str).unwrap_or(json!({}));
-
-        println!("\nInvoking tool: {name} with arguments {args}");
-        let result = invoke_tool(name, &args);
-        println!("Tool response: {result}");
-
-        // Append the assistant's tool_calls message and the tool result.
-        let assistant_msg: ChatCompletionRequestMessage = serde_json::from_value(json!({
-            "role": "assistant",
-            "content": null,
-            "tool_calls": [tc],
-        }))?;
-        messages.push(assistant_msg);
-        messages.push(
-            ChatCompletionRequestToolMessage {
-                content: result.into(),
-                tool_call_id: tc["id"].as_str().unwrap_or_default().to_string(),
-            }
-            .into(),
-        );
-    }
-
-    // ── 6. Continue the conversation with auto tool_choice ───────────────
-    println!("\nTool calls completed. Prompting model to continue conversation...\n");
-
-    messages.push(
-        ChatCompletionRequestSystemMessage::from(
-            "Respond only with the answer generated by the tool.",
-        )
-        .into(),
-    );
-
-    let client = client.tool_choice(ChatToolChoice::Auto);
-
-    print!("Chat completion response: ");
-    let mut stream = client
-        .complete_streaming_chat(&messages, Some(&tools))
-        .await?;
-    while let Some(chunk) = stream.next().await {
-        let chunk = chunk?;
-        if let Some(choice) = chunk.choices.first() {
-            if let Some(ref content) = choice.delta.content {
-                print!("{content}");
-                io::stdout().flush().ok();
-            }
-        }
-    }
-    println!("\n");
-    // </tool_loop>
-
-    // ── 7. Clean up──────────────────────────────────────────────────────
-    // <cleanup>
-    println!("Unloading model...");
-    model.unload().await?;
-    println!("Done.");
-    // </cleanup>
-
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/tutorial-chat-assistant/Cargo.toml b/samples/rust/tutorial-chat-assistant/Cargo.toml
deleted file mode 100644
index 5ff39b778..000000000
--- a/samples/rust/tutorial-chat-assistant/Cargo.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-[package]
-name = "tutorial-chat-assistant"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["full"] }
-tokio-stream = "0.1"
-anyhow = "1"
-serde_json = "1"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/tutorial-chat-assistant/src/main.rs b/samples/rust/tutorial-chat-assistant/src/main.rs
deleted file mode 100644
index ab98460cd..000000000
--- a/samples/rust/tutorial-chat-assistant/src/main.rs
+++ /dev/null
@@ -1,120 +0,0 @@
-// <complete_code>
-// <imports>
-use foundry_local_sdk::{
-    ChatCompletionRequestMessage,
-    ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage,
-    FoundryLocalConfig, FoundryLocalManager,
-};
-use std::io::{self, BufRead, Write};
-use tokio_stream::StreamExt;
-// </imports>
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    // <init>
-    // Initialize the Foundry Local SDK
-    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("chat-assistant"))?;
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // Select and load a model from the catalog
-    let model = manager.catalog().get_model("qwen2.5-0.5b").await?;
-
-    if !model.is_cached().await? {
-        println!("Downloading model...");
-        model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    model.load().await?;
-    println!("Model loaded and ready.");
-
-    // Create a chat client
-    let client = model.create_chat_client().temperature(0.7).max_tokens(512);
-    // </init>
-
-    // <system_prompt>
-    // Start the conversation with a system prompt
-    let mut messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from(
-            "You are a helpful, friendly assistant. Keep your responses \
-             concise and conversational. If you don't know something, say so.",
-        )
-        .into(),
-    ];
-    // </system_prompt>
-
-    println!("\nChat assistant ready! Type 'quit' to exit.\n");
-
-    let stdin = io::stdin();
-    // <conversation_loop>
-    loop {
-        print!("You: ");
-        io::stdout().flush()?;
-
-        let mut input = String::new();
-        stdin.lock().read_line(&mut input)?;
-        let input = input.trim();
-
-        if input.eq_ignore_ascii_case("quit") || input.eq_ignore_ascii_case("exit") {
-            break;
-        }
-
-        // Add the user's message to conversation history
-        messages.push(ChatCompletionRequestUserMessage::from(input).into());
-
-        // <streaming>
-        // Stream the response token by token
-        print!("Assistant: ");
-        io::stdout().flush()?;
-        let mut full_response = String::new();
-        let mut stream = client.complete_streaming_chat(&messages, None).await?;
-        while let Some(chunk) = stream.next().await {
-            let chunk = chunk?;
-            if let Some(choice) = chunk.choices.first() {
-                if let Some(ref content) = choice.delta.content {
-                    print!("{content}");
-                    io::stdout().flush()?;
-                    full_response.push_str(content);
-                }
-            }
-        }
-        println!("\n");
-        // </streaming>
-
-        // Add the complete response to conversation history
-        let assistant_msg: ChatCompletionRequestMessage = serde_json::from_value(
-            serde_json::json!({"role": "assistant", "content": full_response}),
-        )?;
-        messages.push(assistant_msg);
-    }
-    // </conversation_loop>
-
-    // Clean up - unload the model
-    model.unload().await?;
-    println!("Model unloaded. Goodbye!");
-
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/tutorial-document-summarizer/Cargo.toml b/samples/rust/tutorial-document-summarizer/Cargo.toml
deleted file mode 100644
index f80398e7a..000000000
--- a/samples/rust/tutorial-document-summarizer/Cargo.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[package]
-name = "tutorial-document-summarizer"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["full"] }
-tokio-stream = "0.1"
-anyhow = "1"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/tutorial-document-summarizer/src/main.rs b/samples/rust/tutorial-document-summarizer/src/main.rs
deleted file mode 100644
index 6e6e8e4a6..000000000
--- a/samples/rust/tutorial-document-summarizer/src/main.rs
+++ /dev/null
@@ -1,175 +0,0 @@
-// <complete_code>
-// <imports>
-use foundry_local_sdk::{
-    ChatCompletionRequestMessage,
-    ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestUserMessage, FoundryLocalConfig,
-    FoundryLocalManager,
-};
-use std::io::{self, Write};
-use std::path::Path;
-use std::{env, fs};
-// </imports>
-
-async fn summarize_file(
-    client: &foundry_local_sdk::openai::ChatClient,
-    file_path: &Path,
-    system_prompt: &str,
-) -> anyhow::Result<()> {
-    let content = fs::read_to_string(file_path)?;
-    let messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from(system_prompt)
-            .into(),
-        ChatCompletionRequestUserMessage::from(content.as_str())
-            .into(),
-    ];
-
-    let response =
-        client.complete_chat(&messages, None).await?;
-    let summary = response.choices[0]
-        .message
-        .content
-        .as_deref()
-        .unwrap_or("");
-    println!("{}", summary);
-    Ok(())
-}
-
-async fn summarize_directory(
-    client: &foundry_local_sdk::openai::ChatClient,
-    directory: &Path,
-    system_prompt: &str,
-) -> anyhow::Result<()> {
-    let mut txt_files: Vec<_> = fs::read_dir(directory)?
-        .filter_map(|entry| entry.ok())
-        .filter(|entry| {
-            entry
-                .path()
-                .extension()
-                .map(|ext| ext == "txt")
-                .unwrap_or(false)
-        })
-        .collect();
-
-    txt_files.sort_by_key(|e| e.path());
-
-    if txt_files.is_empty() {
-        println!(
-            "No .txt files found in {}",
-            directory.display()
-        );
-        return Ok(());
-    }
-
-    for entry in &txt_files {
-        let file_name = entry.file_name();
-        println!(
-            "--- {} ---",
-            file_name.to_string_lossy()
-        );
-        summarize_file(
-            client,
-            &entry.path(),
-            system_prompt,
-        )
-        .await?;
-        println!();
-    }
-
-    Ok(())
-}
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    // <init>
-    // Initialize the Foundry Local SDK
-    let manager = FoundryLocalManager::create(
-        FoundryLocalConfig::new("doc-summarizer"),
-    )?;
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // Select and load a model from the catalog
-    let model = manager
-        .catalog()
-        .get_model("qwen2.5-0.5b")
-        .await?;
-
-    if !model.is_cached().await? {
-        println!("Downloading model...");
-        model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    model.load().await?;
-    println!("Model loaded and ready.\n");
-
-    // Create a chat client
-    let client = model
-        .create_chat_client()
-        .temperature(0.7)
-        .max_tokens(512);
-    // </init>
-
-    // <summarization>
-    let system_prompt = "Summarize the following document \
-         into concise bullet points. Focus on the key \
-         points and main ideas.";
-
-    // <file_reading>
-    let target = env::args()
-        .nth(1)
-        .unwrap_or_else(|| "document.txt".to_string());
-    let target_path = Path::new(&target);
-    // </file_reading>
-
-    if target_path.is_dir() {
-        summarize_directory(
-            &client,
-            target_path,
-            system_prompt,
-        )
-        .await?;
-    } else {
-        let file_name = target_path
-            .file_name()
-            .map(|n| n.to_string_lossy().to_string())
-            .unwrap_or_else(|| target.clone());
-        println!("--- {} ---", file_name);
-        summarize_file(
-            &client,
-            target_path,
-            system_prompt,
-        )
-        .await?;
-    }
-    // </summarization>
-
-    // Clean up
-    model.unload().await?;
-    println!("\nModel unloaded. Done!");
-
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/tutorial-tool-calling/Cargo.toml b/samples/rust/tutorial-tool-calling/Cargo.toml
deleted file mode 100644
index 5707d6967..000000000
--- a/samples/rust/tutorial-tool-calling/Cargo.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-[package]
-name = "tutorial-tool-calling"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["full"] }
-tokio-stream = "0.1"
-anyhow = "1"
-serde_json = "1"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/tutorial-tool-calling/src/main.rs b/samples/rust/tutorial-tool-calling/src/main.rs
deleted file mode 100644
index 131e4ad5e..000000000
--- a/samples/rust/tutorial-tool-calling/src/main.rs
+++ /dev/null
@@ -1,348 +0,0 @@
-// <complete_code>
-// <imports>
-use foundry_local_sdk::{
-    ChatCompletionRequestMessage,
-    ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestToolMessage,
-    ChatCompletionRequestUserMessage,
-    ChatCompletionMessageToolCalls,
-    ChatCompletionTools, ChatToolChoice,
-    FoundryLocalConfig, FoundryLocalManager,
-};
-use serde_json::{json, Value};
-use std::io::{self, BufRead, Write};
-// </imports>
-
-// <tool_implementations>
-// --- Tool implementations ---
-fn execute_tool(
-    name: &str,
-    arguments: &Value,
-) -> Value {
-    match name {
-        "get_weather" => {
-            let location = arguments["location"]
-                .as_str()
-                .unwrap_or("unknown");
-            let unit = arguments["unit"]
-                .as_str()
-                .unwrap_or("celsius");
-            let temp = if unit == "celsius" { 22 } else { 72 };
-            json!({
-                "location": location,
-                "temperature": temp,
-                "unit": unit,
-                "condition": "Sunny"
-            })
-        }
-        "calculate" => {
-            let expression = arguments["expression"]
-                .as_str()
-                .unwrap_or("");
-            let is_valid = expression
-                .chars()
-                .all(|c| "0123456789+-*/(). ".contains(c));
-            if !is_valid {
-                return json!({"error": "Invalid expression"});
-            }
-            match eval_expression(expression) {
-                Ok(result) => json!({
-                    "expression": expression,
-                    "result": result
-                }),
-                Err(e) => json!({"error": e}),
-            }
-        }
-        _ => json!({"error": format!("Unknown function: {}", name)}),
-    }
-}
-
-fn eval_expression(expr: &str) -> Result<f64, String> {
-    let expr = expr.replace(' ', "");
-    let chars: Vec<char> = expr.chars().collect();
-    let mut pos = 0;
-    let result = parse_add(&chars, &mut pos)?;
-    if pos < chars.len() {
-        return Err("Unexpected character".to_string());
-    }
-    Ok(result)
-}
-
-fn parse_add(
-    chars: &[char],
-    pos: &mut usize,
-) -> Result<f64, String> {
-    let mut result = parse_mul(chars, pos)?;
-    while *pos < chars.len()
-        && (chars[*pos] == '+' || chars[*pos] == '-')
-    {
-        let op = chars[*pos];
-        *pos += 1;
-        let right = parse_mul(chars, pos)?;
-        result = if op == '+' {
-            result + right
-        } else {
-            result - right
-        };
-    }
-    Ok(result)
-}
-
-fn parse_mul(
-    chars: &[char],
-    pos: &mut usize,
-) -> Result<f64, String> {
-    let mut result = parse_atom(chars, pos)?;
-    while *pos < chars.len()
-        && (chars[*pos] == '*' || chars[*pos] == '/')
-    {
-        let op = chars[*pos];
-        *pos += 1;
-        let right = parse_atom(chars, pos)?;
-        result = if op == '*' {
-            result * right
-        } else {
-            result / right
-        };
-    }
-    Ok(result)
-}
-
-fn parse_atom(
-    chars: &[char],
-    pos: &mut usize,
-) -> Result<f64, String> {
-    if *pos < chars.len() && chars[*pos] == '(' {
-        *pos += 1;
-        let result = parse_add(chars, pos)?;
-        if *pos < chars.len() && chars[*pos] == ')' {
-            *pos += 1;
-        }
-        return Ok(result);
-    }
-    let start = *pos;
-    while *pos < chars.len()
-        && (chars[*pos].is_ascii_digit() || chars[*pos] == '.')
-    {
-        *pos += 1;
-    }
-    if start == *pos {
-        return Err("Expected number".to_string());
-    }
-    let num_str: String = chars[start..*pos].iter().collect();
-    num_str.parse::<f64>().map_err(|e| e.to_string())
-}
-// </tool_implementations>
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    // <tool_definitions>
-    // --- Tool definitions ---
-    let tools: Vec<ChatCompletionTools> = serde_json::from_value(json!([
-        {
-            "type": "function",
-            "function": {
-                "name": "get_weather",
-                "description":
-                    "Get the current weather for a location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description":
-                                "The city or location"
-                        },
-                        "unit": {
-                            "type": "string",
-                            "enum": ["celsius", "fahrenheit"],
-                            "description": "Temperature unit"
-                        }
-                    },
-                    "required": ["location"]
-                }
-            }
-        },
-        {
-            "type": "function",
-            "function": {
-                "name": "calculate",
-                "description": "Perform a math calculation",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "expression": {
-                            "type": "string",
-                            "description":
-                                "The math expression to evaluate"
-                        }
-                    },
-                    "required": ["expression"]
-                }
-            }
-        }
-    ]))?;
-    // </tool_definitions>
-
-    // <init>
-    // Initialize the Foundry Local SDK
-    let manager = FoundryLocalManager::create(
-        FoundryLocalConfig::new("tool-calling-app"),
-    )?;
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // Select and load a model
-    let model = manager
-        .catalog()
-        .get_model("qwen2.5-0.5b")
-        .await?;
-
-    if !model.is_cached().await? {
-        println!("Downloading model...");
-        model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    model.load().await?;
-    println!("Model loaded and ready.");
-
-    // Create a chat client
-    let client = model
-        .create_chat_client()
-        .temperature(0.7)
-        .max_tokens(512)
-        .tool_choice(ChatToolChoice::Auto);
-
-    // Conversation with a system prompt
-    let mut messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from(
-            "You are a helpful assistant with access to tools. \
-             Use them when needed to answer questions accurately.",
-        )
-        .into(),
-    ];
-    // </init>
-
-    // <tool_loop>
-    println!(
-        "\nTool-calling assistant ready! Type 'quit' to exit.\n"
-    );
-
-    let stdin = io::stdin();
-    loop {
-        print!("You: ");
-        io::stdout().flush()?;
-
-        let mut input = String::new();
-        stdin.lock().read_line(&mut input)?;
-        let input = input.trim();
-
-        if input.eq_ignore_ascii_case("quit")
-            || input.eq_ignore_ascii_case("exit")
-        {
-            break;
-        }
-
-        messages.push(
-            ChatCompletionRequestUserMessage::from(input).into(),
-        );
-
-        let mut response = client
-            .complete_chat(&messages, Some(&tools))
-            .await?;
-
-        // Process tool calls in a loop
-        while response.choices[0].message.tool_calls.is_some() {
-            let tool_calls = response.choices[0]
-                .message
-                .tool_calls
-                .as_ref()
-                .unwrap();
-
-            // Append the assistant's tool_calls message via JSON
-            let assistant_msg: ChatCompletionRequestMessage =
-                serde_json::from_value(json!({
-                    "role": "assistant",
-                    "content": null,
-                    "tool_calls": tool_calls,
-                }))?;
-            messages.push(assistant_msg);
-
-            for tc_enum in tool_calls {
-                let tool_call = match tc_enum {
-                    ChatCompletionMessageToolCalls::Function(
-                        tc,
-                    ) => tc,
-                    _ => continue,
-                };
-                let function_name =
-                    &tool_call.function.name;
-                let arguments: Value =
-                    serde_json::from_str(
-                        &tool_call.function.arguments,
-                    )?;
-                println!(
-                    "  Tool call: {}({})",
-                    function_name, arguments
-                );
-
-                let result =
-                    execute_tool(function_name, &arguments);
-                messages.push(
-                    ChatCompletionRequestToolMessage {
-                        content: result.to_string().into(),
-                        tool_call_id: tool_call.id.clone(),
-                    }
-                    .into(),
-                );
-            }
-
-            response = client
-                .complete_chat(&messages, Some(&tools))
-                .await?;
-        }
-
-        let answer = response.choices[0]
-            .message
-            .content
-            .as_deref()
-            .unwrap_or("");
-        let assistant_msg: ChatCompletionRequestMessage =
-            serde_json::from_value(json!({
-                "role": "assistant",
-                "content": answer,
-            }))?;
-        messages.push(assistant_msg);
-        println!("Assistant: {}\n", answer);
-    }
-
-    // Clean up
-    model.unload().await?;
-    println!("Model unloaded. Goodbye!");
-    // </tool_loop>
-
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/tutorial-voice-to-text/Cargo.toml b/samples/rust/tutorial-voice-to-text/Cargo.toml
deleted file mode 100644
index 6abf6052a..000000000
--- a/samples/rust/tutorial-voice-to-text/Cargo.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-[package]
-name = "tutorial-voice-to-text"
-version = "0.1.0"
-edition = "2021"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust" }
-tokio = { version = "1", features = ["full"] }
-tokio-stream = "0.1"
-anyhow = "1"
-
-[target.'cfg(windows)'.dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
diff --git a/samples/rust/tutorial-voice-to-text/src/main.rs b/samples/rust/tutorial-voice-to-text/src/main.rs
deleted file mode 100644
index d5be04c2e..000000000
--- a/samples/rust/tutorial-voice-to-text/src/main.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-// <complete_code>
-// <imports>
-use foundry_local_sdk::{
-    ChatCompletionRequestMessage,
-    ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestUserMessage,
-    FoundryLocalConfig, FoundryLocalManager,
-};
-use std::io::{self, Write};
-// </imports>
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    // <init>
-    // Initialize the Foundry Local SDK
-    let manager = FoundryLocalManager::create(
-        FoundryLocalConfig::new("note-taker"),
-    )?;
-    // </init>
-
-    // Download and register all execution providers.
-    manager
-        .download_and_register_eps_with_progress(None, {
-            let mut current_ep = String::new();
-            move |ep_name: &str, percent: f64| {
-                if ep_name != current_ep {
-                    if !current_ep.is_empty() {
-                        println!();
-                    }
-                    current_ep = ep_name.to_string();
-                }
-                print!("\r  {:<30}  {:5.1}%", ep_name, percent);
-                io::stdout().flush().ok();
-            }
-        })
-        .await?;
-    println!();
-
-    // <transcription>
-    // Load the speech-to-text model
-    let speech_model = manager
-        .catalog()
-        .get_model("whisper-tiny")
-        .await?;
-
-    if !speech_model.is_cached().await? {
-        println!("Downloading speech model...");
-        speech_model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    speech_model.load().await?;
-    println!("Speech model loaded.");
-
-    // Transcribe the audio file
-    let audio_client = speech_model.create_audio_client();
-    let transcription = audio_client
-        .transcribe("meeting-notes.wav")
-        .await?;
-    println!("\nTranscription:\n{}", transcription.text);
-
-    // Unload the speech model to free memory
-    speech_model.unload().await?;
-    // </transcription>
-
-    // <summarization>
-    // Load the chat model for summarization
-    let chat_model = manager
-        .catalog()
-        .get_model("qwen2.5-0.5b")
-        .await?;
-
-    if !chat_model.is_cached().await? {
-        println!("Downloading chat model...");
-        chat_model
-            .download(Some(|progress: f64| {
-                print!("\r  {progress:.1}%");
-                io::stdout().flush().ok();
-            }))
-            .await?;
-        println!();
-    }
-
-    chat_model.load().await?;
-    println!("Chat model loaded.");
-
-    // Summarize the transcription into organized notes
-    let client = chat_model
-        .create_chat_client()
-        .temperature(0.7)
-        .max_tokens(512);
-
-    let messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from(
-            "You are a note-taking assistant. Summarize \
-             the following transcription into organized, \
-             concise notes with bullet points.",
-        )
-        .into(),
-        ChatCompletionRequestUserMessage::from(
-            transcription.text.as_str(),
-        )
-        .into(),
-    ];
-
-    let response = client
-        .complete_chat(&messages, None)
-        .await?;
-    let summary = response.choices[0]
-        .message
-        .content
-        .as_deref()
-        .unwrap_or("");
-    println!("\nSummary:\n{}", summary);
-
-    // Clean up
-    chat_model.unload().await?;
-    println!("\nDone. Models unloaded.");
-    // </summarization>
-
-    Ok(())
-}
-// </complete_code>
diff --git a/samples/rust/verify-winml/Cargo.toml b/samples/rust/verify-winml/Cargo.toml
deleted file mode 100644
index 6ca2cf275..000000000
--- a/samples/rust/verify-winml/Cargo.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-[package]
-name = "verify-winml"
-version = "1.0.0"
-edition = "2021"
-
-[dependencies]
-foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] }
-tokio = { version = "1", features = ["full"] }
-tokio-stream = "0.1"
-anyhow = "1"
diff --git a/samples/rust/verify-winml/README.md b/samples/rust/verify-winml/README.md
deleted file mode 100644
index 57eb83ced..000000000
--- a/samples/rust/verify-winml/README.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# Verify WinML 2.0 Execution Providers (Rust)
-
-This sample verifies that WinML 2.0 execution providers are correctly discovered,
-downloaded, and registered using the Foundry Local Rust SDK. It uses registered WinML
-EP-backed model variants and finishes with one native streaming chat check.
-
-## Prerequisites
-
-- Windows with a compatible GPU
-- Rust toolchain
-
-## Build & Run
-
-This sample enables the Rust SDK's `winml` feature and the SDK build script
-downloads the pinned `Microsoft.AI.Foundry.Local.Core.WinML` package from the
-configured NuGet feeds during the build.
-
-```bash
-cargo run
-```
diff --git a/samples/rust/verify-winml/src/main.rs b/samples/rust/verify-winml/src/main.rs
deleted file mode 100644
index bd090130f..000000000
--- a/samples/rust/verify-winml/src/main.rs
+++ /dev/null
@@ -1,327 +0,0 @@
-// Foundry Local SDK - WinML 2.0 EP Verification (Rust)
-//
-// Verifies:
-//   1. Execution providers are discovered and registered
-//   2. Accelerated models appear in catalog after EP registration
-//   3. Streaming chat completions work on an accelerated model
-
-use foundry_local_sdk::{
-    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestUserMessage, DeviceType, FoundryLocalConfig,
-    FoundryLocalManager, Model,
-};
-use std::io::{self, Write};
-use tokio_stream::StreamExt;
-
-const PASS: &str = "\x1b[92m[PASS]\x1b[0m";
-const FAIL: &str = "\x1b[91m[FAIL]\x1b[0m";
-const INFO: &str = "\x1b[94m[INFO]\x1b[0m";
-const WARN: &str = "\x1b[93m[WARN]\x1b[0m";
-
-fn is_accelerated_variant(model: &Model) -> bool {
-    model.info()
-        .runtime
-        .as_ref()
-        .map(|rt| matches!(rt.device_type, DeviceType::GPU | DeviceType::NPU))
-        .unwrap_or(false)
-}
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    let mut results: Vec<(&str, bool)> = Vec::new();
-
-    // ── 0. Initialize FoundryLocalManager ──────────────────────
-    println!("\n{}", "=".repeat(60));
-    println!("  Initialization");
-    println!("{}\n", "=".repeat(60));
-
-    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("verify_winml"))?;
-    println!("{INFO} FoundryLocalManager initialized.");
-
-    // ── 1. Discover & Register EPs ────────────────────────────
-    println!("\n{}", "=".repeat(60));
-    println!("  Step 1: Discover & Register Execution Providers");
-    println!("{}\n", "=".repeat(60));
-
-    let eps = match manager.discover_eps() {
-        Ok(eps) => {
-            println!("{INFO} Discovered {} execution providers:", eps.len());
-            for ep in &eps {
-                println!("  - {:<40}  Registered: {}", ep.name, ep.is_registered);
-            }
-
-            let detail = format!("{} EP(s) found", eps.len());
-            println!("{PASS} EP Discovery - {detail}");
-            results.push(("EP Discovery", true));
-            eps
-        }
-        Err(e) => {
-            println!("{FAIL} EP Discovery - {e}");
-            results.push(("EP Discovery", false));
-            Vec::new()
-        }
-    };
-
-    if eps.is_empty() {
-        let detail = "No execution providers discovered on this machine";
-        println!("{FAIL} EP Download & Registration - {detail}");
-        println!("\n{FAIL} {detail}.");
-        results.push(("EP Download & Registration", false));
-        print_summary(&results);
-        return Ok(());
-    }
-
-    match manager.download_and_register_eps_with_progress(None, {
-        let mut last_progress_ep: Option<String> = None;
-        let mut last_progress_percent = -1.0f64;
-
-        move |ep_name: &str, percent: f64| {
-            if last_progress_ep
-                .as_ref()
-                .map(|current| current != ep_name || percent < last_progress_percent)
-                .unwrap_or(false)
-            {
-                println!();
-            }
-
-            last_progress_ep = Some(ep_name.to_string());
-            last_progress_percent = percent;
-            print!("\r  Downloading {ep_name}: {percent:.1}%");
-            io::stdout().flush().ok();
-        }
-    }).await {
-        Ok(result) => {
-            println!();
-            println!(
-                "{INFO} EP registration result: success={}, status={}",
-                result.success, result.status
-            );
-            if !result.registered_eps.is_empty() {
-                println!("  Registered: {}", result.registered_eps.join(", "));
-            }
-            if !result.failed_eps.is_empty() {
-                println!("  Failed:     {}", result.failed_eps.join(", "));
-            }
-
-            let download_ok = result.success;
-            let status = if download_ok { PASS } else { FAIL };
-            let detail = if download_ok && !result.registered_eps.is_empty() {
-                format!("{} EP(s) registered", result.registered_eps.len())
-            } else {
-                result.status.clone()
-            };
-            println!("{status} EP Download & Registration - {detail}");
-            results.push(("EP Download & Registration", download_ok));
-
-            if !download_ok {
-                print_summary(&results);
-                return Ok(());
-            }
-        }
-        Err(e) => {
-            println!();
-            println!("{FAIL} EP Download & Registration - {e}");
-            results.push(("EP Download & Registration", false));
-            print_summary(&results);
-            return Ok(());
-        }
-    }
-
-    // ── 2. List Models & Find Accelerated Variants ────────────
-    println!("\n{}", "=".repeat(60));
-    println!("  Step 2: Model Catalog - Accelerated Models");
-    println!("{}\n", "=".repeat(60));
-
-    let models = manager.catalog().get_models().await?;
-    println!("{INFO} Total models in catalog: {}", models.len());
-
-    let mut accelerated_variants = Vec::new();
-    for model in &models {
-        for variant in model.variants() {
-            if is_accelerated_variant(variant.as_ref()) {
-                let device = variant
-                    .info()
-                    .runtime
-                    .as_ref()
-                    .map(|rt| format!("{:?}", rt.device_type))
-                    .unwrap_or_else(|| "?".to_string());
-                let ep = variant
-                    .info()
-                    .runtime
-                    .as_ref()
-                    .map(|rt| rt.execution_provider.as_str())
-                    .unwrap_or("?");
-                println!(
-                    "  - {:<50}  Device: {:<3}  EP: {}",
-                    variant.id(),
-                    device,
-                    ep
-                );
-                accelerated_variants.push(variant);
-            }
-        }
-    }
-
-    println!("{INFO} Accelerated model variants: {}", accelerated_variants.len());
-    let has_accelerated_models = !accelerated_variants.is_empty();
-    let status = if has_accelerated_models { PASS } else { FAIL };
-    println!(
-        "{status} Catalog - Accelerated models found - {} accelerated variant(s)",
-        accelerated_variants.len()
-    );
-    results.push(("Catalog - Accelerated models found", has_accelerated_models));
-
-    if accelerated_variants.is_empty() {
-        println!("\n{FAIL} No accelerated model variants are available.");
-        println!("{WARN} Ensure the system has a compatible accelerator and matching model variants installed.");
-        print_summary(&results);
-        return Ok(());
-    }
-
-    // ── 3. Download & Load Model ──────────────────────────────
-    println!("\n{}", "=".repeat(60));
-    println!("  Step 3: Download & Load Model");
-    println!("{}\n", "=".repeat(60));
-
-    let mut model = None;
-    let mut downloaded_any = false;
-    let mut last_load_error: Option<String> = None;
-
-    for candidate in accelerated_variants {
-        let candidate_ep = candidate
-            .info()
-            .runtime
-            .as_ref()
-            .map(|rt| rt.execution_provider.as_str())
-            .unwrap_or("unknown");
-        println!("\n{INFO} Trying model: {} (EP: {candidate_ep})", candidate.id());
-
-        if !candidate.is_cached().await? {
-            match candidate
-                .download(Some(|progress: f64| {
-                    print!("\r  Downloading model: {progress:.1}%");
-                    io::stdout().flush().ok();
-                }))
-                .await
-            {
-                Ok(_) => {
-                    println!();
-                    downloaded_any = true;
-                }
-                Err(e) => {
-                    println!();
-                    println!("{WARN} Skipping {}: download failed: {e}", candidate.id());
-                    last_load_error = Some(e.to_string());
-                    continue;
-                }
-            }
-        } else {
-            println!("{INFO} Model already cached");
-            downloaded_any = true;
-        }
-
-        match candidate.load().await {
-            Ok(_) => {
-                model = Some(candidate);
-                break;
-            }
-            Err(e) => {
-                println!("{WARN} Skipping {}: load failed: {e}", candidate.id());
-                last_load_error = Some(e.to_string());
-            }
-        }
-    }
-
-    let download_status = if downloaded_any { PASS } else { FAIL };
-    let download_detail = if downloaded_any {
-        "At least one accelerated variant downloaded".to_string()
-    } else {
-        last_load_error
-            .clone()
-            .unwrap_or_else(|| "No accelerated variant could be downloaded".to_string())
-    };
-    println!("{download_status} Model Download - {download_detail}");
-    results.push(("Model Download", downloaded_any));
-
-    let Some(model) = model else {
-        let detail = last_load_error
-            .unwrap_or_else(|| "No accelerated variant could be loaded on this machine".to_string());
-        println!("{FAIL} Model Load - {detail}");
-        results.push(("Model Load", false));
-        print_summary(&results);
-        return Ok(());
-    };
-
-    println!("{PASS} Model Load - Loaded {}", model.id());
-    results.push(("Model Load", true));
-
-    // ── 4. Streaming Chat Completions ────────────────────────
-    println!("\n{}", "=".repeat(60));
-    println!("  Step 4: Streaming Chat Completions");
-    println!("{}\n", "=".repeat(60));
-
-    let messages: Vec<ChatCompletionRequestMessage> = vec![
-        ChatCompletionRequestSystemMessage::from("You are a helpful assistant.").into(),
-        ChatCompletionRequestUserMessage::from("What is 2 + 2? Reply with just the number.").into(),
-    ];
-
-    let client = model.create_chat_client().temperature(0.0).max_tokens(16);
-    match client.complete_streaming_chat(&messages, None).await {
-        Ok(mut stream) => {
-            let mut full_response = String::new();
-            let start = std::time::Instant::now();
-            while let Some(chunk) = stream.next().await {
-                match chunk {
-                    Ok(c) => {
-                        if let Some(text) = c
-                            .choices
-                            .first()
-                            .and_then(|ch| ch.delta.content.as_deref())
-                        {
-                            print!("{text}");
-                            io::stdout().flush().ok();
-                            full_response.push_str(text);
-                        }
-                    }
-                    Err(e) => {
-                        println!("\n{FAIL} Streaming chunk error: {e}");
-                        break;
-                    }
-                }
-            }
-            let elapsed = start.elapsed().as_secs_f64();
-            println!();
-            let ok = !full_response.is_empty();
-            let status = if ok { PASS } else { FAIL };
-            println!(
-                "{status} Streaming Chat - {} chars in {elapsed:.2}s",
-                full_response.len()
-            );
-            results.push(("Streaming Chat", ok));
-        }
-        Err(e) => {
-            println!("{FAIL} Streaming Chat - {e}");
-            results.push(("Streaming Chat", false));
-        }
-    }
-
-    if let Err(e) = model.unload().await {
-        println!("{WARN} Failed to unload model: {e}");
-    } else {
-        println!("{INFO} Model unloaded.");
-    }
-
-    print_summary(&results);
-    Ok(())
-}
-
-fn print_summary(results: &[(&str, bool)]) {
-    println!("\n{}", "=".repeat(60));
-    println!("  Summary");
-    println!("{}\n", "=".repeat(60));
-    let passed = results.iter().filter(|(_, p)| *p).count();
-    for (name, p) in results {
-        println!("  {} {name}", if *p { "✓" } else { "✗" });
-    }
-    println!("\n  {passed}/{} tests passed", results.len());
-}
diff --git a/sdk/rust/README.md b/sdk/rust/README.md
index dbd9906a2..562b30882 100644
--- a/sdk/rust/README.md
+++ b/sdk/rust/README.md
@@ -569,16 +569,16 @@ Sample applications are available in [`samples/rust/`](../../samples/rust/):
 
 | Sample | Description |
 |--------|-------------|
-| `native-chat-completions` | Non-streaming and streaming chat completions |
-| `tool-calling-foundry-local` | Function/tool calling with multi-turn conversations |
-| `audio-transcription-example` | Audio transcription (non-streaming and streaming) |
-| `foundry-local-webserver` | Embedded OpenAI-compatible REST API server |
+| `chat-completion` | Chat completions via native inference and the local web server (`/v1/chat/completions`) |
+| `embeddings` | Text embeddings (single and batch) |
+| `audio` | Live microphone transcription (Nemotron) and file-based transcription (Whisper, `--file`) |
+| `responses-api` | Vision via the local web server Responses API (`/v1/responses`) |
 
 Run a sample with:
 
 ```sh
 cd samples/rust
-cargo run -p native-chat-completions
+cargo run -p chat-completion
 ```
 
 ## License
diff --git a/sdk_v2/python/README.md b/sdk_v2/python/README.md
index 3d7b1bbc3..664685dfb 100644
--- a/sdk_v2/python/README.md
+++ b/sdk_v2/python/README.md
@@ -345,7 +345,7 @@ model.unload()
 
 ### Audio transcription
 
-`AudioSession` accepts `AudioItem` input (PCM bytes + sample rate / channels) and produces `TextItem` output. See [`samples/python/audio-transcription/`](https://github.com/microsoft/Foundry-Local/tree/main/samples/python/audio-transcription) and [`live-audio-transcription/`](https://github.com/microsoft/Foundry-Local/tree/main/samples/python/live-audio-transcription) for runnable end-to-end examples covering offline files and live PCM streaming through an `ItemQueue`.
+`AudioSession` accepts `AudioItem` input (PCM bytes + sample rate / channels) and produces `TextItem` output. See [`samples/python/audio/`](https://github.com/microsoft/Foundry-Local/tree/main/samples/python/audio) for a runnable end-to-end example covering both offline files and live PCM streaming through an `ItemQueue`.
 
 ### Web service (optional)