From 657ce7b5ad682a37f0a336e92f75b9f5fb9152f1 Mon Sep 17 00:00:00 2001 From: rui-ren Date: Thu, 21 May 2026 23:11:13 -0700 Subject: [PATCH 1/7] support multi-lingual --- samples/cpp/live-audio-transcription/main.cpp | 9 ++++++++- samples/cs/live-audio-transcription/Program.cs | 7 +++++++ samples/js/live-audio-transcription/app.js | 7 +++++++ samples/python/live-audio-transcription/src/app.py | 9 ++++++++- samples/rust/live-audio-transcription/src/main.rs | 7 +++++++ 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/samples/cpp/live-audio-transcription/main.cpp b/samples/cpp/live-audio-transcription/main.cpp index 5c94d6180..1055ce78e 100644 --- a/samples/cpp/live-audio-transcription/main.cpp +++ b/samples/cpp/live-audio-transcription/main.cpp @@ -126,9 +126,12 @@ int main(int argc, char* argv[]) { manager.DownloadAndRegisterEps(nullptr, isCancellationRequested); auto& catalog = manager.GetCatalog(); + // English-only: auto* model = catalog.GetModel("nemotron-speech-streaming-en-0.6b"); + // Multi-lingual (supports 30+ languages including auto-detect): + // auto* model = catalog.GetModel("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4"); if (!model) { - throw std::runtime_error("Model \"nemotron-speech-streaming-en-0.6b\" not found in catalog"); + throw std::runtime_error("Model not found in catalog"); } std::cout << "Downloading model (if needed)..." 
<< std::endl; @@ -152,6 +155,10 @@ int main(int argc, char* argv[]) { session->Settings().channels = 1; session->Settings().bits_per_sample = 16; session->Settings().language = "en"; + // Multi-lingual examples: + // session->Settings().language = "de"; // German + // session->Settings().language = "zh-CN"; // Chinese (Simplified) + // session->Settings().language = "auto"; // Auto-detect language session->Start(); std::cout << "Session started" << std::endl; diff --git a/samples/cs/live-audio-transcription/Program.cs b/samples/cs/live-audio-transcription/Program.cs index 5c8b8988f..12342b514 100644 --- a/samples/cs/live-audio-transcription/Program.cs +++ b/samples/cs/live-audio-transcription/Program.cs @@ -24,7 +24,10 @@ var catalog = await mgr.GetCatalogAsync(); +// English-only: var model = await catalog.GetModelAsync("nemotron-speech-streaming-en-0.6b") ?? throw new Exception("Model \"nemotron-speech-streaming-en-0.6b\" not found in catalog"); +// Multi-lingual (supports 30+ languages including auto-detect): +// var model = await catalog.GetModelAsync("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4") ?? 
throw new Exception("Model not found"); await model.DownloadAsync(progress => { @@ -44,6 +47,10 @@ await model.DownloadAsync(progress => session.Settings.SampleRate = 16000; // Default is 16000; shown here to match the NAudio WaveFormat below session.Settings.Channels = 1; session.Settings.Language = "en"; +// Multi-lingual examples: +// session.Settings.Language = "de"; // German +// session.Settings.Language = "zh-CN"; // Chinese (Simplified) +// session.Settings.Language = "auto"; // Auto-detect language await session.StartAsync(); Console.WriteLine(" Session started"); diff --git a/samples/js/live-audio-transcription/app.js b/samples/js/live-audio-transcription/app.js index 42e9c8757..0fa0d16b6 100644 --- a/samples/js/live-audio-transcription/app.js +++ b/samples/js/live-audio-transcription/app.js @@ -21,7 +21,10 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized'); // Get and load the nemotron model +// English-only: const modelAlias = 'nemotron-speech-streaming-en-0.6b'; +// Multi-lingual (supports 30+ languages including auto-detect): +// const modelAlias = 'Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4'; let model = await manager.catalog.getModel(modelAlias); if (!model) { console.error(`ERROR: Model "${modelAlias}" not found in catalog.`); @@ -47,6 +50,10 @@ session.settings.sampleRate = 16000; // Default is 16000; shown here for clarit session.settings.channels = 1; session.settings.bitsPerSample = 16; session.settings.language = 'en'; +// Multi-lingual examples: +// session.settings.language = 'de'; // German +// session.settings.language = 'zh-CN'; // Chinese (Simplified) +// session.settings.language = 'auto'; // Auto-detect language console.log('Starting streaming session...'); await session.start(); diff --git a/samples/python/live-audio-transcription/src/app.py b/samples/python/live-audio-transcription/src/app.py index ba61b679d..36ebe0037 100644 --- a/samples/python/live-audio-transcription/src/app.py +++ 
b/samples/python/live-audio-transcription/src/app.py @@ -29,9 +29,12 @@ manager.download_and_register_eps() +# English-only: model = manager.catalog.get_model("nemotron-speech-streaming-en-0.6b") +# Multi-lingual (supports 30+ languages including auto-detect): +# model = manager.catalog.get_model("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4") if model is None: - raise RuntimeError('Model "nemotron-speech-streaming-en-0.6b" not found in catalog') + raise RuntimeError('Model not found in catalog') model.download( lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True) @@ -46,6 +49,10 @@ session.settings.sample_rate = 16000 session.settings.channels = 1 session.settings.language = "en" +# Multi-lingual examples: +# session.settings.language = "de" # German +# session.settings.language = "zh-CN" # Chinese (Simplified) +# session.settings.language = "auto" # Auto-detect language session.start() print("✓ Session started") diff --git a/samples/rust/live-audio-transcription/src/main.rs b/samples/rust/live-audio-transcription/src/main.rs index 12074ae46..9fde1d9c2 100644 --- a/samples/rust/live-audio-transcription/src/main.rs +++ b/samples/rust/live-audio-transcription/src/main.rs @@ -15,7 +15,10 @@ use cpal::traits::{DeviceTrait, HostTrait, StreamTrait}; use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; use tokio_stream::StreamExt; +// English-only: const ALIAS: &str = "nemotron-speech-streaming-en-0.6b"; +// Multi-lingual (supports 30+ languages including auto-detect): +// const ALIAS: &str = "Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4"; // Global flag for Ctrl+C graceful shutdown (mirrors JS process.on('SIGINT')) static RUNNING: AtomicBool = AtomicBool::new(true); @@ -58,6 +61,10 @@ async fn main() -> Result<(), Box> { let audio_client = model.create_audio_client(); let session = Arc::new(audio_client.create_live_transcription_session()); + // session.settings.language = Some("en".into()); // English 
(default) + // session.settings.language = Some("de".into()); // German + // session.settings.language = Some("zh-CN".into()); // Chinese (Simplified) + // session.settings.language = Some("auto".into()); // Auto-detect session.start(None).await?; println!("✓ Session started\n"); From bd3bb700b65755f58bd09879cd8a2e73f699273a Mon Sep 17 00:00:00 2001 From: rui-ren Date: Thu, 21 May 2026 23:34:20 -0700 Subject: [PATCH 2/7] lint --- samples/cpp/live-audio-transcription/main.cpp | 7 ++++--- samples/cs/live-audio-transcription/Program.cs | 2 +- samples/python/live-audio-transcription/src/app.py | 7 ++++--- samples/rust/live-audio-transcription/src/main.rs | 3 ++- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/samples/cpp/live-audio-transcription/main.cpp b/samples/cpp/live-audio-transcription/main.cpp index 1055ce78e..5b77ab731 100644 --- a/samples/cpp/live-audio-transcription/main.cpp +++ b/samples/cpp/live-audio-transcription/main.cpp @@ -127,11 +127,12 @@ int main(int argc, char* argv[]) { auto& catalog = manager.GetCatalog(); // English-only: - auto* model = catalog.GetModel("nemotron-speech-streaming-en-0.6b"); + const char* modelAlias = "nemotron-speech-streaming-en-0.6b"; // Multi-lingual (supports 30+ languages including auto-detect): - // auto* model = catalog.GetModel("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4"); + // const char* modelAlias = "Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4"; + auto* model = catalog.GetModel(modelAlias); if (!model) { - throw std::runtime_error("Model not found in catalog"); + throw std::runtime_error(std::string("Model \"") + modelAlias + "\" not found in catalog"); } std::cout << "Downloading model (if needed)..." 
<< std::endl; diff --git a/samples/cs/live-audio-transcription/Program.cs b/samples/cs/live-audio-transcription/Program.cs index 12342b514..cad82de37 100644 --- a/samples/cs/live-audio-transcription/Program.cs +++ b/samples/cs/live-audio-transcription/Program.cs @@ -27,7 +27,7 @@ // English-only: var model = await catalog.GetModelAsync("nemotron-speech-streaming-en-0.6b") ?? throw new Exception("Model \"nemotron-speech-streaming-en-0.6b\" not found in catalog"); // Multi-lingual (supports 30+ languages including auto-detect): -// var model = await catalog.GetModelAsync("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4") ?? throw new Exception("Model not found"); +// var model = await catalog.GetModelAsync("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4") ?? throw new Exception("Model \"Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4\" not found in catalog"); await model.DownloadAsync(progress => { diff --git a/samples/python/live-audio-transcription/src/app.py b/samples/python/live-audio-transcription/src/app.py index 36ebe0037..85d17651c 100644 --- a/samples/python/live-audio-transcription/src/app.py +++ b/samples/python/live-audio-transcription/src/app.py @@ -30,11 +30,12 @@ manager.download_and_register_eps() # English-only: -model = manager.catalog.get_model("nemotron-speech-streaming-en-0.6b") +model_alias = "nemotron-speech-streaming-en-0.6b" # Multi-lingual (supports 30+ languages including auto-detect): -# model = manager.catalog.get_model("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4") +# model_alias = "Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4" +model = manager.catalog.get_model(model_alias) if model is None: - raise RuntimeError('Model not found in catalog') + raise RuntimeError(f'Model "{model_alias}" not found in catalog') model.download( lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True) diff --git a/samples/rust/live-audio-transcription/src/main.rs 
b/samples/rust/live-audio-transcription/src/main.rs index 9fde1d9c2..9388543d2 100644 --- a/samples/rust/live-audio-transcription/src/main.rs +++ b/samples/rust/live-audio-transcription/src/main.rs @@ -60,11 +60,12 @@ async fn main() -> Result<(), Box> { println!("✓ Model loaded\n"); let audio_client = model.create_audio_client(); - let session = Arc::new(audio_client.create_live_transcription_session()); + let mut session = audio_client.create_live_transcription_session(); // session.settings.language = Some("en".into()); // English (default) // session.settings.language = Some("de".into()); // German // session.settings.language = Some("zh-CN".into()); // Chinese (Simplified) // session.settings.language = Some("auto".into()); // Auto-detect + let session = Arc::new(session); session.start(None).await?; println!("✓ Session started\n"); From d36efac3214678140f860e96ba0148ebf43473b4 Mon Sep 17 00:00:00 2001 From: rui-ren Date: Tue, 26 May 2026 18:07:57 -0700 Subject: [PATCH 3/7] regex output --- samples/cpp/live-audio-transcription/main.cpp | 2 +- samples/cs/live-audio-transcription/Program.cs | 2 +- samples/js/live-audio-transcription/app.js | 2 +- samples/python/live-audio-transcription/src/app.py | 2 +- samples/rust/live-audio-transcription/src/main.rs | 2 +- sdk/cs/src/OpenAI/LiveAudioTypes.cs | 11 +++++++++-- 6 files changed, 14 insertions(+), 7 deletions(-) diff --git a/samples/cpp/live-audio-transcription/main.cpp b/samples/cpp/live-audio-transcription/main.cpp index 5b77ab731..b3f654563 100644 --- a/samples/cpp/live-audio-transcription/main.cpp +++ b/samples/cpp/live-audio-transcription/main.cpp @@ -129,7 +129,7 @@ int main(int argc, char* argv[]) { // English-only: const char* modelAlias = "nemotron-speech-streaming-en-0.6b"; // Multi-lingual (supports 30+ languages including auto-detect): - // const char* modelAlias = "Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4"; + // const char* modelAlias = 
"nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b"; auto* model = catalog.GetModel(modelAlias); if (!model) { throw std::runtime_error(std::string("Model \"") + modelAlias + "\" not found in catalog"); diff --git a/samples/cs/live-audio-transcription/Program.cs b/samples/cs/live-audio-transcription/Program.cs index cad82de37..a57a42f9d 100644 --- a/samples/cs/live-audio-transcription/Program.cs +++ b/samples/cs/live-audio-transcription/Program.cs @@ -27,7 +27,7 @@ // English-only: var model = await catalog.GetModelAsync("nemotron-speech-streaming-en-0.6b") ?? throw new Exception("Model \"nemotron-speech-streaming-en-0.6b\" not found in catalog"); // Multi-lingual (supports 30+ languages including auto-detect): -// var model = await catalog.GetModelAsync("Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4") ?? throw new Exception("Model \"Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4\" not found in catalog"); +// var model = await catalog.GetModelAsync("nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b") ?? 
throw new Exception("Model \"nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b\" not found in catalog"); await model.DownloadAsync(progress => { diff --git a/samples/js/live-audio-transcription/app.js b/samples/js/live-audio-transcription/app.js index 0fa0d16b6..1763e3b5d 100644 --- a/samples/js/live-audio-transcription/app.js +++ b/samples/js/live-audio-transcription/app.js @@ -24,7 +24,7 @@ console.log('✓ SDK initialized'); // English-only: const modelAlias = 'nemotron-speech-streaming-en-0.6b'; // Multi-lingual (supports 30+ languages including auto-detect): -// const modelAlias = 'Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4'; +// const modelAlias = 'nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b'; let model = await manager.catalog.getModel(modelAlias); if (!model) { console.error(`ERROR: Model "${modelAlias}" not found in catalog.`); diff --git a/samples/python/live-audio-transcription/src/app.py b/samples/python/live-audio-transcription/src/app.py index 85d17651c..122b59ee3 100644 --- a/samples/python/live-audio-transcription/src/app.py +++ b/samples/python/live-audio-transcription/src/app.py @@ -32,7 +32,7 @@ # English-only: model_alias = "nemotron-speech-streaming-en-0.6b" # Multi-lingual (supports 30+ languages including auto-detect): -# model_alias = "Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4" +# model_alias = "nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b" model = manager.catalog.get_model(model_alias) if model is None: raise RuntimeError(f'Model "{model_alias}" not found in catalog') diff --git a/samples/rust/live-audio-transcription/src/main.rs b/samples/rust/live-audio-transcription/src/main.rs index 9388543d2..efb9184eb 100644 --- a/samples/rust/live-audio-transcription/src/main.rs +++ b/samples/rust/live-audio-transcription/src/main.rs @@ -18,7 +18,7 @@ use tokio_stream::StreamExt; // English-only: const ALIAS: &str = "nemotron-speech-streaming-en-0.6b"; // Multi-lingual (supports 30+ languages including 
auto-detect): -// const ALIAS: &str = "Nemotron-3.5-ASR-Streaming-Multilingual-0.6b-onnx-int4"; +// const ALIAS: &str = "nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b"; // Global flag for Ctrl+C graceful shutdown (mirrors JS process.on('SIGINT')) static RUNNING: AtomicBool = AtomicBool::new(true); diff --git a/sdk/cs/src/OpenAI/LiveAudioTypes.cs b/sdk/cs/src/OpenAI/LiveAudioTypes.cs index a0e985425..04cd506e3 100644 --- a/sdk/cs/src/OpenAI/LiveAudioTypes.cs +++ b/sdk/cs/src/OpenAI/LiveAudioTypes.cs @@ -2,6 +2,7 @@ namespace Microsoft.AI.Foundry.Local.OpenAI; using System.Text.Json; using System.Text.Json.Serialization; +using System.Text.RegularExpressions; using Betalgo.Ranul.OpenAI.ObjectModels.RealtimeModels; using Microsoft.AI.Foundry.Local; using Microsoft.AI.Foundry.Local.Detail; @@ -15,6 +16,9 @@ namespace Microsoft.AI.Foundry.Local.OpenAI; /// public class LiveAudioTranscriptionResponse : ConversationItem { + // Multilingual Nemotron models emit language tags like <en>, <zh-CN> at segment boundaries. + // Strip them so consumers get clean text. + private static readonly Regex LangTagRegex = new(@"\s*<[a-z]{2}(-[A-Z]{2})?>\s*", RegexOptions.Compiled); /// /// Whether this is a final or partial (interim) result. /// - Nemotron models always return true (every result is final). @@ -38,6 +42,9 @@ internal static LiveAudioTranscriptionResponse FromJson(string json) JsonSerializationContext.Default.LiveAudioTranscriptionRaw) ?? throw new FoundryLocalException("Failed to deserialize live audio transcription result"); + // Strip language tags emitted by multilingual models (e.g. 
<en>, <zh-CN>) + var text = LangTagRegex.Replace(raw.Text, " ").Trim(); + return new LiveAudioTranscriptionResponse { IsFinal = raw.IsFinal, @@ -47,8 +54,8 @@ internal static LiveAudioTranscriptionResponse FromJson(string json) [ new ContentPart { - Text = raw.Text, - Transcript = raw.Text + Text = text, + Transcript = text } ] }; From de2b8f6fe79f80ff2856b8491af822b7df6889bf Mon Sep 17 00:00:00 2001 From: rui-ren Date: Tue, 26 May 2026 21:26:43 -0700 Subject: [PATCH 4/7] version --- sdk/deps_versions_winml.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/deps_versions_winml.json b/sdk/deps_versions_winml.json index e67af58a6..653810ea6 100644 --- a/sdk/deps_versions_winml.json +++ b/sdk/deps_versions_winml.json @@ -7,9 +7,9 @@ "version": "2.1.1" }, "onnxruntime": { - "version": "1.25.1" + "version": "1.26.0" }, "onnxruntime-genai": { - "version": "0.13.2" + "version": "0.14.0" } } From f873f624baff8b53ea261a5bc8c05cbfec48094f Mon Sep 17 00:00:00 2001 From: rui-ren Date: Tue, 26 May 2026 21:39:49 -0700 Subject: [PATCH 5/7] rust lint --- samples/rust/live-audio-transcription/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/rust/live-audio-transcription/src/main.rs b/samples/rust/live-audio-transcription/src/main.rs index 8435b16af..a2b2b58da 100644 --- a/samples/rust/live-audio-transcription/src/main.rs +++ b/samples/rust/live-audio-transcription/src/main.rs @@ -60,7 +60,7 @@ async fn main() -> Result<(), Box> { println!("✓ Model loaded\n"); let audio_client = model.create_audio_client(); - let mut session = audio_client.create_live_transcription_session(); + let session = audio_client.create_live_transcription_session(); // session.settings.language = Some("en".into()); // English (default) // session.settings.language = Some("de".into()); // German // session.settings.language = Some("zh-CN".into()); // Chinese (Simplified) From 494b382a499bc455fcce2482345600632ddfb0a3 Mon Sep 17 00:00:00 2001 From: rui-ren 
Date: Tue, 26 May 2026 21:55:25 -0700 Subject: [PATCH 6/7] update README --- sdk/cpp/README.md | 29 +++++++++++++++++++++++++++++ sdk/cs/README.md | 35 +++++++++++++++++++++++++++++++++++ sdk/js/README.md | 33 +++++++++++++++++++++++++++++++++ sdk/rust/README.md | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 130 insertions(+) diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 5959ec95b..861b893d0 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -278,6 +278,35 @@ audio.TranscribeAudioStreaming(R"(C:\path\to\audio.wav)", [](const AudioCreateTr }); ``` +#### Multilingual Language Codes + +When using a multilingual ASR model (e.g. `nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b`), set the language to a BCP-47 language code to hint the expected language. + +| Code | Language | Code | Language | +|------|----------|------|----------| +| `en-US` | English (US) | `en-GB` | English (UK) | +| `zh-CN` | Chinese (Simplified) | `ja-JP` | Japanese | +| `ko-KR` | Korean | `de-DE` | German | +| `fr-FR` | French | `fr-CA` | French (Canada) | +| `es-ES` | Spanish (Spain) | `es-US` | Spanish (US) | +| `it-IT` | Italian | `pt-BR` | Portuguese (Brazil) | +| `pt-PT` | Portuguese (Portugal) | `ru-RU` | Russian | +| `ar-AR` | Arabic | `hi-IN` | Hindi | +| `nl-NL` | Dutch | `pl-PL` | Polish | +| `sv-SE` | Swedish | `da-DK` | Danish | +| `fi-FI` | Finnish | `cs-CZ` | Czech | +| `ro-RO` | Romanian | `hu-HU` | Hungarian | +| `sk-SK` | Slovak | `bg-BG` | Bulgarian | +| `el-GR` | Greek | `et-EE` | Estonian | +| `hr-HR` | Croatian | `lt-LT` | Lithuanian | +| `lv-LV` | Latvian | `sl-SL` | Slovenian | +| `uk-UA` | Ukrainian | `he-IL` | Hebrew | +| `nb-NO` | Norwegian (Bokmål) | `nn-NO` | Norwegian (Nynorsk) | +| `th-TH` | Thai | `tr-TR` | Turkish | +| `vi-VN` | Vietnamese | `auto` | Auto-detect | + +Use `"auto"` to let the model detect the spoken language automatically. + ### Tool Calling See `sample/main.cpp` (Example 5) for a full tool-calling walkthrough. 
diff --git a/sdk/cs/README.md b/sdk/cs/README.md index 9493eea0b..1ee872c5c 100644 --- a/sdk/cs/README.md +++ b/sdk/cs/README.md @@ -300,6 +300,41 @@ audioClient.Settings.Language = "en"; audioClient.Settings.Temperature = 0.0f; ``` +#### Multilingual Language Codes + +When using a multilingual ASR model (e.g. `nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b`), set `Language` to a BCP-47 language code to hint the expected language. This applies to both `OpenAIAudioClient.Settings.Language` and `LiveAudioTranscriptionSession.Settings.Language`. + +| Code | Language | Code | Language | +|------|----------|------|----------| +| `en-US` | English (US) | `en-GB` | English (UK) | +| `zh-CN` | Chinese (Simplified) | `ja-JP` | Japanese | +| `ko-KR` | Korean | `de-DE` | German | +| `fr-FR` | French | `fr-CA` | French (Canada) | +| `es-ES` | Spanish (Spain) | `es-US` | Spanish (US) | +| `it-IT` | Italian | `pt-BR` | Portuguese (Brazil) | +| `pt-PT` | Portuguese (Portugal) | `ru-RU` | Russian | +| `ar-AR` | Arabic | `hi-IN` | Hindi | +| `nl-NL` | Dutch | `pl-PL` | Polish | +| `sv-SE` | Swedish | `da-DK` | Danish | +| `fi-FI` | Finnish | `cs-CZ` | Czech | +| `ro-RO` | Romanian | `hu-HU` | Hungarian | +| `sk-SK` | Slovak | `bg-BG` | Bulgarian | +| `el-GR` | Greek | `et-EE` | Estonian | +| `hr-HR` | Croatian | `lt-LT` | Lithuanian | +| `lv-LV` | Latvian | `sl-SL` | Slovenian | +| `uk-UA` | Ukrainian | `he-IL` | Hebrew | +| `nb-NO` | Norwegian (Bokmål) | `nn-NO` | Norwegian (Nynorsk) | +| `th-TH` | Thai | `tr-TR` | Turkish | +| `vi-VN` | Vietnamese | `auto` | Auto-detect | + +Use `"auto"` to let the model detect the spoken language automatically. + +```csharp +// Multilingual example +audioClient.Settings.Language = "zh-CN"; // Chinese +session.Settings.Language = "auto"; // Auto-detect +``` + ### Live Audio Transcription (Real-Time Streaming) For real-time microphone-to-text transcription, use `CreateLiveTranscriptionSession()`. 
Audio is pushed as raw PCM chunks and transcription results stream back as an `IAsyncEnumerable`. diff --git a/sdk/js/README.md b/sdk/js/README.md index 2eebae018..5aa84e3c4 100644 --- a/sdk/js/README.md +++ b/sdk/js/README.md @@ -265,6 +265,39 @@ for await (const chunk of audioClient.transcribeStreaming('/path/to/audio.wav')) } ``` +#### Multilingual Language Codes + +When using a multilingual ASR model (e.g. `nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b`), set `language` to a BCP-47 language code to hint the expected language. + +| Code | Language | Code | Language | +|------|----------|------|----------| +| `en-US` | English (US) | `en-GB` | English (UK) | +| `zh-CN` | Chinese (Simplified) | `ja-JP` | Japanese | +| `ko-KR` | Korean | `de-DE` | German | +| `fr-FR` | French | `fr-CA` | French (Canada) | +| `es-ES` | Spanish (Spain) | `es-US` | Spanish (US) | +| `it-IT` | Italian | `pt-BR` | Portuguese (Brazil) | +| `pt-PT` | Portuguese (Portugal) | `ru-RU` | Russian | +| `ar-AR` | Arabic | `hi-IN` | Hindi | +| `nl-NL` | Dutch | `pl-PL` | Polish | +| `sv-SE` | Swedish | `da-DK` | Danish | +| `fi-FI` | Finnish | `cs-CZ` | Czech | +| `ro-RO` | Romanian | `hu-HU` | Hungarian | +| `sk-SK` | Slovak | `bg-BG` | Bulgarian | +| `el-GR` | Greek | `et-EE` | Estonian | +| `hr-HR` | Croatian | `lt-LT` | Lithuanian | +| `lv-LV` | Latvian | `sl-SL` | Slovenian | +| `uk-UA` | Ukrainian | `he-IL` | Hebrew | +| `nb-NO` | Norwegian (Bokmål) | `nn-NO` | Norwegian (Nynorsk) | +| `th-TH` | Thai | `tr-TR` | Turkish | +| `vi-VN` | Vietnamese | `auto` | Auto-detect | + +```typescript +// Multilingual example +audioClient.settings.language = 'zh-CN'; // Chinese +audioClient.settings.language = 'auto'; // Auto-detect +``` + ### Embedded Web Service Start a local HTTP server that exposes an OpenAI-compatible API: diff --git a/sdk/rust/README.md b/sdk/rust/README.md index dbd9906a2..ded4d5f4d 100644 --- a/sdk/rust/README.md +++ b/sdk/rust/README.md @@ -441,6 +441,39 @@ while let 
Some(chunk) = stream.next().await { } ``` +#### Multilingual Language Codes + +When using a multilingual ASR model (e.g. `nvidia-nemotron-3.5-asr-streaming-multilingual-0.6b`), set `language` to a BCP-47 language code to hint the expected language. + +| Code | Language | Code | Language | +|------|----------|------|----------| +| `en-US` | English (US) | `en-GB` | English (UK) | +| `zh-CN` | Chinese (Simplified) | `ja-JP` | Japanese | +| `ko-KR` | Korean | `de-DE` | German | +| `fr-FR` | French | `fr-CA` | French (Canada) | +| `es-ES` | Spanish (Spain) | `es-US` | Spanish (US) | +| `it-IT` | Italian | `pt-BR` | Portuguese (Brazil) | +| `pt-PT` | Portuguese (Portugal) | `ru-RU` | Russian | +| `ar-AR` | Arabic | `hi-IN` | Hindi | +| `nl-NL` | Dutch | `pl-PL` | Polish | +| `sv-SE` | Swedish | `da-DK` | Danish | +| `fi-FI` | Finnish | `cs-CZ` | Czech | +| `ro-RO` | Romanian | `hu-HU` | Hungarian | +| `sk-SK` | Slovak | `bg-BG` | Bulgarian | +| `el-GR` | Greek | `et-EE` | Estonian | +| `hr-HR` | Croatian | `lt-LT` | Lithuanian | +| `lv-LV` | Latvian | `sl-SL` | Slovenian | +| `uk-UA` | Ukrainian | `he-IL` | Hebrew | +| `nb-NO` | Norwegian (Bokmål) | `nn-NO` | Norwegian (Nynorsk) | +| `th-TH` | Thai | `tr-TR` | Turkish | +| `vi-VN` | Vietnamese | `auto` | Auto-detect | + +```rust +// Multilingual example +let audio_client = model.create_audio_client() + .language("zh-CN"); // Chinese +``` + ### Embedded Web Service Start a local HTTP server that exposes an OpenAI-compatible REST API: From c70207a83b19d038d3e272c0798bb6f786462ce9 Mon Sep 17 00:00:00 2001 From: rui-ren Date: Wed, 27 May 2026 16:38:18 -0700 Subject: [PATCH 7/7] add C++ language usage snippet per review --- sdk/cpp/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 921541748..eaf90cd4f 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -307,6 +307,13 @@ When using a multilingual ASR model (e.g. 
`nvidia-nemotron-3.5-asr-streaming-mul Use `"auto"` to let the model detect the spoken language automatically. +Set the language on a live audio transcription session: + +```cpp +session->Settings().language = "zh-CN"; // Chinese (Simplified) +// session->Settings().language = "auto"; // Auto-detect language +``` + ### Tool Calling See `sample/main.cpp` (Example 5) for a full tool-calling walkthrough.