diff --git a/sdk_v2/cpp/src/catalog/static_catalog_client.cc b/sdk_v2/cpp/src/catalog/static_catalog_client.cc index af9ebb5f6..de722a06d 100644 --- a/sdk_v2/cpp/src/catalog/static_catalog_client.cc +++ b/sdk_v2/cpp/src/catalog/static_catalog_client.cc @@ -51,6 +51,14 @@ class StaticCatalogClient : public ICatalogClient { for (const auto& [device, eps] : devices_to_eps) { for (const auto& ep : eps) { allowed.emplace(to_lower(device), to_lower(ep)); + + // CudaPluginExecutionProvider is the ORT registration name for the + // downloadable CUDA plugin EP, but catalog models are tagged with + // CUDAExecutionProvider. Add the canonical name as an alias so + // plugin-EP machines can see and load CUDA catalog models. + if (to_lower(ep) == "cudapluginexecutionprovider") { + allowed.emplace(to_lower(device), "cudaexecutionprovider"); + } } } diff --git a/sdk_v2/cpp/src/ep_detection/cuda_ep_bootstrapper.cc b/sdk_v2/cpp/src/ep_detection/cuda_ep_bootstrapper.cc index 6f50de93d..a87ede96a 100644 --- a/sdk_v2/cpp/src/ep_detection/cuda_ep_bootstrapper.cc +++ b/sdk_v2/cpp/src/ep_detection/cuda_ep_bootstrapper.cc @@ -3,18 +3,26 @@ #include "ep_detection/cuda_ep_bootstrapper.h" #include "ep_detection/ep_utils.h" +#include "http/http_client.h" +#include "http/http_download.h" #include "logger.h" #include "util/file_lock.h" -#include "http/http_download.h" #include "util/zip_extract.h" #include +#include #include +#include #include #include #include +#include +#include +#include +#include #include +#include #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN @@ -23,43 +31,224 @@ namespace { -constexpr const char* kPackageFileName = "cuda-ep.zip"; +constexpr const char* kOrtPackageFileName = "cuda-ep-ort.zip"; +constexpr const char* kCudaDepsPackageFileName = "cuda-ep-cuda-deps.zip"; constexpr const char* kLockFileName = "cuda-ep.lock"; +constexpr const char* kStagingDirName = "cuda-ep-staging"; constexpr const char* kUserAgent = "FoundryLocal"; constexpr int kMaxInstallAttempts = 5; -// CUDA 
EP package is built against the ONNX Runtime version we link against, so -// WinML and non-WinML builds need separate downloads. Hashes mirror the C# core -// (see neutron.main/src/Service/Providers/Detector/CudaEpBootstrapper.cs). -// WinML build -> ORT 1.23.2 (cuda-ep-20260501-182408.zip) -// Non-WinML -> ORT 1.25.1 (cuda-ep-20260501-062935.zip) -#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML -constexpr const char* kDownloadUrl = - "https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-182408.zip"; +// Manifest URL on the CDN — published by the CUDA EP upload pipeline. +constexpr const char* kManifestUrl = + "https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda_ep_dev.json"; + +// RSA-4096 public key used to verify the manifest signature. +// Corresponds to the private key used by official CUDA Plugin EP Publishing Pipeline. +constexpr const char* kManifestSigningPublicKey = R"PEM( +-----BEGIN PUBLIC KEY----- +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA1YwPWIQ7UJZ0EOVfRIeU +AiI6G9nwmQ+0RGmBKKNPeuTt8To7EUBfs2yjHs1nS159oEbI9wmN+SRhTx72fyo7 +EEbQ2kYB/d+/znqrpTinHiyfrn6dEzqJzj5diTfXkVbm5+uueqxoxN6TAUwZqsdO +wveft1DiSU8G0NRx3QPxBACZx199ObiQgqDQycTbc7qaRUy9rkcrMimvXKIaui3z +fmxQtzF6WkRnN4Xf+jkzxgua0xSHkcdYpDu+M39iynqEkSChzv+h0NIE/B05z9/y ++6/EjFETYB2LuSr7N3EOMj1eTff/oFqwBk1gBuLxNxHjTtH1+DxpygIxz9Dy2OY5 +jG46Io9Eg8q7UMW4aSm/YS/Sqt8KzqOG59XvLtADDlaS+8+KDV0K9Jwq1WXBbqXd +gXlUjLdIh+UAgF0zv5N8MGoS9BxvBNr932XkUV5VC26JgU3tPqiiiSXfPParBSJt +wt/PSpQDqkcWE9VsRmCe5pAgmv3AQlv+jSLlB8aDdCP8/+/AoI7St4n7STl8QtPl +XXWmO8EJwqEXFpaitcpNyzuol6/7H4mQV6XeNjezjmTWeedvxWcZXi1Pxp/FfOEK +iJxrPNMxlZZA26WvTEhc0vi9hxYxTsZKWuenZoGvgR2/sy2tqbEV3/4JhowQ6K56 +MvdOj/vvArK/BIwPJnCYv4kCAwEAAQ== +-----END PUBLIC KEY----- +)PEM"; + +// Install flow: +// 1. Fetch the manifest and check the existing cuda-ep directory against it. +// 2. If any package is stale, copy the existing cuda-ep directory into staging. +// 3. 
Use that staged copy as the base and download only the stale package(s). +// 4. Re-write version.json with the ORT version, verify the staged files, then +// atomically rename staging into place. + +// ----------------------------------------------------------------------- +// Platform detection +// +// Returns the manifest platform key and ORT registration library filename +// for the current build target, or std::nullopt if unsupported. +// ----------------------------------------------------------------------- +struct PlatformInfo { + const char* key; // manifest lookup key, e.g. "win-x64" + const char* ep_lib; // ORT registration library filename +}; + +std::optional GetPlatformInfo() { +#if defined(_WIN32) && !defined(_M_ARM64) + return PlatformInfo{"win-x64", "onnxruntime_providers_cuda_plugin.dll"}; + +// Uncomment when win-arm64 CUDA EP build is available (see cuda-ep-upload.yml): +// #elif defined(_WIN32) && defined(_M_ARM64) +// return PlatformInfo{"win-arm64", "onnxruntime_providers_cuda_plugin.dll"}; + +// Uncomment when linux-x64 CUDA EP build is available (see cuda-ep-upload.yml): +// #elif defined(__linux__) && defined(__x86_64__) +// return PlatformInfo{"linux-x64", "libonnxruntime_providers_cuda_plugin.so"}; + +// Uncomment when linux-arm64 CUDA EP build is available (see cuda-ep-upload.yml): +// #elif defined(__linux__) && defined(__aarch64__) +// return PlatformInfo{"linux-arm64", "libonnxruntime_providers_cuda_plugin.so"}; + #else -constexpr const char* kDownloadUrl = - "https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-062935.zip"; + return std::nullopt; // Platform not yet supported — graceful no-op. 
#endif +} -struct ExpectedBinary { - const char* filename; - const char* sha256; -}; +constexpr const char* kRegistrationName = "Foundry.CUDA"; + +struct ManifestInfo { + struct PackageInfo { + std::string download_url; + std::unordered_map sha256; // filename -> expected hash + }; -#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML -constexpr ExpectedBinary kExpectedBinaries[] = { - {"onnxruntime_providers_cuda.dll", "4CEF18654878CEFCFCF8488E9C3A705EB5327AA9B5556155C319C9CBB2D98FCF"}, - {"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"}, + std::string ort_version; + std::string cuda_deps_version; + PackageInfo ort; + PackageInfo cuda_deps; }; -#else -constexpr ExpectedBinary kExpectedBinaries[] = { - {"onnxruntime_providers_cuda.dll", "DD540FCFECFBC68B4675C9ADF09C2858CF6B054563859D79598AA2524406A76F"}, - {"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"}, + +ManifestInfo::PackageInfo ParsePackage(const nlohmann::json& package_json, + const char* package_name) { + ManifestInfo::PackageInfo info; + + info.download_url = package_json.at("url").get(); + auto& sha256 = package_json.at("sha256"); + if (!sha256.is_object() || sha256.empty()) { + throw std::runtime_error( + fmt::format("CUDA EP manifest '{}' entry has invalid/empty 'sha256'", package_name)); + } + + for (auto& [filename, hash] : sha256.items()) { + info.sha256[filename] = hash.get(); + } + + return info; +} + +bool DownloadAndExtractPackage(const ManifestInfo::PackageInfo& package, + const std::filesystem::path& staging_dir, + const std::filesystem::path& zip_path, + const std::string& package_name, + const std::string& ep_name, + const fl::IEpBootstrapper::ProgressCallback& progress_cb, + float progress_base, + float progress_span, + fl::ILogger& logger) { + logger.Log(fl::LogLevel::Information, + fmt::format("CUDA EP: downloading {} package...", package_name)); + logger.Log(fl::LogLevel::Debug, + 
fmt::format("CUDA EP: {} download URL is {}", package_name, package.download_url)); + + std::atomic cancel_flag{false}; + auto download_progress = [&](float pct) { + if (progress_cb) { + if (!progress_cb(ep_name, progress_base + ((pct / 100.0f) * progress_span))) { /* pct is a 0-100 percentage; normalise it so the reported value stays within [progress_base, progress_base + progress_span] */ + cancel_flag.store(true); + } + } + }; + + if (!HttpDownloadFile(package.download_url, zip_path, kUserAgent, + &cancel_flag, download_progress, logger)) { + logger.Log(fl::LogLevel::Warning, + fmt::format("CUDA EP: {} package download failed", package_name)); + return false; + } + + logger.Log(fl::LogLevel::Information, + fmt::format("CUDA EP: extracting {} package to {}", + package_name, staging_dir.string())); + + if (!ExtractZip(zip_path, staging_dir, logger)) { + logger.Log(fl::LogLevel::Warning, + fmt::format("CUDA EP: {} package extraction failed", package_name)); + return false; + } + + std::filesystem::remove(zip_path); + return true; }; -#endif -constexpr const char* kRegistrationName = "Foundry.CUDA"; -constexpr const char* kCudaProviderDll = "onnxruntime_providers_cuda.dll"; +void WriteVersionJson(const std::filesystem::path& staging_dir, + const std::string& ort_version, + fl::ILogger& logger) { + auto version_path = staging_dir / "version.json"; + auto version_json = nlohmann::json{{"version", ort_version}}; + + std::ofstream out(version_path, std::ios::trunc | std::ios::binary); + if (!out) { + throw std::runtime_error( + fmt::format("CUDA EP: failed to open {} for writing", version_path.string())); + } + + out << version_json.dump() << std::flush; /* flush now: the stream is buffered, so without it the failure check below cannot observe a failed write */ + if (!out) { + throw std::runtime_error( + fmt::format("CUDA EP: failed to write {}", version_path.string())); + } + + logger.Log(fl::LogLevel::Debug, + fmt::format("CUDA EP: wrote version.json with ort_version={}", ort_version)); +} + +/// Fetch and parse the CUDA EP manifest from the CDN. +/// Verifies the manifest signature before using it. +/// Returns the package entry for the given platform key. 
+ManifestInfo FetchManifest(const char* platform_key, fl::ILogger& logger) { + logger.Log(fl::LogLevel::Debug, + fmt::format("CUDA EP: fetching manifest from {}", kManifestUrl)); + + auto body = fl::http::HttpGetWithRetry(kManifestUrl, kUserAgent, logger); + + // Sig URL is manifest URL + ".sig". + const std::string sig_url = std::string(kManifestUrl) + ".sig"; + logger.Log(fl::LogLevel::Debug, + fmt::format("CUDA EP: fetching manifest signature from {}", sig_url)); + auto sig = fl::http::HttpGetWithRetry(sig_url, kUserAgent, logger); + + // Trim any trailing whitespace (CDN may append \r\n). + while (!sig.empty() && (sig.back() == '\n' || sig.back() == '\r' || sig.back() == ' ')) { + sig.pop_back(); + } + + if (!fl::VerifyRsaSha256Signature(body, sig, kManifestSigningPublicKey, logger)) { + throw std::runtime_error( + "CUDA EP: manifest signature verification failed — refusing to use manifest"); + } + + logger.Log(fl::LogLevel::Debug, "CUDA EP: manifest signature verified"); + + auto j = nlohmann::json::parse(body); + + ManifestInfo info; + info.ort_version = j.at("ort_version").get(); + info.cuda_deps_version = j.at("cuda_deps_version").get(); + + auto& packages = j.at("packages"); + if (!packages.contains(platform_key)) { + throw std::runtime_error( + fmt::format("CUDA EP manifest has no entry for platform '{}'", platform_key)); + } + + auto& platform_package = packages.at(platform_key); + if (!platform_package.contains("ort") || !platform_package.contains("cuda_deps")) { + throw std::runtime_error( + fmt::format("CUDA EP manifest platform '{}' is missing 'ort' or 'cuda_deps'", + platform_key)); + } + + info.ort = ParsePackage(platform_package.at("ort"), "ort"); + info.cuda_deps = ParsePackage(platform_package.at("cuda_deps"), "cuda_deps"); + + return info; +} } // anonymous namespace @@ -93,80 +282,113 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force, attempts_++; + // Bail out early if this platform is not yet in the manifest. 
+ auto platform_info = GetPlatformInfo(); + if (!platform_info) { + logger.Log(LogLevel::Information, "CUDA EP: current platform is not yet supported"); + return false; + } + auto ep_dir = std::filesystem::path(ep_dir_); - auto lock_path = ep_dir.parent_path() / kLockFileName; - auto zip_path = ep_dir.parent_path() / kPackageFileName; + auto parent_dir = ep_dir.parent_path(); try { - // Cross-process lock to prevent concurrent installs - FileLock lock(lock_path); - - // Check if package already exists and is valid - if (fl::VerifyEpPackage(ep_dir, - {{kExpectedBinaries[0].filename, kExpectedBinaries[0].sha256}, - {kExpectedBinaries[1].filename, kExpectedBinaries[1].sha256}}, - "CUDA EP", logger)) { - logger.Log(LogLevel::Information, "CUDA EP: package already valid, skipping download"); - } else { - // Clean up any partial install - if (std::filesystem::exists(ep_dir)) { - std::filesystem::remove_all(ep_dir); - } + // Fetch the manifest before acquiring the lock to avoid holding it during network I/O. + auto manifest = FetchManifest(platform_info->key, logger); + logger.Log(LogLevel::Information, + fmt::format("CUDA EP: manifest fetched (ort_version={}, cuda_deps_version={}, platform={})", + manifest.ort_version, manifest.cuda_deps_version, platform_info->key)); - std::filesystem::create_directories(ep_dir); + // Cross-process lock to prevent concurrent installs. 
+ std::filesystem::create_directories(parent_dir); + FileLock lock(parent_dir / kLockFileName); - // Download - logger.Log(LogLevel::Information, "CUDA EP: downloading from CDN..."); + bool needs_ort = force || !VerifyEpPackage(ep_dir, manifest.ort.sha256, "CUDA EP (ort)", logger); + bool needs_cuda_deps = force || !VerifyEpPackage(ep_dir, manifest.cuda_deps.sha256, "CUDA EP (cuda_deps)", logger); - // Bridge callback-based cancellation to the atomic flag HttpDownloadFile expects - std::atomic cancel_flag{false}; + if (!needs_ort && !needs_cuda_deps) { + logger.Log(LogLevel::Information, + "CUDA EP: ORT and CUDA deps packages already valid, skipping download"); + } else { + // Download only outdated package(s) into staging, then atomically swap. + auto staging_dir = parent_dir / kStagingDirName; + if (std::filesystem::exists(staging_dir)) { + std::filesystem::remove_all(staging_dir); + } + + if (std::filesystem::exists(ep_dir)) { + std::filesystem::copy(ep_dir, staging_dir, + std::filesystem::copy_options::recursive | + std::filesystem::copy_options::overwrite_existing); + } else { + std::filesystem::create_directories(staging_dir); + } - auto download_progress = [&](float pct) { - if (progress_cb) { - // 0-80% for download phase - if (!progress_cb(name_, pct * 0.8f)) { - cancel_flag.store(true); - } + const int download_count = (needs_ort ? 1 : 0) + (needs_cuda_deps ? 1 : 0); + float progress_base = 0.0f; + const float progress_span = download_count > 0 ? 
(80.0f / download_count) : 0.0f; + + if (needs_ort) { + auto ort_zip_path = staging_dir / kOrtPackageFileName; + if (!DownloadAndExtractPackage(manifest.ort, staging_dir, ort_zip_path, + "ort", name_, progress_cb, + progress_base, progress_span, logger)) { + std::filesystem::remove_all(staging_dir); + return false; } - }; + progress_base += progress_span; + } - if (!HttpDownloadFile(kDownloadUrl, zip_path, kUserAgent, - &cancel_flag, download_progress, logger)) { - logger.Log(LogLevel::Warning, "CUDA EP: download failed (see prior log for details)"); - return false; + if (needs_cuda_deps) { + auto cuda_deps_zip_path = staging_dir / kCudaDepsPackageFileName; + if (!DownloadAndExtractPackage(manifest.cuda_deps, staging_dir, cuda_deps_zip_path, + "cuda_deps", name_, progress_cb, + progress_base, progress_span, logger)) { + std::filesystem::remove_all(staging_dir); + return false; + } + progress_base += progress_span; } - // Extract - logger.Log(LogLevel::Information, "CUDA EP: extracting..."); + // CUDA has two install steps (ORT package and CUDA deps), so always stamp + // the final package with the ORT version after both steps complete. + WriteVersionJson(staging_dir, manifest.ort_version, logger); - if (!ExtractZip(zip_path, ep_dir, logger)) { - logger.Log(LogLevel::Warning, "CUDA EP: extraction failed"); + // Verify both package subsets in staging before promotion. 
+ if (!VerifyEpPackage(staging_dir, manifest.ort.sha256, "CUDA EP (ort)", logger) || + !VerifyEpPackage(staging_dir, manifest.cuda_deps.sha256, "CUDA EP (cuda_deps)", logger)) { + logger.Log(LogLevel::Warning, + "CUDA EP: verification failed after downloading updated package(s)"); + std::filesystem::remove_all(staging_dir); return false; } - // Clean up zip - std::filesystem::remove(zip_path); + logger.Log(LogLevel::Debug, + fmt::format("CUDA EP: staging verification succeeded, promoting to {}", + ep_dir.string())); - // Verify - if (!fl::VerifyEpPackage(ep_dir, - {{kExpectedBinaries[0].filename, kExpectedBinaries[0].sha256}, - {kExpectedBinaries[1].filename, kExpectedBinaries[1].sha256}}, - "CUDA EP", logger)) { - logger.Log(LogLevel::Warning, "CUDA EP: verification failed after download"); - return false; + // Swap (two steps, NOT atomic): delete old install, then rename staging to target; if the rename throws, no install is left in place. + if (std::filesystem::exists(ep_dir)) { + std::filesystem::remove_all(ep_dir); } + std::filesystem::rename(staging_dir, ep_dir); + + logger.Log(LogLevel::Information, + fmt::format("CUDA EP: successfully installed (updated: ort={}, cuda_deps={})", + needs_ort ? "yes" : "no", + needs_cuda_deps ? "yes" : "no")); } if (progress_cb) { progress_cb(name_, 90.0f); } - // Register with ORT + // Register with ORT. #ifdef _WIN32 // Permanently prepend the EP directory to PATH. The zip bundles all // required CUDA/cuDNN DLLs, so no system CUDA install is needed. 
// PATH must stay modified for the process lifetime because: - // - onnxruntime_providers_cuda.dll delay-loads some dependencies + // - onnxruntime_providers_cuda_plugin.dll delay-loads CUDA dependencies // - onnxruntime-genai-cuda.dll is loaded later at model-load time // - ORT creates CUDA sessions after registration { @@ -183,9 +405,17 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force, } #endif - auto cuda_dll_path = ep_dir / kCudaProviderDll; + auto cuda_lib_path = ep_dir / platform_info->ep_lib; + + // NOTE: RegisterExecutionProviderLibrary loads the CUDA plugin DLL, which + // initializes the CUDA runtime and cuDNN. This can take 30–60 seconds on + // first use — especially on machines with large cuDNN caches or slow VRAM + // init. This is normal; it is NOT a hang in the bootstrapper itself. + logger.Log(LogLevel::Information, + fmt::format("CUDA EP: registering provider library {} (CUDA init may take ~30s)...", + cuda_lib_path.string())); - if (!register_ep_(kRegistrationName, cuda_dll_path)) { + if (!register_ep_(kRegistrationName, cuda_lib_path)) { logger.Log(LogLevel::Warning, "CUDA EP: ORT registration failed"); return false; } @@ -196,10 +426,9 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force, progress_cb(name_, 100.0f); } - // Bootstrapper-side log — captures the install dir, which the central - // register_ep callback (logs library + version) doesn't have. 
logger.Log(LogLevel::Information, - fmt::format("CUDA EP: ready (install_path={})", ep_dir.string())); + fmt::format("CUDA EP: ready (install_path={}, ort_version={}, cuda_deps_version={})", + ep_dir.string(), manifest.ort_version, manifest.cuda_deps_version)); return true; } catch (const std::exception& e) { logger.Log(LogLevel::Warning, fmt::format("CUDA EP: error: {}", e.what())); diff --git a/sdk_v2/cpp/src/ep_detection/ep_utils.cc b/sdk_v2/cpp/src/ep_detection/ep_utils.cc index 7fa6524f4..5e7ad160b 100644 --- a/sdk_v2/cpp/src/ep_detection/ep_utils.cc +++ b/sdk_v2/cpp/src/ep_detection/ep_utils.cc @@ -7,17 +7,28 @@ #include +#include +#include +#include + #include #include #include +#include namespace fl { bool VerifyEpPackage( const std::filesystem::path& dir, - std::initializer_list> expected, + const std::unordered_map& expected, std::string_view ep_name, ILogger& logger) { + if (expected.empty()) { + logger.Log(LogLevel::Warning, + fmt::format("{}: expected hash map is empty", ep_name)); + return false; + } + for (const auto& [filename, expected_hash] : expected) { auto file_path = dir / filename; @@ -40,4 +51,77 @@ bool VerifyEpPackage( return true; } +bool VerifyRsaSha256Signature( + std::string_view data, + std::string_view base64_sig, + std::string_view public_key_pem, + ILogger& logger) { + // Load the RSA public key from PEM. + BIO* key_bio = BIO_new_mem_buf(public_key_pem.data(), static_cast(public_key_pem.size())); + if (!key_bio) { + logger.Log(LogLevel::Warning, "manifest signature: failed to allocate BIO for public key"); + return false; + } + EVP_PKEY* pkey = PEM_read_bio_PUBKEY(key_bio, nullptr, nullptr, nullptr); + BIO_free(key_bio); + if (!pkey) { + logger.Log(LogLevel::Warning, "manifest signature: failed to parse public key PEM"); + return false; + } + + // Decode the base64 signature (single-line, no newlines). 
+ BIO* b64 = BIO_new(BIO_f_base64()); + if (!b64) { + logger.Log(LogLevel::Warning, "manifest signature: failed to allocate BIO for base64"); + EVP_PKEY_free(pkey); + return false; + } + + BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL); + BIO* mem = BIO_new_mem_buf(base64_sig.data(), static_cast(base64_sig.size())); + if (!mem) { + BIO_free(b64); + EVP_PKEY_free(pkey); + logger.Log(LogLevel::Warning, "manifest signature: failed to allocate BIO for signature buffer"); + return false; + } + + BIO_push(b64, mem); + + // Upper bound: base64 expands by ~4/3. + std::vector sig_bytes(base64_sig.size()); + int sig_len = BIO_read(b64, sig_bytes.data(), static_cast(sig_bytes.size())); + BIO_free_all(b64); + + if (sig_len <= 0) { + EVP_PKEY_free(pkey); + logger.Log(LogLevel::Warning, "manifest signature: failed to decode base64 signature"); + return false; + } + sig_bytes.resize(static_cast(sig_len)); + + // Verify RSA-SHA256-PKCS1v15. + EVP_MD_CTX* ctx = EVP_MD_CTX_new(); + if (!ctx) { + EVP_PKEY_free(pkey); + logger.Log(LogLevel::Warning, "manifest signature: failed to allocate EVP_MD_CTX"); + return false; + } + + bool ok = false; + if (EVP_DigestVerifyInit(ctx, nullptr, EVP_sha256(), nullptr, pkey) == 1 && + EVP_DigestVerifyUpdate(ctx, data.data(), data.size()) == 1) { + ok = (EVP_DigestVerifyFinal(ctx, sig_bytes.data(), sig_bytes.size()) == 1); + } + + EVP_MD_CTX_free(ctx); + EVP_PKEY_free(pkey); + + if (!ok) { + logger.Log(LogLevel::Warning, "manifest signature: RSA-SHA256 verification failed"); + } + + return ok; +} + } // namespace fl diff --git a/sdk_v2/cpp/src/ep_detection/ep_utils.h b/sdk_v2/cpp/src/ep_detection/ep_utils.h index 634bb517e..e76f1af9a 100644 --- a/sdk_v2/cpp/src/ep_detection/ep_utils.h +++ b/sdk_v2/cpp/src/ep_detection/ep_utils.h @@ -3,8 +3,8 @@ #pragma once #include -#include -#include +#include +#include #include namespace fl { @@ -14,14 +14,27 @@ class ILogger; /// Verify a set of binaries in @p dir all exist and match their expected SHA-256 hashes. 
/// /// @param dir Directory containing the extracted EP binaries. -/// @param expected List of (filename, expected_sha256_hex) pairs. -/// @param ep_name EP name used in warning log messages (e.g. "CUDA EP"). +/// @param expected Map of filename -> expected_sha256_hex. +/// @param ep_name EP name used in warning log messages (e.g. "CUDA EP (ort)"). /// @param logger Logger for diagnostic output. /// @return true if every file exists and its hash matches; false otherwise. bool VerifyEpPackage( const std::filesystem::path& dir, - std::initializer_list> expected, + const std::unordered_map& expected, std::string_view ep_name, ILogger& logger); +/// Verify an RSA-SHA256-PKCS1v15 detached signature over @p data. +/// +/// @param data The signed data bytes (e.g. raw manifest JSON text). +/// @param base64_sig Base64-encoded RSA signature (single line, no newlines). +/// @param public_key_pem PEM-encoded RSA public key (-----BEGIN PUBLIC KEY----- block). +/// @param logger Logger for diagnostic output. +/// @return true if the signature is valid; false otherwise. 
+bool VerifyRsaSha256Signature( + std::string_view data, + std::string_view base64_sig, + std::string_view public_key_pem, + ILogger& logger); + } // namespace fl diff --git a/sdk_v2/cpp/src/ep_detection/webgpu_ep_bootstrapper.cc b/sdk_v2/cpp/src/ep_detection/webgpu_ep_bootstrapper.cc index afdb7da9f..3725bf209 100644 --- a/sdk_v2/cpp/src/ep_detection/webgpu_ep_bootstrapper.cc +++ b/sdk_v2/cpp/src/ep_detection/webgpu_ep_bootstrapper.cc @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN @@ -139,9 +140,11 @@ bool WebGpuEpBootstrapper::DownloadAndRegister(bool force, try { // Fetch manifest before acquiring lock (avoid holding lock during network I/O) auto manifest = FetchManifest(logger); + const std::unordered_map expected_hashes = { + {kWebGpuProviderLib, manifest.sha256}}; // Check if package already exists and is valid - if (!force && VerifyEpPackage(ep_dir, {{kWebGpuProviderLib, manifest.sha256}}, "WebGPU EP", logger)) { + if (!force && VerifyEpPackage(ep_dir, expected_hashes, "WebGPU EP", logger)) { logger.Log(LogLevel::Debug, "WebGPU EP: local binaries match manifest, skipping download"); } else { // Ensure parent directory exists for the lock file @@ -152,7 +155,7 @@ bool WebGpuEpBootstrapper::DownloadAndRegister(bool force, FileLock lock(lock_path); // Re-check after acquiring lock (another process may have completed the update) - if (!force && VerifyEpPackage(ep_dir, {{kWebGpuProviderLib, manifest.sha256}}, "WebGPU EP", logger)) { + if (!force && VerifyEpPackage(ep_dir, expected_hashes, "WebGPU EP", logger)) { logger.Log(LogLevel::Debug, "WebGPU EP: another process already completed the update"); } else { // Download and extract to staging directory for atomic swap @@ -200,7 +203,7 @@ bool WebGpuEpBootstrapper::DownloadAndRegister(bool force, std::filesystem::remove(zip_path); // Verify staging - if (!VerifyEpPackage(staging_dir, {{kWebGpuProviderLib, manifest.sha256}}, "WebGPU EP", logger)) { + if 
(!VerifyEpPackage(staging_dir, expected_hashes, "WebGPU EP", logger)) { logger.Log(LogLevel::Warning, fmt::format("WebGPU EP: verification failed after extraction (attempt {})", attempts_)); diff --git a/sdk_v2/cpp/src/inferencing/model_load_manager.cc b/sdk_v2/cpp/src/inferencing/model_load_manager.cc index 0bc321b9b..4b25ce9d5 100644 --- a/sdk_v2/cpp/src/inferencing/model_load_manager.cc +++ b/sdk_v2/cpp/src/inferencing/model_load_manager.cc @@ -36,6 +36,16 @@ constexpr ModelIdEpRequirement kModelIdEpRequirements[] = { {"vitis-npu", "VitisAIExecutionProvider"}, }; +/// Returns true if the registered EP name satisfies the catalog requirement. +/// CudaPluginExecutionProvider is treated as equivalent to CUDAExecutionProvider +/// because catalog models are tagged with the canonical name, not the plugin name. +bool EpSatisfiesRequirement(std::string_view registered_ep, std::string_view required_ep) { + if (registered_ep == required_ep) return true; + if (required_ep == "CUDAExecutionProvider" && registered_ep == "CudaPluginExecutionProvider") + return true; + return false; +} + /// Returns the required EP registration name for a model_id, or empty if none required. std::string_view RequiredEpForModelId(std::string_view model_id) { for (const auto& req : kModelIdEpRequirements) { @@ -65,8 +75,10 @@ ModelLoadManager::~ModelLoadManager() { bool ModelLoadManager::HasEP(const std::string& ep_name) const { const auto& device_map = ep_detector_.GetAvailableDevicesToEPs(); for (const auto& [device, eps] : device_map) { - if (std::find(eps.begin(), eps.end(), ep_name) != eps.end()) { - return true; + for (const auto& registered : eps) { + if (EpSatisfiesRequirement(registered, ep_name)) { + return true; + } } }