From 9e288de134fc93eb1173cb06129708388620fc9b Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Tue, 9 Jun 2026 03:49:53 -0500 Subject: [PATCH 01/36] sdk_v2/cpp: cross-process lock + skip-existing for downloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increment 1 of the resumable-downloads port (see docs/ResumableDownloadsPlan.md). No public C ABI changes. CrossProcessFileLock - New RAII helper backed by an OS-level exclusive lock on <directory>/.download.lock: Windows uses CreateFileW with FILE_SHARE_NONE + FILE_FLAG_DELETE_ON_CLOSE; POSIX uses open(O_CREAT|O_RDWR|O_CLOEXEC) + flock(LOCK_EX|LOCK_NB). - Writes a PID:<pid>,Time:<UTC timestamp> diagnostic line for crash forensics. - WaitForLockForDirectory polls at 1.25 s with a 3 h timeout. The cancellation hook is a std::function predicate (not a bare atomic) so callers can route it through their own cancellation channel — DownloadManager forwards it through the existing progress callback's non-zero return. DownloadManager::DownloadModel - Acquires the cross-process lock immediately after create_directories and before writing the in-progress signal file. - Re-checks the cache after acquiring the lock to short-circuit when another process just finished the same download. - Now stores ILogger& logger_ so the lock acquisition can log who is waiting. DownloadBlobsToDirectory (skip-existing) - New IsDownloadNeeded(blob, local_path) filter: blobs whose local file already exists at the expected content_length are skipped. - Skipped bytes are credited toward the total — the initial progress callback now emits skipped_bytes / total_size * 100 instead of always 0%, so resumed downloads start at an honest percentage rather than rewinding to zero. - If every blob is already on disk the function emits 100% and returns. Tests - 9 new CrossProcessFileLockTest cases (acquire, release, contention, recovery after release, directory creation, wait happy path, wait-then-acquire, cancellation, timeout). 
- 4 new BlobDownloadTest cases for skip-existing (same-size, wrong-size, progress accounting, everything-cached). - Full targeted suite passes 40/40 in 14 s. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/CMakeLists.txt | 1 + sdk_v2/cpp/src/download/blob_downloader.cc | 67 +++++- .../src/download/cross_process_file_lock.cc | 198 ++++++++++++++++++ .../src/download/cross_process_file_lock.h | 62 ++++++ sdk_v2/cpp/src/download/download_manager.cc | 37 +++- sdk_v2/cpp/src/download/download_manager.h | 3 + sdk_v2/cpp/test/CMakeLists.txt | 1 + .../cross_process_file_lock_test.cc | 188 +++++++++++++++++ sdk_v2/cpp/test/internal_api/download_test.cc | 93 ++++++++ 9 files changed, 644 insertions(+), 6 deletions(-) create mode 100644 sdk_v2/cpp/src/download/cross_process_file_lock.cc create mode 100644 sdk_v2/cpp/src/download/cross_process_file_lock.h create mode 100644 sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc diff --git a/sdk_v2/cpp/CMakeLists.txt b/sdk_v2/cpp/CMakeLists.txt index 671c7648..e995a20e 100644 --- a/sdk_v2/cpp/CMakeLists.txt +++ b/sdk_v2/cpp/CMakeLists.txt @@ -149,6 +149,7 @@ set(FOUNDRY_LOCAL_SOURCES src/inferencing/generative/chat/chat_template.cc src/configuration.cc src/download/blob_downloader.cc + src/download/cross_process_file_lock.cc src/download/download_manager.cc src/download/inference_model_writer.cc src/download/model_registry_client.cc diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 1d5c2981..eeccf7d2 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -247,6 +247,35 @@ std::string ComputeRelativePath(const std::string& prefix, const std::string& bl return blob_name.substr(trim); } +bool EndsWith(const std::string& str, const std::string& suffix) { + if (suffix.size() > str.size()) { + return false; + } + + return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin(), + 
[](char a, char b) { + return std::tolower(static_cast(a)) == + std::tolower(static_cast(b)); + }); +} + +/// Returns false if a file at `local_path` already matches the blob's expected +/// `content_length` exactly — in which case the caller can skip the download. +/// Returns true (download needed) for any of: missing file, size mismatch, or +/// filesystem-stat errors (treat as "redownload to be safe"). +bool IsDownloadNeeded(const BlobItemInfo& blob, const std::string& local_path) { + std::error_code ec; + auto status = std::filesystem::status(local_path, ec); + if (ec || !std::filesystem::exists(status) || !std::filesystem::is_regular_file(status)) { + return true; + } + auto size = std::filesystem::file_size(local_path, ec); + if (ec) { + return true; + } + return static_cast(size) != blob.content_length; +} + } // anonymous namespace void DownloadBlobsToDirectory(IBlobDownloader& downloader, @@ -304,15 +333,43 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, return a.first.content_length < b.first.content_length; }); - // Step 4: Calculate total size for progress + // Step 4: Calculate total size across every in-scope blob, including those + // already present on disk — so 100% always means "every byte is local". int64_t total_size = 0; for (const auto& [blob, _] : blobs_to_download) { total_size += blob.content_length; } - // Step 4.5: Emit 0% so callers know the download has started + // Step 4.25: Skip blobs already present at the expected size. Their bytes + // count toward "downloaded" so the percentage stays accurate when this is a + // resume of a partially-completed download. 
+ int64_t skipped_bytes = 0; + blobs_to_download.erase( + std::remove_if(blobs_to_download.begin(), blobs_to_download.end(), + [&skipped_bytes](const auto& pair) { + if (IsDownloadNeeded(pair.first, pair.second)) { + return false; + } + skipped_bytes += pair.first.content_length; + return true; + }), + blobs_to_download.end()); + + // Step 4.5: Emit initial progress reflecting any already-on-disk bytes. + // If everything was skipped, emit 100% directly and return. + if (blobs_to_download.empty()) { + if (options.progress) { + options.progress(100.0f); + } + return; + } + if (options.progress) { - int result = options.progress(0.0f); + float initial_percent = total_size > 0 + ? static_cast(skipped_bytes) / + static_cast(total_size) * 100.0f + : 0.0f; + int result = options.progress(initial_percent); if (result != 0) { FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled by user callback return value"); } @@ -322,7 +379,9 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, // The cancellation flag is set when the progress callback returns non-zero. // It is shared with chunk download threads so they can exit promptly. std::atomic cancelled{false}; - std::atomic total_downloaded_bytes{0}; + // Seed with skipped bytes so per-chunk progress callbacks compute the right + // overall percentage. + std::atomic total_downloaded_bytes{skipped_bytes}; for (const auto& [blob, local_path] : blobs_to_download) { // Check cancellation between blobs diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc new file mode 100644 index 00000000..33eeb215 --- /dev/null +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -0,0 +1,198 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "download/cross_process_file_lock.h" +#include "exception.h" +#include "logger.h" + +#include + +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#include +#else +#include +#include +#include +#include +#endif + +namespace fl { + +namespace { + +constexpr const char* kLockFileName = ".download.lock"; + +/// `PID:,Time:\n` — mirrors what C# writes +/// (CrossProcessFileLock.cs:68) so the lock file is recognizable across SDKs. +std::string FormatProcessInfo() { +#ifdef _WIN32 + auto pid = static_cast(_getpid()); +#else + auto pid = static_cast(getpid()); +#endif + auto t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + std::tm tm{}; +#ifdef _WIN32 + gmtime_s(&tm, &t); +#else + gmtime_r(&t, &tm); +#endif + std::ostringstream oss; + oss << "PID:" << pid << ",Time:" << std::put_time(&tm, "%Y-%m-%dT%H:%M:%SZ") << '\n'; + return oss.str(); +} + +} // namespace + +// Platform-specific resource handle. The destructor here is the only thing +// that releases the lock; CrossProcessFileLock's destructor is defaulted. +#ifdef _WIN32 +struct CrossProcessFileLock::State { + HANDLE handle; + ~State() { + if (handle != INVALID_HANDLE_VALUE) { + // FILE_FLAG_DELETE_ON_CLOSE removes the file when the last handle closes. + CloseHandle(handle); + } + } +}; +#else +struct CrossProcessFileLock::State { + int fd; + std::filesystem::path path; + ~State() { + if (fd >= 0) { + // Unlink before close so the file disappears at the same instant the + // lock releases; a concurrent acquirer simply recreates it. 
+ ::unlink(path.c_str()); + ::close(fd); + } + } +}; +#endif + +CrossProcessFileLock::CrossProcessFileLock(std::filesystem::path path, + std::unique_ptr state, + ILogger* logger) + : path_(std::move(path)), state_(std::move(state)), logger_(logger) {} + +CrossProcessFileLock::~CrossProcessFileLock() { + // Release the OS handle first so the "released" log message is accurate. + state_.reset(); + if (logger_) { + logger_->Log(LogLevel::Debug, "CrossProcessFileLock released: " + path_.string()); + } +} + +std::unique_ptr CrossProcessFileLock::TryAcquireForDirectory( + const std::filesystem::path& directory, ILogger* logger) { + std::error_code ec; + std::filesystem::create_directories(directory, ec); + // Best-effort: if create_directories failed, the platform open below will + // surface a clearer error message. + + auto lock_path = directory / kLockFileName; + std::unique_ptr state; + +#ifdef _WIN32 + // dwShareMode=0 blocks any other open (cross- and in-process) until this + // handle closes. FILE_FLAG_DELETE_ON_CLOSE pairs OPEN_ALWAYS into a + // self-cleaning lock that doesn't require unlink-then-close races. + auto wide = lock_path.wstring(); + HANDLE handle = CreateFileW(wide.c_str(), + GENERIC_READ | GENERIC_WRITE, + 0, + nullptr, + OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, + nullptr); + if (handle == INVALID_HANDLE_VALUE) { + DWORD err = GetLastError(); + if (err == ERROR_SHARING_VIOLATION || err == ERROR_LOCK_VIOLATION || err == ERROR_ACCESS_DENIED) { + // ACCESS_DENIED can surface on FILE_SHARE_NONE collisions when the + // existing handle has narrower access rights — treat as contention. 
+ return nullptr; + } + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "CreateFileW failed for lock '" + lock_path.string() + + "' (GetLastError=" + std::to_string(err) + ")"); + } + + auto info = FormatProcessInfo(); + DWORD written = 0; + WriteFile(handle, info.data(), static_cast(info.size()), &written, nullptr); + FlushFileBuffers(handle); + + state = std::unique_ptr(new State{handle}); +#else + int fd = ::open(lock_path.c_str(), O_CREAT | O_RDWR | O_CLOEXEC, 0644); + if (fd < 0) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "open failed for lock '" + lock_path.string() + "' (errno=" + std::to_string(errno) + ")"); + } + if (::flock(fd, LOCK_EX | LOCK_NB) != 0) { + int err = errno; + ::close(fd); + if (err == EWOULDBLOCK || err == EAGAIN) { + return nullptr; + } + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "flock failed for '" + lock_path.string() + "' (errno=" + std::to_string(err) + ")"); + } + + (void)::ftruncate(fd, 0); + auto info = FormatProcessInfo(); + (void)::write(fd, info.data(), info.size()); + + state = std::unique_ptr(new State{fd, lock_path}); +#endif + + if (logger) { + logger->Log(LogLevel::Debug, "CrossProcessFileLock acquired: " + lock_path.string()); + } + return std::unique_ptr( + new CrossProcessFileLock(std::move(lock_path), std::move(state), logger)); +} + +std::unique_ptr WaitForLockForDirectory( + const std::filesystem::path& directory, + const CancellationPredicate& is_cancelled, + ILogger* logger, + std::chrono::milliseconds poll_interval, + std::chrono::milliseconds timeout) { + auto deadline = std::chrono::steady_clock::now() + timeout; + // Poll cancellation in slices of at most 100 ms so a long poll interval + // (1.25 s default) doesn't keep a cancelling caller waiting. 
+ constexpr std::chrono::milliseconds kCancelSlice{100}; + while (true) { + if (is_cancelled && is_cancelled()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "lock acquisition cancelled"); + } + auto lock = CrossProcessFileLock::TryAcquireForDirectory(directory, logger); + if (lock) { + return lock; + } + if (std::chrono::steady_clock::now() >= deadline) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "timed out waiting for cross-process download lock on '" + directory.string() + "'"); + } + auto slice_end = std::chrono::steady_clock::now() + poll_interval; + while (std::chrono::steady_clock::now() < slice_end) { + if (is_cancelled && is_cancelled()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "lock acquisition cancelled"); + } + std::this_thread::sleep_for(std::min(kCancelSlice, poll_interval)); + } + } +} + +} // namespace fl diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.h b/sdk_v2/cpp/src/download/cross_process_file_lock.h new file mode 100644 index 00000000..2c771b9c --- /dev/null +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.h @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#pragma once + +#include +#include +#include +#include + +namespace fl { + +class ILogger; + +/// RAII exclusive lock backed by an OS-level file lock on +/// `/.download.lock`. Serializes model downloads across processes +/// that share a cache directory. A crash while holding the lock may leave a +/// zero-byte file behind; the next acquirer reopens and re-locks, so the leak +/// is harmless. +class CrossProcessFileLock { + public: + /// Non-blocking acquisition. Returns nullptr if another process currently + /// holds the lock. Creates `directory` if missing. Throws fl::Exception on + /// unexpected errors (permission denied, etc.). 
+ static std::unique_ptr TryAcquireForDirectory( + const std::filesystem::path& directory, + ILogger* logger = nullptr); + + ~CrossProcessFileLock(); + + CrossProcessFileLock(const CrossProcessFileLock&) = delete; + CrossProcessFileLock& operator=(const CrossProcessFileLock&) = delete; + CrossProcessFileLock(CrossProcessFileLock&&) = delete; + CrossProcessFileLock& operator=(CrossProcessFileLock&&) = delete; + + /// Path to the lock file (for diagnostics / tests). + const std::filesystem::path& path() const noexcept { return path_; } + + private: + struct State; // Platform-specific; defined in the .cc. + + CrossProcessFileLock(std::filesystem::path path, std::unique_ptr state, ILogger* logger); + + std::filesystem::path path_; + std::unique_ptr state_; + ILogger* logger_; +}; + +/// Returning true aborts WaitForLockForDirectory with FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED. +using CancellationPredicate = std::function; + +/// Polls TryAcquireForDirectory until the lock is acquired, `is_cancelled()` +/// returns true, or `timeout` elapses. +/// Throws FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED on cancellation, or +/// FOUNDRY_LOCAL_ERROR_INTERNAL on timeout. +std::unique_ptr WaitForLockForDirectory( + const std::filesystem::path& directory, + const CancellationPredicate& is_cancelled, + ILogger* logger = nullptr, + std::chrono::milliseconds poll_interval = std::chrono::milliseconds{1250}, + std::chrono::milliseconds timeout = std::chrono::hours{3}); + +} // namespace fl diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index 6e3bd64c..c4407bb3 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -1,8 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "download/download_manager.h" +#include "download/cross_process_file_lock.h" #include "download/inference_model_writer.h" #include "exception.h" +#include "log_level.h" +#include "logger.h" #include "util/path_safety.h" #include "util/region_fallback.h" #include "utils.h" @@ -176,6 +179,7 @@ DownloadManager::DownloadManager(std::string cache_directory, std::string_view c : cache_directory_(std::move(cache_directory)), config_region_(NormalizeConfiguredRegion(catalog_region)), max_concurrency_(max_concurrency), + logger_(logger), registry_client_(std::make_unique( kDefaultRegistryRegion, logger, std::make_unique(logger, !disable_region_fallback))), blob_downloader_(std::make_unique()) {} @@ -241,7 +245,7 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, auto model_path = ComputeModelPath(info); - // Check if already downloaded (before validating URI — cached models don't need one). + // Fast path: serve the cache without taking the cross-process lock. // A valid cache hit requires: directory exists, no in-progress signal file, and // inference_model.json is present (written by DownloadModel on successful completion). auto signal_path = std::filesystem::path(model_path) / kDownloadSignalFileName; @@ -260,9 +264,38 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "cannot download model: empty URI (asset_id)"); } - // Create output directory + // Create output directory before taking the cross-process lock, since the lock + // file lives inside it. std::filesystem::create_directories(model_path); + // Serialize across processes that share this cache directory. Inside the + // running process `download_mutex_` already prevents reentry; the file lock + // protects against a second SDK instance (e.g. another service or CLI) racing + // on the same model directory. + auto cancel_pred = [&progress_cb]() -> bool { + // progress_cb returning non-zero is the SDK's cancellation signal. 
Reusing + // it here also acts as a periodic heartbeat (0%) while we wait for the + // other process to finish. + return progress_cb && progress_cb(0.0f) != 0; + }; + auto lock = CrossProcessFileLock::TryAcquireForDirectory(model_path, &logger_); + if (!lock) { + logger_.Log(LogLevel::Information, + "Model download is being performed by another process. Waiting on lock at '" + + model_path + "'..."); + lock = WaitForLockForDirectory(model_path, cancel_pred, &logger_); + } + + // Another process may have just completed the download we were waiting on. + // Re-check the cache now that we hold the lock. + if (std::filesystem::exists(model_path) && !std::filesystem::exists(signal_path) && + HasInferenceModelJson(model_path)) { + if (progress_cb) { + progress_cb(100.0f); + } + return ResolveEffectiveModelPath(model_path); + } + // Create download signal file { std::ofstream signal(signal_path); diff --git a/sdk_v2/cpp/src/download/download_manager.h b/sdk_v2/cpp/src/download/download_manager.h index c552101b..b2fe1458 100644 --- a/sdk_v2/cpp/src/download/download_manager.h +++ b/sdk_v2/cpp/src/download/download_manager.h @@ -3,9 +3,11 @@ #pragma once #include "download/blob_downloader.h" +#include "download/cross_process_file_lock.h" #include "download/model_registry_client.h" #include "model_info.h" +#include #include #include #include @@ -74,6 +76,7 @@ class DownloadManager { // from config. 
std::string config_region_; int max_concurrency_; + ILogger& logger_; std::unique_ptr registry_client_; std::unique_ptr blob_downloader_; diff --git a/sdk_v2/cpp/test/CMakeLists.txt b/sdk_v2/cpp/test/CMakeLists.txt index 08e23caf..ff9923fc 100644 --- a/sdk_v2/cpp/test/CMakeLists.txt +++ b/sdk_v2/cpp/test/CMakeLists.txt @@ -21,6 +21,7 @@ add_executable(foundry_local_tests internal_api/chat_completions_test.cc internal_api/chat_completions_converter_test.cc internal_api/configuration_test.cc + internal_api/cross_process_file_lock_test.cc internal_api/download_test.cc internal_api/embeddings/contracts_embeddings_test.cc internal_api/embeddings/fp16_test.cc diff --git a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc new file mode 100644 index 00000000..a6e38fdf --- /dev/null +++ b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc @@ -0,0 +1,188 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#include "download/cross_process_file_lock.h" + +#include "exception.h" + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +using namespace fl; + +namespace { + +/// Per-test temp directory. Auto-cleans on destruction so a flaky test never +/// leaks lock files into the system temp dir. 
+class TempDir { + public: + TempDir() { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist; + path_ = fs::temp_directory_path() / ("fl_lock_test_" + std::to_string(dist(gen))); + fs::create_directories(path_); + } + + ~TempDir() { + std::error_code ec; + fs::remove_all(path_, ec); + } + + const fs::path& path() const { return path_; } + + private: + fs::path path_; +}; + +} // namespace + +TEST(CrossProcessFileLockTest, TryAcquireSucceedsForFreshDirectory) { + TempDir dir; + + auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + + ASSERT_NE(lock, nullptr); + EXPECT_TRUE(fs::exists(lock->path())); + EXPECT_EQ(lock->path().parent_path(), dir.path()); + EXPECT_EQ(lock->path().filename(), ".download.lock"); +} + +TEST(CrossProcessFileLockTest, ReleaseOnDestructionRemovesLockFile) { + TempDir dir; + fs::path lock_file; + + { + auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + ASSERT_NE(lock, nullptr); + lock_file = lock->path(); + EXPECT_TRUE(fs::exists(lock_file)); + } + + // After RAII release the lock file should be gone (Win FILE_FLAG_DELETE_ON_CLOSE, + // POSIX explicit unlink in destructor). 
+ EXPECT_FALSE(fs::exists(lock_file)); +} + +TEST(CrossProcessFileLockTest, SecondAcquireReturnsNullWhileFirstIsHeld) { + TempDir dir; + auto first = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + ASSERT_NE(first, nullptr); + + auto second = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + EXPECT_EQ(second, nullptr); +} + +TEST(CrossProcessFileLockTest, ReacquireSucceedsAfterRelease) { + TempDir dir; + { + auto first = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + ASSERT_NE(first, nullptr); + } + auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + EXPECT_NE(reacquired, nullptr); +} + +TEST(CrossProcessFileLockTest, CreatesDirectoryIfMissing) { + TempDir parent; + auto missing = parent.path() / "nested" / "model"; + + ASSERT_FALSE(fs::exists(missing)); + + auto lock = CrossProcessFileLock::TryAcquireForDirectory(missing); + + ASSERT_NE(lock, nullptr); + EXPECT_TRUE(fs::is_directory(missing)); + EXPECT_TRUE(fs::exists(missing / ".download.lock")); +} + +TEST(CrossProcessFileLockTest, WaitForLockReturnsImmediatelyWhenAvailable) { + TempDir dir; + + auto start = std::chrono::steady_clock::now(); + auto lock = WaitForLockForDirectory(dir.path(), []() { return false; }); + auto elapsed = std::chrono::steady_clock::now() - start; + + ASSERT_NE(lock, nullptr); + // Fast-path acquisition should be well under 100 ms. + EXPECT_LT(elapsed, std::chrono::milliseconds(500)); +} + +TEST(CrossProcessFileLockTest, WaitForLockAcquiresAfterHolderReleases) { + TempDir dir; + auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + ASSERT_NE(holder, nullptr); + + // Release the holder after a short delay on another thread. 
+ std::thread releaser([&] { + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + holder.reset(); + }); + + auto start = std::chrono::steady_clock::now(); + auto lock = WaitForLockForDirectory(dir.path(), + []() { return false; }, + /*logger=*/nullptr, + /*poll_interval=*/std::chrono::milliseconds(100), + /*timeout=*/std::chrono::seconds(10)); + auto elapsed = std::chrono::steady_clock::now() - start; + + releaser.join(); + ASSERT_NE(lock, nullptr); + EXPECT_GE(elapsed, std::chrono::milliseconds(200)); + EXPECT_LT(elapsed, std::chrono::seconds(5)); +} + +TEST(CrossProcessFileLockTest, WaitForLockThrowsOnCancellation) { + TempDir dir; + auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + ASSERT_NE(holder, nullptr); + + std::atomic cancel{false}; + std::thread canceller([&] { + std::this_thread::sleep_for(std::chrono::milliseconds(150)); + cancel.store(true); + }); + + try { + (void)WaitForLockForDirectory(dir.path(), + [&cancel]() { return cancel.load(); }, + /*logger=*/nullptr, + /*poll_interval=*/std::chrono::milliseconds(100), + /*timeout=*/std::chrono::seconds(10)); + canceller.join(); + FAIL() << "expected fl::Exception(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED)"; + } catch (const Exception& ex) { + canceller.join(); + EXPECT_EQ(ex.code(), FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED); + } +} + +TEST(CrossProcessFileLockTest, WaitForLockThrowsOnTimeout) { + TempDir dir; + auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + ASSERT_NE(holder, nullptr); + + try { + (void)WaitForLockForDirectory(dir.path(), + []() { return false; }, + /*logger=*/nullptr, + /*poll_interval=*/std::chrono::milliseconds(50), + /*timeout=*/std::chrono::milliseconds(200)); + FAIL() << "expected fl::Exception(FOUNDRY_LOCAL_ERROR_INTERNAL)"; + } catch (const Exception& ex) { + EXPECT_EQ(ex.code(), FOUNDRY_LOCAL_ERROR_INTERNAL); + std::string what = ex.what(); + EXPECT_NE(what.find("timed out"), std::string::npos); + } +} diff --git 
a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 38215c6c..f352db6f 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -517,6 +517,99 @@ TEST(BlobDownloadTest, HandlesEmptyBlobList) { EXPECT_TRUE(mock.downloaded_blobs.empty()); } +// ======================================================================== +// Skip-existing (Increment 1: resumable downloads) +// ======================================================================== + +TEST(BlobDownloadTest, SkipsExistingFilesWithCorrectSize) { + TempDir tmpdir; + // Pre-create one of the blobs at the expected size on disk. + std::ofstream(tmpdir.path() / "weights.safetensors") << std::string(1000, 'X'); + + MockBlobDownloader mock; + mock.blobs_to_return = { + {"weights.safetensors", 1000}, + {"config.json", 100}, + }; + + BlobDownloadOptions opts; + DownloadBlobsToDirectory(mock, "https://test.blob/c?sig=x", tmpdir.string(), opts); + + // Only the missing blob should be downloaded. + ASSERT_EQ(mock.downloaded_blobs.size(), 1u); + EXPECT_EQ(mock.downloaded_blobs[0], "config.json"); +} + +TEST(BlobDownloadTest, RedownloadsFilesWithWrongSize) { + TempDir tmpdir; + // Existing file is truncated relative to the expected blob size. + std::ofstream(tmpdir.path() / "weights.safetensors") << std::string(500, 'X'); + + MockBlobDownloader mock; + mock.blobs_to_return = { + {"weights.safetensors", 1000}, + }; + + BlobDownloadOptions opts; + DownloadBlobsToDirectory(mock, "https://test.blob/c?sig=x", tmpdir.string(), opts); + + // Wrong-size files should be redownloaded (the mock overwrites them). + ASSERT_EQ(mock.downloaded_blobs.size(), 1u); + EXPECT_EQ(mock.downloaded_blobs[0], "weights.safetensors"); +} + +TEST(BlobDownloadTest, ReportsSkippedBytesInInitialProgress) { + TempDir tmpdir; + // 500 of 1500 bytes already on disk → initial progress should be ~33%. 
+ std::ofstream(tmpdir.path() / "already.bin") << std::string(500, 'X'); + + MockBlobDownloader mock; + mock.blobs_to_return = { + {"already.bin", 500}, + {"missing.bin", 1000}, + }; + + std::vector progress_values; + BlobDownloadOptions opts; + opts.progress = [&](float pct) { + progress_values.push_back(pct); + return 0; + }; + + DownloadBlobsToDirectory(mock, "https://test.blob/c?sig=x", tmpdir.string(), opts); + + ASSERT_FALSE(progress_values.empty()); + // First emitted progress reflects the already-on-disk bytes (500/1500 ≈ 33.3%). + EXPECT_NEAR(progress_values.front(), 100.0f * 500.0f / 1500.0f, 0.5f); + // Final progress must hit 100%. + EXPECT_FLOAT_EQ(progress_values.back(), 100.0f); +} + +TEST(BlobDownloadTest, EmitsHundredPercentWhenEverythingIsCached) { + TempDir tmpdir; + std::ofstream(tmpdir.path() / "a.bin") << std::string(100, 'A'); + std::ofstream(tmpdir.path() / "b.bin") << std::string(200, 'B'); + + MockBlobDownloader mock; + mock.blobs_to_return = { + {"a.bin", 100}, + {"b.bin", 200}, + }; + + std::vector progress_values; + BlobDownloadOptions opts; + opts.progress = [&](float pct) { + progress_values.push_back(pct); + return 0; + }; + + DownloadBlobsToDirectory(mock, "https://test.blob/c?sig=x", tmpdir.string(), opts); + + EXPECT_TRUE(mock.downloaded_blobs.empty()); + ASSERT_FALSE(progress_values.empty()); + EXPECT_FLOAT_EQ(progress_values.back(), 100.0f); +} + // ======================================================================== // Path-traversal hardening (security) // ======================================================================== From 8225ce78cd8d00316d5369b40b8cf91aa208d9c9 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Tue, 9 Jun 2026 12:45:08 -0500 Subject: [PATCH 02/36] sdk_v2/cpp: per-chunk resumable downloads with linked cancellation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increment 2 of the resumable-downloads C++ port. 
Adds a .dlstate sidecar that tracks per-chunk completion via a truncated bitmap (matching the C# BlobDownloadState design from neutron-server), and replaces AzureBlobDownloader::DownloadBlob's batch loop with a worker pool that shares a single Azure::Core::Context. The first chunk failure calls Cancel() on the shared context and flips an internal cancel flag, so every other in-flight chunk drains within tens of milliseconds instead of waiting on its own retry+timeout budget. Highlights: - BlobDownloadState (new): compact binary on-disk format. ~45-byte LE header (magic FLDS + version + sizes + counters) followed by the truncated bitmap suffix. The prefix of fully-completed chunks is implied — SaveState advances bitmap_byte_aligned_start past every fully-set word to keep the sidecar proportional to the unfinished tail. LoadState rejects on magic, version, or layout (blob_size / chunk_size / total_chunks) mismatch and starts the download fresh in that case. Atomic save via tmp file + rename, with remove-then-rename fallback for filesystems that don't replace atomically. - AzureBlobDownloader rework: protected virtual GetBlobSize and DownloadChunkToBuffer (against an opaque ChunkContext) form a test seam so subclasses can simulate per-chunk behavior without touching Azure. Worker pool uses an atomic queue index over pending chunks; workers claim, fetch, write, mark complete, and periodically save (max(10, num_chunks/50) chunks). Pre-allocation is skipped if the file is already at full size, so resume doesn't discard valid bytes. Sidecar is persisted on any failure and deleted on full success. - IsDownloadNeeded now treats the presence of a .dlstate sidecar as "download still needed" — the data file may be pre-allocated with holes. - AzureBlobDownloader picks up an optional ILogger*; DownloadManager passes its own logger through. 
Tests: - 15 BlobDownloadStateTest cases (create/mark/save/load/delete, gap enumeration, partial final chunk math, byte-aligned-start advancement, rejection of magic/size mismatches). - 5 AzureBlobDownloaderResumeTest cases via a FakeChunkAzureDownloader subclass: resume skips already-completed chunks, sidecar persists on chunk failure, stale sidecar is cleaned up for empty blobs, and a failing chunk drains 9 sleeping peers within ~300 ms (well under the 2 s threshold) — exercising the linked-cancellation cascade end to end. 20 new tests; full BlobDownloader/DownloadManager/CrossProcessFileLock suite is 59/59 in ~15 s. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/CMakeLists.txt | 1 + .../cpp/src/download/blob_download_state.cc | 375 +++++++++++++++++ sdk_v2/cpp/src/download/blob_download_state.h | 116 +++++ sdk_v2/cpp/src/download/blob_downloader.cc | 395 ++++++++++++------ sdk_v2/cpp/src/download/blob_downloader.h | 40 ++ sdk_v2/cpp/src/download/download_manager.cc | 2 +- sdk_v2/cpp/test/CMakeLists.txt | 1 + .../internal_api/blob_download_state_test.cc | 251 +++++++++++ sdk_v2/cpp/test/internal_api/download_test.cc | 214 ++++++++++ 9 files changed, 1271 insertions(+), 124 deletions(-) create mode 100644 sdk_v2/cpp/src/download/blob_download_state.cc create mode 100644 sdk_v2/cpp/src/download/blob_download_state.h create mode 100644 sdk_v2/cpp/test/internal_api/blob_download_state_test.cc diff --git a/sdk_v2/cpp/CMakeLists.txt b/sdk_v2/cpp/CMakeLists.txt index e995a20e..69498c44 100644 --- a/sdk_v2/cpp/CMakeLists.txt +++ b/sdk_v2/cpp/CMakeLists.txt @@ -148,6 +148,7 @@ set(FOUNDRY_LOCAL_SOURCES src/inferencing/generative/chat/chat_session.cc src/inferencing/generative/chat/chat_template.cc src/configuration.cc + src/download/blob_download_state.cc src/download/blob_downloader.cc src/download/cross_process_file_lock.cc src/download/download_manager.cc diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc 
b/sdk_v2/cpp/src/download/blob_download_state.cc new file mode 100644 index 00000000..d1d97baf --- /dev/null +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -0,0 +1,375 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#include "download/blob_download_state.h" +#include "logger.h" + +#include +#include +#include +#include + +namespace fl { + +namespace { + +constexpr const char* kStateFileExtension = ".dlstate"; + +// On-disk format (little-endian throughout): +// bytes | field +// -------|-------------------------------------------------------- +// 0..3 | magic "FLDS" +// 4 | version (currently 1) +// 5..12 | blob_size (int64) +// 13..16 | chunk_size (int32) +// 17..20 | total_chunks (int32) +// 21..24 | bitmap_byte_aligned_start (int32) +// 25..28 | highest_completed_chunk (int32) +// 29..32 | completed_count (int32) +// 33..40 | last_modified_unix_ms (int64) +// 41..44 | trunc_bitmap_byte_len (uint32) +// 45.. | trunc_bitmap_byte_len bytes of bitmap data, copied directly out of +// full_completion_bitmap starting at the byte offset implied by +// bitmap_byte_aligned_start. 
+constexpr char kMagic[4] = {'F', 'L', 'D', 'S'}; +constexpr uint8_t kVersion = 1; +constexpr size_t kHeaderSize = 45; + +constexpr int32_t kBitsPerWord = 64; + +template +void WriteLE(std::ostream& out, T value) { + static_assert(std::is_trivially_copyable_v); + unsigned char buf[sizeof(T)]; + std::memcpy(buf, &value, sizeof(T)); + out.write(reinterpret_cast(buf), sizeof(T)); +} + +template +bool ReadLE(std::istream& in, T& out_value) { + static_assert(std::is_trivially_copyable_v); + unsigned char buf[sizeof(T)]; + in.read(reinterpret_cast(buf), sizeof(T)); + if (!in) { + return false; + } + std::memcpy(&out_value, buf, sizeof(T)); + return true; +} + +int64_t NowUnixMs() { + return std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); +} + +} // namespace + +std::filesystem::path BlobDownloadState::GetStateFilePath(const std::filesystem::path& local_file_path) { + auto p = local_file_path; + p += kStateFileExtension; + return p; +} + +std::unique_ptr BlobDownloadState::CreateNew(std::string blob_name, + std::filesystem::path local_file_path, + int64_t blob_size, + int32_t chunk_size, + int32_t total_chunks) { + auto state = std::make_unique(); + state->blob_name = std::move(blob_name); + state->local_file_path = local_file_path.string(); + state->blob_size = blob_size; + state->chunk_size = chunk_size; + state->total_chunks = total_chunks; + state->bitmap_byte_aligned_start = 0; + state->highest_completed_chunk = -1; + state->completed_count = 0; + state->last_modified_unix_ms = NowUnixMs(); + auto words = static_cast((total_chunks + kBitsPerWord - 1) / kBitsPerWord); + state->full_completion_bitmap.assign(words, 0); + return state; +} + +std::unique_ptr BlobDownloadState::LoadState(std::string blob_name, + std::filesystem::path local_file_path, + int64_t expected_blob_size, + int32_t expected_chunk_size, + int32_t expected_total_chunks, + ILogger* logger) { + auto state_path = GetStateFilePath(local_file_path); + 
std::error_code ec; + if (!std::filesystem::exists(state_path, ec)) { + return nullptr; + } + + std::ifstream in(state_path, std::ios::binary); + if (!in) { + if (logger) { + logger->Log(LogLevel::Warning, "Could not open download state file: " + state_path.string()); + } + return nullptr; + } + + char magic[4]{}; + in.read(magic, 4); + uint8_t version = 0; + if (!in || std::memcmp(magic, kMagic, 4) != 0 || !ReadLE(in, version) || version != kVersion) { + if (logger) { + logger->Log(LogLevel::Warning, + "Download state file " + state_path.string() + " has unexpected magic/version; ignoring"); + } + return nullptr; + } + + int64_t blob_size = 0; + int32_t chunk_size = 0; + int32_t total_chunks = 0; + int32_t bitmap_byte_aligned_start = 0; + int32_t highest_completed_chunk = 0; + int32_t completed_count = 0; + int64_t last_modified_unix_ms = 0; + uint32_t trunc_len = 0; + if (!ReadLE(in, blob_size) || !ReadLE(in, chunk_size) || !ReadLE(in, total_chunks) || + !ReadLE(in, bitmap_byte_aligned_start) || !ReadLE(in, highest_completed_chunk) || + !ReadLE(in, completed_count) || !ReadLE(in, last_modified_unix_ms) || !ReadLE(in, trunc_len)) { + if (logger) { + logger->Log(LogLevel::Warning, "Download state header truncated: " + state_path.string()); + } + return nullptr; + } + + // Sanity / compatibility checks. 
+ if (blob_size != expected_blob_size || chunk_size != expected_chunk_size || + total_chunks != expected_total_chunks) { + if (logger) { + logger->Log(LogLevel::Information, + "Download state for " + state_path.string() + + " is incompatible with current blob layout; starting fresh"); + } + return nullptr; + } + if (bitmap_byte_aligned_start < 0 || bitmap_byte_aligned_start % 8 != 0 || + bitmap_byte_aligned_start > total_chunks || completed_count < 0 || + completed_count > total_chunks || highest_completed_chunk < -1 || + highest_completed_chunk >= total_chunks) { + if (logger) { + logger->Log(LogLevel::Warning, "Download state header values out of range: " + state_path.string()); + } + return nullptr; + } + + auto words_total = static_cast((total_chunks + kBitsPerWord - 1) / kBitsPerWord); + std::vector bitmap(words_total, 0); + + // The prefix of fully-completed chunks below bitmap_byte_aligned_start is + // implied — fill those bits. + size_t implicit_full_words = static_cast(bitmap_byte_aligned_start) / kBitsPerWord; + for (size_t i = 0; i < implicit_full_words && i < bitmap.size(); ++i) { + bitmap[i] = ~uint64_t{0}; + } + // Any remaining "implicit" bits inside a partial word (between + // implicit_full_words*64 and bitmap_byte_aligned_start). + if (size_t partial_bits = static_cast(bitmap_byte_aligned_start) % kBitsPerWord; + partial_bits > 0 && implicit_full_words < bitmap.size()) { + bitmap[implicit_full_words] |= (uint64_t{1} << partial_bits) - 1; + } + + if (trunc_len > 0) { + // Copy serialized bytes directly into the bitmap starting at the byte + // position implied by bitmap_byte_aligned_start. 
+ size_t byte_offset = static_cast(bitmap_byte_aligned_start) / 8; + auto* dest = reinterpret_cast(bitmap.data()) + byte_offset; + auto dest_capacity = bitmap.size() * sizeof(uint64_t) - byte_offset; + if (trunc_len > dest_capacity) { + if (logger) { + logger->Log(LogLevel::Warning, + "Download state bitmap length exceeds expected capacity: " + state_path.string()); + } + return nullptr; + } + in.read(reinterpret_cast(dest), trunc_len); + if (!in) { + if (logger) { + logger->Log(LogLevel::Warning, + "Download state bitmap payload truncated: " + state_path.string()); + } + return nullptr; + } + } + + auto state = std::make_unique(); + state->blob_name = std::move(blob_name); + state->local_file_path = local_file_path.string(); + state->blob_size = blob_size; + state->chunk_size = chunk_size; + state->total_chunks = total_chunks; + state->bitmap_byte_aligned_start = bitmap_byte_aligned_start; + state->highest_completed_chunk = highest_completed_chunk; + state->completed_count = completed_count; + state->last_modified_unix_ms = last_modified_unix_ms; + state->full_completion_bitmap = std::move(bitmap); + + if (logger) { + logger->Log(LogLevel::Information, + "Loaded download state " + state_path.string() + ": " + + std::to_string(completed_count) + "/" + std::to_string(total_chunks) + + " chunks already done"); + } + return state; +} + +int64_t BlobDownloadState::CalculateDownloadedSize() const noexcept { + int64_t bytes = static_cast(completed_count) * chunk_size; + // If the final chunk is partial and was completed, adjust the overcount. 
+ if (highest_completed_chunk == total_chunks - 1 && chunk_size > 0) { + auto remainder = blob_size % chunk_size; + if (remainder != 0) { + bytes -= (chunk_size - remainder); + } + } + return bytes; +} + +bool BlobDownloadState::IsChunkComplete(int32_t chunk_idx) const noexcept { + if (chunk_idx < 0 || chunk_idx >= total_chunks) { + return false; + } + if (chunk_idx < bitmap_byte_aligned_start) { + // Below the truncation point — implicitly complete. + return true; + } + auto word_idx = static_cast(chunk_idx) / kBitsPerWord; + auto bit_idx = static_cast(chunk_idx) % kBitsPerWord; + if (word_idx >= full_completion_bitmap.size()) { + return false; + } + return (full_completion_bitmap[word_idx] & (uint64_t{1} << bit_idx)) != 0; +} + +void BlobDownloadState::MarkChunkComplete(int32_t chunk_idx) { + if (chunk_idx < 0 || chunk_idx >= total_chunks) { + return; + } + if (IsChunkComplete(chunk_idx)) { + return; + } + if (chunk_idx > highest_completed_chunk) { + highest_completed_chunk = chunk_idx; + } + auto word_idx = static_cast(chunk_idx) / kBitsPerWord; + auto bit_idx = static_cast(chunk_idx) % kBitsPerWord; + full_completion_bitmap[word_idx] |= (uint64_t{1} << bit_idx); + ++completed_count; +} + +std::vector BlobDownloadState::GetPendingChunks() const { + std::vector pending; + pending.reserve(static_cast(total_chunks - completed_count)); + for (int32_t i = bitmap_byte_aligned_start; i < total_chunks; ++i) { + if (!IsChunkComplete(i)) { + pending.push_back(i); + } + } + return pending; +} + +void BlobDownloadState::SaveState(ILogger* logger) { + // Advance bitmap_byte_aligned_start past any words that are now all 1s, so + // the next save serializes only the unfinished tail. 
+ int32_t new_start = (bitmap_byte_aligned_start / kBitsPerWord) * kBitsPerWord;  // word-align: the scan below counts bits from bit 0 of a word + size_t word_idx = static_cast(new_start) / kBitsPerWord; + while (word_idx < full_completion_bitmap.size() && + full_completion_bitmap[word_idx] == ~uint64_t{0}) { + new_start += kBitsPerWord; + ++word_idx; + } + // Within the first not-fully-set word, advance to the lowest 0 bit and round + // down to a byte boundary (8 bits). new_start is word-aligned at this point, + // so adding the in-word bit position yields an absolute chunk index. + if (word_idx < full_completion_bitmap.size()) { + uint64_t inverted = ~full_completion_bitmap[word_idx]; + int trailing_zero = 0; + while (trailing_zero < kBitsPerWord && ((inverted >> trailing_zero) & 1) == 0) { + ++trailing_zero; + } + new_start += trailing_zero; + } + new_start = (new_start / 8) * 8; + if (new_start > total_chunks) { + new_start = (total_chunks / 8) * 8; + } + if (new_start > bitmap_byte_aligned_start) { + bitmap_byte_aligned_start = new_start; + } + + last_modified_unix_ms = NowUnixMs(); + + auto state_path = GetStateFilePath(local_file_path); + auto tmp_path = state_path; + tmp_path += ".tmp"; + + // Compute the serialized bitmap payload: bytes from bitmap_byte_aligned_start + // up to (highest_completed_chunk + 1), rounded up to the nearest byte.
+ uint32_t trunc_len = 0; + if (highest_completed_chunk >= bitmap_byte_aligned_start) { + int32_t bit_count = highest_completed_chunk - bitmap_byte_aligned_start + 1; + trunc_len = static_cast((bit_count + 7) / 8); + } + size_t byte_offset = static_cast(bitmap_byte_aligned_start) / 8; + + { + std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc); + if (!out) { + if (logger) { + logger->Log(LogLevel::Warning, "Failed to open download state tmp file: " + tmp_path.string()); + } + return; + } + out.write(kMagic, 4); + WriteLE(out, kVersion); + WriteLE(out, blob_size); + WriteLE(out, chunk_size); + WriteLE(out, total_chunks); + WriteLE(out, bitmap_byte_aligned_start); + WriteLE(out, highest_completed_chunk); + WriteLE(out, completed_count); + WriteLE(out, last_modified_unix_ms); + WriteLE(out, trunc_len); + if (trunc_len > 0) { + auto* src = reinterpret_cast(full_completion_bitmap.data()) + byte_offset; + out.write(reinterpret_cast(src), trunc_len); + } + if (!out) { + if (logger) { + logger->Log(LogLevel::Warning, "Failed to write download state tmp file: " + tmp_path.string()); + } + return; + } + } + + std::error_code ec; + std::filesystem::rename(tmp_path, state_path, ec); + if (ec) { + // Try remove-then-rename for filesystems that don't replace atomically. 
+ std::filesystem::remove(state_path, ec); + std::filesystem::rename(tmp_path, state_path, ec); + if (ec && logger) { + logger->Log(LogLevel::Warning, + "Failed to rename download state file: " + tmp_path.string() + " -> " + + state_path.string() + " (" + ec.message() + ")"); + } + } +} + +void BlobDownloadState::DeleteState(const std::filesystem::path& local_file_path, ILogger* logger) { + auto state_path = GetStateFilePath(local_file_path); + std::error_code ec; + std::filesystem::remove(state_path, ec); + if (ec && logger) { + logger->Log(LogLevel::Warning, + "Failed to delete download state file: " + state_path.string() + " (" + + ec.message() + ")"); + } +} + +} // namespace fl diff --git a/sdk_v2/cpp/src/download/blob_download_state.h b/sdk_v2/cpp/src/download/blob_download_state.h new file mode 100644 index 00000000..66cc69db --- /dev/null +++ b/sdk_v2/cpp/src/download/blob_download_state.h @@ -0,0 +1,116 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace fl { + +class ILogger; + +/// Per-blob download progress, persisted next to the data file as `.dlstate`. +/// +/// Each chunk completion flips a bit in `full_completion_bitmap`. On resume, +/// `GetPendingChunks` enumerates only the chunks whose bits are still 0. +/// +/// The serialized form stores only the bitmap suffix starting at +/// `bitmap_byte_aligned_start` — the prefix of fully-completed chunks is +/// implied. This keeps the on-disk state proportional to the *unfinished* +/// range, not the total file size. +/// +/// On-disk layout is a small fixed-width little-endian binary header followed +/// by the truncated bitmap bytes; see `blob_download_state.cc` for the exact +/// field order. Chosen over JSON for speed and compactness; the file is purely +/// internal cache state, never inspected by users. 
+class BlobDownloadState { + public: + /// Identity of the blob (populated by caller; not serialized). + std::string blob_name; + std::string local_file_path; + + /// Fixed at first save; serialized for resume integrity checks. + int64_t blob_size = 0; + int32_t chunk_size = 0; + int32_t total_chunks = 0; + + /// First chunk index whose bit is written to the sidecar by `SaveState`. + /// Always a multiple of 8 — the prefix of completed chunks below this index + /// is implied complete and is not serialized. + int32_t bitmap_byte_aligned_start = 0; + + /// Highest chunk index completed so far. -1 if no chunks are done yet. + int32_t highest_completed_chunk = -1; + + /// Cached count for O(1) `IsComplete()`. + int32_t completed_count = 0; + + /// Unix epoch milliseconds; refreshed on every save. + int64_t last_modified_unix_ms = 0; + + /// Bit set indexed by absolute chunk index: chunk `chunk_idx` lives in word + /// `chunk_idx / 64` at bit `chunk_idx % 64`. Sized up front for all + /// `total_chunks` by `CreateNew` / `LoadState`; `MarkChunkComplete` does not grow it. + std::vector full_completion_bitmap; + + /// Sidecar path for `local_file_path`. + static std::filesystem::path GetStateFilePath(const std::filesystem::path& local_file_path); + + /// Construct a fresh state for a new download. Bitmap sized for `total_chunks`. + static std::unique_ptr CreateNew(std::string blob_name, + std::filesystem::path local_file_path, + int64_t blob_size, + int32_t chunk_size, + int32_t total_chunks); + + /// Load existing state from `.dlstate`. Returns nullptr if + /// the file does not exist, is corrupted, or has incompatible + /// `blob_size` / `chunk_size` / `total_chunks` (caller-provided values are + /// authoritative — a mismatch means the blob has been reconfigured upstream + /// and the partial download is no longer valid).
+ static std::unique_ptr LoadState(std::string blob_name, + std::filesystem::path local_file_path, + int64_t expected_blob_size, + int32_t expected_chunk_size, + int32_t expected_total_chunks, + ILogger* logger = nullptr); + + /// All chunks downloaded. + bool IsComplete() const noexcept { return completed_count == total_chunks; } + + /// Sum of bytes already written. Accounts for the final chunk being smaller + /// than `chunk_size` when blob_size is not chunk-aligned. + int64_t CalculateDownloadedSize() const noexcept; + + /// Whether `chunk_idx` is already marked complete. + bool IsChunkComplete(int32_t chunk_idx) const noexcept; + + /// Mark `chunk_idx` complete. Caller must hold the mutex when called from + /// concurrent worker tasks (use `mutex()` for that). Idempotent. + void MarkChunkComplete(int32_t chunk_idx); + + /// Enumerate chunks in [0, total_chunks) that are not yet complete. + std::vector GetPendingChunks() const; + + /// Atomically write current state to `.dlstate`. Best-effort: + /// I/O errors are logged but not thrown — the next save will retry, and a + /// failed save just means the next resume will replay a few chunks. + void SaveState(ILogger* logger = nullptr); + + /// Remove the sidecar; called on successful completion. + static void DeleteState(const std::filesystem::path& local_file_path, + ILogger* logger = nullptr); + + /// Mutex protecting concurrent `MarkChunkComplete` / `SaveState` calls from + /// the chunk worker pool. + std::mutex& mutex() noexcept { return mutex_; } + + private: + mutable std::mutex mutex_; +}; + +} // namespace fl diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index eeccf7d2..13af9d63 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -1,13 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "download/blob_downloader.h" +#include "download/blob_download_state.h" #include "exception.h" +#include "logger.h" #include "util/path_safety.h" #include "util/string_utils.h" #include #include #include +#include #include #include #include @@ -24,6 +27,18 @@ namespace fl { // AzureBlobDownloader — real Azure Storage SDK implementation // ======================================================================== +/// Per-blob shared state passed to the protected virtuals. The production +/// virtuals dereference `blob_client` / `azure_ctx`; tests can ignore them. +/// `cancel_flag` is flipped by the orchestrator on the first chunk failure so +/// workers exit promptly without waiting for Azure SDK timeouts. +struct AzureBlobDownloader::ChunkContext { + Azure::Storage::Blobs::BlobClient* blob_client; + Azure::Core::Context* azure_ctx; + std::atomic* cancel_flag; +}; + +AzureBlobDownloader::AzureBlobDownloader(ILogger* logger) : logger_(logger) {} + std::vector AzureBlobDownloader::ListBlobs(const std::string& sas_uri) { try { auto container_client = Azure::Storage::Blobs::BlobContainerClient(sas_uri); @@ -45,6 +60,97 @@ std::vector AzureBlobDownloader::ListBlobs(const std::string& sas_ } } +int64_t AzureBlobDownloader::GetBlobSize(ChunkContext& ctx) { + auto props = ctx.blob_client->GetProperties({}, *ctx.azure_ctx).Value; + return props.BlobSize; +} + +std::atomic* AzureBlobDownloader::GetCancelFlag(ChunkContext& ctx) { + return ctx.cancel_flag; +} + +void AzureBlobDownloader::DownloadChunkToBuffer(ChunkContext& ctx, + int64_t offset, + int64_t size, + std::vector& buffer) { + Azure::Storage::Blobs::DownloadBlobOptions range_opts; + range_opts.Range = Azure::Core::Http::HttpRange{offset, size}; + auto result = ctx.blob_client->Download(range_opts, *ctx.azure_ctx); + auto& body_stream = *result.Value.BodyStream; + + buffer.assign(static_cast(size), 0); + size_t total_read = 0; + while (total_read < static_cast(size)) { + size_t bytes_read = 
body_stream.Read(buffer.data() + total_read, + static_cast(size) - total_read, + *ctx.azure_ctx); + if (bytes_read == 0) { + break; + } + + total_read += bytes_read; + } + + // A zero-byte read before reaching `size` indicates the server closed early. + // Treat as a hard error rather than silently writing a truncated chunk. + if (total_read < static_cast(size)) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "short read from blob stream: got " + std::to_string(total_read) + " of " + + std::to_string(size) + " bytes at offset " + std::to_string(offset)); + } + buffer.resize(total_read); +} + +namespace { + +/// Open the local file at the given offset for write. Throws on failure. +void WriteChunkToFile(const std::string& local_path, int64_t offset, + const std::vector& buffer, std::mutex& file_mutex) { + std::lock_guard lock(file_mutex); + std::ofstream f(local_path, std::ios::binary | std::ios::in | std::ios::out); + if (!f.is_open()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to open blob file for write: " + local_path); + } + + f.seekp(offset); + f.write(reinterpret_cast(buffer.data()), + static_cast(buffer.size())); + if (f.fail()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to write blob chunk to " + local_path + " at offset " + + std::to_string(offset) + " (" + std::to_string(buffer.size()) + " bytes)"); + } +} + +/// Pre-allocate `local_path` to `blob_size` bytes if it does not already exist +/// at the expected size. Allows concurrent chunk writes to seek without races +/// and avoids re-zeroing a file we're resuming. 
+void EnsureFilePreallocated(const std::string& local_path, int64_t blob_size) { + std::error_code ec; + auto cur_size = std::filesystem::file_size(local_path, ec); + if (!ec && cur_size == static_cast(blob_size)) { + return; + } + + std::ofstream f(local_path, std::ios::binary); + if (!f.is_open()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to open blob file for pre-allocation: " + local_path); + } + + f.seekp(blob_size - 1); + f.put('\0'); + f.close(); + if (f.fail()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to pre-allocate blob file: " + local_path + + " (size=" + std::to_string(blob_size) + ")"); + } +} + +} // namespace + void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, const std::string& blob_name, const std::string& local_path, @@ -65,155 +171,187 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, auto container_client = Azure::Storage::Blobs::BlobContainerClient(sas_uri, client_options); auto blob_client = container_client.GetBlobClient(blob_name); - // Context provides cooperative cancellation across all SDK operations. - Azure::Core::Context ctx; + // Single shared Azure context for the whole blob; calling Cancel() on it + // propagates into every in-flight chunk read. + Azure::Core::Context azure_ctx; + // Internal cancel flag flipped by the orchestrator on first chunk failure + // or by external cancellation; checked by workers between iterations. + std::atomic internal_cancel{false}; - // Get blob size - auto props = blob_client.GetProperties({}, ctx).Value; - int64_t blob_size = props.BlobSize; + ChunkContext chunk_ctx{&blob_client, &azure_ctx, &internal_cancel}; + + int64_t blob_size = GetBlobSize(chunk_ctx); if (blob_size == 0) { - // Empty blob — just create the file + // Empty blob — just create the file and clean up any stale sidecar. 
std::ofstream f(local_path, std::ios::binary); if (!f.is_open()) { FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "failed to create empty blob file: " + local_path); } - + f.close(); + BlobDownloadState::DeleteState(local_path, logger_); return; } // 2MB chunk size matching C# constexpr int64_t kChunkSize = 2 * 1024 * 1024; - int64_t num_chunks = (blob_size + kChunkSize - 1) / kChunkSize; + int32_t num_chunks = static_cast((blob_size + kChunkSize - 1) / kChunkSize); + + // Resume from existing sidecar if it matches the current blob layout. + auto state = BlobDownloadState::LoadState(blob_name, local_path, blob_size, + static_cast(kChunkSize), + num_chunks, logger_); + if (!state) { + state = BlobDownloadState::CreateNew(blob_name, local_path, blob_size, + static_cast(kChunkSize), num_chunks); + } - // Pre-allocate the file to the full blob size. - // This lets concurrent chunk writes seek to their offset without a resize race. - { - std::ofstream f(local_path, std::ios::binary); - if (!f.is_open()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to open blob file for pre-allocation: " + local_path); - } + // Pre-allocate only if the file is not already at full size. On resume the + // file already exists with valid bytes in completed chunks; re-truncating + // would discard them. + EnsureFilePreallocated(local_path, blob_size); - f.seekp(blob_size - 1); - f.put('\0'); - f.close(); - if (f.fail()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to pre-allocate blob file: " + local_path + - " (size=" + std::to_string(blob_size) + ")"); + // Track cumulative bytes for progress reporting; seed with bytes already + // present on disk so percent stays monotonic across resume. + std::atomic bytes_completed{state->CalculateDownloadedSize()}; + if (bytes_written_cb && bytes_completed.load() > 0) { + bytes_written_cb(bytes_completed.load()); + } + + auto pending = state->GetPendingChunks(); + if (pending.empty()) { + // Already complete on disk — drop the sidecar. 
+ BlobDownloadState::DeleteState(local_path, logger_); + if (bytes_written_cb) { + bytes_written_cb(blob_size); } + return; } - // Track cumulative bytes for progress reporting - std::atomic bytes_completed{0}; + // Save the sidecar roughly every 2% of chunks, with a floor of 10. + const int32_t save_interval = std::max(10, num_chunks / 50); + std::atomic chunks_since_save{0}; // Mutex protects concurrent writes to different offsets in the same file. - // Each chunk opens the file, seeks, and writes — the mutex prevents interleaved I/O. std::mutex file_mutex; + std::mutex error_mutex; + std::exception_ptr first_error; + + // Worker pool: workers race to claim from `pending` via atomic fetch_add. + // On any failure, the first worker to fail records the error, sets + // internal_cancel, and calls azure_ctx.Cancel(); other workers see the + // signal and exit fast. + std::atomic next_pending_idx{0}; + int worker_count = std::min(max_concurrency, static_cast(pending.size())); + if (worker_count < 1) { + worker_count = 1; + } + std::vector> workers; + workers.reserve(static_cast(worker_count)); + + auto worker_body = [&]() { + while (true) { + // External cancellation drains the pool as fast as the SDK can unwind. + if (cancelled && cancelled->load(std::memory_order_relaxed)) { + if (!internal_cancel.exchange(true)) { + azure_ctx.Cancel(); + } + return; + } + if (internal_cancel.load(std::memory_order_relaxed)) { + return; + } - // Download chunks concurrently using a bounded pool of async tasks. - // We launch up to max_concurrency tasks at a time, then wait for the batch to complete. 
- for (int64_t batch_start = 0; batch_start < num_chunks; batch_start += max_concurrency) { - // Check cancellation between batches - if (cancelled && cancelled->load(std::memory_order_relaxed)) { - ctx.Cancel(); - FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled"); - } + size_t i = next_pending_idx.fetch_add(1, std::memory_order_relaxed); + if (i >= pending.size()) { + return; + } + int32_t chunk_idx = pending[i]; + int64_t offset = static_cast(chunk_idx) * kChunkSize; + int64_t size = std::min(kChunkSize, blob_size - offset); + + std::vector buffer; + try { + DownloadChunkToBuffer(chunk_ctx, offset, size, buffer); + } catch (...) { + std::lock_guard lock(error_mutex); + if (!first_error) { + first_error = std::current_exception(); + } + if (!internal_cancel.exchange(true)) { + azure_ctx.Cancel(); + } + return; + } - int64_t batch_end = std::min(batch_start + max_concurrency, num_chunks); - std::vector> futures; - futures.reserve(static_cast(batch_end - batch_start)); - - for (int64_t chunk_idx = batch_start; chunk_idx < batch_end; ++chunk_idx) { - int64_t offset = chunk_idx * kChunkSize; - int64_t size = std::min(kChunkSize, blob_size - offset); - - futures.push_back(std::async(std::launch::async, - [&blob_client, &local_path, &file_mutex, &bytes_completed, &bytes_written_cb, - &ctx, offset, size]() { - // Download this range from the blob. - // Retry and backoff are handled by the SDK's retry policy. 
- Azure::Storage::Blobs::DownloadBlobOptions range_opts; - range_opts.Range = Azure::Core::Http::HttpRange{offset, size}; - auto result = blob_client.Download(range_opts, ctx); - auto& body_stream = *result.Value.BodyStream; - - // Read the body into a local buffer - std::vector buffer(static_cast(size)); - size_t total_read = 0; - while (total_read < static_cast(size)) { - size_t bytes_read = body_stream.Read( - buffer.data() + total_read, - static_cast(size) - total_read, - ctx); - - if (bytes_read == 0) { - break; - } - - total_read += bytes_read; - } - - // a zero-byte read before reaching `size` indicates the server closed early. - // Treat as a hard error rather than silently writing a truncated chunk. - if (total_read < static_cast(size)) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "short read from blob stream: got " + - std::to_string(total_read) + " of " + - std::to_string(size) + " bytes at offset " + - std::to_string(offset)); - } - - // Write the chunk to the file at the correct offset - { - std::lock_guard lock(file_mutex); - std::ofstream f(local_path, - std::ios::binary | std::ios::in | std::ios::out); - if (!f.is_open()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to open blob file for write: " + local_path); - } - - f.seekp(offset); - f.write(reinterpret_cast(buffer.data()), - static_cast(total_read)); - if (f.fail()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to write blob chunk to " + local_path + - " at offset " + std::to_string(offset) + - " (" + std::to_string(total_read) + " bytes)"); - } - } - - // Report progress - bytes_completed += static_cast(total_read); - if (bytes_written_cb) { - bytes_written_cb(bytes_completed.load()); - } - })); + try { + WriteChunkToFile(local_path, offset, buffer, file_mutex); + } catch (...) 
{ + std::lock_guard lock(error_mutex); + if (!first_error) { + first_error = std::current_exception(); + } + if (!internal_cancel.exchange(true)) { + azure_ctx.Cancel(); + } + return; + } + + int64_t new_total = bytes_completed.fetch_add(size, std::memory_order_relaxed) + size; + if (bytes_written_cb) { + bytes_written_cb(new_total); + } + + bool should_save = false; + { + std::lock_guard lock(state->mutex()); + state->MarkChunkComplete(chunk_idx); + int32_t inc = chunks_since_save.fetch_add(1, std::memory_order_relaxed) + 1; + if (inc >= save_interval) { + chunks_since_save.store(0, std::memory_order_relaxed); + should_save = true; + } + } + if (should_save) { + std::lock_guard lock(state->mutex()); + state->SaveState(logger_); + } } + }; + + for (int w = 0; w < worker_count; ++w) { + workers.push_back(std::async(std::launch::async, worker_body)); + } - // Wait for all tasks in this batch, cancelling context on failure + for (auto& f : workers) { try { - for (auto& f : futures) { - f.get(); - } + f.get(); } catch (...) { - // Cancel remaining in-flight downloads so futures complete quickly - ctx.Cancel(); - for (auto& f : futures) { - try { - if (f.valid()) { - f.get(); - } - } catch (...) { - } + // Worker bodies should already have routed exceptions through + // first_error, but stay defensive in case std::async signals one. + std::lock_guard lock(error_mutex); + if (!first_error) { + first_error = std::current_exception(); } - throw; + internal_cancel.store(true, std::memory_order_relaxed); } } + + if (first_error || (cancelled && cancelled->load(std::memory_order_relaxed))) { + // Persist what we have so the next attempt resumes from here. + { + std::lock_guard lock(state->mutex()); + state->SaveState(logger_); + } + if (cancelled && cancelled->load(std::memory_order_relaxed)) { + FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled"); + } + std::rethrow_exception(first_error); + } + + // All chunks done — sidecar is no longer needed. 
+ BlobDownloadState::DeleteState(local_path, logger_); } catch (const Azure::Core::OperationCancelledException&) { FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled"); } catch (const Azure::Core::RequestFailedException& e) { @@ -260,9 +398,10 @@ bool EndsWith(const std::string& str, const std::string& suffix) { } /// Returns false if a file at `local_path` already matches the blob's expected -/// `content_length` exactly — in which case the caller can skip the download. -/// Returns true (download needed) for any of: missing file, size mismatch, or -/// filesystem-stat errors (treat as "redownload to be safe"). +/// `content_length` exactly AND has no `.dlstate` sidecar — in which case the +/// caller can skip the download. Returns true (download needed) for any of: +/// missing file, size mismatch, sidecar present (file may be pre-allocated +/// with holes), or filesystem-stat errors (treat as "redownload to be safe"). bool IsDownloadNeeded(const BlobItemInfo& blob, const std::string& local_path) { std::error_code ec; auto status = std::filesystem::status(local_path, ec); @@ -273,7 +412,17 @@ bool IsDownloadNeeded(const BlobItemInfo& blob, const std::string& local_path) { if (ec) { return true; } - return static_cast(size) != blob.content_length; + if (static_cast(size) != blob.content_length) { + return true; + } + // The data file is at the expected size, but a sidecar means a previous run + // pre-allocated then aborted mid-download. The file has holes; let + // AzureBlobDownloader resume from the sidecar. 
+ auto sidecar = BlobDownloadState::GetStateFilePath(local_path); + if (std::filesystem::exists(sidecar, ec)) { + return true; + } + return false; } } // anonymous namespace diff --git a/sdk_v2/cpp/src/download/blob_downloader.h b/sdk_v2/cpp/src/download/blob_downloader.h index f43774a1..4a733528 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.h +++ b/sdk_v2/cpp/src/download/blob_downloader.h @@ -11,6 +11,8 @@ namespace fl { +class ILogger; + /// Progress callback: percent is 0.0 to 100.0. Return 0 to continue, non-zero to cancel. using DownloadProgressFn = std::function; @@ -57,8 +59,16 @@ class IBlobDownloader { }; /// Azure Storage Blobs SDK-based implementation of IBlobDownloader. +/// +/// Implements resumable downloads: a `.dlstate` sidecar tracks which 2 MB +/// chunks have completed, and DownloadBlob picks up where a prior aborted run +/// left off. A linked cancellation token cascades the first chunk-level +/// failure to every other in-flight chunk so the worker pool drains quickly. class AzureBlobDownloader : public IBlobDownloader { public: + /// `logger` is used for diagnostics only (state file save/load events). May be null. + explicit AzureBlobDownloader(ILogger* logger = nullptr); + std::vector ListBlobs(const std::string& sas_uri) override; void DownloadBlob(const std::string& sas_uri, @@ -67,6 +77,36 @@ class AzureBlobDownloader : public IBlobDownloader { int max_concurrency, BlobBytesWrittenFn bytes_written_cb = nullptr, std::atomic* cancelled = nullptr) override; + + protected: + /// Opaque per-blob context. Defined in `blob_downloader.cc`; holds the Azure + /// SDK BlobClient + Context pointers used by the production virtuals. Test + /// subclasses can ignore this argument and use only the explicit parameters. + struct ChunkContext; + + /// Return the blob size in bytes. Production calls `BlobClient::GetProperties`. + /// Test subclasses can override to return a constant without touching Azure. 
+ virtual int64_t GetBlobSize(ChunkContext& ctx); + + /// Read `size` bytes starting at `offset` into `buffer`. The production + /// implementation pulls from the blob client referenced by `ctx`; test + /// subclasses can override to inject chunk-level failures or slow reads. + /// Must throw on failure. Implementations should observe the cancellation + /// flag accessible via `ctx` and exit promptly when cancellation is requested. + virtual void DownloadChunkToBuffer(ChunkContext& ctx, + int64_t offset, + int64_t size, + std::vector& buffer); + + /// Accessor for test subclasses overriding `DownloadChunkToBuffer`. Returns + /// the shared cancellation flag — when set by the orchestrator (e.g. after + /// another chunk fails), in-flight chunk simulations should observe it and + /// exit promptly. Production code doesn't need this directly: cancellation + /// is routed through `Azure::Core::Context::Cancel()`. + std::atomic* GetCancelFlag(ChunkContext& ctx); + + private: + ILogger* logger_ = nullptr; }; /// High-level download function: enumerate, filter, and download all blobs from a SAS URI. 
diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index c4407bb3..b5255045 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -182,7 +182,7 @@ DownloadManager::DownloadManager(std::string cache_directory, std::string_view c logger_(logger), registry_client_(std::make_unique( kDefaultRegistryRegion, logger, std::make_unique(logger, !disable_region_fallback))), - blob_downloader_(std::make_unique()) {} + blob_downloader_(std::make_unique(&logger)) {} DownloadManager::~DownloadManager() = default; diff --git a/sdk_v2/cpp/test/CMakeLists.txt b/sdk_v2/cpp/test/CMakeLists.txt index ff9923fc..2070fe03 100644 --- a/sdk_v2/cpp/test/CMakeLists.txt +++ b/sdk_v2/cpp/test/CMakeLists.txt @@ -11,6 +11,7 @@ add_executable(foundry_local_tests internal_api/audio/audio_transcription_contract_test.cc internal_api/audio/pcm_utils_test.cc internal_api/base_model_catalog_test.cc + internal_api/blob_download_state_test.cc internal_api/c_api_test.cc internal_api/callback_handler_test.cc internal_api/catalog_cache_test.cc diff --git a/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc b/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc new file mode 100644 index 00000000..9e477012 --- /dev/null +++ b/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc @@ -0,0 +1,251 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "download/blob_download_state.h" + +#include + +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +using namespace fl; + +namespace { + +class TempDir { + public: + TempDir() { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist; + path_ = fs::temp_directory_path() / ("fl_dlstate_test_" + std::to_string(dist(gen))); + fs::create_directories(path_); + } + + ~TempDir() { + std::error_code ec; + fs::remove_all(path_, ec); + } + + const fs::path& path() const { return path_; } + + private: + fs::path path_; +}; + +constexpr int64_t kBlobSize = 20 * 1024 * 1024; // 20 MiB +constexpr int32_t kChunkSize = 2 * 1024 * 1024; // 2 MiB +constexpr int32_t kNumChunks = 10; + +} // namespace + +TEST(BlobDownloadStateTest, GetStateFilePathAppendsDlstate) { + fs::path p = "C:/some/file.bin"; + EXPECT_EQ(BlobDownloadState::GetStateFilePath(p).string(), + (fs::path("C:/some/file.bin.dlstate")).string()); +} + +TEST(BlobDownloadStateTest, CreateNewInitializesEmptyBitmap) { + TempDir d; + auto local = d.path() / "blob.bin"; + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + ASSERT_NE(s, nullptr); + EXPECT_EQ(s->blob_size, kBlobSize); + EXPECT_EQ(s->chunk_size, kChunkSize); + EXPECT_EQ(s->total_chunks, kNumChunks); + EXPECT_EQ(s->completed_count, 0); + EXPECT_EQ(s->highest_completed_chunk, -1); + EXPECT_EQ(s->bitmap_byte_aligned_start, 0); + EXPECT_FALSE(s->IsComplete()); + EXPECT_EQ(s->CalculateDownloadedSize(), 0); + EXPECT_EQ(s->GetPendingChunks().size(), static_cast(kNumChunks)); +} + +TEST(BlobDownloadStateTest, MarkChunkCompleteUpdatesBitmapAndCounter) { + TempDir d; + auto local = d.path() / "blob.bin"; + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + s->MarkChunkComplete(3); + EXPECT_TRUE(s->IsChunkComplete(3)); + EXPECT_FALSE(s->IsChunkComplete(2)); + EXPECT_EQ(s->completed_count, 1); + 
EXPECT_EQ(s->highest_completed_chunk, 3); + EXPECT_EQ(s->CalculateDownloadedSize(), kChunkSize); +} + +TEST(BlobDownloadStateTest, MarkChunkCompleteIsIdempotent) { + TempDir d; + auto local = d.path() / "blob.bin"; + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + s->MarkChunkComplete(5); + s->MarkChunkComplete(5); + s->MarkChunkComplete(5); + EXPECT_EQ(s->completed_count, 1); +} + +TEST(BlobDownloadStateTest, CalculateDownloadedSizeAccountsForPartialFinalChunk) { + TempDir d; + auto local = d.path() / "blob.bin"; + constexpr int64_t kOddBlobSize = 5 * 1024 * 1024 + 17; // last chunk is 17 bytes + constexpr int32_t kOddNumChunks = 3; + auto s = BlobDownloadState::CreateNew("blob", local, kOddBlobSize, kChunkSize, kOddNumChunks); + for (int32_t i = 0; i < kOddNumChunks; ++i) { + s->MarkChunkComplete(i); + } + EXPECT_TRUE(s->IsComplete()); + EXPECT_EQ(s->CalculateDownloadedSize(), kOddBlobSize); +} + +TEST(BlobDownloadStateTest, GetPendingChunksReturnsGaps) { + TempDir d; + auto local = d.path() / "blob.bin"; + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + for (int32_t i : {0, 1, 2, 5, 7}) { + s->MarkChunkComplete(i); + } + auto pending = s->GetPendingChunks(); + std::vector expected{3, 4, 6, 8, 9}; + EXPECT_EQ(pending, expected); +} + +TEST(BlobDownloadStateTest, SaveAndLoadRoundTrip) { + TempDir d; + auto local = d.path() / "blob.bin"; + { + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + for (int32_t i : {0, 2, 4, 6, 8}) { + s->MarkChunkComplete(i); + } + s->SaveState(); + } + auto loaded = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + ASSERT_NE(loaded, nullptr); + EXPECT_EQ(loaded->completed_count, 5); + EXPECT_EQ(loaded->highest_completed_chunk, 8); + for (int32_t i : {0, 2, 4, 6, 8}) { + EXPECT_TRUE(loaded->IsChunkComplete(i)) << "chunk " << i; + } + for (int32_t i : {1, 3, 5, 7, 9}) { + 
EXPECT_FALSE(loaded->IsChunkComplete(i)) << "chunk " << i; + } + std::vector expected{1, 3, 5, 7, 9}; + EXPECT_EQ(loaded->GetPendingChunks(), expected); +} + +TEST(BlobDownloadStateTest, SaveStateAdvancesBitmapByteAlignedStart) { + TempDir d; + auto local = d.path() / "blob.bin"; + // Use a large enough total that whole-word advance is meaningful. + constexpr int32_t kBigNumChunks = 200; + constexpr int64_t kBigBlobSize = static_cast(kBigNumChunks) * kChunkSize; + auto s = BlobDownloadState::CreateNew("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks); + // Complete the first 80 chunks (10 full bytes worth). + for (int32_t i = 0; i < 80; ++i) { + s->MarkChunkComplete(i); + } + s->SaveState(); + // 64 bits = 1 full word; next 16 bits in word 1. Aligned start lands on + // 80 (multiple of 8). + EXPECT_EQ(s->bitmap_byte_aligned_start, 80); + + // Reload and verify the implicit prefix is still considered complete. + auto loaded = BlobDownloadState::LoadState("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks); + ASSERT_NE(loaded, nullptr); + for (int32_t i = 0; i < 80; ++i) { + EXPECT_TRUE(loaded->IsChunkComplete(i)); + } + for (int32_t i = 80; i < kBigNumChunks; ++i) { + EXPECT_FALSE(loaded->IsChunkComplete(i)); + } + EXPECT_EQ(loaded->completed_count, 80); +} + +TEST(BlobDownloadStateTest, LoadStateReturnsNullWhenFileMissing) { + TempDir d; + auto local = d.path() / "blob.bin"; + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + EXPECT_EQ(s, nullptr); +} + +TEST(BlobDownloadStateTest, LoadStateRejectsBadMagic) { + TempDir d; + auto local = d.path() / "blob.bin"; + auto sidecar = BlobDownloadState::GetStateFilePath(local); + { + std::ofstream f(sidecar, std::ios::binary); + f << "ZZZZ"; // wrong magic + f.put(static_cast(0)); // version + for (int i = 0; i < 64; ++i) f.put(0); // padding + } + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + EXPECT_EQ(s, nullptr); +} + 
+TEST(BlobDownloadStateTest, LoadStateRejectsBlobSizeMismatch) { + TempDir d; + auto local = d.path() / "blob.bin"; + { + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + s->MarkChunkComplete(0); + s->SaveState(); + } + // Reload with a *different* expected blob_size — should be rejected. + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize + 1, kChunkSize, kNumChunks); + EXPECT_EQ(s, nullptr); +} + +TEST(BlobDownloadStateTest, LoadStateRejectsChunkSizeMismatch) { + TempDir d; + auto local = d.path() / "blob.bin"; + { + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + s->MarkChunkComplete(0); + s->SaveState(); + } + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize + 1, kNumChunks); + EXPECT_EQ(s, nullptr); +} + +TEST(BlobDownloadStateTest, LoadStateRejectsTotalChunksMismatch) { + TempDir d; + auto local = d.path() / "blob.bin"; + { + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + s->MarkChunkComplete(0); + s->SaveState(); + } + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks + 1); + EXPECT_EQ(s, nullptr); +} + +TEST(BlobDownloadStateTest, DeleteStateRemovesSidecar) { + TempDir d; + auto local = d.path() / "blob.bin"; + { + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + s->MarkChunkComplete(0); + s->SaveState(); + } + EXPECT_TRUE(fs::exists(BlobDownloadState::GetStateFilePath(local))); + BlobDownloadState::DeleteState(local); + EXPECT_FALSE(fs::exists(BlobDownloadState::GetStateFilePath(local))); + // Re-deletion when the file is already absent is a no-op (best-effort). 
+ BlobDownloadState::DeleteState(local); +} + +TEST(BlobDownloadStateTest, IsCompleteFlipsTrueWhenAllChunksMarked) { + TempDir d; + auto local = d.path() / "blob.bin"; + auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + for (int32_t i = 0; i < kNumChunks; ++i) { + EXPECT_FALSE(s->IsComplete()); + s->MarkChunkComplete(i); + } + EXPECT_TRUE(s->IsComplete()); + EXPECT_EQ(s->GetPendingChunks().size(), 0u); +} diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index f352db6f..91aa9a01 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -8,6 +8,7 @@ // - DownloadManager (full flow orchestration) #include "catalog/azure_catalog_client.h" #include "catalog/azure_catalog_models.h" +#include "download/blob_download_state.h" #include "download/blob_downloader.h" #include "download/download_manager.h" #include "download/inference_model_writer.h" @@ -23,9 +24,12 @@ #include #include +#include #include #include #include +#include +#include #include #include #include @@ -1391,3 +1395,213 @@ TEST(DownloadManagerTest, AcceptsNormalModelIdAndPublisher) { EXPECT_NO_THROW(manager.IsModelCached(info)); EXPECT_FALSE(manager.IsModelCached(info)); } + +// ======================================================================== +// AzureBlobDownloader resume + cancel-cascade tests +// Use a subclass that overrides the protected GetBlobSize / DownloadChunkToBuffer +// virtuals to bypass the real Azure SDK and simulate per-chunk behavior. +// ======================================================================== + +namespace { + +/// Test double for AzureBlobDownloader. Overrides the protected virtuals so +/// chunked-download orchestration can be exercised without network I/O. +class FakeChunkAzureDownloader : public AzureBlobDownloader { + public: + int64_t blob_size = 0; + + /// Per-call hook. 
Receives the chunk offset and the size. Allowed to: + /// - mutate `buffer` (must end up at `size` bytes) + /// - throw to simulate a transient failure + /// - sleep / poll cancellation + std::function& buffer, + std::atomic* cancel_flag)> + chunk_hook; + + std::atomic chunk_call_count{0}; + std::mutex offsets_mutex; + std::vector requested_offsets; + + using AzureBlobDownloader::AzureBlobDownloader; + + protected: + int64_t GetBlobSize(ChunkContext& /*ctx*/) override { return blob_size; } + + void DownloadChunkToBuffer(ChunkContext& ctx, int64_t offset, int64_t size, + std::vector& buffer) override { + chunk_call_count.fetch_add(1); + { + std::lock_guard lock(offsets_mutex); + requested_offsets.push_back(offset); + } + if (chunk_hook) { + chunk_hook(offset, size, buffer, GetCancelFlag(ctx)); + return; + } + // Default: fill with the low byte of the offset for verification. + buffer.assign(static_cast(size), static_cast(offset & 0xFF)); + } +}; + +} // namespace + +TEST(AzureBlobDownloaderResumeTest, SkipsChunksAlreadyMarkedCompleteInSidecar) { + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 10; + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + + // Pre-allocate the data file so the downloader takes the resume path. + { + std::ofstream f(local, std::ios::binary); + f.seekp(kBlobSize - 1); + f.put('\0'); + } + // Pre-write a sidecar: chunks 0..4 done, 5..9 pending. 
+ { + auto state = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + for (int32_t i = 0; i < 5; ++i) { + state->MarkChunkComplete(i); + } + state->SaveState(); + } + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + + d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/2); + + EXPECT_EQ(d.chunk_call_count.load(), 5); + std::sort(d.requested_offsets.begin(), d.requested_offsets.end()); + std::vector expected{5 * int64_t{kChunkSize}, 6 * int64_t{kChunkSize}, + 7 * int64_t{kChunkSize}, 8 * int64_t{kChunkSize}, + 9 * int64_t{kChunkSize}}; + EXPECT_EQ(d.requested_offsets, expected); + + // Sidecar should be gone on full success. + EXPECT_FALSE(fs::exists(BlobDownloadState::GetStateFilePath(local))); +} + +TEST(AzureBlobDownloaderResumeTest, DownloadsAllChunksWhenSidecarMissing) { + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 4; + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + + d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/4); + + EXPECT_EQ(d.chunk_call_count.load(), kNumChunks); + EXPECT_FALSE(fs::exists(BlobDownloadState::GetStateFilePath(local))); + // Local file is pre-allocated to blob_size during the first pass. + EXPECT_TRUE(fs::exists(local)); + EXPECT_EQ(fs::file_size(local), static_cast(kBlobSize)); +} + +TEST(AzureBlobDownloaderResumeTest, PersistsSidecarOnChunkFailure) { + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 10; + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + // Fail when we see the offset of chunk 4 (specifically chosen so several + // chunks land before the failing one across threads). 
+ constexpr int64_t kFailOffset = 4 * int64_t{kChunkSize}; + d.chunk_hook = [&](int64_t offset, int64_t size, std::vector& buffer, + std::atomic* /*cancel_flag*/) { + if (offset == kFailOffset) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "simulated chunk failure"); + } + buffer.assign(static_cast(size), static_cast(offset & 0xFF)); + }; + + EXPECT_THROW( + d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/2), + fl::Exception); + + // The sidecar should be persisted so a subsequent call can resume. + EXPECT_TRUE(fs::exists(BlobDownloadState::GetStateFilePath(local))); + + // This test does not rerun the download. It reloads the persisted sidecar + // and checks that it records partial progress (some chunks complete, not + // all), which is the state a later resume attempt would start from. + auto retry_state = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + ASSERT_NE(retry_state, nullptr); + EXPECT_GT(retry_state->completed_count, 0); + EXPECT_LT(retry_state->completed_count, kNumChunks); +} + +TEST(AzureBlobDownloaderResumeTest, CleansUpSidecarOnEmptyBlob) { + TempDir tmpdir; + auto local = tmpdir.path() / "empty.bin"; + // Plant a stale sidecar. 
+ { + std::ofstream f(BlobDownloadState::GetStateFilePath(local), std::ios::binary); + f << "stale"; + } + + FakeChunkAzureDownloader d; + d.blob_size = 0; // empty + + d.DownloadBlob(/*sas_uri=*/"", "empty", local.string(), /*max_concurrency=*/4); + + EXPECT_TRUE(fs::exists(local)); + EXPECT_EQ(fs::file_size(local), 0u); + EXPECT_FALSE(fs::exists(BlobDownloadState::GetStateFilePath(local))); + EXPECT_EQ(d.chunk_call_count.load(), 0); +} + +TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 10; + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + constexpr int64_t kFailOffset = 4 * int64_t{kChunkSize}; + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + // The failing chunk throws fast. Every other chunk sleeps for up to 5 s in + // 50-ms slices, polling the cancel flag. If linked cancellation works, they + // observe the flag within one slice of the failure and exit promptly. + d.chunk_hook = [kFailOffset](int64_t offset, int64_t size, std::vector& buffer, + std::atomic* cancel_flag) { + if (offset == kFailOffset) { + // Give other workers a moment to enter their sleep loop before we throw, + // so we're meaningfully testing the cancel-while-in-flight path. 
+ std::this_thread::sleep_for(std::chrono::milliseconds(75)); + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "simulated chunk failure"); + } + for (int i = 0; i < 100; ++i) { + if (cancel_flag && cancel_flag->load(std::memory_order_relaxed)) { + FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "cancelled mid-chunk"); + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + buffer.assign(static_cast(size), 0); + }; + + auto start = std::chrono::steady_clock::now(); + EXPECT_THROW( + d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/kNumChunks), + fl::Exception); + auto elapsed = std::chrono::steady_clock::now() - start; + auto elapsed_ms = std::chrono::duration_cast(elapsed).count(); + + // Without cancellation, the slow chunks would sleep ~5 s. With it, they + // should all exit within a few hundred ms of the failure (well under 2 s). + EXPECT_LT(elapsed_ms, 2000) + << "Cancel-cascade should drain in-flight peers fast; took " << elapsed_ms << " ms"; +} + From 273557071bacad4f9e813284a6d58c1373779d49 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Tue, 9 Jun 2026 14:52:37 -0500 Subject: [PATCH 03/36] Stream blob chunks via per-worker scratch + IFileWriter (caps per-worker memory at 64 KB) Eliminate the 2 MB-per-chunk std::vector allocation in AzureBlobDownloader::DownloadBlob by streaming each chunk through a sink callback that forwards 64 KB pieces straight to a thread-safe file writer. Peak download memory drops from concurrency * chunk_size (128 MB at 64-way concurrency on 2 MB chunks) to roughly concurrency * 64 KB (~4 MB) regardless of chunk size, matching the .NET Stream.CopyTo semantics we ported from instead of doubling memory with a buffer copy. 
The file write strategy is now selected via a new `FileWriterKind` constructor argument on `AzureBlobDownloader` and backed by a small `IFileWriter` abstraction with two implementations: - `Positional` (default, recommended): lock-free positional writes using `pwrite` on POSIX and `WriteFile` + `OVERLAPPED.Offset` on Windows. No user-space mutex; the kernel orders disjoint-range writes. - `MutexFstream` (comparison / portable fallback): single shared `std::fstream` guarded by an internal mutex. The chunk-write fast path that used to open a fresh fstream and seek under a mutex per chunk is now subsumed by this writer; the file handle is opened once and reused for every WriteAt. Both writers handle `Open` correctly for the resume path: an existing file at exactly the expected size is preserved (so already-downloaded bytes survive across the writer swap), and any other state triggers pre-allocation to the expected size. The orchestrator's worker pool, atomic cancel cascade, sidecar save cadence, and progress reporting are unchanged. The renamed protected virtual `DownloadChunkStreaming` (replacing `DownloadChunkToBuffer`) is the new test seam; both production code and the existing `FakeChunkAzureDownloader` test double now use the sink callback to deliver chunk bytes. Tests: - New `WriterImpls/FileWriterTest` runs 5 correctness checks against both writer implementations (10 tests total): open semantics for fresh / existing-at-size / existing-at-different-size files, single thread WriteAt, and 8 threads writing 256 KB regions to disjoint offsets validated byte-for-byte after close. - New `FileWriterPerfComparison.PositionalVsMutexFstream` runs the realistic AzureBlobDownloader workload (32 workers, 8 chunks/worker, 2 MB chunks, 64 KB sink pieces, 512 MB total) against both writers and prints wall-clock + MB/s for comparison. 
Measured locally on NVMe NTFS: Positional averages ~590 MB/s, MutexFstream ~545 MB/s (Positional ~8 percent faster on average; both well above 500 MB/s). - All existing 60 BlobDownload* + AzureBlobDownloader* tests still pass without modification beyond the chunk_hook signature update. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/CMakeLists.txt | 1 + sdk_v2/cpp/src/download/blob_downloader.cc | 154 +++++----- sdk_v2/cpp/src/download/blob_downloader.h | 48 +++- sdk_v2/cpp/src/download/file_writer.cc | 209 ++++++++++++++ sdk_v2/cpp/src/download/file_writer.h | 46 +++ sdk_v2/cpp/test/CMakeLists.txt | 1 + sdk_v2/cpp/test/internal_api/download_test.cc | 49 +++- .../cpp/test/internal_api/file_writer_test.cc | 272 ++++++++++++++++++ 8 files changed, 668 insertions(+), 112 deletions(-) create mode 100644 sdk_v2/cpp/src/download/file_writer.cc create mode 100644 sdk_v2/cpp/src/download/file_writer.h create mode 100644 sdk_v2/cpp/test/internal_api/file_writer_test.cc diff --git a/sdk_v2/cpp/CMakeLists.txt b/sdk_v2/cpp/CMakeLists.txt index 69498c44..dcd191b4 100644 --- a/sdk_v2/cpp/CMakeLists.txt +++ b/sdk_v2/cpp/CMakeLists.txt @@ -152,6 +152,7 @@ set(FOUNDRY_LOCAL_SOURCES src/download/blob_downloader.cc src/download/cross_process_file_lock.cc src/download/download_manager.cc + src/download/file_writer.cc src/download/inference_model_writer.cc src/download/model_registry_client.cc src/ep_detection/cuda_ep_bootstrapper.cc diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 13af9d63..a0ff4297 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. 
#include "download/blob_downloader.h" #include "download/blob_download_state.h" +#include "download/file_writer.h" #include "exception.h" #include "logger.h" #include "util/path_safety.h" @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +25,15 @@ namespace fl { +namespace { + +/// Streaming buffer size used by the production chunk downloader. Matches the +/// 64 KB-ish granularity Stream.CopyTo uses in .NET, capping per-worker peak +/// memory at this many bytes regardless of chunk size. +constexpr size_t kStreamingBufferBytes = 64 * 1024; + +} // namespace + // ======================================================================== // AzureBlobDownloader — real Azure Storage SDK implementation // ======================================================================== @@ -37,7 +48,8 @@ struct AzureBlobDownloader::ChunkContext { std::atomic* cancel_flag; }; -AzureBlobDownloader::AzureBlobDownloader(ILogger* logger) : logger_(logger) {} +AzureBlobDownloader::AzureBlobDownloader(ILogger* logger, FileWriterKind writer_kind) + : logger_(logger), writer_kind_(writer_kind) {} std::vector AzureBlobDownloader::ListBlobs(const std::string& sas_uri) { try { @@ -69,83 +81,48 @@ std::atomic* AzureBlobDownloader::GetCancelFlag(ChunkContext& ctx) { return ctx.cancel_flag; } -void AzureBlobDownloader::DownloadChunkToBuffer(ChunkContext& ctx, - int64_t offset, - int64_t size, - std::vector& buffer) { +void AzureBlobDownloader::DownloadChunkStreaming( + ChunkContext& ctx, int64_t offset, int64_t size, std::vector& scratch, + const std::function& sink) { Azure::Storage::Blobs::DownloadBlobOptions range_opts; range_opts.Range = Azure::Core::Http::HttpRange{offset, size}; auto result = ctx.blob_client->Download(range_opts, *ctx.azure_ctx); auto& body_stream = *result.Value.BodyStream; - buffer.assign(static_cast(size), 0); - size_t total_read = 0; - while (total_read < static_cast(size)) { - size_t bytes_read = body_stream.Read(buffer.data() 
+ total_read, - static_cast(size) - total_read, - *ctx.azure_ctx); - if (bytes_read == 0) { - break; - } - - total_read += bytes_read; + if (scratch.size() < kStreamingBufferBytes) { + scratch.resize(kStreamingBufferBytes); } - // A zero-byte read before reaching `size` indicates the server closed early. - // Treat as a hard error rather than silently writing a truncated chunk. - if (total_read < static_cast(size)) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "short read from blob stream: got " + std::to_string(total_read) + " of " + - std::to_string(size) + " bytes at offset " + std::to_string(offset)); + int64_t remaining = size; + while (remaining > 0) { + size_t to_read = + static_cast(std::min(remaining, static_cast(scratch.size()))); + size_t got = body_stream.Read(scratch.data(), to_read, *ctx.azure_ctx); + if (got == 0) { + // Zero-byte read before reaching `size` means the server closed early. + // Treat as a hard error rather than silently writing a truncated chunk. + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "short read from blob stream at offset " + std::to_string(offset) + ": got " + + std::to_string(size - remaining) + " of " + std::to_string(size) + " bytes"); + } + sink(scratch.data(), got); + remaining -= static_cast(got); } - buffer.resize(total_read); } namespace { -/// Open the local file at the given offset for write. Throws on failure. 
-void WriteChunkToFile(const std::string& local_path, int64_t offset, - const std::vector& buffer, std::mutex& file_mutex) { - std::lock_guard lock(file_mutex); - std::ofstream f(local_path, std::ios::binary | std::ios::in | std::ios::out); - if (!f.is_open()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to open blob file for write: " + local_path); - } - - f.seekp(offset); - f.write(reinterpret_cast(buffer.data()), - static_cast(buffer.size())); - if (f.fail()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to write blob chunk to " + local_path + " at offset " + - std::to_string(offset) + " (" + std::to_string(buffer.size()) + " bytes)"); - } -} - /// Pre-allocate `local_path` to `blob_size` bytes if it does not already exist /// at the expected size. Allows concurrent chunk writes to seek without races /// and avoids re-zeroing a file we're resuming. -void EnsureFilePreallocated(const std::string& local_path, int64_t blob_size) { - std::error_code ec; - auto cur_size = std::filesystem::file_size(local_path, ec); - if (!ec && cur_size == static_cast(blob_size)) { - return; - } - +/// +/// Used only for the empty-blob case below; the writers' `Open` method handles +/// pre-allocation for the streaming chunked path. +void EnsureEmptyBlobFile(const std::string& local_path) { std::ofstream f(local_path, std::ios::binary); if (!f.is_open()) { FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to open blob file for pre-allocation: " + local_path); - } - - f.seekp(blob_size - 1); - f.put('\0'); - f.close(); - if (f.fail()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to pre-allocate blob file: " + local_path + - " (size=" + std::to_string(blob_size) + ")"); + "failed to create empty blob file: " + local_path); } } @@ -183,13 +160,7 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, int64_t blob_size = GetBlobSize(chunk_ctx); if (blob_size == 0) { - // Empty blob — just create the file and clean up any stale sidecar. 
- std::ofstream f(local_path, std::ios::binary); - if (!f.is_open()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "failed to create empty blob file: " + local_path); - } - f.close(); + EnsureEmptyBlobFile(local_path); BlobDownloadState::DeleteState(local_path, logger_); return; } @@ -207,11 +178,6 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, static_cast(kChunkSize), num_chunks); } - // Pre-allocate only if the file is not already at full size. On resume the - // file already exists with valid bytes in completed chunks; re-truncating - // would discard them. - EnsureFilePreallocated(local_path, blob_size); - // Track cumulative bytes for progress reporting; seed with bytes already // present on disk so percent stays monotonic across resume. std::atomic bytes_completed{state->CalculateDownloadedSize()}; @@ -229,12 +195,19 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, return; } + // Open the file writer once for the whole download. Open() pre-allocates + // the file to blob_size if needed, preserving any existing bytes from a + // resume. Concurrent WriteAt calls to disjoint ranges are thread-safe + // (lock-free for Positional, mutex-guarded for MutexFstream). + std::unique_ptr writer = (writer_kind_ == FileWriterKind::MutexFstream) + ? MakeMutexFstreamFileWriter() + : MakePositionalFileWriter(); + writer->Open(local_path, blob_size); + // Save the sidecar roughly every 2% of chunks, with a floor of 10. const int32_t save_interval = std::max(10, num_chunks / 50); std::atomic chunks_since_save{0}; - // Mutex protects concurrent writes to different offsets in the same file. - std::mutex file_mutex; std::mutex error_mutex; std::exception_ptr first_error; @@ -251,6 +224,12 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, workers.reserve(static_cast(worker_count)); auto worker_body = [&]() { + // Per-worker scratch buffer reused across every chunk this worker + // handles. 
Streaming downloads fill the scratch in 64 KB pieces and + // forward each piece to the sink, so total transient memory is bounded + // by `worker_count * kStreamingBufferBytes` regardless of chunk size. + std::vector scratch(kStreamingBufferBytes); + while (true) { // External cancellation drains the pool as fast as the SDK can unwind. if (cancelled && cancelled->load(std::memory_order_relaxed)) { @@ -271,22 +250,17 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, int64_t offset = static_cast(chunk_idx) * kChunkSize; int64_t size = std::min(kChunkSize, blob_size - offset); - std::vector buffer; - try { - DownloadChunkToBuffer(chunk_ctx, offset, size, buffer); - } catch (...) { - std::lock_guard lock(error_mutex); - if (!first_error) { - first_error = std::current_exception(); - } - if (!internal_cancel.exchange(true)) { - azure_ctx.Cancel(); - } - return; - } + // Sink advances a per-chunk write cursor and forwards each piece to + // the file writer. The writer is responsible for any synchronization + // needed across concurrent workers; we don't take a mutex here. + int64_t written = 0; + auto sink = [&](const uint8_t* data, size_t len) { + writer->WriteAt(offset + written, data, len); + written += static_cast(len); + }; try { - WriteChunkToFile(local_path, offset, buffer, file_mutex); + DownloadChunkStreaming(chunk_ctx, offset, size, scratch, sink); } catch (...) { std::lock_guard lock(error_mutex); if (!first_error) { @@ -338,6 +312,10 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, } } + // Release the OS handle before persisting / deleting the sidecar so any + // observer that watches the data file sees a fully-closed handle. + writer->Close(); + if (first_error || (cancelled && cancelled->load(std::memory_order_relaxed))) { // Persist what we have so the next attempt resumes from here. 
{ diff --git a/sdk_v2/cpp/src/download/blob_downloader.h b/sdk_v2/cpp/src/download/blob_downloader.h index 4a733528..4fc7412f 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.h +++ b/sdk_v2/cpp/src/download/blob_downloader.h @@ -58,16 +58,37 @@ class IBlobDownloader { std::atomic* cancelled = nullptr) = 0; }; +/// Strategy for writing downloaded blob chunks to the local file. Both +/// strategies are thread-safe across concurrent calls to disjoint ranges. +/// +/// - `Positional`: lock-free `pwrite` / `WriteFile`+`OVERLAPPED`. Default and +/// recommended; lets the OS arbitrate concurrent writes to disjoint ranges +/// instead of taking a user-space mutex. +/// - `MutexFstream`: single shared `std::fstream` guarded by an internal +/// mutex. Provided for benchmarking and as a portable fallback. +enum class FileWriterKind { + Positional, + MutexFstream, +}; + /// Azure Storage Blobs SDK-based implementation of IBlobDownloader. /// /// Implements resumable downloads: a `.dlstate` sidecar tracks which 2 MB /// chunks have completed, and DownloadBlob picks up where a prior aborted run /// left off. A linked cancellation token cascades the first chunk-level /// failure to every other in-flight chunk so the worker pool drains quickly. +/// +/// Chunks stream from the blob client into the local file in ~64 KB pieces +/// via a sink callback, so each worker holds a single 64 KB scratch buffer +/// instead of allocating a full chunk's worth of bytes per request. This +/// caps peak memory at roughly `max_concurrency * 64 KB` regardless of how +/// large the blob or the chunk size is. class AzureBlobDownloader : public IBlobDownloader { public: /// `logger` is used for diagnostics only (state file save/load events). May be null. - explicit AzureBlobDownloader(ILogger* logger = nullptr); + /// `writer_kind` chooses the on-disk write strategy; see `FileWriterKind`. 
+ explicit AzureBlobDownloader(ILogger* logger = nullptr, + FileWriterKind writer_kind = FileWriterKind::Positional); std::vector ListBlobs(const std::string& sas_uri) override; @@ -88,17 +109,25 @@ class AzureBlobDownloader : public IBlobDownloader { /// Test subclasses can override to return a constant without touching Azure. virtual int64_t GetBlobSize(ChunkContext& ctx); - /// Read `size` bytes starting at `offset` into `buffer`. The production - /// implementation pulls from the blob client referenced by `ctx`; test - /// subclasses can override to inject chunk-level failures or slow reads. + /// Read `size` bytes starting at `offset` from the blob and forward them + /// piecewise to `sink`. The production implementation pulls from the blob + /// client referenced by `ctx`; test subclasses can override to inject + /// chunk-level failures or slow reads. + /// + /// `scratch` is a per-worker reusable buffer (default 64 KB) — implementers + /// may resize it but should avoid allocating one-buffer-per-chunk. `sink` + /// must be invoked with strictly contiguous ranges; the cumulative byte + /// count delivered to `sink` must equal `size` on success. + /// /// Must throw on failure. Implementations should observe the cancellation /// flag accessible via `ctx` and exit promptly when cancellation is requested. - virtual void DownloadChunkToBuffer(ChunkContext& ctx, - int64_t offset, - int64_t size, - std::vector& buffer); + virtual void DownloadChunkStreaming(ChunkContext& ctx, + int64_t offset, + int64_t size, + std::vector& scratch, + const std::function& sink); - /// Accessor for test subclasses overriding `DownloadChunkToBuffer`. Returns + /// Accessor for test subclasses overriding `DownloadChunkStreaming`. Returns /// the shared cancellation flag — when set by the orchestrator (e.g. after /// another chunk fails), in-flight chunk simulations should observe it and /// exit promptly. 
Production code doesn't need this directly: cancellation @@ -107,6 +136,7 @@ class AzureBlobDownloader : public IBlobDownloader { private: ILogger* logger_ = nullptr; + FileWriterKind writer_kind_ = FileWriterKind::Positional; }; /// High-level download function: enumerate, filter, and download all blobs from a SAS URI. diff --git a/sdk_v2/cpp/src/download/file_writer.cc b/sdk_v2/cpp/src/download/file_writer.cc new file mode 100644 index 00000000..905bb6b9 --- /dev/null +++ b/sdk_v2/cpp/src/download/file_writer.cc @@ -0,0 +1,209 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#include "download/file_writer.h" +#include "exception.h" + +#include + +#include +#include +#include +#include + +#ifdef _WIN32 +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#else +#include +#include +#include +#include +#endif + +namespace fl { + +namespace { + +namespace fs = std::filesystem; + +/// Ensure the data file exists at exactly `expected_size`. Skips truncation +/// if the file is already at that size — the resume path relies on this. 
+void EnsureFileExistsAtSize(const fs::path& path, int64_t expected_size) { + std::error_code ec; + auto cur_size = fs::file_size(path, ec); + if (!ec && cur_size == static_cast(expected_size)) { + return; + } + + std::ofstream f(path, std::ios::binary); + if (!f.is_open()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to open blob file for pre-allocation: " + path.string()); + } + if (expected_size > 0) { + f.seekp(expected_size - 1); + f.put('\0'); + } + f.close(); + if (f.fail()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to pre-allocate blob file: " + path.string() + + " (size=" + std::to_string(expected_size) + ")"); + } +} + +#ifdef _WIN32 + +class WindowsPositionalFileWriter : public IFileWriter { + public: + ~WindowsPositionalFileWriter() override { Close(); } + + void Open(const fs::path& path, int64_t expected_size) override { + EnsureFileExistsAtSize(path, expected_size); + // FILE_SHARE_READ | FILE_SHARE_WRITE so the lock file / other tools can + // peek at the partial file without us erroring; positional WriteFile is + // safe regardless of share mode. + handle_ = ::CreateFileW(path.wstring().c_str(), GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, nullptr); + if (handle_ == INVALID_HANDLE_VALUE) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "PositionalFileWriter open failed for " + path.string() + + " (Win32 err " + std::to_string(::GetLastError()) + ")"); + } + } + + void WriteAt(int64_t offset, const uint8_t* data, size_t len) override { + // Concurrent WriteFile calls with distinct OVERLAPPED offsets on the same + // handle are safe for non-overlapping ranges; the kernel orders them. + while (len > 0) { + OVERLAPPED ov{}; + ov.Offset = static_cast(static_cast(offset) & 0xFFFFFFFFULL); + ov.OffsetHigh = static_cast((static_cast(offset) >> 32) & 0xFFFFFFFFULL); + DWORD to_write = static_cast(len > 0x7FFFFFFFu ? 
0x7FFFFFFFu : len); + DWORD written = 0; + if (!::WriteFile(handle_, data, to_write, &written, &ov)) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "PositionalFileWriter write failed at offset " + std::to_string(offset) + + " (Win32 err " + std::to_string(::GetLastError()) + ")"); + } + if (written == 0) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "PositionalFileWriter short write at offset " + std::to_string(offset)); + } + offset += static_cast(written); + data += written; + len -= written; + } + } + + void Close() override { + if (handle_ != INVALID_HANDLE_VALUE) { + ::CloseHandle(handle_); + handle_ = INVALID_HANDLE_VALUE; + } + } + + private: + HANDLE handle_ = INVALID_HANDLE_VALUE; +}; + +#else // POSIX + +class PosixPositionalFileWriter : public IFileWriter { + public: + ~PosixPositionalFileWriter() override { Close(); } + + void Open(const fs::path& path, int64_t expected_size) override { + EnsureFileExistsAtSize(path, expected_size); + fd_ = ::open(path.c_str(), O_RDWR | O_CLOEXEC); + if (fd_ < 0) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "PositionalFileWriter open failed for " + path.string() + + " (errno " + std::to_string(errno) + ")"); + } + } + + void WriteAt(int64_t offset, const uint8_t* data, size_t len) override { + while (len > 0) { + ssize_t n = ::pwrite(fd_, data, len, static_cast(offset)); + if (n < 0) { + if (errno == EINTR) continue; + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "PositionalFileWriter pwrite failed at offset " + std::to_string(offset) + + " (errno " + std::to_string(errno) + ")"); + } + if (n == 0) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "PositionalFileWriter short pwrite at offset " + std::to_string(offset)); + } + offset += n; + data += n; + len -= static_cast(n); + } + } + + void Close() override { + if (fd_ >= 0) { + ::close(fd_); + fd_ = -1; + } + } + + private: + int fd_ = -1; +}; + +#endif + +class MutexFstreamFileWriter : public IFileWriter { + public: + ~MutexFstreamFileWriter() override { Close(); } + + void 
Open(const fs::path& path, int64_t expected_size) override { + EnsureFileExistsAtSize(path, expected_size); + file_.open(path, std::ios::binary | std::ios::in | std::ios::out); + if (!file_.is_open()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "MutexFstreamFileWriter open failed for " + path.string()); + } + } + + void WriteAt(int64_t offset, const uint8_t* data, size_t len) override { + std::lock_guard lock(mutex_); + file_.seekp(offset); + file_.write(reinterpret_cast(data), static_cast(len)); + if (file_.fail()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "MutexFstreamFileWriter write failed at offset " + std::to_string(offset)); + } + } + + void Close() override { + if (file_.is_open()) { + file_.close(); + } + } + + private: + std::fstream file_; + std::mutex mutex_; +}; + +} // namespace + +std::unique_ptr MakePositionalFileWriter() { +#ifdef _WIN32 + return std::make_unique(); +#else + return std::make_unique(); +#endif +} + +std::unique_ptr MakeMutexFstreamFileWriter() { + return std::make_unique(); +} + +} // namespace fl diff --git a/sdk_v2/cpp/src/download/file_writer.h b/sdk_v2/cpp/src/download/file_writer.h new file mode 100644 index 00000000..eacc498e --- /dev/null +++ b/sdk_v2/cpp/src/download/file_writer.h @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#pragma once + +#include +#include +#include + +namespace fl { + +/// Thread-safe positional writer for blob downloads. +/// +/// Workers in a single download claim disjoint chunks, so concurrent +/// `WriteAt` calls always target non-overlapping byte ranges. An +/// implementation may serialize internally (e.g. via a mutex) or rely on the +/// OS to allow lock-free concurrent positional writes — the contract is the +/// same either way. +class IFileWriter { + public: + virtual ~IFileWriter() = default; + + /// Make `path` exist at exactly `expected_size` bytes. 
If the file already + /// exists at that size, leave its contents intact (so the resume path can + /// pick up where it left off). Called once before the first `WriteAt`. + virtual void Open(const std::filesystem::path& path, int64_t expected_size) = 0; + + /// Write `len` bytes from `data` starting at byte offset `offset`. + /// Thread-safe across overlapping or disjoint ranges — concurrent calls to + /// disjoint ranges complete without coordination from the caller. + virtual void WriteAt(int64_t offset, const uint8_t* data, size_t len) = 0; + + /// Release the underlying OS handle. Implicitly called by the destructor. + virtual void Close() = 0; +}; + +/// Backed by `pwrite` (POSIX) or `WriteFile`+`OVERLAPPED` (Windows). Concurrent +/// `WriteAt` calls to disjoint ranges proceed in parallel — no internal +/// mutex. The recommended default. +std::unique_ptr MakePositionalFileWriter(); + +/// Backed by a single `std::fstream` guarded by an internal mutex. Provided +/// for comparison with `MakePositionalFileWriter` and as a portable fallback +/// if a platform's positional-write semantics ever change. 
+std::unique_ptr MakeMutexFstreamFileWriter(); + +} // namespace fl diff --git a/sdk_v2/cpp/test/CMakeLists.txt b/sdk_v2/cpp/test/CMakeLists.txt index 2070fe03..fb4aa165 100644 --- a/sdk_v2/cpp/test/CMakeLists.txt +++ b/sdk_v2/cpp/test/CMakeLists.txt @@ -30,6 +30,7 @@ add_executable(foundry_local_tests internal_api/exception_test.cc internal_api/execution_provider_test.cc internal_api/file_uri_test.cc + internal_api/file_writer_test.cc internal_api/genai_config_test.cc internal_api/http_retry_test.cc internal_api/item_test.cc diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 91aa9a01..4a6a7586 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1398,7 +1398,7 @@ TEST(DownloadManagerTest, AcceptsNormalModelIdAndPublisher) { // ======================================================================== // AzureBlobDownloader resume + cancel-cascade tests -// Use a subclass that overrides the protected GetBlobSize / DownloadChunkToBuffer +// Use a subclass that overrides the protected GetBlobSize / DownloadChunkStreaming // virtuals to bypass the real Azure SDK and simulate per-chunk behavior. // ======================================================================== @@ -1410,11 +1410,14 @@ class FakeChunkAzureDownloader : public AzureBlobDownloader { public: int64_t blob_size = 0; - /// Per-call hook. Receives the chunk offset and the size. Allowed to: - /// - mutate `buffer` (must end up at `size` bytes) - /// - throw to simulate a transient failure + /// Per-call hook. Receives the chunk offset and size plus a `sink` callback + /// that forwards bytes to the file writer. 
Allowed to: + /// - call `sink` zero or more times with strictly contiguous, cumulative + /// `size`-byte ranges to simulate a successful chunk + /// - throw to simulate a transient failure (sink calls so far still hit disk) /// - sleep / poll cancellation - std::function& buffer, + std::function& sink, std::atomic* cancel_flag)> chunk_hook; @@ -1427,19 +1430,31 @@ class FakeChunkAzureDownloader : public AzureBlobDownloader { protected: int64_t GetBlobSize(ChunkContext& /*ctx*/) override { return blob_size; } - void DownloadChunkToBuffer(ChunkContext& ctx, int64_t offset, int64_t size, - std::vector& buffer) override { + void DownloadChunkStreaming(ChunkContext& ctx, int64_t offset, int64_t size, + std::vector& scratch, + const std::function& sink) override { chunk_call_count.fetch_add(1); { std::lock_guard lock(offsets_mutex); requested_offsets.push_back(offset); } if (chunk_hook) { - chunk_hook(offset, size, buffer, GetCancelFlag(ctx)); + chunk_hook(offset, size, sink, GetCancelFlag(ctx)); return; } - // Default: fill with the low byte of the offset for verification. - buffer.assign(static_cast(size), static_cast(offset & 0xFF)); + // Default: stream the chunk to the sink in scratch-sized pieces, filled + // with the low byte of the offset for verification. + if (scratch.size() < 64 * 1024) { + scratch.resize(64 * 1024); + } + int64_t remaining = size; + while (remaining > 0) { + size_t to_emit = + static_cast(std::min(remaining, static_cast(scratch.size()))); + std::fill_n(scratch.begin(), to_emit, static_cast(offset & 0xFF)); + sink(scratch.data(), to_emit); + remaining -= static_cast(to_emit); + } } }; @@ -1517,12 +1532,14 @@ TEST(AzureBlobDownloaderResumeTest, PersistsSidecarOnChunkFailure) { // Fail when we see the offset of chunk 4 (specifically chosen so several // chunks land before the failing one across threads). 
constexpr int64_t kFailOffset = 4 * int64_t{kChunkSize}; - d.chunk_hook = [&](int64_t offset, int64_t size, std::vector& buffer, + d.chunk_hook = [&](int64_t offset, int64_t size, + const std::function& sink, std::atomic* /*cancel_flag*/) { if (offset == kFailOffset) { FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "simulated chunk failure"); } - buffer.assign(static_cast(size), static_cast(offset & 0xFF)); + std::vector buf(static_cast(size), static_cast(offset & 0xFF)); + sink(buf.data(), buf.size()); }; EXPECT_THROW( @@ -1575,8 +1592,9 @@ TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { // The failing chunk throws fast. Every other chunk sleeps for up to 5 s in // 50-ms slices, polling the cancel flag. If linked cancellation works, they // observe the flag within one slice of the failure and exit promptly. - d.chunk_hook = [kFailOffset](int64_t offset, int64_t size, std::vector& buffer, - std::atomic* cancel_flag) { + d.chunk_hook = [kFailOffset](int64_t offset, int64_t size, + const std::function& sink, + std::atomic* cancel_flag) { if (offset == kFailOffset) { // Give other workers a moment to enter their sleep loop before we throw, // so we're meaningfully testing the cancel-while-in-flight path. @@ -1589,7 +1607,8 @@ TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { } std::this_thread::sleep_for(std::chrono::milliseconds(50)); } - buffer.assign(static_cast(size), 0); + std::vector buf(static_cast(size), 0); + sink(buf.data(), buf.size()); }; auto start = std::chrono::steady_clock::now(); diff --git a/sdk_v2/cpp/test/internal_api/file_writer_test.cc b/sdk_v2/cpp/test/internal_api/file_writer_test.cc new file mode 100644 index 00000000..84134d68 --- /dev/null +++ b/sdk_v2/cpp/test/internal_api/file_writer_test.cc @@ -0,0 +1,272 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+// +// Tests for the IFileWriter abstraction backing AzureBlobDownloader's chunked +// writes. Exercises both implementations (Positional / MutexFstream) through a +// parametrized fixture so every correctness assertion runs against both. +// +// The "PerfComparison" test prints wall-clock numbers for a representative +// download workload (32 threads, 64-way chunked streaming into a 256 MB file) +// so we can eyeball lock contention deltas without adding a separate +// microbenchmark binary. It is informational — its only EXPECT is that both +// runs complete and the file ends up at the right size. + +#include "download/file_writer.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; +using namespace fl; + +namespace { + +class TempPath { + public: + TempPath() { + auto base = fs::temp_directory_path(); + std::random_device rd; + std::uniform_int_distribution dist; + path_ = base / ("file_writer_test_" + std::to_string(dist(rd)) + ".bin"); + } + ~TempPath() { + std::error_code ec; + fs::remove(path_, ec); + } + const fs::path& path() const { return path_; } + + private: + fs::path path_; +}; + +std::unique_ptr MakeWriter(const std::string& kind) { + if (kind == "Positional") return MakePositionalFileWriter(); + if (kind == "MutexFstream") return MakeMutexFstreamFileWriter(); + ADD_FAILURE() << "unknown writer kind " << kind; + return nullptr; +} + +class FileWriterTest : public ::testing::TestWithParam {}; + +} // namespace + +TEST_P(FileWriterTest, OpenCreatesFileAtRequestedSize) { + TempPath p; + auto w = MakeWriter(GetParam()); + ASSERT_NE(w, nullptr); + w->Open(p.path(), 4096); + w->Close(); + EXPECT_TRUE(fs::exists(p.path())); + EXPECT_EQ(fs::file_size(p.path()), 4096u); +} + +TEST_P(FileWriterTest, OpenPreservesExistingFileAtSameSize) { + TempPath p; + // Pre-write a sentinel byte the writer must NOT overwrite. 
+ { + std::ofstream f(p.path(), std::ios::binary); + f.seekp(1023); + f.put('\0'); + } + // Plant a known byte at offset 100. + { + std::fstream f(p.path(), std::ios::binary | std::ios::in | std::ios::out); + f.seekp(100); + f.put(static_cast(0xAB)); + } + + auto w = MakeWriter(GetParam()); + ASSERT_NE(w, nullptr); + w->Open(p.path(), 1024); // same size -> must not truncate + w->Close(); + + // Sentinel byte should still be there. + std::ifstream f(p.path(), std::ios::binary); + f.seekg(100); + int byte = f.get(); + EXPECT_EQ(byte, 0xAB); +} + +TEST_P(FileWriterTest, OpenTruncatesIfSizeChanged) { + TempPath p; + { + std::ofstream f(p.path(), std::ios::binary); + f.seekp(100); + f.put(static_cast(0xCD)); + } + EXPECT_EQ(fs::file_size(p.path()), 101u); + + auto w = MakeWriter(GetParam()); + ASSERT_NE(w, nullptr); + w->Open(p.path(), 4096); + w->Close(); + EXPECT_EQ(fs::file_size(p.path()), 4096u); +} + +TEST_P(FileWriterTest, SingleThreadWriteAt) { + TempPath p; + auto w = MakeWriter(GetParam()); + ASSERT_NE(w, nullptr); + w->Open(p.path(), 1024); + + std::vector data(256, 0xEF); + w->WriteAt(512, data.data(), data.size()); + w->Close(); + + std::ifstream f(p.path(), std::ios::binary); + std::vector contents((std::istreambuf_iterator(f)), + std::istreambuf_iterator()); + ASSERT_EQ(contents.size(), 1024u); + for (size_t i = 512; i < 768; ++i) { + EXPECT_EQ(contents[i], 0xEF) << "byte " << i; + } +} + +TEST_P(FileWriterTest, ConcurrentDisjointWritesProduceCorrectFile) { + TempPath p; + constexpr int kThreads = 8; + constexpr int kRegionSize = 256 * 1024; // 256 KB per thread + constexpr int kPieceSize = 16 * 1024; // 16 KB per WriteAt + constexpr int64_t kTotalSize = int64_t{kThreads} * kRegionSize; + static_assert(kRegionSize % kPieceSize == 0, ""); + + auto w = MakeWriter(GetParam()); + ASSERT_NE(w, nullptr); + w->Open(p.path(), kTotalSize); + + std::atomic started{0}; + std::vector workers; + workers.reserve(kThreads); + for (int t = 0; t < kThreads; ++t) { + 
workers.emplace_back([&, t]() { + std::vector piece(kPieceSize, static_cast(t + 1)); + started.fetch_add(1); + while (started.load() < kThreads) { + // tiny spin to encourage concurrent dispatch + } + const int64_t base = int64_t{t} * kRegionSize; + for (int i = 0; i < kRegionSize / kPieceSize; ++i) { + w->WriteAt(base + int64_t{i} * kPieceSize, piece.data(), piece.size()); + } + }); + } + for (auto& th : workers) th.join(); + w->Close(); + + std::ifstream f(p.path(), std::ios::binary); + std::vector contents((std::istreambuf_iterator(f)), + std::istreambuf_iterator()); + ASSERT_EQ(contents.size(), static_cast(kTotalSize)); + for (int t = 0; t < kThreads; ++t) { + const uint8_t expected = static_cast(t + 1); + for (int64_t i = 0; i < kRegionSize; ++i) { + const auto idx = static_cast(int64_t{t} * kRegionSize + i); + if (contents[idx] != expected) { + FAIL() << "mismatch at offset " << idx << " (thread " << t << ", expected " + << static_cast(expected) << ", got " << static_cast(contents[idx]) << ")"; + } + } + } +} + +INSTANTIATE_TEST_SUITE_P(WriterImpls, FileWriterTest, + ::testing::Values("Positional", "MutexFstream"), + [](const ::testing::TestParamInfo& info) { + return info.param; + }); + +// --------------------------------------------------------------------------- +// Perf comparison: print wall-clock for both writer kinds against a workload +// that mirrors AzureBlobDownloader (32 workers each streaming 8 chunks of 2 MB +// in 64 KB sink pieces). 
Run direct: +// foundry_local_tests --gtest_filter=FileWriterPerfComparison.* +// --------------------------------------------------------------------------- + +namespace { + +struct PerfResult { + std::string kind; + int64_t elapsed_ms; + double mb_per_sec; +}; + +PerfResult RunChunkedWorkload(const std::string& kind) { + constexpr int kThreads = 32; + constexpr int kChunksPerThread = 8; + constexpr int kChunkSize = 2 * 1024 * 1024; // 2 MB chunk like the downloader + constexpr int kPieceSize = 64 * 1024; // 64 KB scratch like the downloader + constexpr int64_t kTotalSize = int64_t{kThreads} * kChunksPerThread * kChunkSize; + static_assert(kChunkSize % kPieceSize == 0, ""); + + TempPath p; + auto w = MakeWriter(kind); + if (!w) { + ADD_FAILURE() << "MakeWriter returned null for " << kind; + return {kind, 0, 0.0}; + } + w->Open(p.path(), kTotalSize); + + std::atomic next_chunk{0}; + const int total_chunks = kThreads * kChunksPerThread; + + auto start = std::chrono::steady_clock::now(); + std::vector workers; + workers.reserve(kThreads); + for (int t = 0; t < kThreads; ++t) { + workers.emplace_back([&, t]() { + std::vector scratch(kPieceSize, static_cast(t & 0xFF)); + while (true) { + int i = next_chunk.fetch_add(1, std::memory_order_relaxed); + if (i >= total_chunks) return; + const int64_t chunk_off = int64_t{i} * kChunkSize; + for (int pos = 0; pos < kChunkSize; pos += kPieceSize) { + w->WriteAt(chunk_off + pos, scratch.data(), kPieceSize); + } + } + }); + } + for (auto& th : workers) th.join(); + w->Close(); + auto elapsed = std::chrono::steady_clock::now() - start; + auto ms = std::chrono::duration_cast(elapsed).count(); + + EXPECT_EQ(fs::file_size(p.path()), static_cast(kTotalSize)); + + double mb_per_sec = + static_cast(kTotalSize) / (1024.0 * 1024.0) / (static_cast(ms) / 1000.0); + return {kind, ms, mb_per_sec}; +} + +} // namespace + +TEST(FileWriterPerfComparison, PositionalVsMutexFstream) { + std::vector results; + 
results.push_back(RunChunkedWorkload("Positional")); + results.push_back(RunChunkedWorkload("MutexFstream")); + + std::cout << "\n=== IFileWriter perf comparison ===\n"; + std::cout << "Workload: 32 workers, 8 chunks/worker, 2 MB chunks, 64 KB sink pieces (512 MB total)\n"; + for (const auto& r : results) { + std::cout << " " << r.kind << ": " << r.elapsed_ms << " ms (" + << static_cast(r.mb_per_sec) << " MB/s)\n"; + } + std::cout << "===================================\n" << std::endl; + + // Sanity: both should make positive progress; perf is informational. + for (const auto& r : results) { + EXPECT_GT(r.mb_per_sec, 0.0) << r.kind; + } +} From aa5d30b57b8d9afe414ed8643d3bde79ad6bed0c Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Wed, 10 Jun 2026 04:25:45 -0500 Subject: [PATCH 04/36] Fix Linux/macOS -Werror build break + MutexFstream sticky-failbit Two unrelated -Werror diagnostics from Clang (and modern GCC) were tripping the Linux x64 and macOS ARM64 jobs on PR #793; Windows + MSVC silently accepted them. 1. blob_download_state.cc: 'kHeaderSize' was a namespace-scope constexpr that nothing referenced (the header layout is materialized by the WriteLE call sequence, not this constant). Triggers -Wunused-const-variable on Clang. Delete it; the layout comment above already documents the 45-byte size. 2. download_test.cc: ChunkFailureCancelsInFlightPeersFast captured 'kFailOffset' in a lambda, but it's a constexpr int64_t used only in a constant expression so the capture is redundant and -Wunused-lambda-capture flags it. Replace [kFailOffset] with [] to match the sister test's pattern. Also fix a latent issue surfaced during review: 3. MutexFstreamFileWriter::WriteAt now calls file_.clear() before seekp() so a prior failure doesn't permanently poison the stream and cause subsequent workers to surface a spurious 'write failed' instead of the original error. Positional writers are unaffected (pwrite/WriteFile are stateless). 
71 tests still pass on Windows (35 in the affected suites verified explicitly). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_download_state.cc | 1 - sdk_v2/cpp/src/download/file_writer.cc | 3 +++ sdk_v2/cpp/test/internal_api/download_test.cc | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index d1d97baf..1bf056cb 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -32,7 +32,6 @@ constexpr const char* kStateFileExtension = ".dlstate"; // bitmap_byte_aligned_start. constexpr char kMagic[4] = {'F', 'L', 'D', 'S'}; constexpr uint8_t kVersion = 1; -constexpr size_t kHeaderSize = 45; constexpr int32_t kBitsPerWord = 64; diff --git a/sdk_v2/cpp/src/download/file_writer.cc b/sdk_v2/cpp/src/download/file_writer.cc index 905bb6b9..cfa37578 100644 --- a/sdk_v2/cpp/src/download/file_writer.cc +++ b/sdk_v2/cpp/src/download/file_writer.cc @@ -173,6 +173,9 @@ class MutexFstreamFileWriter : public IFileWriter { void WriteAt(int64_t offset, const uint8_t* data, size_t len) override { std::lock_guard lock(mutex_); + // Clear any sticky failbit from a prior call so this write's diagnostic + // reflects what actually went wrong here, not a stale earlier failure. + file_.clear(); file_.seekp(offset); file_.write(reinterpret_cast(data), static_cast(len)); if (file_.fail()) { diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 4a6a7586..fd81f8bb 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1592,9 +1592,9 @@ TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { // The failing chunk throws fast. Every other chunk sleeps for up to 5 s in // 50-ms slices, polling the cancel flag. 
If linked cancellation works, they // observe the flag within one slice of the failure and exit promptly. - d.chunk_hook = [kFailOffset](int64_t offset, int64_t size, - const std::function& sink, - std::atomic* cancel_flag) { + d.chunk_hook = [](int64_t offset, int64_t size, + const std::function& sink, + std::atomic* cancel_flag) { if (offset == kFailOffset) { // Give other workers a moment to enter their sleep loop before we throw, // so we're meaningfully testing the cancel-while-in-flight path. From 036f7a6ba003a2adf51642cc484ff29163e1d011 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Wed, 10 Jun 2026 09:20:47 -0500 Subject: [PATCH 05/36] Preserve resume progress on transient stat / rename failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two correctness gaps surfaced during PR review of the new resumable-download machinery; both bias toward silently destroying intact on-disk progress when a transient filesystem error happens. EnsureFileExistsAtSize (file_writer.cc): the previous implementation treated any std::filesystem::file_size() error as 'file does not exist' and fell through to opening an std::ofstream — which has implicit ios::trunc — over the path. A permission glitch, NFS stat hiccup, or virus-scanner-induced EBUSY on a file that *did* exist at the right size would wipe the partial download and force a restart from chunk 0. Now: only the no_such_file_or_directory case proceeds to (re)create; any other stat error throws so the resume bitmap on disk is preserved and the caller can retry. SaveState (blob_download_state.cc): the rename-failed fallback used to do remove(state_path) + rename(tmp_path, state_path). If the second rename also failed (sharing violation, EXDEV, etc.) we had already deleted the old sidecar — leaving nothing on disk and forcing a from-scratch restart on the next run. 
std::filesystem::rename atomically replaces on every platform we target (POSIX rename(2); Windows MoveFileExW REPLACE_EXISTING), so that fallback was both unnecessary and destructive. Now: on rename failure, just remove the tmp file and log a warning; the previous state_path is left intact and the next SaveState call retries with the up-to-date in-memory bitmap. All 61 download-related tests still pass on Windows. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cpp/src/download/blob_download_state.cc | 19 +++++++++++++------ sdk_v2/cpp/src/download/file_writer.cc | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index 1bf056cb..1cf8ae9b 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -349,13 +349,20 @@ void BlobDownloadState::SaveState(ILogger* logger) { std::error_code ec; std::filesystem::rename(tmp_path, state_path, ec); if (ec) { - // Try remove-then-rename for filesystems that don't replace atomically. - std::filesystem::remove(state_path, ec); - std::filesystem::rename(tmp_path, state_path, ec); - if (ec && logger) { + // std::filesystem::rename atomically replaces the destination on every + // platform we target (POSIX rename(2); Windows MoveFileExW with + // MOVEFILE_REPLACE_EXISTING). If it still fails, the cause is transient + // (e.g. a brief sharing violation on Windows or a flaky network FS) — + // do NOT delete state_path as a fallback; that loses the only intact + // copy of the resume bitmap. Instead, drop the tmp file and let the + // next SaveState call retry from the up-to-date in-memory state. 
+ std::error_code rm_ec; + std::filesystem::remove(tmp_path, rm_ec); + if (logger) { logger->Log(LogLevel::Warning, - "Failed to rename download state file: " + tmp_path.string() + " -> " + - state_path.string() + " (" + ec.message() + ")"); + "Failed to commit download state file: " + tmp_path.string() + " -> " + + state_path.string() + " (" + ec.message() + + "); previous state retained, will retry on next save"); } } } diff --git a/sdk_v2/cpp/src/download/file_writer.cc b/sdk_v2/cpp/src/download/file_writer.cc index cfa37578..a1936a62 100644 --- a/sdk_v2/cpp/src/download/file_writer.cc +++ b/sdk_v2/cpp/src/download/file_writer.cc @@ -33,8 +33,18 @@ namespace fs = std::filesystem; void EnsureFileExistsAtSize(const fs::path& path, int64_t expected_size) { std::error_code ec; auto cur_size = fs::file_size(path, ec); - if (!ec && cur_size == static_cast(expected_size)) { - return; + if (!ec) { + if (cur_size == static_cast(expected_size)) { + return; + } + // File exists but is the wrong size — fall through to recreate. + } else if (ec != std::errc::no_such_file_or_directory) { + // Some other stat error (permission, transient NFS hiccup, AV scanner + // holding a handle, etc.). Don't blow away a potentially-intact file + // just because we couldn't read its size; surface the error instead so + // the caller can retry and the existing on-disk progress is preserved. 
+ FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to stat blob file: " + path.string() + " (" + ec.message() + ")"); } std::ofstream f(path, std::ios::binary); From a0b944bb38535bf350d8e8a74f93bd2a3bc087fb Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Wed, 10 Jun 2026 09:43:46 -0500 Subject: [PATCH 06/36] CrossProcessFileLock: drop self-cleanup; persist lock file across releases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unlink-on-release behavior (POSIX explicit unlink, Windows FILE_FLAG_DELETE_ON_CLOSE) mirrored what the C# reference does but inherited the same theoretical race: between unlink and close on POSIX, another acquirer can O_CREAT a fresh inode at the path and flock that, leaving two processes briefly believing they hold the lock on different inodes. In our download protocol the race is benign because every acquirer immediately re-checks 'is the model already downloaded' under the new lock and returns a no-op — but the cleaner answer is to never open the window in the first place. Persist the .download.lock file across acquisitions: - POSIX State: drop the 'path' field; destructor just close()s. - Windows: drop FILE_FLAG_DELETE_ON_CLOSE; OPEN_ALWAYS opens the existing inode on re-acquire, and dwShareMode=0 still enforces exclusivity. Re-acquirers reopen the same inode — there is no path-to-inode race window anywhere in the lifecycle. The file is a few bytes of debug payload and lives alongside the model artifacts; no user-visible impact. Test update: ReleaseOnDestructionRemovesLockFile is replaced by ReleaseLeavesLockFileForReuse, which asserts both that the file persists and that a fresh TryAcquireForDirectory against the same directory succeeds. All 61 download-related tests pass on Windows. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/download/cross_process_file_lock.cc | 22 +++++++++---------- .../cross_process_file_lock_test.cc | 15 +++++++++---- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc index 33eeb215..81484fe7 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.cc +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -53,14 +53,17 @@ std::string FormatProcessInfo() { } // namespace -// Platform-specific resource handle. The destructor here is the only thing -// that releases the lock; CrossProcessFileLock's destructor is defaulted. +// Platform-specific resource handle. Closing the handle releases the lock; +// CrossProcessFileLock's destructor is defaulted. The lock file itself is +// intentionally left on disk — re-acquirers simply re-open the existing +// inode rather than racing to create a fresh one (eliminating the small +// inode-mismatch window between unlink and close on POSIX, and matching it +// on Windows by dropping FILE_FLAG_DELETE_ON_CLOSE). #ifdef _WIN32 struct CrossProcessFileLock::State { HANDLE handle; ~State() { if (handle != INVALID_HANDLE_VALUE) { - // FILE_FLAG_DELETE_ON_CLOSE removes the file when the last handle closes. CloseHandle(handle); } } @@ -68,12 +71,8 @@ struct CrossProcessFileLock::State { #else struct CrossProcessFileLock::State { int fd; - std::filesystem::path path; ~State() { if (fd >= 0) { - // Unlink before close so the file disappears at the same instant the - // lock releases; a concurrent acquirer simply recreates it. - ::unlink(path.c_str()); ::close(fd); } } @@ -105,15 +104,16 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto #ifdef _WIN32 // dwShareMode=0 blocks any other open (cross- and in-process) until this - // handle closes. 
FILE_FLAG_DELETE_ON_CLOSE pairs OPEN_ALWAYS into a - // self-cleaning lock that doesn't require unlink-then-close races. + // handle closes. The lock file persists after release; subsequent acquirers + // just re-open the same inode and the next dwShareMode=0 open is what + // enforces exclusivity, no race possible. auto wide = lock_path.wstring(); HANDLE handle = CreateFileW(wide.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, OPEN_ALWAYS, - FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, + FILE_ATTRIBUTE_NORMAL, nullptr); if (handle == INVALID_HANDLE_VALUE) { DWORD err = GetLastError(); @@ -153,7 +153,7 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto auto info = FormatProcessInfo(); (void)::write(fd, info.data(), info.size()); - state = std::unique_ptr(new State{fd, lock_path}); + state = std::unique_ptr(new State{fd}); #endif if (logger) { diff --git a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc index a6e38fdf..322125cb 100644 --- a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc +++ b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc @@ -58,7 +58,7 @@ TEST(CrossProcessFileLockTest, TryAcquireSucceedsForFreshDirectory) { EXPECT_EQ(lock->path().filename(), ".download.lock"); } -TEST(CrossProcessFileLockTest, ReleaseOnDestructionRemovesLockFile) { +TEST(CrossProcessFileLockTest, ReleaseLeavesLockFileForReuse) { TempDir dir; fs::path lock_file; @@ -69,9 +69,16 @@ TEST(CrossProcessFileLockTest, ReleaseOnDestructionRemovesLockFile) { EXPECT_TRUE(fs::exists(lock_file)); } - // After RAII release the lock file should be gone (Win FILE_FLAG_DELETE_ON_CLOSE, - // POSIX explicit unlink in destructor). 
- EXPECT_FALSE(fs::exists(lock_file)); + // The lock file intentionally persists after release: re-acquirers re-open + // the same inode rather than racing to create a fresh one, which avoids the + // unlink-then-close inode-mismatch window inherent to POSIX flock semantics. + EXPECT_TRUE(fs::exists(lock_file)); + + // A second TryAcquire on the same directory must still succeed against the + // now-unlocked persistent lock file. + auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + ASSERT_NE(reacquired, nullptr); + EXPECT_EQ(reacquired->path(), lock_file); } TEST(CrossProcessFileLockTest, SecondAcquireReturnsNullWhileFirstIsHeld) { From d7a3072db05a365aa25040e79d5543f014d3d656 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Wed, 10 Jun 2026 10:16:14 -0500 Subject: [PATCH 07/36] Revert "CrossProcessFileLock: drop self-cleanup; persist lock file across releases" This reverts commit be3c391729ae05da2088dbf8c2a5e696fe7bceed. --- .../src/download/cross_process_file_lock.cc | 22 +++++++++---------- .../cross_process_file_lock_test.cc | 15 ++++--------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc index 81484fe7..33eeb215 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.cc +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -53,17 +53,14 @@ std::string FormatProcessInfo() { } // namespace -// Platform-specific resource handle. Closing the handle releases the lock; -// CrossProcessFileLock's destructor is defaulted. The lock file itself is -// intentionally left on disk — re-acquirers simply re-open the existing -// inode rather than racing to create a fresh one (eliminating the small -// inode-mismatch window between unlink and close on POSIX, and matching it -// on Windows by dropping FILE_FLAG_DELETE_ON_CLOSE). +// Platform-specific resource handle. 
The destructor here is the only thing +// that releases the lock; CrossProcessFileLock's destructor is defaulted. #ifdef _WIN32 struct CrossProcessFileLock::State { HANDLE handle; ~State() { if (handle != INVALID_HANDLE_VALUE) { + // FILE_FLAG_DELETE_ON_CLOSE removes the file when the last handle closes. CloseHandle(handle); } } @@ -71,8 +68,12 @@ struct CrossProcessFileLock::State { #else struct CrossProcessFileLock::State { int fd; + std::filesystem::path path; ~State() { if (fd >= 0) { + // Unlink before close so the file disappears at the same instant the + // lock releases; a concurrent acquirer simply recreates it. + ::unlink(path.c_str()); ::close(fd); } } @@ -104,16 +105,15 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto #ifdef _WIN32 // dwShareMode=0 blocks any other open (cross- and in-process) until this - // handle closes. The lock file persists after release; subsequent acquirers - // just re-open the same inode and the next dwShareMode=0 open is what - // enforces exclusivity, no race possible. + // handle closes. FILE_FLAG_DELETE_ON_CLOSE pairs OPEN_ALWAYS into a + // self-cleaning lock that doesn't require unlink-then-close races. 
auto wide = lock_path.wstring(); HANDLE handle = CreateFileW(wide.c_str(), GENERIC_READ | GENERIC_WRITE, 0, nullptr, OPEN_ALWAYS, - FILE_ATTRIBUTE_NORMAL, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, nullptr); if (handle == INVALID_HANDLE_VALUE) { DWORD err = GetLastError(); @@ -153,7 +153,7 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto auto info = FormatProcessInfo(); (void)::write(fd, info.data(), info.size()); - state = std::unique_ptr(new State{fd}); + state = std::unique_ptr(new State{fd, lock_path}); #endif if (logger) { diff --git a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc index 322125cb..a6e38fdf 100644 --- a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc +++ b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc @@ -58,7 +58,7 @@ TEST(CrossProcessFileLockTest, TryAcquireSucceedsForFreshDirectory) { EXPECT_EQ(lock->path().filename(), ".download.lock"); } -TEST(CrossProcessFileLockTest, ReleaseLeavesLockFileForReuse) { +TEST(CrossProcessFileLockTest, ReleaseOnDestructionRemovesLockFile) { TempDir dir; fs::path lock_file; @@ -69,16 +69,9 @@ TEST(CrossProcessFileLockTest, ReleaseLeavesLockFileForReuse) { EXPECT_TRUE(fs::exists(lock_file)); } - // The lock file intentionally persists after release: re-acquirers re-open - // the same inode rather than racing to create a fresh one, which avoids the - // unlink-then-close inode-mismatch window inherent to POSIX flock semantics. - EXPECT_TRUE(fs::exists(lock_file)); - - // A second TryAcquire on the same directory must still succeed against the - // now-unlocked persistent lock file. - auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); - ASSERT_NE(reacquired, nullptr); - EXPECT_EQ(reacquired->path(), lock_file); + // After RAII release the lock file should be gone (Win FILE_FLAG_DELETE_ON_CLOSE, + // POSIX explicit unlink in destructor). 
+ EXPECT_FALSE(fs::exists(lock_file)); } TEST(CrossProcessFileLockTest, SecondAcquireReturnsNullWhileFirstIsHeld) { From 38b14ad43fd68c36037889d05714fca3c279e650 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Wed, 17 Jun 2026 15:01:43 -0500 Subject: [PATCH 08/36] refactor(download): collapse file writer to a single concrete FileWriter The IFileWriter interface + FileWriterKind strategy enum + MutexFstream second implementation + runtime selection were speculative generality: the only real variation (Windows vs POSIX) is already a compile-time #ifdef, and nothing in production ever selected MutexFstream (download_manager always used the default Positional; MutexFstream existed only for a unit-test parameterization and a thresholdless perf benchmark). Replace it with a single concrete FileWriter (Open/WriteAt/Close), backed by pwrite (POSIX) / WriteFile+OVERLAPPED (Windows) via #ifdef. The Windows HANDLE is stored as void* so <windows.h> stays out of the header. AzureBlobDownloader now stack-allocates the writer; the FileWriterKind enum, constructor parameter, and selection branch are gone. Tests: drop the MutexFstream parameterization (TEST_P -> TEST) and the PositionalVsMutexFstream benchmark; the same correctness assertions now run once against FileWriter. The Azure test seam (GetBlobSize/DownloadChunkStreaming) and real-temp-file writes are unchanged. Verified (RelWithDebInfo): build clean; FileWriterTest (5), DownloadManagerTest (17), CrossProcessFileLockTest (9), BlobDownloadStateTest (15) all pass. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 17 +- sdk_v2/cpp/src/download/blob_downloader.h | 18 +- sdk_v2/cpp/src/download/file_writer.cc | 217 +++++++----------- sdk_v2/cpp/src/download/file_writer.h | 53 ++--- .../cpp/test/internal_api/file_writer_test.cc | 164 +++---------- 5 files changed, 139 insertions(+), 330 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index a0ff4297..23341dbb 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -48,8 +48,7 @@ struct AzureBlobDownloader::ChunkContext { std::atomic* cancel_flag; }; -AzureBlobDownloader::AzureBlobDownloader(ILogger* logger, FileWriterKind writer_kind) - : logger_(logger), writer_kind_(writer_kind) {} +AzureBlobDownloader::AzureBlobDownloader(ILogger* logger) : logger_(logger) {} std::vector AzureBlobDownloader::ListBlobs(const std::string& sas_uri) { try { @@ -197,12 +196,10 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // Open the file writer once for the whole download. Open() pre-allocates // the file to blob_size if needed, preserving any existing bytes from a - // resume. Concurrent WriteAt calls to disjoint ranges are thread-safe - // (lock-free for Positional, mutex-guarded for MutexFstream). - std::unique_ptr writer = (writer_kind_ == FileWriterKind::MutexFstream) - ? MakeMutexFstreamFileWriter() - : MakePositionalFileWriter(); - writer->Open(local_path, blob_size); + // resume. Concurrent WriteAt calls to disjoint ranges are thread-safe — the + // OS arbitrates positional writes to non-overlapping ranges. + FileWriter writer; + writer.Open(local_path, blob_size); // Save the sidecar roughly every 2% of chunks, with a floor of 10. 
const int32_t save_interval = std::max(10, num_chunks / 50); @@ -255,7 +252,7 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // needed across concurrent workers; we don't take a mutex here. int64_t written = 0; auto sink = [&](const uint8_t* data, size_t len) { - writer->WriteAt(offset + written, data, len); + writer.WriteAt(offset + written, data, len); written += static_cast(len); }; @@ -314,7 +311,7 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // Release the OS handle before persisting / deleting the sidecar so any // observer that watches the data file sees a fully-closed handle. - writer->Close(); + writer.Close(); if (first_error || (cancelled && cancelled->load(std::memory_order_relaxed))) { // Persist what we have so the next attempt resumes from here. diff --git a/sdk_v2/cpp/src/download/blob_downloader.h b/sdk_v2/cpp/src/download/blob_downloader.h index 4fc7412f..7e54d8bf 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.h +++ b/sdk_v2/cpp/src/download/blob_downloader.h @@ -58,19 +58,6 @@ class IBlobDownloader { std::atomic* cancelled = nullptr) = 0; }; -/// Strategy for writing downloaded blob chunks to the local file. Both -/// strategies are thread-safe across concurrent calls to disjoint ranges. -/// -/// - `Positional`: lock-free `pwrite` / `WriteFile`+`OVERLAPPED`. Default and -/// recommended; lets the OS arbitrate concurrent writes to disjoint ranges -/// instead of taking a user-space mutex. -/// - `MutexFstream`: single shared `std::fstream` guarded by an internal -/// mutex. Provided for benchmarking and as a portable fallback. -enum class FileWriterKind { - Positional, - MutexFstream, -}; - /// Azure Storage Blobs SDK-based implementation of IBlobDownloader. 
/// /// Implements resumable downloads: a `.dlstate` sidecar tracks which 2 MB @@ -86,9 +73,7 @@ enum class FileWriterKind { class AzureBlobDownloader : public IBlobDownloader { public: /// `logger` is used for diagnostics only (state file save/load events). May be null. - /// `writer_kind` chooses the on-disk write strategy; see `FileWriterKind`. - explicit AzureBlobDownloader(ILogger* logger = nullptr, - FileWriterKind writer_kind = FileWriterKind::Positional); + explicit AzureBlobDownloader(ILogger* logger = nullptr); std::vector ListBlobs(const std::string& sas_uri) override; @@ -136,7 +121,6 @@ class AzureBlobDownloader : public IBlobDownloader { private: ILogger* logger_ = nullptr; - FileWriterKind writer_kind_ = FileWriterKind::Positional; }; /// High-level download function: enumerate, filter, and download all blobs from a SAS URI. diff --git a/sdk_v2/cpp/src/download/file_writer.cc b/sdk_v2/cpp/src/download/file_writer.cc index a1936a62..46cc1716 100644 --- a/sdk_v2/cpp/src/download/file_writer.cc +++ b/sdk_v2/cpp/src/download/file_writer.cc @@ -6,7 +6,6 @@ #include #include -#include #include #include @@ -24,12 +23,12 @@ namespace fl { -namespace { - namespace fs = std::filesystem; -/// Ensure the data file exists at exactly `expected_size`. Skips truncation -/// if the file is already at that size — the resume path relies on this. +namespace { + +/// Ensure the data file exists at exactly `expected_size`. Skips truncation if +/// the file is already at that size — the resume path relies on this. void EnsureFileExistsAtSize(const fs::path& path, int64_t expected_size) { std::error_code ec; auto cur_size = fs::file_size(path, ec); @@ -40,9 +39,9 @@ void EnsureFileExistsAtSize(const fs::path& path, int64_t expected_size) { // File exists but is the wrong size — fall through to recreate. } else if (ec != std::errc::no_such_file_or_directory) { // Some other stat error (permission, transient NFS hiccup, AV scanner - // holding a handle, etc.). 
Don't blow away a potentially-intact file - // just because we couldn't read its size; surface the error instead so - // the caller can retry and the existing on-disk progress is preserved. + // holding a handle, etc.). Don't blow away a potentially-intact file just + // because we couldn't read its size; surface the error instead so the + // caller can retry and the existing on-disk progress is preserved. FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "failed to stat blob file: " + path.string() + " (" + ec.message() + ")"); } @@ -64,159 +63,99 @@ void EnsureFileExistsAtSize(const fs::path& path, int64_t expected_size) { } } -#ifdef _WIN32 - -class WindowsPositionalFileWriter : public IFileWriter { - public: - ~WindowsPositionalFileWriter() override { Close(); } - - void Open(const fs::path& path, int64_t expected_size) override { - EnsureFileExistsAtSize(path, expected_size); - // FILE_SHARE_READ | FILE_SHARE_WRITE so the lock file / other tools can - // peek at the partial file without us erroring; positional WriteFile is - // safe regardless of share mode. - handle_ = ::CreateFileW(path.wstring().c_str(), GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, nullptr); - if (handle_ == INVALID_HANDLE_VALUE) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "PositionalFileWriter open failed for " + path.string() + - " (Win32 err " + std::to_string(::GetLastError()) + ")"); - } - } +} // namespace - void WriteAt(int64_t offset, const uint8_t* data, size_t len) override { - // Concurrent WriteFile calls with distinct OVERLAPPED offsets on the same - // handle are safe for non-overlapping ranges; the kernel orders them. - while (len > 0) { - OVERLAPPED ov{}; - ov.Offset = static_cast(static_cast(offset) & 0xFFFFFFFFULL); - ov.OffsetHigh = static_cast((static_cast(offset) >> 32) & 0xFFFFFFFFULL); - DWORD to_write = static_cast(len > 0x7FFFFFFFu ? 
0x7FFFFFFFu : len); - DWORD written = 0; - if (!::WriteFile(handle_, data, to_write, &written, &ov)) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "PositionalFileWriter write failed at offset " + std::to_string(offset) + - " (Win32 err " + std::to_string(::GetLastError()) + ")"); - } - if (written == 0) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "PositionalFileWriter short write at offset " + std::to_string(offset)); - } - offset += static_cast(written); - data += written; - len -= written; - } - } +#ifdef _WIN32 - void Close() override { - if (handle_ != INVALID_HANDLE_VALUE) { - ::CloseHandle(handle_); - handle_ = INVALID_HANDLE_VALUE; - } +FileWriter::~FileWriter() { Close(); } + +void FileWriter::Open(const fs::path& path, int64_t expected_size) { + EnsureFileExistsAtSize(path, expected_size); + // FILE_SHARE_READ | FILE_SHARE_WRITE so the lock file / other tools can peek + // at the partial file without us erroring; positional WriteFile is safe + // regardless of share mode. + HANDLE h = ::CreateFileW(path.wstring().c_str(), GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, nullptr, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, nullptr); + if (h == INVALID_HANDLE_VALUE) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "FileWriter open failed for " + path.string() + " (Win32 err " + + std::to_string(::GetLastError()) + ")"); } + handle_ = h; +} - private: - HANDLE handle_ = INVALID_HANDLE_VALUE; -}; - -#else // POSIX - -class PosixPositionalFileWriter : public IFileWriter { - public: - ~PosixPositionalFileWriter() override { Close(); } - - void Open(const fs::path& path, int64_t expected_size) override { - EnsureFileExistsAtSize(path, expected_size); - fd_ = ::open(path.c_str(), O_RDWR | O_CLOEXEC); - if (fd_ < 0) { +void FileWriter::WriteAt(int64_t offset, const uint8_t* data, size_t len) { + // Concurrent WriteFile calls with distinct OVERLAPPED offsets on the same + // handle are safe for non-overlapping ranges; the kernel orders them. 
+ while (len > 0) { + OVERLAPPED ov{}; + ov.Offset = static_cast(static_cast(offset) & 0xFFFFFFFFULL); + ov.OffsetHigh = static_cast((static_cast(offset) >> 32) & 0xFFFFFFFFULL); + DWORD to_write = static_cast(len > 0x7FFFFFFFu ? 0x7FFFFFFFu : len); + DWORD written = 0; + if (!::WriteFile(handle_, data, to_write, &written, &ov)) { FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "PositionalFileWriter open failed for " + path.string() + - " (errno " + std::to_string(errno) + ")"); + "FileWriter write failed at offset " + std::to_string(offset) + " (Win32 err " + + std::to_string(::GetLastError()) + ")"); } - } - - void WriteAt(int64_t offset, const uint8_t* data, size_t len) override { - while (len > 0) { - ssize_t n = ::pwrite(fd_, data, len, static_cast(offset)); - if (n < 0) { - if (errno == EINTR) continue; - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "PositionalFileWriter pwrite failed at offset " + std::to_string(offset) + - " (errno " + std::to_string(errno) + ")"); - } - if (n == 0) { - FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "PositionalFileWriter short pwrite at offset " + std::to_string(offset)); - } - offset += n; - data += n; - len -= static_cast(n); + if (written == 0) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "FileWriter short write at offset " + std::to_string(offset)); } + offset += static_cast(written); + data += written; + len -= written; } +} - void Close() override { - if (fd_ >= 0) { - ::close(fd_); - fd_ = -1; - } +void FileWriter::Close() { + if (handle_ != nullptr) { + ::CloseHandle(handle_); + handle_ = nullptr; } +} - private: - int fd_ = -1; -}; +#else // POSIX -#endif +FileWriter::~FileWriter() { Close(); } -class MutexFstreamFileWriter : public IFileWriter { - public: - ~MutexFstreamFileWriter() override { Close(); } +void FileWriter::Open(const fs::path& path, int64_t expected_size) { + EnsureFileExistsAtSize(path, expected_size); + fd_ = ::open(path.c_str(), O_RDWR | O_CLOEXEC); + if (fd_ < 0) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + 
"FileWriter open failed for " + path.string() + " (errno " + + std::to_string(errno) + ")"); + } +} - void Open(const fs::path& path, int64_t expected_size) override { - EnsureFileExistsAtSize(path, expected_size); - file_.open(path, std::ios::binary | std::ios::in | std::ios::out); - if (!file_.is_open()) { +void FileWriter::WriteAt(int64_t offset, const uint8_t* data, size_t len) { + while (len > 0) { + ssize_t n = ::pwrite(fd_, data, len, static_cast(offset)); + if (n < 0) { + if (errno == EINTR) continue; FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "MutexFstreamFileWriter open failed for " + path.string()); + "FileWriter pwrite failed at offset " + std::to_string(offset) + " (errno " + + std::to_string(errno) + ")"); } - } - - void WriteAt(int64_t offset, const uint8_t* data, size_t len) override { - std::lock_guard lock(mutex_); - // Clear any sticky failbit from a prior call so this write's diagnostic - // reflects what actually went wrong here, not a stale earlier failure. - file_.clear(); - file_.seekp(offset); - file_.write(reinterpret_cast(data), static_cast(len)); - if (file_.fail()) { + if (n == 0) { FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, - "MutexFstreamFileWriter write failed at offset " + std::to_string(offset)); + "FileWriter short pwrite at offset " + std::to_string(offset)); } + offset += n; + data += n; + len -= static_cast(n); } +} - void Close() override { - if (file_.is_open()) { - file_.close(); - } +void FileWriter::Close() { + if (fd_ >= 0) { + ::close(fd_); + fd_ = -1; } - - private: - std::fstream file_; - std::mutex mutex_; -}; - -} // namespace - -std::unique_ptr MakePositionalFileWriter() { -#ifdef _WIN32 - return std::make_unique(); -#else - return std::make_unique(); -#endif } -std::unique_ptr MakeMutexFstreamFileWriter() { - return std::make_unique(); -} +#endif } // namespace fl diff --git a/sdk_v2/cpp/src/download/file_writer.h b/sdk_v2/cpp/src/download/file_writer.h index eacc498e..da581322 100644 --- 
a/sdk_v2/cpp/src/download/file_writer.h +++ b/sdk_v2/cpp/src/download/file_writer.h @@ -2,45 +2,46 @@ // Licensed under the MIT License. #pragma once +#include #include #include -#include namespace fl { /// Thread-safe positional writer for blob downloads. /// -/// Workers in a single download claim disjoint chunks, so concurrent -/// `WriteAt` calls always target non-overlapping byte ranges. An -/// implementation may serialize internally (e.g. via a mutex) or rely on the -/// OS to allow lock-free concurrent positional writes — the contract is the -/// same either way. -class IFileWriter { +/// Workers in a single download claim disjoint chunks, so concurrent `WriteAt` +/// calls always target non-overlapping byte ranges. Backed by `pwrite` (POSIX) +/// or `WriteFile` + `OVERLAPPED` (Windows): the OS arbitrates concurrent writes +/// to disjoint ranges, so no user-space lock is taken. +class FileWriter { public: - virtual ~IFileWriter() = default; + FileWriter() = default; + ~FileWriter(); + + FileWriter(const FileWriter&) = delete; + FileWriter& operator=(const FileWriter&) = delete; /// Make `path` exist at exactly `expected_size` bytes. If the file already - /// exists at that size, leave its contents intact (so the resume path can - /// pick up where it left off). Called once before the first `WriteAt`. - virtual void Open(const std::filesystem::path& path, int64_t expected_size) = 0; + /// exists at that size, leave its contents intact so the resume path can pick + /// up where it left off. Called once before the first `WriteAt`. + void Open(const std::filesystem::path& path, int64_t expected_size); - /// Write `len` bytes from `data` starting at byte offset `offset`. - /// Thread-safe across overlapping or disjoint ranges — concurrent calls to - /// disjoint ranges complete without coordination from the caller. - virtual void WriteAt(int64_t offset, const uint8_t* data, size_t len) = 0; + /// Write `len` bytes from `data` starting at byte offset `offset`. 
Safe for + /// concurrent calls targeting disjoint ranges. + void WriteAt(int64_t offset, const uint8_t* data, size_t len); /// Release the underlying OS handle. Implicitly called by the destructor. - virtual void Close() = 0; + void Close(); + + private: +#ifdef _WIN32 + // Win32 HANDLE. Holds a valid handle while open, nullptr otherwise — Open() + // maps a CreateFileW failure to a throw, so INVALID_HANDLE_VALUE is never stored. + void* handle_ = nullptr; +#else + int fd_ = -1; +#endif }; -/// Backed by `pwrite` (POSIX) or `WriteFile`+`OVERLAPPED` (Windows). Concurrent -/// `WriteAt` calls to disjoint ranges proceed in parallel — no internal -/// mutex. The recommended default. -std::unique_ptr MakePositionalFileWriter(); - -/// Backed by a single `std::fstream` guarded by an internal mutex. Provided -/// for comparison with `MakePositionalFileWriter` and as a portable fallback -/// if a platform's positional-write semantics ever change. -std::unique_ptr MakeMutexFstreamFileWriter(); - } // namespace fl diff --git a/sdk_v2/cpp/test/internal_api/file_writer_test.cc b/sdk_v2/cpp/test/internal_api/file_writer_test.cc index 84134d68..7a0cec21 100644 --- a/sdk_v2/cpp/test/internal_api/file_writer_test.cc +++ b/sdk_v2/cpp/test/internal_api/file_writer_test.cc @@ -1,31 +1,22 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. // -// Tests for the IFileWriter abstraction backing AzureBlobDownloader's chunked -// writes. Exercises both implementations (Positional / MutexFstream) through a -// parametrized fixture so every correctness assertion runs against both. -// -// The "PerfComparison" test prints wall-clock numbers for a representative -// download workload (32 threads, 64-way chunked streaming into a 256 MB file) -// so we can eyeball lock contention deltas without adding a separate -// microbenchmark binary. It is informational — its only EXPECT is that both -// runs complete and the file ends up at the right size. 
+// Tests for the FileWriter backing AzureBlobDownloader's chunked writes: +// pre-allocation, resume preservation, and single-thread + concurrent +// disjoint-range positional writes. #include "download/file_writer.h" #include #include -#include #include -#include #include #include -#include #include -#include #include #include +#include #include #include @@ -52,28 +43,18 @@ class TempPath { fs::path path_; }; -std::unique_ptr MakeWriter(const std::string& kind) { - if (kind == "Positional") return MakePositionalFileWriter(); - if (kind == "MutexFstream") return MakeMutexFstreamFileWriter(); - ADD_FAILURE() << "unknown writer kind " << kind; - return nullptr; -} - -class FileWriterTest : public ::testing::TestWithParam {}; - } // namespace -TEST_P(FileWriterTest, OpenCreatesFileAtRequestedSize) { +TEST(FileWriterTest, OpenCreatesFileAtRequestedSize) { TempPath p; - auto w = MakeWriter(GetParam()); - ASSERT_NE(w, nullptr); - w->Open(p.path(), 4096); - w->Close(); + FileWriter w; + w.Open(p.path(), 4096); + w.Close(); EXPECT_TRUE(fs::exists(p.path())); EXPECT_EQ(fs::file_size(p.path()), 4096u); } -TEST_P(FileWriterTest, OpenPreservesExistingFileAtSameSize) { +TEST(FileWriterTest, OpenPreservesExistingFileAtSameSize) { TempPath p; // Pre-write a sentinel byte the writer must NOT overwrite. { @@ -88,10 +69,9 @@ TEST_P(FileWriterTest, OpenPreservesExistingFileAtSameSize) { f.put(static_cast(0xAB)); } - auto w = MakeWriter(GetParam()); - ASSERT_NE(w, nullptr); - w->Open(p.path(), 1024); // same size -> must not truncate - w->Close(); + FileWriter w; + w.Open(p.path(), 1024); // same size -> must not truncate + w.Close(); // Sentinel byte should still be there. 
std::ifstream f(p.path(), std::ios::binary); @@ -100,7 +80,7 @@ TEST_P(FileWriterTest, OpenPreservesExistingFileAtSameSize) { EXPECT_EQ(byte, 0xAB); } -TEST_P(FileWriterTest, OpenTruncatesIfSizeChanged) { +TEST(FileWriterTest, OpenTruncatesIfSizeChanged) { TempPath p; { std::ofstream f(p.path(), std::ios::binary); @@ -109,22 +89,20 @@ TEST_P(FileWriterTest, OpenTruncatesIfSizeChanged) { } EXPECT_EQ(fs::file_size(p.path()), 101u); - auto w = MakeWriter(GetParam()); - ASSERT_NE(w, nullptr); - w->Open(p.path(), 4096); - w->Close(); + FileWriter w; + w.Open(p.path(), 4096); + w.Close(); EXPECT_EQ(fs::file_size(p.path()), 4096u); } -TEST_P(FileWriterTest, SingleThreadWriteAt) { +TEST(FileWriterTest, SingleThreadWriteAt) { TempPath p; - auto w = MakeWriter(GetParam()); - ASSERT_NE(w, nullptr); - w->Open(p.path(), 1024); + FileWriter w; + w.Open(p.path(), 1024); std::vector data(256, 0xEF); - w->WriteAt(512, data.data(), data.size()); - w->Close(); + w.WriteAt(512, data.data(), data.size()); + w.Close(); std::ifstream f(p.path(), std::ios::binary); std::vector contents((std::istreambuf_iterator(f)), @@ -135,7 +113,7 @@ TEST_P(FileWriterTest, SingleThreadWriteAt) { } } -TEST_P(FileWriterTest, ConcurrentDisjointWritesProduceCorrectFile) { +TEST(FileWriterTest, ConcurrentDisjointWritesProduceCorrectFile) { TempPath p; constexpr int kThreads = 8; constexpr int kRegionSize = 256 * 1024; // 256 KB per thread @@ -143,9 +121,8 @@ TEST_P(FileWriterTest, ConcurrentDisjointWritesProduceCorrectFile) { constexpr int64_t kTotalSize = int64_t{kThreads} * kRegionSize; static_assert(kRegionSize % kPieceSize == 0, ""); - auto w = MakeWriter(GetParam()); - ASSERT_NE(w, nullptr); - w->Open(p.path(), kTotalSize); + FileWriter w; + w.Open(p.path(), kTotalSize); std::atomic started{0}; std::vector workers; @@ -159,12 +136,12 @@ TEST_P(FileWriterTest, ConcurrentDisjointWritesProduceCorrectFile) { } const int64_t base = int64_t{t} * kRegionSize; for (int i = 0; i < kRegionSize / kPieceSize; ++i) 
{ - w->WriteAt(base + int64_t{i} * kPieceSize, piece.data(), piece.size()); + w.WriteAt(base + int64_t{i} * kPieceSize, piece.data(), piece.size()); } }); } for (auto& th : workers) th.join(); - w->Close(); + w.Close(); std::ifstream f(p.path(), std::ios::binary); std::vector contents((std::istreambuf_iterator(f)), @@ -181,92 +158,3 @@ TEST_P(FileWriterTest, ConcurrentDisjointWritesProduceCorrectFile) { } } } - -INSTANTIATE_TEST_SUITE_P(WriterImpls, FileWriterTest, - ::testing::Values("Positional", "MutexFstream"), - [](const ::testing::TestParamInfo& info) { - return info.param; - }); - -// --------------------------------------------------------------------------- -// Perf comparison: print wall-clock for both writer kinds against a workload -// that mirrors AzureBlobDownloader (32 workers each streaming 8 chunks of 2 MB -// in 64 KB sink pieces). Run direct: -// foundry_local_tests --gtest_filter=FileWriterPerfComparison.* -// --------------------------------------------------------------------------- - -namespace { - -struct PerfResult { - std::string kind; - int64_t elapsed_ms; - double mb_per_sec; -}; - -PerfResult RunChunkedWorkload(const std::string& kind) { - constexpr int kThreads = 32; - constexpr int kChunksPerThread = 8; - constexpr int kChunkSize = 2 * 1024 * 1024; // 2 MB chunk like the downloader - constexpr int kPieceSize = 64 * 1024; // 64 KB scratch like the downloader - constexpr int64_t kTotalSize = int64_t{kThreads} * kChunksPerThread * kChunkSize; - static_assert(kChunkSize % kPieceSize == 0, ""); - - TempPath p; - auto w = MakeWriter(kind); - if (!w) { - ADD_FAILURE() << "MakeWriter returned null for " << kind; - return {kind, 0, 0.0}; - } - w->Open(p.path(), kTotalSize); - - std::atomic next_chunk{0}; - const int total_chunks = kThreads * kChunksPerThread; - - auto start = std::chrono::steady_clock::now(); - std::vector workers; - workers.reserve(kThreads); - for (int t = 0; t < kThreads; ++t) { - workers.emplace_back([&, t]() { - 
std::vector scratch(kPieceSize, static_cast(t & 0xFF)); - while (true) { - int i = next_chunk.fetch_add(1, std::memory_order_relaxed); - if (i >= total_chunks) return; - const int64_t chunk_off = int64_t{i} * kChunkSize; - for (int pos = 0; pos < kChunkSize; pos += kPieceSize) { - w->WriteAt(chunk_off + pos, scratch.data(), kPieceSize); - } - } - }); - } - for (auto& th : workers) th.join(); - w->Close(); - auto elapsed = std::chrono::steady_clock::now() - start; - auto ms = std::chrono::duration_cast(elapsed).count(); - - EXPECT_EQ(fs::file_size(p.path()), static_cast(kTotalSize)); - - double mb_per_sec = - static_cast(kTotalSize) / (1024.0 * 1024.0) / (static_cast(ms) / 1000.0); - return {kind, ms, mb_per_sec}; -} - -} // namespace - -TEST(FileWriterPerfComparison, PositionalVsMutexFstream) { - std::vector results; - results.push_back(RunChunkedWorkload("Positional")); - results.push_back(RunChunkedWorkload("MutexFstream")); - - std::cout << "\n=== IFileWriter perf comparison ===\n"; - std::cout << "Workload: 32 workers, 8 chunks/worker, 2 MB chunks, 64 KB sink pieces (512 MB total)\n"; - for (const auto& r : results) { - std::cout << " " << r.kind << ": " << r.elapsed_ms << " ms (" - << static_cast(r.mb_per_sec) << " MB/s)\n"; - } - std::cout << "===================================\n" << std::endl; - - // Sanity: both should make positive progress; perf is informational. - for (const auto& r : results) { - EXPECT_GT(r.mb_per_sec, 0.0) << r.kind; - } -} From 7b1dd4fef95b2e8da81d1476d5e530d18d63223a Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Wed, 17 Jun 2026 19:36:02 -0500 Subject: [PATCH 09/36] fix(download): per-model serialization + airtight cross-process lock Address the download-lock review items: - Per-model in-process lock. Replace the single global download_mutex_ with a per-model mutex keyed on the resolved cache path. 
Two downloads of the same model serialize; downloads of different models run concurrently in-process instead of queuing behind each other's (up to 3 h) cross-process waits. - Close the POSIX flock()+unlink() orphan-inode race. After flock() succeeds, verify (fstat vs stat) that the inode we locked is still the file at the lock path; if a racing releaser unlinked it and a third process recreated it, drop the stale lock and report contention so the caller retries. This makes the self-cleaning unlink-on-release provably safe and guarantees two processes can never both believe they hold the lock - so a model can never be downloaded to the same directory twice at once, across any number of processes or apps. - Fix the misleading Windows ACCESS_DENIED comment: it is the DELETE_ON_CLOSE delete-pending window (STATUS_DELETE_PENDING), not "narrower access rights". - Document why the POSIX unlink-before-close is safe (fresh-fd non-blocking waiters; no work between unlink and close; the inode check above). - Decouple the lock-wait cadence from the progress heartbeat: poll the cancellation/heartbeat callback once per poll_interval instead of every 100 ms, so a user callback is not invoked ~10x/s for the whole wait. - Tests: add ConcurrentDownloadsOfDifferentModelsRunConcurrently (proves different models do not serialize). Existing same-model serialize and CrossProcessFileLock acquire/release/wait/cancel/timeout tests still pass (66 download-suite tests green; POSIX branch syntax-checked under g++). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/download/cross_process_file_lock.cc | 57 ++++++++--- sdk_v2/cpp/src/download/download_manager.cc | 24 +++-- sdk_v2/cpp/src/download/download_manager.h | 21 ++++- sdk_v2/cpp/test/internal_api/download_test.cc | 94 +++++++++++++++++++ 4 files changed, 172 insertions(+), 24 deletions(-) diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc index 33eeb215..8e12411d 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.cc +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #endif @@ -71,8 +72,15 @@ struct CrossProcessFileLock::State { std::filesystem::path path; ~State() { if (fd >= 0) { - // Unlink before close so the file disappears at the same instant the - // lock releases; a concurrent acquirer simply recreates it. + // Unlink before close so the file disappears the instant the lock + // releases; a concurrent acquirer simply recreates it. This is the + // classic flock()+unlink() pattern, and it is safe here because every + // acquirer verifies, while holding the flock, that the inode it locked is + // still the one at `path` (see the fstat/stat check in + // TryAcquireForDirectory). An acquirer that raced in on the old inode + // between our unlink and a third party's recreate will see the inode + // mismatch and retry, so two processes never hold "the lock" at once. + // There is also no protected work between this unlink and close. 
::unlink(path.c_str()); ::close(fd); } @@ -118,8 +126,14 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto if (handle == INVALID_HANDLE_VALUE) { DWORD err = GetLastError(); if (err == ERROR_SHARING_VIOLATION || err == ERROR_LOCK_VIOLATION || err == ERROR_ACCESS_DENIED) { - // ACCESS_DENIED can surface on FILE_SHARE_NONE collisions when the - // existing handle has narrower access rights — treat as contention. + // SHARING/LOCK_VIOLATION: another handle already holds the share-none + // lock. ACCESS_DENIED: the holder is mid-release — FILE_FLAG_DELETE_ON_CLOSE + // puts the file into STATUS_DELETE_PENDING during the close window, and a + // concurrent open of a delete-pending file is reported as ACCESS_DENIED. + // All three mean "another process has it"; treat as contention so the + // caller retries. (A genuine permission error also lands here and would + // poll until timeout, but the directory was just created successfully so + // that is improbable.) return nullptr; } FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, @@ -149,6 +163,23 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto "flock failed for '" + lock_path.string() + "' (errno=" + std::to_string(err) + ")"); } + // Robust-flock inode check. We now hold an exclusive flock on whatever inode + // `fd` refers to, but a releaser unlink()s the lock file in its destructor — + // so between our open() and flock() the path may have been unlinked and a + // third process may have recreated it. If so, we are holding a lock on an + // orphaned inode that guards nothing while the live file at `lock_path` is a + // different inode. Confirm the inode we locked is still the one at the path; + // if not, drop it and report contention so the caller retries against the + // live file. This closes the flock()+unlink() orphan-inode race, which is + // what lets two processes never both believe they hold the lock. 
+ struct stat fd_stat {}; + struct stat path_stat {}; + if (::fstat(fd, &fd_stat) != 0 || ::stat(lock_path.c_str(), &path_stat) != 0 || + fd_stat.st_dev != path_stat.st_dev || fd_stat.st_ino != path_stat.st_ino) { + ::close(fd); // releases the flock on the stale / orphaned inode + return nullptr; + } + (void)::ftruncate(fd, 0); auto info = FormatProcessInfo(); (void)::write(fd, info.data(), info.size()); @@ -170,9 +201,13 @@ std::unique_ptr WaitForLockForDirectory( std::chrono::milliseconds poll_interval, std::chrono::milliseconds timeout) { auto deadline = std::chrono::steady_clock::now() + timeout; - // Poll cancellation in slices of at most 100 ms so a long poll interval - // (1.25 s default) doesn't keep a cancelling caller waiting. - constexpr std::chrono::milliseconds kCancelSlice{100}; + // `is_cancelled` is the caller's progress callback, which also serves as the + // liveness heartbeat — it emits 0% on every invocation. We therefore poll it + // on a single cadence (once per `poll_interval`) rather than on a separate + // fast cancellation tick: a faster tick would spam the user callback (~10x/s) + // for the entire wait, and cancelling a multi-minute cross-process wait a + // second sooner is imperceptible. There is no separate cancellation channel + // to decouple the heartbeat from. 
while (true) { if (is_cancelled && is_cancelled()) { FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "lock acquisition cancelled"); @@ -185,13 +220,7 @@ std::unique_ptr WaitForLockForDirectory( FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "timed out waiting for cross-process download lock on '" + directory.string() + "'"); } - auto slice_end = std::chrono::steady_clock::now() + poll_interval; - while (std::chrono::steady_clock::now() < slice_end) { - if (is_cancelled && is_cancelled()) { - FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "lock acquisition cancelled"); - } - std::this_thread::sleep_for(std::min(kCancelSlice, poll_interval)); - } + std::this_thread::sleep_for(poll_interval); } } diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index b5255045..869b7c9e 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -235,15 +237,25 @@ std::string DownloadManager::ComputeModelPath(const ModelInfo& info) const { return full_path.string(); } +std::shared_ptr DownloadManager::GetModelLock(const std::string& model_path) const { + std::lock_guard guard(model_locks_mutex_); + auto& slot = model_locks_[model_path]; + if (!slot) { + slot = std::make_shared(); + } + return slot; +} + std::string DownloadManager::DownloadModel(const ModelInfo& info, std::function progress_cb) { - // Serialize all downloads. Concurrent downloads of the same model would race into - // creating the same directory and double-writing inference_model.json; concurrent - // downloads of different models would compete for the same per-blob chunk parallelism. - // A single global lock keeps the model simple and predictable. - std::lock_guard download_guard(download_mutex_); - + // Resolve the cache path first, then serialize per model. 
Two downloads of the + // same model share one mutex and run one-at-a-time; downloads of different + // models take different mutexes and proceed concurrently. The cross-process + // file lock taken below extends the same-model guarantee across every process + // and app that shares this cache directory. auto model_path = ComputeModelPath(info); + auto model_lock = GetModelLock(model_path); + std::lock_guard download_guard(*model_lock); // Fast path: serve the cache without taking the cross-process lock. // A valid cache hit requires: directory exists, no in-progress signal file, and diff --git a/sdk_v2/cpp/src/download/download_manager.h b/sdk_v2/cpp/src/download/download_manager.h index b2fe1458..44f9ce38 100644 --- a/sdk_v2/cpp/src/download/download_manager.h +++ b/sdk_v2/cpp/src/download/download_manager.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -70,6 +71,12 @@ class DownloadManager { /// Uses {cache_dir}/{publisher}/{model_id_with_version_fix} std::string ComputeModelPath(const ModelInfo& info) const; + /// Get (creating on first use) the per-model serialization mutex for the + /// resolved cache path `model_path`. Downloads of the same model share one + /// mutex and run one-at-a-time; downloads of different models get distinct + /// mutexes and proceed concurrently in-process. + std::shared_ptr GetModelLock(const std::string& model_path) const; + std::string cache_directory_; // Explicit registry region override. Empty (or "auto") means "use the model's // detected_region, falling back to default registry region" — set at construction @@ -80,10 +87,16 @@ class DownloadManager { std::unique_ptr registry_client_; std::unique_ptr blob_downloader_; - /// Serializes all DownloadModel calls. Only one model downloads at a time — simpler - /// than per-model locking and avoids contending with the per-blob chunk parallelism - /// (`max_concurrency_`) inside a single download. 
- mutable std::mutex download_mutex_; + /// Guards `model_locks_`. Held only briefly to look up or insert a per-model + /// mutex — never across an actual download. + mutable std::mutex model_locks_mutex_; + + /// Per-model serialization mutexes, keyed by resolved cache path. Bounded by + /// the number of distinct models this process downloads. The `shared_ptr` + /// keeps a mutex alive for an in-flight download even though its map entry + /// persists. Cross-process serialization is handled separately by + /// CrossProcessFileLock. + mutable std::map> model_locks_; }; } // namespace fl diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index fd81f8bb..2c6241e8 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -1169,6 +1170,99 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfSameModelSerialize) { } } +// With per-model locking, two *different* models must download concurrently — +// they must not serialize through a shared in-process mutex. A rendezvous inside +// the blob downloader proves both downloads occupy the critical section at the +// same time: each arrival waits for its peer, so if the two ever serialized the +// first arrival would time out waiting for a peer that can't enter yet. 
+TEST(DownloadManagerTest, ConcurrentDownloadsOfDifferentModelsRunConcurrently) { + TempDir tmpdir; + DownloadManager manager(tmpdir.string(), "eastus", 64, fl::test::NullLog()); + + auto registry = std::make_unique("eastus", fl::test::NullLog()); + registry->SetHttpGet([](const std::string&) -> std::string { + return R"({"blobSasUri": "https://storage.blob.core.windows.net/c?sig=test"})"; + }); + manager.SetModelRegistryClient(std::move(registry)); + + class RendezvousDownloader : public IBlobDownloader { + public: + std::mutex m; + std::condition_variable cv; + int arrived = 0; + bool released = false; + std::atomic timeouts{0}; + + std::vector ListBlobs(const std::string&) override { + return {{"variant-cpu/weights.bin", 16}}; + } + + void DownloadBlob(const std::string&, const std::string& blob_name, + const std::string& local_path, int, + BlobBytesWrittenFn bytes_written_cb, + std::atomic*) override { + { + std::unique_lock lk(m); + if (++arrived >= 2) { // both concurrent downloads reached the rendezvous + released = true; + cv.notify_all(); + } else if (!cv.wait_for(lk, std::chrono::seconds(5), [&] { return released; })) { + ++timeouts; // peer never arrived within the window → downloads serialized + } + } + + auto parent = fs::path(local_path).parent_path(); + if (!parent.empty()) { + fs::create_directories(parent); + } + std::ofstream f(local_path); + f << "data for " << blob_name; + if (bytes_written_cb) { + bytes_written_cb(16); + } + } + }; + + auto rendezvous = std::make_unique(); + auto* rendezvous_raw = rendezvous.get(); + manager.SetBlobDownloader(std::move(rendezvous)); + + auto make_info = [](const char* id, const char* publisher) { + ModelInfo info; + info.model_id = id; + info.name = id; + info.uri = std::string("azureml://registries/test/models/") + id + "/versions/1"; + info.string_properties[FOUNDRY_LOCAL_MODEL_PROP_PUBLISHER_STR] = publisher; + return info; + }; + auto info_a = make_info("model-a:1", "PubA"); + auto info_b = 
make_info("model-b:1", "PubB"); + + std::atomic exceptions{0}; + std::thread t1([&] { + try { + manager.DownloadModel(info_a); + } catch (...) { + ++exceptions; + } + }); + std::thread t2([&] { + try { + manager.DownloadModel(info_b); + } catch (...) { + ++exceptions; + } + }); + t1.join(); + t2.join(); + + EXPECT_EQ(exceptions.load(), 0); + EXPECT_TRUE(rendezvous_raw->released) + << "Both different-model downloads should have met at the rendezvous."; + EXPECT_EQ(rendezvous_raw->timeouts.load(), 0) + << "Downloads of different models must run concurrently, not serialize."; +} + // HasInferenceModelJson must return false instead of throwing when the path // it's asked about is not a directory (e.g. a regular file). Previously the // underlying directory_iterator would throw filesystem_error. From 905b6a1b0c7575b75e693d0afd6c8cab468f3618 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 02:26:04 -0500 Subject: [PATCH 10/36] fixup(rebase): resolve semantic conflicts against main's download refactor Rebasing onto main surfaced API changes that auto-merge could not reconcile: - blob_downloader.cc: drop the local EndsWith helper; main supplies the shared EndsWithIgnoreCase (used for the inference_model.json filter), so the local copy was unreferenced (/WX dead-code break). - download_test.cc: main moved ModelRegistryClient's test HTTP injection from a SetHttpGet setter (returning a JSON string) to constructor injection of an HttpGetResponseFn returning http::HttpResponse; update the one remaining branch-added test to the new pattern (MakeRegistryResponse). - download_manager.cc: refresh a stale comment that referenced the removed global download_mutex_ (now the per-model lock). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 12 ------------ sdk_v2/cpp/src/download/download_manager.cc | 2 +- sdk_v2/cpp/test/internal_api/download_test.cc | 10 ++++++---- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 23341dbb..04dcd5ed 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -360,18 +360,6 @@ std::string ComputeRelativePath(const std::string& prefix, const std::string& bl return blob_name.substr(trim); } -bool EndsWith(const std::string& str, const std::string& suffix) { - if (suffix.size() > str.size()) { - return false; - } - - return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin(), - [](char a, char b) { - return std::tolower(static_cast(a)) == - std::tolower(static_cast(b)); - }); -} - /// Returns false if a file at `local_path` already matches the blob's expected /// `content_length` exactly AND has no `.dlstate` sidecar — in which case the /// caller can skip the download. Returns true (download needed) for any of: diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index 869b7c9e..9a2c34da 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -281,7 +281,7 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, std::filesystem::create_directories(model_path); // Serialize across processes that share this cache directory. Inside the - // running process `download_mutex_` already prevents reentry; the file lock + // running process the per-model lock already prevents reentry; the file lock // protects against a second SDK instance (e.g. another service or CLI) racing // on the same model directory. 
auto cancel_pred = [&progress_cb]() -> bool { diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 2c6241e8..b434a6c4 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1179,10 +1179,12 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfDifferentModelsRunConcurrently) { TempDir tmpdir; DownloadManager manager(tmpdir.string(), "eastus", 64, fl::test::NullLog()); - auto registry = std::make_unique("eastus", fl::test::NullLog()); - registry->SetHttpGet([](const std::string&) -> std::string { - return R"({"blobSasUri": "https://storage.blob.core.windows.net/c?sig=test"})"; - }); + auto registry = std::make_unique( + "eastus", fl::test::NullLog(), std::make_unique(fl::test::NullLog(), false), + [](const std::string&) { + return MakeRegistryResponse( + R"({"blobSasUri": "https://storage.blob.core.windows.net/c?sig=test"})"); + }); manager.SetModelRegistryClient(std::move(registry)); class RendezvousDownloader : public IBlobDownloader { From 3e7382f0dd133b3aa6a06125a2fb78e08e14385a Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 02:26:13 -0500 Subject: [PATCH 11/36] fix(download): correct SaveState prefix advance across a 64-bit word boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bitmap_byte_aligned_start marks the chunk index below which every chunk is implicitly complete. SaveState advanced it by accumulating +64 per fully-set word onto the (possibly unaligned) previous start, then adding the trailing-zero offset measured from the word base. When the previous start was not a multiple of 64, the two bases disagreed and the new start overshot by (start % 64), marking never-downloaded chunks complete on reload — silent, permanent data corruption once the sidecar is deleted on completion. 
Derive the new start from the word index directly (word_idx * 64 + trailing zero), independent of the previous start. Add a regression test that saves from a non-word-aligned prefix, extends it across a word boundary, saves again, and asserts the never-downloaded chunks remain pending after reload. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cpp/src/download/blob_download_state.cc | 21 ++++++---- .../internal_api/blob_download_state_test.cc | 42 +++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index 1cf8ae9b..bdeefd5b 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -275,23 +275,30 @@ std::vector BlobDownloadState::GetPendingChunks() const { void BlobDownloadState::SaveState(ILogger* logger) { // Advance bitmap_byte_aligned_start past any words that are now all 1s, so // the next save serializes only the unfinished tail. - int32_t new_start = bitmap_byte_aligned_start; - size_t word_idx = static_cast(new_start) / kBitsPerWord; + // Find the first word that is not fully complete. Every word below it is + // implicitly complete and need not be serialized again. + size_t word_idx = static_cast(bitmap_byte_aligned_start) / kBitsPerWord; while (word_idx < full_completion_bitmap.size() && full_completion_bitmap[word_idx] == ~uint64_t{0}) { - new_start += kBitsPerWord; ++word_idx; } - // Within the first not-fully-set word, advance to the lowest 0 bit and round - // down to a byte boundary (8 bits) so reload-then-resume re-reads on a clean - // alignment. + int32_t new_start; if (word_idx < full_completion_bitmap.size()) { + // Within the first not-fully-set word, advance to the lowest 0 bit. 
Derive + // the absolute chunk index from the word base (word_idx * 64), NOT by + // accumulating 64 per word onto the (possibly unaligned) previous start — + // the latter overshoots by (bitmap_byte_aligned_start % 64) and would mark + // never-downloaded chunks complete on reload. Round down to a byte boundary + // so reload-then-resume re-reads on a clean alignment. uint64_t inverted = ~full_completion_bitmap[word_idx]; int trailing_zero = 0; while (trailing_zero < kBitsPerWord && ((inverted >> trailing_zero) & 1) == 0) { ++trailing_zero; } - new_start += trailing_zero; + new_start = static_cast(word_idx) * kBitsPerWord + trailing_zero; + } else { + // Every word is fully complete. + new_start = total_chunks; } new_start = (new_start / 8) * 8; if (new_start > total_chunks) { diff --git a/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc b/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc index 9e477012..259e4a78 100644 --- a/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc +++ b/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc @@ -165,6 +165,48 @@ TEST(BlobDownloadStateTest, SaveStateAdvancesBitmapByteAlignedStart) { EXPECT_EQ(loaded->completed_count, 80); } +// Regression: a second SaveState whose contiguous-complete prefix crosses a +// 64-bit word boundary from a non-word-aligned start must not advance +// bitmap_byte_aligned_start past the first still-pending chunk. The advance +// previously accumulated +64 per word onto the unaligned base and overshot by +// (start % 64), silently marking never-downloaded chunks complete on reload. 
+TEST(BlobDownloadStateTest, SaveStateFromUnalignedStartDoesNotMarkPendingComplete) { + TempDir d; + auto local = d.path() / "blob.bin"; + constexpr int32_t kBigNumChunks = 200; + constexpr int64_t kBigBlobSize = static_cast(kBigNumChunks) * kChunkSize; + auto s = BlobDownloadState::CreateNew("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks); + + // First save lands the contiguous prefix on a byte (8) but not a word (64) + // boundary. + for (int32_t i = 0; i < 8; ++i) { + s->MarkChunkComplete(i); + } + s->SaveState(); + EXPECT_EQ(s->bitmap_byte_aligned_start, 8); + + // Extend the contiguous prefix across the word boundary: chunks 0..64 done, + // chunk 65 is the first still-pending chunk. + for (int32_t i = 8; i <= 64; ++i) { + s->MarkChunkComplete(i); + } + s->SaveState(); + // Must round down to 64 (the byte boundary at/below the first pending chunk), + // never overshoot to 72. + EXPECT_EQ(s->bitmap_byte_aligned_start, 64); + + // Reload and prove chunks 65..71 (never downloaded) are still pending. 
+ auto loaded = BlobDownloadState::LoadState("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks); + ASSERT_NE(loaded, nullptr); + EXPECT_TRUE(loaded->IsChunkComplete(64)); + for (int32_t i = 65; i < 72; ++i) { + EXPECT_FALSE(loaded->IsChunkComplete(i)) << "chunk " << i << " was never downloaded"; + } + auto pending = loaded->GetPendingChunks(); + ASSERT_FALSE(pending.empty()); + EXPECT_EQ(pending.front(), 65); +} + TEST(BlobDownloadStateTest, LoadStateReturnsNullWhenFileMissing) { TempDir d; auto local = d.path() / "blob.bin"; From 42a7737f24a3b7dcf6a75f75d82860854ee8bfed Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 03:08:23 -0500 Subject: [PATCH 12/36] fix(download): persist sidecar before pre-allocating, so a crash stays resumable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found via end-to-end SDK testing: interrupting a download (process kill / crash) left the model "cached" but full of zeros. Root cause: AzureBlobDownloader pre-allocates each data file to its full size (FileWriter::Open) before downloading chunks, but the .dlstate sidecar was only written at the first periodic save (after save_interval ~= 10 chunks, ~20 MB). IsDownloadNeeded treats "data file at full content_length + no sidecar" as a completed download and skips it. So a crash in the window between pre-allocation and the first periodic save left a full-size, mostly-empty file with no sidecar; the next run skipped it and wrote inference_model.json, marking the model complete while the weights were all zeros — silent, permanent corruption. Fix: persist the sidecar immediately after CreateNew, before Open() pre-allocates the file, upholding the invariant IsDownloadNeeded relies on ("pre-allocated but unfinished <=> sidecar present"). A subsequent run then sees the sidecar and resumes the missing chunks instead of skipping the file. 
Regression test (AzureBlobDownloaderResumeTest.SidecarExistsBeforeFirstChunkCompletes) asserts the sidecar is on disk the moment the first chunk is requested. Verified E2E through the Python SDK: download qwen2.5-0.5b, kill at ~18%, resume to completion — the result is now byte-for-byte (SHA-256) identical to a fresh uninterrupted download, including the 862 MB model.onnx.data. Previously that file was 100% zeros. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 9 ++++ sdk_v2/cpp/test/internal_api/download_test.cc | 43 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 04dcd5ed..d2579b2a 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -175,6 +175,15 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, if (!state) { state = BlobDownloadState::CreateNew(blob_name, local_path, blob_size, static_cast(kChunkSize), num_chunks); + // Persist the sidecar now, before Open() pre-allocates the data file. + // IsDownloadNeeded treats "data file at full size + no sidecar" as a + // completed download and skips it. The periodic save below does not run + // until save_interval chunks are done (~20 MB), so a crash between + // pre-allocation and that first save would otherwise leave a full-size, + // mostly-empty file with no sidecar that the next run silently accepts as + // complete — serving zeros. Writing the sidecar up front upholds the + // invariant "pre-allocated but unfinished <=> sidecar present". 
+ state->SaveState(logger_); } // Track cumulative bytes for progress reporting; seed with bytes already diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index b434a6c4..d61fad1f 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1654,6 +1654,49 @@ TEST(AzureBlobDownloaderResumeTest, PersistsSidecarOnChunkFailure) { EXPECT_LT(retry_state->completed_count, kNumChunks); } +// Regression: the sidecar must reach disk before the data file is pre-allocated, +// not only after save_interval chunks. Open() pre-allocates the file to full +// size, and IsDownloadNeeded treats "full-size data file + no sidecar" as a +// completed download. So a crash in the window between pre-allocation and the +// first periodic save would otherwise leave a full-size, empty file that the +// next run skips — silently serving zeros. Verify a sidecar is already present +// the moment the first chunk is requested. +TEST(AzureBlobDownloaderResumeTest, SidecarExistsBeforeFirstChunkCompletes) { + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 100; // far above the save_interval floor of 10 + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + + auto sidecar = BlobDownloadState::GetStateFilePath(local); + std::atomic recorded{false}; + std::atomic sidecar_present_at_first_chunk{false}; + d.chunk_hook = [&](int64_t /*offset*/, int64_t /*size*/, + const std::function& /*sink*/, + std::atomic*) { + if (!recorded.exchange(true)) { + // First chunk callback: CreateNew + the initial SaveState + Open() have + // all run, so the sidecar must already exist. Abort before any periodic + // save to mimic an early interruption. 
+ sidecar_present_at_first_chunk.store(fs::exists(sidecar)); + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "stop after first chunk"); + } + }; + + EXPECT_THROW(d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/1), + fl::Exception); + + EXPECT_TRUE(sidecar_present_at_first_chunk.load()) + << "Sidecar must exist before any chunk completes so an early crash stays resumable."; + EXPECT_TRUE(fs::exists(sidecar)); + EXPECT_TRUE(fs::exists(local)); + EXPECT_EQ(fs::file_size(local), static_cast(kBlobSize)); +} + TEST(AzureBlobDownloaderResumeTest, CleansUpSidecarOnEmptyBlob) { TempDir tmpdir; auto local = tmpdir.path() / "empty.bin"; From 1ee720c2b8db9b0f3c96a58d2ee6d479123283c4 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 03:34:46 -0500 Subject: [PATCH 13/36] fix(download): serialize the user progress callback across chunk workers The per-chunk progress path (per_chunk_progress -> options.progress) could be entered concurrently by up to max_concurrency (default 64) chunk worker threads, but the public download progress API does not require the caller's callback to be thread-safe. A typical callback that updates a counter, UI handle, or logger would data-race. Guard the user callback invocation with a mutex so it is never re-entered concurrently; the atomics that compute the percentage are unchanged. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index d2579b2a..c47c6477 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -504,6 +504,13 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, // overall percentage. 
std::atomic total_downloaded_bytes{skipped_bytes}; + // The user progress callback can be reached from up to max_concurrency chunk + // worker threads at once (per_chunk_progress below). Serialize it so a + // caller's callback (UI handle, counter, logger, IPC) is never entered + // concurrently — the public download progress API does not require callers to + // be thread-safe. + std::mutex progress_mutex; + for (const auto& [blob, local_path] : blobs_to_download) { // Check cancellation between blobs if (cancelled.load(std::memory_order_relaxed)) { @@ -528,7 +535,11 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, overall = std::min(overall, total_size); float percent = static_cast(overall) / static_cast(total_size) * 100.0f; - int result = options.progress(percent); + int result; + { + std::lock_guard lock(progress_mutex); + result = options.progress(percent); + } if (result != 0) { cancelled.store(true, std::memory_order_relaxed); FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled by user callback return value"); From 6396c8a99ba6c6b38ed3efd8ff7886bcaaa15ed3 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 10:10:58 -0500 Subject: [PATCH 14/36] feat(download): also flush the resume sidecar on a wall-clock cap The sidecar was saved only every save_interval = max(10, num_chunks/50) chunks. On a slow connection that interval can span minutes, so a hard crash could lose minutes of completed download (all re-fetched on resume). Add a time cap (AzureBlobDownloader::save_state_interval_, default 3s): flush when the chunk count OR the elapsed wall-clock since the last save is reached, whichever first. This does not slow downloads: the check runs only at chunk completion (so it never flushes more often than chunks arrive), the sidecar write is tiny and not fsync'd, and it happens off the download critical path (network I/O and the file write take no state lock). 
On fast links the chunk count is still hit first, so save cadence is unchanged; on slow links the tiny extra writes land in the network-idle gaps and bound crash loss to seconds. The interval is an injectable member so tests can force the time path; TimeBasedSaveFlushesBeforeChunkInterval drives a 5-chunk blob (below the 10-chunk count interval) with a zero cap and asserts the sidecar is flushed mid-download. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 13 +++++- sdk_v2/cpp/src/download/blob_downloader.h | 8 ++++ sdk_v2/cpp/test/internal_api/download_test.cc | 43 +++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index c47c6477..500e9f7f 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -210,9 +210,16 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, FileWriter writer; writer.Open(local_path, blob_size); - // Save the sidecar roughly every 2% of chunks, with a floor of 10. + // Flush the sidecar every ~2% of chunks (floor 10) OR every + // save_state_interval_ of wall-clock, whichever comes first. The chunk + // count bounds the bytes re-downloaded after a crash; the time cap bounds + // the wall-clock download lost on a slow link, where save_interval chunks + // can span minutes. Checked only at chunk completion, so it never flushes + // more often than chunks arrive. 
const int32_t save_interval = std::max(10, num_chunks / 50); + const auto save_time_interval = save_state_interval_; std::atomic chunks_since_save{0}; + auto last_save_time = std::chrono::steady_clock::now(); std::mutex error_mutex; std::exception_ptr first_error; @@ -288,8 +295,10 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, std::lock_guard lock(state->mutex()); state->MarkChunkComplete(chunk_idx); int32_t inc = chunks_since_save.fetch_add(1, std::memory_order_relaxed) + 1; - if (inc >= save_interval) { + auto now = std::chrono::steady_clock::now(); + if (inc >= save_interval || now - last_save_time >= save_time_interval) { chunks_since_save.store(0, std::memory_order_relaxed); + last_save_time = now; should_save = true; } } diff --git a/sdk_v2/cpp/src/download/blob_downloader.h b/sdk_v2/cpp/src/download/blob_downloader.h index 7e54d8bf..c1f0bc3c 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.h +++ b/sdk_v2/cpp/src/download/blob_downloader.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include #include @@ -119,6 +120,13 @@ class AzureBlobDownloader : public IBlobDownloader { /// is routed through `Azure::Core::Context::Cancel()`. std::atomic* GetCancelFlag(ChunkContext& ctx); + /// Wall-clock cap between sidecar saves, on top of the chunk-count interval. + /// Bounds how much of a download is lost on a hard crash over a slow link, + /// where save_interval chunks can span minutes. Checked only at chunk + /// completion, so it never flushes more often than chunks arrive. Test + /// subclasses may shrink it to force time-based saves. 
+ std::chrono::steady_clock::duration save_state_interval_ = std::chrono::seconds(3); + private: ILogger* logger_ = nullptr; }; diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index d61fad1f..08e2e81d 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1523,6 +1523,8 @@ class FakeChunkAzureDownloader : public AzureBlobDownloader { using AzureBlobDownloader::AzureBlobDownloader; + void SetSaveStateInterval(std::chrono::steady_clock::duration d) { save_state_interval_ = d; } + protected: int64_t GetBlobSize(ChunkContext& /*ctx*/) override { return blob_size; } @@ -1697,6 +1699,47 @@ TEST(AzureBlobDownloaderResumeTest, SidecarExistsBeforeFirstChunkCompletes) { EXPECT_EQ(fs::file_size(local), static_cast(kBlobSize)); } +// The sidecar must also flush on a wall-clock cap, not only every save_interval +// chunks, so a crash on a slow connection (where save_interval chunks can span +// minutes) loses at most a few seconds of download. With the time cap at zero +// every completed chunk flushes, even though a 5-chunk blob never reaches the +// 10-chunk count interval. +TEST(AzureBlobDownloaderResumeTest, TimeBasedSaveFlushesBeforeChunkInterval) { + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 5; // below the save_interval floor of 10 + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + d.SetSaveStateInterval(std::chrono::steady_clock::duration::zero()); + + std::atomic max_persisted{0}; + d.chunk_hook = [&](int64_t /*offset*/, int64_t size, + const std::function& sink, + std::atomic*) { + // Record how many chunks the on-disk sidecar reports complete so far. 
+ if (auto st = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks)) { + int32_t c = st->completed_count; + int32_t prev = max_persisted.load(); + while (c > prev && !max_persisted.compare_exchange_weak(prev, c)) { + } + } + std::vector buf(static_cast(size), 0); + sink(buf.data(), buf.size()); + }; + + d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/1); + + // With the time cap each completed chunk is flushed, so by the final chunk the + // sidecar reflects the earlier ones even though the chunk-count interval (10) + // was never reached. Without it, a 5-chunk blob never saves mid-flight and + // this stays 0. + EXPECT_GE(max_persisted.load(), kNumChunks - 1); +} + TEST(AzureBlobDownloaderResumeTest, CleansUpSidecarOnEmptyBlob) { TempDir tmpdir; auto local = tmpdir.path() / "empty.bin"; From 4a961b568b1f9f0de7fa506fbfd67a663ca80738 Mon Sep 17 00:00:00 2001 From: bmehta001 Date: Thu, 18 Jun 2026 13:17:09 -0500 Subject: [PATCH 15/36] Fix comment --- sdk_v2/cpp/src/download/blob_download_state.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.h b/sdk_v2/cpp/src/download/blob_download_state.h index 66cc69db..9860e1a1 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.h +++ b/sdk_v2/cpp/src/download/blob_download_state.h @@ -16,17 +16,15 @@ class ILogger; /// Per-blob download progress, persisted next to the data file as `.dlstate`. /// /// Each chunk completion flips a bit in `full_completion_bitmap`. On resume, -/// `GetPendingChunks` enumerates only the chunks whose bits are still 0. +/// `GetPendingChunks` enumerates only chunks whose bits are still 0. /// /// The serialized form stores only the bitmap suffix starting at -/// `bitmap_byte_aligned_start` — the prefix of fully-completed chunks is -/// implied. This keeps the on-disk state proportional to the *unfinished* +/// `bitmap_byte_aligned_start` to `highest_completed_chunk`. 
+// This keeps the on-disk state proportional to the *unfinished* /// range, not the total file size. /// /// On-disk layout is a small fixed-width little-endian binary header followed -/// by the truncated bitmap bytes; see `blob_download_state.cc` for the exact -/// field order. Chosen over JSON for speed and compactness; the file is purely -/// internal cache state, never inspected by users. +/// by the truncated bitmap bytes. class BlobDownloadState { public: /// Identity of the blob (populated by caller; not serialized). @@ -40,7 +38,7 @@ class BlobDownloadState { /// Bit 0 of `full_completion_bitmap` represents chunk `bitmap_byte_aligned_start`. /// Always a multiple of 8 — the prefix of completed chunks below this index - /// is implied complete and is not serialized. + /// is not serialized. int32_t bitmap_byte_aligned_start = 0; /// Highest chunk index completed so far. -1 if no chunks are done yet. From f5b377f731208d0863b0b82e2c661e07d16bda0e Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 13:24:45 -0500 Subject: [PATCH 16/36] download: clarify state-sidecar serialization naming and docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename WriteLE/ReadLE to WriteNative/ReadNative — the helpers serialize scalars in host byte order (little-endian on every supported target), not via explicit little-endian conversion. Update the on-disk format comment to match. Fix two stale BlobDownloadState doc comments: full_completion_bitmap is pre-sized for all chunks by CreateNew and indexed by absolute chunk_idx (not lazily grown), and bitmap_byte_aligned_start only trims the serialized sidecar prefix, not the in-memory buffer. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cpp/src/download/blob_download_state.cc | 37 +++++++++++-------- sdk_v2/cpp/src/download/blob_download_state.h | 15 ++++---- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index bdeefd5b..2a3ea3e2 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -14,7 +14,9 @@ namespace { constexpr const char* kStateFileExtension = ".dlstate"; -// On-disk format (little-endian throughout): +// On-disk format. Scalar fields use host byte order (little-endian on every +// target we build for); see WriteNative/ReadNative below. The bitmap suffix is +// a raw byte copy and is endian-agnostic. // bytes | field // -------|-------------------------------------------------------- // 0..3 | magic "FLDS" @@ -35,8 +37,11 @@ constexpr uint8_t kVersion = 1; constexpr int32_t kBitsPerWord = 64; +// Serialize a scalar field in host byte order. Every target we build for +// (x64 / arm64) is little-endian, so the on-disk layout is little-endian in +// practice. 
template -void WriteLE(std::ostream& out, T value) { +void WriteNative(std::ostream& out, T value) { static_assert(std::is_trivially_copyable_v); unsigned char buf[sizeof(T)]; std::memcpy(buf, &value, sizeof(T)); @@ -44,7 +49,7 @@ void WriteLE(std::ostream& out, T value) { } template -bool ReadLE(std::istream& in, T& out_value) { +bool ReadNative(std::istream& in, T& out_value) { static_assert(std::is_trivially_copyable_v); unsigned char buf[sizeof(T)]; in.read(reinterpret_cast(buf), sizeof(T)); @@ -112,7 +117,7 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob char magic[4]{}; in.read(magic, 4); uint8_t version = 0; - if (!in || std::memcmp(magic, kMagic, 4) != 0 || !ReadLE(in, version) || version != kVersion) { + if (!in || std::memcmp(magic, kMagic, 4) != 0 || !ReadNative(in, version) || version != kVersion) { if (logger) { logger->Log(LogLevel::Warning, "Download state file " + state_path.string() + " has unexpected magic/version; ignoring"); @@ -128,9 +133,9 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob int32_t completed_count = 0; int64_t last_modified_unix_ms = 0; uint32_t trunc_len = 0; - if (!ReadLE(in, blob_size) || !ReadLE(in, chunk_size) || !ReadLE(in, total_chunks) || - !ReadLE(in, bitmap_byte_aligned_start) || !ReadLE(in, highest_completed_chunk) || - !ReadLE(in, completed_count) || !ReadLE(in, last_modified_unix_ms) || !ReadLE(in, trunc_len)) { + if (!ReadNative(in, blob_size) || !ReadNative(in, chunk_size) || !ReadNative(in, total_chunks) || + !ReadNative(in, bitmap_byte_aligned_start) || !ReadNative(in, highest_completed_chunk) || + !ReadNative(in, completed_count) || !ReadNative(in, last_modified_unix_ms) || !ReadNative(in, trunc_len)) { if (logger) { logger->Log(LogLevel::Warning, "Download state header truncated: " + state_path.string()); } @@ -332,15 +337,15 @@ void BlobDownloadState::SaveState(ILogger* logger) { return; } out.write(kMagic, 4); - WriteLE(out, kVersion); - WriteLE(out, blob_size); - WriteLE(out, 
chunk_size); - WriteLE(out, total_chunks); - WriteLE(out, bitmap_byte_aligned_start); - WriteLE(out, highest_completed_chunk); - WriteLE(out, completed_count); - WriteLE(out, last_modified_unix_ms); - WriteLE(out, trunc_len); + WriteNative(out, kVersion); + WriteNative(out, blob_size); + WriteNative(out, chunk_size); + WriteNative(out, total_chunks); + WriteNative(out, bitmap_byte_aligned_start); + WriteNative(out, highest_completed_chunk); + WriteNative(out, completed_count); + WriteNative(out, last_modified_unix_ms); + WriteNative(out, trunc_len); if (trunc_len > 0) { auto* src = reinterpret_cast(full_completion_bitmap.data()) + byte_offset; out.write(reinterpret_cast(src), trunc_len); diff --git a/sdk_v2/cpp/src/download/blob_download_state.h b/sdk_v2/cpp/src/download/blob_download_state.h index 9860e1a1..8a8ae07c 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.h +++ b/sdk_v2/cpp/src/download/blob_download_state.h @@ -20,7 +20,7 @@ class ILogger; /// /// The serialized form stores only the bitmap suffix starting at /// `bitmap_byte_aligned_start` to `highest_completed_chunk`. -// This keeps the on-disk state proportional to the *unfinished* +/// This keeps the on-disk state proportional to the *unfinished* /// range, not the total file size. /// /// On-disk layout is a small fixed-width little-endian binary header followed @@ -36,9 +36,9 @@ class BlobDownloadState { int32_t chunk_size = 0; int32_t total_chunks = 0; - /// Bit 0 of `full_completion_bitmap` represents chunk `bitmap_byte_aligned_start`. - /// Always a multiple of 8 — the prefix of completed chunks below this index - /// is not serialized. + /// Serialization marker (always a multiple of 8): chunks below this index are + /// complete and dropped from the sidecar's truncated bitmap. The in-memory + /// `full_completion_bitmap` still covers them. int32_t bitmap_byte_aligned_start = 0; /// Highest chunk index completed so far. -1 if no chunks are done yet. 
@@ -50,9 +50,10 @@ class BlobDownloadState { /// Unix epoch milliseconds; refreshed on every save. int64_t last_modified_unix_ms = 0; - /// Bit set: bit at `(chunk_idx - bitmap_byte_aligned_start) / 64` shifted by - /// `(chunk_idx - bitmap_byte_aligned_start) % 64`. Lazily grown by - /// `MarkChunkComplete` to cover up to `highest_completed_chunk`. + /// One bit per chunk over the whole blob: chunk `i` lives in word `i / 64` at + /// bit `i % 64` (absolute indexing — the buffer always starts at chunk 0). + /// Sized for all `total_chunks` by `CreateNew`; `MarkChunkComplete` sets bits + /// without resizing. std::vector full_completion_bitmap; /// Sidecar path for `local_file_path`. From 94809d41ec175bde3a18cff1d68a0e7c1ac28264 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 15:12:34 -0500 Subject: [PATCH 17/36] download: fixed-size sidecar interval; drop test-only cancel plumbing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Save the resume sidecar every ~16 MB of completed chunks instead of every 2% of the blob, so a hard crash re-downloads at most a fixed amount regardless of blob size. This makes the wall-clock save trigger redundant, so remove it (save_state_interval_, the time-based branch, the SetSaveStateInterval hook, and the TimeBasedSave test). Drop ChunkContext::cancel_flag and GetCancelFlag(): production cancels in-flight chunks via Azure::Core::Context::Cancel(), and the real DownloadChunkStreaming never read the flag — it existed only for the test to observe cancellation. The cancel test now polls the genuine signal through IsCancellationRequested() -> azure_ctx->IsCancelled(), exercising the production path. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 32 +++++----- sdk_v2/cpp/src/download/blob_downloader.h | 21 +++---- sdk_v2/cpp/test/internal_api/download_test.cc | 61 +++---------------- 3 files changed, 30 insertions(+), 84 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 500e9f7f..2bb4fe13 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -40,12 +40,12 @@ constexpr size_t kStreamingBufferBytes = 64 * 1024; /// Per-blob shared state passed to the protected virtuals. The production /// virtuals dereference `blob_client` / `azure_ctx`; tests can ignore them. -/// `cancel_flag` is flipped by the orchestrator on the first chunk failure so -/// workers exit promptly without waiting for Azure SDK timeouts. +/// Cancellation is observed through `azure_ctx`: the orchestrator calls +/// `Cancel()` on it after the first chunk failure or on external cancellation, +/// which interrupts every in-flight chunk read. struct AzureBlobDownloader::ChunkContext { Azure::Storage::Blobs::BlobClient* blob_client; Azure::Core::Context* azure_ctx; - std::atomic* cancel_flag; }; AzureBlobDownloader::AzureBlobDownloader(ILogger* logger) : logger_(logger) {} @@ -76,8 +76,8 @@ int64_t AzureBlobDownloader::GetBlobSize(ChunkContext& ctx) { return props.BlobSize; } -std::atomic* AzureBlobDownloader::GetCancelFlag(ChunkContext& ctx) { - return ctx.cancel_flag; +bool AzureBlobDownloader::IsCancellationRequested(ChunkContext& ctx) { + return ctx.azure_ctx->IsCancelled(); } void AzureBlobDownloader::DownloadChunkStreaming( @@ -154,7 +154,7 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // or by external cancellation; checked by workers between iterations. 
std::atomic internal_cancel{false}; - ChunkContext chunk_ctx{&blob_client, &azure_ctx, &internal_cancel}; + ChunkContext chunk_ctx{&blob_client, &azure_ctx}; int64_t blob_size = GetBlobSize(chunk_ctx); @@ -210,16 +210,14 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, FileWriter writer; writer.Open(local_path, blob_size); - // Flush the sidecar every ~2% of chunks (floor 10) OR every - // save_state_interval_ of wall-clock, whichever comes first. The chunk - // count bounds the bytes re-downloaded after a crash; the time cap bounds - // the wall-clock download lost on a slow link, where save_interval chunks - // can span minutes. Checked only at chunk completion, so it never flushes - // more often than chunks arrive. - const int32_t save_interval = std::max(10, num_chunks / 50); - const auto save_time_interval = save_state_interval_; + // Flush the resume sidecar roughly every 16 MB of completed chunks, so a + // hard crash re-downloads at most that much on resume — a fixed bound, + // independent of blob size. Checked only at chunk completion, so it never + // flushes faster than chunks arrive. 
+ constexpr int64_t kBytesPerSidecarSave = 16 * 1024 * 1024; + const int32_t save_interval = + std::max(1, static_cast(kBytesPerSidecarSave / kChunkSize)); std::atomic chunks_since_save{0}; - auto last_save_time = std::chrono::steady_clock::now(); std::mutex error_mutex; std::exception_ptr first_error; @@ -295,10 +293,8 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, std::lock_guard lock(state->mutex()); state->MarkChunkComplete(chunk_idx); int32_t inc = chunks_since_save.fetch_add(1, std::memory_order_relaxed) + 1; - auto now = std::chrono::steady_clock::now(); - if (inc >= save_interval || now - last_save_time >= save_time_interval) { + if (inc >= save_interval) { chunks_since_save.store(0, std::memory_order_relaxed); - last_save_time = now; should_save = true; } } diff --git a/sdk_v2/cpp/src/download/blob_downloader.h b/sdk_v2/cpp/src/download/blob_downloader.h index c1f0bc3c..6835060b 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.h +++ b/sdk_v2/cpp/src/download/blob_downloader.h @@ -3,7 +3,6 @@ #pragma once #include -#include #include #include #include @@ -113,19 +112,13 @@ class AzureBlobDownloader : public IBlobDownloader { std::vector& scratch, const std::function& sink); - /// Accessor for test subclasses overriding `DownloadChunkStreaming`. Returns - /// the shared cancellation flag — when set by the orchestrator (e.g. after - /// another chunk fails), in-flight chunk simulations should observe it and - /// exit promptly. Production code doesn't need this directly: cancellation - /// is routed through `Azure::Core::Context::Cancel()`. - std::atomic* GetCancelFlag(ChunkContext& ctx); - - /// Wall-clock cap between sidecar saves, on top of the chunk-count interval. - /// Bounds how much of a download is lost on a hard crash over a slow link, - /// where save_interval chunks can span minutes. Checked only at chunk - /// completion, so it never flushes more often than chunks arrive. 
Test - /// subclasses may shrink it to force time-based saves. - std::chrono::steady_clock::duration save_state_interval_ = std::chrono::seconds(3); + /// Reports whether cooperative cancellation has been requested for this + /// download. The orchestrator calls `Azure::Core::Context::Cancel()` after a + /// sibling chunk fails or on external cancellation, and the Azure SDK + /// interrupts in-flight transfers as a result. Exposed for test subclasses + /// overriding `DownloadChunkStreaming` so their chunk simulations can observe + /// the same signal and exit promptly. + bool IsCancellationRequested(ChunkContext& ctx); private: ILogger* logger_ = nullptr; diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 08e2e81d..160aadef 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1514,7 +1514,7 @@ class FakeChunkAzureDownloader : public AzureBlobDownloader { /// - sleep / poll cancellation std::function& sink, - std::atomic* cancel_flag)> + const std::function& is_cancelled)> chunk_hook; std::atomic chunk_call_count{0}; @@ -1523,8 +1523,6 @@ class FakeChunkAzureDownloader : public AzureBlobDownloader { using AzureBlobDownloader::AzureBlobDownloader; - void SetSaveStateInterval(std::chrono::steady_clock::duration d) { save_state_interval_ = d; } - protected: int64_t GetBlobSize(ChunkContext& /*ctx*/) override { return blob_size; } @@ -1537,7 +1535,7 @@ class FakeChunkAzureDownloader : public AzureBlobDownloader { requested_offsets.push_back(offset); } if (chunk_hook) { - chunk_hook(offset, size, sink, GetCancelFlag(ctx)); + chunk_hook(offset, size, sink, [this, &ctx]() { return IsCancellationRequested(ctx); }); return; } // Default: stream the chunk to the sink in scratch-sized pieces, filled @@ -1632,7 +1630,7 @@ TEST(AzureBlobDownloaderResumeTest, PersistsSidecarOnChunkFailure) { constexpr int64_t kFailOffset = 4 * int64_t{kChunkSize}; 
d.chunk_hook = [&](int64_t offset, int64_t size, const std::function& sink, - std::atomic* /*cancel_flag*/) { + const std::function& /*is_cancelled*/) { if (offset == kFailOffset) { FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "simulated chunk failure"); } @@ -1668,7 +1666,7 @@ TEST(AzureBlobDownloaderResumeTest, SidecarExistsBeforeFirstChunkCompletes) { auto local = tmpdir.path() / "blob.bin"; constexpr int32_t kChunkSize = 2 * 1024 * 1024; - constexpr int32_t kNumChunks = 100; // far above the save_interval floor of 10 + constexpr int32_t kNumChunks = 100; // far above the per-save chunk interval constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; FakeChunkAzureDownloader d; @@ -1679,7 +1677,7 @@ TEST(AzureBlobDownloaderResumeTest, SidecarExistsBeforeFirstChunkCompletes) { std::atomic sidecar_present_at_first_chunk{false}; d.chunk_hook = [&](int64_t /*offset*/, int64_t /*size*/, const std::function& /*sink*/, - std::atomic*) { + const std::function&) { if (!recorded.exchange(true)) { // First chunk callback: CreateNew + the initial SaveState + Open() have // all run, so the sidecar must already exist. Abort before any periodic @@ -1699,47 +1697,6 @@ TEST(AzureBlobDownloaderResumeTest, SidecarExistsBeforeFirstChunkCompletes) { EXPECT_EQ(fs::file_size(local), static_cast(kBlobSize)); } -// The sidecar must also flush on a wall-clock cap, not only every save_interval -// chunks, so a crash on a slow connection (where save_interval chunks can span -// minutes) loses at most a few seconds of download. With the time cap at zero -// every completed chunk flushes, even though a 5-chunk blob never reaches the -// 10-chunk count interval. 
-TEST(AzureBlobDownloaderResumeTest, TimeBasedSaveFlushesBeforeChunkInterval) { - TempDir tmpdir; - auto local = tmpdir.path() / "blob.bin"; - - constexpr int32_t kChunkSize = 2 * 1024 * 1024; - constexpr int32_t kNumChunks = 5; // below the save_interval floor of 10 - constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; - - FakeChunkAzureDownloader d; - d.blob_size = kBlobSize; - d.SetSaveStateInterval(std::chrono::steady_clock::duration::zero()); - - std::atomic max_persisted{0}; - d.chunk_hook = [&](int64_t /*offset*/, int64_t size, - const std::function& sink, - std::atomic*) { - // Record how many chunks the on-disk sidecar reports complete so far. - if (auto st = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks)) { - int32_t c = st->completed_count; - int32_t prev = max_persisted.load(); - while (c > prev && !max_persisted.compare_exchange_weak(prev, c)) { - } - } - std::vector buf(static_cast(size), 0); - sink(buf.data(), buf.size()); - }; - - d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/1); - - // With the time cap each completed chunk is flushed, so by the final chunk the - // sidecar reflects the earlier ones even though the chunk-count interval (10) - // was never reached. Without it, a 5-chunk blob never saves mid-flight and - // this stays 0. - EXPECT_GE(max_persisted.load(), kNumChunks - 1); -} - TEST(AzureBlobDownloaderResumeTest, CleansUpSidecarOnEmptyBlob) { TempDir tmpdir; auto local = tmpdir.path() / "empty.bin"; @@ -1772,11 +1729,11 @@ TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { FakeChunkAzureDownloader d; d.blob_size = kBlobSize; // The failing chunk throws fast. Every other chunk sleeps for up to 5 s in - // 50-ms slices, polling the cancel flag. If linked cancellation works, they - // observe the flag within one slice of the failure and exit promptly. + // 50-ms slices, polling cancellation. 
If linked cancellation works, they + // observe it within one slice of the failure and exit promptly. d.chunk_hook = [](int64_t offset, int64_t size, const std::function& sink, - std::atomic* cancel_flag) { + const std::function& is_cancelled) { if (offset == kFailOffset) { // Give other workers a moment to enter their sleep loop before we throw, // so we're meaningfully testing the cancel-while-in-flight path. @@ -1784,7 +1741,7 @@ TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, "simulated chunk failure"); } for (int i = 0; i < 100; ++i) { - if (cancel_flag && cancel_flag->load(std::memory_order_relaxed)) { + if (is_cancelled()) { FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "cancelled mid-chunk"); } std::this_thread::sleep_for(std::chrono::milliseconds(50)); From 92f8605339014bc313aaa1180cb9199b785d9342 Mon Sep 17 00:00:00 2001 From: bmehta001 Date: Thu, 18 Jun 2026 17:39:54 -0500 Subject: [PATCH 18/36] Rename function --- sdk_v2/cpp/src/download/cross_process_file_lock.cc | 2 +- sdk_v2/cpp/src/download/cross_process_file_lock.h | 4 ++-- sdk_v2/cpp/src/download/download_manager.cc | 2 +- .../cpp/test/internal_api/cross_process_file_lock_test.cc | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc index 8e12411d..52eb0134 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.cc +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -194,7 +194,7 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto new CrossProcessFileLock(std::move(lock_path), std::move(state), logger)); } -std::unique_ptr WaitForLockForDirectory( +std::unique_ptr WaitForDirectoryLock( const std::filesystem::path& directory, const CancellationPredicate& is_cancelled, ILogger* logger, diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.h 
b/sdk_v2/cpp/src/download/cross_process_file_lock.h index 2c771b9c..7efbace8 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.h +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.h @@ -45,14 +45,14 @@ class CrossProcessFileLock { ILogger* logger_; }; -/// Returning true aborts WaitForLockForDirectory with FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED. +/// Returning true aborts WaitForDirectoryLock with FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED. using CancellationPredicate = std::function; /// Polls TryAcquireForDirectory until the lock is acquired, `is_cancelled()` /// returns true, or `timeout` elapses. /// Throws FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED on cancellation, or /// FOUNDRY_LOCAL_ERROR_INTERNAL on timeout. -std::unique_ptr WaitForLockForDirectory( +std::unique_ptr WaitForDirectoryLock( const std::filesystem::path& directory, const CancellationPredicate& is_cancelled, ILogger* logger = nullptr, diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index 9a2c34da..364e641b 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -295,7 +295,7 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, logger_.Log(LogLevel::Information, "Model download is being performed by another process. Waiting on lock at '" + model_path + "'..."); - lock = WaitForLockForDirectory(model_path, cancel_pred, &logger_); + lock = WaitForDirectoryLock(model_path, cancel_pred, &logger_); } // Another process may have just completed the download we were waiting on. 
diff --git a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc index a6e38fdf..a1781620 100644 --- a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc +++ b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc @@ -110,7 +110,7 @@ TEST(CrossProcessFileLockTest, WaitForLockReturnsImmediatelyWhenAvailable) { TempDir dir; auto start = std::chrono::steady_clock::now(); - auto lock = WaitForLockForDirectory(dir.path(), []() { return false; }); + auto lock = WaitForDirectoryLock(dir.path(), []() { return false; }); auto elapsed = std::chrono::steady_clock::now() - start; ASSERT_NE(lock, nullptr); @@ -130,7 +130,7 @@ TEST(CrossProcessFileLockTest, WaitForLockAcquiresAfterHolderReleases) { }); auto start = std::chrono::steady_clock::now(); - auto lock = WaitForLockForDirectory(dir.path(), + auto lock = WaitForDirectoryLock(dir.path(), []() { return false; }, /*logger=*/nullptr, /*poll_interval=*/std::chrono::milliseconds(100), @@ -155,7 +155,7 @@ TEST(CrossProcessFileLockTest, WaitForLockThrowsOnCancellation) { }); try { - (void)WaitForLockForDirectory(dir.path(), + (void)WaitForDirectoryLock(dir.path(), [&cancel]() { return cancel.load(); }, /*logger=*/nullptr, /*poll_interval=*/std::chrono::milliseconds(100), @@ -174,7 +174,7 @@ TEST(CrossProcessFileLockTest, WaitForLockThrowsOnTimeout) { ASSERT_NE(holder, nullptr); try { - (void)WaitForLockForDirectory(dir.path(), + (void)WaitForDirectoryLock(dir.path(), []() { return false; }, /*logger=*/nullptr, /*poll_interval=*/std::chrono::milliseconds(50), From f4567a472e3b414bcef1ce8507aef68954319c94 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Thu, 18 Jun 2026 17:50:00 -0500 Subject: [PATCH 19/36] test(download): add a true cross-process lock test (POSIX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fork a child that holds the directory lock and verify this process is locked out while 
the child holds it, and that the kernel releases the flock when the child exits — even though the child leaves the lock file on disk, mirroring a downloader that crashed mid-download. Closes the gap where lock coverage was in-process only. POSIX-gated (macOS/Linux); Windows share-none contention is already covered in-process by SecondAcquireReturnsNullWhileFirstIsHeld. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cross_process_file_lock_test.cc | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc index a1781620..d140fb08 100644 --- a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc +++ b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc @@ -16,6 +16,11 @@ #include #include +#ifndef _WIN32 +#include <sys/wait.h> +#include <unistd.h> +#endif + namespace fs = std::filesystem; using namespace fl; @@ -186,3 +191,63 @@ TEST(CrossProcessFileLockTest, WaitForLockThrowsOnTimeout) { EXPECT_NE(what.find("timed out"), std::string::npos); } } + +#ifndef _WIN32 +// A genuine cross-PROCESS test (POSIX, i.e. macOS/Linux): fork a child that +// holds the lock, then verify (a) this process is locked out while the child +// holds it and (b) the kernel releases the flock when the child *exits* — even +// though the child leaves the lock file on disk, mirroring a downloader that +// crashed mid-download. Windows share-none contention is already covered +// in-process by SecondAcquireReturnsNullWhileFirstIsHeld (dwShareMode=0 is +// enforced identically for same- and cross-process opens). 
+TEST(CrossProcessFileLockTest, HeldAcrossProcessesAndReleasedWhenHolderExits) { + TempDir dir; + const auto acquired_signal = dir.path() / "child_acquired"; + const auto release_signal = dir.path() / "parent_done"; + + const pid_t pid = ::fork(); + ASSERT_NE(pid, -1) << "fork failed"; + + if (pid == 0) { + // CHILD: acquire, announce, wait (bounded) for the parent, then _exit while + // still holding it. _exit skips C++/gtest teardown — correct for a forked + // child — so the lock's destructor never runs and the file is left behind; + // the kernel still drops the flock on process exit. + auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + if (lock == nullptr) { + _exit(2); + } + { std::ofstream(acquired_signal).put('x'); } + for (int i = 0; i < 200 && !fs::exists(release_signal); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(25)); + } + _exit(0); + } + + // PARENT: wait for the child to take the lock (up to ~5 s). + bool child_acquired = false; + for (int i = 0; i < 200 && !child_acquired; ++i) { + if (fs::exists(acquired_signal)) { + child_acquired = true; + } else { + std::this_thread::sleep_for(std::chrono::milliseconds(25)); + } + } + ASSERT_TRUE(child_acquired) << "child process never acquired the lock"; + + // A different process holds it — we must be locked out. + EXPECT_EQ(CrossProcessFileLock::TryAcquireForDirectory(dir.path()), nullptr); + + // Release the child and reap it. + { std::ofstream(release_signal).put('x'); } + int status = 0; + ASSERT_EQ(::waitpid(pid, &status, 0), pid); + EXPECT_TRUE(WIFEXITED(status)) << "child did not exit normally"; + EXPECT_EQ(WEXITSTATUS(status), 0) << "child failed to acquire the lock"; + + // The holder process is gone: the kernel released its flock even though the + // lock file is still on disk, so the next acquirer simply re-locks it. 
+ auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + EXPECT_NE(reacquired, nullptr) << "lock not released after the holder process exited"; +} +#endif // !_WIN32 From 124f3f3c36243bfceaea211b56bb55ff2f78a20e Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 03:51:23 -0500 Subject: [PATCH 20/36] download: serialize model downloads with one process-wide mutex Replace the per-model lock map (model_locks_/GetModelLock) with a single download_mutex_ so only one model download runs at a time in-process. The per-model worker pool already saturates the network and disk, so concurrent multi-model downloads mostly contended for the same bandwidth anyway; serializing is simpler and drops the unbounded per-path mutex map. The cross-process guarantee is unchanged (CrossProcessFileLock). Replace the now-invalid ConcurrentDownloadsOfDifferentModelsRunConcurrently test with ModelDownloadsSerializeUnderGlobalLock, which asserts the peak in-flight download count stays at 1 across two different models. Also in file_writer.cc: drop the redundant & 0xFFFFFFFF masks when splitting the 64-bit offset across OVERLAPPED.Offset/OffsetHigh (the DWORD casts already truncate), and reword the EnsureFileExistsAtSize comment to say it recreates the file when the size differs (larger or smaller). Rename OpenTruncatesIfSizeChanged to OpenRecreatesFileWhenSizeDiffers since the test grows the file. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/download_manager.cc | 27 ++++--------- sdk_v2/cpp/src/download/download_manager.h | 21 ++-------- sdk_v2/cpp/src/download/file_writer.cc | 11 ++++-- sdk_v2/cpp/test/internal_api/download_test.cc | 39 ++++++++----------- .../cpp/test/internal_api/file_writer_test.cc | 2 +- 5 files changed, 37 insertions(+), 63 deletions(-) diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index 364e641b..e30f3dab 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -237,25 +237,14 @@ std::string DownloadManager::ComputeModelPath(const ModelInfo& info) const { return full_path.string(); } -std::shared_ptr DownloadManager::GetModelLock(const std::string& model_path) const { - std::lock_guard guard(model_locks_mutex_); - auto& slot = model_locks_[model_path]; - if (!slot) { - slot = std::make_shared(); - } - return slot; -} - std::string DownloadManager::DownloadModel(const ModelInfo& info, std::function progress_cb) { - // Resolve the cache path first, then serialize per model. Two downloads of the - // same model share one mutex and run one-at-a-time; downloads of different - // models take different mutexes and proceed concurrently. The cross-process - // file lock taken below extends the same-model guarantee across every process - // and app that shares this cache directory. + // Serialize all model downloads in this process: only one runs at a time, so it + // gets the full network and disk instead of competing with another download. + // The cross-process file lock taken below extends the guarantee across every + // process and app that shares this cache directory. 
+ std::lock_guard download_guard(download_mutex_); auto model_path = ComputeModelPath(info); - auto model_lock = GetModelLock(model_path); - std::lock_guard download_guard(*model_lock); // Fast path: serve the cache without taking the cross-process lock. // A valid cache hit requires: directory exists, no in-progress signal file, and @@ -281,9 +270,9 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, std::filesystem::create_directories(model_path); // Serialize across processes that share this cache directory. Inside the - // running process the per-model lock already prevents reentry; the file lock - // protects against a second SDK instance (e.g. another service or CLI) racing - // on the same model directory. + // running process the download mutex already serializes downloads; the file + // lock protects against a second SDK instance (e.g. another service or CLI) + // racing on the same model directory. auto cancel_pred = [&progress_cb]() -> bool { // progress_cb returning non-zero is the SDK's cancellation signal. Reusing // it here also acts as a periodic heartbeat (0%) while we wait for the diff --git a/sdk_v2/cpp/src/download/download_manager.h b/sdk_v2/cpp/src/download/download_manager.h index 44f9ce38..9742eebf 100644 --- a/sdk_v2/cpp/src/download/download_manager.h +++ b/sdk_v2/cpp/src/download/download_manager.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -71,12 +70,6 @@ class DownloadManager { /// Uses {cache_dir}/{publisher}/{model_id_with_version_fix} std::string ComputeModelPath(const ModelInfo& info) const; - /// Get (creating on first use) the per-model serialization mutex for the - /// resolved cache path `model_path`. Downloads of the same model share one - /// mutex and run one-at-a-time; downloads of different models get distinct - /// mutexes and proceed concurrently in-process. 
- std::shared_ptr GetModelLock(const std::string& model_path) const; - std::string cache_directory_; // Explicit registry region override. Empty (or "auto") means "use the model's // detected_region, falling back to default registry region" — set at construction @@ -87,16 +80,10 @@ class DownloadManager { std::unique_ptr registry_client_; std::unique_ptr blob_downloader_; - /// Guards `model_locks_`. Held only briefly to look up or insert a per-model - /// mutex — never across an actual download. - mutable std::mutex model_locks_mutex_; - - /// Per-model serialization mutexes, keyed by resolved cache path. Bounded by - /// the number of distinct models this process downloads. The `shared_ptr` - /// keeps a mutex alive for an in-flight download even though its map entry - /// persists. Cross-process serialization is handled separately by - /// CrossProcessFileLock. - mutable std::map> model_locks_; + /// Serializes all model downloads in this process: only one runs at a time, so + /// each gets the full network/disk instead of competing with another download. + /// Cross-process serialization is handled separately by CrossProcessFileLock. + std::mutex download_mutex_; }; } // namespace fl diff --git a/sdk_v2/cpp/src/download/file_writer.cc b/sdk_v2/cpp/src/download/file_writer.cc index 46cc1716..0ac02d98 100644 --- a/sdk_v2/cpp/src/download/file_writer.cc +++ b/sdk_v2/cpp/src/download/file_writer.cc @@ -27,8 +27,9 @@ namespace fs = std::filesystem; namespace { -/// Ensure the data file exists at exactly `expected_size`. Skips truncation if -/// the file is already at that size — the resume path relies on this. +/// Ensure the data file exists at exactly `expected_size`, recreating it at the +/// new size if it currently differs (larger or smaller). An existing file that +/// is already the right size is left intact — the resume path relies on this. 
void EnsureFileExistsAtSize(const fs::path& path, int64_t expected_size) { std::error_code ec; auto cur_size = fs::file_size(path, ec); @@ -90,8 +91,10 @@ void FileWriter::WriteAt(int64_t offset, const uint8_t* data, size_t len) { // handle are safe for non-overlapping ranges; the kernel orders them. while (len > 0) { OVERLAPPED ov{}; - ov.Offset = static_cast(static_cast(offset) & 0xFFFFFFFFULL); - ov.OffsetHigh = static_cast((static_cast(offset) >> 32) & 0xFFFFFFFFULL); + // Split the 64-bit file offset across the OVERLAPPED halves: the DWORD casts + // keep the low 32 bits in Offset and the high 32 bits in OffsetHigh. + ov.Offset = static_cast(static_cast(offset)); + ov.OffsetHigh = static_cast(static_cast(offset) >> 32); DWORD to_write = static_cast(len > 0x7FFFFFFFu ? 0x7FFFFFFFu : len); DWORD written = 0; if (!::WriteFile(handle_, data, to_write, &written, &ov)) { diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 160aadef..bc8cfb3d 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1175,7 +1175,7 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfSameModelSerialize) { // the blob downloader proves both downloads occupy the critical section at the // same time: each arrival waits for its peer, so if the two ever serialized the // first arrival would time out waiting for a peer that can't enter yet. -TEST(DownloadManagerTest, ConcurrentDownloadsOfDifferentModelsRunConcurrently) { +TEST(DownloadManagerTest, ModelDownloadsSerializeUnderGlobalLock) { TempDir tmpdir; DownloadManager manager(tmpdir.string(), "eastus", 64, fl::test::NullLog()); @@ -1187,13 +1187,12 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfDifferentModelsRunConcurrently) { }); manager.SetModelRegistryClient(std::move(registry)); - class RendezvousDownloader : public IBlobDownloader { + // Tracks the peak number of downloads running at once. 
The global download + // mutex must keep this at 1 even for different models. + class ConcurrencyProbe : public IBlobDownloader { public: - std::mutex m; - std::condition_variable cv; - int arrived = 0; - bool released = false; - std::atomic timeouts{0}; + std::atomic active{0}; + std::atomic peak{0}; std::vector ListBlobs(const std::string&) override { return {{"variant-cpu/weights.bin", 16}}; @@ -1203,15 +1202,13 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfDifferentModelsRunConcurrently) { const std::string& local_path, int, BlobBytesWrittenFn bytes_written_cb, std::atomic*) override { - { - std::unique_lock lk(m); - if (++arrived >= 2) { // both concurrent downloads reached the rendezvous - released = true; - cv.notify_all(); - } else if (!cv.wait_for(lk, std::chrono::seconds(5), [&] { return released; })) { - ++timeouts; // peer never arrived within the window → downloads serialized - } + int now = ++active; + int prev = peak.load(); + while (now > prev && !peak.compare_exchange_weak(prev, now)) { } + // Hold long enough that a second concurrent download would overlap here. 
+ std::this_thread::sleep_for(std::chrono::milliseconds(150)); + --active; auto parent = fs::path(local_path).parent_path(); if (!parent.empty()) { @@ -1225,9 +1222,9 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfDifferentModelsRunConcurrently) { } }; - auto rendezvous = std::make_unique(); - auto* rendezvous_raw = rendezvous.get(); - manager.SetBlobDownloader(std::move(rendezvous)); + auto probe = std::make_unique(); + auto* probe_raw = probe.get(); + manager.SetBlobDownloader(std::move(probe)); auto make_info = [](const char* id, const char* publisher) { ModelInfo info; @@ -1259,10 +1256,8 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfDifferentModelsRunConcurrently) { t2.join(); EXPECT_EQ(exceptions.load(), 0); - EXPECT_TRUE(rendezvous_raw->released) - << "Both different-model downloads should have met at the rendezvous."; - EXPECT_EQ(rendezvous_raw->timeouts.load(), 0) - << "Downloads of different models must run concurrently, not serialize."; + EXPECT_EQ(probe_raw->peak.load(), 1) + << "The global download mutex must serialize all model downloads, even for different models."; } // HasInferenceModelJson must return false instead of throwing when the path diff --git a/sdk_v2/cpp/test/internal_api/file_writer_test.cc b/sdk_v2/cpp/test/internal_api/file_writer_test.cc index 7a0cec21..c685506e 100644 --- a/sdk_v2/cpp/test/internal_api/file_writer_test.cc +++ b/sdk_v2/cpp/test/internal_api/file_writer_test.cc @@ -80,7 +80,7 @@ TEST(FileWriterTest, OpenPreservesExistingFileAtSameSize) { EXPECT_EQ(byte, 0xAB); } -TEST(FileWriterTest, OpenTruncatesIfSizeChanged) { +TEST(FileWriterTest, OpenRecreatesFileWhenSizeDiffers) { TempPath p; { std::ofstream f(p.path(), std::ios::binary); From faa99021f50fd97d99594f154f48e8d4ebd65412 Mon Sep 17 00:00:00 2001 From: bmehta001 Date: Fri, 19 Jun 2026 04:18:47 -0500 Subject: [PATCH 21/36] Nits --- sdk_v2/cpp/src/download/blob_downloader.cc | 17 ++++++++--------- sdk_v2/cpp/src/download/blob_downloader.h | 19 
++++++------------- .../src/download/cross_process_file_lock.cc | 3 +-- sdk_v2/cpp/src/download/file_writer.h | 3 +-- sdk_v2/cpp/test/internal_api/download_test.cc | 10 +++++----- 5 files changed, 21 insertions(+), 31 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 2bb4fe13..41f010a5 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -38,11 +38,10 @@ constexpr size_t kStreamingBufferBytes = 64 * 1024; // AzureBlobDownloader — real Azure Storage SDK implementation // ======================================================================== -/// Per-blob shared state passed to the protected virtuals. The production -/// virtuals dereference `blob_client` / `azure_ctx`; tests can ignore them. -/// Cancellation is observed through `azure_ctx`: the orchestrator calls -/// `Cancel()` on it after the first chunk failure or on external cancellation, -/// which interrupts every in-flight chunk read. +/// Per-blob shared state passed to the protected virtuals. Cancellation is +/// observed through `azure_ctx`: the orchestrator calls `Cancel()` on it after +/// the first chunk failure or on external cancellation, which interrupts every +/// in-flight chunk read. struct AzureBlobDownloader::ChunkContext { Azure::Storage::Blobs::BlobClient* blob_client; Azure::Core::Context* azure_ctx; @@ -460,13 +459,13 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, }); // Step 4: Calculate total size across every in-scope blob, including those - // already present on disk — so 100% always means "every byte is local". + // already present on disk. int64_t total_size = 0; for (const auto& [blob, _] : blobs_to_download) { total_size += blob.content_length; } - // Step 4.25: Skip blobs already present at the expected size. Their bytes + // Step 5: Skip blobs already present at the expected size. 
Their bytes // count toward "downloaded" so the percentage stays accurate when this is a // resume of a partially-completed download. int64_t skipped_bytes = 0; @@ -481,7 +480,7 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, }), blobs_to_download.end()); - // Step 4.5: Emit initial progress reflecting any already-on-disk bytes. + // Step 6: Emit initial progress reflecting any already-on-disk bytes. // If everything was skipped, emit 100% directly and return. if (blobs_to_download.empty()) { if (options.progress) { @@ -501,7 +500,7 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, } } - // Step 5: Download each blob with per-chunk progress. + // Step 7: Download each blob with per-chunk progress. // The cancellation flag is set when the progress callback returns non-zero. // It is shared with chunk download threads so they can exit promptly. std::atomic cancelled{false}; diff --git a/sdk_v2/cpp/src/download/blob_downloader.h b/sdk_v2/cpp/src/download/blob_downloader.h index 6835060b..2137ccc2 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.h +++ b/sdk_v2/cpp/src/download/blob_downloader.h @@ -86,23 +86,18 @@ class AzureBlobDownloader : public IBlobDownloader { protected: /// Opaque per-blob context. Defined in `blob_downloader.cc`; holds the Azure - /// SDK BlobClient + Context pointers used by the production virtuals. Test - /// subclasses can ignore this argument and use only the explicit parameters. + /// SDK BlobClient + Context pointers used by the production virtuals. struct ChunkContext; /// Return the blob size in bytes. Production calls `BlobClient::GetProperties`. - /// Test subclasses can override to return a constant without touching Azure. virtual int64_t GetBlobSize(ChunkContext& ctx); /// Read `size` bytes starting at `offset` from the blob and forward them - /// piecewise to `sink`. 
The production implementation pulls from the blob - /// client referenced by `ctx`; test subclasses can override to inject - /// chunk-level failures or slow reads. + /// piecewise to `sink`. Pulls from the blob client referenced by `ctx`. /// - /// `scratch` is a per-worker reusable buffer (default 64 KB) — implementers - /// may resize it but should avoid allocating one-buffer-per-chunk. `sink` - /// must be invoked with strictly contiguous ranges; the cumulative byte - /// count delivered to `sink` must equal `size` on success. + /// `scratch` is a per-worker reusable buffer (default 64 KB). `sink` must be + /// invoked with strictly contiguous ranges; the cumulative byte count + /// delivered to `sink` must equal `size` on success. /// /// Must throw on failure. Implementations should observe the cancellation /// flag accessible via `ctx` and exit promptly when cancellation is requested. @@ -115,9 +110,7 @@ class AzureBlobDownloader : public IBlobDownloader { /// Reports whether cooperative cancellation has been requested for this /// download. The orchestrator calls `Azure::Core::Context::Cancel()` after a /// sibling chunk fails or on external cancellation, and the Azure SDK - /// interrupts in-flight transfers as a result. Exposed for test subclasses - /// overriding `DownloadChunkStreaming` so their chunk simulations can observe - /// the same signal and exit promptly. + /// interrupts in-flight transfers as a result. bool IsCancellationRequested(ChunkContext& ctx); private: diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc index 52eb0134..cf897fb1 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.cc +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -32,8 +32,7 @@ namespace { constexpr const char* kLockFileName = ".download.lock"; -/// `PID:,Time:\n` — mirrors what C# writes -/// (CrossProcessFileLock.cs:68) so the lock file is recognizable across SDKs. 
+/// `PID:,Time:\n` std::string FormatProcessInfo() { #ifdef _WIN32 auto pid = static_cast(_getpid()); diff --git a/sdk_v2/cpp/src/download/file_writer.h b/sdk_v2/cpp/src/download/file_writer.h index da581322..0be20021 100644 --- a/sdk_v2/cpp/src/download/file_writer.h +++ b/sdk_v2/cpp/src/download/file_writer.h @@ -36,8 +36,7 @@ class FileWriter { private: #ifdef _WIN32 - // Win32 HANDLE. Holds a valid handle while open, nullptr otherwise — Open() - // maps a CreateFileW failure to a throw, so INVALID_HANDLE_VALUE is never stored. + // Win32 HANDLE. Holds a valid handle while open, nullptr otherwise. void* handle_ = nullptr; #else int fd_ = -1; diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index bc8cfb3d..637d15d2 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -565,13 +565,13 @@ TEST(BlobDownloadTest, RedownloadsFilesWithWrongSize) { TEST(BlobDownloadTest, ReportsSkippedBytesInInitialProgress) { TempDir tmpdir; - // 500 of 1500 bytes already on disk → initial progress should be ~33%. + // 500 of 2000 bytes already on disk → initial progress should be 25%. std::ofstream(tmpdir.path() / "already.bin") << std::string(500, 'X'); MockBlobDownloader mock; mock.blobs_to_return = { {"already.bin", 500}, - {"missing.bin", 1000}, + {"missing.bin", 1500}, }; std::vector progress_values; @@ -584,8 +584,8 @@ TEST(BlobDownloadTest, ReportsSkippedBytesInInitialProgress) { DownloadBlobsToDirectory(mock, "https://test.blob/c?sig=x", tmpdir.string(), opts); ASSERT_FALSE(progress_values.empty()); - // First emitted progress reflects the already-on-disk bytes (500/1500 ≈ 33.3%). - EXPECT_NEAR(progress_values.front(), 100.0f * 500.0f / 1500.0f, 0.5f); + // First emitted progress reflects the already-on-disk bytes (500/2000 = 25%). + EXPECT_NEAR(progress_values.front(), 100.0f * 500.0f / 2000.0f, 0.5f); // Final progress must hit 100%. 
EXPECT_FLOAT_EQ(progress_values.back(), 100.0f); } @@ -612,7 +612,7 @@ TEST(BlobDownloadTest, EmitsHundredPercentWhenEverythingIsCached) { EXPECT_TRUE(mock.downloaded_blobs.empty()); ASSERT_FALSE(progress_values.empty()); - EXPECT_FLOAT_EQ(progress_values.back(), 100.0f); + EXPECT_FLOAT_EQ(progress_values.front(), 100.0f); } // ======================================================================== From 5443756c1c8ba9a5e97515f5976f65d9f52d8aec Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 08:46:14 -0500 Subject: [PATCH 22/36] download: skip the periodic sidecar save once the download is complete The 16 MB periodic save in the chunk worker loop could fire on the final chunk, writing a 100%-complete .dlstate sidecar that the success path then deletes microseconds later. Guard the save with !IsComplete() (read under the state mutex already held) so we never write a sidecar that is about to be removed. Crash safety is unchanged: a crash between the last chunk and the DeleteState just replays the few chunks since the previous periodic save. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 41f010a5..6ea9c257 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -292,7 +292,10 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, std::lock_guard lock(state->mutex()); state->MarkChunkComplete(chunk_idx); int32_t inc = chunks_since_save.fetch_add(1, std::memory_order_relaxed) + 1; - if (inc >= save_interval) { + // Skip the periodic save once every chunk is done: the finalization + // path below deletes the sidecar on success, so writing a fully + // complete sidecar here would just be undone microseconds later. 
+ if (inc >= save_interval && !state->IsComplete()) { chunks_since_save.store(0, std::memory_order_relaxed); should_save = true; } From 6f823f7c58420900cd5388371fb85e1efa7f4342 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 16:29:31 -0500 Subject: [PATCH 23/36] download: fix stale comments flagged in Copilot review Three comment-only corrections left over from the per-model -> global-lock revert: - EnsureEmptyBlobFile's doc still described the old pre-allocation logic; it now just creates a zero-byte file for the empty-blob case (pre-allocation lives in FileWriter::Open). Reworded to match. - The periodic-save interval comment said "~20 MB"; it is ~16 MB (kBytesPerSidecarSave = 16 MB / 2 MB chunks = 8 chunks). - The comment above ModelDownloadsSerializeUnderGlobalLock still described the abandoned per-model "rendezvous" concurrency design; reworded to match the serialize-under-global-mutex behavior the test actually asserts. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 11 +++++------ sdk_v2/cpp/test/internal_api/download_test.cc | 9 ++++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 6ea9c257..fd0a2cc9 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -110,12 +110,11 @@ void AzureBlobDownloader::DownloadChunkStreaming( namespace { -/// Pre-allocate `local_path` to `blob_size` bytes if it does not already exist -/// at the expected size. Allows concurrent chunk writes to seek without races -/// and avoids re-zeroing a file we're resuming. +/// Create (truncate to) a zero-byte file at `local_path`, throwing on failure. /// -/// Used only for the empty-blob case below; the writers' `Open` method handles -/// pre-allocation for the streaming chunked path. 
+/// Used only for the empty-blob case below: a 0-length blob has no chunks to +/// stream, so there is nothing for `FileWriter::Open` to pre-allocate — we just +/// materialize the empty file. The chunked path's pre-allocation lives in `Open`. void EnsureEmptyBlobFile(const std::string& local_path) { std::ofstream f(local_path, std::ios::binary); if (!f.is_open()) { @@ -177,7 +176,7 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // Persist the sidecar now, before Open() pre-allocates the data file. // IsDownloadNeeded treats "data file at full size + no sidecar" as a // completed download and skips it. The periodic save below does not run - // until save_interval chunks are done (~20 MB), so a crash between + // until save_interval chunks are done (~16 MB), so a crash between // pre-allocation and that first save would otherwise leave a full-size, // mostly-empty file with no sidecar that the next run silently accepts as // complete — serving zeros. Writing the sidecar up front upholds the diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 637d15d2..1ac88baf 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1170,11 +1170,10 @@ TEST(DownloadManagerTest, ConcurrentDownloadsOfSameModelSerialize) { } } -// With per-model locking, two *different* models must download concurrently — -// they must not serialize through a shared in-process mutex. A rendezvous inside -// the blob downloader proves both downloads occupy the critical section at the -// same time: each arrival waits for its peer, so if the two ever serialized the -// first arrival would time out waiting for a peer that can't enter yet. +// All model downloads serialize through the process-wide download_mutex_, even +// for two *different* models. 
A concurrency probe records the peak number of +// downloads running at once; correct serialization keeps that peak at 1 (the +// second download can't enter until the first releases the mutex). TEST(DownloadManagerTest, ModelDownloadsSerializeUnderGlobalLock) { TempDir tmpdir; DownloadManager manager(tmpdir.string(), "eastus", 64, fl::test::NullLog()); From 74a993822282ebffa5cf0f7841ce06a781b00c64 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 16:46:09 -0500 Subject: [PATCH 24/36] download: include <type_traits> explicitly in blob_download_state.cc WriteNative/ReadNative static_assert on std::is_trivially_copyable_v, which lives in <type_traits>; the translation unit compiled only because the header was pulled in transitively. Add the explicit include (include-what-you-use), matching the rest of the codebase. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_download_state.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index 2a3ea3e2..f3c6dd1e 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -7,6 +7,7 @@ #include #include #include +#include namespace fl { From 9dff27b244b18309a007175ad996aaa8d089331c Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 17:14:37 -0500 Subject: [PATCH 25/36] =?UTF-8?q?download:=20address=20Copilot=20review=20?= =?UTF-8?q?=E2=80=94=20resume=20integrity=20+=20non-blocking=20lock=20wait?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues raised in the Copilot review: - Resume trusted the .dlstate sidecar on blob-layout match alone, without checking the data file is intact.
A truncated or removed data file with a surviving sidecar would skip "completed" chunks (Open recreates the file zero-filled), then DeleteState removes the sidecar on "success" -- a silently corrupt file. DownloadBlob now discards the sidecar unless the data file is present at exactly blob_size, starting fresh otherwise. Adds regression test IgnoresSidecarWhenDataFileTruncated. - DownloadModel held the process-wide download_mutex_ across the cross-process WaitForDirectoryLock (up to a 3h timeout), so a cross-process wait on one model froze every unrelated in-process download. Switch to unique_lock and release it for the duration of the wait, re-acquiring (and re-checking the cache) before the download. It still serializes actual downloads; it no longer serializes the passive wait. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 18 +++++++++ sdk_v2/cpp/src/download/download_manager.cc | 9 ++++- sdk_v2/cpp/test/internal_api/download_test.cc | 37 +++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index fd0a2cc9..a671c515 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -170,6 +170,24 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, auto state = BlobDownloadState::LoadState(blob_name, local_path, blob_size, static_cast(kChunkSize), num_chunks, logger_); + if (state) { + // Only trust the sidecar if the data file it describes is actually on disk + // at full size. If the data file was truncated or removed (e.g. an external + // cleanup) while the sidecar survived, the chunks it marks complete are gone: + // we would skip re-downloading them, Open() would recreate the file + // zero-filled, and the result would be a silently corrupt file. Discard the + // stale state and start fresh. 
+ std::error_code data_ec; + auto data_size = std::filesystem::file_size(local_path, data_ec); + if (data_ec || data_size != static_cast(blob_size)) { + if (logger_) { + logger_->Log(LogLevel::Information, + "Resume sidecar for '" + local_path + + "' has no matching full-size data file; starting fresh"); + } + state.reset(); + } + } if (!state) { state = BlobDownloadState::CreateNew(blob_name, local_path, blob_size, static_cast(kChunkSize), num_chunks); diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index e30f3dab..7d2b0502 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -243,7 +243,7 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, // gets the full network and disk instead of competing with another download. // The cross-process file lock taken below extends the guarantee across every // process and app that shares this cache directory. - std::lock_guard download_guard(download_mutex_); + std::unique_lock download_guard(download_mutex_); auto model_path = ComputeModelPath(info); // Fast path: serve the cache without taking the cross-process lock. @@ -284,7 +284,14 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, logger_.Log(LogLevel::Information, "Model download is being performed by another process. Waiting on lock at '" + model_path + "'..."); + // Don't hold the in-process download mutex while blocking on the cross-process + // lock: that wait can last minutes to hours (another process is downloading), + // and freezing every unrelated in-process model download for that long is far + // worse than the bandwidth contention this mutex exists to prevent. Release it + // for the wait and re-acquire before the cache re-check + download below. 
+ download_guard.unlock(); lock = WaitForDirectoryLock(model_path, cancel_pred, &logger_); + download_guard.lock(); } // Another process may have just completed the download we were waiting on. diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 1ac88baf..8362847c 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1589,6 +1589,43 @@ TEST(AzureBlobDownloaderResumeTest, SkipsChunksAlreadyMarkedCompleteInSidecar) { EXPECT_FALSE(fs::exists(BlobDownloadState::GetStateFilePath(local))); } +TEST(AzureBlobDownloaderResumeTest, IgnoresSidecarWhenDataFileTruncated) { + // A valid sidecar marks chunks complete, but the data file was truncated (e.g. + // an external cleanup) while the sidecar survived. The downloader must not trust + // the sidecar — those "completed" chunks are no longer on disk — and must + // re-download every chunk rather than leave them as zeros. + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 10; + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + + // Sidecar claims chunks 0..4 are done. + { + auto state = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); + for (int32_t i = 0; i < 5; ++i) { + state->MarkChunkComplete(i); + } + state->SaveState(); + } + // ...but the data file is truncated, far smaller than kBlobSize. + { + std::ofstream f(local, std::ios::binary | std::ios::trunc); + f << "truncated"; + } + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + + d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/2); + + // The stale sidecar is ignored: every chunk is downloaded, not just 5..9. 
+ EXPECT_EQ(d.chunk_call_count.load(), kNumChunks); + EXPECT_FALSE(fs::exists(BlobDownloadState::GetStateFilePath(local))); + EXPECT_EQ(fs::file_size(local), static_cast(kBlobSize)); +} + TEST(AzureBlobDownloaderResumeTest, DownloadsAllChunksWhenSidecarMissing) { TempDir tmpdir; auto local = tmpdir.path() / "blob.bin"; From defc6ed76bac8b9f665e50583b521e5cf1a08318 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 17:40:30 -0500 Subject: [PATCH 26/36] test(download): fix misleading comment in PersistsSidecarOnChunkFailure The comment described stripping the failure and re-running the download, but the test only loads the persisted sidecar and asserts a partial completed_count -- there is no second DownloadBlob call. Reword it to describe what the test actually verifies: that the sidecar records partial progress for a future resume. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/test/internal_api/download_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 8362847c..7ba846d7 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1676,9 +1676,9 @@ TEST(AzureBlobDownloaderResumeTest, PersistsSidecarOnChunkFailure) { // The sidecar should be persisted so a subsequent call can resume. EXPECT_TRUE(fs::exists(BlobDownloadState::GetStateFilePath(local))); - // On resume with the same offset blocked, we should still hit the failure - // but skip already-completed chunks. Strip the failure and rerun: the - // downloader should only process the chunks that weren't completed. + // Verify the persisted sidecar records partial progress — some chunks completed + // before the failure, but not all — so a future resume can skip the ones already + // done and re-fetch only the rest. 
auto retry_state = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); ASSERT_NE(retry_state, nullptr); EXPECT_GT(retry_state->completed_count, 0); From cc99205293df877e412de07f1b654d2c4020e845 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 18:21:01 -0500 Subject: [PATCH 27/36] fix(download): fail loud if the initial resume sidecar can't be persisted The "pre-allocated but unfinished <=> sidecar present" invariant that lets IsDownloadNeeded distinguish a complete file from an in-progress one depends on the initial SaveState landing before FileWriter::Open() pre-allocates the full-size data file. That first save was best-effort/void: if it silently failed but Open() still sparse-allocated the file, a crash before the next periodic save left a full-size, mostly-empty file with no sidecar that the next run accepts as complete -- serving zeros. SaveState now returns bool. Periodic and finalization saves stay best-effort (return value ignored); the initial pre-allocation save treats false as fatal and throws before Open() runs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_download_state.cc | 8 +++++--- sdk_v2/cpp/src/download/blob_download_state.h | 11 +++++++---- sdk_v2/cpp/src/download/blob_downloader.cc | 9 +++++++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index f3c6dd1e..9bed673e 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -278,7 +278,7 @@ std::vector BlobDownloadState::GetPendingChunks() const { return pending; } -void BlobDownloadState::SaveState(ILogger* logger) { +bool BlobDownloadState::SaveState(ILogger* logger) { // Advance bitmap_byte_aligned_start past any words that are now all 1s, so // the next save serializes only the unfinished tail. 
// Find the first word that is not fully complete. Every word below it is @@ -335,7 +335,7 @@ void BlobDownloadState::SaveState(ILogger* logger) { if (logger) { logger->Log(LogLevel::Warning, "Failed to open download state tmp file: " + tmp_path.string()); } - return; + return false; } out.write(kMagic, 4); WriteNative(out, kVersion); @@ -355,7 +355,7 @@ void BlobDownloadState::SaveState(ILogger* logger) { if (logger) { logger->Log(LogLevel::Warning, "Failed to write download state tmp file: " + tmp_path.string()); } - return; + return false; } } @@ -377,7 +377,9 @@ void BlobDownloadState::SaveState(ILogger* logger) { state_path.string() + " (" + ec.message() + "); previous state retained, will retry on next save"); } + return false; } + return true; } void BlobDownloadState::DeleteState(const std::filesystem::path& local_file_path, ILogger* logger) { diff --git a/sdk_v2/cpp/src/download/blob_download_state.h b/sdk_v2/cpp/src/download/blob_download_state.h index 8a8ae07c..7512186a 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.h +++ b/sdk_v2/cpp/src/download/blob_download_state.h @@ -95,10 +95,13 @@ class BlobDownloadState { /// Enumerate chunks in [0, total_chunks) that are not yet complete. std::vector GetPendingChunks() const; - /// Atomically write current state to `.dlstate`. Best-effort: - /// I/O errors are logged but not thrown — the next save will retry, and a - /// failed save just means the next resume will replay a few chunks. - void SaveState(ILogger* logger = nullptr); + /// Atomically write current state to `.dlstate`. Returns true + /// on success; on failure it logs and returns false rather than throwing. Most + /// callers treat a failed periodic save as best-effort (the next save retries, + /// and resume just replays a few chunks); the initial pre-allocation save + /// treats false as fatal, since the "pre-allocated <=> sidecar present" + /// invariant depends on it. 
+ bool SaveState(ILogger* logger = nullptr); /// Remove the sidecar; called on successful completion. static void DeleteState(const std::filesystem::path& local_file_path, diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index a671c515..1bb0564f 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -198,8 +198,13 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // pre-allocation and that first save would otherwise leave a full-size, // mostly-empty file with no sidecar that the next run silently accepts as // complete — serving zeros. Writing the sidecar up front upholds the - // invariant "pre-allocated but unfinished <=> sidecar present". - state->SaveState(logger_); + // invariant "pre-allocated but unfinished <=> sidecar present" — so if it + // can't be persisted we abort here, before Open() pre-allocates, rather + // than risk a full-size file a later run reads as complete. + if (!state->SaveState(logger_)) { + FL_THROW(FOUNDRY_LOCAL_ERROR_INTERNAL, + "failed to persist initial download state for '" + local_path + "'"); + } } // Track cumulative bytes for progress reporting; seed with bytes already From 06a92cc040f18f2a6d597c2d78f34f0733121944 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 18:21:26 -0500 Subject: [PATCH 28/36] test(download): add cross-process-lock wait-then-serve-cached test Exercises the cross-process file-lock branch of DownloadModel that the in-process concurrency tests never reach: a second process (simulated by holding the lock directly) is mid-download on the same model directory. DownloadModel must observe the held lock, block in WaitForDirectoryLock without holding the in-process download mutex, and once the lock releases and inference_model.json is present, return the cached result via the post-lock recheck without re-downloading anything. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/test/internal_api/download_test.cc | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index 7ba846d7..eaaff606 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1283,6 +1283,56 @@ TEST(DownloadManagerTest, IsModelCachedReturnsFalseWhenPathIsRegularFile) { }); } +TEST(DownloadManagerTest, WaitsForCrossProcessLockThenServesCachedResult) { + TempDir tmpdir; + DownloadManager manager(tmpdir.string(), "eastus", 64, fl::test::NullLog()); + + // Registry + downloader that must stay untouched if the post-lock recheck works. + auto registry = std::make_unique( + "eastus", fl::test::NullLog(), std::make_unique(fl::test::NullLog(), false), + [](const std::string&) { + return MakeRegistryResponse( + R"({"blobSasUri": "https://storage.blob.core.windows.net/c?sig=test"})"); + }); + manager.SetModelRegistryClient(std::move(registry)); + + auto mock = std::make_unique(); + mock->blobs_to_return = {{"weights.bin", 100}}; // non-empty: a stray download would be visible + auto* mock_raw = mock.get(); + manager.SetBlobDownloader(std::move(mock)); + + ModelInfo info; + info.model_id = "wait-model:1"; + info.name = "wait-model"; + info.uri = "azureml://registries/test/models/wait-model/versions/1"; + info.string_properties[FOUNDRY_LOCAL_MODEL_PROP_PUBLISHER_STR] = "Pub"; + + // Simulate another process holding the model-directory lock mid-download. 
+ auto model_dir = fs::path(tmpdir.string()) / "Pub" / "wait-model-1"; + fs::create_directories(model_dir); + auto held = CrossProcessFileLock::TryAcquireForDirectory(model_dir); + ASSERT_NE(held, nullptr); + + std::atomic done{false}; + std::string result; + std::thread worker([&] { result = manager.DownloadModel(info); done.store(true); }); + + // The call must block on the cross-process lock rather than proceed to download. + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + EXPECT_FALSE(done.load()) << "DownloadModel should block while another process holds the lock"; + + // The "other process" finishes: publish inference_model.json, then release the lock. + { std::ofstream(model_dir / "inference_model.json") << "{}"; } + held.reset(); + + worker.join(); + + EXPECT_TRUE(done.load()); + EXPECT_EQ(result, model_dir.string()); + EXPECT_TRUE(mock_raw->downloaded_blobs.empty()) + << "Model became available while waiting; the post-lock recheck must skip the download"; +} + // ======================================================================== // End-to-end integration test — fetches catalog then downloads smallest model. // Disabled by default. Run with: --gtest_also_run_disabled_tests From eeb72789e81cbc3c5f5bf48b3a6ef4cca895b311 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 18:21:46 -0500 Subject: [PATCH 29/36] test(download): add DISABLED live resume-after-cancel test + fix cancel convention Adds DISABLED_DownloadFixture.ResumesPartialDownloadAfterCancel: cancels a real download once aggregate progress passes ~30% via the progress callback, then re-runs and asserts the .dlstate sidecar drove a partial resume (first reported percentage well above 0) rather than a fresh re-download. DISABLED so it only runs under the coverage/live path that downloads real artifacts. Also fixes two pre-existing DISABLED tests that returned `true` (== 1) from their progress callbacks. 
The download progress-callback convention is 0 = continue, non-zero = cancel, so `return true` cancelled the download immediately -- latent until this PR's linked cancellation made it bite. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/test/sdk_api/download_test.cc | 70 +++++++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/sdk_v2/cpp/test/sdk_api/download_test.cc b/sdk_v2/cpp/test/sdk_api/download_test.cc index 23c6ffcc..c72f03a5 100644 --- a/sdk_v2/cpp/test/sdk_api/download_test.cc +++ b/sdk_v2/cpp/test/sdk_api/download_test.cc @@ -77,7 +77,7 @@ TEST_F(DISABLED_DownloadFixture, RemoveAndRedownloadSmallestModel) { std::vector progress_values; target->Download([&progress_values](float pct) { progress_values.push_back(pct); - return true; // Continue downloading. + return 0; // Continue downloading (0 = continue, non-zero = cancel). }); EXPECT_TRUE(target->IsCached()) @@ -122,7 +122,7 @@ TEST_F(DISABLED_DownloadFixture, DownloadAlreadyCachedModelIsNoOp) { std::vector progress_values; model->Download([&progress_values](float pct) { progress_values.push_back(pct); - return true; + return 0; // 0 = continue, non-zero = cancel. }); EXPECT_TRUE(model->IsCached()); @@ -131,3 +131,69 @@ TEST_F(DISABLED_DownloadFixture, DownloadAlreadyCachedModelIsNoOp) { ASSERT_FALSE(progress_values.empty()); EXPECT_FLOAT_EQ(progress_values.back(), 100.0f); } + +TEST_F(DISABLED_DownloadFixture, ResumesPartialDownloadAfterCancel) { + // Live resume check: cancel a real download partway, then re-run and confirm the + // .dlstate sidecar drove a *partial* resume rather than a fresh re-download. + // Pick the smallest uncached CPU model (same selection as the redownload test). 
+ foundry_local::IModel* target = nullptr; + int64_t target_size = std::numeric_limits::max(); + for (const auto& m : model_list()) { + if (m->IsLoaded()) { + continue; + } + for (const auto& v : m->GetVariants()) { + auto vi = v->GetInfo(); + if (vi.DeviceType() != FOUNDRY_LOCAL_DEVICE_CPU) { + continue; + } + int64_t size = vi.FilesizeMb().value_or(0); + if (size > 0 && size < target_size) { + target_size = size; + m->SelectVariant(*v); + target = m.get(); + } + } + } + ASSERT_NE(target, nullptr) << "No unloaded CPU model found in catalog"; + + if (target->IsCached()) { + target->RemoveFromCache(); + } + ASSERT_FALSE(target->IsCached()); + + // First attempt: cancel once aggregate progress passes ~30%, leaving partial + // data plus its .dlstate sidecar(s) on disk. The progress callback returns 0 to + // continue and non-zero to cancel. + float cancel_pct = -1.0f; + bool threw = false; + try { + target->Download([&cancel_pct](float pct) -> int { + if (pct >= 30.0f) { + cancel_pct = pct; + return 1; // cancel + } + return 0; // continue + }); + } catch (const std::exception&) { + threw = true; // cancellation surfaces as a thrown error + } + ASSERT_GE(cancel_pct, 30.0f) << "download never reached the cancel threshold"; + EXPECT_TRUE(threw) << "a cancelled download should surface an error"; + EXPECT_FALSE(target->IsCached()) << "a cancelled download must not report cached"; + + // Second attempt: let it finish, capturing every reported percentage. If the + // sidecar drove a partial resume, the first percentage reflects the bytes + // already on disk (well above 0) rather than restarting from scratch. 
+ std::vector resume_progress; + target->Download([&resume_progress](float pct) -> int { + resume_progress.push_back(pct); + return 0; + }); + + ASSERT_FALSE(resume_progress.empty()); + EXPECT_GT(resume_progress.front(), 0.0f) + << "resume should start from the partial progress already on disk, not 0%"; + EXPECT_FLOAT_EQ(resume_progress.back(), 100.0f); + EXPECT_TRUE(target->IsCached()); +} From 4721485214f829bdbac6d8daef2777da115e9263 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 18:46:13 -0500 Subject: [PATCH 30/36] test(download): skip 0% heartbeat in resume assertion DownloadManager::DownloadModel emits a 0% heartbeat (progress_cb(0.0f)) before the transfer starts and Model::Download forwards it unchanged, so the first captured sample in the live resume test was always that heartbeat -- the EXPECT_GT(front, 0) assertion would always fail when the DISABLED test runs. Skip leading zeros and assert the first *real* (non-zero) sample lands well above a fresh-download first-chunk fraction, validating the sidecar-driven partial resume. Internal-API tests that drive DownloadBlobsToDirectory directly bypass this heartbeat, which is why they were unaffected. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/test/sdk_api/download_test.cc | 29 ++++++++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/sdk_v2/cpp/test/sdk_api/download_test.cc b/sdk_v2/cpp/test/sdk_api/download_test.cc index c72f03a5..af0592d5 100644 --- a/sdk_v2/cpp/test/sdk_api/download_test.cc +++ b/sdk_v2/cpp/test/sdk_api/download_test.cc @@ -182,9 +182,7 @@ TEST_F(DISABLED_DownloadFixture, ResumesPartialDownloadAfterCancel) { EXPECT_TRUE(threw) << "a cancelled download should surface an error"; EXPECT_FALSE(target->IsCached()) << "a cancelled download must not report cached"; - // Second attempt: let it finish, capturing every reported percentage. 
If the - // sidecar drove a partial resume, the first percentage reflects the bytes - // already on disk (well above 0) rather than restarting from scratch. + // Second attempt: let it finish, capturing every reported percentage. std::vector resume_progress; target->Download([&resume_progress](float pct) -> int { resume_progress.push_back(pct); @@ -192,8 +190,29 @@ TEST_F(DISABLED_DownloadFixture, ResumesPartialDownloadAfterCancel) { }); ASSERT_FALSE(resume_progress.empty()); - EXPECT_GT(resume_progress.front(), 0.0f) - << "resume should start from the partial progress already on disk, not 0%"; + + // DownloadManager::DownloadModel always emits a 0% heartbeat (progress_cb(0.0f)) + // before the transfer starts, which Model::Download forwards unchanged, so + // resume_progress.front() is that heartbeat -- not the resumed percentage. Skip + // the leading zero(s); the first non-zero sample is the initial on-disk + // reflection DownloadBlobsToDirectory emits from the bytes already present. + float first_real = 0.0f; + for (float pct : resume_progress) { + if (pct > 0.0f) { + first_real = pct; + break; + } + } + ASSERT_GT(first_real, 0.0f) << "resume produced no real progress past the 0% heartbeat"; + + // A sidecar-driven partial resume reports its first real progress at roughly the + // bytes already on disk (~the cancel point), so it lands well above the tiny + // first-chunk fraction a fresh re-download would start from. The threshold is + // intentionally lenient (sidecar saves are bounded at ~16 MB granularity); tune + // it if a different model than the smallest CPU one is selected. 
+ constexpr float kMinResumeProgressPct = 10.0f; + EXPECT_GE(first_real, kMinResumeProgressPct) + << "first real progress " << first_real << "% looks like a fresh re-download, not a resume"; EXPECT_FLOAT_EQ(resume_progress.back(), 100.0f); EXPECT_TRUE(target->IsCached()); } From e4f9dfca7cdc2f94336c0a27b032c26ed8089bd3 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 19:21:10 -0500 Subject: [PATCH 31/36] fix(download): drain in-flight peers promptly on user cancel The per-chunk byte accounting and bytes_written_cb call sat just outside the try/catch that wraps DownloadChunkStreaming. On user cancellation bytes_written_cb (per_chunk_progress) sets the shared cancel flag and throws, so that throw unwound straight out of the worker without reaching the catch's azure_ctx.Cancel(). Peers blocked mid-chunk -- the real DownloadChunkStreaming relies on the Azure context for interruption and does not poll the flag between reads -- only noticed the cancel at their next top-of-loop check, i.e. after finishing their current 2 MB chunk. The chunk-failure path already cancelled promptly from its catch block. Move the accounting and callback inside the try so a user-cancel throw routes through the same catch and triggers azure_ctx.Cancel() immediately, matching the failure path. Adds UserCancelDrainsInFlightPeersFast, which parks every peer in its chunk before the cancel fires so only the Azure-context cascade can drain them (verified: ~120 ms with the fix, ~7.3 s without). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 15 +++-- sdk_v2/cpp/test/internal_api/download_test.cc | 62 +++++++++++++++++++ 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 1bb0564f..aaef73ac 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -293,6 +293,16 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, try { DownloadChunkStreaming(chunk_ctx, offset, size, scratch, sink); + + // Account for this chunk and fire the progress callback within the same + // try as the download: on user cancellation bytes_written_cb throws, and + // the catch below runs azure_ctx.Cancel() so peers blocked mid-chunk are + // interrupted immediately rather than only noticing the cancel flag when + // they finish their current chunk. + int64_t new_total = bytes_completed.fetch_add(size, std::memory_order_relaxed) + size; + if (bytes_written_cb) { + bytes_written_cb(new_total); + } } catch (...) 
{ std::lock_guard lock(error_mutex); if (!first_error) { @@ -304,11 +314,6 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, return; } - int64_t new_total = bytes_completed.fetch_add(size, std::memory_order_relaxed) + size; - if (bytes_written_cb) { - bytes_written_cb(new_total); - } - bool should_save = false; { std::lock_guard lock(state->mutex()); diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index eaaff606..357339cb 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -1844,3 +1844,65 @@ TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { << "Cancel-cascade should drain in-flight peers fast; took " << elapsed_ms << " ms"; } +TEST(AzureBlobDownloaderResumeTest, UserCancelDrainsInFlightPeersFast) { + TempDir tmpdir; + auto local = tmpdir.path() / "blob.bin"; + + constexpr int32_t kChunkSize = 2 * 1024 * 1024; + constexpr int32_t kNumChunks = 10; + constexpr int64_t kBlobSize = static_cast(kNumChunks) * kChunkSize; + + FakeChunkAzureDownloader d; + d.blob_size = kBlobSize; + + // Chunk 0 is the cancel trigger; chunks 1..9 are the in-flight peers. The peers + // announce themselves and then sleep up to 5 s in 50-ms slices, polling the + // Azure-context cancellation. Chunk 0 waits until every peer is parked in that + // sleep loop before it completes, so no peer is at the worker top-of-loop to + // observe the shared cancel flag directly -- the only way they can exit + // promptly is the azure_ctx.Cancel() driven by the user-cancel throw. 
+ std::atomic peers_parked{0}; + d.chunk_hook = [&peers_parked](int64_t offset, int64_t size, + const std::function& sink, + const std::function& is_cancelled) { + if (offset == 0) { + for (int i = 0; i < 400 && peers_parked.load() < kNumChunks - 1; ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + } + std::vector buf(static_cast(size), 0); + sink(buf.data(), buf.size()); + return; + } + peers_parked.fetch_add(1); + for (int i = 0; i < 100; ++i) { + if (is_cancelled()) { + FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "cancelled mid-chunk"); + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + std::vector buf(static_cast(size), 0); + sink(buf.data(), buf.size()); + }; + + // Mirror per_chunk_progress: the first progress callback cancels by setting the + // shared flag and throwing. + std::atomic cancelled{false}; + BlobBytesWrittenFn cancel_on_first_progress = [&cancelled](int64_t /*bytes*/) { + cancelled.store(true, std::memory_order_relaxed); + FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled by user callback return value"); + }; + + auto start = std::chrono::steady_clock::now(); + EXPECT_THROW(d.DownloadBlob(/*sas_uri=*/"", "blob", local.string(), /*max_concurrency=*/kNumChunks, + cancel_on_first_progress, &cancelled), + fl::Exception); + auto elapsed = std::chrono::steady_clock::now() - start; + auto elapsed_ms = std::chrono::duration_cast(elapsed).count(); + + // Without routing the user-cancel throw through azure_ctx.Cancel(), the parked + // peers would each sleep their full ~5 s before noticing. With it, they exit + // within a slice or two (well under 2 s). 
+ EXPECT_LT(elapsed_ms, 2000) + << "User-cancel should drain in-flight peers fast; took " << elapsed_ms << " ms"; +} + From 0404ff2238ae5db3db7ef0f80cac4f3d49f6c06d Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Sat, 20 Jun 2026 10:32:17 +1000 Subject: [PATCH 32/36] Make some things references Add unit test --- .../cpp/src/download/blob_download_state.cc | 80 +++++++------------ sdk_v2/cpp/src/download/blob_download_state.h | 10 ++- sdk_v2/cpp/src/download/blob_downloader.cc | 39 ++++----- sdk_v2/cpp/src/download/blob_downloader.h | 7 +- .../src/download/cross_process_file_lock.cc | 14 ++-- .../src/download/cross_process_file_lock.h | 11 +-- sdk_v2/cpp/src/download/download_manager.cc | 6 +- .../internal_api/blob_download_state_test.cc | 43 +++++----- .../cross_process_file_lock_test.cc | 35 ++++---- sdk_v2/cpp/test/internal_api/download_test.cc | 77 ++++++++++++++++-- 10 files changed, 188 insertions(+), 134 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index 9bed673e..adcde2a7 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -100,7 +100,7 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob int64_t expected_blob_size, int32_t expected_chunk_size, int32_t expected_total_chunks, - ILogger* logger) { + ILogger& logger) { auto state_path = GetStateFilePath(local_file_path); std::error_code ec; if (!std::filesystem::exists(state_path, ec)) { @@ -109,9 +109,7 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob std::ifstream in(state_path, std::ios::binary); if (!in) { - if (logger) { - logger->Log(LogLevel::Warning, "Could not open download state file: " + state_path.string()); - } + logger.Log(LogLevel::Warning, "Could not open download state file: " + state_path.string()); return nullptr; } @@ -119,10 +117,8 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob in.read(magic, 4); uint8_t 
version = 0; if (!in || std::memcmp(magic, kMagic, 4) != 0 || !ReadNative(in, version) || version != kVersion) { - if (logger) { - logger->Log(LogLevel::Warning, - "Download state file " + state_path.string() + " has unexpected magic/version; ignoring"); - } + logger.Log(LogLevel::Warning, + "Download state file " + state_path.string() + " has unexpected magic/version; ignoring"); return nullptr; } @@ -137,29 +133,23 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob if (!ReadNative(in, blob_size) || !ReadNative(in, chunk_size) || !ReadNative(in, total_chunks) || !ReadNative(in, bitmap_byte_aligned_start) || !ReadNative(in, highest_completed_chunk) || !ReadNative(in, completed_count) || !ReadNative(in, last_modified_unix_ms) || !ReadNative(in, trunc_len)) { - if (logger) { - logger->Log(LogLevel::Warning, "Download state header truncated: " + state_path.string()); - } + logger.Log(LogLevel::Warning, "Download state header truncated: " + state_path.string()); return nullptr; } // Sanity / compatibility checks. 
if (blob_size != expected_blob_size || chunk_size != expected_chunk_size || total_chunks != expected_total_chunks) { - if (logger) { - logger->Log(LogLevel::Information, - "Download state for " + state_path.string() + - " is incompatible with current blob layout; starting fresh"); - } + logger.Log(LogLevel::Information, + "Download state for " + state_path.string() + + " is incompatible with current blob layout; starting fresh"); return nullptr; } if (bitmap_byte_aligned_start < 0 || bitmap_byte_aligned_start % 8 != 0 || bitmap_byte_aligned_start > total_chunks || completed_count < 0 || completed_count > total_chunks || highest_completed_chunk < -1 || highest_completed_chunk >= total_chunks) { - if (logger) { - logger->Log(LogLevel::Warning, "Download state header values out of range: " + state_path.string()); - } + logger.Log(LogLevel::Warning, "Download state header values out of range: " + state_path.string()); return nullptr; } @@ -186,18 +176,14 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob auto* dest = reinterpret_cast(bitmap.data()) + byte_offset; auto dest_capacity = bitmap.size() * sizeof(uint64_t) - byte_offset; if (trunc_len > dest_capacity) { - if (logger) { - logger->Log(LogLevel::Warning, - "Download state bitmap length exceeds expected capacity: " + state_path.string()); - } + logger.Log(LogLevel::Warning, + "Download state bitmap length exceeds expected capacity: " + state_path.string()); return nullptr; } in.read(reinterpret_cast(dest), trunc_len); if (!in) { - if (logger) { - logger->Log(LogLevel::Warning, - "Download state bitmap payload truncated: " + state_path.string()); - } + logger.Log(LogLevel::Warning, + "Download state bitmap payload truncated: " + state_path.string()); return nullptr; } } @@ -214,12 +200,10 @@ std::unique_ptr BlobDownloadState::LoadState(std::string blob state->last_modified_unix_ms = last_modified_unix_ms; state->full_completion_bitmap = std::move(bitmap); - if (logger) { - 
logger->Log(LogLevel::Information, - "Loaded download state " + state_path.string() + ": " + - std::to_string(completed_count) + "/" + std::to_string(total_chunks) + - " chunks already done"); - } + logger.Log(LogLevel::Information, + "Loaded download state " + state_path.string() + ": " + + std::to_string(completed_count) + "/" + std::to_string(total_chunks) + + " chunks already done"); return state; } @@ -278,7 +262,7 @@ std::vector BlobDownloadState::GetPendingChunks() const { return pending; } -bool BlobDownloadState::SaveState(ILogger* logger) { +bool BlobDownloadState::SaveState(ILogger& logger) { // Advance bitmap_byte_aligned_start past any words that are now all 1s, so // the next save serializes only the unfinished tail. // Find the first word that is not fully complete. Every word below it is @@ -332,9 +316,7 @@ bool BlobDownloadState::SaveState(ILogger* logger) { { std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc); if (!out) { - if (logger) { - logger->Log(LogLevel::Warning, "Failed to open download state tmp file: " + tmp_path.string()); - } + logger.Log(LogLevel::Warning, "Failed to open download state tmp file: " + tmp_path.string()); return false; } out.write(kMagic, 4); @@ -352,9 +334,7 @@ bool BlobDownloadState::SaveState(ILogger* logger) { out.write(reinterpret_cast(src), trunc_len); } if (!out) { - if (logger) { - logger->Log(LogLevel::Warning, "Failed to write download state tmp file: " + tmp_path.string()); - } + logger.Log(LogLevel::Warning, "Failed to write download state tmp file: " + tmp_path.string()); return false; } } @@ -371,25 +351,23 @@ bool BlobDownloadState::SaveState(ILogger* logger) { // next SaveState call retry from the up-to-date in-memory state. 
std::error_code rm_ec; std::filesystem::remove(tmp_path, rm_ec); - if (logger) { - logger->Log(LogLevel::Warning, - "Failed to commit download state file: " + tmp_path.string() + " -> " + - state_path.string() + " (" + ec.message() + - "); previous state retained, will retry on next save"); - } + logger.Log(LogLevel::Warning, + "Failed to commit download state file: " + tmp_path.string() + " -> " + + state_path.string() + " (" + ec.message() + + "); previous state retained, will retry on next save"); return false; } return true; } -void BlobDownloadState::DeleteState(const std::filesystem::path& local_file_path, ILogger* logger) { +void BlobDownloadState::DeleteState(const std::filesystem::path& local_file_path, ILogger& logger) { auto state_path = GetStateFilePath(local_file_path); std::error_code ec; std::filesystem::remove(state_path, ec); - if (ec && logger) { - logger->Log(LogLevel::Warning, - "Failed to delete download state file: " + state_path.string() + " (" + - ec.message() + ")"); + if (ec) { + logger.Log(LogLevel::Warning, + "Failed to delete download state file: " + state_path.string() + " (" + + ec.message() + ")"); } } diff --git a/sdk_v2/cpp/src/download/blob_download_state.h b/sdk_v2/cpp/src/download/blob_download_state.h index 7512186a..362fed77 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.h +++ b/sdk_v2/cpp/src/download/blob_download_state.h @@ -71,12 +71,14 @@ class BlobDownloadState { /// `blob_size` / `chunk_size` / `total_chunks` (caller-provided values are /// authoritative — a mismatch means the blob has been reconfigured upstream /// and the partial download is no longer valid). + /// `logger` receives diagnostics for corrupt/incompatible state files. Required: the + /// downloader always has a logger, so there is no optional/null case to handle. 
static std::unique_ptr LoadState(std::string blob_name, std::filesystem::path local_file_path, int64_t expected_blob_size, int32_t expected_chunk_size, int32_t expected_total_chunks, - ILogger* logger = nullptr); + ILogger& logger); /// All chunks downloaded. bool IsComplete() const noexcept { return completed_count == total_chunks; } @@ -100,12 +102,12 @@ class BlobDownloadState { /// callers treat a failed periodic save as best-effort (the next save retries, /// and resume just replays a few chunks); the initial pre-allocation save /// treats false as fatal, since the "pre-allocated <=> sidecar present" - /// invariant depends on it. - bool SaveState(ILogger* logger = nullptr); + /// invariant depends on it. `logger` is required. + bool SaveState(ILogger& logger); /// Remove the sidecar; called on successful completion. static void DeleteState(const std::filesystem::path& local_file_path, - ILogger* logger = nullptr); + ILogger& logger); /// Mutex protecting concurrent `MarkChunkComplete` / `SaveState` calls from /// the chunk worker pool. diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index 1bb0564f..01e66eda 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -38,16 +38,20 @@ constexpr size_t kStreamingBufferBytes = 64 * 1024; // AzureBlobDownloader — real Azure Storage SDK implementation // ======================================================================== -/// Per-blob shared state passed to the protected virtuals. Cancellation is -/// observed through `azure_ctx`: the orchestrator calls `Cancel()` on it after -/// the first chunk failure or on external cancellation, which interrupts every -/// in-flight chunk read. +/// Per-blob shared state passed to the protected virtuals. Both members are +/// references to objects the orchestrator owns on the stack for the lifetime of +/// the download, so they are never null. 
`blob_client` is const because every +/// call routed through it (GetProperties / Download) is a const SDK operation. +/// `azure_ctx` is const here because the virtuals only *observe* cancellation +/// (IsCancelled, and handing the context to SDK reads); the orchestrator +/// initiates cancellation by calling Cancel() on the owning Context directly, +/// not through this view. struct AzureBlobDownloader::ChunkContext { - Azure::Storage::Blobs::BlobClient* blob_client; - Azure::Core::Context* azure_ctx; + const Azure::Storage::Blobs::BlobClient& blob_client; + const Azure::Core::Context& azure_ctx; }; -AzureBlobDownloader::AzureBlobDownloader(ILogger* logger) : logger_(logger) {} +AzureBlobDownloader::AzureBlobDownloader(ILogger& logger) : logger_(logger) {} std::vector AzureBlobDownloader::ListBlobs(const std::string& sas_uri) { try { @@ -71,12 +75,12 @@ std::vector AzureBlobDownloader::ListBlobs(const std::string& sas_ } int64_t AzureBlobDownloader::GetBlobSize(ChunkContext& ctx) { - auto props = ctx.blob_client->GetProperties({}, *ctx.azure_ctx).Value; + auto props = ctx.blob_client.GetProperties({}, ctx.azure_ctx).Value; return props.BlobSize; } bool AzureBlobDownloader::IsCancellationRequested(ChunkContext& ctx) { - return ctx.azure_ctx->IsCancelled(); + return ctx.azure_ctx.IsCancelled(); } void AzureBlobDownloader::DownloadChunkStreaming( @@ -84,7 +88,7 @@ void AzureBlobDownloader::DownloadChunkStreaming( const std::function& sink) { Azure::Storage::Blobs::DownloadBlobOptions range_opts; range_opts.Range = Azure::Core::Http::HttpRange{offset, size}; - auto result = ctx.blob_client->Download(range_opts, *ctx.azure_ctx); + auto result = ctx.blob_client.Download(range_opts, ctx.azure_ctx); auto& body_stream = *result.Value.BodyStream; if (scratch.size() < kStreamingBufferBytes) { @@ -93,9 +97,8 @@ void AzureBlobDownloader::DownloadChunkStreaming( int64_t remaining = size; while (remaining > 0) { - size_t to_read = - static_cast(std::min(remaining, 
static_cast(scratch.size()))); - size_t got = body_stream.Read(scratch.data(), to_read, *ctx.azure_ctx); + size_t to_read = static_cast(std::min(remaining, static_cast(scratch.size()))); + size_t got = body_stream.Read(scratch.data(), to_read, ctx.azure_ctx); if (got == 0) { // Zero-byte read before reaching `size` means the server closed early. // Treat as a hard error rather than silently writing a truncated chunk. @@ -152,7 +155,7 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // or by external cancellation; checked by workers between iterations. std::atomic internal_cancel{false}; - ChunkContext chunk_ctx{&blob_client, &azure_ctx}; + ChunkContext chunk_ctx{blob_client, azure_ctx}; int64_t blob_size = GetBlobSize(chunk_ctx); @@ -180,11 +183,9 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, std::error_code data_ec; auto data_size = std::filesystem::file_size(local_path, data_ec); if (data_ec || data_size != static_cast(blob_size)) { - if (logger_) { - logger_->Log(LogLevel::Information, - "Resume sidecar for '" + local_path + - "' has no matching full-size data file; starting fresh"); - } + logger_.Log(LogLevel::Information, + "Resume sidecar for '" + local_path + + "' has no matching full-size data file; starting fresh"); state.reset(); } } diff --git a/sdk_v2/cpp/src/download/blob_downloader.h b/sdk_v2/cpp/src/download/blob_downloader.h index 2137ccc2..5175d0d5 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.h +++ b/sdk_v2/cpp/src/download/blob_downloader.h @@ -72,8 +72,9 @@ class IBlobDownloader { /// large the blob or the chunk size is. class AzureBlobDownloader : public IBlobDownloader { public: - /// `logger` is used for diagnostics only (state file save/load events). May be null. - explicit AzureBlobDownloader(ILogger* logger = nullptr); + /// `logger` receives diagnostics only (state-file save/load events). 
It is required: + /// the orchestrator always has a logger, so there is no optional/null case to handle. + explicit AzureBlobDownloader(ILogger& logger); std::vector ListBlobs(const std::string& sas_uri) override; @@ -114,7 +115,7 @@ class AzureBlobDownloader : public IBlobDownloader { bool IsCancellationRequested(ChunkContext& ctx); private: - ILogger* logger_ = nullptr; + ILogger& logger_; }; /// High-level download function: enumerate, filter, and download all blobs from a SAS URI. diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc index cf897fb1..7a031aa9 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.cc +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -89,19 +89,17 @@ struct CrossProcessFileLock::State { CrossProcessFileLock::CrossProcessFileLock(std::filesystem::path path, std::unique_ptr state, - ILogger* logger) + ILogger& logger) : path_(std::move(path)), state_(std::move(state)), logger_(logger) {} CrossProcessFileLock::~CrossProcessFileLock() { // Release the OS handle first so the "released" log message is accurate. 
state_.reset(); - if (logger_) { - logger_->Log(LogLevel::Debug, "CrossProcessFileLock released: " + path_.string()); - } + logger_.Log(LogLevel::Debug, "CrossProcessFileLock released: " + path_.string()); } std::unique_ptr CrossProcessFileLock::TryAcquireForDirectory( - const std::filesystem::path& directory, ILogger* logger) { + const std::filesystem::path& directory, ILogger& logger) { std::error_code ec; std::filesystem::create_directories(directory, ec); // Best-effort: if create_directories failed, the platform open below will @@ -186,9 +184,7 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto state = std::unique_ptr(new State{fd, lock_path}); #endif - if (logger) { - logger->Log(LogLevel::Debug, "CrossProcessFileLock acquired: " + lock_path.string()); - } + logger.Log(LogLevel::Debug, "CrossProcessFileLock acquired: " + lock_path.string()); return std::unique_ptr( new CrossProcessFileLock(std::move(lock_path), std::move(state), logger)); } @@ -196,7 +192,7 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto std::unique_ptr WaitForDirectoryLock( const std::filesystem::path& directory, const CancellationPredicate& is_cancelled, - ILogger* logger, + ILogger& logger, std::chrono::milliseconds poll_interval, std::chrono::milliseconds timeout) { auto deadline = std::chrono::steady_clock::now() + timeout; diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.h b/sdk_v2/cpp/src/download/cross_process_file_lock.h index 7efbace8..ecab1f2d 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.h +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.h @@ -20,10 +20,11 @@ class CrossProcessFileLock { public: /// Non-blocking acquisition. Returns nullptr if another process currently /// holds the lock. Creates `directory` if missing. Throws fl::Exception on - /// unexpected errors (permission denied, etc.). + /// unexpected errors (permission denied, etc.). 
`logger` receives acquire/ + /// release diagnostics and is required — callers always have one. static std::unique_ptr TryAcquireForDirectory( const std::filesystem::path& directory, - ILogger* logger = nullptr); + ILogger& logger); ~CrossProcessFileLock(); @@ -38,11 +39,11 @@ class CrossProcessFileLock { private: struct State; // Platform-specific; defined in the .cc. - CrossProcessFileLock(std::filesystem::path path, std::unique_ptr state, ILogger* logger); + CrossProcessFileLock(std::filesystem::path path, std::unique_ptr state, ILogger& logger); std::filesystem::path path_; std::unique_ptr state_; - ILogger* logger_; + ILogger& logger_; }; /// Returning true aborts WaitForDirectoryLock with FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED. @@ -55,7 +56,7 @@ using CancellationPredicate = std::function; std::unique_ptr WaitForDirectoryLock( const std::filesystem::path& directory, const CancellationPredicate& is_cancelled, - ILogger* logger = nullptr, + ILogger& logger, std::chrono::milliseconds poll_interval = std::chrono::milliseconds{1250}, std::chrono::milliseconds timeout = std::chrono::hours{3}); diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index 7d2b0502..1a017ba6 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -184,7 +184,7 @@ DownloadManager::DownloadManager(std::string cache_directory, std::string_view c logger_(logger), registry_client_(std::make_unique( kDefaultRegistryRegion, logger, std::make_unique(logger, !disable_region_fallback))), - blob_downloader_(std::make_unique(&logger)) {} + blob_downloader_(std::make_unique(logger)) {} DownloadManager::~DownloadManager() = default; @@ -279,7 +279,7 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, // other process to finish. 
return progress_cb && progress_cb(0.0f) != 0; }; - auto lock = CrossProcessFileLock::TryAcquireForDirectory(model_path, &logger_); + auto lock = CrossProcessFileLock::TryAcquireForDirectory(model_path, logger_); if (!lock) { logger_.Log(LogLevel::Information, "Model download is being performed by another process. Waiting on lock at '" + @@ -290,7 +290,7 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, // worse than the bandwidth contention this mutex exists to prevent. Release it // for the wait and re-acquire before the cache re-check + download below. download_guard.unlock(); - lock = WaitForDirectoryLock(model_path, cancel_pred, &logger_); + lock = WaitForDirectoryLock(model_path, cancel_pred, logger_); download_guard.lock(); } diff --git a/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc b/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc index 259e4a78..cb1fefbc 100644 --- a/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc +++ b/sdk_v2/cpp/test/internal_api/blob_download_state_test.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "download/blob_download_state.h" +#include "test_helpers.h" #include @@ -121,9 +122,10 @@ TEST(BlobDownloadStateTest, SaveAndLoadRoundTrip) { for (int32_t i : {0, 2, 4, 6, 8}) { s->MarkChunkComplete(i); } - s->SaveState(); + s->SaveState(fl::test::NullLog()); } - auto loaded = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + auto loaded = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks, + fl::test::NullLog()); ASSERT_NE(loaded, nullptr); EXPECT_EQ(loaded->completed_count, 5); EXPECT_EQ(loaded->highest_completed_chunk, 8); @@ -148,13 +150,14 @@ TEST(BlobDownloadStateTest, SaveStateAdvancesBitmapByteAlignedStart) { for (int32_t i = 0; i < 80; ++i) { s->MarkChunkComplete(i); } - s->SaveState(); + s->SaveState(fl::test::NullLog()); // 64 bits = 1 full word; next 16 bits in word 1. Aligned start lands on // 80 (multiple of 8). EXPECT_EQ(s->bitmap_byte_aligned_start, 80); // Reload and verify the implicit prefix is still considered complete. 
- auto loaded = BlobDownloadState::LoadState("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks); + auto loaded = BlobDownloadState::LoadState("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks, + fl::test::NullLog()); ASSERT_NE(loaded, nullptr); for (int32_t i = 0; i < 80; ++i) { EXPECT_TRUE(loaded->IsChunkComplete(i)); @@ -182,7 +185,7 @@ TEST(BlobDownloadStateTest, SaveStateFromUnalignedStartDoesNotMarkPendingComplet for (int32_t i = 0; i < 8; ++i) { s->MarkChunkComplete(i); } - s->SaveState(); + s->SaveState(fl::test::NullLog()); EXPECT_EQ(s->bitmap_byte_aligned_start, 8); // Extend the contiguous prefix across the word boundary: chunks 0..64 done, @@ -190,13 +193,14 @@ TEST(BlobDownloadStateTest, SaveStateFromUnalignedStartDoesNotMarkPendingComplet for (int32_t i = 8; i <= 64; ++i) { s->MarkChunkComplete(i); } - s->SaveState(); + s->SaveState(fl::test::NullLog()); // Must round down to 64 (the byte boundary at/below the first pending chunk), // never overshoot to 72. EXPECT_EQ(s->bitmap_byte_aligned_start, 64); // Reload and prove chunks 65..71 (never downloaded) are still pending. 
- auto loaded = BlobDownloadState::LoadState("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks); + auto loaded = BlobDownloadState::LoadState("blob", local, kBigBlobSize, kChunkSize, kBigNumChunks, + fl::test::NullLog()); ASSERT_NE(loaded, nullptr); EXPECT_TRUE(loaded->IsChunkComplete(64)); for (int32_t i = 65; i < 72; ++i) { @@ -210,7 +214,7 @@ TEST(BlobDownloadStateTest, SaveStateFromUnalignedStartDoesNotMarkPendingComplet TEST(BlobDownloadStateTest, LoadStateReturnsNullWhenFileMissing) { TempDir d; auto local = d.path() / "blob.bin"; - auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks, fl::test::NullLog()); EXPECT_EQ(s, nullptr); } @@ -224,7 +228,7 @@ TEST(BlobDownloadStateTest, LoadStateRejectsBadMagic) { f.put(static_cast(0)); // version for (int i = 0; i < 64; ++i) f.put(0); // padding } - auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks, fl::test::NullLog()); EXPECT_EQ(s, nullptr); } @@ -234,10 +238,11 @@ TEST(BlobDownloadStateTest, LoadStateRejectsBlobSizeMismatch) { { auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); s->MarkChunkComplete(0); - s->SaveState(); + s->SaveState(fl::test::NullLog()); } // Reload with a *different* expected blob_size — should be rejected. 
- auto s = BlobDownloadState::LoadState("blob", local, kBlobSize + 1, kChunkSize, kNumChunks); + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize + 1, kChunkSize, kNumChunks, + fl::test::NullLog()); EXPECT_EQ(s, nullptr); } @@ -247,9 +252,10 @@ TEST(BlobDownloadStateTest, LoadStateRejectsChunkSizeMismatch) { { auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); s->MarkChunkComplete(0); - s->SaveState(); + s->SaveState(fl::test::NullLog()); } - auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize + 1, kNumChunks); + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize + 1, kNumChunks, + fl::test::NullLog()); EXPECT_EQ(s, nullptr); } @@ -259,9 +265,10 @@ TEST(BlobDownloadStateTest, LoadStateRejectsTotalChunksMismatch) { { auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); s->MarkChunkComplete(0); - s->SaveState(); + s->SaveState(fl::test::NullLog()); } - auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks + 1); + auto s = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks + 1, + fl::test::NullLog()); EXPECT_EQ(s, nullptr); } @@ -271,13 +278,13 @@ TEST(BlobDownloadStateTest, DeleteStateRemovesSidecar) { { auto s = BlobDownloadState::CreateNew("blob", local, kBlobSize, kChunkSize, kNumChunks); s->MarkChunkComplete(0); - s->SaveState(); + s->SaveState(fl::test::NullLog()); } EXPECT_TRUE(fs::exists(BlobDownloadState::GetStateFilePath(local))); - BlobDownloadState::DeleteState(local); + BlobDownloadState::DeleteState(local, fl::test::NullLog()); EXPECT_FALSE(fs::exists(BlobDownloadState::GetStateFilePath(local))); // Re-deletion when the file is already absent is a no-op (best-effort). 
- BlobDownloadState::DeleteState(local); + BlobDownloadState::DeleteState(local, fl::test::NullLog()); } TEST(BlobDownloadStateTest, IsCompleteFlipsTrueWhenAllChunksMarked) { diff --git a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc index d140fb08..42c208f3 100644 --- a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc +++ b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "download/cross_process_file_lock.h" +#include "test_helpers.h" #include "exception.h" @@ -55,7 +56,7 @@ class TempDir { TEST(CrossProcessFileLockTest, TryAcquireSucceedsForFreshDirectory) { TempDir dir; - auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); ASSERT_NE(lock, nullptr); EXPECT_TRUE(fs::exists(lock->path())); @@ -68,7 +69,7 @@ TEST(CrossProcessFileLockTest, ReleaseOnDestructionRemovesLockFile) { fs::path lock_file; { - auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); ASSERT_NE(lock, nullptr); lock_file = lock->path(); EXPECT_TRUE(fs::exists(lock_file)); @@ -81,20 +82,20 @@ TEST(CrossProcessFileLockTest, ReleaseOnDestructionRemovesLockFile) { TEST(CrossProcessFileLockTest, SecondAcquireReturnsNullWhileFirstIsHeld) { TempDir dir; - auto first = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto first = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); ASSERT_NE(first, nullptr); - auto second = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto second = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); EXPECT_EQ(second, nullptr); } TEST(CrossProcessFileLockTest, 
ReacquireSucceedsAfterRelease) { TempDir dir; { - auto first = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto first = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); ASSERT_NE(first, nullptr); } - auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); EXPECT_NE(reacquired, nullptr); } @@ -104,7 +105,7 @@ TEST(CrossProcessFileLockTest, CreatesDirectoryIfMissing) { ASSERT_FALSE(fs::exists(missing)); - auto lock = CrossProcessFileLock::TryAcquireForDirectory(missing); + auto lock = CrossProcessFileLock::TryAcquireForDirectory(missing, fl::test::NullLog()); ASSERT_NE(lock, nullptr); EXPECT_TRUE(fs::is_directory(missing)); @@ -115,7 +116,7 @@ TEST(CrossProcessFileLockTest, WaitForLockReturnsImmediatelyWhenAvailable) { TempDir dir; auto start = std::chrono::steady_clock::now(); - auto lock = WaitForDirectoryLock(dir.path(), []() { return false; }); + auto lock = WaitForDirectoryLock(dir.path(), []() { return false; }, fl::test::NullLog()); auto elapsed = std::chrono::steady_clock::now() - start; ASSERT_NE(lock, nullptr); @@ -125,7 +126,7 @@ TEST(CrossProcessFileLockTest, WaitForLockReturnsImmediatelyWhenAvailable) { TEST(CrossProcessFileLockTest, WaitForLockAcquiresAfterHolderReleases) { TempDir dir; - auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); ASSERT_NE(holder, nullptr); // Release the holder after a short delay on another thread. 
@@ -137,7 +138,7 @@ TEST(CrossProcessFileLockTest, WaitForLockAcquiresAfterHolderReleases) { auto start = std::chrono::steady_clock::now(); auto lock = WaitForDirectoryLock(dir.path(), []() { return false; }, - /*logger=*/nullptr, + /*logger=*/fl::test::NullLog(), /*poll_interval=*/std::chrono::milliseconds(100), /*timeout=*/std::chrono::seconds(10)); auto elapsed = std::chrono::steady_clock::now() - start; @@ -150,7 +151,7 @@ TEST(CrossProcessFileLockTest, WaitForLockAcquiresAfterHolderReleases) { TEST(CrossProcessFileLockTest, WaitForLockThrowsOnCancellation) { TempDir dir; - auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); ASSERT_NE(holder, nullptr); std::atomic cancel{false}; @@ -162,7 +163,7 @@ TEST(CrossProcessFileLockTest, WaitForLockThrowsOnCancellation) { try { (void)WaitForDirectoryLock(dir.path(), [&cancel]() { return cancel.load(); }, - /*logger=*/nullptr, + /*logger=*/fl::test::NullLog(), /*poll_interval=*/std::chrono::milliseconds(100), /*timeout=*/std::chrono::seconds(10)); canceller.join(); @@ -175,13 +176,13 @@ TEST(CrossProcessFileLockTest, WaitForLockThrowsOnCancellation) { TEST(CrossProcessFileLockTest, WaitForLockThrowsOnTimeout) { TempDir dir; - auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto holder = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); ASSERT_NE(holder, nullptr); try { (void)WaitForDirectoryLock(dir.path(), []() { return false; }, - /*logger=*/nullptr, + /*logger=*/fl::test::NullLog(), /*poll_interval=*/std::chrono::milliseconds(50), /*timeout=*/std::chrono::milliseconds(200)); FAIL() << "expected fl::Exception(FOUNDRY_LOCAL_ERROR_INTERNAL)"; @@ -213,7 +214,7 @@ TEST(CrossProcessFileLockTest, HeldAcrossProcessesAndReleasedWhenHolderExits) { // still holding it. 
_exit skips C++/gtest teardown — correct for a forked // child — so the lock's destructor never runs and the file is left behind; // the kernel still drops the flock on process exit. - auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto lock = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); if (lock == nullptr) { _exit(2); } @@ -236,7 +237,7 @@ TEST(CrossProcessFileLockTest, HeldAcrossProcessesAndReleasedWhenHolderExits) { ASSERT_TRUE(child_acquired) << "child process never acquired the lock"; // A different process holds it — we must be locked out. - EXPECT_EQ(CrossProcessFileLock::TryAcquireForDirectory(dir.path()), nullptr); + EXPECT_EQ(CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()), nullptr); // Release the child and reap it. { std::ofstream(release_signal).put('x'); } @@ -247,7 +248,7 @@ TEST(CrossProcessFileLockTest, HeldAcrossProcessesAndReleasedWhenHolderExits) { // The holder process is gone: the kernel released its flock even though the // lock file is still on disk, so the next acquirer simply re-locks it. 
- auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path()); + auto reacquired = CrossProcessFileLock::TryAcquireForDirectory(dir.path(), fl::test::NullLog()); EXPECT_NE(reacquired, nullptr) << "lock not released after the holder process exited"; } #endif // !_WIN32 diff --git a/sdk_v2/cpp/test/internal_api/download_test.cc b/sdk_v2/cpp/test/internal_api/download_test.cc index eaaff606..37f62dc9 100644 --- a/sdk_v2/cpp/test/internal_api/download_test.cc +++ b/sdk_v2/cpp/test/internal_api/download_test.cc @@ -10,6 +10,7 @@ #include "catalog/azure_catalog_models.h" #include "download/blob_download_state.h" #include "download/blob_downloader.h" +#include "download/cross_process_file_lock.h" #include "download/download_manager.h" #include "download/inference_model_writer.h" #include "download/model_registry_client.h" @@ -1259,6 +1260,65 @@ TEST(DownloadManagerTest, ModelDownloadsSerializeUnderGlobalLock) { << "The global download mutex must serialize all model downloads, even for different models."; } +// Exercise the cross-process file-lock branch of DownloadModel that +// the in-process-only concurrency tests never reach. A second process (simulated +// here by holding the lock directly) is mid-download on the same model directory. +// DownloadModel must (1) observe the held lock, (2) block in WaitForDirectoryLock +// without holding the in-process download mutex, and (3) once the lock releases +// AND inference_model.json is present, return the cached result via the post-lock +// recheck WITHOUT re-downloading anything. +TEST(DownloadManagerTest, WaitsForCrossProcessLockThenServesCachedResult) { + TempDir tmpdir; + DownloadManager manager(tmpdir.string(), "eastus", 64, fl::test::NullLog()); + + // Registry + downloader that must stay untouched if the post-lock recheck works. 
+ auto registry = std::make_unique( + "eastus", fl::test::NullLog(), std::make_unique(fl::test::NullLog(), false), + [](const std::string&) { + return MakeRegistryResponse( + R"({"blobSasUri": "https://storage.blob.core.windows.net/c?sig=test"})"); + }); + manager.SetModelRegistryClient(std::move(registry)); + + auto mock = std::make_unique(); + mock->blobs_to_return = {{"weights.bin", 100}}; // non-empty: a stray download would be visible + auto* mock_raw = mock.get(); + manager.SetBlobDownloader(std::move(mock)); + + ModelInfo info; + info.model_id = "wait-model:1"; + info.name = "wait-model"; + info.uri = "azureml://registries/test/models/wait-model/versions/1"; + info.string_properties[FOUNDRY_LOCAL_MODEL_PROP_PUBLISHER_STR] = "Pub"; + + // Simulate another process holding the model-directory lock mid-download. + auto model_dir = fs::path(tmpdir.string()) / "Pub" / "wait-model-1"; + fs::create_directories(model_dir); + auto held = CrossProcessFileLock::TryAcquireForDirectory(model_dir, fl::test::NullLog()); + ASSERT_NE(held, nullptr); + + std::atomic done{false}; + std::string result; + std::thread worker([&] { result = manager.DownloadModel(info); done.store(true); }); + + // The call must block on the cross-process lock rather than proceed to download. + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + EXPECT_FALSE(done.load()) << "DownloadModel should block while another process holds the lock"; + + // The "other process" finishes: publish inference_model.json, then release the lock. + { + std::ofstream(model_dir / "inference_model.json") << "{}"; + } + held.reset(); + + worker.join(); + + EXPECT_TRUE(done.load()); + EXPECT_EQ(result, model_dir.string()); + EXPECT_TRUE(mock_raw->downloaded_blobs.empty()) + << "Model became available while waiting; the post-lock recheck must skip the download"; +} + // HasInferenceModelJson must return false instead of throwing when the path // it's asked about is not a directory (e.g. a regular file). 
Previously the // underlying directory_iterator would throw filesystem_error. @@ -1322,7 +1382,9 @@ TEST(DownloadManagerTest, WaitsForCrossProcessLockThenServesCachedResult) { EXPECT_FALSE(done.load()) << "DownloadModel should block while another process holds the lock"; // The "other process" finishes: publish inference_model.json, then release the lock. - { std::ofstream(model_dir / "inference_model.json") << "{}"; } + { + std::ofstream(model_dir / "inference_model.json") << "{}"; + } held.reset(); worker.join(); @@ -1567,6 +1629,11 @@ class FakeChunkAzureDownloader : public AzureBlobDownloader { using AzureBlobDownloader::AzureBlobDownloader; + // AzureBlobDownloader now requires a logger reference. Tests don't care about + // diagnostics, so default-construct against the shared null logger to keep the + // many `FakeChunkAzureDownloader d;` sites terse. + FakeChunkAzureDownloader() : AzureBlobDownloader(fl::test::NullLog()) {} + protected: int64_t GetBlobSize(ChunkContext& /*ctx*/) override { return blob_size; } @@ -1620,7 +1687,7 @@ TEST(AzureBlobDownloaderResumeTest, SkipsChunksAlreadyMarkedCompleteInSidecar) { for (int32_t i = 0; i < 5; ++i) { state->MarkChunkComplete(i); } - state->SaveState(); + state->SaveState(fl::test::NullLog()); } FakeChunkAzureDownloader d; @@ -1657,7 +1724,7 @@ TEST(AzureBlobDownloaderResumeTest, IgnoresSidecarWhenDataFileTruncated) { for (int32_t i = 0; i < 5; ++i) { state->MarkChunkComplete(i); } - state->SaveState(); + state->SaveState(fl::test::NullLog()); } // ...but the data file is truncated, far smaller than kBlobSize. { @@ -1729,7 +1796,8 @@ TEST(AzureBlobDownloaderResumeTest, PersistsSidecarOnChunkFailure) { // Verify the persisted sidecar records partial progress — some chunks completed // before the failure, but not all — so a future resume can skip the ones already // done and re-fetch only the rest. 
- auto retry_state = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks); + auto retry_state = BlobDownloadState::LoadState("blob", local, kBlobSize, kChunkSize, kNumChunks, + fl::test::NullLog()); ASSERT_NE(retry_state, nullptr); EXPECT_GT(retry_state->completed_count, 0); EXPECT_LT(retry_state->completed_count, kNumChunks); @@ -1843,4 +1911,3 @@ TEST(AzureBlobDownloaderResumeTest, ChunkFailureCancelsInFlightPeersFast) { EXPECT_LT(elapsed_ms, 2000) << "Cancel-cascade should drain in-flight peers fast; took " << elapsed_ms << " ms"; } - From fdfa26743a56e5cca26defc78a34e10d5ee49f60 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Fri, 19 Jun 2026 19:45:51 -0500 Subject: [PATCH 33/36] fix(download): report global running total for monotonic progress The per-chunk progress rework reported each worker's own pre-add snapshot (bytes_completed.fetch_add(size) + size). Concurrent workers compute distinct snapshots and then race to call bytes_written_cb, so a worker holding a smaller snapshot can reach the user callback after one holding a larger one -- reported progress goes backwards, breaking the monotonic-non-decreasing contract that DISABLED_DownloadFixture.RemoveAndRedownloadSmallestModel asserts. Restore the pre-rework behavior of reporting bytes_completed.load() (the global running total, always non-decreasing) after the add. Stays inside the try, so the user-cancel fast-drain fix is unaffected. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index aaef73ac..01a5b428 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -299,9 +299,13 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // the catch below runs azure_ctx.Cancel() so peers blocked mid-chunk are // interrupted immediately rather than only noticing the cancel flag when // they finish their current chunk. - int64_t new_total = bytes_completed.fetch_add(size, std::memory_order_relaxed) + size; + // Report the global running total so progress stays monotonically + // non-decreasing: concurrent workers complete chunks out of order, and + // the public progress contract must never hand the callback a smaller + // percentage after a larger one. + bytes_completed.fetch_add(size, std::memory_order_relaxed); if (bytes_written_cb) { - bytes_written_cb(new_total); + bytes_written_cb(bytes_completed.load(std::memory_order_relaxed)); } } catch (...) { std::lock_guard lock(error_mutex); From 19b2f2bbff327f3055b0bac725e778bac25316e6 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Mon, 22 Jun 2026 01:06:57 -0500 Subject: [PATCH 34/36] Address review: simplify serialization, escalate save errors, formatting, static lock helper - blob_download_state: drop the redundant per-field temp buffer in WriteNative/ ReadNative and write/read trivially-copyable scalars directly. The template is kept for the type-safe call sites; atomicity comes from the tmp-file + rename and thread-safety from the caller's mutex, not from this buffer. - blob_download_state: raise the three SaveState write-path failures (open/write tmp, commit rename) to Error, since they mean the resume sidecar could not be persisted. 
LoadState parse failures stay Warning/Information (they self-recover by re-downloading) and DeleteState stays Warning (a stray sidecar is cosmetic). - blob_downloader: read the cancellation flag once into was_cancelled rather than loading the atomic twice in the post-join error/cancel path. - blob_downloader: put the initial-percent computation on a single line and add a blank line after the resume-validation block. - cross_process_file_lock: make WaitForDirectoryLock and CancellationPredicate members of CrossProcessFileLock alongside the TryAcquireForDirectory factory; update the caller and tests. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../cpp/src/download/blob_download_state.cc | 19 ++++--------- sdk_v2/cpp/src/download/blob_downloader.cc | 11 ++++---- .../src/download/cross_process_file_lock.cc | 2 +- .../src/download/cross_process_file_lock.h | 28 +++++++++---------- sdk_v2/cpp/src/download/download_manager.cc | 2 +- .../cross_process_file_lock_test.cc | 26 +++++++---------- 6 files changed, 37 insertions(+), 51 deletions(-) diff --git a/sdk_v2/cpp/src/download/blob_download_state.cc b/sdk_v2/cpp/src/download/blob_download_state.cc index adcde2a7..329109d7 100644 --- a/sdk_v2/cpp/src/download/blob_download_state.cc +++ b/sdk_v2/cpp/src/download/blob_download_state.cc @@ -44,21 +44,14 @@ constexpr int32_t kBitsPerWord = 64; template void WriteNative(std::ostream& out, T value) { static_assert(std::is_trivially_copyable_v); - unsigned char buf[sizeof(T)]; - std::memcpy(buf, &value, sizeof(T)); - out.write(reinterpret_cast(buf), sizeof(T)); + out.write(reinterpret_cast(&value), sizeof(T)); } template bool ReadNative(std::istream& in, T& out_value) { static_assert(std::is_trivially_copyable_v); - unsigned char buf[sizeof(T)]; - in.read(reinterpret_cast(buf), sizeof(T)); - if (!in) { - return false; - } - std::memcpy(&out_value, buf, sizeof(T)); - return true; + in.read(reinterpret_cast(&out_value), sizeof(T)); + return 
static_cast(in); } int64_t NowUnixMs() { @@ -316,7 +309,7 @@ bool BlobDownloadState::SaveState(ILogger& logger) { { std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc); if (!out) { - logger.Log(LogLevel::Warning, "Failed to open download state tmp file: " + tmp_path.string()); + logger.Log(LogLevel::Error, "Failed to open download state tmp file: " + tmp_path.string()); return false; } out.write(kMagic, 4); @@ -334,7 +327,7 @@ bool BlobDownloadState::SaveState(ILogger& logger) { out.write(reinterpret_cast(src), trunc_len); } if (!out) { - logger.Log(LogLevel::Warning, "Failed to write download state tmp file: " + tmp_path.string()); + logger.Log(LogLevel::Error, "Failed to write download state tmp file: " + tmp_path.string()); return false; } } @@ -351,7 +344,7 @@ bool BlobDownloadState::SaveState(ILogger& logger) { // next SaveState call retry from the up-to-date in-memory state. std::error_code rm_ec; std::filesystem::remove(tmp_path, rm_ec); - logger.Log(LogLevel::Warning, + logger.Log(LogLevel::Error, "Failed to commit download state file: " + tmp_path.string() + " -> " + state_path.string() + " (" + ec.message() + "); previous state retained, will retry on next save"); diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index f4bb9df7..f46382ef 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -189,6 +189,7 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, state.reset(); } } + if (!state) { state = BlobDownloadState::CreateNew(blob_name, local_path, blob_size, static_cast(kChunkSize), num_chunks); @@ -361,13 +362,14 @@ void AzureBlobDownloader::DownloadBlob(const std::string& sas_uri, // observer that watches the data file sees a fully-closed handle. 
writer.Close(); - if (first_error || (cancelled && cancelled->load(std::memory_order_relaxed))) { + const bool was_cancelled = cancelled && cancelled->load(std::memory_order_relaxed); + if (first_error || was_cancelled) { // Persist what we have so the next attempt resumes from here. { std::lock_guard lock(state->mutex()); state->SaveState(logger_); } - if (cancelled && cancelled->load(std::memory_order_relaxed)) { + if (was_cancelled) { FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled"); } std::rethrow_exception(first_error); @@ -525,10 +527,7 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, } if (options.progress) { - float initial_percent = total_size > 0 - ? static_cast(skipped_bytes) / - static_cast(total_size) * 100.0f - : 0.0f; + float initial_percent = total_size > 0 ? static_cast(skipped_bytes) / static_cast(total_size) * 100.0f : 0.0f; int result = options.progress(initial_percent); if (result != 0) { FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled by user callback return value"); diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.cc b/sdk_v2/cpp/src/download/cross_process_file_lock.cc index 7a031aa9..5c01a334 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.cc +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.cc @@ -189,7 +189,7 @@ std::unique_ptr CrossProcessFileLock::TryAcquireForDirecto new CrossProcessFileLock(std::move(lock_path), std::move(state), logger)); } -std::unique_ptr WaitForDirectoryLock( +std::unique_ptr CrossProcessFileLock::WaitForDirectoryLock( const std::filesystem::path& directory, const CancellationPredicate& is_cancelled, ILogger& logger, diff --git a/sdk_v2/cpp/src/download/cross_process_file_lock.h b/sdk_v2/cpp/src/download/cross_process_file_lock.h index ecab1f2d..6c206275 100644 --- a/sdk_v2/cpp/src/download/cross_process_file_lock.h +++ b/sdk_v2/cpp/src/download/cross_process_file_lock.h @@ -18,6 +18,9 @@ class ILogger; /// is harmless. 
class CrossProcessFileLock { public: + /// Returning true aborts WaitForDirectoryLock with FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED. + using CancellationPredicate = std::function; + /// Non-blocking acquisition. Returns nullptr if another process currently /// holds the lock. Creates `directory` if missing. Throws fl::Exception on /// unexpected errors (permission denied, etc.). `logger` receives acquire/ @@ -26,6 +29,17 @@ class CrossProcessFileLock { const std::filesystem::path& directory, ILogger& logger); + /// Polls TryAcquireForDirectory until the lock is acquired, `is_cancelled()` + /// returns true, or `timeout` elapses. + /// Throws FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED on cancellation, or + /// FOUNDRY_LOCAL_ERROR_INTERNAL on timeout. + static std::unique_ptr WaitForDirectoryLock( + const std::filesystem::path& directory, + const CancellationPredicate& is_cancelled, + ILogger& logger, + std::chrono::milliseconds poll_interval = std::chrono::milliseconds{1250}, + std::chrono::milliseconds timeout = std::chrono::hours{3}); + ~CrossProcessFileLock(); CrossProcessFileLock(const CrossProcessFileLock&) = delete; @@ -46,18 +60,4 @@ class CrossProcessFileLock { ILogger& logger_; }; -/// Returning true aborts WaitForDirectoryLock with FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED. -using CancellationPredicate = std::function; - -/// Polls TryAcquireForDirectory until the lock is acquired, `is_cancelled()` -/// returns true, or `timeout` elapses. -/// Throws FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED on cancellation, or -/// FOUNDRY_LOCAL_ERROR_INTERNAL on timeout. 
-std::unique_ptr WaitForDirectoryLock( - const std::filesystem::path& directory, - const CancellationPredicate& is_cancelled, - ILogger& logger, - std::chrono::milliseconds poll_interval = std::chrono::milliseconds{1250}, - std::chrono::milliseconds timeout = std::chrono::hours{3}); - } // namespace fl diff --git a/sdk_v2/cpp/src/download/download_manager.cc b/sdk_v2/cpp/src/download/download_manager.cc index 1a017ba6..b576d3ee 100644 --- a/sdk_v2/cpp/src/download/download_manager.cc +++ b/sdk_v2/cpp/src/download/download_manager.cc @@ -290,7 +290,7 @@ std::string DownloadManager::DownloadModel(const ModelInfo& info, // worse than the bandwidth contention this mutex exists to prevent. Release it // for the wait and re-acquire before the cache re-check + download below. download_guard.unlock(); - lock = WaitForDirectoryLock(model_path, cancel_pred, logger_); + lock = CrossProcessFileLock::WaitForDirectoryLock(model_path, cancel_pred, logger_); download_guard.lock(); } diff --git a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc index 42c208f3..34b46496 100644 --- a/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc +++ b/sdk_v2/cpp/test/internal_api/cross_process_file_lock_test.cc @@ -116,7 +116,7 @@ TEST(CrossProcessFileLockTest, WaitForLockReturnsImmediatelyWhenAvailable) { TempDir dir; auto start = std::chrono::steady_clock::now(); - auto lock = WaitForDirectoryLock(dir.path(), []() { return false; }, fl::test::NullLog()); + auto lock = CrossProcessFileLock::WaitForDirectoryLock(dir.path(), []() { return false; }, fl::test::NullLog()); auto elapsed = std::chrono::steady_clock::now() - start; ASSERT_NE(lock, nullptr); @@ -136,11 +136,9 @@ TEST(CrossProcessFileLockTest, WaitForLockAcquiresAfterHolderReleases) { }); auto start = std::chrono::steady_clock::now(); - auto lock = WaitForDirectoryLock(dir.path(), - []() { return false; }, - /*logger=*/fl::test::NullLog(), - 
/*poll_interval=*/std::chrono::milliseconds(100), - /*timeout=*/std::chrono::seconds(10)); + auto lock = CrossProcessFileLock::WaitForDirectoryLock( + dir.path(), []() { return false; }, /*logger=*/fl::test::NullLog(), + /*poll_interval=*/std::chrono::milliseconds(100), /*timeout=*/std::chrono::seconds(10)); auto elapsed = std::chrono::steady_clock::now() - start; releaser.join(); @@ -161,11 +159,9 @@ TEST(CrossProcessFileLockTest, WaitForLockThrowsOnCancellation) { }); try { - (void)WaitForDirectoryLock(dir.path(), - [&cancel]() { return cancel.load(); }, - /*logger=*/fl::test::NullLog(), - /*poll_interval=*/std::chrono::milliseconds(100), - /*timeout=*/std::chrono::seconds(10)); + (void)CrossProcessFileLock::WaitForDirectoryLock( + dir.path(), [&cancel]() { return cancel.load(); }, /*logger=*/fl::test::NullLog(), + /*poll_interval=*/std::chrono::milliseconds(100), /*timeout=*/std::chrono::seconds(10)); canceller.join(); FAIL() << "expected fl::Exception(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED)"; } catch (const Exception& ex) { @@ -180,11 +176,9 @@ TEST(CrossProcessFileLockTest, WaitForLockThrowsOnTimeout) { ASSERT_NE(holder, nullptr); try { - (void)WaitForDirectoryLock(dir.path(), - []() { return false; }, - /*logger=*/fl::test::NullLog(), - /*poll_interval=*/std::chrono::milliseconds(50), - /*timeout=*/std::chrono::milliseconds(200)); + (void)CrossProcessFileLock::WaitForDirectoryLock( + dir.path(), []() { return false; }, /*logger=*/fl::test::NullLog(), + /*poll_interval=*/std::chrono::milliseconds(50), /*timeout=*/std::chrono::milliseconds(200)); FAIL() << "expected fl::Exception(FOUNDRY_LOCAL_ERROR_INTERNAL)"; } catch (const Exception& ex) { EXPECT_EQ(ex.code(), FOUNDRY_LOCAL_ERROR_INTERNAL); From 35729e9849be4f4ab4f714a79a00a35e9a2eadcb Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Mon, 22 Jun 2026 01:37:26 -0500 Subject: [PATCH 35/36] Wrap initial-percent assignment to stay within the 120-char line limit Break after the assignment so both lines fit 
under 120; this keeps the computation compact (two lines instead of the prior four) while satisfying the repository line-length limit. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/blob_downloader.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk_v2/cpp/src/download/blob_downloader.cc b/sdk_v2/cpp/src/download/blob_downloader.cc index f46382ef..c5da2fbb 100644 --- a/sdk_v2/cpp/src/download/blob_downloader.cc +++ b/sdk_v2/cpp/src/download/blob_downloader.cc @@ -527,7 +527,8 @@ void DownloadBlobsToDirectory(IBlobDownloader& downloader, } if (options.progress) { - float initial_percent = total_size > 0 ? static_cast(skipped_bytes) / static_cast(total_size) * 100.0f : 0.0f; + float initial_percent = + total_size > 0 ? static_cast(skipped_bytes) / static_cast(total_size) * 100.0f : 0.0f; int result = options.progress(initial_percent); if (result != 0) { FL_THROW(FOUNDRY_LOCAL_ERROR_OPERATION_CANCELLED, "download cancelled by user callback return value"); From 5cbaddde35d3b3c1a3adb2cac9cd3d6c8e68b985 Mon Sep 17 00:00:00 2001 From: Bhagirath Mehta Date: Mon, 22 Jun 2026 02:06:28 -0500 Subject: [PATCH 36/36] Drop unused includes from download_manager.h The public header references neither std::atomic nor CrossProcessFileLock (the type appears only in a comment; the lock is a local in download_manager.cc). Both and cross_process_file_lock.h are included directly by the .cc and by download_test.cc, so removing them from the header keeps its include surface minimal without affecting any consumer. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk_v2/cpp/src/download/download_manager.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdk_v2/cpp/src/download/download_manager.h b/sdk_v2/cpp/src/download/download_manager.h index 9742eebf..7099dcb8 100644 --- a/sdk_v2/cpp/src/download/download_manager.h +++ b/sdk_v2/cpp/src/download/download_manager.h @@ -3,11 +3,9 @@ #pragma once #include "download/blob_downloader.h" -#include "download/cross_process_file_lock.h" #include "download/model_registry_client.h" #include "model_info.h" -#include #include #include #include