From 71f3f86e05787de5663b8f2e434c014a54f1eb3a Mon Sep 17 00:00:00 2001 From: Longfang Zhao Date: Mon, 1 Jun 2026 14:57:33 -0700 Subject: [PATCH] File-backed mmap for XNNPACK packed weights (#19862) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Add file-backed mmap support to `XNNWeightsCache` so that packed weight allocations go to a `MAP_SHARED` file instead of dirty heap. Once the writeback scheduled by `msync(MS_ASYNC)` completes, pages become clean file-backed and drop out of iOS `phys_footprint`. ## How it works 1. `set_packed_cache_path()` configures the cache file path via `BackendOptions` 2. `initialize_for_runtime()` opens the cache file (`O_CREAT | O_TRUNC`) and takes an exclusive non-blocking `flock`; if the lock cannot be taken, the mmap path is disabled for that instance 3. Each `reserve_space()` call extends the file via `ftruncate` and creates a `MAP_SHARED` mmap region — XNNPACK packs weights directly into file-backed pages 4. `finalize_for_runtime()` calls `msync(MS_ASYNC)` on newly added regions only (incremental sync), scheduling the background flush that makes pages clean 5. On Windows, mmap is unavailable — all code paths fall back to heap allocation automatically (`packed_file_fd_` stays -1) ## Expected savings ~400 MB packed weights move from dirty heap to clean file-backed pages (0 `phys_footprint` on iOS). 
Differential Revision: D106673663 --- backends/xnnpack/runtime/XNNPACKBackend.cpp | 6 + backends/xnnpack/runtime/XNNPACKBackend.h | 7 + backends/xnnpack/runtime/XNNWeightsCache.cpp | 181 ++++++++++++++++-- backends/xnnpack/runtime/XNNWeightsCache.h | 56 +++++- .../xnnpack/runtime/XnnpackBackendOptions.cpp | 26 +++ .../xnnpack/runtime/XnnpackBackendOptions.h | 5 + .../test/runtime/test_xnn_weights_cache.cpp | 69 +++++++ 7 files changed, 327 insertions(+), 23 deletions(-) diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp index 9eaadda86f8..3a5d6ab7958 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.cpp +++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp @@ -98,6 +98,12 @@ class XnnpackBackend final weights_cache_mutex_, std::defer_lock); if (use_weight_cache) { lock_weights_cache.lock(); + + const auto& cache_path = options_.get_packed_cache_path(); + if (!cache_path.empty()) { + weights_cache_->set_packed_cache_path(cache_path); + } + weights_cache_->initialize_for_runtime( context.get_runtime_allocator(), named_data_map); workspace->set_uses_weight_cache(); diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h index eb40047f3f8..e3492c3f5f3 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.h +++ b/backends/xnnpack/runtime/XNNPACKBackend.h @@ -13,6 +13,13 @@ const char workspace_sharing_mode_option_key[] = "workspace_sharing_mode"; // across delegate instances. Changes only affect subsequently loaded models. const char weight_cache_option_key[] = "weight_cache_enabled"; +/// Path for the packed weight file. When set, reserve_space() allocates from +/// a MAP_SHARED file instead of heap; msync makes pages clean on iOS. +// Must remain a C array (not const char*) so it can bind to the +// BackendOptions::set_option(const char (&)[N], ...) template overloads. 
+// @lint-ignore CLANGTIDY facebook-hte-CArray +const char packed_cache_path_option_key[] = "packed_cache_path"; + /// Workspace sharing mode. This is a backend option that can be set via the /// set_option API to control memory sharing between CALL_DELEGATE instances. /// This is useful for reducing memory consumption. diff --git a/backends/xnnpack/runtime/XNNWeightsCache.cpp b/backends/xnnpack/runtime/XNNWeightsCache.cpp index 7767c65285a..70c410e5729 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.cpp +++ b/backends/xnnpack/runtime/XNNWeightsCache.cpp @@ -9,7 +9,14 @@ #include #include #include +#ifndef _WIN32 +#include +#include +#include #include +#include +#include +#endif #include #include #include @@ -41,6 +48,21 @@ XNNWeightsCache::XNNWeightsCache() { (enum xnn_status(*)(void*))XNNWeightsCache::delete_cache; } +XNNWeightsCache::~XNNWeightsCache() { +#ifndef _WIN32 + for (auto& region : mmap_regions_) { + if (region.addr != nullptr && region.addr != MAP_FAILED) { + munmap(region.addr, region.size); + } + } + mmap_regions_.clear(); + if (packed_file_fd_ >= 0) { + close(packed_file_fd_); + packed_file_fd_ = -1; + } +#endif +} + Error XNNWeightsCache::initialize_for_runtime( MemoryAllocator* runtime_allocator, const NamedDataMap* named_data_map) { @@ -48,6 +70,41 @@ Error XNNWeightsCache::initialize_for_runtime( named_data_map_ = named_data_map; is_finalized_ = false; +#ifndef _WIN32 + // Open the file for packed weights. Each reserve_space() call + // independently mmaps a region of the file. Once packed_file_disabled_ + // is set we never re-open — re-opening with O_TRUNC would corrupt any + // still-live mappings into the same path and cause SIGBUS on access. 
+ if (!packed_cache_path_.empty() && packed_file_fd_ < 0 && + !packed_file_disabled_) { + packed_file_fd_ = + open(packed_cache_path_.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600); + if (packed_file_fd_ < 0) { + ET_LOG( + Error, + "Failed to open packed weight file: %s (errno=%d)", + packed_cache_path_.c_str(), + errno); + } else if (flock(packed_file_fd_, LOCK_EX | LOCK_NB) != 0) { + // Another XNNWeightsCache instance (this process or another) is + // already using this path. NOTE(review): open() above has already + // applied O_TRUNC, so its mappings are not protected - lock first. + // Disable mmap for this instance; fall back to heap for its lifetime. + ET_LOG( + Error, + "Another instance is using packed weight cache file %s (errno=%d); " + "disabling mmap path", + packed_cache_path_.c_str(), + errno); + close(packed_file_fd_); + packed_file_fd_ = -1; + packed_file_disabled_ = true; + } else { + ET_LOG(Info, "Opened packed weight file: %s", packed_cache_path_.c_str()); + } + } +#endif + return Error::Ok; } @@ -73,6 +130,26 @@ Result> XNNWeightsCache::finalize_for_runtime() { } } +#ifndef _WIN32 + // Schedule async flush for newly added regions only. + // MS_ASYNC returns immediately; OS flushes in the background. 
+ if (mmap_regions_.size() > mmap_regions_synced_) { + size_t new_count = mmap_regions_.size() - mmap_regions_synced_; + for (size_t i = mmap_regions_synced_; i < mmap_regions_.size(); ++i) { + if (mmap_regions_[i].addr != nullptr) { + msync(mmap_regions_[i].addr, mmap_regions_[i].size, MS_ASYNC); + } + } + mmap_regions_synced_ = mmap_regions_.size(); + ET_LOG( + Info, + "Scheduled async flush: %zu new regions (%zu total), %zu MB packed weights", + new_count, + mmap_regions_.size(), + packed_file_used_ / (1024 * 1024)); + } +#endif + return packed_data_names; } @@ -111,12 +188,30 @@ Error XNNWeightsCache::delete_packed_data( entry->second.ref_count--; if (entry->second.ref_count == 0) { void* packed_data_ptr = packed_data_ptrs_[entry->second.offset]; - // Erase the key/value from the map frees the pointer holding the packed - // data + // Erase the key/value from the map frees the pointer holding the + // packed data. No-op on the file-backed mmap path, where the + // container is not populated. packed_pointer_to_container_.erase(packed_data_ptr); - // remove the pointer from the packed_data_ptrs_ +#ifndef _WIN32 + // File-backed mmap path: munmap the region so VM and page-cache + // usage is released, not just retained until cache destruction. + // The vector slot is set to nullptr below so existing offsets remain + // valid for any concurrent lookups. + auto region_it = file_ptr_to_region_index_.find(packed_data_ptr); + if (region_it != file_ptr_to_region_index_.end()) { + size_t idx = region_it->second; + MmapRegion& region = mmap_regions_[idx]; + if (region.addr != nullptr && region.addr != MAP_FAILED) { + munmap(region.addr, region.size); + region.addr = nullptr; + region.size = 0; + } + file_ptr_to_region_index_.erase(region_it); + } +#endif + // Remove the pointer from packed_data_ptrs_. packed_data_ptrs_[entry->second.offset] = nullptr; - // Erase the name to packed metadata entry + // Erase the name to packed metadata entry. 
name_to_packed_data_metadata_.erase(entry->first); } } @@ -158,38 +253,80 @@ size_t XNNWeightsCache::look_up( return packed_weight_entry->second.offset; } -/** - * Reserve space in the weight cache for n bytes of weight data, aligned to - * context->kPackedAllocationAlignment. This function will return nullptr if - * the allocation fails. - */ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { - // MemoryAllocator* allocator = context->runtime_allocator_; - // void* reserved_pointer = allocator->allocate(n, - // context->kPackedAllocationAlignment); +#ifndef _WIN32 + if (context->packed_file_fd_ >= 0) { + size_t page_size = sysconf(_SC_PAGESIZE); + size_t file_offset = + (context->packed_file_used_ + page_size - 1) & ~(page_size - 1); + size_t map_size = (n + page_size - 1) & ~(page_size - 1); + + if (ftruncate(context->packed_file_fd_, file_offset + map_size) != 0) { + ET_LOG( + Error, + "ftruncate to %zu failed (errno=%d)", + file_offset + map_size, + errno); + close(context->packed_file_fd_); + context->packed_file_fd_ = -1; + // Existing mmap_regions_ still reference this inode. Disable the + // file-backed path permanently so a future initialize_for_runtime + // doesn't re-open + O_TRUNC the same path and trigger SIGBUS on the + // stale mappings. + context->packed_file_disabled_ = true; + return context->reserve_space_heap(n); + } - // return reserved_pointer; + void* ptr = mmap( + nullptr, + map_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + context->packed_file_fd_, + file_offset); + if (ptr == MAP_FAILED) { + ET_LOG(Error, "mmap %zu bytes failed (errno=%d)", map_size, errno); + close(context->packed_file_fd_); + context->packed_file_fd_ = -1; + context->packed_file_disabled_ = true; + return context->reserve_space_heap(n); + } + + // mmap returns page-aligned (>= 4 KiB), which trivially satisfies the + // 64-byte kPackedAllocationAlignment XNNPACK expects. Assert defensively. 
+ ET_DCHECK_MSG( + (reinterpret_cast(ptr) % kPackedAllocationAlignment) == 0, + "mmap returned ptr not aligned to %zu bytes", + kPackedAllocationAlignment); + + context->packed_file_used_ = file_offset + map_size; + context->file_ptr_to_region_index_[ptr] = context->mmap_regions_.size(); + context->mmap_regions_.push_back({ptr, map_size}); + return ptr; + } +#endif + + return context->reserve_space_heap(n); +} + +void* XNNWeightsCache::reserve_space_heap(size_t n) { try { std::string data_container; - size_t raw_allocation_size = n + context->kPackedAllocationAlignment - 1; + size_t raw_allocation_size = n + kPackedAllocationAlignment - 1; data_container.resize(raw_allocation_size); void* maybe_aligned_space = data_container.data(); void* aligned_space = std::align( - context->kPackedAllocationAlignment, + kPackedAllocationAlignment, n, maybe_aligned_space, raw_allocation_size // Note that std::align mutates this value. ); ET_CHECK_MSG(aligned_space != nullptr, "Memory alignment failed."); - context->packed_pointer_to_container_[aligned_space] = - std::move(data_container); + packed_pointer_to_container_[aligned_space] = std::move(data_container); return aligned_space; } catch (std::bad_alloc& e) { - // XNNPACK can gracefully handle allocation failures, so return nullptr. - // We want to be able to recover from a failed attempt to load a large - // model without a crash. 
ET_LOG( Error, "XNN weight cache failed to allocate %zu bytes: %s.", @@ -267,6 +404,10 @@ enum xnn_status XNNWeightsCache::delete_cache(XNNWeightsCache* context) { return xnn_status_success; } +void XNNWeightsCache::set_packed_cache_path(const std::string& path) { + packed_cache_path_ = path; +} + } // namespace delegate } // namespace xnnpack } // namespace backends diff --git a/backends/xnnpack/runtime/XNNWeightsCache.h b/backends/xnnpack/runtime/XNNWeightsCache.h index f8371f93d01..a41fed49fd1 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.h +++ b/backends/xnnpack/runtime/XNNWeightsCache.h @@ -41,6 +41,14 @@ struct PackedDataMeta { class XNNWeightsCache { public: XNNWeightsCache(); + ~XNNWeightsCache(); + + // Owns OS resources (file descriptor, mmap regions). Non-copyable, + // non-movable. cppcoreguidelines-special-member-functions. + XNNWeightsCache(const XNNWeightsCache&) = delete; + XNNWeightsCache& operator=(const XNNWeightsCache&) = delete; + XNNWeightsCache(XNNWeightsCache&&) = delete; + XNNWeightsCache& operator=(XNNWeightsCache&&) = delete; /** * Initializes the XNNWeightsCache for the next xnn_create_runtime @@ -73,29 +81,31 @@ class XNNWeightsCache { */ inline size_t get_num_unpacked_data() { return unpacked_data_.size(); - }; + } /** * Returns the names of all unpacked data */ inline std::vector get_unpacked_data_names() { std::vector names; + names.reserve(unpacked_data_to_name_.size()); for (const auto& pair : unpacked_data_to_name_) { names.push_back(pair.second); } return names; - }; + } /** * Returns the packed data names */ inline std::vector get_packed_data_names() { std::vector names; + names.reserve(name_to_packed_data_metadata_.size()); for (const auto& pair : name_to_packed_data_metadata_) { names.push_back(pair.first); } return names; - }; + } /** * Loads unpacked named data from the NamedDataMap into this XNNWeightsCache @@ -115,6 +125,19 @@ class XNNWeightsCache { */ Error delete_packed_data(const std::vector& packed_names); + 
/** + * Set the path for the file-backed packed weight storage. + * When set, reserve_space() allocates from a MAP_SHARED file instead + * of heap, and finalize_for_runtime() calls msync to make pages clean. + * + * The path MUST be unique per XNNWeightsCache instance — sharing it + * across instances (or processes) would mean O_TRUNC corrupts the other + * holder's mappings (SIGBUS on access). initialize_for_runtime() takes + * an advisory exclusive flock on the file; if the lock fails the mmap + * path is disabled for this instance and allocations fall back to heap. + */ + void set_packed_cache_path(const std::string& path); + private: // Runtime Allocator used to reserve memory for packed weights MemoryAllocator* runtime_allocator_; @@ -137,6 +160,29 @@ class XNNWeightsCache { // whether or not the weight cache is finalized bool is_finalized_; + // File-backed mmap for packed weights. When packed_cache_path_ is set, + // reserve_space() allocates from this mmap'd file instead of heap. + // After msync, pages become clean file-backed → 0 phys_footprint. + // + std::string packed_cache_path_; + int packed_file_fd_{-1}; + size_t packed_file_used_{0}; + // Set after an unrecoverable mmap/ftruncate failure. Prevents re-opening + // the cache file on subsequent initialize_for_runtime() calls — re-opening + // with O_TRUNC would truncate the inode beneath any still-live mmap pages + // and the next access would raise SIGBUS. Once disabled, all reserve_space + // calls fall back to heap allocation for the lifetime of this cache. + bool packed_file_disabled_{false}; + struct MmapRegion { + void* addr; + size_t size; + }; + std::vector mmap_regions_; + size_t mmap_regions_synced_{0}; + // For file-backed packed allocations, maps the returned ptr to its index + // in mmap_regions_, so delete_packed_data() can munmap when ref_count==0. 
+ std::unordered_map file_ptr_to_region_index_; + // Function pointers to override XNNPACK's default xnn_weights_cache_provider // functions. static size_t look_up( @@ -145,6 +191,10 @@ class XNNWeightsCache { static void* reserve_space(XNNWeightsCache* context, size_t n); + // Heap-backed allocation path. Used when the mmap path is not configured + // or has failed for this allocation. + void* reserve_space_heap(size_t n); + static size_t look_up_or_insert( XNNWeightsCache* context, const xnn_weights_cache_look_up_key* cache_key, diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp index aa5f6f0302b..ffaba9508d8 100644 --- a/backends/xnnpack/runtime/XnnpackBackendOptions.cpp +++ b/backends/xnnpack/runtime/XnnpackBackendOptions.cpp @@ -37,6 +37,12 @@ Error XnnpackBackendOptions::get_option(BackendOption& option) const { option.value = static_cast(sharing_mode_.load()); } else if (strcmp(option.key, weight_cache_option_key) == 0) { option.value = weight_cache_enabled_.load(); + } else if (strcmp(option.key, packed_cache_path_option_key) == 0) { + std::array arr{}; + size_t len = + std::min(packed_cache_path_.size(), runtime::kMaxOptionValueLength - 1); + memcpy(arr.data(), packed_cache_path_.data(), len); + option.value = arr; } return Error::Ok; } @@ -66,6 +72,18 @@ Error XnnpackBackendOptions::set_option(const BackendOption& option) { } ET_LOG(Debug, "Setting XNNPACK weight cache enabled to %d.", *val); weight_cache_enabled_.store(*val); + } else if (strcmp(option.key, packed_cache_path_option_key) == 0) { + auto* val = std::get_if>( + &option.value); + if (!val) { + ET_LOG(Error, "XNNPACK packed cache path must be a string."); + return Error::InvalidArgument; + } + packed_cache_path_ = std::string(val->data()); + ET_LOG( + Debug, + "Setting XNNPACK packed cache path to %s.", + packed_cache_path_.c_str()); } return Error::Ok; } @@ -108,4 +126,12 @@ const XNNWorkspaceManager& 
XnnpackBackendOptions::workspace_manager() const { return workspace_manager_; } +const std::string& XnnpackBackendOptions::get_packed_cache_path() const { + return packed_cache_path_; +} + +void XnnpackBackendOptions::set_packed_cache_path(const std::string& path) { + packed_cache_path_ = path; +} + } // namespace executorch::backends::xnnpack diff --git a/backends/xnnpack/runtime/XnnpackBackendOptions.h b/backends/xnnpack/runtime/XnnpackBackendOptions.h index ab6c93c21a3..aed037ac835 100644 --- a/backends/xnnpack/runtime/XnnpackBackendOptions.h +++ b/backends/xnnpack/runtime/XnnpackBackendOptions.h @@ -41,6 +41,9 @@ class XnnpackBackendOptions { XNNWorkspaceManager& workspace_manager(); const XNNWorkspaceManager& workspace_manager() const; + const std::string& get_packed_cache_path() const; + void set_packed_cache_path(const std::string& path); + private: XNNWorkspaceManager workspace_manager_; @@ -56,6 +59,8 @@ class XnnpackBackendOptions { #else std::atomic weight_cache_enabled_{false}; #endif + + std::string packed_cache_path_; }; } // namespace executorch::backends::xnnpack diff --git a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp index ca149a67b5e..83937887e25 100644 --- a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp +++ b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp @@ -284,3 +284,72 @@ TEST_F(XNNWeightsCacheTest, ReusePackedWeights) { packed_data_names = weight_cache.get_packed_data_names(); ASSERT_EQ(packed_data_names.size(), 0); } + +#ifndef _WIN32 +// Verify pack-and-run works when packed weight allocations go to a +// MAP_SHARED file instead of heap. The cache path is unique per test so +// flock won't collide. +TEST_F(XNNWeightsCacheTest, PackedWeightsToMmapFile) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_test_") + + std::to_string(::getpid()) + ".packed_cache"; + // Ensure cleanup if a previous run left a file behind. 
+ ::unlink(cache_path.c_str()); + + XNNWeightsCache weight_cache; + weight_cache.set_packed_cache_path(cache_path); + + std::vector batches{1, 2, 3}; + size_t num_batches = 1; + for (size_t batch_dim : batches) { + num_batches *= batch_dim; + } + size_t input_channels = 3; + size_t output_channels = 4; + size_t padding = 32; + std::vector input_tensor(num_batches * input_channels + padding, 1.0f); + std::vector output_tensor(num_batches * output_channels, 0.0f); + + weight_cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + weight_cache, + batches, + input_channels, + output_channels, + input_tensor.data(), + output_tensor.data()); + + // The cache file should have been created and contain packed weight bytes. + struct stat st {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); + ASSERT_GT(st.st_size, 0); + + // delete_packed_data should release the mmap region without crashing. + weight_cache.delete_packed_data(weight_cache.get_packed_data_names()); + ASSERT_EQ(weight_cache.get_packed_data_names().size(), 0); + + ::unlink(cache_path.c_str()); +} + +// A second XNNWeightsCache pointing at the same cache file while the first +// one still holds it must not corrupt the first instance's mmaps. The +// second one falls back to heap and runs to completion. +TEST_F(XNNWeightsCacheTest, PackedWeightsMmapPathLockCollision) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_collision_") + + std::to_string(::getpid()) + ".packed_cache"; + ::unlink(cache_path.c_str()); + + XNNWeightsCache cache_a; + cache_a.set_packed_cache_path(cache_path); + cache_a.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + + // Second cache holding the same path before cache_a is destroyed. + XNNWeightsCache cache_b; + cache_b.set_packed_cache_path(cache_path); + // Must not throw / abort — should log and fall back to heap. 
+ Error err = + cache_b.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + ASSERT_EQ(err, Error::Ok); + + ::unlink(cache_path.c_str()); +} +#endif