Skip to content

Commit 568ad20

Browse files
authored
Fix a long standing bug on file memory mapping on windows. (microsoft#25833)
### Description <!-- Describe your changes. --> While memory profiling some models, I noticed multiple file mapping failures in `WindowsEnv::MapFileIntoMemory()`. While this function properly checks that the mapping offset is aligned to the allocation granularity, it calculates that offset as page aligned. Also, while saving external tensors, we do not need to align big tensors to the Windows allocation granularity or to anything else that is platform dependent. Set the on-disk alignment to 4096 for all platforms. Granularity matters only for calculating the mapping address. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> Multiple failures for file mapping for certain models. This saves some hundreds of MBs for some models.
1 parent c9c23b0 commit 568ad20

File tree

7 files changed

+39
-69
lines changed

7 files changed

+39
-69
lines changed

include/onnxruntime/core/graph/model_saving_options.h

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,36 +9,30 @@ class PrepackedWeightsForGraph;
99

1010
// These options affect how the model initializers are written to the external file.
1111
// This includes options to align external initializer offset.
12-
// For models running on CPU, ORT will try to use mmap to load external
13-
// initializers. To use mmap, external initializer need to be offset aligned.
12+
// ORT will try to use mmap to load external initializers.
13+
//
1414
// ORT saves external initializers into single data file, each initializer is
1515
// accessed with offset(start position of initializer) and length(byte length of
16-
// initializer) of the data file. To use mmap, each offset need to be aligned
17-
// which means offset need to divisible by allocation granularity(64KB for
18-
// windows and 4K for other OSes). With align_offset to true, ORT will align
19-
// offset for large initializer when save ONNX model with external data file.
16+
// initializer) of the data file. With align_offset set to true, ORT will align
17+
// offset for large initializer (larger than align_threshold)
18+
// when saving ONNX model with external data file. It will align them to
19+
// on_disk_alignment value.
2020
struct ModelSavingOptions {
2121
explicit ModelSavingOptions(size_t size_threshold)
2222
: initializer_size_threshold(size_threshold) {}
2323

2424
// Minimal initializer size in bytes to be externalized on disk
2525
size_t initializer_size_threshold;
26-
// Offset will always be page aligned and allocation granularity aligned for
27-
// mmap support. This is done by padding previous tensor data with zeros
28-
// keeping same length.
26+
// Offset will always be aligned for mmap support.
27+
// This is done by padding previous tensor data with zeros keeping same length.
2928
bool align_offset = false;
3029
// Alignment threshold for size of data.
3130
// Having a low threshold will waste file space for small initializers.
3231
// Only when tensor's data size is > the align_threshold it will be force
3332
// aligned. Default to 1MB.
3433
int64_t align_threshold = 1048576;
35-
// The allocation Granularity for mmap() support.
36-
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
37-
#ifdef _WIN32
38-
int64_t allocation_granularity = 65536;
39-
#else
40-
int64_t allocation_granularity = 4096;
41-
#endif
34+
// Alignment factor for big tensors (bigger than align_threshold). Defaults to 4K.
35+
int64_t on_disk_alignment = 4096;
4236
// Force embed all external initializer into the Onnx file
4337
// Used for EPContext model generation while some nodes fallback on CPU which has external data dependency
4438
bool force_embed_external_ini = false;

onnxruntime/core/framework/tensor_external_data_info.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ void ExternalDataInfo::SetExternalLocationToProto(const std::filesystem::path& e
107107
std::ostream& ExternalDataInfo::WritePrepackedToFileAndAddToProto(
108108
const PrepackedWeightsForGraph& prepacked_for_graph,
109109
const InlinedHashSet<std::string>& blob_keys, bool align,
110-
int64_t align_threshold, int64_t allocation_granularity,
110+
int64_t align_threshold, int64_t on_disk_alignment,
111111
std::ostream& os, int64_t& external_offset, ::ONNX_NAMESPACE::TensorProto& proto) {
112112
size_t key_count = 0;
113113
for (const auto& key : blob_keys) {
@@ -120,7 +120,7 @@ std::ostream& ExternalDataInfo::WritePrepackedToFileAndAddToProto(
120120
const auto size_in_bytes = prepacked_weights->buffer_sizes_[i];
121121
if (align && static_cast<int64_t>(size_in_bytes) > align_threshold) {
122122
// return early on error
123-
if (!AlignAndPad(os, allocation_granularity, external_offset)) {
123+
if (!AlignAndPad(os, on_disk_alignment, external_offset)) {
124124
return os;
125125
}
126126
}

onnxruntime/core/framework/tensor_external_data_info.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,13 @@ class ExternalDataInfo {
4141
size_t tensor_bytes_size,
4242
::ONNX_NAMESPACE::TensorProto& proto);
4343

44-
// Pads the output with zeros according to the specified allocation_granularity
44+
// Pads the output with zeros according to the specified alignment_factor
4545
// It updates external_offset for alignment.
4646
// need to do padding before write actual tensor data as we do offset alignment at the begin of
47-
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
47+
// large tensors (offset need to be page aligned) like below:
4848
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
4949
// |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->|
50-
static std::ostream& AlignAndPad(std::ostream& stream, int64_t allocation_granularity, int64_t& external_offset) {
51-
// Align to the larger of the page size or the allocation granularity
52-
int64_t alignment_factor = std::max(static_cast<int64_t>(4096), allocation_granularity);
50+
static std::ostream& AlignAndPad(std::ostream& stream, int64_t alignment_factor, int64_t& external_offset) {
5351
// Align to the next alignment_factor boundary
5452
SafeInt<int64_t> safe_external_offset = external_offset;
5553
int64_t new_external_offset = ((safe_external_offset + alignment_factor - 1) / alignment_factor) *
@@ -66,7 +64,7 @@ class ExternalDataInfo {
6664
static std::ostream& WritePrepackedToFileAndAddToProto(
6765
const PrepackedWeightsForGraph& prepacked_for_graph,
6866
const InlinedHashSet<std::string>& blob_keys,
69-
bool align, int64_t align_threshold, int64_t allocation_granularity,
67+
bool align, int64_t align_threshold, int64_t on_disk_alignment,
7068
std::ostream& os,
7169
int64_t& external_offset,
7270
::ONNX_NAMESPACE::TensorProto& proto);

onnxruntime/core/graph/graph.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4536,14 +4536,14 @@ Status Graph::AddExternalInitializersToGraphProtoImpl(
45364536
continue;
45374537
}
45384538

4539-
// update external_offset for alignment
4539+
// update external_offset for alignment (if enabled)
45404540
// need to do padding before write actual tensor data as we do offset alignment at the begin of
4541-
// large tensors (offset need to be page aligned and allocation granularity aligned) like below:
4541+
// large tensors (offset need to be page aligned) like below:
45424542
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
45434543
// |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->|
45444544
if (model_saving_options.align_offset && static_cast<int64_t>(tensor_bytes_size) >
45454545
model_saving_options.align_threshold) {
4546-
ORT_RETURN_IF_NOT(ExternalDataInfo::AlignAndPad(external_stream, model_saving_options.allocation_granularity,
4546+
ORT_RETURN_IF_NOT(ExternalDataInfo::AlignAndPad(external_stream, model_saving_options.on_disk_alignment,
45474547
external_offset),
45484548
"Failed writing external data to: ", model_external_file_path);
45494549
}
@@ -4576,7 +4576,7 @@ Status Graph::AddExternalInitializersToGraphProtoImpl(
45764576
auto& os = ExternalDataInfo::WritePrepackedToFileAndAddToProto(
45774577
*prepacked_weights_for_graph_, blob_keys_to_external_data,
45784578
model_saving_options.align_offset, model_saving_options.align_threshold,
4579-
model_saving_options.allocation_granularity,
4579+
model_saving_options.on_disk_alignment,
45804580
external_stream, external_offset, *output_proto);
45814581
ORT_RETURN_IF_NOT(os.good(), "Failed to write pre-packed blobs to external file");
45824582
}

onnxruntime/core/platform/windows/env.cc

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ limitations under the License.
2929
#include <gsl/gsl>
3030
#include "core/common/logging/logging.h"
3131
#include "core/common/narrow.h"
32+
#include "core/common/safeint.h"
3233
#include "core/common/span_utils.h"
3334
#include "core/platform/env.h"
3435
#include "core/platform/scoped_resource.h"
@@ -439,30 +440,28 @@ Status WindowsEnv::MapFileIntoMemory(_In_z_ const ORTCHAR_T* file_path,
439440
SYSTEM_INFO sysinfo;
440441
GetSystemInfo(&sysinfo);
441442

442-
static const DWORD page_size = sysinfo.dwPageSize;
443443
static const DWORD allocation_granularity = sysinfo.dwAllocationGranularity;
444-
const FileOffsetType offset_to_page = offset % static_cast<FileOffsetType>(page_size);
445-
const size_t mapped_length = length + static_cast<size_t>(offset_to_page);
446-
const FileOffsetType mapped_offset = offset - offset_to_page;
447-
if (mapped_offset % allocation_granularity != 0) {
448-
const auto error_code = GetLastError();
449-
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
450-
"mapped offset must be a multiple of the allocation granularity",
451-
" , mapped_offset = ", mapped_offset,
452-
" , allocation_granularity = ", allocation_granularity,
453-
" , errcode = ", error_code,
454-
" - ", std::system_category().message(error_code));
455-
}
444+
const FileOffsetType offset_to_granularity = offset % static_cast<FileOffsetType>(allocation_granularity);
445+
const SIZE_T mapped_length = SafeInt<SIZE_T>(offset_to_granularity) + length;
446+
const FileOffsetType mapped_offset = offset - offset_to_granularity;
447+
assert((mapped_offset % allocation_granularity) == 0);
456448

457449
void* const mapped_base = MapViewOfFile(file_mapping_handle.get(),
458450
FILE_MAP_READ,
459451
static_cast<DWORD>((mapped_offset >> 32) & 0xFFFFFFFF),
460452
static_cast<DWORD>(mapped_offset & 0xFFFFFFFF),
461453
mapped_length);
462-
GSL_SUPPRESS(r.11)
454+
455+
if (mapped_base == nullptr) {
456+
const auto error_code = GetLastError();
457+
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
458+
"MapViewOfFile ", ToUTF8String(Basename(file_path)),
459+
" fail, errcode = ", error_code,
460+
" - ", std::system_category().message(error_code));
461+
}
463462

464463
mapped_memory =
465-
MappedMemoryPtr{reinterpret_cast<char*>(mapped_base) + offset_to_page,
464+
MappedMemoryPtr{reinterpret_cast<char*>(mapped_base) + offset_to_granularity,
466465
[mapped_base](void*) {
467466
UnmapFile(mapped_base);
468467
}};

onnxruntime/test/framework/save_model_with_external_initializers.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
8484
size_t tensor_offset;
8585
std::stringstream stream(entry.value());
8686
stream >> tensor_offset;
87-
ORT_RETURN_IF_NOT(tensor_offset % model_saving_options.allocation_granularity == 0,
87+
ORT_RETURN_IF_NOT(tensor_offset % model_saving_options.on_disk_alignment == 0,
8888
"tensor offset not align");
8989
}
9090
}

onnxruntime/test/platform/file_io_test.cc

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ TEST(FileIoTest, MapFileIntoMemory) {
157157
SYSTEM_INFO sysinfo;
158158
GetSystemInfo(&sysinfo);
159159
static const auto page_size = sysinfo.dwPageSize;
160-
static const auto allocation_granularity = sysinfo.dwAllocationGranularity;
161160
ASSERT_GT(page_size, static_cast<DWORD>(0));
162161

163162
TempFilePath tmp(ORT_TSTR("map_file_test_"));
@@ -167,21 +166,10 @@ TEST(FileIoTest, MapFileIntoMemory) {
167166
const auto offsets_and_lengths = GenerateValidOffsetLengthPairs(
168167
0, expected_data.size(), page_size / 10);
169168

170-
for (const auto& offset_and_length : offsets_and_lengths) {
171-
const auto offset = offset_and_length.first;
172-
const auto length = offset_and_length.second;
173-
174-
// The offset must be a multiple of the allocation granularity
175-
if (offset % allocation_granularity != 0) {
176-
continue;
177-
}
178-
169+
for (const auto& [offset, length] : offsets_and_lengths) {
179170
Env::MappedMemoryPtr mapped_memory{};
180-
auto status = Env::Default().MapFileIntoMemory(
181-
tmp.path.c_str(), offset, length, mapped_memory);
182-
ASSERT_TRUE(status.IsOK())
183-
<< "MapFileIntoMemory failed for offset " << offset << " and length " << length
184-
<< " with error: " << status.ErrorMessage();
171+
ASSERT_STATUS_OK(Env::Default().MapFileIntoMemory(
172+
tmp.path.c_str(), offset, length, mapped_memory));
185173

186174
auto mapped_span = gsl::make_span(mapped_memory.get(), length);
187175

@@ -190,20 +178,11 @@ TEST(FileIoTest, MapFileIntoMemory) {
190178
ASSERT_TRUE(SpanEq(mapped_span, expected_data_span));
191179
}
192180

193-
{
194-
Env::MappedMemoryPtr mapped_memory{};
195-
196-
// invalid - offset is not a multiple of the allocation granularity
197-
ASSERT_FALSE(Env::Default().MapFileIntoMemory(
198-
tmp.path.c_str(), allocation_granularity * 3 / 2, page_size / 10, mapped_memory)
199-
.IsOK());
200-
}
201-
202181
{
203182
Env::MappedMemoryPtr mapped_memory{};
204183

205184
// invalid - negative offset
206-
ASSERT_FALSE(Env::Default().MapFileIntoMemory(tmp.path.c_str(), -1, 0, mapped_memory).IsOK());
185+
ASSERT_STATUS_NOT_OK(Env::Default().MapFileIntoMemory(tmp.path.c_str(), -1, 0, mapped_memory));
207186
}
208187
}
209188
#endif

0 commit comments

Comments
 (0)