Skip to content

Commit c0c2ec9

Browse files
committed
refactor(balloon): do not madvise(MADV_DONTNEED) when resuming from file
This operation is completely useless after we mmap-ed anonymous private zero pages on top of the file mapping. Also, add a TODO for memfd-backed memory, as it's currently not supported using madvise. Signed-off-by: Riccardo Mancini <mancio@amazon.com>
1 parent 79c6f9c commit c0c2ec9

File tree

1 file changed

+47
-36
lines changed

1 file changed

+47
-36
lines changed

src/vmm/src/vstate/memory.rs

Lines changed: 47 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -118,43 +118,54 @@ impl GuestRegionMmapExt {
118118
) -> Result<(), GuestMemoryError> {
119119
let phys_address = self.get_host_address(caddr)?;
120120

121-
// If and only if we are resuming from a snapshot file, we have a file and it's mapped
122-
// private
123-
if self.inner.file_offset().is_some() && self.inner.flags() & libc::MAP_PRIVATE != 0 {
124-
// Mmap a new anonymous region over the present one in order to create a hole
125-
// with zero pages.
126-
// This workaround is (only) needed after resuming from a snapshot file because the
127-
// guest memory is mmaped from file as private. In this case, MADV_DONTNEED on the
128-
// file only drops any anonymous pages in range, but subsequent accesses would read
129-
// whatever page is stored on the backing file. Mmapping anonymous pages ensures it's
130-
// zeroed.
131-
// SAFETY: The address and length are known to be valid.
132-
let ret = unsafe {
133-
libc::mmap(
134-
phys_address.cast(),
135-
len,
136-
libc::PROT_READ | libc::PROT_WRITE,
137-
libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
138-
-1,
139-
0,
140-
)
141-
};
142-
if ret == libc::MAP_FAILED {
143-
let os_error = std::io::Error::last_os_error();
144-
error!("discard_range: mmap failed: {:?}", os_error);
145-
return Err(GuestMemoryError::IOError(os_error));
121+
match (self.inner.file_offset(), self.inner.flags()) {
122+
// If and only if we are resuming from a snapshot file, we have a file and it's mapped
123+
// private
124+
(Some(_), flags) if flags & libc::MAP_PRIVATE != 0 => {
125+
// Mmap a new anonymous region over the present one in order to create a hole
126+
// with zero pages.
127+
// This workaround is (only) needed after resuming from a snapshot file because the
128+
// guest memory is mmaped from file as private. In this case, MADV_DONTNEED on the
129+
// file only drops any anonymous pages in range, but subsequent accesses would read
130+
// whatever page is stored on the backing file. Mmapping anonymous pages ensures
131+
// it's zeroed.
132+
// SAFETY: The address and length are known to be valid.
133+
let ret = unsafe {
134+
libc::mmap(
135+
phys_address.cast(),
136+
len,
137+
libc::PROT_READ | libc::PROT_WRITE,
138+
libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
139+
-1,
140+
0,
141+
)
142+
};
143+
if ret == libc::MAP_FAILED {
144+
let os_error = std::io::Error::last_os_error();
145+
error!("discard_range: mmap failed: {:?}", os_error);
146+
Err(GuestMemoryError::IOError(os_error))
147+
} else {
148+
Ok(())
149+
}
150+
}
151+
// Match either the case of an anonymous mapping, or the case
152+
// of a shared file mapping.
153+
// TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
154+
// (or in general MAP_SHARED of a fd). In those cases we should use
155+
// fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
156+
// We keep falling to the madvise branch to keep the previous behaviour.
157+
_ => {
158+
// Madvise the region in order to mark it as not used.
159+
// SAFETY: The address and length are known to be valid.
160+
let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
161+
if ret < 0 {
162+
let os_error = std::io::Error::last_os_error();
163+
error!("discard_range: madvise failed: {:?}", os_error);
164+
Err(GuestMemoryError::IOError(os_error))
165+
} else {
166+
Ok(())
167+
}
146168
}
147-
}
148-
149-
// Madvise the region in order to mark it as not used.
150-
// SAFETY: The address and length are known to be valid.
151-
let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
152-
if ret < 0 {
153-
let os_error = std::io::Error::last_os_error();
154-
error!("discard_range: madvise failed: {:?}", os_error);
155-
Err(GuestMemoryError::IOError(os_error))
156-
} else {
157-
Ok(())
158169
}
159170
}
160171
}

0 commit comments

Comments
 (0)