Skip to content

Commit fd88929

Browse files
committed
test: Add integration tests for hinting/reporting
Add functional and performance integration tests for free page hinting and reporting. Update fast_page_fault_helper so it can run in a oneshot mode that does not require the signal to track the performance. Add new functional tests to ensure that hinting and reporting reduce the RSS as expected in the guest. Update the reduce RSS test to touch memory to reduce the chance of flakiness. Add new performance tests for the balloon device: the first tracks the CPU overhead of hinting and reporting, and the second measures the faulting latency while reporting is running in the guest. Signed-off-by: Jack Thomson <jackabt@amazon.com>
1 parent 36eafae commit fd88929

File tree

3 files changed

+327
-26
lines changed

3 files changed

+327
-26
lines changed

resources/overlay/usr/local/bin/fast_page_fault_helper.c

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sys/mman.h> // mmap
1717
#include <time.h> // clock_gettime
1818
#include <fcntl.h> // open
19+
#include <getopt.h> // getopt
1920

2021
#define MEM_SIZE_MIB (128 * 1024 * 1024)
2122
#define NANOS_PER_SEC 1000000000
@@ -30,20 +31,39 @@ void touch_memory(void *mem, size_t size, char val) {
3031

3132
int main() {
3233
sigset_t set;
33-
int signal;
34+
int signal, character;
3435
void *ptr;
3536
struct timespec start, end;
3637
long duration_nanos;
3738
FILE *out_file;
3839

39-
sigemptyset(&set);
40-
if (sigaddset(&set, SIGUSR1) == -1) {
41-
perror("sigaddset");
42-
return 1;
40+
char *options = 0;
41+
int longindex = 0;
42+
int signal_wait = 1;
43+
44+
struct option longopts[] = {
45+
{"nosignal", no_argument, NULL, 's'},
46+
{NULL, 0, NULL, 0}
47+
};
48+
49+
while((character = getopt_long(argc, argv, "s", longopts, &longindex)) != -1) {
50+
switch (character) {
51+
case 's':
52+
signal_wait = 0;
53+
break;
54+
}
4355
}
44-
if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
45-
perror("sigprocmask");
46-
return 1;
56+
57+
if (signal_wait) {
58+
sigemptyset(&set);
59+
if (sigaddset(&set, SIGUSR1) == -1) {
60+
perror("sigaddset");
61+
return 1;
62+
}
63+
if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
64+
perror("sigprocmask");
65+
return 1;
66+
}
4767
}
4868

4969
ptr = mmap(NULL, MEM_SIZE_MIB, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
@@ -53,9 +73,11 @@ int main() {
5373
return 1;
5474
}
5575

56-
touch_memory(ptr, MEM_SIZE_MIB, 1);
76+
if (signal_wait) {
77+
touch_memory(ptr, MEM_SIZE_MIB, 1);
5778

58-
sigwait(&set, &signal);
79+
sigwait(&set, &signal);
80+
}
5981

6082
clock_gettime(CLOCK_BOOTTIME, &start);
6183
touch_memory(ptr, MEM_SIZE_MIB, 2);
@@ -76,4 +98,4 @@ int main() {
7698
}
7799

78100
return 0;
79-
}
101+
}

tests/integration_tests/functional/test_balloon.py

Lines changed: 145 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""Tests for guest-side operations on /balloon resources."""
44

55
import logging
6+
import signal
67
import time
78
from subprocess import TimeoutExpired
89

@@ -293,7 +294,8 @@ def test_reinflate_balloon(uvm_plain_any):
293294

294295

295296
# pylint: disable=C0103
296-
def test_size_reduction(uvm_plain_any):
297+
@pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"])
298+
def test_size_reduction(uvm_plain_any, method):
297299
"""
298300
Verify that ballooning reduces RSS usage on a newly booted guest.
299301
"""
@@ -302,30 +304,60 @@ def test_size_reduction(uvm_plain_any):
302304
test_microvm.basic_config()
303305
test_microvm.add_net_iface()
304306

307+
traditional_balloon = method == "traditional"
308+
free_page_reporting = method == "reporting"
309+
free_page_hinting = method == "hinting"
310+
305311
# Add a memory balloon.
306312
test_microvm.api.balloon.put(
307-
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0
313+
amount_mib=0,
314+
deflate_on_oom=True,
315+
stats_polling_interval_s=0,
316+
free_page_reporting=free_page_reporting,
317+
free_page_hinting=free_page_hinting,
308318
)
309319

310320
# Start the microvm.
311321
test_microvm.start()
312322
firecracker_pid = test_microvm.firecracker_pid
313323

314-
# Check memory usage.
324+
get_stable_rss_mem_by_pid(firecracker_pid)
325+
326+
test_microvm.ssh.check_output(
327+
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
328+
)
329+
330+
time.sleep(1)
331+
315332
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)
316333

334+
_, pid, _ = test_microvm.ssh.check_output("pidof fast_page_fault_helper")
335+
# Send SIGUSR1 to the helper so it completes its run and exits, which frees the held memory
336+
test_microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
337+
338+
# Sleep to allow guest to clean up
339+
time.sleep(1)
317340
# Have the guest drop its caches.
318341
test_microvm.ssh.run("sync; echo 3 > /proc/sys/vm/drop_caches")
319-
time.sleep(5)
342+
time.sleep(2)
320343

321344
# We take the initial reading of the RSS, then calculate the amount
322345
# we need to inflate the balloon with by subtracting it from the
323346
# VM size and adding an offset of 10 MiB in order to make sure we
324347
# get a lower reading than the initial one.
325348
inflate_size = 256 - int(first_reading / 1024) + 10
326349

327-
# Now inflate the balloon.
328-
test_microvm.api.balloon.patch(amount_mib=inflate_size)
350+
if traditional_balloon:
351+
# Now inflate the balloon
352+
test_microvm.api.balloon.patch(amount_mib=inflate_size)
353+
elif free_page_hinting:
354+
test_microvm.api.balloon_hinting_start.patch()
355+
356+
_ = get_stable_rss_mem_by_pid(firecracker_pid)
357+
358+
if traditional_balloon:
359+
# Deflate the balloon completely.
360+
test_microvm.api.balloon.patch(amount_mib=0)
329361

330362
# Check memory usage again.
331363
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
@@ -534,7 +566,92 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
534566
assert stats_after_snap["available_memory"] > latest_stats["available_memory"]
535567

536568

537-
def test_memory_scrub(uvm_plain_any):
569+
@pytest.mark.parametrize("method", ["reporting", "hinting"])
570+
def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
571+
"""
572+
Test that balloon hinting and reporting still work after a snapshot/restore.
573+
"""
574+
vm = uvm_plain_any
575+
vm.spawn()
576+
vm.basic_config(
577+
vcpu_count=2,
578+
mem_size_mib=256,
579+
)
580+
vm.add_net_iface()
581+
582+
free_page_reporting = method == "reporting"
583+
free_page_hinting = method == "hinting"
584+
585+
# Add a memory balloon with stats enabled.
586+
vm.api.balloon.put(
587+
amount_mib=0,
588+
deflate_on_oom=True,
589+
stats_polling_interval_s=STATS_POLLING_INTERVAL_S,
590+
free_page_reporting=free_page_reporting,
591+
free_page_hinting=free_page_hinting,
592+
)
593+
594+
vm.start()
595+
596+
vm.ssh.check_output(
597+
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
598+
)
599+
600+
time.sleep(1)
601+
602+
# Get the firecracker pid.
603+
firecracker_pid = vm.firecracker_pid
604+
605+
# Check memory usage.
606+
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)
607+
608+
_, pid, _ = vm.ssh.check_output("pidof fast_page_fault_helper")
609+
# Send SIGUSR1 to the helper so it completes its run and exits, which frees the held memory
610+
vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
611+
time.sleep(2)
612+
613+
if free_page_hinting:
614+
vm.api.balloon_hinting_start.patch()
615+
616+
# Check memory usage again.
617+
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
618+
619+
# There should be a reduction in RSS, but it's inconsistent.
620+
# We only test that the reduction happens.
621+
assert first_reading > second_reading
622+
623+
snapshot = vm.snapshot_full()
624+
microvm = microvm_factory.build_from_snapshot(snapshot)
625+
626+
firecracker_pid = microvm.firecracker_pid
627+
628+
microvm.ssh.check_output(
629+
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
630+
)
631+
632+
time.sleep(1)
633+
634+
# Check memory usage.
635+
third_reading = get_stable_rss_mem_by_pid(firecracker_pid)
636+
637+
_, pid, _ = microvm.ssh.check_output("pidof fast_page_fault_helper")
638+
# Send SIGUSR1 to the helper so it completes its run and exits, which frees the held memory
639+
microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
640+
time.sleep(2)
641+
642+
if free_page_hinting:
643+
microvm.api.balloon_hinting_start.patch()
644+
645+
# Check memory usage again.
646+
fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid)
647+
648+
# There should be a reduction in RSS, but it's inconsistent.
649+
# We only test that the reduction happens.
650+
assert third_reading > fourth_reading
651+
652+
653+
@pytest.mark.parametrize("method", ["none", "hinting", "reporting"])
654+
def test_memory_scrub(uvm_plain_any, method):
538655
"""
539656
Test that the memory is zeroed after deflate.
540657
"""
@@ -543,29 +660,42 @@ def test_memory_scrub(uvm_plain_any):
543660
microvm.basic_config(vcpu_count=2, mem_size_mib=256)
544661
microvm.add_net_iface()
545662

663+
free_page_reporting = method == "reporting"
664+
free_page_hinting = method == "hinting"
665+
546666
# Add a memory balloon with stats enabled.
547667
microvm.api.balloon.put(
548-
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
668+
amount_mib=0,
669+
deflate_on_oom=True,
670+
stats_polling_interval_s=1,
671+
free_page_reporting=free_page_reporting,
672+
free_page_hinting=free_page_hinting,
549673
)
550674

551675
microvm.start()
552676

553677
# Dirty 60MB of pages.
554678
make_guest_dirty_memory(microvm.ssh, amount_mib=60)
555679

556-
# Now inflate the balloon with 60MB of pages.
557-
microvm.api.balloon.patch(amount_mib=60)
680+
if method == "none":
681+
# Now inflate the balloon with 60MB of pages.
682+
microvm.api.balloon.patch(amount_mib=60)
683+
elif method == "hinting":
684+
time.sleep(1)
685+
microvm.api.balloon_hinting_start.patch()
686+
elif method == "reporting":
687+
time.sleep(2)
558688

559689
# Get the firecracker pid, and open an ssh connection.
560690
firecracker_pid = microvm.firecracker_pid
561691

562692
# Wait for the inflate to complete.
563693
_ = get_stable_rss_mem_by_pid(firecracker_pid)
564694

565-
# Deflate the balloon completely.
566-
microvm.api.balloon.patch(amount_mib=0)
567-
568-
# Wait for the deflate to complete.
569-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
695+
if method == "none":
696+
# Deflate the balloon completely.
697+
microvm.api.balloon.patch(amount_mib=0)
698+
# Wait for the deflate to complete.
699+
_ = get_stable_rss_mem_by_pid(firecracker_pid)
570700

571701
microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1))

0 commit comments

Comments
 (0)