Skip to content

Commit 6f09bdf

Browse files
committed
test: Add integration tests for hinting/reporting
Add integration tests for free page hinting and reporting, covering both functionality and performance. Update fast_page_fault_helper so it can run in a one-shot mode that does not wait for a signal before the timed memory touch. Add functional tests to ensure that hinting and reporting reduce the guest's RSS as expected. Update the RSS reduction test to touch memory first, reducing the chance of flakiness. Add performance tests for the balloon device: the first tracks the CPU overhead of hinting and reporting, and the second measures page-fault latency while reporting is running in the guest. Signed-off-by: Jack Thomson <jackabt@amazon.com>
1 parent 603a03b commit 6f09bdf

File tree

3 files changed

+313
-28
lines changed

3 files changed

+313
-28
lines changed

resources/overlay/usr/local/bin/fast_page_fault_helper.c

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sys/mman.h> // mmap
1717
#include <time.h> // clock_gettime
1818
#include <fcntl.h> // open
19+
#include <getopt.h> // getopt
1920

2021
#define MEM_SIZE_MIB (128 * 1024 * 1024)
2122
#define NANOS_PER_SEC 1000000000
@@ -30,20 +31,39 @@ void touch_memory(void *mem, size_t size, char val) {
3031

3132
int main() {
3233
sigset_t set;
33-
int signal;
34+
int signal, character;
3435
void *ptr;
3536
struct timespec start, end;
3637
long duration_nanos;
3738
FILE *out_file;
3839

39-
sigemptyset(&set);
40-
if (sigaddset(&set, SIGUSR1) == -1) {
41-
perror("sigaddset");
42-
return 1;
40+
char *options = 0;
41+
int longindex = 0;
42+
int signal_wait = 1;
43+
44+
struct option longopts[] = {
45+
{"nosignal", no_argument, NULL, 's'},
46+
{NULL, 0, NULL, 0}
47+
};
48+
49+
while((character = getopt_long(argc, argv, "s", longopts, &longindex)) != -1) {
50+
switch (character) {
51+
case 's':
52+
signal_wait = 0;
53+
break;
54+
}
4355
}
44-
if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
45-
perror("sigprocmask");
46-
return 1;
56+
57+
if (signal_wait) {
58+
sigemptyset(&set);
59+
if (sigaddset(&set, SIGUSR1) == -1) {
60+
perror("sigaddset");
61+
return 1;
62+
}
63+
if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) {
64+
perror("sigprocmask");
65+
return 1;
66+
}
4767
}
4868

4969
ptr = mmap(NULL, MEM_SIZE_MIB, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
@@ -53,9 +73,11 @@ int main() {
5373
return 1;
5474
}
5575

56-
touch_memory(ptr, MEM_SIZE_MIB, 1);
76+
if (signal_wait) {
77+
touch_memory(ptr, MEM_SIZE_MIB, 1);
5778

58-
sigwait(&set, &signal);
79+
sigwait(&set, &signal);
80+
}
5981

6082
clock_gettime(CLOCK_BOOTTIME, &start);
6183
touch_memory(ptr, MEM_SIZE_MIB, 2);
@@ -76,4 +98,4 @@ int main() {
7698
}
7799

78100
return 0;
79-
}
101+
}

tests/integration_tests/functional/test_balloon.py

Lines changed: 138 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import logging
66
import time
7+
import signal
78
from subprocess import TimeoutExpired
89

910
import pytest
@@ -13,7 +14,6 @@
1314

1415
STATS_POLLING_INTERVAL_S = 1
1516

16-
1717
def get_stable_rss_mem_by_pid(pid, percentage_delta=1):
1818
"""
1919
Get the RSS memory that a guest uses, given the pid of the guest.
@@ -83,7 +83,6 @@ def make_guest_dirty_memory(ssh_connection, amount_mib=32):
8383

8484
time.sleep(5)
8585

86-
8786
def _test_rss_memory_lower(test_microvm):
8887
"""Check inflating the balloon makes guest use less rss memory."""
8988
# Get the firecracker pid, and open an ssh connection.
@@ -293,7 +292,8 @@ def test_reinflate_balloon(uvm_plain_any):
293292

294293

295294
# pylint: disable=C0103
296-
def test_size_reduction(uvm_plain_any):
295+
@pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"])
296+
def test_size_reduction(uvm_plain_any, method):
297297
"""
298298
Verify that ballooning reduces RSS usage on a newly booted guest.
299299
"""
@@ -302,30 +302,57 @@ def test_size_reduction(uvm_plain_any):
302302
test_microvm.basic_config()
303303
test_microvm.add_net_iface()
304304

305+
traditional_balloon = method == "traditional"
306+
free_page_reporting = method == "reporting"
307+
free_page_hinting = method == "hinting"
308+
305309
# Add a memory balloon.
306310
test_microvm.api.balloon.put(
307-
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0
311+
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0,
312+
free_page_reporting=free_page_reporting, free_page_hinting=free_page_hinting
308313
)
309314

310315
# Start the microvm.
311316
test_microvm.start()
312317
firecracker_pid = test_microvm.firecracker_pid
313318

314-
# Check memory usage.
319+
get_stable_rss_mem_by_pid(firecracker_pid)
320+
321+
test_microvm.ssh.check_output(
322+
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
323+
)
324+
325+
time.sleep(1)
326+
315327
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)
316328

329+
_, pid, _ = test_microvm.ssh.check_output("pidof fast_page_fault_helper")
330+
# Send SIGUSR1 so the helper finishes and exits, freeing the memory it holds
331+
test_microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
332+
333+
# Sleep to allow guest to clean up
334+
time.sleep(1)
317335
# Have the guest drop its caches.
318336
test_microvm.ssh.run("sync; echo 3 > /proc/sys/vm/drop_caches")
319-
time.sleep(5)
337+
time.sleep(2)
320338

321339
# We take the initial reading of the RSS, then calculate the amount
322340
# we need to inflate the balloon with by subtracting it from the
323341
# VM size and adding an offset of 10 MiB in order to make sure we
324342
# get a lower reading than the initial one.
325343
inflate_size = 256 - int(first_reading / 1024) + 10
326344

327-
# Now inflate the balloon.
328-
test_microvm.api.balloon.patch(amount_mib=inflate_size)
345+
if traditional_balloon:
346+
# Now inflate the balloon
347+
test_microvm.api.balloon.patch(amount_mib=inflate_size)
348+
elif free_page_hinting:
349+
test_microvm.api.balloon_hinting_start.patch()
350+
351+
_ = get_stable_rss_mem_by_pid(firecracker_pid)
352+
353+
if traditional_balloon:
354+
# Deflate the balloon completely.
355+
test_microvm.api.balloon.patch(amount_mib=0)
329356

330357
# Check memory usage again.
331358
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
@@ -534,7 +561,91 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
534561
assert stats_after_snap["available_memory"] > latest_stats["available_memory"]
535562

536563

537-
def test_memory_scrub(uvm_plain_any):
564+
@pytest.mark.parametrize("method", ["reporting", "hinting"])
565+
def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
566+
"""
567+
Test that balloon hinting and reporting work after a snapshot/restore.
568+
"""
569+
vm = uvm_plain_any
570+
vm.spawn()
571+
vm.basic_config(
572+
vcpu_count=2,
573+
mem_size_mib=256,
574+
)
575+
vm.add_net_iface()
576+
577+
free_page_reporting = method == "reporting"
578+
free_page_hinting = method == "hinting"
579+
580+
# Add a memory balloon with stats enabled.
581+
vm.api.balloon.put(
582+
amount_mib=0,
583+
deflate_on_oom=True,
584+
stats_polling_interval_s=STATS_POLLING_INTERVAL_S,
585+
free_page_reporting=free_page_reporting, free_page_hinting=free_page_hinting
586+
)
587+
588+
vm.start()
589+
590+
vm.ssh.check_output(
591+
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
592+
)
593+
594+
time.sleep(1)
595+
596+
# Get the firecracker pid.
597+
firecracker_pid = vm.firecracker_pid
598+
599+
# Check memory usage.
600+
first_reading = get_stable_rss_mem_by_pid(firecracker_pid)
601+
602+
_, pid, _ = vm.ssh.check_output("pidof fast_page_fault_helper")
603+
# Send SIGUSR1 so the helper finishes and exits, freeing the memory it holds
604+
vm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
605+
time.sleep(2)
606+
607+
if free_page_hinting:
608+
vm.api.balloon_hinting_start.patch()
609+
610+
# Check memory usage again.
611+
second_reading = get_stable_rss_mem_by_pid(firecracker_pid)
612+
613+
# There should be a reduction in RSS, but it's inconsistent.
614+
# We only test that the reduction happens.
615+
assert first_reading > second_reading
616+
617+
snapshot = vm.snapshot_full()
618+
microvm = microvm_factory.build_from_snapshot(snapshot)
619+
620+
firecracker_pid = microvm.firecracker_pid
621+
622+
microvm.ssh.check_output(
623+
"nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 </dev/null &"
624+
)
625+
626+
time.sleep(1)
627+
628+
# Check memory usage.
629+
third_reading = get_stable_rss_mem_by_pid(firecracker_pid)
630+
631+
_, pid, _ = microvm.ssh.check_output("pidof fast_page_fault_helper")
632+
# Send SIGUSR1 so the helper finishes and exits, freeing the memory it holds
633+
microvm.ssh.check_output(f"kill -s {signal.SIGUSR1} {pid}")
634+
time.sleep(2)
635+
636+
if free_page_hinting:
637+
microvm.api.balloon_hinting_start.patch()
638+
639+
# Check memory usage again.
640+
fourth_reading = get_stable_rss_mem_by_pid(firecracker_pid)
641+
642+
# There should be a reduction in RSS, but it's inconsistent.
643+
# We only test that the reduction happens.
644+
assert third_reading > fourth_reading
645+
646+
647+
@pytest.mark.parametrize("method", ["none", "hinting", "reporting"])
648+
def test_memory_scrub(uvm_plain_any, method):
538649
"""
539650
Test that the memory is zeroed after deflate.
540651
"""
@@ -543,29 +654,39 @@ def test_memory_scrub(uvm_plain_any):
543654
microvm.basic_config(vcpu_count=2, mem_size_mib=256)
544655
microvm.add_net_iface()
545656

657+
free_page_reporting = method == "reporting"
658+
free_page_hinting = method == "hinting"
659+
546660
# Add a memory balloon with stats enabled.
547661
microvm.api.balloon.put(
548-
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
662+
amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1,
663+
free_page_reporting=free_page_reporting, free_page_hinting=free_page_hinting
549664
)
550665

551666
microvm.start()
552667

553668
# Dirty 60MB of pages.
554669
make_guest_dirty_memory(microvm.ssh, amount_mib=60)
555670

556-
# Now inflate the balloon with 60MB of pages.
557-
microvm.api.balloon.patch(amount_mib=60)
671+
if method == "none":
672+
# Now inflate the balloon with 60MB of pages.
673+
microvm.api.balloon.patch(amount_mib=60)
674+
elif method == "hinting":
675+
time.sleep(1)
676+
microvm.api.balloon_hinting_start.patch()
677+
elif method == "reporting":
678+
time.sleep(2)
558679

559680
# Get the firecracker pid, and open an ssh connection.
560681
firecracker_pid = microvm.firecracker_pid
561682

562683
# Wait for the inflate to complete.
563684
_ = get_stable_rss_mem_by_pid(firecracker_pid)
564685

565-
# Deflate the balloon completely.
566-
microvm.api.balloon.patch(amount_mib=0)
567-
568-
# Wait for the deflate to complete.
569-
_ = get_stable_rss_mem_by_pid(firecracker_pid)
686+
if method == "none":
687+
# Deflate the balloon completely.
688+
microvm.api.balloon.patch(amount_mib=0)
689+
# Wait for the deflate to complete.
690+
_ = get_stable_rss_mem_by_pid(firecracker_pid)
570691

571692
microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1))

0 commit comments

Comments
 (0)