Skip to content

Commit a156a24

Browse files
committed
Merge: KVM: Fix for access_tracking_perf_test random failures
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1109 Backport a patch that skips part of the test, when the test machine has NUMA balancing enabled. Now in case of the failure only a warning will be printed like this: WARNING: vCPU0: Too many pages still idle (36738 out of 262144), this will affect performance results. JIRA: https://issues.redhat.com/browse/RHEL-92771 Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com> Approved-by: Vitaly Kuznetsov <vkuznets@redhat.com> Approved-by: Bandan Das <bsd@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Jan Stancek <jstancek@redhat.com>
2 parents df1a9a1 + 75f5e2f commit a156a24

File tree

3 files changed

+85
-20
lines changed

3 files changed

+85
-20
lines changed

tools/testing/selftests/kvm/access_tracking_perf_test.c

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,16 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
6565
/* Whether to overlap the regions of memory vCPUs access. */
6666
static bool overlap_memory_access;
6767

68+
/*
69+
* If the test should only warn if there are too many idle pages (i.e., it is
70+
* expected).
71+
* -1: Not yet set.
72+
* 0: We do not expect too many idle pages, so FAIL if too many idle pages.
73+
* 1: Having too many idle pages is expected, so merely print a warning if
74+
* too many idle pages are found.
75+
*/
76+
static int idle_pages_warn_only = -1;
77+
6878
struct test_params {
6979
/* The backing source for the region of memory. */
7080
enum vm_mem_backing_src_type backing_src;
@@ -177,18 +187,12 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
177187
* arbitrary; high enough that we ensure most memory access went through
178188
* access tracking but low enough as to not make the test too brittle
179189
* over time and across architectures.
180-
*
181-
* When running the guest as a nested VM, "warn" instead of asserting
182-
* as the TLB size is effectively unlimited and the KVM doesn't
183-
* explicitly flush the TLB when aging SPTEs. As a result, more pages
184-
* are cached and the guest won't see the "idle" bit cleared.
185190
*/
186191
if (still_idle >= pages / 10) {
187-
#ifdef __x86_64__
188-
TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
192+
TEST_ASSERT(idle_pages_warn_only,
189193
"vCPU%d: Too many pages still idle (%lu out of %lu)",
190194
vcpu_idx, still_idle, pages);
191-
#endif
195+
192196
printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
193197
"this will affect performance results.\n",
194198
vcpu_idx, still_idle, pages);
@@ -328,6 +332,32 @@ static void run_test(enum vm_guest_mode mode, void *arg)
328332
memstress_destroy_vm(vm);
329333
}
330334

335+
static int access_tracking_unreliable(void)
336+
{
337+
#ifdef __x86_64__
338+
/*
339+
* When running nested, the TLB size may be effectively unlimited (for
340+
* example, this is the case when running on KVM L0), and KVM doesn't
341+
* explicitly flush the TLB when aging SPTEs. As a result, more pages
342+
* are cached and the guest won't see the "idle" bit cleared.
343+
*/
344+
if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
345+
puts("Skipping idle page count sanity check, because the test is run nested");
346+
return 1;
347+
}
348+
#endif
349+
/*
350+
* When NUMA balancing is enabled, guest memory will be unmapped to get
351+
* NUMA faults, dropping the Accessed bits.
352+
*/
353+
if (is_numa_balancing_enabled()) {
354+
puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
355+
return 1;
356+
}
357+
358+
return 0;
359+
}
360+
331361
static void help(char *name)
332362
{
333363
puts("");
@@ -342,6 +372,12 @@ static void help(char *name)
342372
printf(" -v: specify the number of vCPUs to run.\n");
343373
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
344374
" them into a separate region of memory for each vCPU.\n");
375+
printf(" -w: Control whether the test warns or fails if more than 10%%\n"
376+
" of pages are still seen as idle/old after accessing guest\n"
377+
" memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
378+
" mode, the test fails by default, but switches to warn only\n"
379+
" if NUMA balancing is enabled or the test detects it's running\n"
380+
" in a VM.\n");
345381
backing_src_help("-s");
346382
puts("");
347383
exit(0);
@@ -359,7 +395,7 @@ int main(int argc, char *argv[])
359395

360396
guest_modes_append_default();
361397

362-
while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
398+
while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
363399
switch (opt) {
364400
case 'm':
365401
guest_modes_cmdline(optarg);
@@ -376,6 +412,11 @@ int main(int argc, char *argv[])
376412
case 's':
377413
params.backing_src = parse_backing_src_type(optarg);
378414
break;
415+
case 'w':
416+
idle_pages_warn_only =
417+
atoi_non_negative("Idle pages warning",
418+
optarg);
419+
break;
379420
case 'h':
380421
default:
381422
help(argv[0]);
@@ -388,6 +429,9 @@ int main(int argc, char *argv[])
388429
"CONFIG_IDLE_PAGE_TRACKING is not enabled");
389430
close(page_idle_fd);
390431

432+
if (idle_pages_warn_only == -1)
433+
idle_pages_warn_only = access_tracking_unreliable();
434+
391435
for_each_guest_mode(run_test, &params);
392436

393437
return 0;

tools/testing/selftests/kvm/include/test_util.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i);
153153
void backing_src_help(const char *flag);
154154
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
155155
long get_run_delay(void);
156+
bool is_numa_balancing_enabled(void);
156157

157158
/*
158159
* Whether or not the given source type is shared memory (as opposed to

tools/testing/selftests/kvm/lib/test_util.c

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,37 +132,57 @@ void print_skip(const char *fmt, ...)
132132
puts(", skipping test");
133133
}
134134

135-
bool thp_configured(void)
135+
static bool test_sysfs_path(const char *path)
136136
{
137-
int ret;
138137
struct stat statbuf;
138+
int ret;
139139

140-
ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
140+
ret = stat(path, &statbuf);
141141
TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
142-
"Error in stating /sys/kernel/mm/transparent_hugepage");
142+
"Error in stat()ing '%s'", path);
143143

144144
return ret == 0;
145145
}
146146

147-
size_t get_trans_hugepagesz(void)
147+
bool thp_configured(void)
148+
{
149+
return test_sysfs_path("/sys/kernel/mm/transparent_hugepage");
150+
}
151+
152+
static size_t get_sysfs_val(const char *path)
148153
{
149154
size_t size;
150155
FILE *f;
151156
int ret;
152157

153-
TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
154-
155-
f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
156-
TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
158+
f = fopen(path, "r");
159+
TEST_ASSERT(f, "Error opening '%s'", path);
157160

158161
ret = fscanf(f, "%ld", &size);
162+
TEST_ASSERT(ret > 0, "Error reading '%s'", path);
163+
164+
/* Re-scan the input stream to verify the entire file was read. */
159165
ret = fscanf(f, "%ld", &size);
160-
TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
161-
fclose(f);
166+
TEST_ASSERT(ret < 1, "Error reading '%s'", path);
162167

168+
fclose(f);
163169
return size;
164170
}
165171

172+
size_t get_trans_hugepagesz(void)
173+
{
174+
TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
175+
176+
return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
177+
}
178+
179+
bool is_numa_balancing_enabled(void)
180+
{
181+
if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
182+
return false;
183+
return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
184+
}
185+
166186
size_t get_def_hugetlb_pagesz(void)
167187
{
168188
char buf[64];

0 commit comments

Comments
 (0)