
Commit 6777885

Vipin Sharma authored and Sean Christopherson committed
KVM: x86/mmu: Track possible NX huge pages separately for TDP vs. Shadow MMU
Track possible NX huge pages for the TDP MMU separately from Shadow MMUs in
anticipation of doing recovery for the TDP MMU while holding mmu_lock for read
instead of write.

Use a small structure to hold the list of pages along with the number of
pages/entries in the list, as relying on kvm->stat.nx_lpage_splits to calculate
the number of pages to recover would result in over-zapping when both TDP and
Shadow MMUs are active.

Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Vipin Sharma <vipinsh@google.com>
Co-developed-by: James Houghton <jthoughton@google.com>
Signed-off-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20250707224720.4016504-2-jthoughton@google.com
[sean: rewrite changelog, use #ifdef instead of dummy KVM_TDP_MMU #define]
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent c17b750 commit 6777885
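
To make the over-zapping concern in the changelog concrete, here is a minimal user-space sketch (not kernel code). The page counts and the recovery ratio of 4 are illustrative assumptions; only the DIV_ROUND_UP(pages, ratio) sizing mirrors the patch, which derives each recovery quota from the per-MMU-type nr_pages instead of the combined kvm->stat.nx_lpage_splits.

/*
 * Illustrative sketch only: shows why sizing each per-MMU-type recovery pass
 * from the combined nx_lpage_splits stat would over-zap.  The counts below
 * (100 TDP pages, 20 shadow pages, ratio 4) are assumed, not from the patch.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long tdp_pages = 100, shadow_pages = 20; /* assumed workload */
	unsigned int ratio = 4;           /* assumed nx_huge_pages_recovery_ratio */

	/*
	 * Sizing both passes from the combined stat: each list would be asked
	 * to zap DIV_ROUND_UP(120, 4) = 30 pages, more than intended.
	 */
	unsigned long combined_quota = DIV_ROUND_UP(tdp_pages + shadow_pages, ratio);

	/* Per-type nr_pages (what the patch tracks): 25 TDP + 5 shadow. */
	unsigned long tdp_quota = DIV_ROUND_UP(tdp_pages, ratio);
	unsigned long shadow_quota = DIV_ROUND_UP(shadow_pages, ratio);

	printf("combined-stat quota per pass: %lu\n", combined_quota);
	printf("per-type quotas: TDP %lu, shadow %lu\n", tdp_quota, shadow_quota);
	return 0;
}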

File tree

4 files changed: 72 additions, 36 deletions

arch/x86/include/asm/kvm_host.h

Lines changed: 26 additions & 13 deletions
@@ -1348,6 +1348,30 @@ enum kvm_apicv_inhibit {
 	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED),	\
 	__APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
 
+struct kvm_possible_nx_huge_pages {
+	/*
+	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
+	 * replaced by an NX huge page. A shadow page is on this list if its
+	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+	 * and there are no other conditions that prevent a huge page, e.g.
+	 * the backing host page is huge, dirtly logging is not enabled for its
+	 * memslot, etc... Note, zapping shadow pages on this list doesn't
+	 * guarantee an NX huge page will be created in its stead, e.g. if the
+	 * guest attempts to execute from the region then KVM obviously can't
+	 * create an NX huge page (without hanging the guest).
+	 */
+	struct list_head pages;
+	u64 nr_pages;
+};
+
+enum kvm_mmu_type {
+	KVM_SHADOW_MMU,
+#ifdef CONFIG_X86_64
+	KVM_TDP_MMU,
+#endif
+	KVM_NR_MMU_TYPES,
+};
+
 struct kvm_arch {
 	unsigned long n_used_mmu_pages;
 	unsigned long n_requested_mmu_pages;
@@ -1360,18 +1384,7 @@ struct kvm_arch {
 	bool pre_fault_allowed;
 	struct hlist_head *mmu_page_hash;
 	struct list_head active_mmu_pages;
-	/*
-	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
-	 * replaced by an NX huge page. A shadow page is on this list if its
-	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
-	 * and there are no other conditions that prevent a huge page, e.g.
-	 * the backing host page is huge, dirtly logging is not enabled for its
-	 * memslot, etc... Note, zapping shadow pages on this list doesn't
-	 * guarantee an NX huge page will be created in its stead, e.g. if the
-	 * guest attempts to execute from the region then KVM obviously can't
-	 * create an NX huge page (without hanging the guest).
-	 */
-	struct list_head possible_nx_huge_pages;
+	struct kvm_possible_nx_huge_pages possible_nx_huge_pages[KVM_NR_MMU_TYPES];
 #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 	struct kvm_page_track_notifier_head track_notifier_head;
 #endif
@@ -1526,7 +1539,7 @@ struct kvm_arch {
 	 * is held in read mode:
 	 *  - tdp_mmu_roots (above)
 	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
-	 *  - possible_nx_huge_pages;
+	 *  - possible_nx_huge_pages[KVM_TDP_MMU];
 	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
 	 *    by the TDP MMU
 	 * Because the lock is only taken within the MMU lock, strictly

arch/x86/kvm/mmu/mmu.c

Lines changed: 40 additions & 19 deletions
@@ -776,7 +776,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_flush_remote_tlbs_gfn(kvm, gfn, PG_LEVEL_4K);
 }
 
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 enum kvm_mmu_type mmu_type)
 {
 	/*
 	 * If it's possible to replace the shadow page with an NX huge page,
@@ -790,8 +791,9 @@ void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 		return;
 
 	++kvm->stat.nx_lpage_splits;
+	++kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
 	list_add_tail(&sp->possible_nx_huge_page_link,
-		      &kvm->arch.possible_nx_huge_pages);
+		      &kvm->arch.possible_nx_huge_pages[mmu_type].pages);
 }
 
 static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -800,7 +802,7 @@ static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	sp->nx_huge_page_disallowed = true;
 
 	if (nx_huge_page_possible)
-		track_possible_nx_huge_page(kvm, sp);
+		track_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -819,20 +821,22 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
 
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				   enum kvm_mmu_type mmu_type)
 {
 	if (list_empty(&sp->possible_nx_huge_page_link))
 		return;
 
 	--kvm->stat.nx_lpage_splits;
+	--kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages;
 	list_del_init(&sp->possible_nx_huge_page_link);
 }
 
 static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	sp->nx_huge_page_disallowed = false;
 
-	untrack_possible_nx_huge_page(kvm, sp);
+	untrack_possible_nx_huge_page(kvm, sp, KVM_SHADOW_MMU);
 }
 
 static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu,
@@ -6737,11 +6741,12 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
-	int r;
+	int r, i;
 
 	kvm->arch.shadow_mmio_value = shadow_mmio_value;
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
+	for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+		INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages[i].pages);
 	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
 	if (tdp_mmu_enabled) {
@@ -7582,16 +7587,32 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
 	return err;
 }
 
-static void kvm_recover_nx_huge_pages(struct kvm *kvm)
+static unsigned long nx_huge_pages_to_zap(struct kvm *kvm,
+					  enum kvm_mmu_type mmu_type)
 {
-	unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
+	unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages[mmu_type].nr_pages);
+	unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+	return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
+}
+
+static void kvm_recover_nx_huge_pages(struct kvm *kvm,
+				      enum kvm_mmu_type mmu_type)
+{
+#ifdef CONFIG_X86_64
+	const bool is_tdp_mmu = mmu_type == KVM_TDP_MMU;
+#else
+	const bool is_tdp_mmu = false;
+#endif
+	unsigned long to_zap = nx_huge_pages_to_zap(kvm, mmu_type);
+	struct list_head *nx_huge_pages;
 	struct kvm_memory_slot *slot;
-	int rcu_idx;
 	struct kvm_mmu_page *sp;
-	unsigned int ratio;
 	LIST_HEAD(invalid_list);
 	bool flush = false;
-	ulong to_zap;
+	int rcu_idx;
+
+	nx_huge_pages = &kvm->arch.possible_nx_huge_pages[mmu_type].pages;
 
 	rcu_idx = srcu_read_lock(&kvm->srcu);
 	write_lock(&kvm->mmu_lock);
@@ -7603,10 +7624,8 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 	 */
 	rcu_read_lock();
 
-	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-	to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
 	for ( ; to_zap; --to_zap) {
-		if (list_empty(&kvm->arch.possible_nx_huge_pages))
+		if (list_empty(nx_huge_pages))
 			break;
 
 		/*
@@ -7616,7 +7635,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 		 * the total number of shadow pages. And because the TDP MMU
 		 * doesn't use active_mmu_pages.
		 */
-		sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
+		sp = list_first_entry(nx_huge_pages,
 				      struct kvm_mmu_page,
 				      possible_nx_huge_page_link);
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
@@ -7653,7 +7672,7 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 
 		if (slot && kvm_slot_dirty_track_enabled(slot))
 			unaccount_nx_huge_page(kvm, sp);
-		else if (is_tdp_mmu_page(sp))
+		else if (is_tdp_mmu)
 			flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
 		else
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
@@ -7684,9 +7703,10 @@ static void kvm_nx_huge_page_recovery_worker_kill(void *data)
 static bool kvm_nx_huge_page_recovery_worker(void *data)
 {
 	struct kvm *kvm = data;
+	long remaining_time;
 	bool enabled;
 	uint period;
-	long remaining_time;
+	int i;
 
 	enabled = calc_nx_huge_pages_recovery_period(&period);
 	if (!enabled)
@@ -7701,7 +7721,8 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	}
 
 	__set_current_state(TASK_RUNNING);
-	kvm_recover_nx_huge_pages(kvm);
+	for (i = 0; i < KVM_NR_MMU_TYPES; ++i)
+		kvm_recover_nx_huge_pages(kvm, i);
 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	return true;
 }

arch/x86/kvm/mmu/mmu_internal.h

Lines changed: 4 additions & 2 deletions
@@ -416,7 +416,9 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm,
 void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_level);
 
-void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 enum kvm_mmu_type mmu_type);
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				   enum kvm_mmu_type mmu_type);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 2 additions & 2 deletions
@@ -355,7 +355,7 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	sp->nx_huge_page_disallowed = false;
-	untrack_possible_nx_huge_page(kvm, sp);
+	untrack_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 }
 
@@ -1303,7 +1303,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	    fault->req_level >= iter.level) {
 		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 		if (sp->nx_huge_page_disallowed)
-			track_possible_nx_huge_page(kvm, sp);
+			track_possible_nx_huge_page(kvm, sp, KVM_TDP_MMU);
 		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 	}
 }
