@@ -876,7 +876,7 @@ static struct kvm_memory_slot *gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu
  */
 #define KVM_RMAP_LOCKED		BIT(1)
 
-static unsigned long kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
+static unsigned long __kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
 {
         unsigned long old_val, new_val;
 
@@ -914,7 +914,7 @@ static unsigned long kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
         /*
          * Use try_cmpxchg_acquire() to prevent reads and writes to the rmap
          * from being reordered outside of the critical section created by
-         * kvm_rmap_lock().
+         * __kvm_rmap_lock().
          *
          * Pairs with the atomic_long_set_release() in kvm_rmap_unlock().
          *
@@ -923,39 +923,92 @@ static unsigned long kvm_rmap_lock(struct kvm_rmap_head *rmap_head)
          */
         } while (!atomic_long_try_cmpxchg_acquire(&rmap_head->val, &old_val, new_val));
 
-        /* Return the old value, i.e. _without_ the LOCKED bit set. */
+        /*
+         * Return the old value, i.e. _without_ the LOCKED bit set.  It's
+         * impossible for the return value to be 0 (see above), i.e. the read-
+         * only unlock flow can't get a false positive and fail to unlock.
+         */
         return old_val;
 }
 
-static void kvm_rmap_unlock(struct kvm_rmap_head *rmap_head,
-                            unsigned long new_val)
+static unsigned long kvm_rmap_lock(struct kvm *kvm,
+                                   struct kvm_rmap_head *rmap_head)
+{
+        lockdep_assert_held_write(&kvm->mmu_lock);
+
+        return __kvm_rmap_lock(rmap_head);
+}
+
+static void __kvm_rmap_unlock(struct kvm_rmap_head *rmap_head,
+                              unsigned long val)
 {
-        WARN_ON_ONCE(new_val & KVM_RMAP_LOCKED);
+        KVM_MMU_WARN_ON(val & KVM_RMAP_LOCKED);
         /*
          * Ensure that all accesses to the rmap have completed before unlocking
          * the rmap.
          *
-         * Pairs with the atomic_long_try_cmpxchg_acquire() in kvm_rmap_lock.
+         * Pairs with the atomic_long_try_cmpxchg_acquire() in __kvm_rmap_lock().
          */
-        atomic_long_set_release(&rmap_head->val, new_val);
+        atomic_long_set_release(&rmap_head->val, val);
+}
+
+static void kvm_rmap_unlock(struct kvm *kvm,
+                            struct kvm_rmap_head *rmap_head,
+                            unsigned long new_val)
+{
+        lockdep_assert_held_write(&kvm->mmu_lock);
+
+        __kvm_rmap_unlock(rmap_head, new_val);
 }
 
 static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
 {
         return atomic_long_read(&rmap_head->val) & ~KVM_RMAP_LOCKED;
 }
 
+/*
+ * If mmu_lock isn't held, rmaps can only be locked in read-only mode.  The
+ * actual locking is the same, but the caller is disallowed from modifying the
+ * rmap, and so the unlock flow is a nop if the rmap is/was empty.
+ */
+__maybe_unused
+static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
+{
+        unsigned long rmap_val;
+
+        preempt_disable();
+        rmap_val = __kvm_rmap_lock(rmap_head);
+
+        if (!rmap_val)
+                preempt_enable();
+
+        return rmap_val;
+}
+
+__maybe_unused
+static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
+                                     unsigned long old_val)
+{
+        if (!old_val)
+                return;
+
+        KVM_MMU_WARN_ON(old_val != kvm_rmap_get(rmap_head));
+
+        __kvm_rmap_unlock(rmap_head, old_val);
+        preempt_enable();
+}
+
 /*
  * Returns the number of pointers in the rmap chain, not counting the new one.
  */
-static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
-                        struct kvm_rmap_head *rmap_head)
+static int pte_list_add(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+                        u64 *spte, struct kvm_rmap_head *rmap_head)
 {
         unsigned long old_val, new_val;
         struct pte_list_desc *desc;
         int count = 0;
 
-        old_val = kvm_rmap_lock(rmap_head);
+        old_val = kvm_rmap_lock(kvm, rmap_head);
 
         if (!old_val) {
                 new_val = (unsigned long)spte;
@@ -987,7 +1040,7 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
                 desc->sptes[desc->spte_count++] = spte;
         }
 
-        kvm_rmap_unlock(rmap_head, new_val);
+        kvm_rmap_unlock(kvm, rmap_head, new_val);
 
         return count;
 }
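
The hunks above split the rmap lock into raw __kvm_rmap_lock()/__kvm_rmap_unlock() helpers plus mmu_lock-asserting wrappers. The underlying protocol is a per-rmap spinlock folded into rmap_head->val itself: bit 1 (KVM_RMAP_LOCKED) is set with an acquire cmpxchg and cleared with a release store, and the same word doubles as the rmap data. The stand-alone, user-space C11 sketch below models only that protocol; it omits the patch's empty-rmap fast path, and every name in it other than the LOCKED bit is invented for illustration.

    /* rmap_lock_model.c: illustrative user-space model, not kernel code. */
    #include <assert.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define RMAP_LOCKED 0x2UL  /* plays the role of KVM_RMAP_LOCKED, i.e. BIT(1) */

    static _Atomic unsigned long rmap_head_val;  /* stands in for rmap_head->val */

    static unsigned long rmap_lock(void)
    {
            unsigned long old_val, new_val;

            old_val = atomic_load_explicit(&rmap_head_val, memory_order_relaxed);
            do {
                    /* Spin until the lock bit is clear, then try to set it. */
                    while (old_val & RMAP_LOCKED)
                            old_val = atomic_load_explicit(&rmap_head_val,
                                                           memory_order_relaxed);
                    new_val = old_val | RMAP_LOCKED;
                    /*
                     * Acquire on success keeps the critical section from leaking
                     * above the lock; pairs with the release store in unlock.
                     */
            } while (!atomic_compare_exchange_weak_explicit(&rmap_head_val,
                                                            &old_val, new_val,
                                                            memory_order_acquire,
                                                            memory_order_relaxed));

            /* As in the patch: return the old value _without_ the LOCKED bit. */
            return old_val;
    }

    static void rmap_unlock(unsigned long new_val)
    {
            assert(!(new_val & RMAP_LOCKED));
            atomic_store_explicit(&rmap_head_val, new_val, memory_order_release);
    }

    int main(void)
    {
            unsigned long v = rmap_lock();
            rmap_unlock(v);         /* no modification: write back the old value */
            printf("final rmap value: %#lx\n", (unsigned long)rmap_head_val);
            return 0;
    }
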
@@ -1035,7 +1088,7 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
         unsigned long rmap_val;
         int i;
 
-        rmap_val = kvm_rmap_lock(rmap_head);
+        rmap_val = kvm_rmap_lock(kvm, rmap_head);
         if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_val, kvm))
                 goto out;
 
@@ -1061,7 +1114,7 @@ static void pte_list_remove(struct kvm *kvm, u64 *spte,
         }
 
 out:
-        kvm_rmap_unlock(rmap_head, rmap_val);
+        kvm_rmap_unlock(kvm, rmap_head, rmap_val);
 }
 
 static void kvm_zap_one_rmap_spte(struct kvm *kvm,
@@ -1079,7 +1132,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
         unsigned long rmap_val;
         int i;
 
-        rmap_val = kvm_rmap_lock(rmap_head);
+        rmap_val = kvm_rmap_lock(kvm, rmap_head);
         if (!rmap_val)
                 return false;
 
@@ -1098,7 +1151,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
         }
 out:
         /* rmap_head is meaningless now, remember to reset it */
-        kvm_rmap_unlock(rmap_head, 0);
+        kvm_rmap_unlock(kvm, rmap_head, 0);
         return true;
 }
 
@@ -1171,23 +1224,18 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
                            struct rmap_iterator *iter)
 {
         unsigned long rmap_val = kvm_rmap_get(rmap_head);
-        u64 *sptep;
 
         if (!rmap_val)
                 return NULL;
 
         if (!(rmap_val & KVM_RMAP_MANY)) {
                 iter->desc = NULL;
-                sptep = (u64 *)rmap_val;
-                goto out;
+                return (u64 *)rmap_val;
         }
 
         iter->desc = (struct pte_list_desc *)(rmap_val & ~KVM_RMAP_MANY);
         iter->pos = 0;
-        sptep = iter->desc->sptes[iter->pos];
-out:
-        BUG_ON(!is_shadow_present_pte(*sptep));
-        return sptep;
+        return iter->desc->sptes[iter->pos];
 }
 
 /*
@@ -1197,35 +1245,36 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
  */
 static u64 *rmap_get_next(struct rmap_iterator *iter)
 {
-        u64 *sptep;
-
         if (iter->desc) {
                 if (iter->pos < PTE_LIST_EXT - 1) {
                         ++iter->pos;
-                        sptep = iter->desc->sptes[iter->pos];
-                        if (sptep)
-                                goto out;
+                        if (iter->desc->sptes[iter->pos])
+                                return iter->desc->sptes[iter->pos];
                 }
 
                 iter->desc = iter->desc->more;
 
                 if (iter->desc) {
                         iter->pos = 0;
                         /* desc->sptes[0] cannot be NULL */
-                        sptep = iter->desc->sptes[iter->pos];
-                        goto out;
+                        return iter->desc->sptes[iter->pos];
                 }
         }
 
         return NULL;
-out:
-        BUG_ON(!is_shadow_present_pte(*sptep));
-        return sptep;
 }
 
-#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)                        \
-        for (_spte_ = rmap_get_first(_rmap_head_, _iter_);                     \
-             _spte_; _spte_ = rmap_get_next(_iter_))
+#define __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_)                     \
+        for (_sptep_ = rmap_get_first(_rmap_head_, _iter_);                    \
+             _sptep_; _sptep_ = rmap_get_next(_iter_))
+
+#define for_each_rmap_spte(_rmap_head_, _iter_, _sptep_)                       \
+        __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_)                     \
+                if (!WARN_ON_ONCE(!is_shadow_present_pte(*(_sptep_))))
+
+#define for_each_rmap_spte_lockless(_rmap_head_, _iter_, _sptep_, _spte_)      \
+        __for_each_rmap_spte(_rmap_head_, _iter_, _sptep_)                     \
+                if (is_shadow_present_pte(_spte_ = mmu_spte_get_lockless(_sptep_)))
 
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
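
To show how this hunk's additions are meant to compose, here is a hypothetical caller (not part of this patch) that walks one rmap without holding mmu_lock: take the read-only lock, iterate with for_each_rmap_spte_lockless(), which snapshots each SPTE via mmu_spte_get_lockless(), then drop the lock. kvm_rmap_test_age() and its "young" logic are invented placeholders; is_accessed_spte() is the existing helper from spte.h.

    /* Hypothetical caller, sketched for illustration only. */
    static bool kvm_rmap_test_age(struct kvm_rmap_head *rmap_head)
    {
            struct rmap_iterator iter;
            unsigned long rmap_val;
            bool young = false;
            u64 *sptep;
            u64 spte;

            /* Returns 0, and takes no lock, if the rmap is empty. */
            rmap_val = kvm_rmap_lock_readonly(rmap_head);
            if (!rmap_val)
                    return false;

            /* Read-only: SPTEs may be inspected, but the rmap must not change. */
            for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte)
                    young |= is_accessed_spte(spte);

            kvm_rmap_unlock_readonly(rmap_head, rmap_val);
            return young;
    }
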
@@ -1311,12 +1360,13 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
         struct rmap_iterator iter;
         bool flush = false;
 
-        for_each_rmap_spte(rmap_head, &iter, sptep)
+        for_each_rmap_spte(rmap_head, &iter, sptep) {
                 if (spte_ad_need_write_protect(*sptep))
                         flush |= test_and_clear_bit(PT_WRITABLE_SHIFT,
                                                     (unsigned long *)sptep);
                 else
                         flush |= spte_clear_dirty(sptep);
+        }
 
         return flush;
 }
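
The braces added above are needed because for_each_rmap_spte() now hides an if inside the macro: without braces, the loop body's if/else ends up as an else nested under two ifs. It still binds to the innermost if, so behavior is unchanged, but the construct is easy to misread and trips dangling-else warnings. Roughly, the unbraced expansion would be:

    /* Illustrative expansion of the unbraced loop body, simplified. */
    for (sptep = rmap_get_first(rmap_head, &iter);
         sptep; sptep = rmap_get_next(&iter))
            if (!WARN_ON_ONCE(!is_shadow_present_pte(*sptep)))
                    if (spte_ad_need_write_protect(*sptep))
                            flush |= test_and_clear_bit(PT_WRITABLE_SHIFT,
                                                        (unsigned long *)sptep);
                    else    /* pairs with the innermost if */
                            flush |= spte_clear_dirty(sptep);
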
@@ -1637,7 +1687,7 @@ static void __rmap_add(struct kvm *kvm,
         kvm_update_page_stats(kvm, sp->role.level, 1);
 
         rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
-        rmap_count = pte_list_add(cache, spte, rmap_head);
+        rmap_count = pte_list_add(kvm, cache, spte, rmap_head);
 
         if (rmap_count > kvm->stat.max_mmu_rmap_size)
                 kvm->stat.max_mmu_rmap_size = rmap_count;
@@ -1771,13 +1821,14 @@ static unsigned kvm_page_table_hashfn(gfn_t gfn)
         return hash_64(gfn, KVM_MMU_HASH_SHIFT);
 }
 
-static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
+static void mmu_page_add_parent_pte(struct kvm *kvm,
+                                    struct kvm_mmu_memory_cache *cache,
                                     struct kvm_mmu_page *sp, u64 *parent_pte)
 {
         if (!parent_pte)
                 return;
 
-        pte_list_add(cache, parent_pte, &sp->parent_ptes);
+        pte_list_add(kvm, cache, parent_pte, &sp->parent_ptes);
 }
 
 static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
@@ -2467,7 +2518,7 @@ static void __link_shadow_page(struct kvm *kvm,
24672518
24682519 mmu_spte_set (sptep , spte );
24692520
2470- mmu_page_add_parent_pte (cache , sp , sptep );
2521+ mmu_page_add_parent_pte (kvm , cache , sp , sptep );
24712522
24722523 /*
24732524 * The non-direct sub-pagetable must be updated before linking. For