
Commit 8e088d3

Author: Herton R. Krzesinski (committed)

Merge: rtmutex: Add acquire semantics for rtmutex lock acquisition slow path
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/1934
Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1c0908d8e441631f5b8ba433523cf39339ee2ba0
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2163507
Conflicts: Corrected minor context diff

This change is needed for the automotive kernel that runs on aarch64.

commit 1c0908d8e441631f5b8ba433523cf39339ee2ba0
Author: Mel Gorman <mgorman@techsingularity.net>
Date:   Fri Dec 2 10:02:23 2022 +0000

    rtmutex: Add acquire semantics for rtmutex lock acquisition slow path

    Jan Kara reported the following bug triggering on 6.0.5-rt14 running
    dbench on XFS on arm64.

      kernel BUG at fs/inode.c:625!
      Internal error: Oops - BUG: 0 [#1] PREEMPT_RT SMP
      CPU: 11 PID: 6611 Comm: dbench Tainted: G     E   6.0.0-rt14-rt+ #1
      pc : clear_inode+0xa0/0xc0
      lr : clear_inode+0x38/0xc0
      Call trace:
       clear_inode+0xa0/0xc0
       evict+0x160/0x180
       iput+0x154/0x240
       do_unlinkat+0x184/0x300
       __arm64_sys_unlinkat+0x48/0xc0
       el0_svc_common.constprop.4+0xe4/0x2c0
       do_el0_svc+0xac/0x100
       el0_svc+0x78/0x200
       el0t_64_sync_handler+0x9c/0xc0
       el0t_64_sync+0x19c/0x1a0

    It also affects 6.1-rc7-rt5 and affects a preempt-rt fork of 5.14 so
    this is likely a bug that existed forever and only became visible when
    ARM support was added to preempt-rt. The same problem does not occur
    on x86-64 and he also reported that converting sb->s_inode_wblist_lock
    to raw_spinlock_t makes the problem disappear indicating that the RT
    spinlock variant is the problem.

    Which in turn means that RT mutexes on ARM64 and any other weakly
    ordered architecture are affected by this independent of RT.

    Will Deacon observed:

      "I'd be more inclined to be suspicious of the slowpath tbh, as we
       need to make sure that we have acquire semantics on all paths where
       the lock can be taken. Looking at the rtmutex code, this really
       isn't obvious to me -- for example, try_to_take_rt_mutex() appears
       to be able to return via the 'takeit' label without acquire
       semantics and it looks like we might be relying on the caller's
       subsequent _unlock_ of the wait_lock for ordering, but that will
       give us release semantics which aren't correct."

    Sebastian Andrzej Siewior prototyped a fix that does work based on
    that comment but it was a little bit overkill and added some fences
    that should not be necessary.

    The lock owner is updated with an IRQ-safe raw spinlock held, but the
    spin_unlock does not provide acquire semantics which are needed when
    acquiring a mutex.

    Adds the necessary acquire semantics for lock owner updates in the
    slow path acquisition and the waiter bit logic.

    It successfully completed 10 iterations of the dbench workload while
    the vanilla kernel fails on the first iteration.

    [ bigeasy@linutronix.de: Initial prototype fix ]

    Fixes: 700318d ("locking/rtmutex: Use acquire/release semantics")
    Fixes: 23f78d4 ("[PATCH] pi-futex: rt mutex core")
    Reported-by: Jan Kara <jack@suse.cz>
    Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
    Cc: stable@vger.kernel.org
    Link: https://lore.kernel.org/r/20221202100223.6mevpbl7i6x5udfd@techsingularity.net

Signed-off-by: Brian Masney <bmasney@redhat.com>
Approved-by: Andrew Halaney <ahalaney@redhat.com>
Approved-by: Waiman Long <longman@redhat.com>
Approved-by: Eric Chanudet <echanude@redhat.com>
Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
2 parents: 50ec555 + c3a495f
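The ordering problem described above can be pictured outside the kernel. The following is a minimal user-space sketch in C11 atomics, not the kernel implementation: struct fake_rtmutex, set_owner_acquire() and clear_owner_release() are hypothetical names chosen for illustration, and the release ordering that the kernel gets from the wait_lock unlock is placed directly on the clearing store here.

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical, simplified stand-in for struct rt_mutex_base: the owner
 * field doubles as the lock word (0 == unlocked, non-zero == owner id). */
struct fake_rtmutex {
        _Atomic(unsigned long) owner;
};

/* Slow-path owner update: an acquire read-modify-write, mirroring the
 * xchg_acquire() added by the patch. A plain/relaxed store here would
 * let the new owner's critical-section loads be satisfied before the
 * handover is ordered on weakly ordered CPUs such as arm64. */
static void set_owner_acquire(struct fake_rtmutex *lock, unsigned long owner)
{
        atomic_exchange_explicit(&lock->owner, owner, memory_order_acquire);
}

/* Clearing the owner only needs release ordering; the kernel keeps
 * WRITE_ONCE() because the wait_lock unlock provides that release,
 * while this analogue puts the release on the store itself. */
static void clear_owner_release(struct fake_rtmutex *lock)
{
        atomic_store_explicit(&lock->owner, 0, memory_order_release);
}

int main(void)
{
        struct fake_rtmutex m = { .owner = 0 };

        set_owner_acquire(&m, 42);
        printf("owner=%lu\n", atomic_load_explicit(&m.owner, memory_order_relaxed));
        clear_owner_release(&m);
        return 0;
}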

File tree: 2 files changed (+49, -12 lines)

kernel/locking/rtmutex.c

Lines changed: 46 additions & 9 deletions
@@ -87,15 +87,31 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
  * set this bit before looking at the lock.
  */
 
-static __always_inline void
-rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+static __always_inline struct task_struct *
+rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
 {
         unsigned long val = (unsigned long)owner;
 
         if (rt_mutex_has_waiters(lock))
                 val |= RT_MUTEX_HAS_WAITERS;
 
-        WRITE_ONCE(lock->owner, (struct task_struct *)val);
+        return (struct task_struct *)val;
+}
+
+static __always_inline void
+rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+{
+        /*
+         * lock->wait_lock is held but explicit acquire semantics are needed
+         * for a new lock owner so WRITE_ONCE is insufficient.
+         */
+        xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
+}
+
+static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
+{
+        /* lock->wait_lock is held so the unlock provides release semantics. */
+        WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
 }
 
 static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
@@ -104,7 +120,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
                         ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
 }
 
-static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
+static __always_inline void
+fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
 {
         unsigned long owner, *p = (unsigned long *) &lock->owner;
 
@@ -170,8 +187,21 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
          * still set.
          */
         owner = READ_ONCE(*p);
-        if (owner & RT_MUTEX_HAS_WAITERS)
-                WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+        if (owner & RT_MUTEX_HAS_WAITERS) {
+                /*
+                 * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
+                 * why xchg_acquire() is used for updating owner for
+                 * locking and WRITE_ONCE() for unlocking.
+                 *
+                 * WRITE_ONCE() would work for the acquire case too, but
+                 * in case that the lock acquisition failed it might
+                 * force other lockers into the slow path unnecessarily.
+                 */
+                if (acquire_lock)
+                        xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
+                else
+                        WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+        }
 }
 
 /*
@@ -206,6 +236,13 @@ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
                 owner = *p;
         } while (cmpxchg_relaxed(p, owner,
                                  owner | RT_MUTEX_HAS_WAITERS) != owner);
+
+        /*
+         * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
+         * operations in the event of contention. Ensure the successful
+         * cmpxchg is visible.
+         */
+        smp_mb__after_atomic();
 }
 
 /*
@@ -1241,7 +1278,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
          * try_to_take_rt_mutex() sets the lock waiters bit
          * unconditionally. Clean this up.
          */
-        fixup_rt_mutex_waiters(lock);
+        fixup_rt_mutex_waiters(lock, true);
 
         return ret;
 }
@@ -1600,7 +1637,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
          * try_to_take_rt_mutex() sets the waiter bit
          * unconditionally. We might have to fix that up.
          */
-        fixup_rt_mutex_waiters(lock);
+        fixup_rt_mutex_waiters(lock, true);
         return ret;
 }
 
@@ -1710,7 +1747,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
          * try_to_take_rt_mutex() sets the waiter bit unconditionally.
          * We might have to fix that up:
          */
-        fixup_rt_mutex_waiters(lock);
+        fixup_rt_mutex_waiters(lock, true);
         debug_rt_mutex_free_waiter(&waiter);
 }
 
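The mark_rt_mutex_waiters() hunk above pairs a relaxed cmpxchg loop with an explicit barrier rather than making every retry an acquire. Below is a rough user-space analogue of that pattern in C11 atomics; mark_waiters() and HAS_WAITERS are illustrative names, and atomic_thread_fence(memory_order_seq_cst) is only an approximation of smp_mb__after_atomic(), not its exact kernel semantics.

#include <stdatomic.h>

#define HAS_WAITERS 1UL

/* Set a flag bit in the owner word with a relaxed CAS loop, then order the
 * successful update with one fence instead of an acquire on every retry. */
static void mark_waiters(_Atomic(unsigned long) *owner)
{
        unsigned long old = atomic_load_explicit(owner, memory_order_relaxed);

        while (!atomic_compare_exchange_weak_explicit(owner, &old,
                                                      old | HAS_WAITERS,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed))
                ;       /* on failure the CAS reloads 'old' and we retry */

        /* Approximate stand-in for smp_mb__after_atomic(). */
        atomic_thread_fence(memory_order_seq_cst);
}

int main(void)
{
        _Atomic(unsigned long) owner = 0;

        mark_waiters(&owner);
        return (int)atomic_load_explicit(&owner, memory_order_relaxed);
}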

kernel/locking/rtmutex_api.c

Lines changed: 3 additions & 3 deletions
@@ -267,7 +267,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
 void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
 {
         debug_rt_mutex_proxy_unlock(lock);
-        rt_mutex_set_owner(lock, NULL);
+        rt_mutex_clear_owner(lock);
 }
 
 /**
@@ -382,7 +382,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
          * have to fix that up.
          */
-        fixup_rt_mutex_waiters(lock);
+        fixup_rt_mutex_waiters(lock, true);
         raw_spin_unlock_irq(&lock->wait_lock);
 
         return ret;
@@ -438,7 +438,7 @@ bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
          * have to fix that up.
          */
-        fixup_rt_mutex_waiters(lock);
+        fixup_rt_mutex_waiters(lock, false);
 
         raw_spin_unlock_irq(&lock->wait_lock);
 