Skip to content

Commit 4d4c0d8

Browse files
committed
pci/aer_inject: switch inject_lock to raw_spinlock_t
When injecting AER errors under PREEMPT_RT, the kernel may trigger a lockdep warning about an invalid wait context: ``` [ 1850.950780] [ BUG: Invalid wait context ] [ 1850.951152] 6.17.0-11316-g7a405dbb0f03-dirty #7 Not tainted [ 1850.951457] ----------------------------- [ 1850.951680] irq/16-PCIe PME/56 is trying to lock: [ 1850.952004] ffff800082865238 (inject_lock){+.+.}-{3:3}, at: aer_inj_read_config+0x38/0x1dc [ 1850.952731] other info that might help us debug this: [ 1850.952997] context-{5:5} [ 1850.953192] 5 locks held by irq/16-PCIe PME/56: [ 1850.953415] #0: ffff800082647390 (local_bh){.+.+}-{1:3}, at: __local_bh_disable_ip+0x30/0x268 [ 1850.953931] #1: ffff8000826c6b38 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire+0x4/0x48 [ 1850.954453] #2: ffff000004bb6c58 (&data->lock){+...}-{3:3}, at: pcie_pme_irq+0x34/0xc4 [ 1850.954949] #3: ffff8000826c6b38 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire+0x4/0x48 [ 1850.955420] #4: ffff800082863d10 (pci_lock){....}-{2:2}, at: pci_bus_read_config_dword+0x5c/0xd8 ``` This happens because the AER injection path (`aer_inj_read_config()`) is called in the context of the PCIe PME interrupt thread, which runs through `irq_forced_thread_fn()` under PREEMPT_RT. In this context, `pci_lock` (a `raw_spinlock_t`) is held with interrupts disabled (`raw_spin_lock_irqsave()`), and then `aer_inj_read_config()` tries to acquire `inject_lock`, which is a `spinlock_t` and therefore an `rt_spin_lock` under PREEMPT_RT. (Thanks Waiman Long) `rt_spin_lock` may sleep, so acquiring it while holding a raw spinlock with IRQs disabled violates the lock ordering rules. This leads to the “Invalid wait context” lockdep warning. In other words, the lock order looks like this: ``` raw_spin_lock_irqsave(&pci_lock); ↓ rt_spin_lock(&inject_lock); <-- not allowed ``` To fix this, convert `inject_lock` from a `spinlock_t` to a `raw_spinlock_t`. A raw spinlock is safe here and consistent with the surrounding locking scheme.
This resolves the lockdep “Invalid wait context” warning observed when injecting correctable AER errors through `/dev/aer_inject` on PREEMPT_RT. This was discovered while testing PCIe AER error injection on an arm64 QEMU virtual machine: ``` qemu-system-aarch64 \ -nographic \ -machine virt,highmem=off,gic-version=3 \ -cpu cortex-a72 \ -kernel arch/arm64/boot/Image \ -initrd initramfs.cpio.gz \ -append "console=ttyAMA0 root=/dev/ram rdinit=/linuxrc earlyprintk nokaslr" \ -m 2G \ -smp 1 \ -netdev user,id=net0,hostfwd=tcp::2223-:22 \ -device virtio-net-pci,netdev=net0 \ -device pcie-root-port,id=rp0,chassis=1,slot=0x0 \ -device pci-testdev -s -S ``` Injecting a correctable PCIe error via /dev/aer_inject caused a BUG report with "Invalid wait context" in the irq/PCIe thread. ``` ~ # export HEX="00020000000000000100000000000000000000000000000000000000" ~ # echo -n "$HEX" | xxd -r -p | tee /dev/aer_inject >/dev/null [ 1850.947170] pcieport 0000:00:02.0: aer_inject: Injecting errors 00000001/00000000 into device 0000:00:02.0 [ 1850.949951] [ 1850.950479] ============================= [ 1850.950780] [ BUG: Invalid wait context ] [ 1850.951152] 6.17.0-11316-g7a405dbb0f03-dirty #7 Not tainted [ 1850.951457] ----------------------------- [ 1850.951680] irq/16-PCIe PME/56 is trying to lock: [ 1850.952004] ffff800082865238 (inject_lock){+.+.}-{3:3}, at: aer_inj_read_config+0x38/0x1dc [ 1850.952731] other info that might help us debug this: [ 1850.952997] context-{5:5} [ 1850.953192] 5 locks held by irq/16-PCIe PME/56: [ 1850.953415] #0: ffff800082647390 (local_bh){.+.+}-{1:3}, at: __local_bh_disable_ip+0x30/0x268 [ 1850.953931] #1: ffff8000826c6b38 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire+0x4/0x48 [ 1850.954453] #2: ffff000004bb6c58 (&data->lock){+...}-{3:3}, at: pcie_pme_irq+0x34/0xc4 [ 1850.954949] #3: ffff8000826c6b38 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire+0x4/0x48 [ 1850.955420] #4: 
ffff800082863d10 (pci_lock){....}-{2:2}, at: pci_bus_read_config_dword+0x5c/0xd8 [ 1850.955932] stack backtrace: [ 1850.956412] CPU: 0 UID: 0 PID: 56 Comm: irq/16-PCIe PME Not tainted 6.17.0-11316-g7a405dbb0f03-dirty #7 PREEMPT_{RT,(full)} [ 1850.957039] Hardware name: linux,dummy-virt (DT) [ 1850.957409] Call trace: [ 1850.957727] show_stack+0x18/0x24 (C) [ 1850.958089] dump_stack_lvl+0x40/0xbc [ 1850.958339] dump_stack+0x18/0x24 [ 1850.958586] __lock_acquire+0xa84/0x3008 [ 1850.958907] lock_acquire+0x128/0x2a8 [ 1850.959171] rt_spin_lock+0x50/0x1b8 [ 1850.959476] aer_inj_read_config+0x38/0x1dc [ 1850.959821] pci_bus_read_config_dword+0x80/0xd8 [ 1850.960079] pcie_capability_read_dword+0xac/0xd8 [ 1850.960454] pcie_pme_irq+0x44/0xc4 [ 1850.960728] irq_forced_thread_fn+0x30/0x94 [ 1850.960984] irq_thread+0x1ac/0x3a4 [ 1850.961308] kthread+0x1b4/0x208 [ 1850.961557] ret_from_fork+0x10/0x20 [ 1850.963088] pcieport 0000:00:02.0: AER: Correctable error message received from 0000:00:02.0 [ 1850.963330] pcieport 0000:00:02.0: PCIe Bus Error: severity=Correctable, type=Physical Layer, (Receiver ID) [ 1850.963351] pcieport 0000:00:02.0: device [1b36:000c] error status/mask=00000001/0000e000 [ 1850.963385] pcieport 0000:00:02.0: [ 0] RxErr (First) ``` Signed-off-by: Guangbo Cui <jckeep.cuiguangbo@gmail.com>
1 parent fd94619 commit 4d4c0d8

File tree

1 file changed

+11
-21
lines changed

1 file changed

+11
-21
lines changed

drivers/pci/pcie/aer_inject.c

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ static LIST_HEAD(einjected);
7272
static LIST_HEAD(pci_bus_ops_list);
7373

7474
/* Protect einjected and pci_bus_ops_list */
75-
static DEFINE_SPINLOCK(inject_lock);
75+
static DEFINE_RAW_SPINLOCK(inject_lock);
7676

7777
static void aer_error_init(struct aer_error *err, u32 domain,
7878
unsigned int bus, unsigned int devfn,
@@ -123,15 +123,13 @@ static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus)
123123

124124
static struct pci_bus_ops *pci_bus_ops_pop(void)
125125
{
126-
unsigned long flags;
127126
struct pci_bus_ops *bus_ops;
128127

129-
spin_lock_irqsave(&inject_lock, flags);
128+
guard(raw_spinlock_irqsave)(&inject_lock);
130129
bus_ops = list_first_entry_or_null(&pci_bus_ops_list,
131130
struct pci_bus_ops, list);
132131
if (bus_ops)
133132
list_del(&bus_ops->list);
134-
spin_unlock_irqrestore(&inject_lock, flags);
135133
return bus_ops;
136134
}
137135

@@ -219,11 +217,10 @@ static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
219217
{
220218
u32 *sim;
221219
struct aer_error *err;
222-
unsigned long flags;
223220
int domain;
224221
int rv;
225222

226-
spin_lock_irqsave(&inject_lock, flags);
223+
guard(raw_spinlock_irqsave)(&inject_lock);
227224
if (size != sizeof(u32))
228225
goto out;
229226
domain = pci_domain_nr(bus);
@@ -236,12 +233,10 @@ static int aer_inj_read_config(struct pci_bus *bus, unsigned int devfn,
236233
sim = find_pci_config_dword(err, where, NULL);
237234
if (sim) {
238235
*val = *sim;
239-
spin_unlock_irqrestore(&inject_lock, flags);
240236
return 0;
241237
}
242238
out:
243239
rv = aer_inj_read(bus, devfn, where, size, val);
244-
spin_unlock_irqrestore(&inject_lock, flags);
245240
return rv;
246241
}
247242

@@ -250,12 +245,11 @@ static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
250245
{
251246
u32 *sim;
252247
struct aer_error *err;
253-
unsigned long flags;
254248
int rw1cs;
255249
int domain;
256250
int rv;
257251

258-
spin_lock_irqsave(&inject_lock, flags);
252+
guard(raw_spinlock_irqsave)(&inject_lock);
259253
if (size != sizeof(u32))
260254
goto out;
261255
domain = pci_domain_nr(bus);
@@ -271,12 +265,10 @@ static int aer_inj_write_config(struct pci_bus *bus, unsigned int devfn,
271265
*sim ^= val;
272266
else
273267
*sim = val;
274-
spin_unlock_irqrestore(&inject_lock, flags);
275268
return 0;
276269
}
277270
out:
278271
rv = aer_inj_write(bus, devfn, where, size, val);
279-
spin_unlock_irqrestore(&inject_lock, flags);
280272
return rv;
281273
}
282274

@@ -304,14 +296,14 @@ static int pci_bus_set_aer_ops(struct pci_bus *bus)
304296
if (!bus_ops)
305297
return -ENOMEM;
306298
ops = pci_bus_set_ops(bus, &aer_inj_pci_ops);
307-
spin_lock_irqsave(&inject_lock, flags);
299+
raw_spin_lock_irqsave(&inject_lock, flags);
308300
if (ops == &aer_inj_pci_ops)
309301
goto out;
310302
pci_bus_ops_init(bus_ops, bus, ops);
311303
list_add(&bus_ops->list, &pci_bus_ops_list);
312304
bus_ops = NULL;
313305
out:
314-
spin_unlock_irqrestore(&inject_lock, flags);
306+
raw_spin_unlock_irqrestore(&inject_lock, flags);
315307
kfree(bus_ops);
316308
return 0;
317309
}
@@ -383,7 +375,7 @@ static int aer_inject(struct aer_error_inj *einj)
383375
uncor_mask);
384376
}
385377

386-
spin_lock_irqsave(&inject_lock, flags);
378+
raw_spin_lock_irqsave(&inject_lock, flags);
387379

388380
err = __find_aer_error_by_dev(dev);
389381
if (!err) {
@@ -404,14 +396,14 @@ static int aer_inject(struct aer_error_inj *einj)
404396
!(einj->cor_status & ~cor_mask)) {
405397
ret = -EINVAL;
406398
pci_warn(dev, "The correctable error(s) is masked by device\n");
407-
spin_unlock_irqrestore(&inject_lock, flags);
399+
raw_spin_unlock_irqrestore(&inject_lock, flags);
408400
goto out_put;
409401
}
410402
if (!aer_mask_override && einj->uncor_status &&
411403
!(einj->uncor_status & ~uncor_mask)) {
412404
ret = -EINVAL;
413405
pci_warn(dev, "The uncorrectable error(s) is masked by device\n");
414-
spin_unlock_irqrestore(&inject_lock, flags);
406+
raw_spin_unlock_irqrestore(&inject_lock, flags);
415407
goto out_put;
416408
}
417409

@@ -445,7 +437,7 @@ static int aer_inject(struct aer_error_inj *einj)
445437
rperr->source_id &= 0x0000ffff;
446438
rperr->source_id |= PCI_DEVID(einj->bus, devfn) << 16;
447439
}
448-
spin_unlock_irqrestore(&inject_lock, flags);
440+
raw_spin_unlock_irqrestore(&inject_lock, flags);
449441

450442
if (aer_mask_override) {
451443
pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK,
@@ -523,7 +515,6 @@ static int __init aer_inject_init(void)
523515
static void __exit aer_inject_exit(void)
524516
{
525517
struct aer_error *err, *err_next;
526-
unsigned long flags;
527518
struct pci_bus_ops *bus_ops;
528519

529520
misc_deregister(&aer_inject_device);
@@ -533,12 +524,11 @@ static void __exit aer_inject_exit(void)
533524
kfree(bus_ops);
534525
}
535526

536-
spin_lock_irqsave(&inject_lock, flags);
527+
guard(raw_spinlock_irqsave)(&inject_lock);
537528
list_for_each_entry_safe(err, err_next, &einjected, list) {
538529
list_del(&err->list);
539530
kfree(err);
540531
}
541-
spin_unlock_irqrestore(&inject_lock, flags);
542532
}
543533

544534
module_init(aer_inject_init);

0 commit comments

Comments
 (0)