Skip to content

Commit ea57386

Browse files
committed
powerpc/watchdog: help remote CPUs to flush NMI printk output
jira LE-1907 Rebuild_History Non-Buildable kernel-rt-5.14.0-284.30.1.rt14.315.el9_2 commit-author Nicholas Piggin <npiggin@gmail.com> commit e012c49 The printk layer at the moment does not seem to have a good way to force flush printk messages that are created in NMI context, except in the panic path. NMI-context printk messages normally get to the console with irq_work, but that won't help if the CPU is stuck with irqs disabled, as can be the case for hard lockup watchdog messages. The watchdog currently flushes the printk buffers after detecting a lockup on remote CPUs, but they may not have processed their NMI IPI yet by that stage, or they may have self-detected a lockup in which case they won't go via this NMI IPI path. Improve the situation by having NMI-context mark a flag if it called printk, and have watchdog timer interrupts check if that flag was set and try to flush if it was. Latency is not a big problem because we were already stuck for a while, just need to try to make sure the messages eventually make it out. Depends-on: 5d5e452 ("printk: restore flushing of NMI buffers on remote CPUs after NMI backtraces") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Laurent Dufour <ldufour@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20211119113146.752759-6-npiggin@gmail.com (cherry picked from commit e012c49) Signed-off-by: Jonathan Maple <jmaple@ciq.com>
1 parent 484b2b3 commit ea57386

File tree

1 file changed

+31
-6
lines changed

1 file changed

+31
-6
lines changed

arch/powerpc/kernel/watchdog.c

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ static DEFINE_PER_CPU(u64, wd_timer_tb);
9090
/* SMP checker bits */
9191
static unsigned long __wd_smp_lock;
9292
static unsigned long __wd_reporting;
93+
static unsigned long __wd_nmi_output;
9394
static cpumask_t wd_smp_cpus_pending;
9495
static cpumask_t wd_smp_cpus_stuck;
9596
static u64 wd_smp_last_reset_tb;
@@ -158,6 +159,23 @@ static void wd_lockup_ipi(struct pt_regs *regs)
158159
else
159160
dump_stack();
160161

162+
/*
163+
* __wd_nmi_output must be set after we printk from NMI context.
164+
*
165+
* printk from NMI context defers printing to the console to irq_work.
166+
* If that NMI was taken in some code that is hard-locked, then irqs
167+
* are disabled so irq_work will never fire. That can result in the
168+
* hard lockup messages being delayed (indefinitely, until something
169+
* else kicks the console drivers).
170+
*
171+
* Setting __wd_nmi_output will cause another CPU to notice and kick
172+
* the console drivers for us.
173+
*
174+
* xchg is not needed here (it could be a smp_mb and store), but xchg
175+
* gives the memory ordering and atomicity required.
176+
*/
177+
xchg(&__wd_nmi_output, 1);
178+
161179
/* Do not panic from here because that can recurse into NMI IPI layer */
162180
}
163181

@@ -231,12 +249,6 @@ static void watchdog_smp_panic(int cpu)
231249
cpumask_clear(&wd_smp_cpus_ipi);
232250
}
233251

234-
/*
235-
* Force flush any remote buffers that might be stuck in IRQ context
236-
* and therefore could not run their irq_work.
237-
*/
238-
printk_trigger_flush();
239-
240252
if (hardlockup_panic)
241253
nmi_panic(NULL, "Hard LOCKUP");
242254

@@ -341,6 +353,17 @@ static void watchdog_timer_interrupt(int cpu)
341353

342354
if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
343355
watchdog_smp_panic(cpu);
356+
357+
if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
358+
/*
359+
* Something has called printk from NMI context. It might be
360+
* stuck, so this triggers a flush that will get that
361+
* printk output to the console.
362+
*
363+
* See wd_lockup_ipi.
364+
*/
365+
printk_trigger_flush();
366+
}
344367
}
345368

346369
DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
@@ -390,6 +413,8 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
390413
print_irqtrace_events(current);
391414
show_regs(regs);
392415

416+
xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
417+
393418
if (sysctl_hardlockup_all_cpu_backtrace)
394419
trigger_allbutself_cpu_backtrace();
395420

0 commit comments

Comments
 (0)