Skip to content

Commit b8a154b

Browse files
committed
Merge: smp: Fix soft lockup problem in hard lockup test
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6855 JIRA: https://issues.redhat.com/browse/RHEL-16867 MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6855 The crasher tool mentioned in the Jira causes a deadlock in the CPU running the tool to cause a hard lockup panic. However, if another running task is sending IPIs to other CPUs and expecting responses from other CPUs at the same time, it may cause an avalanche of soft lockup messages in the kernel log which somehow prevents the watchdog from detecting the hard lockup. This problem is likely to be solved by backporting some upstream patches to reduce redundant soft lockup messages and eventually panic the system if the CSD lock gets stuck for too long. As the problem isn't easy to reproduce, we can't easily verify if this MR can really fix the problem. Let's hope for the best and create another bug if this happens again in the future. Signed-off-by: Waiman Long <longman@redhat.com> Approved-by: Rafael Aquini <raquini@redhat.com> Approved-by: Jay Shin <jaeshin@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Jan Stancek <jstancek@redhat.com>
2 parents 98c90ea + 100e22b commit b8a154b

File tree

2 files changed

+37
-14
lines changed

2 files changed

+37
-14
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5979,6 +5979,13 @@
59795979
This feature may be more efficiently disabled
59805980
using the csdlock_debug- kernel parameter.
59815981

5982+
smp.panic_on_ipistall= [KNL]
5983+
If a csd_lock_timeout extends for more than
5984+
the specified number of milliseconds, panic the
5985+
system. By default, let CSD-lock acquisition
5986+
take as long as it takes. Specifying 300,000
5987+
for this value provides a 5-minute timeout.
5988+
59825989
smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
59835990
smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port
59845991
smsc-ircc2.ircc_sir= [HW] SIR base I/O port

kernel/smp.c

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,8 @@
3636

3737
#define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
3838

39-
struct cfd_percpu {
40-
call_single_data_t csd;
41-
};
42-
4339
struct call_function_data {
44-
struct cfd_percpu __percpu *pcpu;
40+
call_single_data_t __percpu *csd;
4541
cpumask_var_t cpumask;
4642
cpumask_var_t cpumask_ipi;
4743
};
@@ -50,6 +46,8 @@ static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
5046

5147
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
5248

49+
static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(1);
50+
5351
static void __flush_smp_call_function_queue(bool warn_cpu_offline);
5452

5553
int smpcfd_prepare_cpu(unsigned int cpu)
@@ -64,8 +62,8 @@ int smpcfd_prepare_cpu(unsigned int cpu)
6462
free_cpumask_var(cfd->cpumask);
6563
return -ENOMEM;
6664
}
67-
cfd->pcpu = alloc_percpu(struct cfd_percpu);
68-
if (!cfd->pcpu) {
65+
cfd->csd = alloc_percpu(call_single_data_t);
66+
if (!cfd->csd) {
6967
free_cpumask_var(cfd->cpumask);
7068
free_cpumask_var(cfd->cpumask_ipi);
7169
return -ENOMEM;
@@ -80,7 +78,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
8078

8179
free_cpumask_var(cfd->cpumask);
8280
free_cpumask_var(cfd->cpumask_ipi);
83-
free_percpu(cfd->pcpu);
81+
free_percpu(cfd->csd);
8482
return 0;
8583
}
8684

@@ -167,6 +165,8 @@ static DEFINE_PER_CPU(void *, cur_csd_info);
167165

168166
static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */
169167
module_param(csd_lock_timeout, ulong, 0444);
168+
static int panic_on_ipistall; /* CSD panic timeout in milliseconds, 300000 for five minutes. */
169+
module_param(panic_on_ipistall, int, 0444);
170170

171171
static atomic_t csd_bug_count = ATOMIC_INIT(0);
172172

@@ -227,6 +227,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
227227
}
228228

229229
ts2 = sched_clock();
230+
/* How long since we last checked for a stuck CSD lock. */
230231
ts_delta = ts2 - *ts1;
231232
if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0))
232233
return false;
@@ -240,9 +241,17 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
240241
else
241242
cpux = cpu;
242243
cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
244+
/* How long since this CSD lock was stuck. */
245+
ts_delta = ts2 - ts0;
243246
pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
244-
firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
247+
firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts_delta,
245248
cpu, csd->func, csd->info);
249+
/*
250+
* If the CSD lock is still stuck after 5 minutes, it is unlikely
251+
* to become unstuck. Use a signed comparison to avoid triggering
252+
* on underflows when the TSC is out of sync between sockets.
253+
*/
254+
BUG_ON(panic_on_ipistall > 0 && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC));
246255
if (cpu_cur_csd && csd != cpu_cur_csd) {
247256
pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
248257
*bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
@@ -252,13 +261,15 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
252261
*bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
253262
}
254263
if (cpu >= 0) {
255-
dump_cpu_task(cpu);
264+
if (atomic_cmpxchg_acquire(&per_cpu(trigger_backtrace, cpu), 1, 0))
265+
dump_cpu_task(cpu);
256266
if (!cpu_cur_csd) {
257267
pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
258268
arch_send_call_function_single_ipi(cpu);
259269
}
260270
}
261-
dump_stack();
271+
if (firsttime)
272+
dump_stack();
262273
*ts1 = ts2;
263274

264275
return false;
@@ -432,16 +443,21 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
432443
struct llist_node *entry, *prev;
433444
struct llist_head *head;
434445
static bool warned;
446+
atomic_t *tbt;
435447

436448
lockdep_assert_irqs_disabled();
437449

450+
/* Allow waiters to send backtrace NMI from here onwards */
451+
tbt = this_cpu_ptr(&trigger_backtrace);
452+
atomic_set_release(tbt, 1);
453+
438454
head = this_cpu_ptr(&call_single_queue);
439455
entry = llist_del_all(head);
440456
entry = llist_reverse_order(entry);
441457

442458
/* There shouldn't be any pending callbacks on an offline CPU. */
443459
if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
444-
!warned && !llist_empty(head))) {
460+
!warned && entry != NULL)) {
445461
warned = true;
446462
WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
447463

@@ -780,7 +796,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
780796

781797
cpumask_clear(cfd->cpumask_ipi);
782798
for_each_cpu(cpu, cfd->cpumask) {
783-
call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
799+
call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
784800

785801
if (cond_func && !cond_func(cpu, info)) {
786802
__cpumask_clear_cpu(cpu, cfd->cpumask);
@@ -828,7 +844,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
828844
for_each_cpu(cpu, cfd->cpumask) {
829845
call_single_data_t *csd;
830846

831-
csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
847+
csd = per_cpu_ptr(cfd->csd, cpu);
832848
csd_lock_wait(csd);
833849
}
834850
}

0 commit comments

Comments
 (0)