Skip to content

Commit 4e90cf8

Browse files
committed
Merge: sched/isolation: Prevent boot crash when the boot CPU is nohz_full
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/4811

sched/isolation: Prevent boot crash when the boot CPU is nohz_full

JIRA: https://issues.redhat.com/browse/RHEL-30589

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Approved-by: Valentin Schneider <vschneid@redhat.com>
Approved-by: Chris von Recklinghausen <crecklin@redhat.com>
Approved-by: Phil Auld <pauld@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Lucas Zampieri <lzampier@redhat.com>
2 parents f1f4a07 + c0408f4 commit 4e90cf8

File tree

3 files changed

+32
-35
lines changed

3 files changed

+32
-35
lines changed

Documentation/timers/no_hz.rst

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain
129129
online to handle timekeeping tasks in order to ensure that system
130130
calls like gettimeofday() return accurate values on adaptive-tick CPUs.
131131
(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
132-
user processes to observe slight drifts in clock rate.) Therefore, the
133-
boot CPU is prohibited from entering adaptive-ticks mode. Specifying a
134-
"nohz_full=" mask that includes the boot CPU will result in a boot-time
135-
error message, and the boot CPU will be removed from the mask. Note that
136-
this means that your system must have at least two CPUs in order for
132+
user processes to observe slight drifts in clock rate.) Note that this
133+
means that your system must have at least two CPUs in order for
137134
CONFIG_NO_HZ_FULL=y to do anything for you.
138135

139136
Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.

kernel/sched/isolation.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type)
4646
if (cpu < nr_cpu_ids)
4747
return cpu;
4848

49-
return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
49+
cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
50+
if (likely(cpu < nr_cpu_ids))
51+
return cpu;
52+
/*
53+
* Unless we have another problem this can only happen
54+
* at boot time before start_secondary() brings the 1st
55+
* housekeeping CPU up.
56+
*/
57+
WARN_ON_ONCE(system_state == SYSTEM_RUNNING ||
58+
type != HK_TYPE_TIMER);
5059
}
5160
}
5261
return smp_processor_id();
@@ -109,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type,
109118
static int __init housekeeping_setup(char *str, unsigned long flags)
110119
{
111120
cpumask_var_t non_housekeeping_mask, housekeeping_staging;
121+
unsigned int first_cpu;
112122
int err = 0;
113123

114124
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
@@ -129,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
129139
cpumask_andnot(housekeeping_staging,
130140
cpu_possible_mask, non_housekeeping_mask);
131141

132-
if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
142+
first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
143+
if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
133144
__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
134145
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
135146
if (!housekeeping.flags) {
@@ -138,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
138149
}
139150
}
140151

152+
if (cpumask_empty(non_housekeeping_mask))
153+
goto free_housekeeping_staging;
154+
141155
if (!housekeeping.flags) {
142156
/* First setup call ("nohz_full=" or "isolcpus=") */
143157
enum hk_type type;

kernel/time/tick-common.c

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -179,26 +179,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
179179
}
180180
}
181181

182-
#ifdef CONFIG_NO_HZ_FULL
183-
static void giveup_do_timer(void *info)
184-
{
185-
int cpu = *(unsigned int *)info;
186-
187-
WARN_ON(tick_do_timer_cpu != smp_processor_id());
188-
189-
tick_do_timer_cpu = cpu;
190-
}
191-
192-
static void tick_take_do_timer_from_boot(void)
193-
{
194-
int cpu = smp_processor_id();
195-
int from = tick_do_timer_boot_cpu;
196-
197-
if (from >= 0 && from != cpu)
198-
smp_call_function_single(from, giveup_do_timer, &cpu, 1);
199-
}
200-
#endif
201-
202182
/*
203183
* Setup the tick device
204184
*/
@@ -222,19 +202,25 @@ static void tick_setup_device(struct tick_device *td,
222202
tick_next_period = ktime_get();
223203
#ifdef CONFIG_NO_HZ_FULL
224204
/*
225-
* The boot CPU may be nohz_full, in which case set
226-
* tick_do_timer_boot_cpu so the first housekeeping
227-
* secondary that comes up will take do_timer from
228-
* us.
205+
* The boot CPU may be nohz_full, in which case the
206+
* first housekeeping secondary will take do_timer()
207+
* from it.
229208
*/
230209
if (tick_nohz_full_cpu(cpu))
231210
tick_do_timer_boot_cpu = cpu;
232211

233-
} else if (tick_do_timer_boot_cpu != -1 &&
234-
!tick_nohz_full_cpu(cpu)) {
235-
tick_take_do_timer_from_boot();
212+
} else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) {
236213
tick_do_timer_boot_cpu = -1;
237-
WARN_ON(tick_do_timer_cpu != cpu);
214+
/*
215+
* The boot CPU will stay in periodic (NOHZ disabled)
216+
* mode until clocksource_done_booting(), called after
217+
* smp_init(), selects a high resolution clocksource and
218+
* timekeeping_notify() kicks the NOHZ stuff alive.
219+
*
220+
* So this WRITE_ONCE can only race with the READ_ONCE
221+
* check in tick_periodic() but this race is harmless.
222+
*/
223+
WRITE_ONCE(tick_do_timer_cpu, cpu);
238224
#endif
239225
}
240226

0 commit comments

Comments
 (0)