|
| 1 | +x86/mce/therm_throt: Undo thermal polling properly on CPU offline |
| 2 | + |
| 3 | +jira LE-3201 |
| 4 | +Rebuild_History Non-Buildable kernel-rt-4.18.0-553.22.1.rt7.363.el8_10 |
| 5 | +commit-author Thomas Gleixner <tglx@linutronix.de> |
| 6 | +commit d364847eed890211444ad74496bb549f838c6018 |
| 7 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 8 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 9 | +ciq/ciq_backports/kernel-rt-4.18.0-553.22.1.rt7.363.el8_10/d364847e.failed |
| 10 | + |
| 11 | +Chris Wilson reported splats from running the thermal throttling |
| 12 | +workqueue callback on offlined CPUs. The problem is that that callback |
| 13 | +should not even run on offlined CPUs but it happens nevertheless because |
| 14 | +the offlining callback thermal_throttle_offline() does not symmetrically |
| 15 | +undo the setup work done in its onlining counterpart. IOW, |
| 16 | + |
| 17 | + 1. The thermal interrupt vector should be masked out before ... |
| 18 | + |
| 19 | + 2. ... cancelling any pending work synchronously so that no new work is |
| 20 | + enqueued anymore. |
| 21 | + |
| 22 | +Do those things and fix the issue properly. |
| 23 | + |
| 24 | + [ bp: Write commit message. ] |
| 25 | + |
| 26 | +Fixes: f6656208f04e ("x86/mce/therm_throt: Optimize notifications of thermal throttle") |
| 27 | + Reported-by: Chris Wilson <chris@chris-wilson.co.uk> |
| 28 | + Tested-by: Pandruvada, Srinivas <srinivas.pandruvada@linux.intel.com> |
| 29 | + Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| 30 | + Signed-off-by: Borislav Petkov <bp@suse.de> |
| 31 | +Link: https://lkml.kernel.org/r/158120068234.18291.7938335950259651295@skylake-alporthouse-com |
| 32 | +(cherry picked from commit d364847eed890211444ad74496bb549f838c6018) |
| 33 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 34 | + |
| 35 | +# Conflicts: |
| 36 | +# drivers/thermal/intel/therm_throt.c |
| 37 | +diff --cc drivers/thermal/intel/therm_throt.c |
| 38 | +index 4845b553feb3,f36dc0742085..000000000000 |
| 39 | +--- a/drivers/thermal/intel/therm_throt.c |
| 40 | ++++ b/drivers/thermal/intel/therm_throt.c |
| 41 | +@@@ -287,9 -484,19 +287,22 @@@ static int thermal_throttle_online(unsi |
| 42 | + |
| 43 | + static int thermal_throttle_offline(unsigned int cpu) |
| 44 | + { |
| 45 | + - struct thermal_state *state = &per_cpu(thermal_state, cpu); |
| 46 | + struct device *dev = get_cpu_device(cpu); |
| 47 | ++ u32 l; |
| 48 | + |
| 49 | +++<<<<<<< HEAD:drivers/thermal/intel/therm_throt.c |
| 50 | + + intel_hfi_offline(cpu); |
| 51 | +++======= |
| 52 | ++ /* Mask the thermal vector before draining evtl. pending work */ |
| 53 | ++ l = apic_read(APIC_LVTTHMR); |
| 54 | ++ apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED); |
| 55 | ++ |
| 56 | ++ cancel_delayed_work_sync(&state->package_throttle.therm_work); |
| 57 | ++ cancel_delayed_work_sync(&state->core_throttle.therm_work); |
| 58 | ++ |
| 59 | ++ state->package_throttle.rate_control_active = false; |
| 60 | ++ state->core_throttle.rate_control_active = false; |
| 61 | +++>>>>>>> d364847eed89 (x86/mce/therm_throt: Undo thermal polling properly on CPU offline):arch/x86/kernel/cpu/mce/therm_throt.c |
| 62 | + |
| 63 | + thermal_throttle_remove_dev(dev); |
| 64 | + return 0; |
| 65 | +* Unmerged path drivers/thermal/intel/therm_throt.c |
0 commit comments