|
| 1 | +x86/mce/therm_throt: Mask out read-only and reserved MSR bits |
| 2 | + |
| 3 | +jira LE-3201 |
| 4 | +Rebuild_History Non-Buildable kernel-rt-4.18.0-553.22.1.rt7.363.el8_10 |
| 5 | +commit-author Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> |
| 6 | +commit 5a43b87b3c62ad149ba6e9d0d3e5c0e5da02a5ca |
| 7 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 8 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 9 | +ciq/ciq_backports/kernel-rt-4.18.0-553.22.1.rt7.363.el8_10/5a43b87b.failed |
| 10 | + |
| 11 | +While writing to MSR IA32_THERM_STATUS/IA32_PKG_THERM_STATUS, avoid |
| 12 | +writing 1 to read-only and reserved fields because updating some fields |
| 13 | +generates an exception. |
| 14 | + |
| 15 | + [ bp: Vertically align for better readability. ] |
| 16 | + |
| 17 | +Fixes: f6656208f04e ("x86/mce/therm_throt: Optimize notifications of thermal throttle") |
| 18 | + Reported-by: Dominik Brodowski <linux@dominikbrodowski.net> |
| 19 | + Tested-by: Dominik Brodowski <linux@dominikbrodowski.net> |
| 20 | + Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> |
| 21 | + Signed-off-by: Borislav Petkov <bp@suse.de> |
| 22 | + Cc: "H. Peter Anvin" <hpa@zytor.com> |
| 23 | + Cc: Ingo Molnar <mingo@redhat.com> |
| 24 | + Cc: linux-edac <linux-edac@vger.kernel.org> |
| 25 | + Cc: Thomas Gleixner <tglx@linutronix.de> |
| 26 | + Cc: Tony Luck <tony.luck@intel.com> |
| 27 | + Cc: x86-ml <x86@kernel.org> |
| 28 | +Link: https://lkml.kernel.org/r/20191128150824.22413-1-srinivas.pandruvada@linux.intel.com |
| 29 | +(cherry picked from commit 5a43b87b3c62ad149ba6e9d0d3e5c0e5da02a5ca) |
| 30 | + Signed-off-by: Jonathan Maple <jmaple@ciq.com> |
| 31 | + |
| 32 | +# Conflicts: |
| 33 | +# drivers/thermal/intel/therm_throt.c |
| 34 | +diff --cc drivers/thermal/intel/therm_throt.c |
| 35 | +index dd55d96efeff,b38010b541d6..000000000000 |
| 36 | +--- a/drivers/thermal/intel/therm_throt.c |
| 37 | ++++ b/drivers/thermal/intel/therm_throt.c |
| 38 | +@@@ -134,6 -192,112 +134,115 @@@ static const struct attribute_group the |
| 39 | + #define CORE_LEVEL 0 |
| 40 | + #define PACKAGE_LEVEL 1 |
| 41 | + |
| 42 | +++<<<<<<< HEAD:drivers/thermal/intel/therm_throt.c |
| 43 | +++======= |
| 44 | ++ #define THERM_THROT_POLL_INTERVAL HZ |
| 45 | ++ #define THERM_STATUS_PROCHOT_LOG BIT(1) |
| 46 | ++ |
| 47 | ++ #define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15)) |
| 48 | ++ #define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)) |
| 49 | ++ |
| 50 | ++ static void clear_therm_status_log(int level) |
| 51 | ++ { |
| 52 | ++ int msr; |
| 53 | ++ u64 mask, msr_val; |
| 54 | ++ |
| 55 | ++ if (level == CORE_LEVEL) { |
| 56 | ++ msr = MSR_IA32_THERM_STATUS; |
| 57 | ++ mask = THERM_STATUS_CLEAR_CORE_MASK; |
| 58 | ++ } else { |
| 59 | ++ msr = MSR_IA32_PACKAGE_THERM_STATUS; |
| 60 | ++ mask = THERM_STATUS_CLEAR_PKG_MASK; |
| 61 | ++ } |
| 62 | ++ |
| 63 | ++ rdmsrl(msr, msr_val); |
| 64 | ++ msr_val &= mask; |
| 65 | ++ wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG); |
| 66 | ++ } |
| 67 | ++ |
| 68 | ++ static void get_therm_status(int level, bool *proc_hot, u8 *temp) |
| 69 | ++ { |
| 70 | ++ int msr; |
| 71 | ++ u64 msr_val; |
| 72 | ++ |
| 73 | ++ if (level == CORE_LEVEL) |
| 74 | ++ msr = MSR_IA32_THERM_STATUS; |
| 75 | ++ else |
| 76 | ++ msr = MSR_IA32_PACKAGE_THERM_STATUS; |
| 77 | ++ |
| 78 | ++ rdmsrl(msr, msr_val); |
| 79 | ++ if (msr_val & THERM_STATUS_PROCHOT_LOG) |
| 80 | ++ *proc_hot = true; |
| 81 | ++ else |
| 82 | ++ *proc_hot = false; |
| 83 | ++ |
| 84 | ++ *temp = (msr_val >> 16) & 0x7F; |
| 85 | ++ } |
| 86 | ++ |
| 87 | ++ static void throttle_active_work(struct work_struct *work) |
| 88 | ++ { |
| 89 | ++ struct _thermal_state *state = container_of(to_delayed_work(work), |
| 90 | ++ struct _thermal_state, therm_work); |
| 91 | ++ unsigned int i, avg, this_cpu = smp_processor_id(); |
| 92 | ++ u64 now = get_jiffies_64(); |
| 93 | ++ bool hot; |
| 94 | ++ u8 temp; |
| 95 | ++ |
| 96 | ++ get_therm_status(state->level, &hot, &temp); |
| 97 | ++ /* temperature value is offset from the max so lesser means hotter */ |
| 98 | ++ if (!hot && temp > state->baseline_temp) { |
| 99 | ++ if (state->rate_control_active) |
| 100 | ++ pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n", |
| 101 | ++ this_cpu, |
| 102 | ++ state->level == CORE_LEVEL ? "Core" : "Package", |
| 103 | ++ state->count); |
| 104 | ++ |
| 105 | ++ state->rate_control_active = false; |
| 106 | ++ return; |
| 107 | ++ } |
| 108 | ++ |
| 109 | ++ if (time_before64(now, state->next_check) && |
| 110 | ++ state->rate_control_active) |
| 111 | ++ goto re_arm; |
| 112 | ++ |
| 113 | ++ state->next_check = now + CHECK_INTERVAL; |
| 114 | ++ |
| 115 | ++ if (state->count != state->last_count) { |
| 116 | ++ /* There was one new thermal interrupt */ |
| 117 | ++ state->last_count = state->count; |
| 118 | ++ state->average = 0; |
| 119 | ++ state->sample_count = 0; |
| 120 | ++ state->sample_index = 0; |
| 121 | ++ } |
| 122 | ++ |
| 123 | ++ state->temp_samples[state->sample_index] = temp; |
| 124 | ++ state->sample_count++; |
| 125 | ++ state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples); |
| 126 | ++ if (state->sample_count < ARRAY_SIZE(state->temp_samples)) |
| 127 | ++ goto re_arm; |
| 128 | ++ |
| 129 | ++ avg = 0; |
| 130 | ++ for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i) |
| 131 | ++ avg += state->temp_samples[i]; |
| 132 | ++ |
| 133 | ++ avg /= ARRAY_SIZE(state->temp_samples); |
| 134 | ++ |
| 135 | ++ if (state->average > avg) { |
| 136 | ++ pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n", |
| 137 | ++ this_cpu, |
| 138 | ++ state->level == CORE_LEVEL ? "Core" : "Package", |
| 139 | ++ state->count); |
| 140 | ++ state->rate_control_active = true; |
| 141 | ++ } |
| 142 | ++ |
| 143 | ++ state->average = avg; |
| 144 | ++ |
| 145 | ++ re_arm: |
| 146 | ++ clear_therm_status_log(state->level); |
| 147 | ++ schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL); |
| 148 | ++ } |
| 149 | ++ |
| 150 | +++>>>>>>> 5a43b87b3c62 (x86/mce/therm_throt: Mask out read-only and reserved MSR bits):arch/x86/kernel/cpu/mce/therm_throt.c |
| 151 | + /*** |
| 152 | + * therm_throt_process - Process thermal throttling event from interrupt |
| 153 | + * @curr: Whether the condition is current or not (boolean), since the |
| 154 | +* Unmerged path drivers/thermal/intel/therm_throt.c |
0 commit comments