Skip to content

Commit 348153a

Browse files
committed
x86/mce/therm_throt: Mask out read-only and reserved MSR bits
jira LE-3201 Rebuild_History Non-Buildable kernel-rt-4.18.0-553.22.1.rt7.363.el8_10 commit-author Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> commit 5a43b87 Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included in final tarball splat. Ref for failed cherry-pick at: ciq/ciq_backports/kernel-rt-4.18.0-553.22.1.rt7.363.el8_10/5a43b87b.failed While writing to MSR IA32_THERM_STATUS/IA32_PKG_THERM_STATUS, avoid writing 1 to read-only and reserved fields because updating some fields generates an exception. [ bp: Vertically align for better readability. ] Fixes: f665620 ("x86/mce/therm_throt: Optimize notifications of thermal throttle") Reported-by: Dominik Brodowski <linux@dominikbrodowski.net> Tested-by: Dominik Brodowski <linux@dominikbrodowski.net> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: linux-edac <linux-edac@vger.kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: x86-ml <x86@kernel.org> Link: https://lkml.kernel.org/r/20191128150824.22413-1-srinivas.pandruvada@linux.intel.com (cherry picked from commit 5a43b87) Signed-off-by: Jonathan Maple <jmaple@ciq.com> # Conflicts: # drivers/thermal/intel/therm_throt.c
1 parent 598d507 commit 348153a

File tree

1 file changed

+154
-0
lines changed

1 file changed

+154
-0
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
x86/mce/therm_throt: Mask out read-only and reserved MSR bits
2+
3+
jira LE-3201
4+
Rebuild_History Non-Buildable kernel-rt-4.18.0-553.22.1.rt7.363.el8_10
5+
commit-author Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
6+
commit 5a43b87b3c62ad149ba6e9d0d3e5c0e5da02a5ca
7+
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
8+
Will be included in final tarball splat. Ref for failed cherry-pick at:
9+
ciq/ciq_backports/kernel-rt-4.18.0-553.22.1.rt7.363.el8_10/5a43b87b.failed
10+
11+
While writing to MSR IA32_THERM_STATUS/IA32_PKG_THERM_STATUS, avoid
12+
writing 1 to read-only and reserved fields because updating some fields
13+
generates an exception.
14+
15+
[ bp: Vertically align for better readability. ]
16+
17+
Fixes: f6656208f04e ("x86/mce/therm_throt: Optimize notifications of thermal throttle")
18+
Reported-by: Dominik Brodowski <linux@dominikbrodowski.net>
19+
Tested-by: Dominik Brodowski <linux@dominikbrodowski.net>
20+
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
21+
Signed-off-by: Borislav Petkov <bp@suse.de>
22+
Cc: "H. Peter Anvin" <hpa@zytor.com>
23+
Cc: Ingo Molnar <mingo@redhat.com>
24+
Cc: linux-edac <linux-edac@vger.kernel.org>
25+
Cc: Thomas Gleixner <tglx@linutronix.de>
26+
Cc: Tony Luck <tony.luck@intel.com>
27+
Cc: x86-ml <x86@kernel.org>
28+
Link: https://lkml.kernel.org/r/20191128150824.22413-1-srinivas.pandruvada@linux.intel.com
29+
(cherry picked from commit 5a43b87b3c62ad149ba6e9d0d3e5c0e5da02a5ca)
30+
Signed-off-by: Jonathan Maple <jmaple@ciq.com>
31+
32+
# Conflicts:
33+
# drivers/thermal/intel/therm_throt.c
34+
diff --cc drivers/thermal/intel/therm_throt.c
35+
index dd55d96efeff,b38010b541d6..000000000000
36+
--- a/drivers/thermal/intel/therm_throt.c
37+
+++ b/drivers/thermal/intel/therm_throt.c
38+
@@@ -134,6 -192,112 +134,115 @@@ static const struct attribute_group the
39+
#define CORE_LEVEL 0
40+
#define PACKAGE_LEVEL 1
41+
42+
++<<<<<<< HEAD:drivers/thermal/intel/therm_throt.c
43+
++=======
44+
+ #define THERM_THROT_POLL_INTERVAL HZ
45+
+ #define THERM_STATUS_PROCHOT_LOG BIT(1)
46+
+
47+
+ #define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
48+
+ #define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
49+
+
50+
+ static void clear_therm_status_log(int level)
51+
+ {
52+
+ int msr;
53+
+ u64 mask, msr_val;
54+
+
55+
+ if (level == CORE_LEVEL) {
56+
+ msr = MSR_IA32_THERM_STATUS;
57+
+ mask = THERM_STATUS_CLEAR_CORE_MASK;
58+
+ } else {
59+
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
60+
+ mask = THERM_STATUS_CLEAR_PKG_MASK;
61+
+ }
62+
+
63+
+ rdmsrl(msr, msr_val);
64+
+ msr_val &= mask;
65+
+ wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
66+
+ }
67+
+
68+
+ static void get_therm_status(int level, bool *proc_hot, u8 *temp)
69+
+ {
70+
+ int msr;
71+
+ u64 msr_val;
72+
+
73+
+ if (level == CORE_LEVEL)
74+
+ msr = MSR_IA32_THERM_STATUS;
75+
+ else
76+
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
77+
+
78+
+ rdmsrl(msr, msr_val);
79+
+ if (msr_val & THERM_STATUS_PROCHOT_LOG)
80+
+ *proc_hot = true;
81+
+ else
82+
+ *proc_hot = false;
83+
+
84+
+ *temp = (msr_val >> 16) & 0x7F;
85+
+ }
86+
+
87+
+ static void throttle_active_work(struct work_struct *work)
88+
+ {
89+
+ struct _thermal_state *state = container_of(to_delayed_work(work),
90+
+ struct _thermal_state, therm_work);
91+
+ unsigned int i, avg, this_cpu = smp_processor_id();
92+
+ u64 now = get_jiffies_64();
93+
+ bool hot;
94+
+ u8 temp;
95+
+
96+
+ get_therm_status(state->level, &hot, &temp);
97+
+ /* temperature value is offset from the max so lesser means hotter */
98+
+ if (!hot && temp > state->baseline_temp) {
99+
+ if (state->rate_control_active)
100+
+ pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
101+
+ this_cpu,
102+
+ state->level == CORE_LEVEL ? "Core" : "Package",
103+
+ state->count);
104+
+
105+
+ state->rate_control_active = false;
106+
+ return;
107+
+ }
108+
+
109+
+ if (time_before64(now, state->next_check) &&
110+
+ state->rate_control_active)
111+
+ goto re_arm;
112+
+
113+
+ state->next_check = now + CHECK_INTERVAL;
114+
+
115+
+ if (state->count != state->last_count) {
116+
+ /* There was one new thermal interrupt */
117+
+ state->last_count = state->count;
118+
+ state->average = 0;
119+
+ state->sample_count = 0;
120+
+ state->sample_index = 0;
121+
+ }
122+
+
123+
+ state->temp_samples[state->sample_index] = temp;
124+
+ state->sample_count++;
125+
+ state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
126+
+ if (state->sample_count < ARRAY_SIZE(state->temp_samples))
127+
+ goto re_arm;
128+
+
129+
+ avg = 0;
130+
+ for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
131+
+ avg += state->temp_samples[i];
132+
+
133+
+ avg /= ARRAY_SIZE(state->temp_samples);
134+
+
135+
+ if (state->average > avg) {
136+
+ pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
137+
+ this_cpu,
138+
+ state->level == CORE_LEVEL ? "Core" : "Package",
139+
+ state->count);
140+
+ state->rate_control_active = true;
141+
+ }
142+
+
143+
+ state->average = avg;
144+
+
145+
+ re_arm:
146+
+ clear_therm_status_log(state->level);
147+
+ schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
148+
+ }
149+
+
150+
++>>>>>>> 5a43b87b3c62 (x86/mce/therm_throt: Mask out read-only and reserved MSR bits):arch/x86/kernel/cpu/mce/therm_throt.c
151+
/***
152+
* therm_throt_process - Process thermal throttling event from interrupt
153+
* @curr: Whether the condition is current or not (boolean), since the
154+
* Unmerged path drivers/thermal/intel/therm_throt.c

0 commit comments

Comments
 (0)