Commit 327eb70 (merge of parents 7508830 and 306c769)
Author: CKI KWF Bot

Merge: Update intel_pstate to upstream 6.17

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7314
JIRA: https://issues.redhat.com/browse/RHEL-112493

intel_pstate is the CPU frequency driver for Intel processors and needs to be
updated regularly.

Signed-off-by: David Arcari <darcari@redhat.com>
Approved-by: Rafael Aquini <raquini@redhat.com>
Approved-by: Lenny Szubowicz <lszubowi@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>

File tree: 9 files changed (+889, −221 lines)

drivers/base/arch_topology.c
Lines changed: 0 additions & 38 deletions

@@ -152,14 +152,6 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
 		per_cpu(arch_freq_scale, i) = scale;
 }
 
-DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);
-
-void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
-{
-	per_cpu(cpu_scale, cpu) = capacity;
-}
-
 DEFINE_PER_CPU(unsigned long, hw_pressure);
 
 /**
@@ -205,39 +197,9 @@ void topology_update_hw_pressure(const struct cpumask *cpus,
 }
 EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
 
-static ssize_t cpu_capacity_show(struct device *dev,
-				 struct device_attribute *attr,
-				 char *buf)
-{
-	struct cpu *cpu = container_of(dev, struct cpu, dev);
-
-	return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
-}
-
 static void update_topology_flags_workfn(struct work_struct *work);
 static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
 
-static DEVICE_ATTR_RO(cpu_capacity);
-
-static int register_cpu_capacity_sysctl(void)
-{
-	int i;
-	struct device *cpu;
-
-	for_each_possible_cpu(i) {
-		cpu = get_cpu_device(i);
-		if (!cpu) {
-			pr_err("%s: too early to get CPU%d device!\n",
-			       __func__, i);
-			continue;
-		}
-		device_create_file(cpu, &dev_attr_cpu_capacity);
-	}
-
-	return 0;
-}
-subsys_initcall(register_cpu_capacity_sysctl);
-
 static int update_topology;
 
 int topology_update_cpu_topology(void)
drivers/base/topology.c
Lines changed: 52 additions & 0 deletions

@@ -208,3 +208,55 @@ static int __init topology_sysfs_init(void)
 }
 
 device_initcall(topology_sysfs_init);
+
+DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);
+
+void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
+{
+	per_cpu(cpu_scale, cpu) = capacity;
+}
+
+static ssize_t cpu_capacity_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+	return sysfs_emit(buf, "%lu\n", topology_get_cpu_scale(cpu->dev.id));
+}
+
+static DEVICE_ATTR_RO(cpu_capacity);
+
+static int cpu_capacity_sysctl_add(unsigned int cpu)
+{
+	struct device *cpu_dev = get_cpu_device(cpu);
+
+	if (!cpu_dev)
+		return -ENOENT;
+
+	device_create_file(cpu_dev, &dev_attr_cpu_capacity);
+
+	return 0;
+}
+
+static int cpu_capacity_sysctl_remove(unsigned int cpu)
+{
+	struct device *cpu_dev = get_cpu_device(cpu);
+
+	if (!cpu_dev)
+		return -ENOENT;
+
+	device_remove_file(cpu_dev, &dev_attr_cpu_capacity);
+
+	return 0;
+}
+
+static int register_cpu_capacity_sysctl(void)
+{
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity",
+			  cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove);
+
+	return 0;
+}
+subsys_initcall(register_cpu_capacity_sysctl);
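
For reference only (not part of this diff): the cpu_capacity attribute registered above shows up under each CPU's device directory in sysfs. A minimal userspace sketch that dumps the per-CPU values might look like the following; the /sys/devices/system/cpu/cpuN/cpu_capacity path is assumed from the standard CPU device location, and the program itself is purely illustrative.

/* Illustrative sketch: read per-CPU capacity from sysfs (path assumed). */
#include <stdio.h>

int main(void)
{
	char path[64];
	unsigned long capacity;
	FILE *f;

	for (int cpu = 0; ; cpu++) {
		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu%d/cpu_capacity", cpu);
		f = fopen(path, "r");
		if (!f)
			break;	/* no such CPU, or attribute not exposed */
		if (fscanf(f, "%lu", &capacity) == 1)
			printf("cpu%d capacity: %lu\n", cpu, capacity);
		fclose(f);
	}
	return 0;
}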

drivers/cpufreq/cpufreq.c
Lines changed: 1 addition & 1 deletion

@@ -1587,7 +1587,7 @@ static int cpufreq_online(unsigned int cpu)
 
 	/*
 	 * Register with the energy model before
-	 * sugov_eas_rebuild_sd() is called, which will result
+	 * em_rebuild_sched_domains() is called, which will result
	 * in rebuilding of the sched domains, which should only be done
	 * once the energy model is properly initialized for the policy
	 * first.

drivers/cpufreq/intel_pstate.c
Lines changed: 138 additions & 4 deletions

@@ -29,6 +29,7 @@
 #include <linux/bitfield.h>
 #include <trace/events/power.h>
 #include <linux/units.h>
+#include <linux/cacheinfo.h>
 
 #include <asm/cpu.h>
 #include <asm/div64.h>
@@ -221,6 +222,7 @@ struct global_params {
  * @sched_flags:	Store scheduler flags for possible cross CPU update
  * @hwp_boost_min:	Last HWP boosted min performance
  * @suspended:		Whether or not the driver has been suspended.
+ * @pd_registered:	Set when a perf domain is registered for this CPU.
  * @hwp_notify_work:	workqueue for HWP notifications.
  *
  * This structure stores per CPU instance data for all CPUs.
@@ -260,6 +262,9 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
+#ifdef CONFIG_ENERGY_MODEL
+	bool pd_registered;
+#endif
 	struct delayed_work hwp_notify_work;
 };
 
@@ -303,6 +308,7 @@ static bool hwp_is_hybrid;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
+#define INTEL_PSTATE_CORE_SCALING	100000
 #define HYBRID_SCALING_FACTOR_ADL	78741
 #define HYBRID_SCALING_FACTOR_MTL	80000
 #define HYBRID_SCALING_FACTOR_LNL	86957
@@ -311,7 +317,7 @@ static int hybrid_scaling_factor;
 
 static inline int core_get_scaling(void)
 {
-	return 100000;
+	return INTEL_PSTATE_CORE_SCALING;
 }
 
 #ifdef CONFIG_ACPI
@@ -948,12 +954,124 @@ static struct cpudata *hybrid_max_perf_cpu __read_mostly;
  */
 static DEFINE_MUTEX(hybrid_capacity_lock);
 
+#ifdef CONFIG_ENERGY_MODEL
+#define HYBRID_EM_STATE_COUNT	4
+
+static int hybrid_active_power(struct device *dev, unsigned long *power,
+			       unsigned long *freq)
+{
+	/*
+	 * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
+	 * of the maximum capacity such that two CPUs of the same type will be
+	 * regarded as equally attractive if the utilization of each of them
+	 * falls into the same bin, which should prevent tasks from being
+	 * migrated between them too often.
+	 *
+	 * For this purpose, return the "frequency" of 2 for the first
+	 * performance level and otherwise leave the value set by the caller.
+	 */
+	if (!*freq)
+		*freq = 2;
+
+	/* No power information. */
+	*power = EM_MAX_POWER;
+
+	return 0;
+}
+
+static int hybrid_get_cost(struct device *dev, unsigned long freq,
+			   unsigned long *cost)
+{
+	struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
+	struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id);
+
+	/*
+	 * The smaller the perf-to-frequency scaling factor, the larger the IPC
+	 * ratio between the given CPU and the least capable CPU in the system.
+	 * Regard that IPC ratio as the primary cost component and assume that
+	 * the scaling factors for different CPU types will differ by at least
+	 * 5% and they will not be above INTEL_PSTATE_CORE_SCALING.
+	 *
+	 * Add the freq value to the cost, so that the cost of running on CPUs
+	 * of the same type in different "utilization bins" is different.
+	 */
+	*cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
+	/*
+	 * Increase the cost slightly for CPUs able to access L3 to avoid
+	 * touching it in case some other CPUs of the same type can do the work
+	 * without it.
+	 */
+	if (cacheinfo) {
+		unsigned int i;
+
+		/* Check if L3 cache is there. */
+		for (i = 0; i < cacheinfo->num_leaves; i++) {
+			if (cacheinfo->info_list[i].level == 3) {
+				*cost += 2;
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static bool hybrid_register_perf_domain(unsigned int cpu)
+{
+	static const struct em_data_callback cb
+			= EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost);
+	struct cpudata *cpudata = all_cpu_data[cpu];
+	struct device *cpu_dev;
+
+	/*
+	 * Registering EM perf domains without enabling asymmetric CPU capacity
+	 * support is not really useful and one domain should not be registered
+	 * more than once.
+	 */
+	if (!hybrid_max_perf_cpu || cpudata->pd_registered)
+		return false;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return false;
+
+	if (em_dev_register_pd_no_update(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
+					 cpumask_of(cpu), false))
+		return false;
+
+	cpudata->pd_registered = true;
+
+	return true;
+}
+
+static void hybrid_register_all_perf_domains(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu)
+		hybrid_register_perf_domain(cpu);
+}
+
+static void hybrid_update_perf_domain(struct cpudata *cpu)
+{
+	if (cpu->pd_registered)
+		em_adjust_cpu_capacity(cpu->cpu);
+}
+#else /* !CONFIG_ENERGY_MODEL */
+static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; }
+static inline void hybrid_register_all_perf_domains(void) {}
+static inline void hybrid_update_perf_domain(struct cpudata *cpu) {}
+#endif /* CONFIG_ENERGY_MODEL */
+
 static void hybrid_set_cpu_capacity(struct cpudata *cpu)
 {
 	arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
 			      hybrid_max_perf_cpu->capacity_perf,
 			      cpu->capacity_perf,
 			      cpu->pstate.max_pstate_physical);
+	hybrid_update_perf_domain(cpu);
+
+	topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu));
 
 	pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
 		 cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
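
To make the cost calculation in hybrid_get_cost() above concrete, here is a small standalone sketch (illustrative only, not part of this diff) that evaluates the primary cost component for two scaling factors already defined in this file, INTEL_PSTATE_CORE_SCALING (100000) and HYBRID_SCALING_FACTOR_ADL (78741); the per-state freq offset and the L3 adjustment of +2 are left out for brevity.

/* Illustrative only: base cost term of hybrid_get_cost() for two scaling
 * factors defined in intel_pstate.c. Not part of this diff. */
#include <stdio.h>

int main(void)
{
	unsigned long core_scaling = 100000;	/* INTEL_PSTATE_CORE_SCALING */
	unsigned long adl_scaling = 78741;	/* HYBRID_SCALING_FACTOR_ADL */

	/* Scaling equal to the core scaling gives the minimum base cost, 100. */
	printf("base cost (scaling %lu): %lu\n", core_scaling,
	       100UL * core_scaling / core_scaling);

	/* A smaller scaling factor means a larger IPC ratio and thus a higher
	 * base cost: 100 * 100000 / 78741 = 126 (integer division). */
	printf("base cost (scaling %lu): %lu\n", adl_scaling,
	       100UL * core_scaling / adl_scaling);

	return 0;
}

In other words, CPU types with higher IPC are reported to the energy model as more "expensive" per performance state, which is how the cost table differentiates CPU types on a hybrid system even though no real power data is provided.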
@@ -1042,6 +1160,11 @@ static void hybrid_refresh_cpu_capacity_scaling(void)
 	guard(mutex)(&hybrid_capacity_lock);
 
 	__hybrid_refresh_cpu_capacity_scaling();
+	/*
+	 * Perf domains are not registered before setting hybrid_max_perf_cpu,
+	 * so register them all after setting up CPU capacity scaling.
+	 */
+	hybrid_register_all_perf_domains();
 }
 
 static void hybrid_init_cpu_capacity_scaling(bool refresh)
@@ -1069,7 +1192,7 @@ static void hybrid_init_cpu_capacity_scaling(bool refresh)
 		hybrid_refresh_cpu_capacity_scaling();
 		/*
 		 * Disabling ITMT causes sched domains to be rebuilt to disable asym
-		 * packing and enable asym capacity.
+		 * packing and enable asym capacity and EAS.
		 */
		sched_clear_itmt_support();
	}
@@ -1147,6 +1270,14 @@ static void hybrid_update_capacity(struct cpudata *cpu)
 	}
 
 	hybrid_set_cpu_capacity(cpu);
+	/*
+	 * If the CPU was offline to start with and it is going online for the
+	 * first time, a perf domain needs to be registered for it if hybrid
+	 * capacity scaling has been enabled already. In that case, sched
+	 * domains need to be rebuilt to take the new perf domain into account.
+	 */
+	if (hybrid_register_perf_domain(cpu->cpu))
+		em_rebuild_sched_domains();
 
 unlock:
 	mutex_unlock(&hybrid_capacity_lock);
@@ -2656,6 +2787,8 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	X86_MATCH(INTEL_TIGERLAKE, core_funcs),
 	X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
 	X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs),
+	X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
@@ -2672,6 +2805,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
 	X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
 	X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
 	X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
+	X86_MATCH(INTEL_ATOM_DARKMONT_X, core_funcs),
 	{}
 };
 #endif
@@ -3130,8 +3264,8 @@ static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy,
 		int max_pstate = policy->strict_target ?
 					target_pstate : cpu->max_perf_ratio;
 
-		intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate, 0,
-					 fast_switch);
+		intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate,
+					 target_pstate, fast_switch);
 	} else if (target_pstate != old_pstate) {
 		intel_cpufreq_perf_ctl_update(cpu, target_pstate, fast_switch);
 	}

include/linux/arch_topology.h
Lines changed: 0 additions & 8 deletions

@@ -14,14 +14,6 @@ int topology_update_cpu_topology(void);
 struct device_node;
 bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
 
-DECLARE_PER_CPU(unsigned long, cpu_scale);
-
-static inline unsigned long topology_get_cpu_scale(int cpu)
-{
-	return per_cpu(cpu_scale, cpu);
-}
-
-void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
 
 DECLARE_PER_CPU(unsigned long, capacity_freq_ref);
 
