
Commit 7d36d7c

Author: Herton R. Krzesinski (committed)

Merge: update cpufreq to v6.0

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/1478
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2122311

Commits for Tegra and other less supported platforms were applied out of
order, to make it easier to remove them. The large number of commits for
Qualcomm and MediaTek were not applied at all.

Signed-off-by: Mark Langsdorf <mlangsdo@redhat.com>
Approved-by: David Arcari <darcari@redhat.com>
Approved-by: Prarit Bhargava <prarit@redhat.com>
Approved-by: David Airlie <airlied@redhat.com>
Signed-off-by: Herton R. Krzesinski <herton@redhat.com>

2 parents c3967b3 + 4563376, commit 7d36d7c

29 files changed: +765 additions, -296 deletions

Documentation/power/energy-model.rst
Lines changed: 16 additions & 15 deletions

@@ -129,6 +129,10 @@ More details about the above APIs can be found in include/linux/energy_model.h.
 3. Example driver
 -----------------
 
+The CPUFreq framework supports dedicated callback for registering
+the EM for a given CPU(s) 'policy' object: cpufreq_driver::register_em().
+That callback has to be implemented properly for a given driver,
+because the framework would call it at the right time during setup.
 This section provides a simple example of a CPUFreq driver registering a
 performance domain in the Energy Model framework using the (fake) 'foo'
 protocol. The driver implements an est_power() function to be provided to the
@@ -158,25 +162,22 @@ EM framework::
   20  	return 0;
   21  }
   22
-  23  static int foo_cpufreq_init(struct cpufreq_policy *policy)
+  23  static void foo_cpufreq_register_em(struct cpufreq_policy *policy)
   24  {
   25  	struct em_data_callback em_cb = EM_DATA_CB(est_power);
   26  	struct device *cpu_dev;
-  27  	int nr_opp, ret;
+  27  	int nr_opp;
   28
   29  	cpu_dev = get_cpu_device(cpumask_first(policy->cpus));
   30
-  31  	/* Do the actual CPUFreq init work ... */
-  32  	ret = do_foo_cpufreq_init(policy);
-  33  	if (ret)
-  34  		return ret;
-  35
-  36  	/* Find the number of OPPs for this policy */
-  37  	nr_opp = foo_get_nr_opp(policy);
+  31  	/* Find the number of OPPs for this policy */
+  32  	nr_opp = foo_get_nr_opp(policy);
+  33
+  34  	/* And register the new performance domain */
+  35  	em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus,
+  36  				    true);
+  37  }
   38
-  39  	/* And register the new performance domain */
-  40  	em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb, policy->cpus,
-  41  				    true);
-  42
-  43  	return 0;
-  44  }
+  39  static struct cpufreq_driver foo_cpufreq_driver = {
+  40  	.register_em = foo_cpufreq_register_em,
+  41  };
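
For orientation (not part of this diff): the documented contract is that the cpufreq core, not the driver, decides when to register the EM. Below is a minimal sketch of the expected core-side call; the wrapper name is invented here, and only the cpufreq_driver::register_em field comes from the documentation above:

    /* Hypothetical sketch: once a policy is fully set up, the core
     * invokes the driver's register_em() hook if one was supplied.
     */
    static void example_register_em(struct cpufreq_driver *drv,
                                    struct cpufreq_policy *policy)
    {
            if (drv->register_em)
                    drv->register_em(policy);
    }

This core-driven timing is also why foo_cpufreq_register_em() in the updated example returns void and performs no error handling of its own.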

arch/arm64/kernel/smp.c
Lines changed: 1 addition & 0 deletions

@@ -539,6 +539,7 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
 {
 	return &cpu_madt_gicc[cpu];
 }
+EXPORT_SYMBOL_GPL(acpi_cpu_get_madt_gicc);
 
 /*
  * acpi_map_gic_cpu_interface - parse processor MADT entry
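
This export lets modular code look up a CPU's MADT GICC entry; the cppc_cpufreq changes later in this merge rely on it to read the ACPI efficiency class. A minimal sketch of such a consumer (the helper name is made up; the accessor and field are the ones populate_efficiency_class() uses below):

    #include <linux/acpi.h>

    /* Illustrative helper, not from this commit: returns the raw ACPI
     * efficiency class (0-255 per spec) for a possible CPU.
     */
    static u8 example_read_efficiency_class(int cpu)
    {
            struct acpi_madt_generic_interrupt *gicc = acpi_cpu_get_madt_gicc(cpu);

            return gicc->efficiency_class;
    }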

drivers/base/arch_topology.c
Lines changed: 1 addition & 0 deletions

@@ -151,6 +151,7 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
 }
 
 DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_scale);
 
 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
 {
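
Similarly, exporting cpu_scale makes arch_scale_cpu_capacity() usable from modules: on arm64 it resolves to the inline accessor in include/linux/arch_topology.h, which reads this per-CPU variable directly. Paraphrased here for context, not as part of the diff:

    /* Without the export above, any module inlining this accessor
     * would fail to link against the per-CPU symbol.
     */
    DECLARE_PER_CPU(unsigned long, cpu_scale);

    static inline unsigned long topology_get_cpu_scale(int cpu)
    {
            return per_cpu(cpu_scale, cpu);
    }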

drivers/cpufreq/acpi-cpufreq.c
Lines changed: 4 additions & 0 deletions

@@ -78,6 +78,8 @@ static bool boost_state(unsigned int cpu)
 
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
+	case X86_VENDOR_CENTAUR:
+	case X86_VENDOR_ZHAOXIN:
 		rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
 		msr = lo | ((u64)hi << 32);
 		return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
@@ -97,6 +99,8 @@ static int boost_set_msr(bool enable)
 
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
+	case X86_VENDOR_CENTAUR:
+	case X86_VENDOR_ZHAOXIN:
 		msr_addr = MSR_IA32_MISC_ENABLE;
 		msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
 		break;
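
Centaur and Zhaoxin CPUs implement the Intel-compatible MSR_IA32_MISC_ENABLE, so the existing turbo-disable test applies to them unchanged: boost is reported as enabled when the turbo-disable bit (bit 38) is clear. A self-contained illustration of that bit test; the constants mirror arch/x86/include/asm/msr-index.h and the sample value is made up:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MSR_IA32_MISC_ENABLE                    0x1a0
    #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE      (1ULL << 38)

    /* Boost is considered enabled when the turbo-disable bit is clear. */
    static bool boost_enabled(uint64_t misc_enable)
    {
            return !(misc_enable & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
    }

    int main(void)
    {
            /* Example MSR value with bit 38 set: turbo disabled. */
            uint64_t msr = 1ULL << 38;

            printf("boost: %s\n", boost_enabled(msr) ? "on" : "off");
            return 0;
    }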

drivers/cpufreq/cppc_cpufreq.c
Lines changed: 190 additions & 3 deletions

@@ -61,6 +61,8 @@ static struct cppc_workaround_oem_info wa_info[] = {
 	}
 };
 
+static struct cpufreq_driver cppc_cpufreq_driver;
+
 #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
 
 /* Frequency invariance support */
@@ -75,7 +77,6 @@ struct cppc_freq_invariance {
 static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
 static struct kthread_worker *kworker_fie;
 
-static struct cpufreq_driver cppc_cpufreq_driver;
 static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
 static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
 				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
@@ -440,15 +441,199 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
 	}
 	return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
 }
-
 #else
-
 static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
 {
 	return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
 }
 #endif
 
+#if defined(CONFIG_ARM64) && defined(CONFIG_ENERGY_MODEL)
+
+static DEFINE_PER_CPU(unsigned int, efficiency_class);
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
+
+/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
+#define CPPC_EM_CAP_STEP	(20)
+/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
+#define CPPC_EM_COST_STEP	(1)
+/* Add a cost gap correspnding to the energy of 4 CPUs. */
+#define CPPC_EM_COST_GAP	(4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
+				/ CPPC_EM_CAP_STEP)
+
+static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
+{
+	struct cppc_perf_caps *perf_caps;
+	unsigned int min_cap, max_cap;
+	struct cppc_cpudata *cpu_data;
+	int cpu = policy->cpu;
+
+	cpu_data = policy->driver_data;
+	perf_caps = &cpu_data->perf_caps;
+	max_cap = arch_scale_cpu_capacity(cpu);
+	min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
+	if ((min_cap == 0) || (max_cap < min_cap))
+		return 0;
+	return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
+}
+
+/*
+ * The cost is defined as:
+ *   cost = power * max_frequency / frequency
+ */
+static inline unsigned long compute_cost(int cpu, int step)
+{
+	return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
+			step * CPPC_EM_COST_STEP;
+}
+
+static int cppc_get_cpu_power(struct device *cpu_dev,
+		unsigned long *power, unsigned long *KHz)
+{
+	unsigned long perf_step, perf_prev, perf, perf_check;
+	unsigned int min_step, max_step, step, step_check;
+	unsigned long prev_freq = *KHz;
+	unsigned int min_cap, max_cap;
+	struct cpufreq_policy *policy;
+
+	struct cppc_perf_caps *perf_caps;
+	struct cppc_cpudata *cpu_data;
+
+	policy = cpufreq_cpu_get_raw(cpu_dev->id);
+	cpu_data = policy->driver_data;
+	perf_caps = &cpu_data->perf_caps;
+	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+	min_cap = div_u64(max_cap * perf_caps->lowest_perf,
+			perf_caps->highest_perf);
+
+	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+	min_step = min_cap / CPPC_EM_CAP_STEP;
+	max_step = max_cap / CPPC_EM_CAP_STEP;
+
+	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+	step = perf_prev / perf_step;
+
+	if (step > max_step)
+		return -EINVAL;
+
+	if (min_step == max_step) {
+		step = max_step;
+		perf = perf_caps->highest_perf;
+	} else if (step < min_step) {
+		step = min_step;
+		perf = perf_caps->lowest_perf;
+	} else {
+		step++;
+		if (step == max_step)
+			perf = perf_caps->highest_perf;
+		else
+			perf = step * perf_step;
+	}
+
+	*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+	perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+	step_check = perf_check / perf_step;
+
+	/*
+	 * To avoid bad integer approximation, check that new frequency value
+	 * increased and that the new frequency will be converted to the
+	 * desired step value.
+	 */
+	while ((*KHz == prev_freq) || (step_check != step)) {
+		perf++;
+		*KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+		perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+		step_check = perf_check / perf_step;
+	}
+
+	/*
+	 * With an artificial EM, only the cost value is used. Still the power
+	 * is populated such as 0 < power < EM_MAX_POWER. This allows to add
+	 * more sense to the artificial performance states.
+	 */
+	*power = compute_cost(cpu_dev->id, step);
+
+	return 0;
+}
+
+static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
+		unsigned long *cost)
+{
+	unsigned long perf_step, perf_prev;
+	struct cppc_perf_caps *perf_caps;
+	struct cpufreq_policy *policy;
+	struct cppc_cpudata *cpu_data;
+	unsigned int max_cap;
+	int step;
+
+	policy = cpufreq_cpu_get_raw(cpu_dev->id);
+	cpu_data = policy->driver_data;
+	perf_caps = &cpu_data->perf_caps;
+	max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+
+	perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
+	perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+	step = perf_prev / perf_step;
+
+	*cost = compute_cost(cpu_dev->id, step);
+
+	return 0;
+}
+
+static int populate_efficiency_class(void)
+{
+	struct acpi_madt_generic_interrupt *gicc;
+	DECLARE_BITMAP(used_classes, 256) = {};
+	int class, cpu, index;
+
+	for_each_possible_cpu(cpu) {
+		gicc = acpi_cpu_get_madt_gicc(cpu);
+		class = gicc->efficiency_class;
+		bitmap_set(used_classes, class, 1);
+	}
+
+	if (bitmap_weight(used_classes, 256) <= 1) {
+		pr_debug("Efficiency classes are all equal (=%d). "
+			"No EM registered", class);
+		return -EINVAL;
+	}
+
+	/*
+	 * Squeeze efficiency class values on [0:#efficiency_class-1].
+	 * Values are per spec in [0:255].
+	 */
+	index = 0;
+	for_each_set_bit(class, used_classes, 256) {
+		for_each_possible_cpu(cpu) {
+			gicc = acpi_cpu_get_madt_gicc(cpu);
+			if (gicc->efficiency_class == class)
+				per_cpu(efficiency_class, cpu) = index;
+		}
+		index++;
+	}
+	cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
+
+	return 0;
+}
+
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+	struct cppc_cpudata *cpu_data;
+	struct em_data_callback em_cb =
+		EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
+
+	cpu_data = policy->driver_data;
+	em_dev_register_perf_domain(get_cpu_device(policy->cpu),
+			get_perf_level_count(policy), &em_cb,
+			cpu_data->shared_cpu_map, 0);
+}
+
+#else
+static int populate_efficiency_class(void)
+{
+	return 0;
+}
+#endif
 
 static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu)
 {
@@ -558,6 +743,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	}
 
 	policy->fast_switch_possible = cppc_allow_fast_switch();
+	policy->dvfs_possible_from_any_cpu = true;
 
 	/*
 	 * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
@@ -766,6 +952,7 @@ static int __init cppc_cpufreq_init(void)
 
 	cppc_check_hisi_workaround();
 	cppc_freq_invariance_init();
+	populate_efficiency_class();
 
 	ret = cpufreq_register_driver(&cppc_cpufreq_driver);
 	if (ret)
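
To make the artificial-EM sizing concrete, here is a standalone sketch of the perf-level and cost arithmetic introduced above. The CPPC capabilities (lowest_perf=100, highest_perf=400 on a 1024-capacity CPU) are assumptions for illustration, not values from this commit:

    #include <stdio.h>

    #define SCHED_CAPACITY_SCALE    1024
    #define CPPC_EM_CAP_STEP        20
    #define CPPC_EM_COST_STEP       1
    #define CPPC_EM_COST_GAP        (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
                                     / CPPC_EM_CAP_STEP)

    int main(void)
    {
            /* Assumed CPPC capabilities for one CPU (illustrative only). */
            unsigned int lowest_perf = 100, highest_perf = 400;
            unsigned int max_cap = SCHED_CAPACITY_SCALE;  /* arch_scale_cpu_capacity() */
            unsigned int min_cap = max_cap * lowest_perf / highest_perf;  /* 256 */

            /* Mirrors get_perf_level_count(): one state per 20 capacity units. */
            unsigned int nr_states = 1 + max_cap / CPPC_EM_CAP_STEP
                                       - min_cap / CPPC_EM_CAP_STEP;  /* 40 */

            /* Mirrors compute_cost() at the top step for two efficiency classes. */
            unsigned int step = max_cap / CPPC_EM_CAP_STEP;  /* 51 */
            unsigned long cost_class0 = 0 * CPPC_EM_COST_GAP + step * CPPC_EM_COST_STEP;
            unsigned long cost_class1 = 1 * CPPC_EM_COST_GAP + step * CPPC_EM_COST_STEP;

            printf("states=%u cost(class0)=%lu cost(class1)=%lu gap=%d\n",
                   nr_states, cost_class0, cost_class1, CPPC_EM_COST_GAP);
            return 0;
    }

With these assumed numbers the sketch prints states=40, cost(class0)=51, cost(class1)=255. Since the gap (204) exceeds the largest possible step value (SCHED_CAPACITY_SCALE / CPPC_EM_CAP_STEP = 51), every state of a higher (less efficient) class costs more than any state of a lower class, which is what steers EAS placement toward the more efficient CPUs.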

drivers/cpufreq/cpufreq-dt.c
Lines changed: 9 additions & 10 deletions

@@ -29,9 +29,9 @@ struct private_data {
 
 	cpumask_var_t cpus;
 	struct device *cpu_dev;
-	struct opp_table *opp_table;
 	struct cpufreq_frequency_table *freq_table;
 	bool have_static_opps;
+	int opp_token;
 };
 
 static LIST_HEAD(priv_list);
@@ -194,7 +194,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu)
 	struct private_data *priv;
 	struct device *cpu_dev;
 	bool fallback = false;
-	const char *reg_name;
+	const char *reg_name[] = { NULL, NULL };
 	int ret;
 
 	/* Check if this CPU is already covered by some other policy */
@@ -219,12 +219,11 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu)
 	 * OPP layer will be taking care of regulators now, but it needs to know
 	 * the name of the regulator first.
 	 */
-	reg_name = find_supply_name(cpu_dev);
-	if (reg_name) {
-		priv->opp_table = dev_pm_opp_set_regulators(cpu_dev, &reg_name,
-							    1);
-		if (IS_ERR(priv->opp_table)) {
-			ret = PTR_ERR(priv->opp_table);
+	reg_name[0] = find_supply_name(cpu_dev);
+	if (reg_name[0]) {
+		priv->opp_token = dev_pm_opp_set_regulators(cpu_dev, reg_name);
+		if (priv->opp_token < 0) {
+			ret = priv->opp_token;
 			if (ret != -EPROBE_DEFER)
 				dev_err(cpu_dev, "failed to set regulators: %d\n",
 					ret);
@@ -296,7 +295,7 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu)
 out:
 	if (priv->have_static_opps)
 		dev_pm_opp_of_cpumask_remove_table(priv->cpus);
-	dev_pm_opp_put_regulators(priv->opp_table);
+	dev_pm_opp_put_regulators(priv->opp_token);
 free_cpumask:
 	free_cpumask_var(priv->cpus);
 	return ret;
@@ -310,7 +309,7 @@ static void dt_cpufreq_release(void)
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &priv->freq_table);
 	if (priv->have_static_opps)
 		dev_pm_opp_of_cpumask_remove_table(priv->cpus);
-	dev_pm_opp_put_regulators(priv->opp_table);
+	dev_pm_opp_put_regulators(priv->opp_token);
 	free_cpumask_var(priv->cpus);
 	list_del(&priv->node);
 }
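
This hunk adapts cpufreq-dt to the v6.0 OPP API: dev_pm_opp_set_regulators() now takes a NULL-terminated array of supply names and returns an int token (negative errno on failure) instead of a struct opp_table pointer, and dev_pm_opp_put_regulators() releases by that token. A condensed sketch of the new call pattern; the "vdd" supply name and the helper are illustrative, not from the driver:

    #include <linux/pm_opp.h>

    static int example_set_cpu_regulator(struct device *cpu_dev)
    {
            /* NULL-terminated list of supply names; "vdd" is made up here. */
            const char * const names[] = { "vdd", NULL };
            int token;

            token = dev_pm_opp_set_regulators(cpu_dev, names);
            if (token < 0)
                    return token;  /* negative errno, e.g. -EPROBE_DEFER */

            /* ... use the OPP table, then release with the same token ... */
            dev_pm_opp_put_regulators(token);
            return 0;
    }

The token-based interface also removes the IS_ERR/PTR_ERR dance around opp_table pointers seen in the deleted lines.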
