 #include <linux/bitfield.h>
 #include <trace/events/power.h>
 #include <linux/units.h>
+#include <linux/cacheinfo.h>

 #include <asm/cpu.h>
 #include <asm/div64.h>
@@ -221,6 +222,7 @@ struct global_params {
  * @sched_flags: Store scheduler flags for possible cross CPU update
  * @hwp_boost_min: Last HWP boosted min performance
  * @suspended: Whether or not the driver has been suspended.
+ * @pd_registered: Set when a perf domain is registered for this CPU.
  * @hwp_notify_work: workqueue for HWP notifications.
  *
  * This structure stores per CPU instance data for all CPUs.
@@ -260,6 +262,9 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
+#ifdef CONFIG_ENERGY_MODEL
+	bool pd_registered;
+#endif
 	struct delayed_work hwp_notify_work;
 };

@@ -303,6 +308,7 @@ static bool hwp_is_hybrid;

 static struct cpufreq_driver *intel_pstate_driver __read_mostly;

+#define INTEL_PSTATE_CORE_SCALING	100000
 #define HYBRID_SCALING_FACTOR_ADL	78741
 #define HYBRID_SCALING_FACTOR_MTL	80000
 #define HYBRID_SCALING_FACTOR_LNL	86957
@@ -311,7 +317,7 @@ static int hybrid_scaling_factor;

 static inline int core_get_scaling(void)
 {
-	return 100000;
+	return INTEL_PSTATE_CORE_SCALING;
 }

 #ifdef CONFIG_ACPI
@@ -948,12 +954,124 @@ static struct cpudata *hybrid_max_perf_cpu __read_mostly;
  */
 static DEFINE_MUTEX(hybrid_capacity_lock);

+#ifdef CONFIG_ENERGY_MODEL
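+/*
+ * Number of EM performance states, matching the four "utilization bins"
+ * described in hybrid_active_power() below.
+ */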
+#define HYBRID_EM_STATE_COUNT	4
+
+static int hybrid_active_power(struct device *dev, unsigned long *power,
+			       unsigned long *freq)
+{
+	/*
+	 * Create "utilization bins" of 0-40%, 40%-60%, 60%-80%, and 80%-100%
+	 * of the maximum capacity such that two CPUs of the same type will be
+	 * regarded as equally attractive if the utilization of each of them
+	 * falls into the same bin, which should prevent tasks from being
+	 * migrated between them too often.
+	 *
+	 * For this purpose, return the "frequency" of 2 for the first
+	 * performance level and otherwise leave the value set by the caller.
+	 */
+	if (!*freq)
+		*freq = 2;
+
+	/* No power information. */
+	*power = EM_MAX_POWER;
+
+	return 0;
+}
+
+static int hybrid_get_cost(struct device *dev, unsigned long freq,
+			   unsigned long *cost)
+{
+	struct pstate_data *pstate = &all_cpu_data[dev->id]->pstate;
+	struct cpu_cacheinfo *cacheinfo = get_cpu_cacheinfo(dev->id);
+
+	/*
+	 * The smaller the perf-to-frequency scaling factor, the larger the IPC
+	 * ratio between the given CPU and the least capable CPU in the system.
+	 * Regard that IPC ratio as the primary cost component and assume that
+	 * the scaling factors for different CPU types will differ by at least
+	 * 5% and they will not be above INTEL_PSTATE_CORE_SCALING.
+	 *
+	 * Add the freq value to the cost, so that the cost of running on CPUs
+	 * of the same type in different "utilization bins" is different.
+	 */
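+	/*
+	 * For example, with the ADL scaling factor of 78741 the IPC-based cost
+	 * component is 100 * 100000 / 78741 = 126, whereas a CPU using the
+	 * default core scaling of 100000 gets 100.
+	 */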
+	*cost = div_u64(100ULL * INTEL_PSTATE_CORE_SCALING, pstate->scaling) + freq;
+	/*
+	 * Increase the cost slightly for CPUs able to access L3 to avoid
+	 * touching it in case some other CPUs of the same type can do the work
+	 * without it.
+	 */
+	if (cacheinfo) {
+		unsigned int i;
+
+		/* Check if L3 cache is there. */
+		for (i = 0; i < cacheinfo->num_leaves; i++) {
+			if (cacheinfo->info_list[i].level == 3) {
+				*cost += 2;
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static bool hybrid_register_perf_domain(unsigned int cpu)
+{
+	static const struct em_data_callback cb
+			= EM_ADV_DATA_CB(hybrid_active_power, hybrid_get_cost);
+	struct cpudata *cpudata = all_cpu_data[cpu];
+	struct device *cpu_dev;
+
+	/*
+	 * Registering EM perf domains without enabling asymmetric CPU capacity
+	 * support is not really useful and one domain should not be registered
+	 * more than once.
+	 */
+	if (!hybrid_max_perf_cpu || cpudata->pd_registered)
+		return false;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return false;
+
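+	/*
+	 * Note that the power values supplied by hybrid_active_power() are
+	 * abstract, so no real power numbers are reported to the EM core here.
+	 */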
+	if (em_dev_register_pd_no_update(cpu_dev, HYBRID_EM_STATE_COUNT, &cb,
+					 cpumask_of(cpu), false))
+		return false;
+
+	cpudata->pd_registered = true;
+
+	return true;
+}
+
+static void hybrid_register_all_perf_domains(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu)
+		hybrid_register_perf_domain(cpu);
+}
+
+static void hybrid_update_perf_domain(struct cpudata *cpu)
+{
+	if (cpu->pd_registered)
+		em_adjust_cpu_capacity(cpu->cpu);
+}
+#else /* !CONFIG_ENERGY_MODEL */
+static inline bool hybrid_register_perf_domain(unsigned int cpu) { return false; }
+static inline void hybrid_register_all_perf_domains(void) {}
+static inline void hybrid_update_perf_domain(struct cpudata *cpu) {}
+#endif /* CONFIG_ENERGY_MODEL */
+
 static void hybrid_set_cpu_capacity(struct cpudata *cpu)
 {
 	arch_set_cpu_capacity(cpu->cpu, cpu->capacity_perf,
 			      hybrid_max_perf_cpu->capacity_perf,
 			      cpu->capacity_perf,
 			      cpu->pstate.max_pstate_physical);
+	hybrid_update_perf_domain(cpu);
+
+	topology_set_cpu_scale(cpu->cpu, arch_scale_cpu_capacity(cpu->cpu));

 	pr_debug("CPU%d: perf = %u, max. perf = %u, base perf = %d\n", cpu->cpu,
 		 cpu->capacity_perf, hybrid_max_perf_cpu->capacity_perf,
@@ -1042,6 +1160,11 @@ static void hybrid_refresh_cpu_capacity_scaling(void)
 	guard(mutex)(&hybrid_capacity_lock);

 	__hybrid_refresh_cpu_capacity_scaling();
+	/*
+	 * Perf domains are not registered before setting hybrid_max_perf_cpu,
+	 * so register them all after setting up CPU capacity scaling.
+	 */
+	hybrid_register_all_perf_domains();
 }

 static void hybrid_init_cpu_capacity_scaling(bool refresh)
@@ -1069,7 +1192,7 @@ static void hybrid_init_cpu_capacity_scaling(bool refresh)
 		hybrid_refresh_cpu_capacity_scaling();
 		/*
 		 * Disabling ITMT causes sched domains to be rebuilt to disable asym
-		 * packing and enable asym capacity.
+		 * packing and enable asym capacity and EAS.
 		 */
 		sched_clear_itmt_support();
 	}
@@ -1147,6 +1270,14 @@ static void hybrid_update_capacity(struct cpudata *cpu)
 	}

 	hybrid_set_cpu_capacity(cpu);
+	/*
+	 * If the CPU was offline to start with and it is going online for the
+	 * first time, a perf domain needs to be registered for it if hybrid
+	 * capacity scaling has been enabled already.  In that case, sched
+	 * domains need to be rebuilt to take the new perf domain into account.
+	 */
+	if (hybrid_register_perf_domain(cpu->cpu))
+		em_rebuild_sched_domains();

 unlock:
 	mutex_unlock(&hybrid_capacity_lock);
@@ -2656,6 +2787,8 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	X86_MATCH(INTEL_TIGERLAKE, core_funcs),
 	X86_MATCH(INTEL_SAPPHIRERAPIDS_X, core_funcs),
 	X86_MATCH(INTEL_EMERALDRAPIDS_X, core_funcs),
+	X86_MATCH(INTEL_GRANITERAPIDS_D, core_funcs),
+	X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
@@ -2672,6 +2805,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
 	X86_MATCH(INTEL_GRANITERAPIDS_X, core_funcs),
 	X86_MATCH(INTEL_ATOM_CRESTMONT, core_funcs),
 	X86_MATCH(INTEL_ATOM_CRESTMONT_X, core_funcs),
+	X86_MATCH(INTEL_ATOM_DARKMONT_X, core_funcs),
 	{}
 };
 #endif
@@ -3130,8 +3264,8 @@ static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy,
 		int max_pstate = policy->strict_target ?
 					target_pstate : cpu->max_perf_ratio;

-		intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate, 0,
-					 fast_switch);
+		intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate,
+					 target_pstate, fast_switch);
 	} else if (target_pstate != old_pstate) {
 		intel_cpufreq_perf_ctl_update(cpu, target_pstate, fast_switch);
 	}