4848#include <trace/events/power.h>
4949#include <linux/sched.h>
5050#include <linux/sched/smt.h>
51+ #include <linux/mutex.h>
5152#include <linux/notifier.h>
5253#include <linux/cpu.h>
5354#include <linux/moduleparam.h>
55+ #include <linux/sysfs.h>
5456#include <asm/cpu_device_id.h>
5557#include <asm/intel-family.h>
5658#include <asm/mwait.h>
@@ -91,9 +93,15 @@ struct idle_cpu {
9193 */
9294 unsigned long auto_demotion_disable_flags ;
9395 bool disable_promotion_to_c1e ;
96+ bool c1_demotion_supported ;
9497 bool use_acpi ;
9598};
9699
/*
 * File-level copy of the per-model c1_demotion_supported flag.
 * intel_idle_init() sets it when the matched idle_cpu entry has the flag,
 * and intel_idle_sysfs_init() does nothing unless it is set.
 */
100+ static bool c1_demotion_supported ;
/*
 * Taken by intel_c1_demotion_store() around the on_each_cpu() call that
 * updates the demotion bits on every CPU.
 */
101+ static DEFINE_MUTEX (c1_demotion_mutex );
102+
/*
 * Device returned by bus_get_dev_root(&cpu_subsys) in
 * intel_idle_sysfs_init(); the intel_c1_demotion attribute is added to its
 * "cpuidle" group. The reference is dropped with put_device() in
 * intel_idle_sysfs_uninit().
 */
103+ static struct device * sysfs_root __initdata ;
104+
97105static const struct idle_cpu * icpu __initdata ;
98106static struct cpuidle_state * cpuidle_state_table __initdata ;
99107
@@ -142,8 +150,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev,
142150 int index , bool irqoff )
143151{
144152 struct cpuidle_state * state = & drv -> states [index ];
145- unsigned long eax = flg2MWAIT (state -> flags );
146- unsigned long ecx = 1 * irqoff ; /* break on interrupt flag */
153+ unsigned int eax = flg2MWAIT (state -> flags );
154+ unsigned int ecx = 1 * irqoff ; /* break on interrupt flag */
147155
148156 mwait_idle_with_hints (eax , ecx );
149157
@@ -216,9 +224,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
216224static __cpuidle int intel_idle_s2idle (struct cpuidle_device * dev ,
217225 struct cpuidle_driver * drv , int index )
218226{
219- unsigned long ecx = 1 ; /* break on interrupt flag */
220227 struct cpuidle_state * state = & drv -> states [index ];
221- unsigned long eax = flg2MWAIT (state -> flags );
228+ unsigned int eax = flg2MWAIT (state -> flags );
229+ unsigned int ecx = 1 ; /* break on interrupt flag */
222230
223231 if (state -> flags & CPUIDLE_FLAG_INIT_XSTATE )
224232 fpu_idle_fpregs ();
@@ -1548,18 +1556,21 @@ static const struct idle_cpu idle_cpu_gmt __initconst = {
15481556static const struct idle_cpu idle_cpu_spr __initconst = {
15491557 .state_table = spr_cstates ,
15501558 .disable_promotion_to_c1e = true,
1559+ .c1_demotion_supported = true,
15511560 .use_acpi = true,
15521561};
15531562
15541563static const struct idle_cpu idle_cpu_gnr __initconst = {
15551564 .state_table = gnr_cstates ,
15561565 .disable_promotion_to_c1e = true,
1566+ .c1_demotion_supported = true,
15571567 .use_acpi = true,
15581568};
15591569
15601570static const struct idle_cpu idle_cpu_gnrd __initconst = {
15611571 .state_table = gnrd_cstates ,
15621572 .disable_promotion_to_c1e = true,
1573+ .c1_demotion_supported = true,
15631574 .use_acpi = true,
15641575};
15651576
@@ -1598,12 +1609,14 @@ static const struct idle_cpu idle_cpu_snr __initconst = {
15981609static const struct idle_cpu idle_cpu_grr __initconst = {
15991610 .state_table = grr_cstates ,
16001611 .disable_promotion_to_c1e = true,
1612+ .c1_demotion_supported = true,
16011613 .use_acpi = true,
16021614};
16031615
16041616static const struct idle_cpu idle_cpu_srf __initconst = {
16051617 .state_table = srf_cstates ,
16061618 .disable_promotion_to_c1e = true,
1619+ .c1_demotion_supported = true,
16071620 .use_acpi = true,
16081621};
16091622
@@ -1664,7 +1677,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
16641677};
16651678
/*
 * CPU match table with a single entry: vendor INTEL, feature
 * X86_FEATURE_MWAIT, no driver data (NULL), followed by the empty
 * terminator. The removed ('-') entry matched family 6 only; the added
 * ('+') entry uses X86_FAMILY_ANY instead.
 */
16661679static const struct x86_cpu_id intel_mwait_ids [] __initconst = {
1667- X86_MATCH_VENDOR_FAM_FEATURE (INTEL , 6 , X86_FEATURE_MWAIT , NULL ),
1680+ X86_MATCH_VENDOR_FAM_FEATURE (INTEL , X86_FAMILY_ANY , X86_FEATURE_MWAIT , NULL ),
16681681 {}
16691682};
16701683
@@ -2323,6 +2336,88 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
23232336 cpuidle_unregister_device (per_cpu_ptr (intel_idle_cpuidle_devices , i ));
23242337}
23252338
2339+ static void intel_c1_demotion_toggle (void * enable )
2340+ {
2341+ unsigned long long msr_val ;
2342+
2343+ rdmsrl (MSR_PKG_CST_CONFIG_CONTROL , msr_val );
2344+ /*
2345+ * Enable/disable C1 undemotion along with C1 demotion, as this is the
2346+ * most sensible configuration in general.
2347+ */
2348+ if (enable )
2349+ msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE ;
2350+ else
2351+ msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE );
2352+ wrmsrl (MSR_PKG_CST_CONFIG_CONTROL , msr_val );
2353+ }
2354+
2355+ static ssize_t intel_c1_demotion_store (struct device * dev ,
2356+ struct device_attribute * attr ,
2357+ const char * buf , size_t count )
2358+ {
2359+ bool enable ;
2360+ int err ;
2361+
2362+ err = kstrtobool (buf , & enable );
2363+ if (err )
2364+ return err ;
2365+
2366+ mutex_lock (& c1_demotion_mutex );
2367+ /* Enable/disable C1 demotion on all CPUs */
2368+ on_each_cpu (intel_c1_demotion_toggle , (void * )enable , 1 );
2369+ mutex_unlock (& c1_demotion_mutex );
2370+
2371+ return count ;
2372+ }
2373+
2374+ static ssize_t intel_c1_demotion_show (struct device * dev ,
2375+ struct device_attribute * attr , char * buf )
2376+ {
2377+ unsigned long long msr_val ;
2378+
2379+ /*
2380+ * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
2381+ * configuration would be a BIOS bug.
2382+ */
2383+ rdmsrl (MSR_PKG_CST_CONFIG_CONTROL , msr_val );
2384+ return sysfs_emit (buf , "%d\n" , !!(msr_val & NHM_C1_AUTO_DEMOTE ));
2385+ }
2386+ static DEVICE_ATTR_RW (intel_c1_demotion );
2387+
2388+ static int __init intel_idle_sysfs_init (void )
2389+ {
2390+ int err ;
2391+
2392+ if (!c1_demotion_supported )
2393+ return 0 ;
2394+
2395+ sysfs_root = bus_get_dev_root (& cpu_subsys );
2396+ if (!sysfs_root )
2397+ return 0 ;
2398+
2399+ err = sysfs_add_file_to_group (& sysfs_root -> kobj ,
2400+ & dev_attr_intel_c1_demotion .attr ,
2401+ "cpuidle" );
2402+ if (err ) {
2403+ put_device (sysfs_root );
2404+ return err ;
2405+ }
2406+
2407+ return 0 ;
2408+ }
2409+
2410+ static void __init intel_idle_sysfs_uninit (void )
2411+ {
2412+ if (!sysfs_root )
2413+ return ;
2414+
2415+ sysfs_remove_file_from_group (& sysfs_root -> kobj ,
2416+ & dev_attr_intel_c1_demotion .attr ,
2417+ "cpuidle" );
2418+ put_device (sysfs_root );
2419+ }
2420+
23262421static int __init intel_idle_init (void )
23272422{
23282423 const struct x86_cpu_id * id ;
@@ -2373,6 +2468,8 @@ static int __init intel_idle_init(void)
23732468 auto_demotion_disable_flags = icpu -> auto_demotion_disable_flags ;
23742469 if (icpu -> disable_promotion_to_c1e )
23752470 c1e_promotion = C1E_PROMOTION_DISABLE ;
2471+ if (icpu -> c1_demotion_supported )
2472+ c1_demotion_supported = true;
23762473 if (icpu -> use_acpi || force_use_acpi )
23772474 intel_idle_acpi_cst_extract ();
23782475 } else if (!intel_idle_acpi_cst_extract ()) {
@@ -2386,6 +2483,10 @@ static int __init intel_idle_init(void)
23862483 if (!intel_idle_cpuidle_devices )
23872484 return - ENOMEM ;
23882485
2486+ retval = intel_idle_sysfs_init ();
2487+ if (retval )
2488+ pr_warn ("failed to initialized sysfs" );
2489+
23892490 intel_idle_cpuidle_driver_init (& intel_idle_driver );
23902491
23912492 retval = cpuidle_register_driver (& intel_idle_driver );
@@ -2404,17 +2505,20 @@ static int __init intel_idle_init(void)
24042505 pr_debug ("Local APIC timer is reliable in %s\n" ,
24052506 boot_cpu_has (X86_FEATURE_ARAT ) ? "all C-states" : "C1" );
24062507
2508+ arch_cpu_rescan_dead_smt_siblings ();
2509+
24072510 return 0 ;
24082511
24092512hp_setup_fail :
24102513 intel_idle_cpuidle_devices_uninit ();
24112514 cpuidle_unregister_driver (& intel_idle_driver );
24122515init_driver_fail :
2516+ intel_idle_sysfs_uninit ();
24132517 free_percpu (intel_idle_cpuidle_devices );
24142518 return retval ;
24152519
24162520}
2417- device_initcall (intel_idle_init );
2521+ subsys_initcall_sync (intel_idle_init );
24182522
24192523/*
24202524 * We are not really modular, but we used to support that. Meaning we also
0 commit comments