4848#include <trace/events/power.h>
4949#include <linux/sched.h>
5050#include <linux/sched/smt.h>
51+ #include <linux/mutex.h>
5152#include <linux/notifier.h>
5253#include <linux/cpu.h>
5354#include <linux/moduleparam.h>
55+ #include <linux/sysfs.h>
5456#include <asm/cpuid.h>
5557#include <asm/cpu_device_id.h>
5658#include <asm/intel-family.h>
@@ -92,9 +94,15 @@ struct idle_cpu {
9294 unsigned long auto_demotion_disable_flags ;
9395 bool byt_auto_demotion_disable_flag ;
9496 bool disable_promotion_to_c1e ;
97+ bool c1_demotion_supported ;
9598 bool use_acpi ;
9699};
97100
101+ static bool c1_demotion_supported ;
102+ static DEFINE_MUTEX (c1_demotion_mutex );
103+
104+ static struct device * sysfs_root __initdata ;
105+
98106static const struct idle_cpu * icpu __initdata ;
99107static struct cpuidle_state * cpuidle_state_table __initdata ;
100108
@@ -143,8 +151,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev,
143151 int index , bool irqoff )
144152{
145153 struct cpuidle_state * state = & drv -> states [index ];
146- unsigned long eax = flg2MWAIT (state -> flags );
147- unsigned long ecx = 1 * irqoff ; /* break on interrupt flag */
154+ unsigned int eax = flg2MWAIT (state -> flags );
155+ unsigned int ecx = 1 * irqoff ; /* break on interrupt flag */
148156
149157 mwait_idle_with_hints (eax , ecx );
150158
@@ -217,9 +225,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
217225static __cpuidle int intel_idle_s2idle (struct cpuidle_device * dev ,
218226 struct cpuidle_driver * drv , int index )
219227{
220- unsigned long ecx = 1 ; /* break on interrupt flag */
221228 struct cpuidle_state * state = & drv -> states [index ];
222- unsigned long eax = flg2MWAIT (state -> flags );
229+ unsigned int eax = flg2MWAIT (state -> flags );
230+ unsigned int ecx = 1 ; /* break on interrupt flag */
223231
224232 if (state -> flags & CPUIDLE_FLAG_INIT_XSTATE )
225233 fpu_idle_fpregs ();
@@ -1542,18 +1550,21 @@ static const struct idle_cpu idle_cpu_gmt __initconst = {
15421550static const struct idle_cpu idle_cpu_spr __initconst = {
15431551 .state_table = spr_cstates ,
15441552 .disable_promotion_to_c1e = true,
1553+ .c1_demotion_supported = true,
15451554 .use_acpi = true,
15461555};
15471556
15481557static const struct idle_cpu idle_cpu_gnr __initconst = {
15491558 .state_table = gnr_cstates ,
15501559 .disable_promotion_to_c1e = true,
1560+ .c1_demotion_supported = true,
15511561 .use_acpi = true,
15521562};
15531563
15541564static const struct idle_cpu idle_cpu_gnrd __initconst = {
15551565 .state_table = gnrd_cstates ,
15561566 .disable_promotion_to_c1e = true,
1567+ .c1_demotion_supported = true,
15571568 .use_acpi = true,
15581569};
15591570
@@ -1592,12 +1603,14 @@ static const struct idle_cpu idle_cpu_snr __initconst = {
15921603static const struct idle_cpu idle_cpu_grr __initconst = {
15931604 .state_table = grr_cstates ,
15941605 .disable_promotion_to_c1e = true,
1606+ .c1_demotion_supported = true,
15951607 .use_acpi = true,
15961608};
15971609
15981610static const struct idle_cpu idle_cpu_srf __initconst = {
15991611 .state_table = srf_cstates ,
16001612 .disable_promotion_to_c1e = true,
1613+ .c1_demotion_supported = true,
16011614 .use_acpi = true,
16021615};
16031616
@@ -1658,7 +1671,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
16581671};
16591672
16601673static const struct x86_cpu_id intel_mwait_ids [] __initconst = {
1661- X86_MATCH_VENDOR_FAM_FEATURE (INTEL , 6 , X86_FEATURE_MWAIT , NULL ),
1674+ X86_MATCH_VENDOR_FAM_FEATURE (INTEL , X86_FAMILY_ANY , X86_FEATURE_MWAIT , NULL ),
16621675 {}
16631676};
16641677
@@ -2295,6 +2308,88 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
22952308 cpuidle_unregister_device (per_cpu_ptr (intel_idle_cpuidle_devices , i ));
22962309}
22972310
2311+ static void intel_c1_demotion_toggle (void * enable )
2312+ {
2313+ unsigned long long msr_val ;
2314+
2315+ rdmsrl (MSR_PKG_CST_CONFIG_CONTROL , msr_val );
2316+ /*
2317+ * Enable/disable C1 undemotion along with C1 demotion, as this is the
2318+ * most sensible configuration in general.
2319+ */
2320+ if (enable )
2321+ msr_val |= NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE ;
2322+ else
2323+ msr_val &= ~(NHM_C1_AUTO_DEMOTE | SNB_C1_AUTO_UNDEMOTE );
2324+ wrmsrl (MSR_PKG_CST_CONFIG_CONTROL , msr_val );
2325+ }
2326+
2327+ static ssize_t intel_c1_demotion_store (struct device * dev ,
2328+ struct device_attribute * attr ,
2329+ const char * buf , size_t count )
2330+ {
2331+ bool enable ;
2332+ int err ;
2333+
2334+ err = kstrtobool (buf , & enable );
2335+ if (err )
2336+ return err ;
2337+
2338+ mutex_lock (& c1_demotion_mutex );
2339+ /* Enable/disable C1 demotion on all CPUs */
2340+ on_each_cpu (intel_c1_demotion_toggle , (void * )enable , 1 );
2341+ mutex_unlock (& c1_demotion_mutex );
2342+
2343+ return count ;
2344+ }
2345+
2346+ static ssize_t intel_c1_demotion_show (struct device * dev ,
2347+ struct device_attribute * attr , char * buf )
2348+ {
2349+ unsigned long long msr_val ;
2350+
2351+ /*
2352+ * Read the MSR value for a CPU and assume it is the same for all CPUs. Any other
2353+ * configuration would be a BIOS bug.
2354+ */
2355+ rdmsrl (MSR_PKG_CST_CONFIG_CONTROL , msr_val );
2356+ return sysfs_emit (buf , "%d\n" , !!(msr_val & NHM_C1_AUTO_DEMOTE ));
2357+ }
2358+ static DEVICE_ATTR_RW (intel_c1_demotion );
2359+
2360+ static int __init intel_idle_sysfs_init (void )
2361+ {
2362+ int err ;
2363+
2364+ if (!c1_demotion_supported )
2365+ return 0 ;
2366+
2367+ sysfs_root = bus_get_dev_root (& cpu_subsys );
2368+ if (!sysfs_root )
2369+ return 0 ;
2370+
2371+ err = sysfs_add_file_to_group (& sysfs_root -> kobj ,
2372+ & dev_attr_intel_c1_demotion .attr ,
2373+ "cpuidle" );
2374+ if (err ) {
2375+ put_device (sysfs_root );
2376+ return err ;
2377+ }
2378+
2379+ return 0 ;
2380+ }
2381+
2382+ static void __init intel_idle_sysfs_uninit (void )
2383+ {
2384+ if (!sysfs_root )
2385+ return ;
2386+
2387+ sysfs_remove_file_from_group (& sysfs_root -> kobj ,
2388+ & dev_attr_intel_c1_demotion .attr ,
2389+ "cpuidle" );
2390+ put_device (sysfs_root );
2391+ }
2392+
22982393static int __init intel_idle_init (void )
22992394{
23002395 const struct x86_cpu_id * id ;
@@ -2344,6 +2439,8 @@ static int __init intel_idle_init(void)
23442439 auto_demotion_disable_flags = icpu -> auto_demotion_disable_flags ;
23452440 if (icpu -> disable_promotion_to_c1e )
23462441 c1e_promotion = C1E_PROMOTION_DISABLE ;
2442+ if (icpu -> c1_demotion_supported )
2443+ c1_demotion_supported = true;
23472444 if (icpu -> use_acpi || force_use_acpi )
23482445 intel_idle_acpi_cst_extract ();
23492446 } else if (!intel_idle_acpi_cst_extract ()) {
@@ -2357,6 +2454,10 @@ static int __init intel_idle_init(void)
23572454 if (!intel_idle_cpuidle_devices )
23582455 return - ENOMEM ;
23592456
2457+ retval = intel_idle_sysfs_init ();
2458+ if (retval )
2459+ pr_warn ("failed to initialized sysfs" );
2460+
23602461 intel_idle_cpuidle_driver_init (& intel_idle_driver );
23612462
23622463 retval = cpuidle_register_driver (& intel_idle_driver );
@@ -2375,17 +2476,20 @@ static int __init intel_idle_init(void)
23752476 pr_debug ("Local APIC timer is reliable in %s\n" ,
23762477 boot_cpu_has (X86_FEATURE_ARAT ) ? "all C-states" : "C1" );
23772478
2479+ arch_cpu_rescan_dead_smt_siblings ();
2480+
23782481 return 0 ;
23792482
23802483hp_setup_fail :
23812484 intel_idle_cpuidle_devices_uninit ();
23822485 cpuidle_unregister_driver (& intel_idle_driver );
23832486init_driver_fail :
2487+ intel_idle_sysfs_uninit ();
23842488 free_percpu (intel_idle_cpuidle_devices );
23852489 return retval ;
23862490
23872491}
2388- device_initcall (intel_idle_init );
2492+ subsys_initcall_sync (intel_idle_init );
23892493
23902494/*
23912495 * We are not really modular, but we used to support that. Meaning we also
0 commit comments