@@ -19,6 +19,7 @@ use serde::{Deserialize, Serialize};
1919use crate :: arch:: x86_64:: interrupts;
2020use crate :: arch:: x86_64:: msr:: { create_boot_msr_entries, MsrError } ;
2121use crate :: arch:: x86_64:: regs:: { SetupFpuError , SetupRegistersError , SetupSpecialRegistersError } ;
22+ use crate :: arch_gen:: x86:: msr_index:: { MSR_IA32_TSC , MSR_IA32_TSC_DEADLINE } ;
2223use crate :: cpu_config:: x86_64:: { cpuid, CpuConfiguration } ;
2324use crate :: logger:: { IncMetric , METRICS } ;
2425use crate :: vstate:: memory:: { Address , GuestAddress , GuestMemoryMmap } ;
@@ -282,6 +283,39 @@ impl KvmVcpu {
282283 Ok ( cpuid)
283284 }
284285
286+ /// If the IA32_TSC_DEADLINE MSR value is zero, update it
287+ /// with the IA32_TSC value to guarantee that
288+ /// the vCPU will continue receiving interrupts after restoring from a snapshot.
289+ ///
290+ /// Rationale: we observed that sometimes when taking a snapshot,
291+ /// the IA32_TSC_DEADLINE MSR is cleared, but the interrupt is not
292+ /// delivered to the guest, leading to a situation where one
293+ /// of the vCPUs never receives TSC interrupts after restoring,
294+ /// until the MSR is updated externally, eg by setting the system time.
295+ fn fix_zero_tsc_deadline_msr ( msr_chunks : & mut [ Msrs ] ) {
296+ // We do not expect more than 1 TSC MSR entry, but if there are multiple, pick the maximum.
297+ let max_tsc_value = msr_chunks
298+ . iter ( )
299+ . flat_map ( |msrs| msrs. as_slice ( ) )
300+ . filter ( |msr| msr. index == MSR_IA32_TSC )
301+ . map ( |msr| msr. data )
302+ . max ( ) ;
303+
304+ if let Some ( tsc_value) = max_tsc_value {
305+ msr_chunks
306+ . iter_mut ( )
307+ . flat_map ( |msrs| msrs. as_mut_slice ( ) )
308+ . filter ( |msr| msr. index == MSR_IA32_TSC_DEADLINE && msr. data == 0 )
309+ . for_each ( |msr| {
310+ warn ! (
311+ "MSR_IA32_TSC_DEADLINE is 0, replacing with {:x}." ,
312+ tsc_value
313+ ) ;
314+ msr. data = tsc_value;
315+ } ) ;
316+ }
317+ }
318+
285319 /// Get MSR chunks for the given MSR index list.
286320 ///
287321 /// KVM only supports getting `KVM_MAX_MSR_ENTRIES` at a time, so we divide
@@ -321,6 +355,8 @@ impl KvmVcpu {
321355 msr_chunks. push ( msrs) ;
322356 }
323357
358+ Self :: fix_zero_tsc_deadline_msr ( & mut msr_chunks) ;
359+
324360 Ok ( msr_chunks)
325361 }
326362
@@ -594,6 +630,7 @@ mod tests {
594630
595631 use std:: os:: unix:: io:: AsRawFd ;
596632
633+ use kvm_bindings:: kvm_msr_entry;
597634 use kvm_ioctls:: Cap ;
598635
599636 use super :: * ;
@@ -949,4 +986,77 @@ mod tests {
949986 }
950987 }
951988 }
989+
990+ fn msrs_from_entries ( msr_entries : & [ ( u32 , u64 ) ] ) -> Msrs {
991+ Msrs :: from_entries (
992+ & msr_entries
993+ . iter ( )
994+ . map ( |& ( index, data) | kvm_msr_entry {
995+ index,
996+ data,
997+ ..Default :: default ( )
998+ } )
999+ . collect :: < Vec < _ > > ( ) ,
1000+ )
1001+ . unwrap ( )
1002+ }
1003+
1004+ fn assert_msrs ( msr_chunks : & [ Msrs ] , expected_msr_entries : & [ ( u32 , u64 ) ] ) {
1005+ let flattened_msrs = msr_chunks. iter ( ) . flat_map ( |msrs| msrs. as_slice ( ) ) ;
1006+ for ( a, b) in flattened_msrs. zip ( expected_msr_entries. iter ( ) ) {
1007+ assert_eq ! ( a. index, b. 0 ) ;
1008+ assert_eq ! ( a. data, b. 1 ) ;
1009+ }
1010+ }
1011+
1012+ #[ test]
1013+ fn test_fix_zero_tsc_deadline_msr_zero_same_chunk ( ) {
1014+ // Place both TSC and TSC_DEADLINE MSRs in the same chunk.
1015+ let mut msr_chunks = [ msrs_from_entries ( & [
1016+ ( MSR_IA32_TSC_DEADLINE , 0 ) ,
1017+ ( MSR_IA32_TSC , 42 ) ,
1018+ ] ) ] ;
1019+
1020+ KvmVcpu :: fix_zero_tsc_deadline_msr ( & mut msr_chunks) ;
1021+
1022+ // We expect for the MSR_IA32_TSC_DEADLINE to get updated with the MSR_IA32_TSC value.
1023+ assert_msrs (
1024+ & msr_chunks,
1025+ & [ ( MSR_IA32_TSC_DEADLINE , 42 ) , ( MSR_IA32_TSC , 42 ) ] ,
1026+ ) ;
1027+ }
1028+
1029+ #[ test]
1030+ fn test_fix_zero_tsc_deadline_msr_zero_separate_chunks ( ) {
1031+ // Place both TSC and TSC_DEADLINE MSRs in separate chunks.
1032+ let mut msr_chunks = [
1033+ msrs_from_entries ( & [ ( MSR_IA32_TSC_DEADLINE , 0 ) ] ) ,
1034+ msrs_from_entries ( & [ ( MSR_IA32_TSC , 42 ) ] ) ,
1035+ ] ;
1036+
1037+ KvmVcpu :: fix_zero_tsc_deadline_msr ( & mut msr_chunks) ;
1038+
1039+ // We expect for the MSR_IA32_TSC_DEADLINE to get updated with the MSR_IA32_TSC value.
1040+ assert_msrs (
1041+ & msr_chunks,
1042+ & [ ( MSR_IA32_TSC_DEADLINE , 42 ) , ( MSR_IA32_TSC , 42 ) ] ,
1043+ ) ;
1044+ }
1045+
1046+ #[ test]
1047+ fn test_fix_zero_tsc_deadline_msr_non_zero ( ) {
1048+ let mut msr_chunks = [ msrs_from_entries ( & [
1049+ ( MSR_IA32_TSC_DEADLINE , 1 ) ,
1050+ ( MSR_IA32_TSC , 2 ) ,
1051+ ] ) ] ;
1052+
1053+ KvmVcpu :: fix_zero_tsc_deadline_msr ( & mut msr_chunks) ;
1054+
1055+ // We expect that MSR_IA32_TSC_DEADLINE should remain unchanged, because it is non-zero
1056+ // already.
1057+ assert_msrs (
1058+ & msr_chunks,
1059+ & [ ( MSR_IA32_TSC_DEADLINE , 1 ) , ( MSR_IA32_TSC , 2 ) ] ,
1060+ ) ;
1061+ }
9521062}
0 commit comments