@@ -30,6 +30,7 @@ static u32 host_vtimer_irq_flags;
 static u32 host_ptimer_irq_flags;
 
 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
 
 static const u8 default_ppi[] = {
 	[TIMER_PTIMER]	= 30,
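Note: `has_broken_cntvoff()`, used by the hunks below, is presumably a thin test of this static key living next to its extern declaration; a minimal sketch, assuming the header location and spelling:

DECLARE_STATIC_KEY_FALSE(broken_cntvoff_key);

static inline bool has_broken_cntvoff(void)
{
	/* Patched into a static branch: effectively free on unaffected HW. */
	return static_branch_unlikely(&broken_cntvoff_key);
}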
@@ -101,21 +102,6 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
 	}
 }
 
-static u64 timer_get_offset(struct arch_timer_context *ctxt)
-{
-	u64 offset = 0;
-
-	if (!ctxt)
-		return 0;
-
-	if (ctxt->offset.vm_offset)
-		offset += *ctxt->offset.vm_offset;
-	if (ctxt->offset.vcpu_offset)
-		offset += *ctxt->offset.vcpu_offset;
-
-	return offset;
-}
-
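Note: `timer_get_offset()` is still called by later hunks (timer_save_state(), timer_restore_state(), timer_set_traps()), so this removal is presumably a move to a shared header rather than a deletion, keeping the same body, roughly:

/* Sketch: same logic, assumed relocated as a static inline in a header. */
static inline u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 offset = 0;

	if (!ctxt)
		return 0;

	/* The total offset is the sum of the per-VM and per-vCPU parts. */
	if (ctxt->offset.vm_offset)
		offset += *ctxt->offset.vm_offset;
	if (ctxt->offset.vcpu_offset)
		offset += *ctxt->offset.vcpu_offset;

	return offset;
}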
 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
@@ -441,11 +427,30 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
 		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
 }
 
+static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
+{
+	/*
+	 * Paper over NV2 brokenness by publishing the interrupt status
+	 * bit. This still results in a poor quality of emulation (guest
+	 * writes will have no effect until the next exit).
+	 *
+	 * But hey, it's fast, right?
+	 */
+	if (is_hyp_ctxt(ctx->vcpu) &&
+	    (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
+		unsigned long val = timer_get_ctl(ctx);
+		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
+		timer_set_ctl(ctx, val);
+	}
+}
+
 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 				 struct arch_timer_context *timer_ctx)
 {
 	int ret;
 
+	kvm_timer_update_status(timer_ctx, new_level);
+
 	timer_ctx->irq.level = new_level;
 	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
 				   timer_ctx->irq.level);
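Note: the `__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level)` idiom writes `level` into the ISTATUS bit of CNT{V,P}_CTL: `__ffs()` converts the single-bit mask into a bit index, and `__assign_bit()` sets or clears that bit accordingly. A standalone illustration with the kernel helpers open-coded (ISTATUS is bit 2 per the architecture):

#include <assert.h>
#include <stdbool.h>

#define ARCH_TIMER_CTRL_IT_STAT	(1UL << 2)	/* ISTATUS in CNT{V,P}_CTL */

/* Open-coded stand-ins for the kernel's __ffs()/__assign_bit(). */
static unsigned int ffs_bit(unsigned long mask)
{
	return __builtin_ctzl(mask);		/* index of lowest set bit */
}

static void assign_bit(unsigned int nr, unsigned long *val, bool set)
{
	if (set)
		*val |= 1UL << nr;
	else
		*val &= ~(1UL << nr);
}

int main(void)
{
	unsigned long ctl = 0x1;		/* ENABLE set, ISTATUS clear */

	assign_bit(ffs_bit(ARCH_TIMER_CTRL_IT_STAT), &ctl, true);
	assert(ctl == 0x5);			/* ENABLE | ISTATUS */
	return 0;
}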
@@ -471,6 +476,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
 		return;
 	}
 
+	kvm_timer_update_status(ctx, should_fire);
+
 	/*
 	 * If the timer can fire now, we don't need to have a soft timer
 	 * scheduled for the future. If the timer cannot fire at all,
@@ -513,7 +520,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
 	case TIMER_VTIMER:
 	case TIMER_HVTIMER:
 		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
-		timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
+		cval = read_sysreg_el0(SYS_CNTV_CVAL);
+
+		if (has_broken_cntvoff())
+			cval -= timer_get_offset(ctx);
+
+		timer_set_cval(ctx, cval);
 
 		/* Disable the timer */
 		write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -618,8 +630,15 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 
 	case TIMER_VTIMER:
 	case TIMER_HVTIMER:
-		set_cntvoff(timer_get_offset(ctx));
-		write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
+		cval = timer_get_cval(ctx);
+		offset = timer_get_offset(ctx);
+		if (has_broken_cntvoff()) {
+			set_cntvoff(0);
+			cval += offset;
+		} else {
+			set_cntvoff(offset);
+		}
+		write_sysreg_el0(cval, SYS_CNTV_CVAL);
 		isb();
 		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
 		break;
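Note: this save/restore compensation is exact. Architecturally, CNTVCT = physical count - CNTVOFF, and ISTATUS fires when (CNTVCT - CVAL), viewed as a signed 64-bit value, is >= 0; running with CNTVOFF = 0 and CVAL + offset is equivalent because count - offset - cval == count - (cval + offset) modulo 2^64. A standalone check of that identity (plain C, names illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* ISTATUS condition per the Arm ARM: signed (CNTVCT - CVAL) >= 0. */
static bool timer_fires(uint64_t count, uint64_t cntvoff, uint64_t cval)
{
	return (int64_t)((count - cntvoff) - cval) >= 0;
}

int main(void)
{
	uint64_t offset = 0x123456789abcull, cval = 0xdeadbeefull;

	for (uint64_t count = 0; count < (1ull << 24); count += 0x3333) {
		/* Working CNTVOFF_EL2 vs. erratum mode (CNTVOFF=0, CVAL+offset). */
		assert(timer_fires(count, offset, cval) ==
		       timer_fires(count, 0, cval + offset));
	}
	return 0;
}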
@@ -762,7 +781,7 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
 
 static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
 {
-	bool tpt, tpc;
+	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
 	u64 clr, set;
 
 	/*
@@ -777,7 +796,29 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
 	 * within this function, reality kicks in and we start adding
 	 * traps based on emulation requirements.
 	 */
-	tpt = tpc = false;
+	tvt = tpt = tvc = tpc = false;
+	tvt02 = tpt02 = false;
+
+	/*
+	 * NV2 badly breaks the timer semantics by redirecting accesses to
+	 * the EL1 timer state to memory, so let's call ECV to the rescue if
+	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
+	 *
+	 * The treatment slightly varies depending on whether we run a nVHE
+	 * or VHE guest: nVHE will use the _EL0 registers directly, while
+	 * VHE will use the _EL02 accessors. This translates into different
+	 * trap bits.
+	 *
+	 * None of the trapping is required when running in non-HYP context,
+	 * unless required by the L1 hypervisor settings once we advertise
+	 * ECV+NV in the guest, or if we need trapping for other reasons.
+	 */
+	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
+		if (vcpu_el2_e2h_is_set(vcpu))
+			tvt02 = tpt02 = true;
+		else
+			tvt = tpt = true;
+	}
 
 	/*
 	 * We have two possibilities to deal with a physical offset:
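Note: the new trap controls all live in CNTHCTL_EL2. Per the Arm ARM (FEAT_ECV), EL1TVT/EL1TVCT trap the EL0/EL1 virtual timer and counter accesses, while EL1NVPCT/EL1NVVCT trap only the _EL02 accessors a VHE guest hypervisor uses. The definitions are presumably along these lines (bit positions per the architecture; the kernel's exact spelling and location are assumptions):

/* Sketch: FEAT_ECV trap controls in CNTHCTL_EL2. */
#define CNTHCTL_ECV		(1UL << 12)	/* enable ECV */
#define CNTHCTL_EL1TVT		(1UL << 13)	/* trap virt timer registers */
#define CNTHCTL_EL1TVCT		(1UL << 14)	/* trap virt counter reads */
#define CNTHCTL_EL1NVPCT	(1UL << 15)	/* trap CNTP_{CTL,CVAL}_EL02 */
#define CNTHCTL_EL1NVVCT	(1UL << 16)	/* trap CNTV_{CTL,CVAL}_EL02 */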
@@ -792,10 +833,21 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
 	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
 		tpt = tpc = true;
 
+	/*
+	 * For the poor sods that could not correctly subtract one value
+	 * from another, trap the full virtual timer and counter.
+	 */
+	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
+		tvt = tvc = true;
+
 	/*
 	 * Apply the enable bits that the guest hypervisor has requested for
 	 * its own guest. We can only add traps that wouldn't have been set
 	 * above.
+	 * Implementation choices: we do not support NV when E2H=0 in the
+	 * guest, and we don't support configurations where E2H is writable
+	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
+	 * not both). This simplifies the handling of the EL1NV* bits.
 	 */
 	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
 		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
@@ -806,6 +858,9 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
 
 		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
 		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
+
+		tpt02 |= (val & CNTHCTL_EL1NVPCT);
+		tvt02 |= (val & CNTHCTL_EL1NVVCT);
 	}
 
 	/*
@@ -817,6 +872,10 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
 
 	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
 	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
+	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
+	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
+	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
+	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);
 
 	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
 	sysreg_clear_set(cnthctl_el2, clr, set);
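Note the swapped accumulator order on the new lines: EL1PCEN/EL1PCTEN are enable bits (accesses trap when they are clear), whereas EL1TVT and friends are trap bits (accesses trap when they are set), so the predicate must steer the bit into the opposite mask. For context, the helper already defined earlier in arch_timer.c is roughly:

/* Route _bit into _set when _pred is true, into _clr otherwise. */
#define assign_clear_set_bit(_pred, _bit, _set, _clr)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)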
@@ -905,6 +964,54 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 		kvm_timer_blocking(vcpu);
 }
 
+void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * When NV2 is on, guest hypervisors have their EL1 timer register
+	 * accesses redirected to the VNCR page. Any guest action taken on
+	 * the timer is postponed until the next exit, leading to a very
+	 * poor quality of emulation.
+	 *
+	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
+	 * which allows trapping of the timer registers even with NV2.
+	 * Still, this is worse than FEAT_NV on its own. Meh.
+	 */
+	if (!vcpu_el2_e2h_is_set(vcpu)) {
+		if (cpus_have_final_cap(ARM64_HAS_ECV))
+			return;
+
+		/*
+		 * A non-VHE guest hypervisor doesn't have any direct access
+		 * to its timers: the EL2 registers trap (and the HW is
+		 * fully emulated), while the EL0 registers access memory
+		 * despite the access being notionally direct. Boo.
+		 *
+		 * We update the hardware timer registers with the
+		 * latest value written by the guest to the VNCR page
+		 * and let the hardware take care of the rest.
+		 */
+		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
+		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
+		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
+		write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
+	} else {
+		/*
+		 * For a VHE guest hypervisor, the EL2 state is directly
+		 * stored in the host EL1 timers, while the emulated EL0
+		 * state is stored in the VNCR page. The latter could have
+		 * been updated behind our back, and we must reset the
+		 * emulation of the timers.
+		 */
+		struct timer_map map;
+		get_timer_map(vcpu, &map);
+
+		soft_timer_cancel(&map.emul_vtimer->hrtimer);
+		soft_timer_cancel(&map.emul_ptimer->hrtimer);
+		timer_emulate(map.emul_vtimer);
+		timer_emulate(map.emul_ptimer);
+	}
+}
+
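Note: nothing in this file calls kvm_timer_sync_nested(); the companion change presumably hooks it into the vcpu run loop right after a guest exit taken while the vCPU was in hyp context, along these lines (illustrative, not the actual call site):

/* Sketch: likely in the exit path of kvm_arch_vcpu_ioctl_run(),
 * next to the existing timer sync handling.
 */
if (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu))
	kvm_timer_sync_nested(vcpu);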
 /*
  * With a userspace irqchip we have to check if the guest de-asserted the
  * timer and if so, unmask the timer irq signal on the host interrupt
@@ -1363,6 +1470,37 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
 	return 0;
 }
 
+static void kvm_timer_handle_errata(void)
+{
+	u64 mmfr0, mmfr1, mmfr4;
+
+	/*
+	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
+	 * all virtual timer/counter accesses, requiring FEAT_ECV.
+	 *
+	 * However, a hypervisor supporting nesting is likely to mitigate the
+	 * erratum at L0, and not require other levels to mitigate it (which
+	 * would otherwise be a terrible performance sink due to trap
+	 * amplification).
+	 *
+	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
+	 * and that NV is likely not to (because of limitations of the
+	 * architecture), only enable the workaround when FEAT_VHE and
+	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
+	 */
+	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
+	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1)		&&
+	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4)	&&
+	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0)		&&
+	    (has_vhe() || has_hvhe())				&&
+	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
+		static_branch_enable(&broken_cntvoff_key);
+		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
+	}
+}
+
 int __init kvm_timer_hyp_init(bool has_gic)
 {
 	struct arch_timer_kvm_info *info;
@@ -1431,6 +1569,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
 		goto out_free_vtimer_irq;
 	}
 
+	kvm_timer_handle_errata();
 	return 0;
 
 out_free_ptimer_irq: