Commit 02e5f74

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 "ARM:

   - Fix the handling of ZCR_EL2 in NV VMs

   - Pick the correct translation regime when doing a PTW on the back of
     a SEA

   - Prevent userspace from injecting an event into a vcpu that isn't
     initialised yet

   - Move timer save/restore to the sysreg handling code, fixing EL2
     timer access in the process

   - Add FGT-based trapping of MDSCR_EL1 to reduce the overhead of debug

   - Fix trapping configuration when the host isn't GICv3

   - Improve the detection of HCR_EL2.E2H being RES1

   - Drop a spurious 'break' statement in the S1 PTW

   - Don't try to access SPE when owned by EL3

  Documentation updates:

   - Document the failure modes of event injection

   - Document that a GICv3 guest can be created on a GICv5 host with
     FEAT_GCIE_LEGACY

  Selftest improvements:

   - Add a selftest for the effective value of HCR_EL2.AMO

   - Address build warning in the timer selftest when building with
     clang

   - Teach irqfd selftests about non-x86 architectures

   - Add missing sysregs to the set_id_regs selftest

   - Fix vcpu allocation in the vgic_lpi_stress selftest

   - Correctly enable interrupts in the vgic_lpi_stress selftest

  x86:

   - Expand the KVM_PRE_FAULT_MEMORY selftest to add a regression test
     for the bug fixed by commit 3ccbf6f ("KVM: x86/mmu: Return -EAGAIN
     if userspace deletes/moves memslot during prefault")

   - Don't try to get PMU capabilities from perf when running a CPU with
     hybrid CPUs/PMUs, as perf will rightly WARN.

  guest_memfd:

   - Rework KVM_CAP_GUEST_MEMFD_MMAP (newly introduced in 6.18) into a
     more generic KVM_CAP_GUEST_MEMFD_FLAGS

   - Add a guest_memfd INIT_SHARED flag and require userspace to
     explicitly set said flag to initialize memory as SHARED,
     irrespective of MMAP. The behavior merged in 6.18 is that enabling
     mmap() implicitly initializes memory as SHARED, which would result
     in an ABI collision for x86 CoCo VMs as their memory is currently
     always initialized PRIVATE.

   - Allow mmap() on guest_memfd for x86 CoCo VMs, i.e. on VMs with
     private memory, to enable testing such setups, i.e. to hopefully
     flush out any other lurking ABI issues before 6.18 is officially
     released

   - Add testcases to the guest_memfd selftest to cover guest_memfd
     without MMAP, and host userspace accesses to mmap()'d private
     memory"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (46 commits)
  arm64: Revamp HCR_EL2.E2H RES1 detection
  KVM: arm64: nv: Use FGT write trap of MDSCR_EL1 when available
  KVM: arm64: Compute per-vCPU FGTs at vcpu_load()
  KVM: arm64: selftests: Fix misleading comment about virtual timer encoding
  KVM: arm64: selftests: Add an E2H=0-specific configuration to get_reg_list
  KVM: arm64: selftests: Make dependencies on VHE-specific registers explicit
  KVM: arm64: Kill leftovers of ad-hoc timer userspace access
  KVM: arm64: Fix WFxT handling of nested virt
  KVM: arm64: Move CNT*CT_EL0 userspace accessors to generic infrastructure
  KVM: arm64: Move CNT*_CVAL_EL0 userspace accessors to generic infrastructure
  KVM: arm64: Move CNT*_CTL_EL0 userspace accessors to generic infrastructure
  KVM: arm64: Add timer UAPI workaround to sysreg infrastructure
  KVM: arm64: Make timer_set_offset() generally accessible
  KVM: arm64: Replace timer context vcpu pointer with timer_id
  KVM: arm64: Introduce timer_context_to_vcpu() helper
  KVM: arm64: Hide CNTHV_*_EL2 from userspace for nVHE guests
  Documentation: KVM: Update GICv3 docs for GICv5 hosts
  KVM: arm64: gic-v3: Only set ICH_HCR traps for v2-on-v3 or v3 guests
  KVM: arm64: selftests: Actually enable IRQs in vgic_lpi_stress
  KVM: arm64: selftests: Allocate vcpus with correct size
  ...
2 parents 0e622c4 + 4361f5a


44 files changed: +940, -539 lines

Documentation/virt/kvm/api.rst

Lines changed: 17 additions & 3 deletions
@@ -1229,6 +1229,9 @@ It is not possible to read back a pending external abort (injected via
 KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
 directly to the virtual CPU).
 
+Calling this ioctl on a vCPU that hasn't been initialized will return
+-ENOEXEC.
+
 ::
 
   struct kvm_vcpu_events {
@@ -1309,6 +1312,8 @@ exceptions by manipulating individual registers using the KVM_SET_ONE_REG API.
 
 See KVM_GET_VCPU_EVENTS for the data structure.
 
+Calling this ioctl on a vCPU that hasn't been initialized will return
+-ENOEXEC.
 
 4.33 KVM_GET_DEBUGREGS
 ----------------------
@@ -6432,9 +6437,18 @@ most one mapping per page, i.e. binding multiple memory regions to a single
 guest_memfd range is not allowed (any number of memory regions can be bound to
 a single guest_memfd file, but the bound ranges must not overlap).
 
-When the capability KVM_CAP_GUEST_MEMFD_MMAP is supported, the 'flags' field
-supports GUEST_MEMFD_FLAG_MMAP. Setting this flag on guest_memfd creation
-enables mmap() and faulting of guest_memfd memory to host userspace.
+The capability KVM_CAP_GUEST_MEMFD_FLAGS enumerates the `flags` that can be
+specified via KVM_CREATE_GUEST_MEMFD. Currently defined flags:
+
+  ============================ ================================================
+  GUEST_MEMFD_FLAG_MMAP        Enable using mmap() on the guest_memfd file
+                               descriptor.
+  GUEST_MEMFD_FLAG_INIT_SHARED Make all memory in the file shared during
+                               KVM_CREATE_GUEST_MEMFD (memory files created
+                               without INIT_SHARED will be marked private).
+                               Shared memory can be faulted into host userspace
+                               page tables. Private memory cannot.
+  ============================ ================================================
 
 When the KVM MMU performs a PFN lookup to service a guest fault and the backing
 guest_memfd has the GUEST_MEMFD_FLAG_MMAP set, then the fault will always be
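
As a usage illustration of the flags documented above (a minimal sketch, not
part of this commit): userspace queries KVM_CAP_GUEST_MEMFD_FLAGS for the set
of supported flags, then passes the desired subset to KVM_CREATE_GUEST_MEMFD.
The convention that the capability check returns the supported flags as a
bitmask is an assumption drawn from the "enumerates the flags" wording.

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  /* Create an mmap()-able guest_memfd whose memory starts out SHARED. */
  static int create_shared_gmem(int vm_fd, __u64 size)
  {
  	struct kvm_create_guest_memfd gmem = {
  		.size  = size,
  		.flags = GUEST_MEMFD_FLAG_MMAP | GUEST_MEMFD_FLAG_INIT_SHARED,
  	};
  	long supported;

  	/* KVM_CAP_GUEST_MEMFD_FLAGS reports the flags this kernel accepts */
  	supported = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_GUEST_MEMFD_FLAGS);
  	if (supported < 0 || (supported & gmem.flags) != gmem.flags)
  		return -1;	/* kernel too old, or flags unsupported */

  	/* on success, returns a new fd for the guest_memfd instance */
  	return ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
  }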

Documentation/virt/kvm/devices/arm-vgic-v3.rst

Lines changed: 2 additions & 1 deletion
@@ -13,7 +13,8 @@ will act as the VM interrupt controller, requiring emulated user-space devices
 to inject interrupts to the VGIC instead of directly to CPUs. It is not
 possible to create both a GICv3 and GICv2 on the same VM.
 
-Creating a guest GICv3 device requires a host GICv3 as well.
+Creating a guest GICv3 device requires a host GICv3, or a GICv5 host with
+support for FEAT_GCIE_LEGACY.
 
 
 Groups:
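
For reference, the guest GICv3 device that the text refers to is created
through the generic KVM device API; the sketch below is illustrative context
rather than part of the patch.

  struct kvm_create_device cd = {
  	.type = KVM_DEV_TYPE_ARM_VGIC_V3,	/* the guest GICv3 device */
  };

  /* On a GICv5 host this now succeeds when FEAT_GCIE_LEGACY is supported */
  if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0)
  	vgic_fd = cd.fd;	/* device fd, used with KVM_SET_DEVICE_ATTR etc. */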

arch/arm64/include/asm/el2_setup.h

Lines changed: 32 additions & 6 deletions
@@ -24,22 +24,48 @@
  * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
  * can reset into an UNKNOWN state and might not read as 1 until it has
  * been initialized explicitly.
- *
- * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
- * don't advertise it (they predate this relaxation).
- *
  * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
  * indicating whether the CPU is running in E2H mode.
  */
 	mrs_s	x1, SYS_ID_AA64MMFR4_EL1
 	sbfx	x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
 	cmp	x1, #0
-	b.ge	.LnVHE_\@
+	b.lt	.LnE2H0_\@
 
+	/*
+	 * Unfortunately, HCR_EL2.E2H can be RES1 even if not advertised
+	 * as such via ID_AA64MMFR4_EL1.E2H0:
+	 *
+	 * - Fruity CPUs predate the !FEAT_E2H0 relaxation, and seem to
+	 *   have HCR_EL2.E2H implemented as RAO/WI.
+	 *
+	 * - On CPUs that lack FEAT_FGT, a hypervisor can't trap guest
+	 *   reads of ID_AA64MMFR4_EL1 to advertise !FEAT_E2H0. NV
+	 *   guests on these hosts can write to HCR_EL2.E2H without
+	 *   trapping to the hypervisor, but these writes have no
+	 *   functional effect.
+	 *
+	 * Handle both cases by checking for an essential VHE property
+	 * (system register remapping) to decide whether we're
+	 * effectively VHE-only or not.
+	 */
+	msr_hcr_el2 x0			// Setup HCR_EL2 as nVHE
+	isb
+	mov	x1, #1			// Write something to FAR_EL1
+	msr	far_el1, x1
+	isb
+	mov	x1, #2			// Try to overwrite it via FAR_EL2
+	msr	far_el2, x1
+	isb
+	mrs	x1, far_el1		// If we see the latest write in FAR_EL1,
+	cmp	x1, #2			// we can safely assume we are VHE only.
+	b.ne	.LnVHE_\@		// Otherwise, we know that nVHE works.
+
+.LnE2H0_\@:
 	orr	x0, x0, #HCR_E2H
-.LnVHE_\@:
 	msr_hcr_el2 x0
 	isb
+.LnVHE_\@:
 .endm
 
 .macro __init_el2_sctlr
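
The probe above, restated with the kernel's C sysreg accessors purely for
readability; illustrative only, since the real check must run in assembly
during early EL2 setup, before any C environment exists.

  /*
   * If HCR_EL2.E2H is effectively RES1, VHE register remapping makes
   * FAR_EL2 alias FAR_EL1, so a write through FAR_EL2 shows up when
   * reading FAR_EL1 back.
   */
  write_sysreg(1, far_el1);
  isb();
  write_sysreg(2, far_el2);
  isb();
  vhe_only = (read_sysreg(far_el1) == 2);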

arch/arm64/include/asm/kvm_host.h

Lines changed: 50 additions & 0 deletions
@@ -816,6 +816,11 @@ struct kvm_vcpu_arch {
 	u64 hcrx_el2;
 	u64 mdcr_el2;
 
+	struct {
+		u64	r;
+		u64	w;
+	} fgt[__NR_FGT_GROUP_IDS__];
+
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
@@ -1600,6 +1605,51 @@ static inline bool kvm_arch_has_irq_bypass(void)
 void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt);
 void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1);
 void check_feature_map(void);
+void kvm_vcpu_load_fgt(struct kvm_vcpu *vcpu);
+
+static __always_inline enum fgt_group_id __fgt_reg_to_group_id(enum vcpu_sysreg reg)
+{
+	switch (reg) {
+	case HFGRTR_EL2:
+	case HFGWTR_EL2:
+		return HFGRTR_GROUP;
+	case HFGITR_EL2:
+		return HFGITR_GROUP;
+	case HDFGRTR_EL2:
+	case HDFGWTR_EL2:
+		return HDFGRTR_GROUP;
+	case HAFGRTR_EL2:
+		return HAFGRTR_GROUP;
+	case HFGRTR2_EL2:
+	case HFGWTR2_EL2:
+		return HFGRTR2_GROUP;
+	case HFGITR2_EL2:
+		return HFGITR2_GROUP;
+	case HDFGRTR2_EL2:
+	case HDFGWTR2_EL2:
+		return HDFGRTR2_GROUP;
+	default:
+		BUILD_BUG_ON(1);
+	}
+}
 
+#define vcpu_fgt(vcpu, reg)						\
+	({								\
+		enum fgt_group_id id = __fgt_reg_to_group_id(reg);	\
+		u64 *p;							\
+		switch (reg) {						\
+		case HFGWTR_EL2:					\
+		case HDFGWTR_EL2:					\
+		case HFGWTR2_EL2:					\
+		case HDFGWTR2_EL2:					\
+			p = &(vcpu)->arch.fgt[id].w;			\
+			break;						\
+		default:						\
+			p = &(vcpu)->arch.fgt[id].r;			\
+			break;						\
+		}							\
+									\
+		p;							\
+	})
 
 #endif /* __ARM64_KVM_HOST_H__ */
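
A hypothetical caller sketch of the new accessor: vcpu_fgt() yields a pointer
to the read or write word of the matching FGT group, so the MDSCR_EL1
write-trap change in this merge can set its trap bit roughly as below. The
HDFGWTR_EL2_MDSCR_EL1 bit name follows the kernel's generated sysreg encodings
and is an assumption here, not taken from this diff.

  /*
   * Trap guest writes to MDSCR_EL1 via the debug fine-grained traps;
   * vcpu_fgt() routes HDFGWTR_EL2 to the .w word of the HDFGRTR_GROUP slot.
   */
  *vcpu_fgt(vcpu, HDFGWTR_EL2) |= HDFGWTR_EL2_MDSCR_EL1;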

arch/arm64/kvm/arch_timer.c

Lines changed: 14 additions & 91 deletions
@@ -66,7 +66,7 @@ static int nr_timers(struct kvm_vcpu *vcpu)
 
 u32 timer_get_ctl(struct arch_timer_context *ctxt)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
 
 	switch(arch_timer_ctx_index(ctxt)) {
 	case TIMER_VTIMER:
@@ -85,7 +85,7 @@ u32 timer_get_ctl(struct arch_timer_context *ctxt)
 
 u64 timer_get_cval(struct arch_timer_context *ctxt)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
 
 	switch(arch_timer_ctx_index(ctxt)) {
 	case TIMER_VTIMER:
@@ -104,7 +104,7 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
 
 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
 
 	switch(arch_timer_ctx_index(ctxt)) {
 	case TIMER_VTIMER:
@@ -126,7 +126,7 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
 
 static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
 
 	switch(arch_timer_ctx_index(ctxt)) {
 	case TIMER_VTIMER:
@@ -146,16 +146,6 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
 	}
 }
 
-static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
-{
-	if (!ctxt->offset.vm_offset) {
-		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
-		return;
-	}
-
-	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
-}
-
 u64 kvm_phys_timer_read(void)
 {
 	return timecounter->cc->read(timecounter->cc);
@@ -343,7 +333,7 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
 	u64 ns;
 
 	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
-	vcpu = ctx->vcpu;
+	vcpu = timer_context_to_vcpu(ctx);
 
 	trace_kvm_timer_hrtimer_expire(ctx);
 
@@ -436,8 +426,9 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
 	 *
 	 * But hey, it's fast, right?
 	 */
-	if (is_hyp_ctxt(ctx->vcpu) &&
-	    (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
+	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
+	if (is_hyp_ctxt(vcpu) &&
+	    (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
 		unsigned long val = timer_get_ctl(ctx);
 		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
 		timer_set_ctl(ctx, val);
@@ -470,7 +461,7 @@ static void timer_emulate(struct arch_timer_context *ctx)
 	trace_kvm_timer_emulate(ctx, should_fire);
 
 	if (should_fire != ctx->irq.level)
-		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
+		kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);
 
 	kvm_timer_update_status(ctx, should_fire);
 
@@ -498,7 +489,7 @@ static void set_cntpoff(u64 cntpoff)
 
 static void timer_save_state(struct arch_timer_context *ctx)
 {
-	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
 	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
 	unsigned long flags;
 
@@ -609,7 +600,7 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
 
 static void timer_restore_state(struct arch_timer_context *ctx)
 {
-	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
 	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
 	unsigned long flags;
 
@@ -668,7 +659,7 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo
 
 static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 {
-	struct kvm_vcpu *vcpu = ctx->vcpu;
+	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
 	bool phys_active = false;
 
 	/*
@@ -677,7 +668,7 @@ static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 	 * this point and the register restoration, we'll take the
 	 * interrupt anyway.
 	 */
-	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
+	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);
 
 	if (irqchip_in_kernel(vcpu->kvm))
 		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
@@ -1063,7 +1054,7 @@ static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
 	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
 	struct kvm *kvm = vcpu->kvm;
 
-	ctxt->vcpu = vcpu;
+	ctxt->timer_id = timerid;
 
 	if (timerid == TIMER_VTIMER)
 		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
@@ -1121,49 +1112,6 @@ void kvm_timer_cpu_down(void)
 		disable_percpu_irq(host_ptimer_irq);
 }
 
-int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
-	struct arch_timer_context *timer;
-
-	switch (regid) {
-	case KVM_REG_ARM_TIMER_CTL:
-		timer = vcpu_vtimer(vcpu);
-		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
-		break;
-	case KVM_REG_ARM_TIMER_CNT:
-		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
-			      &vcpu->kvm->arch.flags)) {
-			timer = vcpu_vtimer(vcpu);
-			timer_set_offset(timer, kvm_phys_timer_read() - value);
-		}
-		break;
-	case KVM_REG_ARM_TIMER_CVAL:
-		timer = vcpu_vtimer(vcpu);
-		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
-		break;
-	case KVM_REG_ARM_PTIMER_CTL:
-		timer = vcpu_ptimer(vcpu);
-		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
-		break;
-	case KVM_REG_ARM_PTIMER_CNT:
-		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
-			      &vcpu->kvm->arch.flags)) {
-			timer = vcpu_ptimer(vcpu);
-			timer_set_offset(timer, kvm_phys_timer_read() - value);
-		}
-		break;
-	case KVM_REG_ARM_PTIMER_CVAL:
-		timer = vcpu_ptimer(vcpu);
-		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
-		break;
-
-	default:
-		return -1;
-	}
-
-	return 0;
-}
-
 static u64 read_timer_ctl(struct arch_timer_context *timer)
 {
 	/*
@@ -1180,31 +1128,6 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
 	return ctl;
 }
 
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
-	switch (regid) {
-	case KVM_REG_ARM_TIMER_CTL:
-		return kvm_arm_timer_read(vcpu,
-					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
-	case KVM_REG_ARM_TIMER_CNT:
-		return kvm_arm_timer_read(vcpu,
-					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
-	case KVM_REG_ARM_TIMER_CVAL:
-		return kvm_arm_timer_read(vcpu,
-					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
-	case KVM_REG_ARM_PTIMER_CTL:
-		return kvm_arm_timer_read(vcpu,
-					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
-	case KVM_REG_ARM_PTIMER_CNT:
-		return kvm_arm_timer_read(vcpu,
-					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
-	case KVM_REG_ARM_PTIMER_CVAL:
-		return kvm_arm_timer_read(vcpu,
-					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
-	}
-	return (u64)-1;
-}
-
 static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
 			      struct arch_timer_context *timer,
 			      enum kvm_arch_timer_regs treg)
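
This diff drops the per-context vcpu back-pointer in favour of a timer_id,
relying on the timer_context_to_vcpu() helper introduced elsewhere in this
series and not shown in this view. A plausible shape for it, assuming the
contexts live in a fixed array embedded in the vcpu, is sketched below; the
real definition lives in include/kvm/arm_arch_timer.h and may differ in
detail.

  /* Sketch only: recover the owning vcpu from a timer context. */
  static inline struct kvm_vcpu *timer_context_to_vcpu(struct arch_timer_context *ctxt)
  {
  	/* ctxt->timer_id is ctxt's index within the timers[] array */
  	struct arch_timer_cpu *timer =
  		container_of(ctxt - ctxt->timer_id, struct arch_timer_cpu, timers[0]);

  	return container_of(timer, struct kvm_vcpu, arch.timer_cpu);
  }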
