Skip to content

Commit 4ba21ea

Browse files
committed
x86/sev: Do not touch VMSA pages during SNP guest memory kdump
JIRA: https://issues.redhat.com/browse/RHEL-10019 commit d2062cc Author: Ashish Kalra <ashish.kalra@amd.com> Date: Mon Apr 28 21:41:51 2025 +0000 x86/sev: Do not touch VMSA pages during SNP guest memory kdump When kdump is running makedumpfile to generate vmcore and dump SNP guest memory it touches the VMSA page of the vCPU executing kdump. It then results in unrecoverable #NPF/RMP faults as the VMSA page is marked busy/in-use when the vCPU is running and subsequently causes a guest softlockup/hang. Additionally, other APs may be halted in guest mode and their VMSA pages are marked busy and touching these VMSA pages during guest memory dump will also cause #NPF. Issue AP_DESTROY GHCB calls on other APs to ensure they are kicked out of guest mode and then clear the VMSA bit on their VMSA pages. If the vCPU running kdump is an AP, mark its VMSA page as offline to ensure that makedumpfile excludes that page while dumping guest memory. Fixes: 3074152 ("x86/sev: Convert shared memory back to private on kexec") Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Srikanth Aithal <sraithal@amd.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250428214151.155464-1-Ashish.Kalra@amd.com RHEL Conflicts: RHEL commit changes the function signature of wakeup_secondary_cpu_via_init(u32 phys_apicid, unsigned long start_eip) to static int wakeup_secondary_cpu_via_init(u32 phys_apicid, unsigned long start_eip, unsigned int cpu). However, no logic or code related to this patch is affected. Signed-off-by: Bandan Das <bsd@redhat.com>
1 parent 7dc962d commit 4ba21ea

File tree

1 file changed

+158
-86
lines changed

1 file changed

+158
-86
lines changed

arch/x86/coco/sev/core.c

Lines changed: 158 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
10101010
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
10111011
}
10121012

1013+
/*
 * Issue an SVM_VMGEXIT_AP_CREATION GHCB request for the vCPU identified by
 * @apic_id.
 *
 * @event:   sub-event placed in the low bits of sw_exit_info_1; anything
 *           other than SVM_VMGEXIT_AP_DESTROY is treated as a create request.
 * @vmsa:    save area whose physical address is passed in sw_exit_info_2 and,
 *           for create requests, whose sev_features value is passed in RAX.
 * @apic_id: target APIC ID, encoded in bits 63:32 of sw_exit_info_1.
 *
 * The whole GHCB get/fill/VMGEXIT/put sequence runs between
 * local_irq_save() and local_irq_restore().
 *
 * Returns 0 on success, or -EINVAL (after logging an error) when
 * sw_exit_info_1 is not marked valid on return or its lower 32 bits are
 * non-zero.
 */
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
1014+
{
1015+
/* Every event except AP_DESTROY is handled as a create-type request. */
bool create = event != SVM_VMGEXIT_AP_DESTROY;
1016+
struct ghcb_state state;
1017+
unsigned long flags;
1018+
struct ghcb *ghcb;
1019+
int ret = 0;
1020+
1021+
local_irq_save(flags);
1022+
1023+
ghcb = __sev_get_ghcb(&state);
1024+
1025+
vc_ghcb_invalidate(ghcb);
1026+
1027+
/* RAX (SEV features) is only supplied for create requests. */
if (create)
1028+
ghcb_set_rax(ghcb, vmsa->sev_features);
1029+
1030+
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
1031+
/* sw_exit_info_1 layout: APIC ID << 32 | snp_vmpl << 16 | event. */
ghcb_set_sw_exit_info_1(ghcb,
1032+
((u64)apic_id << 32) |
1033+
((u64)snp_vmpl << 16) |
1034+
event);
1035+
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
1036+
1037+
sev_es_wr_ghcb_msr(__pa(ghcb));
1038+
VMGEXIT();
1039+
1040+
/* A missing valid bit or a non-zero low dword is reported as failure. */
if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
1041+
lower_32_bits(ghcb->save.sw_exit_info_1)) {
1042+
pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
1043+
ret = -EINVAL;
1044+
}
1045+
1046+
__sev_put_ghcb(&state);
1047+
1048+
local_irq_restore(flags);
1049+
1050+
return ret;
1051+
}
1052+
1053+
/*
 * Set or clear the VMSA attribute on the page at @va.
 *
 * @va:        virtual address of the page to change.
 * @caa:       calling area address; only used (as __pa(caa)) when creating a
 *             vCPU through the SVSM path.
 * @apic_id:   APIC ID; only used when creating a vCPU through the SVSM path.
 * @make_vmsa: true to mark the page as a VMSA, false to remove the marking.
 *
 * When snp_vmpl is non-zero the request is forwarded as an SVSM core call
 * (CREATE_VCPU or DELETE_VCPU) with interrupts saved/restored around it;
 * otherwise RMPADJUST is issued directly on a 4K page.
 *
 * Returns the value returned by svsm_perform_call_protocol() or rmpadjust().
 */
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
1054+
{
1055+
int ret;
1056+
1057+
if (snp_vmpl) {
1058+
struct svsm_call call = {};
1059+
unsigned long flags;
1060+
1061+
local_irq_save(flags);
1062+
1063+
call.caa = this_cpu_read(svsm_caa);
1064+
call.rcx = __pa(va);
1065+
1066+
if (make_vmsa) {
1067+
/* Protocol 0, Call ID 2 */
1068+
call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
1069+
call.rdx = __pa(caa);
1070+
call.r8 = apic_id;
1071+
} else {
1072+
/* Protocol 0, Call ID 3 */
1073+
call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
1074+
}
1075+
1076+
ret = svsm_perform_call_protocol(&call);
1077+
1078+
local_irq_restore(flags);
1079+
} else {
1080+
/*
1081+
* If the kernel runs at VMPL0, it can change the VMSA
1082+
* bit for a page using the RMPADJUST instruction.
1083+
* However, for the instruction to succeed it must
1084+
* target the permissions of a lesser privileged (higher
1085+
* numbered) VMPL level, so use VMPL1.
1086+
*/
1087+
u64 attrs = 1;
1088+
1089+
if (make_vmsa)
1090+
attrs |= RMPADJUST_VMSA_PAGE_BIT;
1091+
1092+
ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
1093+
}
1094+
1095+
return ret;
1096+
}
1097+
1098+
/*
 * Remove the VMSA marking from @vmsa and release the page.
 *
 * The page is only handed to free_page() when clearing the VMSA attribute
 * succeeded; on failure an error is logged and the page is intentionally
 * leaked rather than freed.
 *
 * NOTE(review): err is an int but is printed with %u, so a negative error
 * code would show up as a large unsigned value - confirm whether %d was
 * intended.
 */
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
1099+
{
1100+
int err;
1101+
1102+
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
1103+
if (err)
1104+
pr_err("clear VMSA page failed (%u), leaking page\n", err);
1105+
else
1106+
free_page((unsigned long)vmsa);
1107+
}
1108+
10131109
static void set_pte_enc(pte_t *kpte, int level, void *va)
10141110
{
10151111
struct pte_enc_desc d = {
@@ -1106,6 +1202,65 @@ void snp_kexec_begin(void)
11061202
pr_warn("Failed to stop shared<->private conversions\n");
11071203
}
11081204

1205+
/*
1206+
* Shut down all APs except the one handling kexec/kdump and clear
1207+
* the VMSA tag on each AP's VMSA page, as those pages are no longer
1208+
* being used as VMSA pages.
1209+
*
* For every present CPU that has a per-CPU sev_vmsa pointer:
*  - the CPU executing this function only has its VMSA page marked
*    offline (the tag cannot be cleared while the vCPU is running);
*  - every other CPU gets an AP_DESTROY request followed by
*    snp_cleanup_vmsa() on its VMSA page.
*
* NOTE(review): the return value of vmgexit_ap_control() is ignored, so
* snp_cleanup_vmsa() is attempted even if AP_DESTROY reported an error -
* confirm this best-effort behaviour is intended.
*/
1210+
static void shutdown_all_aps(void)
1211+
{
1212+
struct sev_es_save_area *vmsa;
1213+
int apic_id, this_cpu, cpu;
1214+
1215+
/* Paired with put_cpu() at the end of the function. */
this_cpu = get_cpu();
1216+
1217+
/*
1218+
* APs are already in HLT loop when enc_kexec_finish() callback
1219+
* is invoked.
1220+
*/
1221+
for_each_present_cpu(cpu) {
1222+
vmsa = per_cpu(sev_vmsa, cpu);
1223+
1224+
/*
1225+
* The BSP or offlined APs do not have guest allocated VMSA
1226+
* and there is no need to clear the VMSA tag for this page.
1227+
*/
1228+
if (!vmsa)
1229+
continue;
1230+
1231+
/*
1232+
* Cannot clear the VMSA tag for the currently running vCPU.
1233+
*/
1234+
if (this_cpu == cpu) {
1235+
unsigned long pa;
1236+
struct page *p;
1237+
1238+
pa = __pa(vmsa);
1239+
/*
1240+
* Mark the VMSA page of the running vCPU as offline
1241+
* so that it is excluded and not touched by makedumpfile
1242+
* while generating vmcore during kdump.
1243+
*/
1244+
p = pfn_to_online_page(pa >> PAGE_SHIFT);
1245+
if (p)
1246+
__SetPageOffline(p);
1247+
continue;
1248+
}
1249+
1250+
apic_id = cpuid_to_apicid[cpu];
1251+
1252+
/*
1253+
* Issue AP destroy to ensure AP gets kicked out of guest mode
1254+
* to allow using RMPADJUST to remove the VMSA tag on its
1255+
* VMSA page.
1256+
*/
1257+
vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
1258+
snp_cleanup_vmsa(vmsa, apic_id);
1259+
}
1260+
1261+
put_cpu();
1262+
}
1263+
11091264
void snp_kexec_finish(void)
11101265
{
11111266
struct sev_es_runtime_data *data;
@@ -1120,6 +1275,8 @@ void snp_kexec_finish(void)
11201275
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
11211276
return;
11221277

1278+
shutdown_all_aps();
1279+
11231280
unshare_all_memory();
11241281

11251282
/*
@@ -1141,51 +1298,6 @@ void snp_kexec_finish(void)
11411298
}
11421299
}
11431300

1144-
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
1145-
{
1146-
int ret;
1147-
1148-
if (snp_vmpl) {
1149-
struct svsm_call call = {};
1150-
unsigned long flags;
1151-
1152-
local_irq_save(flags);
1153-
1154-
call.caa = this_cpu_read(svsm_caa);
1155-
call.rcx = __pa(va);
1156-
1157-
if (make_vmsa) {
1158-
/* Protocol 0, Call ID 2 */
1159-
call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
1160-
call.rdx = __pa(caa);
1161-
call.r8 = apic_id;
1162-
} else {
1163-
/* Protocol 0, Call ID 3 */
1164-
call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
1165-
}
1166-
1167-
ret = svsm_perform_call_protocol(&call);
1168-
1169-
local_irq_restore(flags);
1170-
} else {
1171-
/*
1172-
* If the kernel runs at VMPL0, it can change the VMSA
1173-
* bit for a page using the RMPADJUST instruction.
1174-
* However, for the instruction to succeed it must
1175-
* target the permissions of a lesser privileged (higher
1176-
* numbered) VMPL level, so use VMPL1.
1177-
*/
1178-
u64 attrs = 1;
1179-
1180-
if (make_vmsa)
1181-
attrs |= RMPADJUST_VMSA_PAGE_BIT;
1182-
1183-
ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
1184-
}
1185-
1186-
return ret;
1187-
}
1188-
11891301
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
11901302
#define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
11911303
#define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
@@ -1217,24 +1329,10 @@ static void *snp_alloc_vmsa_page(void)
12171329
return page_address(p + 1);
12181330
}
12191331

1220-
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
1221-
{
1222-
int err;
1223-
1224-
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
1225-
if (err)
1226-
pr_err("clear VMSA page failed (%u), leaking page\n", err);
1227-
else
1228-
free_page((unsigned long)vmsa);
1229-
}
1230-
12311332
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu)
12321333
{
12331334
struct sev_es_save_area *cur_vmsa, *vmsa;
1234-
struct ghcb_state state;
12351335
struct svsm_ca *caa;
1236-
unsigned long flags;
1237-
struct ghcb *ghcb;
12381336
u8 sipi_vector;
12391337
int ret;
12401338
u64 cr4;
@@ -1333,33 +1431,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned
13331431
}
13341432

13351433
/* Issue VMGEXIT AP Creation NAE event */
1336-
local_irq_save(flags);
1337-
1338-
ghcb = __sev_get_ghcb(&state);
1339-
1340-
vc_ghcb_invalidate(ghcb);
1341-
ghcb_set_rax(ghcb, vmsa->sev_features);
1342-
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
1343-
ghcb_set_sw_exit_info_1(ghcb,
1344-
((u64)apic_id << 32) |
1345-
((u64)snp_vmpl << 16) |
1346-
SVM_VMGEXIT_AP_CREATE);
1347-
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
1348-
1349-
sev_es_wr_ghcb_msr(__pa(ghcb));
1350-
VMGEXIT();
1351-
1352-
if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
1353-
lower_32_bits(ghcb->save.sw_exit_info_1)) {
1354-
pr_err("SNP AP Creation error\n");
1355-
ret = -EINVAL;
1356-
}
1357-
1358-
__sev_put_ghcb(&state);
1359-
1360-
local_irq_restore(flags);
1361-
1362-
/* Perform cleanup if there was an error */
1434+
ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
13631435
if (ret) {
13641436
snp_cleanup_vmsa(vmsa, apic_id);
13651437
vmsa = NULL;

0 commit comments

Comments
 (0)