@@ -1010,6 +1010,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
10101010 set_pages_state (vaddr , npages , SNP_PAGE_STATE_PRIVATE );
10111011}
10121012
/*
 * Issue the AP Creation NAE event (SVM_VMGEXIT_AP_CREATION) to the
 * hypervisor to create or destroy an AP's VMSA.
 *
 * @event:   SVM_VMGEXIT_AP_CREATE* to (re)create the vCPU from @vmsa,
 *           SVM_VMGEXIT_AP_DESTROY to tear it down.
 * @vmsa:    save area whose physical address is handed to the hypervisor.
 * @apic_id: APIC ID of the target AP.
 *
 * Returns 0 on success, -EINVAL if the hypervisor reported an error in
 * sw_exit_info_1 (or left it invalid).
 */
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
{
	bool create = event != SVM_VMGEXIT_AP_DESTROY;
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret = 0;

	/* The per-CPU GHCB must not be preempted away while in use. */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);

	/* Only a CREATE event passes the guest's SEV features in RAX. */
	if (create)
		ghcb_set_rax(ghcb, vmsa->sev_features);

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	/* sw_exit_info_1 encodes: APIC ID [63:32] | VMPL [31:16] | event [15:0]. */
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32)	|
				((u64)snp_vmpl << 16)	|
				event);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* A non-zero low 32 bits of sw_exit_info_1 signals failure. */
	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}
1052+
/*
 * Set or clear the VMSA attribute of the page at @va.
 *
 * @va:        virtual address of the (4K) page to adjust.
 * @caa:       calling-area address handed to the SVSM on vCPU creation;
 *             unused when clearing the attribute or when running at VMPL0.
 * @apic_id:   APIC ID of the vCPU the page belongs to (SVSM path only).
 * @make_vmsa: true to mark the page as a VMSA page, false to clear it.
 *
 * When not running at VMPL0 (snp_vmpl != 0), the change must be requested
 * from the SVSM via its Core protocol; otherwise RMPADJUST is used directly.
 *
 * Returns 0 on success, non-zero error code otherwise.
 */
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int ret;

	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;

		/* The per-CPU CAA must stay stable across the SVSM call. */
		local_irq_save(flags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8  = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		ret = svsm_perform_call_protocol(&call);

		local_irq_restore(flags);
	} else {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 attrs = 1;

		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
	}

	return ret;
}
1097+
1098+ static void snp_cleanup_vmsa (struct sev_es_save_area * vmsa , int apic_id )
1099+ {
1100+ int err ;
1101+
1102+ err = snp_set_vmsa (vmsa , NULL , apic_id , false);
1103+ if (err )
1104+ pr_err ("clear VMSA page failed (%u), leaking page\n" , err );
1105+ else
1106+ free_page ((unsigned long )vmsa );
1107+ }
1108+
10131109static void set_pte_enc (pte_t * kpte , int level , void * va )
10141110{
10151111 struct pte_enc_desc d = {
@@ -1056,7 +1152,8 @@ static void unshare_all_memory(void)
10561152 data = per_cpu (runtime_data , cpu );
10571153 ghcb = (unsigned long )& data -> ghcb_page ;
10581154
1059- if (addr <= ghcb && ghcb <= addr + size ) {
1155+ /* Handle the case of a huge page containing the GHCB page */
1156+ if (addr <= ghcb && ghcb < addr + size ) {
10601157 skipped_addr = true;
10611158 break ;
10621159 }
@@ -1106,11 +1203,70 @@ void snp_kexec_begin(void)
11061203 pr_warn ("Failed to stop shared<->private conversions\n" );
11071204}
11081205
/*
 * Shutdown all APs except the one handling kexec/kdump and clearing
 * the VMSA tag on AP's VMSA pages as they are not being used as
 * VMSA page anymore.
 */
static void shutdown_all_aps(void)
{
	struct sev_es_save_area *vmsa;
	int apic_id, this_cpu, cpu;

	/* Pin this task to its CPU so this_cpu stays valid throughout. */
	this_cpu = get_cpu();

	/*
	 * APs are already in HLT loop when enc_kexec_finish() callback
	 * is invoked.
	 */
	for_each_present_cpu(cpu) {
		vmsa = per_cpu(sev_vmsa, cpu);

		/*
		 * The BSP or offlined APs do not have guest allocated VMSA
		 * and there is no need to clear the VMSA tag for this page.
		 */
		if (!vmsa)
			continue;

		/*
		 * Cannot clear the VMSA tag for the currently running vCPU.
		 */
		if (this_cpu == cpu) {
			unsigned long pa;
			struct page *p;

			pa = __pa(vmsa);
			/*
			 * Mark the VMSA page of the running vCPU as offline
			 * so that is excluded and not touched by makedumpfile
			 * while generating vmcore during kdump.
			 */
			p = pfn_to_online_page(pa >> PAGE_SHIFT);
			if (p)
				__SetPageOffline(p);
			continue;
		}

		apic_id = cpuid_to_apicid[cpu];

		/*
		 * Issue AP destroy to ensure AP gets kicked out of guest mode
		 * to allow using RMPADJUST to remove the VMSA tag on it's
		 * VMSA page.
		 */
		vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
		snp_cleanup_vmsa(vmsa, apic_id);
	}

	put_cpu();
}
1264+
11091265void snp_kexec_finish (void )
11101266{
11111267 struct sev_es_runtime_data * data ;
1268+ unsigned long size , addr ;
11121269 unsigned int level , cpu ;
1113- unsigned long size ;
11141270 struct ghcb * ghcb ;
11151271 pte_t * pte ;
11161272
@@ -1120,6 +1276,8 @@ void snp_kexec_finish(void)
11201276 if (!IS_ENABLED (CONFIG_KEXEC_CORE ))
11211277 return ;
11221278
1279+ shutdown_all_aps ();
1280+
11231281 unshare_all_memory ();
11241282
11251283 /*
@@ -1136,54 +1294,11 @@ void snp_kexec_finish(void)
11361294 ghcb = & data -> ghcb_page ;
11371295 pte = lookup_address ((unsigned long )ghcb , & level );
11381296 size = page_level_size (level );
1139- set_pte_enc (pte , level , (void * )ghcb );
1140- snp_set_memory_private ((unsigned long )ghcb , (size / PAGE_SIZE ));
1141- }
1142- }
1143-
1144- static int snp_set_vmsa (void * va , void * caa , int apic_id , bool make_vmsa )
1145- {
1146- int ret ;
1147-
1148- if (snp_vmpl ) {
1149- struct svsm_call call = {};
1150- unsigned long flags ;
1151-
1152- local_irq_save (flags );
1153-
1154- call .caa = this_cpu_read (svsm_caa );
1155- call .rcx = __pa (va );
1156-
1157- if (make_vmsa ) {
1158- /* Protocol 0, Call ID 2 */
1159- call .rax = SVSM_CORE_CALL (SVSM_CORE_CREATE_VCPU );
1160- call .rdx = __pa (caa );
1161- call .r8 = apic_id ;
1162- } else {
1163- /* Protocol 0, Call ID 3 */
1164- call .rax = SVSM_CORE_CALL (SVSM_CORE_DELETE_VCPU );
1165- }
1166-
1167- ret = svsm_perform_call_protocol (& call );
1168-
1169- local_irq_restore (flags );
1170- } else {
1171- /*
1172- * If the kernel runs at VMPL0, it can change the VMSA
1173- * bit for a page using the RMPADJUST instruction.
1174- * However, for the instruction to succeed it must
1175- * target the permissions of a lesser privileged (higher
1176- * numbered) VMPL level, so use VMPL1.
1177- */
1178- u64 attrs = 1 ;
1179-
1180- if (make_vmsa )
1181- attrs |= RMPADJUST_VMSA_PAGE_BIT ;
1182-
1183- ret = rmpadjust ((unsigned long )va , RMP_PG_SIZE_4K , attrs );
1297+ /* Handle the case of a huge page containing the GHCB page */
1298+ addr = (unsigned long )ghcb & page_level_mask (level );
1299+ set_pte_enc (pte , level , (void * )addr );
1300+ snp_set_memory_private (addr , (size / PAGE_SIZE ));
11841301 }
1185-
1186- return ret ;
11871302}
11881303
11891304#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
@@ -1217,24 +1332,10 @@ static void *snp_alloc_vmsa_page(void)
12171332 return page_address (p + 1 );
12181333}
12191334
1220- static void snp_cleanup_vmsa (struct sev_es_save_area * vmsa , int apic_id )
1221- {
1222- int err ;
1223-
1224- err = snp_set_vmsa (vmsa , NULL , apic_id , false);
1225- if (err )
1226- pr_err ("clear VMSA page failed (%u), leaking page\n" , err );
1227- else
1228- free_page ((unsigned long )vmsa );
1229- }
1230-
12311335static int wakeup_cpu_via_vmgexit (u32 apic_id , unsigned long start_ip , unsigned int cpu )
12321336{
12331337 struct sev_es_save_area * cur_vmsa , * vmsa ;
1234- struct ghcb_state state ;
12351338 struct svsm_ca * caa ;
1236- unsigned long flags ;
1237- struct ghcb * ghcb ;
12381339 u8 sipi_vector ;
12391340 int ret ;
12401341 u64 cr4 ;
@@ -1333,33 +1434,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned
13331434 }
13341435
13351436 /* Issue VMGEXIT AP Creation NAE event */
1336- local_irq_save (flags );
1337-
1338- ghcb = __sev_get_ghcb (& state );
1339-
1340- vc_ghcb_invalidate (ghcb );
1341- ghcb_set_rax (ghcb , vmsa -> sev_features );
1342- ghcb_set_sw_exit_code (ghcb , SVM_VMGEXIT_AP_CREATION );
1343- ghcb_set_sw_exit_info_1 (ghcb ,
1344- ((u64 )apic_id << 32 ) |
1345- ((u64 )snp_vmpl << 16 ) |
1346- SVM_VMGEXIT_AP_CREATE );
1347- ghcb_set_sw_exit_info_2 (ghcb , __pa (vmsa ));
1348-
1349- sev_es_wr_ghcb_msr (__pa (ghcb ));
1350- VMGEXIT ();
1351-
1352- if (!ghcb_sw_exit_info_1_is_valid (ghcb ) ||
1353- lower_32_bits (ghcb -> save .sw_exit_info_1 )) {
1354- pr_err ("SNP AP Creation error\n" );
1355- ret = - EINVAL ;
1356- }
1357-
1358- __sev_put_ghcb (& state );
1359-
1360- local_irq_restore (flags );
1361-
1362- /* Perform cleanup if there was an error */
1437+ ret = vmgexit_ap_control (SVM_VMGEXIT_AP_CREATE , vmsa , apic_id );
13631438 if (ret ) {
13641439 snp_cleanup_vmsa (vmsa , apic_id );
13651440 vmsa = NULL ;
0 commit comments