@@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
959959 set_pages_state (vaddr , npages , SNP_PAGE_STATE_PRIVATE );
960960}
961961
962+ static int vmgexit_ap_control (u64 event , struct sev_es_save_area * vmsa , u32 apic_id )
963+ {
964+ bool create = event != SVM_VMGEXIT_AP_DESTROY ;
965+ struct ghcb_state state ;
966+ unsigned long flags ;
967+ struct ghcb * ghcb ;
968+ int ret = 0 ;
969+
970+ local_irq_save (flags );
971+
972+ ghcb = __sev_get_ghcb (& state );
973+
974+ vc_ghcb_invalidate (ghcb );
975+
976+ if (create )
977+ ghcb_set_rax (ghcb , vmsa -> sev_features );
978+
979+ ghcb_set_sw_exit_code (ghcb , SVM_VMGEXIT_AP_CREATION );
980+ ghcb_set_sw_exit_info_1 (ghcb ,
981+ ((u64 )apic_id << 32 ) |
982+ ((u64 )snp_vmpl << 16 ) |
983+ event );
984+ ghcb_set_sw_exit_info_2 (ghcb , __pa (vmsa ));
985+
986+ sev_es_wr_ghcb_msr (__pa (ghcb ));
987+ VMGEXIT ();
988+
989+ if (!ghcb_sw_exit_info_1_is_valid (ghcb ) ||
990+ lower_32_bits (ghcb -> save .sw_exit_info_1 )) {
991+ pr_err ("SNP AP %s error\n" , (create ? "CREATE" : "DESTROY" ));
992+ ret = - EINVAL ;
993+ }
994+
995+ __sev_put_ghcb (& state );
996+
997+ local_irq_restore (flags );
998+
999+ return ret ;
1000+ }
1001+
1002+ static int snp_set_vmsa (void * va , void * caa , int apic_id , bool make_vmsa )
1003+ {
1004+ int ret ;
1005+
1006+ if (snp_vmpl ) {
1007+ struct svsm_call call = {};
1008+ unsigned long flags ;
1009+
1010+ local_irq_save (flags );
1011+
1012+ call .caa = this_cpu_read (svsm_caa );
1013+ call .rcx = __pa (va );
1014+
1015+ if (make_vmsa ) {
1016+ /* Protocol 0, Call ID 2 */
1017+ call .rax = SVSM_CORE_CALL (SVSM_CORE_CREATE_VCPU );
1018+ call .rdx = __pa (caa );
1019+ call .r8 = apic_id ;
1020+ } else {
1021+ /* Protocol 0, Call ID 3 */
1022+ call .rax = SVSM_CORE_CALL (SVSM_CORE_DELETE_VCPU );
1023+ }
1024+
1025+ ret = svsm_perform_call_protocol (& call );
1026+
1027+ local_irq_restore (flags );
1028+ } else {
1029+ /*
1030+ * If the kernel runs at VMPL0, it can change the VMSA
1031+ * bit for a page using the RMPADJUST instruction.
1032+ * However, for the instruction to succeed it must
1033+ * target the permissions of a lesser privileged (higher
1034+ * numbered) VMPL level, so use VMPL1.
1035+ */
1036+ u64 attrs = 1 ;
1037+
1038+ if (make_vmsa )
1039+ attrs |= RMPADJUST_VMSA_PAGE_BIT ;
1040+
1041+ ret = rmpadjust ((unsigned long )va , RMP_PG_SIZE_4K , attrs );
1042+ }
1043+
1044+ return ret ;
1045+ }
1046+
1047+ static void snp_cleanup_vmsa (struct sev_es_save_area * vmsa , int apic_id )
1048+ {
1049+ int err ;
1050+
1051+ err = snp_set_vmsa (vmsa , NULL , apic_id , false);
1052+ if (err )
1053+ pr_err ("clear VMSA page failed (%u), leaking page\n" , err );
1054+ else
1055+ free_page ((unsigned long )vmsa );
1056+ }
1057+
9621058static void set_pte_enc (pte_t * kpte , int level , void * va )
9631059{
9641060 struct pte_enc_desc d = {
@@ -1005,7 +1101,8 @@ static void unshare_all_memory(void)
10051101 data = per_cpu (runtime_data , cpu );
10061102 ghcb = (unsigned long )& data -> ghcb_page ;
10071103
1008- if (addr <= ghcb && ghcb <= addr + size ) {
1104+ /* Handle the case of a huge page containing the GHCB page */
1105+ if (addr <= ghcb && ghcb < addr + size ) {
10091106 skipped_addr = true;
10101107 break ;
10111108 }
@@ -1055,11 +1152,70 @@ void snp_kexec_begin(void)
10551152 pr_warn ("Failed to stop shared<->private conversions\n" );
10561153}
10571154
1155+ /*
1156+ * Shutdown all APs except the one handling kexec/kdump and clearing
1157+ * the VMSA tag on AP's VMSA pages as they are not being used as
1158+ * VMSA page anymore.
1159+ */
1160+ static void shutdown_all_aps (void )
1161+ {
1162+ struct sev_es_save_area * vmsa ;
1163+ int apic_id , this_cpu , cpu ;
1164+
1165+ this_cpu = get_cpu ();
1166+
1167+ /*
1168+ * APs are already in HLT loop when enc_kexec_finish() callback
1169+ * is invoked.
1170+ */
1171+ for_each_present_cpu (cpu ) {
1172+ vmsa = per_cpu (sev_vmsa , cpu );
1173+
1174+ /*
1175+ * The BSP or offlined APs do not have guest allocated VMSA
1176+ * and there is no need to clear the VMSA tag for this page.
1177+ */
1178+ if (!vmsa )
1179+ continue ;
1180+
1181+ /*
1182+ * Cannot clear the VMSA tag for the currently running vCPU.
1183+ */
1184+ if (this_cpu == cpu ) {
1185+ unsigned long pa ;
1186+ struct page * p ;
1187+
1188+ pa = __pa (vmsa );
1189+ /*
1190+ * Mark the VMSA page of the running vCPU as offline
1191+ * so that is excluded and not touched by makedumpfile
1192+ * while generating vmcore during kdump.
1193+ */
1194+ p = pfn_to_online_page (pa >> PAGE_SHIFT );
1195+ if (p )
1196+ __SetPageOffline (p );
1197+ continue ;
1198+ }
1199+
1200+ apic_id = cpuid_to_apicid [cpu ];
1201+
1202+ /*
1203+ * Issue AP destroy to ensure AP gets kicked out of guest mode
1204+ * to allow using RMPADJUST to remove the VMSA tag on it's
1205+ * VMSA page.
1206+ */
1207+ vmgexit_ap_control (SVM_VMGEXIT_AP_DESTROY , vmsa , apic_id );
1208+ snp_cleanup_vmsa (vmsa , apic_id );
1209+ }
1210+
1211+ put_cpu ();
1212+ }
1213+
10581214void snp_kexec_finish (void )
10591215{
10601216 struct sev_es_runtime_data * data ;
1217+ unsigned long size , addr ;
10611218 unsigned int level , cpu ;
1062- unsigned long size ;
10631219 struct ghcb * ghcb ;
10641220 pte_t * pte ;
10651221
@@ -1069,6 +1225,8 @@ void snp_kexec_finish(void)
10691225 if (!IS_ENABLED (CONFIG_KEXEC_CORE ))
10701226 return ;
10711227
1228+ shutdown_all_aps ();
1229+
10721230 unshare_all_memory ();
10731231
10741232 /*
@@ -1085,54 +1243,11 @@ void snp_kexec_finish(void)
10851243 ghcb = & data -> ghcb_page ;
10861244 pte = lookup_address ((unsigned long )ghcb , & level );
10871245 size = page_level_size (level );
1088- set_pte_enc (pte , level , (void * )ghcb );
1089- snp_set_memory_private ((unsigned long )ghcb , (size / PAGE_SIZE ));
1090- }
1091- }
1092-
1093- static int snp_set_vmsa (void * va , void * caa , int apic_id , bool make_vmsa )
1094- {
1095- int ret ;
1096-
1097- if (snp_vmpl ) {
1098- struct svsm_call call = {};
1099- unsigned long flags ;
1100-
1101- local_irq_save (flags );
1102-
1103- call .caa = this_cpu_read (svsm_caa );
1104- call .rcx = __pa (va );
1105-
1106- if (make_vmsa ) {
1107- /* Protocol 0, Call ID 2 */
1108- call .rax = SVSM_CORE_CALL (SVSM_CORE_CREATE_VCPU );
1109- call .rdx = __pa (caa );
1110- call .r8 = apic_id ;
1111- } else {
1112- /* Protocol 0, Call ID 3 */
1113- call .rax = SVSM_CORE_CALL (SVSM_CORE_DELETE_VCPU );
1114- }
1115-
1116- ret = svsm_perform_call_protocol (& call );
1117-
1118- local_irq_restore (flags );
1119- } else {
1120- /*
1121- * If the kernel runs at VMPL0, it can change the VMSA
1122- * bit for a page using the RMPADJUST instruction.
1123- * However, for the instruction to succeed it must
1124- * target the permissions of a lesser privileged (higher
1125- * numbered) VMPL level, so use VMPL1.
1126- */
1127- u64 attrs = 1 ;
1128-
1129- if (make_vmsa )
1130- attrs |= RMPADJUST_VMSA_PAGE_BIT ;
1131-
1132- ret = rmpadjust ((unsigned long )va , RMP_PG_SIZE_4K , attrs );
1246+ /* Handle the case of a huge page containing the GHCB page */
1247+ addr = (unsigned long )ghcb & page_level_mask (level );
1248+ set_pte_enc (pte , level , (void * )addr );
1249+ snp_set_memory_private (addr , (size / PAGE_SIZE ));
11331250 }
1134-
1135- return ret ;
11361251}
11371252
11381253#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
@@ -1166,24 +1281,10 @@ static void *snp_alloc_vmsa_page(int cpu)
11661281 return page_address (p + 1 );
11671282}
11681283
1169- static void snp_cleanup_vmsa (struct sev_es_save_area * vmsa , int apic_id )
1170- {
1171- int err ;
1172-
1173- err = snp_set_vmsa (vmsa , NULL , apic_id , false);
1174- if (err )
1175- pr_err ("clear VMSA page failed (%u), leaking page\n" , err );
1176- else
1177- free_page ((unsigned long )vmsa );
1178- }
1179-
11801284static int wakeup_cpu_via_vmgexit (u32 apic_id , unsigned long start_ip )
11811285{
11821286 struct sev_es_save_area * cur_vmsa , * vmsa ;
1183- struct ghcb_state state ;
11841287 struct svsm_ca * caa ;
1185- unsigned long flags ;
1186- struct ghcb * ghcb ;
11871288 u8 sipi_vector ;
11881289 int cpu , ret ;
11891290 u64 cr4 ;
@@ -1297,33 +1398,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
12971398 }
12981399
12991400 /* Issue VMGEXIT AP Creation NAE event */
1300- local_irq_save (flags );
1301-
1302- ghcb = __sev_get_ghcb (& state );
1303-
1304- vc_ghcb_invalidate (ghcb );
1305- ghcb_set_rax (ghcb , vmsa -> sev_features );
1306- ghcb_set_sw_exit_code (ghcb , SVM_VMGEXIT_AP_CREATION );
1307- ghcb_set_sw_exit_info_1 (ghcb ,
1308- ((u64 )apic_id << 32 ) |
1309- ((u64 )snp_vmpl << 16 ) |
1310- SVM_VMGEXIT_AP_CREATE );
1311- ghcb_set_sw_exit_info_2 (ghcb , __pa (vmsa ));
1312-
1313- sev_es_wr_ghcb_msr (__pa (ghcb ));
1314- VMGEXIT ();
1315-
1316- if (!ghcb_sw_exit_info_1_is_valid (ghcb ) ||
1317- lower_32_bits (ghcb -> save .sw_exit_info_1 )) {
1318- pr_err ("SNP AP Creation error\n" );
1319- ret = - EINVAL ;
1320- }
1321-
1322- __sev_put_ghcb (& state );
1323-
1324- local_irq_restore (flags );
1325-
1326- /* Perform cleanup if there was an error */
1401+ ret = vmgexit_ap_control (SVM_VMGEXIT_AP_CREATE , vmsa , apic_id );
13271402 if (ret ) {
13281403 snp_cleanup_vmsa (vmsa , apic_id );
13291404 vmsa = NULL ;
0 commit comments