@@ -78,6 +78,32 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
7878 panic ("TDCALL %lld failed (Buggy TDX module!)\n" , fn );
7979}
8080
81+ /* Read TD-scoped metadata */
82+ static inline u64 tdg_vm_rd (u64 field , u64 * value )
83+ {
84+ struct tdx_module_args args = {
85+ .rdx = field ,
86+ };
87+ u64 ret ;
88+
89+ ret = __tdcall_ret (TDG_VM_RD , & args );
90+ * value = args .r8 ;
91+
92+ return ret ;
93+ }
94+
95+ /* Write TD-scoped metadata */
96+ static inline u64 tdg_vm_wr (u64 field , u64 value , u64 mask )
97+ {
98+ struct tdx_module_args args = {
99+ .rdx = field ,
100+ .r8 = value ,
101+ .r9 = mask ,
102+ };
103+
104+ return __tdcall (TDG_VM_WR , & args );
105+ }
106+
81107/**
82108 * tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
83109 * subtype 0) using TDG.MR.REPORT TDCALL.
@@ -168,7 +194,87 @@ static void __noreturn tdx_panic(const char *msg)
168194 __tdx_hypercall (& args );
169195}
170196
171- static void tdx_parse_tdinfo (u64 * cc_mask )
197+ /*
198+ * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
199+ * that no #VE will be delivered for accesses to TD-private memory.
200+ *
201+ * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
202+ * controls if the guest will receive such #VE with TD attribute
203+ * ATTR_SEPT_VE_DISABLE.
204+ *
205+ * Newer TDX modules allow the guest to control if it wants to receive SEPT
206+ * violation #VEs.
207+ *
208+ * Check if the feature is available and disable SEPT #VE if possible.
209+ *
210+ * If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
211+ * attribute is no longer reliable. It reflects the initial state of the
212+ * control for the TD, but it will not be updated if someone (e.g. bootloader)
213+ * changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
214+ * determine if SEPT #VEs are enabled or disabled.
215+ */
216+ static void disable_sept_ve (u64 td_attr )
217+ {
218+ const char * msg = "TD misconfiguration: SEPT #VE has to be disabled" ;
219+ bool debug = td_attr & ATTR_DEBUG ;
220+ u64 config , controls ;
221+
222+ /* Is this TD allowed to disable SEPT #VE */
223+ tdg_vm_rd (TDCS_CONFIG_FLAGS , & config );
224+ if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE )) {
225+ /* No SEPT #VE controls for the guest: check the attribute */
226+ if (td_attr & ATTR_SEPT_VE_DISABLE )
227+ return ;
228+
229+ /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
230+ if (debug )
231+ pr_warn ("%s\n" , msg );
232+ else
233+ tdx_panic (msg );
234+ return ;
235+ }
236+
237+ /* Check if SEPT #VE has been disabled before us */
238+ tdg_vm_rd (TDCS_TD_CTLS , & controls );
239+ if (controls & TD_CTLS_PENDING_VE_DISABLE )
240+ return ;
241+
242+ /* Keep #VEs enabled for splats in debugging environments */
243+ if (debug )
244+ return ;
245+
246+ /* Disable SEPT #VEs */
247+ tdg_vm_wr (TDCS_TD_CTLS , TD_CTLS_PENDING_VE_DISABLE ,
248+ TD_CTLS_PENDING_VE_DISABLE );
249+ }
250+
251+ /*
252+ * TDX 1.0 generates a #VE when accessing topology-related CPUID leafs (0xB and
253+ * 0x1F) and the X2APIC_APICID MSR. The kernel returns all zeros on CPUID #VEs.
254+ * In practice, this means that the kernel can only boot with a plain topology.
255+ * Any complications will cause problems.
256+ *
257+ * The ENUM_TOPOLOGY feature allows the VMM to provide topology information.
258+ * Enabling the feature eliminates topology-related #VEs: the TDX module
259+ * virtualizes accesses to the CPUID leafs and the MSR.
260+ *
261+ * Enable ENUM_TOPOLOGY if it is available.
262+ */
263+ static void enable_cpu_topology_enumeration (void )
264+ {
265+ u64 configured ;
266+
267+ /* Has the VMM provided a valid topology configuration? */
268+ tdg_vm_rd (TDCS_TOPOLOGY_ENUM_CONFIGURED , & configured );
269+ if (!configured ) {
270+ pr_err ("VMM did not configure X2APIC_IDs properly\n" );
271+ return ;
272+ }
273+
274+ tdg_vm_wr (TDCS_TD_CTLS , TD_CTLS_ENUM_TOPOLOGY , TD_CTLS_ENUM_TOPOLOGY );
275+ }
276+
277+ static void tdx_setup (u64 * cc_mask )
172278{
173279 struct tdx_module_args args = {};
174280 unsigned int gpa_width ;
@@ -193,21 +299,13 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
193299 gpa_width = args .rcx & GENMASK (5 , 0 );
194300 * cc_mask = BIT_ULL (gpa_width - 1 );
195301
196- /*
197- * The kernel can not handle #VE's when accessing normal kernel
198- * memory. Ensure that no #VE will be delivered for accesses to
199- * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
200- */
201302 td_attr = args .rdx ;
202- if (!(td_attr & ATTR_SEPT_VE_DISABLE )) {
203- const char * msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set." ;
204303
205- /* Relax SEPT_VE_DISABLE check for debug TD. */
206- if (td_attr & ATTR_DEBUG )
207- pr_warn ("%s\n" , msg );
208- else
209- tdx_panic (msg );
210- }
304+ /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
305+ tdg_vm_wr (TDCS_NOTIFY_ENABLES , 0 , -1ULL );
306+
307+ disable_sept_ve (td_attr );
308+ enable_cpu_topology_enumeration ();
211309}
212310
213311/*
@@ -929,10 +1027,6 @@ static void tdx_kexec_finish(void)
9291027
9301028void __init tdx_early_init (void )
9311029{
932- struct tdx_module_args args = {
933- .rdx = TDCS_NOTIFY_ENABLES ,
934- .r9 = -1ULL ,
935- };
9361030 u64 cc_mask ;
9371031 u32 eax , sig [3 ];
9381032
@@ -947,11 +1041,11 @@ void __init tdx_early_init(void)
9471041 setup_force_cpu_cap (X86_FEATURE_TSC_RELIABLE );
9481042
9491043 cc_vendor = CC_VENDOR_INTEL ;
950- tdx_parse_tdinfo (& cc_mask );
951- cc_set_mask (cc_mask );
9521044
953- /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
954- tdcall (TDG_VM_WR , & args );
1045+ /* Configure the TD */
1046+ tdx_setup (& cc_mask );
1047+
1048+ cc_set_mask (cc_mask );
9551049
9561050 /*
9571051 * All bits above GPA width are reserved and kernel treats shared bit
0 commit comments