@@ -77,6 +77,32 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
7777 panic ("TDCALL %lld failed (Buggy TDX module!)\n" , fn );
7878}
7979
80+ /* Read TD-scoped metadata */
81+ static inline u64 tdg_vm_rd (u64 field , u64 * value )
82+ {
83+ struct tdx_module_args args = {
84+ .rdx = field ,
85+ };
86+ u64 ret ;
87+
88+ ret = __tdcall_ret (TDG_VM_RD , & args );
89+ * value = args .r8 ;
90+
91+ return ret ;
92+ }
93+
94+ /* Write TD-scoped metadata */
95+ static inline u64 tdg_vm_wr (u64 field , u64 value , u64 mask )
96+ {
97+ struct tdx_module_args args = {
98+ .rdx = field ,
99+ .r8 = value ,
100+ .r9 = mask ,
101+ };
102+
103+ return __tdcall (TDG_VM_WR , & args );
104+ }
105+
80106/**
81107 * tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
82108 * subtype 0) using TDG.MR.REPORT TDCALL.
@@ -167,7 +193,87 @@ static void __noreturn tdx_panic(const char *msg)
167193 __tdx_hypercall (& args );
168194}
169195
170- static void tdx_parse_tdinfo (u64 * cc_mask )
196+ /*
197+ * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
198+ * that no #VE will be delivered for accesses to TD-private memory.
199+ *
200+ * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
201+ * controls if the guest will receive such #VE with TD attribute
202+ * ATTR_SEPT_VE_DISABLE.
203+ *
204+ * Newer TDX modules allow the guest to control if it wants to receive SEPT
205+ * violation #VEs.
206+ *
207+ * Check if the feature is available and disable SEPT #VE if possible.
208+ *
209+ * If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
210+ * attribute is no longer reliable. It reflects the initial state of the
211+ * control for the TD, but it will not be updated if someone (e.g. bootloader)
212+ * changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
213+ * determine if SEPT #VEs are enabled or disabled.
214+ */
215+ static void disable_sept_ve (u64 td_attr )
216+ {
217+ const char * msg = "TD misconfiguration: SEPT #VE has to be disabled" ;
218+ bool debug = td_attr & ATTR_DEBUG ;
219+ u64 config , controls ;
220+
221+ /* Is this TD allowed to disable SEPT #VE */
222+ tdg_vm_rd (TDCS_CONFIG_FLAGS , & config );
223+ if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE )) {
224+ /* No SEPT #VE controls for the guest: check the attribute */
225+ if (td_attr & ATTR_SEPT_VE_DISABLE )
226+ return ;
227+
228+ /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
229+ if (debug )
230+ pr_warn ("%s\n" , msg );
231+ else
232+ tdx_panic (msg );
233+ return ;
234+ }
235+
236+ /* Check if SEPT #VE has been disabled before us */
237+ tdg_vm_rd (TDCS_TD_CTLS , & controls );
238+ if (controls & TD_CTLS_PENDING_VE_DISABLE )
239+ return ;
240+
241+ /* Keep #VEs enabled for splats in debugging environments */
242+ if (debug )
243+ return ;
244+
245+ /* Disable SEPT #VEs */
246+ tdg_vm_wr (TDCS_TD_CTLS , TD_CTLS_PENDING_VE_DISABLE ,
247+ TD_CTLS_PENDING_VE_DISABLE );
248+ }
249+
250+ /*
251+ * TDX 1.0 generates a #VE when accessing topology-related CPUID leafs (0xB and
252+ * 0x1F) and the X2APIC_APICID MSR. The kernel returns all zeros on CPUID #VEs.
253+ * In practice, this means that the kernel can only boot with a plain topology.
254+ * Any complications will cause problems.
255+ *
256+ * The ENUM_TOPOLOGY feature allows the VMM to provide topology information.
257+ * Enabling the feature eliminates topology-related #VEs: the TDX module
258+ * virtualizes accesses to the CPUID leafs and the MSR.
259+ *
260+ * Enable ENUM_TOPOLOGY if it is available.
261+ */
262+ static void enable_cpu_topology_enumeration (void )
263+ {
264+ u64 configured ;
265+
266+ /* Has the VMM provided a valid topology configuration? */
267+ tdg_vm_rd (TDCS_TOPOLOGY_ENUM_CONFIGURED , & configured );
268+ if (!configured ) {
269+ pr_err ("VMM did not configure X2APIC_IDs properly\n" );
270+ return ;
271+ }
272+
273+ tdg_vm_wr (TDCS_TD_CTLS , TD_CTLS_ENUM_TOPOLOGY , TD_CTLS_ENUM_TOPOLOGY );
274+ }
275+
276+ static void tdx_setup (u64 * cc_mask )
171277{
172278 struct tdx_module_args args = {};
173279 unsigned int gpa_width ;
@@ -192,21 +298,13 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
192298 gpa_width = args .rcx & GENMASK (5 , 0 );
193299 * cc_mask = BIT_ULL (gpa_width - 1 );
194300
195- /*
196- * The kernel can not handle #VE's when accessing normal kernel
197- * memory. Ensure that no #VE will be delivered for accesses to
198- * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
199- */
200301 td_attr = args .rdx ;
201- if (!(td_attr & ATTR_SEPT_VE_DISABLE )) {
202- const char * msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set." ;
203302
204- /* Relax SEPT_VE_DISABLE check for debug TD. */
205- if (td_attr & ATTR_DEBUG )
206- pr_warn ("%s\n" , msg );
207- else
208- tdx_panic (msg );
209- }
303+ /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
304+ tdg_vm_wr (TDCS_NOTIFY_ENABLES , 0 , -1ULL );
305+
306+ disable_sept_ve (td_attr );
307+ enable_cpu_topology_enumeration ();
210308}
211309
212310/*
@@ -924,10 +1022,6 @@ static void tdx_kexec_finish(void)
9241022
9251023void __init tdx_early_init (void )
9261024{
927- struct tdx_module_args args = {
928- .rdx = TDCS_NOTIFY_ENABLES ,
929- .r9 = -1ULL ,
930- };
9311025 u64 cc_mask ;
9321026 u32 eax , sig [3 ];
9331027
@@ -942,11 +1036,11 @@ void __init tdx_early_init(void)
9421036 setup_force_cpu_cap (X86_FEATURE_TSC_RELIABLE );
9431037
9441038 cc_vendor = CC_VENDOR_INTEL ;
945- tdx_parse_tdinfo (& cc_mask );
946- cc_set_mask (cc_mask );
9471039
948- /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
949- tdcall (TDG_VM_WR , & args );
1040+ /* Configure the TD */
1041+ tdx_setup (& cc_mask );
1042+
1043+ cc_set_mask (cc_mask );
9501044
9511045 /*
9521046 * All bits above GPA width are reserved and kernel treats shared bit
0 commit comments