From 23ec2a4c80b1f428b25b11ca69536922d00a5fe6 Mon Sep 17 00:00:00 2001 From: I'm Matheus Date: Thu, 20 Nov 2025 09:03:10 -0300 Subject: [PATCH 1/2] [CPU] Detect and restore corrupted global_mutex pointer to prevent crashes --- src/xenia/cpu/function.cc | 16 ++++++++++++++++ src/xenia/cpu/processor.cc | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/xenia/cpu/function.cc b/src/xenia/cpu/function.cc index ebd8c5ba19e..50ec3d178a4 100644 --- a/src/xenia/cpu/function.cc +++ b/src/xenia/cpu/function.cc @@ -44,6 +44,12 @@ bool BuiltinFunction::Call(ThreadState* thread_state, uint32_t return_address) { } assert_not_null(handler_); + // Detect corruption of builtin argument pointers (e.g., global mutex + // accidentally overwritten by guest code). A very low non-null address is + // almost certainly invalid here and has led to crashes in unlock(). + if (arg0_ && reinterpret_cast(arg0_) < 0x1000) { + XELOGE("BuiltinFunction '{}' arg0 pointer appears corrupt: {:p}", name(), arg0_); + } handler_(thread_state->context(), arg0_, arg1_); if (original_thread_state != thread_state) { @@ -129,6 +135,16 @@ bool GuestFunction::Call(ThreadState* thread_state, uint32_t return_address) { ThreadState::Bind(thread_state); } + // Validate the global mutex pointer before executing guest code to help + // diagnose crashes where std::recursive_mutex::unlock() sees an invalid + // 'this' (e.g., 0x1). 
+ auto ctx = thread_state->context(); + auto& expected_global_mutex = xe::global_critical_region::mutex(); + if (ctx->global_mutex != &expected_global_mutex) { + XELOGE("GuestFunction '{}' executing with corrupted global_mutex {:p}; restoring", name(), ctx->global_mutex); + ctx->global_mutex = &expected_global_mutex; + } + bool result = CallImpl(thread_state, return_address); if (original_thread_state != thread_state) { diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index eb63a1abf0c..999cffaec2a 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -337,6 +337,16 @@ bool Processor::Execute(ThreadState* thread_state, uint32_t address) { auto context = thread_state->context(); + // Defensive: ensure the context's global mutex pointer hasn't been clobbered + // by guest code scribbling over the red zone. A corrupt pointer (like 0x1) + // leads to a crash when unlock() is invoked by translated code paths. + auto& expected_global_mutex = xe::global_critical_region::mutex(); + if (context->global_mutex != &expected_global_mutex) { + uintptr_t raw_ptr = reinterpret_cast(context->global_mutex); + XELOGE("PPCContext global_mutex pointer corrupted (was {:p} / 0x{:X}), restoring", context->global_mutex, raw_ptr); + context->global_mutex = &expected_global_mutex; + } + // Pad out stack a bit, as some games seem to overwrite the caller by about // 16 to 32b. context->r[1] -= 64 + 112; From 493af44de15d970b5574cfcd4e8dc9ba8ef24e1a Mon Sep 17 00:00:00 2001 From: I'm Matheus Date: Thu, 20 Nov 2025 11:58:44 -0300 Subject: [PATCH 2/2] [CPU] Detect and prevent corrupted global_mutex pointer to avoid crashes Adds comprehensive validation of critical pointers in PPCContext and BuiltinFunction to detect memory corruption before it causes crashes in mutex operations. 
Key improvements: - Pre-execution validation of global_mutex pointer in Processor::Execute - Post-execution validation to identify which function caused corruption - Enhanced BuiltinFunction arg pointer validation with detailed errors - Added validation in GuestFunction::Call before and after execution These checks help identify the source of memory corruption (likely guest code buffer overflows writing beyond VMX register arrays) and provide detailed diagnostic information including function addresses, thread IDs, and stack pointers. The assertions ensure the emulator fails fast with clear error messages rather than crashing with cryptic segfaults in std::recursive_mutex::unlock(). Note: this replaces the restore-and-continue behavior introduced in PATCH 1/2; a mismatched pointer is now logged, asserted on, and the call returns false, and the post-call early returns exit before the existing ThreadState re-bind (function.cc) and lr/r1 restore (processor.cc) that follow the call. --- src/xenia/cpu/function.cc | 54 ++++++++++++++++++++++++++++++-------- src/xenia/cpu/processor.cc | 46 +++++++++++++++++++++++++++----- 2 files changed, 82 insertions(+), 18 deletions(-) diff --git a/src/xenia/cpu/function.cc b/src/xenia/cpu/function.cc index 50ec3d178a4..296122aa6b4 100644 --- a/src/xenia/cpu/function.cc +++ b/src/xenia/cpu/function.cc @@ -44,12 +44,22 @@ bool BuiltinFunction::Call(ThreadState* thread_state, uint32_t return_address) { } assert_not_null(handler_); - // Detect corruption of builtin argument pointers (e.g., global mutex - // accidentally overwritten by guest code). A very low non-null address is - // almost certainly invalid here and has led to crashes in unlock(). - if (arg0_ && reinterpret_cast(arg0_) < 0x1000) { - XELOGE("BuiltinFunction '{}' arg0 pointer appears corrupt: {:p}", name(), arg0_); + + // Detect corrupted builtin argument pointers before calling the handler. + // A very low non-null address (< 0x10000) is almost certainly invalid and + // indicates memory corruption, likely from guest code buffer overflow. + // This check helps identify the problem before it causes a crash in the + // mutex operations within builtin handlers. 
+ if (arg0_ && reinterpret_cast(arg0_) < 0x10000) { + XELOGE( + "BuiltinFunction '{}' detected corrupted arg0 pointer: {:p}. " + "This likely indicates memory corruption from guest code. " + "The emulation cannot continue safely.", + name(), arg0_); + assert_always("BuiltinFunction arg0 corrupted - guest code memory corruption detected"); + return false; } + handler_(thread_state->context(), arg0_, arg1_); if (original_thread_state != thread_state) { @@ -135,18 +145,40 @@ bool GuestFunction::Call(ThreadState* thread_state, uint32_t return_address) { ThreadState::Bind(thread_state); } - // Validate the global mutex pointer before executing guest code to help - // diagnose crashes where std::recursive_mutex::unlock() sees an invalid - // 'this' (e.g., 0x1). + // Validate PPCContext critical pointers before executing guest code. + // This detects corruption that may have occurred from a previous function. auto ctx = thread_state->context(); auto& expected_global_mutex = xe::global_critical_region::mutex(); if (ctx->global_mutex != &expected_global_mutex) { - XELOGE("GuestFunction '{}' executing with corrupted global_mutex {:p}; restoring", name(), ctx->global_mutex); - ctx->global_mutex = &expected_global_mutex; + uintptr_t corrupt_ptr = reinterpret_cast(ctx->global_mutex); + XELOGE( + "GuestFunction '{}' at 0x{:08X} called with corrupted PPCContext. " + "global_mutex pointer is {:p} / 0x{:X} (expected {:p}). " + "Corruption likely occurred in a previous function call.", + name(), address(), ctx->global_mutex, corrupt_ptr, + static_cast(&expected_global_mutex)); + assert_always( + "PPCContext already corrupted before function execution. Previous " + "guest function likely has buffer overflow."); + return false; } bool result = CallImpl(thread_state, return_address); + // Validate context after execution to catch corruption during this function. 
+ if (ctx->global_mutex != &expected_global_mutex) { + uintptr_t corrupt_ptr = reinterpret_cast(ctx->global_mutex); + XELOGE( + "GuestFunction '{}' at 0x{:08X} CORRUPTED PPCContext during " + "execution. global_mutex changed to {:p} / 0x{:X}. " + "This function has a buffer overflow or invalid memory write.", + name(), address(), ctx->global_mutex, corrupt_ptr); + assert_always( + "Memory corruption detected in guest function execution. " + "The function has a buffer overflow bug."); + return false; + } + if (original_thread_state != thread_state) { ThreadState::Bind(original_thread_state); } @@ -155,4 +187,4 @@ bool GuestFunction::Call(ThreadState* thread_state, uint32_t return_address) { } } // namespace cpu -} // namespace xe +} // namespace xe \ No newline at end of file diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index 999cffaec2a..efcfd8f4e4d 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -337,14 +337,29 @@ bool Processor::Execute(ThreadState* thread_state, uint32_t address) { auto context = thread_state->context(); - // Defensive: ensure the context's global mutex pointer hasn't been clobbered - // by guest code scribbling over the red zone. A corrupt pointer (like 0x1) - // leads to a crash when unlock() is invoked by translated code paths. + // Validate critical PPCContext pointers before executing guest code. + // The global_mutex pointer is particularly susceptible to corruption from + // guest code writing beyond array bounds (e.g., VMX register array overflow). + // Detecting corruption here helps identify the source before crashes occur. 
auto& expected_global_mutex = xe::global_critical_region::mutex(); if (context->global_mutex != &expected_global_mutex) { - uintptr_t raw_ptr = reinterpret_cast(context->global_mutex); - XELOGE("PPCContext global_mutex pointer corrupted (was {:p} / 0x{:X}), restoring", context->global_mutex, raw_ptr); - context->global_mutex = &expected_global_mutex; + uintptr_t corrupt_ptr = reinterpret_cast(context->global_mutex); + XELOGE( + "PPCContext global_mutex pointer corrupted (expected {:p}, got {:p} / " + "0x{:X}). This indicates guest code is writing beyond allocated " + "boundaries. Common causes: VMX register overflow, stack corruption, or " + "invalid memory access in translated code. Thread ID: {}", + static_cast(&expected_global_mutex), context->global_mutex, + corrupt_ptr, thread_state->thread_id()); + + // Log additional context for debugging + XELOGE(" Function address: 0x{:08X}", address); + XELOGE(" Stack pointer (r1): 0x{:08X}", context->r[1]); + + assert_always( + "PPCContext corruption detected - cannot continue safely. Check for " + "guest code buffer overflows or emulator bugs in array bound checks."); + return false; } // Pad out stack a bit, as some games seem to overwrite the caller by about @@ -359,6 +374,23 @@ bool Processor::Execute(ThreadState* thread_state, uint32_t address) { // Execute the function. auto result = function->Call(thread_state, uint32_t(context->lr)); + // Validate context integrity after execution to detect corruption during + // the function call. This helps narrow down which guest functions cause + // memory corruption. + if (context->global_mutex != &expected_global_mutex) { + uintptr_t corrupt_ptr = reinterpret_cast(context->global_mutex); + XELOGE( + "PPCContext global_mutex corrupted DURING function execution at " + "0x{:08X}. Pointer changed from {:p} to {:p} / 0x{:X}. 
This " + "indicates the executed function wrote beyond its allocated memory.", + address, static_cast(&expected_global_mutex), + context->global_mutex, corrupt_ptr); + assert_always( + "Memory corruption detected during function execution. The executed " + "guest code has a buffer overflow or invalid memory write."); + return false; + } + context->lr = previous_lr; context->r[1] += 64 + 112; @@ -1312,4 +1344,4 @@ uint32_t Processor::CalculateNextGuestInstruction(ThreadDebugInfo* thread_info, } } // namespace cpu -} // namespace xe +} // namespace xe \ No newline at end of file