Skip to content

Commit fe74003

Browse files
authored
Fixes a race condition in killing Sandboxes (#959)
Signed-off-by: Simon Davies <simongdavies@users.noreply.github.com>
1 parent f3acc2e commit fe74003

File tree

8 files changed

+1145
-196
lines changed

8 files changed

+1145
-196
lines changed

Justfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,9 @@ test-integration guest target=default-target features="":
168168
{{if os() == "windows" { "$env:" } else { "" } }}GUEST="{{guest}}"{{if os() == "windows" { ";" } else { "" } }} {{ cargo-cmd }} test --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test integration_test execute_on_heap {{ if features =="" {""} else {"--features " + features} }} -- --ignored
169169

170170
@# run the rest of the integration tests
171-
{{if os() == "windows" { "$env:" } else { "" } }}GUEST="{{guest}}"{{if os() == "windows" { ";" } else { "" } }} {{ cargo-cmd }} test -p hyperlight-host {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F init-paging," + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test '*'
171+
@# skip interrupt_random_kill_stress_test and then run it explicitly so we can see the output more
172+
{{if os() == "windows" { "$env:" } else { "" } }}GUEST="{{guest}}"{{if os() == "windows" { ";" } else { "" } }} {{ cargo-cmd }} test -p hyperlight-host {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F init-paging," + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test '*' -- --skip interrupt_random_kill_stress_test
173+
{{if os() == "windows" { "$env:" } else { "" } }}GUEST="{{guest}}"{{if os() == "windows" { ";" } else { "" } }} {{ cargo-cmd }} test -p hyperlight-host {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F init-paging," + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} {{ target-triple-flag }} --test integration_test interrupt_random_kill_stress_test -- --nocapture --exact
172174

173175
# tests compilation with no default features on different platforms
174176
test-compilation-no-default-features target=default-target:

src/hyperlight_host/src/hypervisor/hyperv_linux.rs

Lines changed: 43 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,8 @@ impl HypervLinuxDriver {
390390

391391
let interrupt_handle = Arc::new(LinuxInterruptHandle {
392392
running: AtomicU64::new(0),
393-
cancel_requested: AtomicBool::new(false),
393+
cancel_requested: AtomicU64::new(0),
394+
call_active: AtomicBool::new(false),
394395
#[cfg(gdb)]
395396
debug_interrupt: AtomicBool::new(false),
396397
#[cfg(all(
@@ -658,17 +659,14 @@ impl Hypervisor for HypervLinuxDriver {
658659

659660
self.interrupt_handle
660661
.tid
661-
.store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed);
662-
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
663-
// Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
664-
self.interrupt_handle
665-
.set_running_and_increment_generation()
666-
.map_err(|e| {
667-
new_error!(
668-
"Error setting running state and incrementing generation: {}",
669-
e
670-
)
671-
})?;
662+
.store(unsafe { libc::pthread_self() as u64 }, Ordering::Release);
663+
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
664+
// (after set_running_bit but before checking cancel_requested):
665+
// - kill() will stamp cancel_requested with the current generation
666+
// - We will check cancel_requested below and skip the VcpuFd::run() call
667+
// - This is the desired behavior - the kill takes effect immediately
668+
let generation = self.interrupt_handle.set_running_bit();
669+
672670
#[cfg(not(gdb))]
673671
let debug_interrupt = false;
674672
#[cfg(gdb)]
@@ -677,14 +675,16 @@ impl Hypervisor for HypervLinuxDriver {
677675
.debug_interrupt
678676
.load(Ordering::Relaxed);
679677

680-
// Don't run the vcpu if `cancel_requested` is true
678+
// Don't run the vcpu if `cancel_requested` is set for our generation
681679
//
682-
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
683-
// Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call
680+
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
681+
// (after checking cancel_requested but before vcpu.run()):
682+
// - kill() will stamp cancel_requested with the current generation
683+
// - We will proceed with vcpu.run(), but signals will be sent to interrupt it
684+
// - The vcpu will be interrupted and return EINTR (handled below)
684685
let exit_reason = if self
685686
.interrupt_handle
686-
.cancel_requested
687-
.load(Ordering::Relaxed)
687+
.is_cancel_requested_for_generation(generation)
688688
|| debug_interrupt
689689
{
690690
Err(mshv_ioctls::MshvError::from(libc::EINTR))
@@ -705,27 +705,32 @@ impl Hypervisor for HypervLinuxDriver {
705705
#[cfg(mshv3)]
706706
self.vcpu_fd.run()
707707
};
708-
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
709-
// Then signals will be sent to this thread until `running` is set to false.
710-
// This is fine since the signal handler is a no-op.
708+
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
709+
// (after vcpu.run() returns but before clear_running_bit):
710+
// - kill() continues sending signals to this thread (running bit is still set)
711+
// - The signals are harmless (no-op handler), we just need to check cancel_requested
712+
// - We load cancel_requested below to determine if this run was cancelled
711713
let cancel_requested = self
712714
.interrupt_handle
713-
.cancel_requested
714-
.load(Ordering::Relaxed);
715+
.is_cancel_requested_for_generation(generation);
715716
#[cfg(gdb)]
716717
let debug_interrupt = self
717718
.interrupt_handle
718719
.debug_interrupt
719720
.load(Ordering::Relaxed);
720-
// Note: if a `InterruptHandle::kill()` called while this thread is **here**
721-
// Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**.
722-
// Additionally signals will be sent to this thread until `running` is set to false.
723-
// This is fine since the signal handler is a no-op.
721+
// Note: if `InterruptHandle::kill()` is called while this thread is **here**
722+
// (after loading cancel_requested but before clear_running_bit):
723+
// - kill() stamps cancel_requested with the CURRENT generation (not the one we just loaded)
724+
// - kill() continues sending signals until running bit is cleared
725+
// - The newly stamped cancel_requested will affect the NEXT vcpu.run() call
726+
// - Signals sent now are harmless (no-op handler)
724727
self.interrupt_handle.clear_running_bit();
725-
// At this point, `running` is false so no more signals will be sent to this thread,
726-
// but we may still receive async signals that were sent before this point.
727-
// To prevent those signals from interrupting subsequent calls to `run()`,
728-
// we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below).
728+
// At this point, running bit is clear so kill() will stop sending signals.
729+
// However, we may still receive delayed signals that were sent before clear_running_bit.
730+
// These stale signals are harmless because:
731+
// - The signal handler is a no-op
732+
// - We check generation matching in cancel_requested before treating EINTR as cancellation
733+
// - If generation doesn't match, we return Retry instead of Cancelled
729734
let result = match exit_reason {
730735
Ok(m) => match m.header.message_type {
731736
HALT_MESSAGE => {
@@ -805,14 +810,16 @@ impl Hypervisor for HypervLinuxDriver {
805810
}
806811
},
807812
Err(e) => match e.errno() {
808-
// we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled
813+
// We send a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR
809814
libc::EINTR => {
810-
// If cancellation was not requested for this specific vm, the vcpu was interrupted because of debug interrupt or
811-
// a stale signal that meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it
815+
// Check if cancellation was requested for THIS specific generation.
816+
// If not, the EINTR came from:
817+
// - A debug interrupt (if GDB is enabled)
818+
// - A stale signal from a previous guest call (generation mismatch)
819+
// - A signal meant for a different sandbox on the same thread
820+
// In these cases, we return Retry to continue execution.
812821
if cancel_requested {
813-
self.interrupt_handle
814-
.cancel_requested
815-
.store(false, Ordering::Relaxed);
822+
self.interrupt_handle.clear_cancel_requested();
816823
HyperlightExit::Cancelled()
817824
} else {
818825
#[cfg(gdb)]

src/hyperlight_host/src/hypervisor/hyperv_windows.rs

Lines changed: 101 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ limitations under the License.
1717
use std::fmt;
1818
use std::fmt::{Debug, Formatter};
1919
use std::string::String;
20-
use std::sync::atomic::{AtomicBool, Ordering};
20+
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
2121
use std::sync::{Arc, Mutex};
2222

2323
use log::LevelFilter;
@@ -327,10 +327,11 @@ impl HypervWindowsDriver {
327327
};
328328

329329
let interrupt_handle = Arc::new(WindowsInterruptHandle {
330-
running: AtomicBool::new(false),
331-
cancel_requested: AtomicBool::new(false),
330+
running: AtomicU64::new(0),
331+
cancel_requested: AtomicU64::new(0),
332332
#[cfg(gdb)]
333333
debug_interrupt: AtomicBool::new(false),
334+
call_active: AtomicBool::new(false),
334335
partition_handle,
335336
dropped: AtomicBool::new(false),
336337
});
@@ -549,7 +550,8 @@ impl Hypervisor for HypervWindowsDriver {
549550
&mut self,
550551
#[cfg(feature = "trace_guest")] tc: &mut crate::sandbox::trace::TraceContext,
551552
) -> Result<super::HyperlightExit> {
552-
self.interrupt_handle.running.store(true, Ordering::Relaxed);
553+
// Get current generation and set running bit
554+
let generation = self.interrupt_handle.set_running_bit();
553555

554556
#[cfg(not(gdb))]
555557
let debug_interrupt = false;
@@ -559,11 +561,10 @@ impl Hypervisor for HypervWindowsDriver {
559561
.debug_interrupt
560562
.load(Ordering::Relaxed);
561563

562-
// Don't run the vcpu if `cancel_requested` is true
564+
// Check if cancellation was requested for THIS generation
563565
let exit_context = if self
564566
.interrupt_handle
565-
.cancel_requested
566-
.load(Ordering::Relaxed)
567+
.is_cancel_requested_for_generation(generation)
567568
|| debug_interrupt
568569
{
569570
WHV_RUN_VP_EXIT_CONTEXT {
@@ -578,12 +579,21 @@ impl Hypervisor for HypervWindowsDriver {
578579

579580
self.processor.run()?
580581
};
581-
self.interrupt_handle
582-
.cancel_requested
583-
.store(false, Ordering::Relaxed);
584-
self.interrupt_handle
585-
.running
586-
.store(false, Ordering::Relaxed);
582+
583+
// Clear running bit
584+
self.interrupt_handle.clear_running_bit();
585+
586+
let is_canceled = exit_context.ExitReason == WHV_RUN_VP_EXIT_REASON(8193i32); // WHvRunVpExitReasonCanceled
587+
588+
// Check if this was a manual cancellation (vs internal Windows cancellation)
589+
let cancel_was_requested_manually = self
590+
.interrupt_handle
591+
.is_cancel_requested_for_generation(generation);
592+
593+
// Only clear cancel_requested if we're actually processing a cancellation for this generation
594+
if is_canceled && cancel_was_requested_manually {
595+
self.interrupt_handle.clear_cancel_requested();
596+
}
587597

588598
#[cfg(gdb)]
589599
let debug_interrupt = self
@@ -659,12 +669,32 @@ impl Hypervisor for HypervWindowsDriver {
659669
// return a special exit reason so that the gdb thread can handle it
660670
// and resume execution
661671
HyperlightExit::Debug(VcpuStopReason::Interrupt)
672+
} else if !cancel_was_requested_manually {
673+
// This was an internal cancellation
674+
// The virtualization stack can use this function to return the control
675+
// of a virtual processor back to the virtualization stack in case it
676+
// needs to change the state of a VM or to inject an event into the processor
677+
// see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvcancelrunvirtualprocessor#remarks
678+
debug!("Internal cancellation detected, returning Retry error");
679+
HyperlightExit::Retry()
662680
} else {
663681
HyperlightExit::Cancelled()
664682
}
665683

666684
#[cfg(not(gdb))]
667-
HyperlightExit::Cancelled()
685+
{
686+
if !cancel_was_requested_manually {
687+
// This was an internal cancellation
688+
// The virtualization stack can use this function to return the control
689+
// of a virtual processor back to the virtualization stack in case it
690+
// needs to change the state of a VM or to inject an event into the processor
691+
// see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvcancelrunvirtualprocessor#remarks
692+
debug!("Internal cancellation detected, returning Retry error");
693+
HyperlightExit::Retry()
694+
} else {
695+
HyperlightExit::Cancelled()
696+
}
697+
}
668698
}
669699
#[cfg(gdb)]
670700
WHV_RUN_VP_EXIT_REASON(4098i32) => {
@@ -964,30 +994,77 @@ impl Drop for HypervWindowsDriver {
964994

965995
#[derive(Debug)]
966996
pub struct WindowsInterruptHandle {
967-
// `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag.
968-
running: AtomicBool,
969-
cancel_requested: AtomicBool,
997+
/// Combined running flag (bit 63) and generation counter (bits 0-62).
998+
///
999+
/// The generation increments with each guest function call to prevent
1000+
/// stale cancellations from affecting new calls (ABA problem).
1001+
///
1002+
/// Layout: `[running:1 bit][generation:63 bits]`
1003+
running: AtomicU64,
1004+
1005+
/// Combined cancel_requested flag (bit 63) and generation counter (bits 0-62).
1006+
///
1007+
/// When kill() is called, this stores the current generation along with
1008+
/// the cancellation flag. The VCPU only honors the cancellation if the
1009+
/// generation matches its current generation.
1010+
///
1011+
/// Layout: `[cancel_requested:1 bit][generation:63 bits]`
1012+
cancel_requested: AtomicU64,
1013+
9701014
// This is used to signal the GDB thread to stop the vCPU
9711015
#[cfg(gdb)]
9721016
debug_interrupt: AtomicBool,
1017+
/// Flag indicating whether a guest function call is currently in progress.
1018+
///
1019+
/// **true**: A guest function call is active (between call start and completion)
1020+
/// **false**: No guest function call is active
1021+
///
1022+
/// # Purpose
1023+
///
1024+
/// This flag prevents kill() from having any effect when called outside of a
1025+
/// guest function call. This solves the "kill-in-advance" problem where kill()
1026+
/// could be called before a guest function starts and would incorrectly cancel it.
1027+
call_active: AtomicBool,
9731028
partition_handle: WHV_PARTITION_HANDLE,
9741029
dropped: AtomicBool,
9751030
}
9761031

9771032
impl InterruptHandle for WindowsInterruptHandle {
9781033
fn kill(&self) -> bool {
979-
self.cancel_requested.store(true, Ordering::Relaxed);
980-
self.running.load(Ordering::Relaxed)
981-
&& unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
1034+
// Check if a call is actually active first
1035+
if !self.call_active.load(Ordering::Acquire) {
1036+
return false;
1037+
}
1038+
1039+
// Get the current running state and generation
1040+
let (running, generation) = self.get_running_and_generation();
1041+
1042+
// Set cancel_requested with the current generation
1043+
self.set_cancel_requested(generation);
1044+
1045+
// Only call WHvCancelRunVirtualProcessor if VCPU is actually running in guest mode
1046+
running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
9821047
}
9831048
#[cfg(gdb)]
9841049
fn kill_from_debugger(&self) -> bool {
9851050
self.debug_interrupt.store(true, Ordering::Relaxed);
986-
self.running.load(Ordering::Relaxed)
987-
&& unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
1051+
let (running, _) = self.get_running_and_generation();
1052+
running && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() }
1053+
}
1054+
1055+
fn get_call_active(&self) -> &AtomicBool {
1056+
&self.call_active
1057+
}
1058+
1059+
fn get_dropped(&self) -> &AtomicBool {
1060+
&self.dropped
1061+
}
1062+
1063+
fn get_running(&self) -> &AtomicU64 {
1064+
&self.running
9881065
}
9891066

990-
fn dropped(&self) -> bool {
991-
self.dropped.load(Ordering::Relaxed)
1067+
fn get_cancel_requested(&self) -> &AtomicU64 {
1068+
&self.cancel_requested
9921069
}
9931070
}

0 commit comments

Comments
 (0)