6262//! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations.
6363
6464// Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
65- // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
66- // but this is not used anywhere so it's omitted here.
65+ // 1. In the operational semantics, loads acquire the vector clock of the atomic location
66+ // irrespective of which store buffer element is loaded. That's incorrect; the synchronization clock
67+ // needs to be tracked per-store-buffer-element. (The paper has a field "clocks" for that purpose,
 68+ // but it is not actually used.) tsan11 does this correctly
69+ // (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L305).
6770//
6871// 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store.
6972// If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements.
@@ -138,16 +141,17 @@ enum LoadRecency {
138141
139142#[ derive( Debug , Clone , PartialEq , Eq ) ]
140143struct StoreElement {
141- /// The identifier of the vector index, corresponding to a thread
142- /// that performed the store.
143- store_index : VectorIdx ,
144+ /// The thread that performed the store.
145+ store_thread : VectorIdx ,
146+ /// The timestamp of the storing thread when it performed the store
147+ store_timestamp : VTimestamp ,
148+
149+ /// The vector clock that can be acquired by loading this store.
150+ sync_clock : VClock ,
144151
145152 /// Whether this store is SC.
146153 is_seqcst : bool ,
147154
148- /// The timestamp of the storing thread when it performed the store
149- timestamp : VTimestamp ,
150-
151155 /// The value of this store. `None` means uninitialized.
152156 // FIXME: Currently, we cannot represent partial initialization.
153157 val : Option < Scalar > ,
@@ -246,8 +250,10 @@ impl<'tcx> StoreBuffer {
246250 let store_elem = StoreElement {
247251 // The thread index and timestamp of the initialisation write
248252 // are never meaningfully used, so it's fine to leave them as 0
249- store_index : VectorIdx :: from ( 0 ) ,
250- timestamp : VTimestamp :: ZERO ,
253+ store_thread : VectorIdx :: from ( 0 ) ,
254+ store_timestamp : VTimestamp :: ZERO ,
255+ // The initialization write is non-atomic so nothing can be acquired.
256+ sync_clock : VClock :: default ( ) ,
251257 val : init,
252258 is_seqcst : false ,
253259 load_info : RefCell :: new ( LoadInfo :: default ( ) ) ,
@@ -276,7 +282,7 @@ impl<'tcx> StoreBuffer {
276282 thread_mgr : & ThreadManager < ' _ > ,
277283 is_seqcst : bool ,
278284 rng : & mut ( impl rand:: Rng + ?Sized ) ,
279- validate : impl FnOnce ( ) -> InterpResult < ' tcx > ,
285+ validate : impl FnOnce ( Option < & VClock > ) -> InterpResult < ' tcx > ,
280286 ) -> InterpResult < ' tcx , ( Option < Scalar > , LoadRecency ) > {
281287 // Having a live borrow to store_buffer while calling validate_atomic_load is fine
282288 // because the race detector doesn't touch store_buffer
@@ -293,7 +299,7 @@ impl<'tcx> StoreBuffer {
293299 // after we've picked a store element from the store buffer, as presented
294300 // in ATOMIC LOAD rule of the paper. This is because fetch_store
295301 // requires access to ThreadClockSet.clock, which is updated by the race detector
296- validate ( ) ?;
302+ validate ( Some ( & store_elem . sync_clock ) ) ?;
297303
298304 let ( index, clocks) = global. active_thread_state ( thread_mgr) ;
299305 let loaded = store_elem. load_impl ( index, & clocks, is_seqcst) ;
@@ -306,10 +312,11 @@ impl<'tcx> StoreBuffer {
306312 global : & DataRaceState ,
307313 thread_mgr : & ThreadManager < ' _ > ,
308314 is_seqcst : bool ,
315+ sync_clock : VClock ,
309316 ) -> InterpResult < ' tcx > {
310317 let ( index, clocks) = global. active_thread_state ( thread_mgr) ;
311318
312- self . store_impl ( val, index, & clocks. clock , is_seqcst) ;
319+ self . store_impl ( val, index, & clocks. clock , is_seqcst, sync_clock ) ;
313320 interp_ok ( ( ) )
314321 }
315322
@@ -336,7 +343,9 @@ impl<'tcx> StoreBuffer {
336343 return false ;
337344 }
338345
339- keep_searching = if store_elem. timestamp <= clocks. clock [ store_elem. store_index ] {
346+ keep_searching = if store_elem. store_timestamp
347+ <= clocks. clock [ store_elem. store_thread ]
348+ {
340349 // CoWR: if a store happens-before the current load,
341350 // then we can't read-from anything earlier in modification order.
342351 // C++20 §6.9.2.2 [intro.races] paragraph 18
@@ -348,15 +357,15 @@ impl<'tcx> StoreBuffer {
348357 // then we cannot read-from anything earlier in modification order.
349358 // C++20 §6.9.2.2 [intro.races] paragraph 16
350359 false
351- } else if store_elem. timestamp <= clocks. write_seqcst [ store_elem. store_index ]
360+ } else if store_elem. store_timestamp <= clocks. write_seqcst [ store_elem. store_thread ]
352361 && store_elem. is_seqcst
353362 {
354363 // The current non-SC load, which may be sequenced-after an SC fence,
355364 // cannot read-before the last SC store executed before the fence.
356365 // C++17 §32.4 [atomics.order] paragraph 4
357366 false
358367 } else if is_seqcst
359- && store_elem. timestamp <= clocks. read_seqcst [ store_elem. store_index ]
368+ && store_elem. store_timestamp <= clocks. read_seqcst [ store_elem. store_thread ]
360369 {
361370 // The current SC load cannot read-before the last store sequenced-before
362371 // the last SC fence.
@@ -394,17 +403,19 @@ impl<'tcx> StoreBuffer {
394403 }
395404 }
396405
397- /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock)
406+ /// ATOMIC STORE IMPL in the paper
398407 fn store_impl (
399408 & mut self ,
400409 val : Scalar ,
401410 index : VectorIdx ,
402411 thread_clock : & VClock ,
403412 is_seqcst : bool ,
413+ sync_clock : VClock ,
404414 ) {
405415 let store_elem = StoreElement {
406- store_index : index,
407- timestamp : thread_clock[ index] ,
416+ store_thread : index,
417+ store_timestamp : thread_clock[ index] ,
418+ sync_clock,
408419 // In the language provided in the paper, an atomic store takes the value from a
409420 // non-atomic memory location.
410421 // But we already have the immediate value here so we don't need to do the memory
@@ -422,7 +433,7 @@ impl<'tcx> StoreBuffer {
422433 // so that in a later SC load, only the last SC store (i.e. this one) or stores that
423434 // aren't ordered by hb with the last SC is picked.
424435 self . buffer . iter_mut ( ) . rev ( ) . for_each ( |elem| {
425- if elem. timestamp <= thread_clock[ elem. store_index ] {
436+ if elem. store_timestamp <= thread_clock[ elem. store_thread ] {
426437 elem. is_seqcst = true ;
427438 }
428439 } )
@@ -465,7 +476,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
465476 let ( alloc_id, base_offset, ..) = this. ptr_get_alloc_id ( place. ptr ( ) , 0 ) ?;
466477 if let (
467478 crate :: AllocExtra {
468- data_race : AllocDataRaceHandler :: Vclocks ( _ , Some ( alloc_buffers) ) ,
479+ data_race : AllocDataRaceHandler :: Vclocks ( data_race_clocks , Some ( alloc_buffers) ) ,
469480 ..
470481 } ,
471482 crate :: MiriMachine {
@@ -478,20 +489,29 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
478489 global. sc_write ( threads) ;
479490 }
480491 let range = alloc_range ( base_offset, place. layout . size ) ;
492+ let sync_clock = data_race_clocks. sync_clock ( range) ;
481493 let buffer = alloc_buffers. get_or_create_store_buffer_mut ( range, Some ( init) ) ?;
482494 // The RMW always reads from the most recent store.
483495 buffer. read_from_last_store ( global, threads, atomic == AtomicRwOrd :: SeqCst ) ;
484- buffer. buffered_write ( new_val, global, threads, atomic == AtomicRwOrd :: SeqCst ) ?;
496+ buffer. buffered_write (
497+ new_val,
498+ global,
499+ threads,
500+ atomic == AtomicRwOrd :: SeqCst ,
501+ sync_clock,
502+ ) ?;
485503 }
486504 interp_ok ( ( ) )
487505 }
488506
507+ /// The argument to `validate` is the synchronization clock of the memory that is being read,
508+ /// if we are reading from a store buffer element.
489509 fn buffered_atomic_read (
490510 & self ,
491511 place : & MPlaceTy < ' tcx > ,
492512 atomic : AtomicReadOrd ,
493513 latest_in_mo : Scalar ,
494- validate : impl FnOnce ( ) -> InterpResult < ' tcx > ,
514+ validate : impl FnOnce ( Option < & VClock > ) -> InterpResult < ' tcx > ,
495515 ) -> InterpResult < ' tcx , Option < Scalar > > {
496516 let this = self . eval_context_ref ( ) ;
497517 ' fallback: {
@@ -529,14 +549,16 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
529549 }
530550
531551 // Race detector or weak memory disabled, simply read the latest value
532- validate ( ) ?;
552+ validate ( None ) ?;
533553 interp_ok ( Some ( latest_in_mo) )
534554 }
535555
536556 /// Add the given write to the store buffer. (Does not change machine memory.)
537557 ///
538558 /// `init` says with which value to initialize the store buffer in case there wasn't a store
539559 /// buffer for this memory range before.
560+ ///
561+ /// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
540562 fn buffered_atomic_write (
541563 & mut self ,
542564 val : Scalar ,
@@ -548,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
548570 let ( alloc_id, base_offset, ..) = this. ptr_get_alloc_id ( dest. ptr ( ) , 0 ) ?;
549571 if let (
550572 crate :: AllocExtra {
551- data_race : AllocDataRaceHandler :: Vclocks ( _ , Some ( alloc_buffers) ) ,
573+ data_race : AllocDataRaceHandler :: Vclocks ( data_race_clocks , Some ( alloc_buffers) ) ,
552574 ..
553575 } ,
554576 crate :: MiriMachine {
@@ -560,9 +582,18 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
560582 global. sc_write ( threads) ;
561583 }
562584
563- let buffer = alloc_buffers
564- . get_or_create_store_buffer_mut ( alloc_range ( base_offset, dest. layout . size ) , init) ?;
565- buffer. buffered_write ( val, global, threads, atomic == AtomicWriteOrd :: SeqCst ) ?;
585+ let range = alloc_range ( base_offset, dest. layout . size ) ;
586+ // It's a bit annoying that we have to go back to the data race part to get the clock...
587+ // but it does make things a lot simpler.
588+ let sync_clock = data_race_clocks. sync_clock ( range) ;
589+ let buffer = alloc_buffers. get_or_create_store_buffer_mut ( range, init) ?;
590+ buffer. buffered_write (
591+ val,
592+ global,
593+ threads,
594+ atomic == AtomicWriteOrd :: SeqCst ,
595+ sync_clock,
596+ ) ?;
566597 }
567598
568599 // Caller should've written to dest with the vanilla scalar write, we do nothing here
0 commit comments