6262//! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations.
6363
6464// Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
65- // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
66- // but this is not used anywhere so it's omitted here.
65+ // 1. In the operational semantics, loads acquire the vector clock of the atomic location
66+ // irrespective of which store buffer element is loaded. That's incorrect; the synchronization clock
67+ // needs to be tracked per-store-buffer-element. (The paper has a field "clocks" for that purpose,
68+ // (The paper has a field "clocks" for that purpose,
68+ // but it is not actually used.) tsan11 does this correctly
69+ // (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L305).
6770//
6871// 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store.
6972// If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements.
@@ -138,16 +141,17 @@ enum LoadRecency {
138141
139142#[ derive( Debug , Clone , PartialEq , Eq ) ]
140143struct StoreElement {
141- /// The identifier of the vector index, corresponding to a thread
142- /// that performed the store.
143- store_index : VectorIdx ,
144+ /// The thread that performed the store.
145+ store_thread : VectorIdx ,
146+ /// The timestamp of the storing thread when it performed the store
147+ store_timestamp : VTimestamp ,
148+
149+ /// The vector clock that can be acquired by loading this store.
150+ sync_clock : VClock ,
144151
145152 /// Whether this store is SC.
146153 is_seqcst : bool ,
147154
148- /// The timestamp of the storing thread when it performed the store
149- timestamp : VTimestamp ,
150-
151155 /// The value of this store. `None` means uninitialized.
152156 // FIXME: Currently, we cannot represent partial initialization.
153157 val : Option < Scalar > ,
@@ -159,9 +163,12 @@ struct StoreElement {
159163
160164#[ derive( Debug , Clone , PartialEq , Eq , Default ) ]
161165struct LoadInfo {
162- /// Timestamp of first loads from this store element by each thread
166+ /// Timestamp of first loads from this store element by each thread.
163167 timestamps : FxHashMap < VectorIdx , VTimestamp > ,
164- /// Whether this store element has been read by an SC load
168+ /// Whether this store element has been read by an SC load.
169+ /// This is crucial to ensure we respect coherence-ordered-before. Concretely we use
170+ /// this to ensure that if a store element is seen by an SC load, then all later SC loads
171+ /// cannot see `mo`-earlier store elements.
165172 sc_loaded : bool ,
166173}
167174
@@ -243,8 +250,10 @@ impl<'tcx> StoreBuffer {
243250 let store_elem = StoreElement {
244251 // The thread index and timestamp of the initialisation write
245252 // are never meaningfully used, so it's fine to leave them as 0
246- store_index : VectorIdx :: from ( 0 ) ,
247- timestamp : VTimestamp :: ZERO ,
253+ store_thread : VectorIdx :: from ( 0 ) ,
254+ store_timestamp : VTimestamp :: ZERO ,
255+ // The initialization write is non-atomic so nothing can be acquired.
256+ sync_clock : VClock :: default ( ) ,
248257 val : init,
249258 is_seqcst : false ,
250259 load_info : RefCell :: new ( LoadInfo :: default ( ) ) ,
@@ -273,7 +282,7 @@ impl<'tcx> StoreBuffer {
273282 thread_mgr : & ThreadManager < ' _ > ,
274283 is_seqcst : bool ,
275284 rng : & mut ( impl rand:: Rng + ?Sized ) ,
276- validate : impl FnOnce ( ) -> InterpResult < ' tcx > ,
285+ validate : impl FnOnce ( Option < & VClock > ) -> InterpResult < ' tcx > ,
277286 ) -> InterpResult < ' tcx , ( Option < Scalar > , LoadRecency ) > {
278287 // Having a live borrow to store_buffer while calling validate_atomic_load is fine
279288 // because the race detector doesn't touch store_buffer
@@ -290,7 +299,7 @@ impl<'tcx> StoreBuffer {
290299 // after we've picked a store element from the store buffer, as presented
291300 // in ATOMIC LOAD rule of the paper. This is because fetch_store
292301 // requires access to ThreadClockSet.clock, which is updated by the race detector
293- validate ( ) ?;
302+ validate ( Some ( & store_elem . sync_clock ) ) ?;
294303
295304 let ( index, clocks) = global. active_thread_state ( thread_mgr) ;
296305 let loaded = store_elem. load_impl ( index, & clocks, is_seqcst) ;
@@ -303,10 +312,11 @@ impl<'tcx> StoreBuffer {
303312 global : & DataRaceState ,
304313 thread_mgr : & ThreadManager < ' _ > ,
305314 is_seqcst : bool ,
315+ sync_clock : VClock ,
306316 ) -> InterpResult < ' tcx > {
307317 let ( index, clocks) = global. active_thread_state ( thread_mgr) ;
308318
309- self . store_impl ( val, index, & clocks. clock , is_seqcst) ;
319+ self . store_impl ( val, index, & clocks. clock , is_seqcst, sync_clock ) ;
310320 interp_ok ( ( ) )
311321 }
312322
@@ -333,7 +343,9 @@ impl<'tcx> StoreBuffer {
333343 return false ;
334344 }
335345
336- keep_searching = if store_elem. timestamp <= clocks. clock [ store_elem. store_index ] {
346+ keep_searching = if store_elem. store_timestamp
347+ <= clocks. clock [ store_elem. store_thread ]
348+ {
337349 // CoWR: if a store happens-before the current load,
338350 // then we can't read-from anything earlier in modification order.
339351 // C++20 §6.9.2.2 [intro.races] paragraph 18
@@ -345,15 +357,15 @@ impl<'tcx> StoreBuffer {
345357 // then we cannot read-from anything earlier in modification order.
346358 // C++20 §6.9.2.2 [intro.races] paragraph 16
347359 false
348- } else if store_elem. timestamp <= clocks. write_seqcst [ store_elem. store_index ]
360+ } else if store_elem. store_timestamp <= clocks. write_seqcst [ store_elem. store_thread ]
349361 && store_elem. is_seqcst
350362 {
351363 // The current non-SC load, which may be sequenced-after an SC fence,
352364 // cannot read-before the last SC store executed before the fence.
353365 // C++17 §32.4 [atomics.order] paragraph 4
354366 false
355367 } else if is_seqcst
356- && store_elem. timestamp <= clocks. read_seqcst [ store_elem. store_index ]
368+ && store_elem. store_timestamp <= clocks. read_seqcst [ store_elem. store_thread ]
357369 {
358370 // The current SC load cannot read-before the last store sequenced-before
359371 // the last SC fence.
@@ -391,17 +403,19 @@ impl<'tcx> StoreBuffer {
391403 }
392404 }
393405
394- /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock)
406+ /// ATOMIC STORE IMPL in the paper
395407 fn store_impl (
396408 & mut self ,
397409 val : Scalar ,
398410 index : VectorIdx ,
399411 thread_clock : & VClock ,
400412 is_seqcst : bool ,
413+ sync_clock : VClock ,
401414 ) {
402415 let store_elem = StoreElement {
403- store_index : index,
404- timestamp : thread_clock[ index] ,
416+ store_thread : index,
417+ store_timestamp : thread_clock[ index] ,
418+ sync_clock,
405419 // In the language provided in the paper, an atomic store takes the value from a
406420 // non-atomic memory location.
407421 // But we already have the immediate value here so we don't need to do the memory
@@ -419,7 +433,7 @@ impl<'tcx> StoreBuffer {
419433 // so that in a later SC load, only the last SC store (i.e. this one) or stores that
420434 // aren't ordered by hb with the last SC is picked.
421435 self . buffer . iter_mut ( ) . rev ( ) . for_each ( |elem| {
422- if elem. timestamp <= thread_clock[ elem. store_index ] {
436+ if elem. store_timestamp <= thread_clock[ elem. store_thread ] {
423437 elem. is_seqcst = true ;
424438 }
425439 } )
@@ -462,7 +476,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
462476 let ( alloc_id, base_offset, ..) = this. ptr_get_alloc_id ( place. ptr ( ) , 0 ) ?;
463477 if let (
464478 crate :: AllocExtra {
465- data_race : AllocDataRaceHandler :: Vclocks ( _ , Some ( alloc_buffers) ) ,
479+ data_race : AllocDataRaceHandler :: Vclocks ( data_race_clocks , Some ( alloc_buffers) ) ,
466480 ..
467481 } ,
468482 crate :: MiriMachine {
@@ -475,19 +489,29 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
475489 global. sc_write ( threads) ;
476490 }
477491 let range = alloc_range ( base_offset, place. layout . size ) ;
492+ let sync_clock = data_race_clocks. sync_clock ( range) ;
478493 let buffer = alloc_buffers. get_or_create_store_buffer_mut ( range, Some ( init) ) ?;
494+ // The RMW always reads from the most recent store.
479495 buffer. read_from_last_store ( global, threads, atomic == AtomicRwOrd :: SeqCst ) ;
480- buffer. buffered_write ( new_val, global, threads, atomic == AtomicRwOrd :: SeqCst ) ?;
496+ buffer. buffered_write (
497+ new_val,
498+ global,
499+ threads,
500+ atomic == AtomicRwOrd :: SeqCst ,
501+ sync_clock,
502+ ) ?;
481503 }
482504 interp_ok ( ( ) )
483505 }
484506
507+ /// The argument to `validate` is the synchronization clock of the memory that is being read,
508+ /// if we are reading from a store buffer element.
485509 fn buffered_atomic_read (
486510 & self ,
487511 place : & MPlaceTy < ' tcx > ,
488512 atomic : AtomicReadOrd ,
489513 latest_in_mo : Scalar ,
490- validate : impl FnOnce ( ) -> InterpResult < ' tcx > ,
514+ validate : impl FnOnce ( Option < & VClock > ) -> InterpResult < ' tcx > ,
491515 ) -> InterpResult < ' tcx , Option < Scalar > > {
492516 let this = self . eval_context_ref ( ) ;
493517 ' fallback: {
@@ -525,14 +549,16 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
525549 }
526550
527551 // Race detector or weak memory disabled, simply read the latest value
528- validate ( ) ?;
552+ validate ( None ) ?;
529553 interp_ok ( Some ( latest_in_mo) )
530554 }
531555
532556 /// Add the given write to the store buffer. (Does not change machine memory.)
533557 ///
534558 /// `init` says with which value to initialize the store buffer in case there wasn't a store
535559 /// buffer for this memory range before.
560+ ///
561+ /// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
536562 fn buffered_atomic_write (
537563 & mut self ,
538564 val : Scalar ,
@@ -544,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
544570 let ( alloc_id, base_offset, ..) = this. ptr_get_alloc_id ( dest. ptr ( ) , 0 ) ?;
545571 if let (
546572 crate :: AllocExtra {
547- data_race : AllocDataRaceHandler :: Vclocks ( _ , Some ( alloc_buffers) ) ,
573+ data_race : AllocDataRaceHandler :: Vclocks ( data_race_clocks , Some ( alloc_buffers) ) ,
548574 ..
549575 } ,
550576 crate :: MiriMachine {
@@ -556,9 +582,18 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
556582 global. sc_write ( threads) ;
557583 }
558584
559- let buffer = alloc_buffers
560- . get_or_create_store_buffer_mut ( alloc_range ( base_offset, dest. layout . size ) , init) ?;
561- buffer. buffered_write ( val, global, threads, atomic == AtomicWriteOrd :: SeqCst ) ?;
585+ let range = alloc_range ( base_offset, dest. layout . size ) ;
586+ // It's a bit annoying that we have to go back to the data race part to get the clock...
587+ // but it does make things a lot simpler.
588+ let sync_clock = data_race_clocks. sync_clock ( range) ;
589+ let buffer = alloc_buffers. get_or_create_store_buffer_mut ( range, init) ?;
590+ buffer. buffered_write (
591+ val,
592+ global,
593+ threads,
594+ atomic == AtomicWriteOrd :: SeqCst ,
595+ sync_clock,
596+ ) ?;
562597 }
563598
564599 // Caller should've written to dest with the vanilla scalar write, we do nothing here
0 commit comments